diff options
| author | aarne <unknown> | 2003-09-22 13:16:55 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2003-09-22 13:16:55 +0000 |
| commit | b1402e8bd6a68a891b00a214d6cf184d66defe19 (patch) | |
| tree | 90372ac4e53dce91cf949dbf8e93be06f1d9e8bd /src/GF/UseGrammar/Morphology.hs | |
Founding the newly structured GF2.0 cvs archive.
Diffstat (limited to 'src/GF/UseGrammar/Morphology.hs')
| -rw-r--r-- | src/GF/UseGrammar/Morphology.hs | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs new file mode 100644 index 000000000..102e41340 --- /dev/null +++ b/src/GF/UseGrammar/Morphology.hs @@ -0,0 +1,116 @@ +module Morphology where + +import AbsGFC +import GFC +import PrGrammar + +import Operations + +import Char +import List (sortBy, intersperse) +import Monad (liftM) + +-- construct a morphological analyser from a GF grammar. AR 11/4/2001 + +-- we have found the binary search tree sorted by word forms more efficient +-- than a trie, at least for grammars with 7000 word forms + +type Morpho = BinTree (String,[String]) + +emptyMorpho = NT + +-- with literals +appMorpho :: Morpho -> String -> (String,[String]) +appMorpho m s = (s, ps ++ ms) where + ms = case lookupTree id s m of + Ok vs -> vs + _ -> [] + ps = [] ---- case lookupLiteral s of + ---- Ok (t,_) -> [tagPrt t] + ---- _ -> [] + +-- without literals +appMorphoOnly :: Morpho -> String -> (String,[String]) +appMorphoOnly m s = (s, ms) where + ms = case lookupTree id s m of + Ok vs -> vs + _ -> [] + +-- recognize word, exluding literals +isKnownWord :: Morpho -> String -> Bool +isKnownWord mo = not . null . snd . appMorphoOnly mo + +mkMorpho :: CanonGrammar -> Morpho +mkMorpho gr = emptyMorpho ---- +{- ---- +mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun + mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun + + -- gather forms of lexical items + allLins fun = errVal [] $ do + ts <- allLinsOfFun gr fun + ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts + return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] + prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps)) + + -- gather syncategorematic words + allSyns fun = errVal [] $ do + tss <- allLinsOfFun gr fun + let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] + return $ concat $ map wordsInTerm ss + prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f) + + -- all words, Left from lexical rules and Right syncategorematic + allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where + allFuns = allFunsWithValCat ab + lookType = errVal [] . liftM (:[]) . lookupFunType ab + lexRole t = case typeForm t of + Ok ([],_,_) -> Left + _ -> Right +-} + +-- printing full-form lexicon and results + +prMorpho :: Morpho -> String +prMorpho = unlines . map prMorphoAnalysis . tree2list + +prMorphoAnalysis :: (String,[String]) -> String +prMorphoAnalysis (w,fs) = unlines (w:fs) + +prMorphoAnalysisShort :: (String,[String]) -> String +prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where + w' = if null fs then w +++ "*" else "" + +tagPrt :: Print a => a -> String +tagPrt = ("+" ++) . prt --- could look up print name in grammar + +-- print all words recognized + +allMorphoWords :: Morpho -> [String] +allMorphoWords = map fst . tree2list + +-- analyse running text and show results either in short form or on separate lines +morphoTextShort mo = unwords . map (prMorphoAnalysisShort . appMorpho mo) . words +morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words + +-- format used in the Italian Verb Engine +prFullForm :: Morpho -> String +prFullForm = unlines . map prOne . tree2list where + prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) + +-- auxiliaries + +mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b]) +mkMorphoTree = sorted2tree . sortAssocs + +sortAssocs :: (Ord a, Eq b) => [(a,b)] -> [(a,[b])] +sortAssocs = arrange . sortBy (\ (x,_) (y,_) -> compare x y) where + arrange ((x,v):xvs) = arr x [v] xvs + arrange [] = [] + arr y vs xs = case xs of + (x,v):xvs -> if x==y then arr y vvs xvs else (y,vs) : arr x [v] xvs + where vvs = if elem v vs then vs else (v:vs) + _ -> [(y,vs)] + + |
