From 249d506f58a8b5f8ef87295ab3dde2d13ddd3885 Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 10 Nov 2003 07:55:45 +0000 Subject: Morphological analysis and glueing. --- src/GF/UseGrammar/Morphology.hs | 45 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) (limited to 'src/GF/UseGrammar/Morphology.hs') diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs index 102e41340..c8f00615a 100644 --- a/src/GF/UseGrammar/Morphology.hs +++ b/src/GF/UseGrammar/Morphology.hs @@ -3,8 +3,14 @@ module Morphology where import AbsGFC import GFC import PrGrammar +import CMacros +import LookAbs +import Ident +import qualified Macros as M +import Linear import Operations +import Glue import Char import List (sortBy, intersperse) @@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where isKnownWord :: Morpho -> String -> Bool isKnownWord mo = not . null . snd . appMorphoOnly mo -mkMorpho :: CanonGrammar -> Morpho -mkMorpho gr = emptyMorpho ---- -{- ---- -mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where +mkMorpho :: CanonGrammar -> Ident -> Morpho +---- mkMorpho gr = emptyMorpho ---- +mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun -- gather forms of lexical items - allLins fun = errVal [] $ do - ts <- allLinsOfFun gr fun + allLins fun@(m,f) = errVal [] $ do + ts <- allLinsOfFun gr (CIQ a f) ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] - prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps)) + prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps)) -- gather syncategorematic words - allSyns fun = errVal [] $ do - tss <- allLinsOfFun gr fun + allSyns fun@(m,f) = errVal [] $ do + tss <- allLinsOfFun gr (CIQ a f) let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] return $ concat $ map wordsInTerm ss prSyn f s = (s, "+" ++ tagPrt f) -- all words, Left from lexical rules and Right syncategorematic - allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where - allFuns = allFunsWithValCat ab - lookType = errVal [] . liftM (:[]) . lookupFunType ab - lexRole t = case typeForm t of + allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where + allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] + lexRole t = case M.typeForm t of Ok ([],_,_) -> Left _ -> Right --} -- printing full-form lexicon and results @@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where w' = if null fs then w +++ "*" else "" -tagPrt :: Print a => a -> String -tagPrt = ("+" ++) . prt --- could look up print name in grammar +tagPrt :: Print a => (a,a) -> String +tagPrt (m,c) = "+" ++ prt c --- module name -- print all words recognized @@ -99,6 +103,15 @@ prFullForm :: Morpho -> String prFullForm = unlines . map prOne . tree2list where prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) +-- using Huet's unglueing method to find word boundaries +---- it would be much better to use a trie also for morphological analysis, +---- so this is for the sake of experiment +---- Moreover, we should specify the cases in which this happens - not all words + +decomposeWords :: Morpho -> String -> [String] +decomposeWords mo s = errVal (words s) $ + decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s + -- auxiliaries mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b]) -- cgit v1.2.3