diff options
| author | aarne <unknown> | 2003-11-10 07:55:45 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2003-11-10 07:55:45 +0000 |
| commit | 249d506f58a8b5f8ef87295ab3dde2d13ddd3885 (patch) | |
| tree | 658679a324c4f360901c38637464e8f63b59515d /src/GF/UseGrammar/Morphology.hs | |
| parent | 4c99687f217ce258f821d55e68f5403233f6dea7 (diff) | |
Morphological analysis and glueing.
Diffstat (limited to 'src/GF/UseGrammar/Morphology.hs')
| -rw-r--r-- | src/GF/UseGrammar/Morphology.hs | 45 |
1 files changed, 29 insertions, 16 deletions
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs index 102e41340..c8f00615a 100644 --- a/src/GF/UseGrammar/Morphology.hs +++ b/src/GF/UseGrammar/Morphology.hs @@ -3,8 +3,14 @@ module Morphology where import AbsGFC import GFC import PrGrammar +import CMacros +import LookAbs +import Ident +import qualified Macros as M +import Linear import Operations +import Glue import Char import List (sortBy, intersperse) @@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where isKnownWord :: Morpho -> String -> Bool isKnownWord mo = not . null . snd . appMorphoOnly mo -mkMorpho :: CanonGrammar -> Morpho -mkMorpho gr = emptyMorpho ---- -{- ---- -mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where +mkMorpho :: CanonGrammar -> Ident -> Morpho +---- mkMorpho gr = emptyMorpho ---- +mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun -- gather forms of lexical items - allLins fun = errVal [] $ do - ts <- allLinsOfFun gr fun + allLins fun@(m,f) = errVal [] $ do + ts <- allLinsOfFun gr (CIQ a f) ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] - prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps)) + prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps)) -- gather syncategorematic words - allSyns fun = errVal [] $ do - tss <- allLinsOfFun gr fun + allSyns fun@(m,f) = errVal [] $ do + tss <- allLinsOfFun gr (CIQ a f) let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] return $ concat $ map wordsInTerm ss prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f) -- all words, Left from lexical rules and Right syncategorematic - allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where - allFuns = allFunsWithValCat ab - lookType = errVal [] . liftM (:[]) . lookupFunType ab - lexRole t = case typeForm t of + allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where + allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] + lexRole t = case M.typeForm t of Ok ([],_,_) -> Left _ -> Right --} -- printing full-form lexicon and results @@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where w' = if null fs then w +++ "*" else "" -tagPrt :: Print a => a -> String -tagPrt = ("+" ++) . prt --- could look up print name in grammar +tagPrt :: Print a => (a,a) -> String +tagPrt (m,c) = "+" ++ prt c --- module name -- print all words recognized @@ -99,6 +103,15 @@ prFullForm :: Morpho -> String prFullForm = unlines . map prOne . tree2list where prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) +-- using Huet's unglueing method to find word boundaries +---- it would be much better to use a trie also for morphological analysis, +---- so this is for the sake of experiment +---- Moreover, we should specify the cases in which this happens - not all words + +decomposeWords :: Morpho -> String -> [String] +decomposeWords mo s = errVal (words s) $ + decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s + -- auxiliaries mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b]) |
