diff options
| author | aarne <unknown> | 2003-11-10 07:55:45 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2003-11-10 07:55:45 +0000 |
| commit | 249d506f58a8b5f8ef87295ab3dde2d13ddd3885 (patch) | |
| tree | 658679a324c4f360901c38637464e8f63b59515d /src/GF/UseGrammar | |
| parent | 4c99687f217ce258f821d55e68f5403233f6dea7 (diff) | |
Morphological analysis and glueing.
Diffstat (limited to 'src/GF/UseGrammar')
| -rw-r--r-- | src/GF/UseGrammar/Custom.hs | 18 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Linear.hs | 5 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Morphology.hs | 45 |
3 files changed, 41 insertions, 27 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 1048aab95..4d5eb8122 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -21,11 +21,13 @@ import CFIdent ---- import CFtoGrammar import PPrCF +import PrLBNF import PrGrammar +import MkGFC import Zipper -----import Morphology +import Morphology -----import GrammarToHaskell -----import GrammarToCanon (showCanon, showCanonOpt) -----import qualified GrammarToGFC as GFC @@ -141,16 +143,16 @@ customGrammarParser = customGrammarPrinter = customData "Grammar printers, selected by option -printer=x" $ [ ----- (strCI "gf", prt) -- DEFAULT - (strCI "cf", prCF . stateCF) - + (strCI "gfc", prCanon . stateGrammarST) -- DEFAULT + ,(strCI "cf", prCF . stateCF) + ,(strCI "lbnf", prLBNF . stateCF) + ,(strCI "morpho", prMorpho . stateMorpho) + ,(strCI "opts", prOpts . stateOptions) {- ---- (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT ,(strCI "canon", showCanon "Lang" . stateGrammarST) ,(strCI "gfc", GFC.showGFC . stateGrammarST) ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST) - ,(strCI "morpho", prMorpho . stateMorpho) - ,(strCI "opts", prOpts . stateOptions) -} -- add your own grammar printers here --- also include printing via grammar2syntax! @@ -236,6 +238,7 @@ customTokenizer = ,(strCI "chars", const $ map (tS . singleton)) ,(strCI "code", const $ lexHaskell) ,(strCI "text", const $ lexText) + ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr)) ---- ,(strCI "codelit", lexHaskellLiteral . stateIsWord) ---- ,(strCI "textlit", lexTextLiteral . stateIsWord) ,(strCI "codeC", const $ lexC2M) @@ -253,7 +256,8 @@ customUntokenizer = ,(strCI "textlit", const $ formatAsTextLit) ,(strCI "codelit", const $ formatAsCodeLit) ,(strCI "concat", const $ concat . words) - ,(strCI "bind", const $ performBinds) + ,(strCI "glue", const $ performBinds) + ,(strCI "bind", const $ performBinds) -- backward compat -- add your own untokenizers here ] ++ moreCustomUntokenizer diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs index a46200b36..c439d62b2 100644 --- a/src/GF/UseGrammar/Linear.hs +++ b/src/GF/UseGrammar/Linear.hs @@ -159,7 +159,7 @@ linearizeToStrss gr mk e = do R rs <- linearizeToRecord gr mk e ---- t <- lookupErr linLab0 [(r,s) | Ass r s <- rs] return $ map strsFromTerm $ allInTable t - +-} -- the value is a list of strings, not forgetting their arguments allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]] @@ -168,9 +168,6 @@ allLinsOfFun gr f = do allLinValues t - --} - -- returns printname if one exists; otherwise linearizes with metas printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs index 102e41340..c8f00615a 100644 --- a/src/GF/UseGrammar/Morphology.hs +++ b/src/GF/UseGrammar/Morphology.hs @@ -3,8 +3,14 @@ module Morphology where import AbsGFC import GFC import PrGrammar +import CMacros +import LookAbs +import Ident +import qualified Macros as M +import Linear import Operations +import Glue import Char import List (sortBy, intersperse) @@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where isKnownWord :: Morpho -> String -> Bool isKnownWord mo = not . null . snd . appMorphoOnly mo -mkMorpho :: CanonGrammar -> Morpho -mkMorpho gr = emptyMorpho ---- -{- ---- -mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where +mkMorpho :: CanonGrammar -> Ident -> Morpho +---- mkMorpho gr = emptyMorpho ---- +mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun -- gather forms of lexical items - allLins fun = errVal [] $ do - ts <- allLinsOfFun gr fun + allLins fun@(m,f) = errVal [] $ do + ts <- allLinsOfFun gr (CIQ a f) ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] - prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps)) + prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps)) -- gather syncategorematic words - allSyns fun = errVal [] $ do - tss <- allLinsOfFun gr fun + allSyns fun@(m,f) = errVal [] $ do + tss <- allLinsOfFun gr (CIQ a f) let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] return $ concat $ map wordsInTerm ss prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f) -- all words, Left from lexical rules and Right syncategorematic - allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where - allFuns = allFunsWithValCat ab - lookType = errVal [] . liftM (:[]) . lookupFunType ab - lexRole t = case typeForm t of + allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where + allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] + lexRole t = case M.typeForm t of Ok ([],_,_) -> Left _ -> Right --} -- printing full-form lexicon and results @@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where w' = if null fs then w +++ "*" else "" -tagPrt :: Print a => a -> String -tagPrt = ("+" ++) . prt --- could look up print name in grammar +tagPrt :: Print a => (a,a) -> String +tagPrt (m,c) = "+" ++ prt c --- module name -- print all words recognized @@ -99,6 +103,15 @@ prFullForm :: Morpho -> String prFullForm = unlines . map prOne . tree2list where prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) +-- using Huet's unglueing method to find word boundaries +---- it would be much better to use a trie also for morphological analysis, +---- so this is for the sake of experiment +---- Moreover, we should specify the cases in which this happens - not all words + +decomposeWords :: Morpho -> String -> [String] +decomposeWords mo s = errVal (words s) $ + decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s + -- auxiliaries mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b]) |
