Morphological analysis and glueing.

author: aarne <unknown> 2003-11-10 07:55:45 +0000
committer: aarne <unknown> 2003-11-10 07:55:45 +0000
commit: 249d506f58a8b5f8ef87295ab3dde2d13ddd3885 (patch)
tree: 658679a324c4f360901c38637464e8f63b59515d /src/GF/UseGrammar
parent: 4c99687f217ce258f821d55e68f5403233f6dea7 (diff)
3 files changed, 41 insertions, 27 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 1048aab95..4d5eb8122 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -21,11 +21,13 @@ import CFIdent
 
 ---- import CFtoGrammar
 import PPrCF
+import PrLBNF
 import PrGrammar
+import MkGFC
 
 import Zipper
 
-----import Morphology
+import Morphology
 -----import GrammarToHaskell
 -----import GrammarToCanon (showCanon, showCanonOpt)
 -----import qualified GrammarToGFC as GFC
@@ -141,16 +143,16 @@ customGrammarParser =
 customGrammarPrinter = 
   customData "Grammar printers, selected by option -printer=x" $
   [
-----   (strCI "gf",      prt)  -- DEFAULT
-  (strCI "cf",      prCF . stateCF)
-
+   (strCI "gfc",     prCanon . stateGrammarST) -- DEFAULT
+  ,(strCI "cf",      prCF . stateCF)
+  ,(strCI "lbnf",    prLBNF . stateCF)
+  ,(strCI "morpho",  prMorpho . stateMorpho)
+  ,(strCI "opts",    prOpts . stateOptions)
 {- ----
    (strCI "gf",      prt  . st2grammar . stateGrammarST)  -- DEFAULT
   ,(strCI "canon",   showCanon "Lang" . stateGrammarST)
   ,(strCI "gfc",     GFC.showGFC . stateGrammarST)
   ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST)
-  ,(strCI "morpho",  prMorpho . stateMorpho)
-  ,(strCI "opts",    prOpts . stateOptions)
 -}
 -- add your own grammar printers here
 --- also include printing via grammar2syntax!
@@ -236,6 +238,7 @@ customTokenizer =
   ,(strCI "chars",     const $ map (tS . singleton))
   ,(strCI "code",      const $ lexHaskell)
   ,(strCI "text",      const $ lexText)
+  ,(strCI "unglue",    \gr -> map tS . decomposeWords (stateMorpho gr))
 ----  ,(strCI "codelit",   lexHaskellLiteral . stateIsWord)
 ----  ,(strCI "textlit",   lexTextLiteral . stateIsWord)
   ,(strCI "codeC",     const $ lexC2M)
@@ -253,7 +256,8 @@ customUntokenizer =
   ,(strCI "textlit",   const $ formatAsTextLit)
   ,(strCI "codelit",   const $ formatAsCodeLit)
   ,(strCI "concat",    const $ concat . words)
-  ,(strCI "bind",      const $ performBinds)
+  ,(strCI "glue",      const $ performBinds)
+  ,(strCI "bind",      const $ performBinds) -- backward compat
 -- add your own untokenizers here
   ]
   ++ moreCustomUntokenizer
diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs
index a46200b36..c439d62b2 100644
--- a/src/GF/UseGrammar/Linear.hs
+++ b/src/GF/UseGrammar/Linear.hs
@@ -159,7 +159,7 @@ linearizeToStrss gr mk e = do
   R rs <- linearizeToRecord gr mk e ----
   t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
   return $ map strsFromTerm $ allInTable t 
-  
+-}
 
 -- the value is a list of strings, not forgetting their arguments
 allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
@@ -168,9 +168,6 @@ allLinsOfFun gr f = do
   allLinValues t
 
 
-
--}
-
 -- returns printname if one exists; otherwise linearizes with metas
 
 printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs
index 102e41340..c8f00615a 100644
--- a/src/GF/UseGrammar/Morphology.hs
+++ b/src/GF/UseGrammar/Morphology.hs
@@ -3,8 +3,14 @@ module Morphology where
 import AbsGFC
 import GFC
 import PrGrammar
+import CMacros
+import LookAbs
+import Ident
+import qualified Macros as M
+import Linear
 
 import Operations
+import Glue
 
 import Char
 import List (sortBy, intersperse)
@@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where
 isKnownWord :: Morpho -> String -> Bool
 isKnownWord mo = not . null . snd . appMorphoOnly mo
 
-mkMorpho :: CanonGrammar -> Morpho 
-mkMorpho gr = emptyMorpho ----
-{- ----
-mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where
+mkMorpho :: CanonGrammar -> Ident -> Morpho 
+---- mkMorpho gr = emptyMorpho ----
+mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where
+
   mkOne (Left (fun,c))  = map (prOne fun c) $ allLins fun
   mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
   
   -- gather forms of lexical items
-  allLins fun = errVal [] $ do
-    ts <- allLinsOfFun gr fun 
+  allLins fun@(m,f) = errVal [] $ do
+    ts <- allLinsOfFun gr (CIQ a f)  
     ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
     return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
-  prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps))
+  prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps))
 
   -- gather syncategorematic words
-  allSyns fun = errVal [] $ do
-    tss <- allLinsOfFun gr fun 
+  allSyns fun@(m,f) = errVal [] $ do
+    tss <- allLinsOfFun gr (CIQ a f) 
     let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
     return $ concat $ map wordsInTerm ss
   prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f)
 
   -- all words, Left from lexical rules and Right syncategorematic
-  allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where
-    allFuns = allFunsWithValCat ab
-    lookType = errVal [] . liftM (:[]) . lookupFunType ab
-    lexRole t = case typeForm t of
+  allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
+    allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
+    lexRole t = case M.typeForm t of
       Ok ([],_,_) -> Left
       _ -> Right
--}
 
 -- printing full-form lexicon and results
 
@@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String
 prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
   w' = if null fs then w +++ "*" else ""
 
-tagPrt :: Print a => a -> String
-tagPrt = ("+" ++) . prt --- could look up print name in grammar
+tagPrt :: Print a => (a,a) -> String
+tagPrt (m,c) = "+" ++ prt c --- module name
 
 -- print all words recognized
 
@@ -99,6 +103,15 @@ prFullForm :: Morpho -> String
 prFullForm = unlines . map prOne . tree2list where
   prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
 
+-- using Huet's unglueing method to find word boundaries
+---- it would be much better to use a trie also for morphological analysis,
+---- so this is for the sake of experiment
+---- Moreover, we should specify the cases in which this happens - not all words
+
+decomposeWords :: Morpho -> String -> [String]
+decomposeWords mo s = errVal (words s) $
+  decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s
+
 -- auxiliaries
 
 mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b])
author	aarne <unknown>	2003-11-10 07:55:45 +0000
committer	aarne <unknown>	2003-11-10 07:55:45 +0000
commit	249d506f58a8b5f8ef87295ab3dde2d13ddd3885 (patch)
tree	658679a324c4f360901c38637464e8f63b59515d /src/GF/UseGrammar
parent	4c99687f217ce258f821d55e68f5403233f6dea7 (diff)