summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar
diff options
context:
space:
mode:
authoraarne <unknown>2003-11-18 15:30:08 +0000
committeraarne <unknown>2003-11-18 15:30:08 +0000
commitaf4bf660024928da20b3a1e004d347d6bc0647c4 (patch)
tree53e4bea7a712ec02dd49b7893df0a42ac86d810d /src/GF/UseGrammar
parent8ecf475d5a4c09939ee76106440bf08878be34b4 (diff)
Using trie more.
Diffstat (limited to 'src/GF/UseGrammar')
-rw-r--r--src/GF/UseGrammar/Custom.hs1
-rw-r--r--src/GF/UseGrammar/Morphology.hs56
2 files changed, 17 insertions, 40 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 10446413a..64cb29680 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -147,6 +147,7 @@ customGrammarPrinter =
,(strCI "cf", prCF . stateCF)
,(strCI "lbnf", prLBNF . stateCF)
,(strCI "morpho", prMorpho . stateMorpho)
+ ,(strCI "fullform",prFullForm . stateMorpho)
,(strCI "opts", prOpts . stateOptions)
,(strCI "words", unwords . stateGrammarWords)
{- ----
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs
index c8f00615a..9d9371c3c 100644
--- a/src/GF/UseGrammar/Morphology.hs
+++ b/src/GF/UseGrammar/Morphology.hs
@@ -15,40 +15,33 @@ import Glue
import Char
import List (sortBy, intersperse)
import Monad (liftM)
+import Trie2
-- construct a morphological analyser from a GF grammar. AR 11/4/2001
--- we have found the binary search tree sorted by word forms more efficient
+-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
+-- (18/11/2003) but this may change since we have to use a trie
+-- for decompositions and also want to use it in the parser
-type Morpho = BinTree (String,[String])
+type Morpho = Trie Char String
-emptyMorpho = NT
+emptyMorpho = emptyTrie
--- with literals
appMorpho :: Morpho -> String -> (String,[String])
-appMorpho m s = (s, ps ++ ms) where
- ms = case lookupTree id s m of
- Ok vs -> vs
- _ -> []
- ps = [] ---- case lookupLiteral s of
- ---- Ok (t,_) -> [tagPrt t]
- ---- _ -> []
+appMorpho = appMorphoOnly
+---- add lookup for literals
-- without literals
appMorphoOnly :: Morpho -> String -> (String,[String])
-appMorphoOnly m s = (s, ms) where
- ms = case lookupTree id s m of
- Ok vs -> vs
- _ -> []
+appMorphoOnly m s = trieLookup m s
-- recognize word, exluding literals
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo = not . null . snd . appMorphoOnly mo
mkMorpho :: CanonGrammar -> Ident -> Morpho
----- mkMorpho gr = emptyMorpho ----
-mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where
+mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
@@ -58,14 +51,14 @@ mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where
ts <- allLinsOfFun gr (CIQ a f)
ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
- prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps))
+ prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
-- gather syncategorematic words
allSyns fun@(m,f) = errVal [] $ do
tss <- allLinsOfFun gr (CIQ a f)
let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
return $ concat $ map wordsInTerm ss
- prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f)
+ prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])
-- all words, Left from lexical rules and Right syncategorematic
allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
@@ -77,7 +70,7 @@ mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where
-- printing full-form lexicon and results
prMorpho :: Morpho -> String
-prMorpho = unlines . map prMorphoAnalysis . tree2list
+prMorpho = unlines . map prMorphoAnalysis . collapse
prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w,fs) = unlines (w:fs)
@@ -92,7 +85,7 @@ tagPrt (m,c) = "+" ++ prt c --- module name
-- print all words recognized
allMorphoWords :: Morpho -> [String]
-allMorphoWords = map fst . tree2list
+allMorphoWords = map fst . collapse
-- analyse running text and show results either in short form or on separate lines
morphoTextShort mo = unwords . map (prMorphoAnalysisShort . appMorpho mo) . words
@@ -100,7 +93,7 @@ morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . word
-- format used in the Italian Verb Engine
prFullForm :: Morpho -> String
-prFullForm = unlines . map prOne . tree2list where
+prFullForm = unlines . map prOne . collapse where
prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
-- using Huet's unglueing method to find word boundaries
@@ -109,21 +102,4 @@ prFullForm = unlines . map prOne . tree2list where
---- Moreover, we should specify the cases in which this happens - not all words
decomposeWords :: Morpho -> String -> [String]
-decomposeWords mo s = errVal (words s) $
- decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s
-
--- auxiliaries
-
-mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b])
-mkMorphoTree = sorted2tree . sortAssocs
-
-sortAssocs :: (Ord a, Eq b) => [(a,b)] -> [(a,[b])]
-sortAssocs = arrange . sortBy (\ (x,_) (y,_) -> compare x y) where
- arrange ((x,v):xvs) = arr x [v] xvs
- arrange [] = []
- arr y vs xs = case xs of
- (x,v):xvs -> if x==y then arr y vvs xvs else (y,vs) : arr x [v] xvs
- where vvs = if elem v vs then vs else (v:vs)
- _ -> [(y,vs)]
-
-
+decomposeWords mo s = errVal (words s) $ decomposeSimple mo s