summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar/Morphology.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
commitb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch)
tree0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/UseGrammar/Morphology.hs
parentfe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff)
removed src for 2.9
Diffstat (limited to 'src/GF/UseGrammar/Morphology.hs')
-rw-r--r--src/GF/UseGrammar/Morphology.hs140
1 files changed, 0 insertions, 140 deletions
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs
deleted file mode 100644
index 3aeb08dc7..000000000
--- a/src/GF/UseGrammar/Morphology.hs
+++ /dev/null
@@ -1,140 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : Morphology
--- Maintainer : AR
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/04/21 16:23:49 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.8 $
---
--- Morphological analyser constructed from a GF grammar.
---
--- we first found the binary search tree sorted by word forms more efficient
--- than a trie, at least for grammars with 7000 word forms
--- (18\/11\/2003) but this may change since we have to use a trie
--- for decompositions and also want to use it in the parser
------------------------------------------------------------------------------
-
-module GF.UseGrammar.Morphology where
-
-import GF.Canon.AbsGFC
-import GF.Canon.GFC
-import GF.Grammar.PrGrammar
-import GF.Canon.CMacros
-import GF.Canon.Look
-import GF.Grammar.LookAbs
-import GF.Infra.Ident
-import qualified GF.Grammar.Macros as M
-import GF.UseGrammar.Linear
-
-import GF.Data.Operations
-import GF.Data.Glue
-
-import Data.Char
-import Data.List (sortBy, intersperse)
-import Control.Monad (liftM)
-import GF.Data.Trie2
-
--- construct a morphological analyser from a GF grammar. AR 11/4/2001
-
--- we first found the binary search tree sorted by word forms more efficient
--- than a trie, at least for grammars with 7000 word forms
--- (18\/11\/2003) but this may change since we have to use a trie
--- for decompositions and also want to use it in the parser
-
-type Morpho = Trie Char String
-
-emptyMorpho :: Morpho
-emptyMorpho = emptyTrie
-
-appMorpho :: Morpho -> String -> (String,[String])
-appMorpho = appMorphoOnly
----- add lookup for literals
-
--- without literals
-appMorphoOnly :: Morpho -> String -> (String,[String])
-appMorphoOnly m s = trieLookup m s
-
--- recognize word, exluding literals
-isKnownWord :: Morpho -> String -> Bool
-isKnownWord mo = not . null . snd . appMorphoOnly mo
-
-mkMorpho :: CanonGrammar -> Ident -> Morpho
-mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
-
- comp = ccompute gr [] -- to undo 'values' optimization
-
- mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
- mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
-
- -- gather forms of lexical items
- allLins fun@(m,f) = errVal [] $ do
- ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues
- ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
- return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
- prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
-
- -- gather syncategorematic words
- allSyns fun@(m,f) = errVal [] $ do
- tss <- allLinsOfFun gr (CIQ a f)
- let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
- return $ concat $ map wordsInTerm ss
- prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])
-
- -- all words, Left from lexical rules and Right syncategorematic
- allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
- allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
- lexRole t = case M.typeForm t of
- Ok ([],_,_) -> Left
- _ -> Right
-
--- printing full-form lexicon and results
-
-prMorpho :: Morpho -> String
-prMorpho = unlines . map prMorphoAnalysis . collapse
-
-prMorphoAnalysis :: (String,[String]) -> String
-prMorphoAnalysis (w,fs0) =
- let fs = filter (not . null) fs0 in
- if null fs then w ++++ "*" else unlines (w:fs)
-
-prMorphoAnalysisShort :: (String,[String]) -> String
-prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
- w' = if null fs then w +++ "*" else ""
-
-tagPrt :: Print a => (a,a) -> String
-tagPrt (m,c) = "+" ++ prt c --- module name
-
--- | print all words recognized
-allMorphoWords :: Morpho -> [String]
-allMorphoWords = map fst . collapse
-
--- analyse running text and show results either in short form or on separate lines
-
--- | analyse running text and show just the word, with "*" if not found
-morphoTextStatus :: Morpho -> String -> String
-morphoTextStatus mo = unlines . map (prMark . appMorpho mo) . words where
- prMark (w,fs) = if null fs then "*" +++ w else w
-
--- | analyse running text and show results in short form, one word per line
-morphoTextShort :: Morpho -> String -> String
-morphoTextShort mo = unlines . map (prMorphoAnalysisShort . appMorpho mo) . words
-
--- | analyse running text and show results on separate lines
-morphoText :: Morpho -> String -> String
-morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words
-
--- format used in the Italian Verb Engine
-prFullForm :: Morpho -> String
-prFullForm = unlines . map prOne . collapse where
- prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
-
--- using Huet's unglueing method to find word boundaries
----- it would be much better to use a trie also for morphological analysis,
----- so this is for the sake of experiment
----- Moreover, we should specify the cases in which this happens - not all words
-
-decomposeWords :: Morpho -> String -> [String]
-decomposeWords mo s = errVal (words s) $ decomposeSimple mo s