diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
| commit | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch) | |
| tree | 0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/UseGrammar/Morphology.hs | |
| parent | fe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff) | |
removed src for 2.9
Diffstat (limited to 'src/GF/UseGrammar/Morphology.hs')
| -rw-r--r-- | src/GF/UseGrammar/Morphology.hs | 140 |
1 files changed, 0 insertions, 140 deletions
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs deleted file mode 100644 index 3aeb08dc7..000000000 --- a/src/GF/UseGrammar/Morphology.hs +++ /dev/null @@ -1,140 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Morphology --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:49 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.8 $ --- --- Morphological analyser constructed from a GF grammar. --- --- we first found the binary search tree sorted by word forms more efficient --- than a trie, at least for grammars with 7000 word forms --- (18\/11\/2003) but this may change since we have to use a trie --- for decompositions and also want to use it in the parser ------------------------------------------------------------------------------ - -module GF.UseGrammar.Morphology where - -import GF.Canon.AbsGFC -import GF.Canon.GFC -import GF.Grammar.PrGrammar -import GF.Canon.CMacros -import GF.Canon.Look -import GF.Grammar.LookAbs -import GF.Infra.Ident -import qualified GF.Grammar.Macros as M -import GF.UseGrammar.Linear - -import GF.Data.Operations -import GF.Data.Glue - -import Data.Char -import Data.List (sortBy, intersperse) -import Control.Monad (liftM) -import GF.Data.Trie2 - --- construct a morphological analyser from a GF grammar. AR 11/4/2001 - --- we first found the binary search tree sorted by word forms more efficient --- than a trie, at least for grammars with 7000 word forms --- (18\/11\/2003) but this may change since we have to use a trie --- for decompositions and also want to use it in the parser - -type Morpho = Trie Char String - -emptyMorpho :: Morpho -emptyMorpho = emptyTrie - -appMorpho :: Morpho -> String -> (String,[String]) -appMorpho = appMorphoOnly ----- add lookup for literals - --- without literals -appMorphoOnly :: Morpho -> String -> (String,[String]) -appMorphoOnly m s = trieLookup m s - --- recognize word, exluding literals -isKnownWord :: Morpho -> String -> Bool -isKnownWord mo = not . null . snd . appMorphoOnly mo - -mkMorpho :: CanonGrammar -> Ident -> Morpho -mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where - - comp = ccompute gr [] -- to undo 'values' optimization - - mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun - mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun - - -- gather forms of lexical items - allLins fun@(m,f) = errVal [] $ do - ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues - ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts - return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] - prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)]) - - -- gather syncategorematic words - allSyns fun@(m,f) = errVal [] $ do - tss <- allLinsOfFun gr (CIQ a f) - let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] - return $ concat $ map wordsInTerm ss - prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f]) - - -- all words, Left from lexical rules and Right syncategorematic - allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where - allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] - lexRole t = case M.typeForm t of - Ok ([],_,_) -> Left - _ -> Right - --- printing full-form lexicon and results - -prMorpho :: Morpho -> String -prMorpho = unlines . map prMorphoAnalysis . collapse - -prMorphoAnalysis :: (String,[String]) -> String -prMorphoAnalysis (w,fs0) = - let fs = filter (not . null) fs0 in - if null fs then w ++++ "*" else unlines (w:fs) - -prMorphoAnalysisShort :: (String,[String]) -> String -prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where - w' = if null fs then w +++ "*" else "" - -tagPrt :: Print a => (a,a) -> String -tagPrt (m,c) = "+" ++ prt c --- module name - --- | print all words recognized -allMorphoWords :: Morpho -> [String] -allMorphoWords = map fst . collapse - --- analyse running text and show results either in short form or on separate lines - --- | analyse running text and show just the word, with "*" if not found -morphoTextStatus :: Morpho -> String -> String -morphoTextStatus mo = unlines . map (prMark . appMorpho mo) . words where - prMark (w,fs) = if null fs then "*" +++ w else w - --- | analyse running text and show results in short form, one word per line -morphoTextShort :: Morpho -> String -> String -morphoTextShort mo = unlines . map (prMorphoAnalysisShort . appMorpho mo) . words - --- | analyse running text and show results on separate lines -morphoText :: Morpho -> String -> String -morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words - --- format used in the Italian Verb Engine -prFullForm :: Morpho -> String -prFullForm = unlines . map prOne . collapse where - prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) - --- using Huet's unglueing method to find word boundaries ----- it would be much better to use a trie also for morphological analysis, ----- so this is for the sake of experiment ----- Moreover, we should specify the cases in which this happens - not all words - -decomposeWords :: Morpho -> String -> [String] -decomposeWords mo s = errVal (words s) $ decomposeSimple mo s |
