1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
----------------------------------------------------------------------
-- |
-- Module : Morphology
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:49 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- Morphological analyser constructed from a GF grammar.
--
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie
-- for decompositions and also want to use it in the parser
-----------------------------------------------------------------------------
module GF.UseGrammar.Morphology where
import GF.Canon.AbsGFC
import GF.Canon.GFC
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Infra.Ident
import qualified GF.Grammar.Macros as M
import GF.UseGrammar.Linear
import GF.Data.Operations
import GF.Data.Glue
import Data.Char
import Data.List (sortBy, intersperse)
import Control.Monad (liftM)
import GF.Data.Trie2
-- construct a morphological analyser from a GF grammar. AR 11/4/2001
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie
-- for decompositions and also want to use it in the parser
type Morpho = Trie Char String
emptyMorpho :: Morpho
emptyMorpho = emptyTrie
appMorpho :: Morpho -> String -> (String,[String])
appMorpho = appMorphoOnly
---- add lookup for literals
-- without literals
appMorphoOnly :: Morpho -> String -> (String,[String])
appMorphoOnly m s = trieLookup m s
-- recognize word, exluding literals
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo = not . null . snd . appMorphoOnly mo
mkMorpho :: CanonGrammar -> Ident -> Morpho
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
comp = ccompute gr [] -- to undo 'values' optimization
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
-- gather forms of lexical items
allLins fun@(m,f) = errVal [] $ do
ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues
ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
-- gather syncategorematic words
allSyns fun@(m,f) = errVal [] $ do
tss <- allLinsOfFun gr (CIQ a f)
let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
return $ concat $ map wordsInTerm ss
prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])
-- all words, Left from lexical rules and Right syncategorematic
allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
lexRole t = case M.typeForm t of
Ok ([],_,_) -> Left
_ -> Right
-- printing full-form lexicon and results
prMorpho :: Morpho -> String
prMorpho = unlines . map prMorphoAnalysis . collapse
prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w,fs0) =
let fs = filter (not . null) fs0 in
if null fs then w ++++ "*" else unlines (w:fs)
prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
w' = if null fs then w +++ "*" else ""
tagPrt :: Print a => (a,a) -> String
tagPrt (m,c) = "+" ++ prt c --- module name
-- | print all words recognized
allMorphoWords :: Morpho -> [String]
allMorphoWords = map fst . collapse
-- analyse running text and show results either in short form or on separate lines
-- | analyse running text and show just the word, with "*" if not found
morphoTextStatus :: Morpho -> String -> String
morphoTextStatus mo = unlines . map (prMark . appMorpho mo) . words where
prMark (w,fs) = if null fs then "*" +++ w else w
-- | analyse running text and show results in short form, one word per line
morphoTextShort :: Morpho -> String -> String
morphoTextShort mo = unlines . map (prMorphoAnalysisShort . appMorpho mo) . words
-- | analyse running text and show results on separate lines
morphoText :: Morpho -> String -> String
morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words
-- format used in the Italian Verb Engine
prFullForm :: Morpho -> String
prFullForm = unlines . map prOne . collapse where
prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
-- using Huet's unglueing method to find word boundaries
---- it would be much better to use a trie also for morphological analysis,
---- so this is for the sake of experiment
---- Moreover, we should specify the cases in which this happens - not all words
decomposeWords :: Morpho -> String -> [String]
decomposeWords mo s = errVal (words s) $ decomposeSimple mo s
|