1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
----------------------------------------------------------------------
-- |
-- Module : CanonToCF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/03/29 11:17:56 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.12 $
--
-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003
-----------------------------------------------------------------------------
module CanonToCF (canon2cf) where
import GF.System.Tracing -- peb 8/6-04
import Operations
import Option
import Ident
import AbsGFC
import LookAbs (allBindCatsOf)
import GFC
import Values (isPredefCat,cPredefAbs)
import PrGrammar
import CMacros
import qualified Modules as M
import CF
import CFIdent
import Morphology
import Trie2
import List (nub,partition)
import Monad
-- | The main function: for a given cnc module 'm', build the CF grammar with all the
-- rules coming from modules that 'm' extends. The categories are qualified by
-- the abstract module name 'a' that 'm' is of.
canon2cf :: Options -> CanonGrammar -> Ident -> Err CF
canon2cf opts gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
let ms = M.allExtends gr c
a <- M.abstractOfConcrete gr c
let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
let mms = [(a, tree2list (M.jments m)) | m <- cncs]
rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms
let bindcats = map snd $ allBindCatsOf gr
let rules = filter (not . isCircularCF) rules0 ---- temporarily here
let grules = groupCFRules rules
let predef = mkCFPredef opts bindcats grules
return $ CF predef
cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule]
cnc2cfCond opts m gr =
liftM concat $
mapM lin2cf [(m,fun,cat,args,lin) | (fun, CncFun cat args lin _) <- gr]
type IFun = Ident
type ICat = CIdent
-- | all CF rules corresponding to a linearization rule
lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
rhss0 <- allLinValues lin -- :: [(Label, [([Patt],Term)])]
rhss1 <- mapM (mkCFItems m) (concat rhss0) -- :: [(Label, [[PreCFItem]])]
mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
-- | making sequences of CF items from every branch in a linearization
mkCFItems :: Ident -> (Label, [([Patt],Term)]) -> Err (Label, [[PreCFItem]])
mkCFItems m (lab,pts) = do
itemss <- mapM (term2CFItems m) (map snd pts)
return (lab, concat itemss) ---- combinations? (test!)
-- | making CF rules from sequences of CF items
mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> (Label, [[PreCFItem]]) -> Err [CFRule]
mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
where
mkOneRule its = do
let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
profile = mkProfile nonterms
cfcat = CFCat (redirectIdent m cat,lab)
cffun = CFFun (AC (CIQ m fun), profile)
cfits = map precf2cf its
return (cffun,(cfcat,cfits))
mkProfile nonterms = map mkOne args
where
mkOne (A c i) = mkOne (AB c 0 i)
mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
where
mkB x = [k | (k,(j, LV y,False)) <- nonterms, j == i, y == x]
-- | intermediate data structure of CFItems with information for profiles
data PreCFItem =
PTerm RegExp -- ^ like ordinary Terminal
| PNonterm CIdent Integer Label Bool -- ^ cat, position, part\/bind, whether arg
deriving Eq
precf2cf :: PreCFItem -> CFItem
precf2cf (PTerm r) = CFTerm r
precf2cf (PNonterm cm _ (L c) True) = CFNonterm (ident2CFCat cm c)
precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
-- | the main job in translating linearization rules into sequences of cf items
term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
term2CFItems m t = errIn "forming cf items" $ case t of
S c _ -> t2c c
T _ cc -> do
its <- mapM t2c [t | Cas _ t <- cc]
tryMkCFTerm (concat its)
V _ cc -> do
its <- mapM t2c [t | t <- cc]
tryMkCFTerm (concat its)
C t1 t2 -> do
its1 <- t2c t1
its2 <- t2c t2
return [x ++ y | x <- its1, y <- its2]
FV ts -> do
its <- mapM t2c ts
tryMkCFTerm (concat its)
P arg s -> extrR arg s
K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]
E -> return [[]]
K (KP d vs) -> do
let its = [PTerm (RegAlts [s]) | s <- d]
let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
tryMkCFTerm (its : itss)
_ -> prtBad "no cf for" t ----
where
t2c = term2CFItems m
-- optimize the number of rules by a factorization
tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]
tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
case mapM mkOne (counterparts ii) of
Ok tt -> return [tt]
_ -> return ii
where
mkOne cfits = case mapM mkOneTerm cfits of
Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
_ -> mkOneNonTerm cfits
mkOneTerm (PTerm (RegAlts t)) = return t
mkOneTerm _ = Bad ""
mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) =
if all (== n) cc
then return n
else Bad ""
mkOneNonTerm _ = Bad ""
counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
tryMkCFTerm itss = return itss
extrR arg lab = case (arg,lab) of
(Arg (A cat pos), l@(L _)) -> return [[PNonterm (cIQ cat) pos l True]]
(Arg (A cat pos), l@(LV _)) -> return [[PNonterm (cIQ cat) pos l False]]
(Arg (AB cat b pos), l@(L _)) -> return [[PNonterm (cIQ cat) pos l True]]
(Arg (AB cat b pos), l@(LV _)) -> return [[PNonterm (cIQ cat) pos l False]]
---- ??
_ -> prtBad "cannot extract record field from" arg
cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c
mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where
(ruls,preds) = if oElem lexerByNeed opts -- option -cflexer
then predefLexer rules
else (rules,emptyTrie)
preds0 s =
[(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
[(cat, varCFFun x) | TV x <- [s], cat <- catVarCF : bindcats] ++
[(cfCatString, stringCFFun t) | TL t <- [s]] ++
[(cfCatInt, intCFFun t) | TI t <- [s]]
cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its]
bindcats = [c | c <- cats, elem (cfCat2Ident c) binds]
look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens
--- TODO: integrate with morphology
--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
predefLexer groups = (reverse ruls, tcompile preds) where
(ruls,preds) = foldr mkOne ([],[]) groups
mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
(rule,pre) = case partition isLexical rules of
([],_) -> (group,[])
(ls,rest) -> ((cat,rest), concatMap mkLexRule ls)
isLexical (f,(c,its)) = case its of
[CFTerm (RegAlts ws)] -> True
_ -> False
mkLexRule r = case r of
(fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws]
_ -> []
|