summaryrefslogtreecommitdiff
path: root/src/GF/CF/CanonToCF.hs
blob: a343a2473e25c7e8104ff3208f201833cbe3a031 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
----------------------------------------------------------------------
-- |
-- Module      : (Module)
-- Maintainer  : (Maintainer)
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date $ 
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-----------------------------------------------------------------------------

module CanonToCF where

import Tracing -- peb 8/6-04

import Operations
import Option
import Ident
import AbsGFC
import LookAbs (allBindCatsOf)
import GFC
import Values (isPredefCat,cPredefAbs)
import PrGrammar
import CMacros
import qualified Modules as M
import CF
import CFIdent
import Morphology
import Trie2
import List (nub,partition)
import Monad

-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003 

-- The main function: for a given cnc module m, build the CF grammar with all the
-- rules coming from modules that m extends. The categories are qualified by
-- the abstract module name a that m is of.

canon2cf :: Options -> CanonGrammar -> Ident -> Err CF
canon2cf opts gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
  let ms = M.allExtends gr c
  a <- M.abstractOfConcrete gr c
  let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
  let mms  = [(a, tree2list (M.jments m)) | m <- cncs]
  rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms
  let bindcats = map snd $ allBindCatsOf gr 
  let rules = filter (not . isCircularCF) rules0 ---- temporarily here
  let grules = groupCFRules rules
  let predef = mkCFPredef opts bindcats grules
  return $ CF predef

cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule]
cnc2cfCond opts m gr = 
  liftM concat $ 
  mapM lin2cf [(m,fun,cat,args,lin) | (fun, CncFun cat args lin _) <- gr] 

type IFun = Ident
type ICat = CIdent

-- all CF rules corresponding to a linearization rule
lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
  rhss0  <- allLinValues lin                   -- :: [(Label, [([Patt],Term)])]
  rhss1  <- mapM (mkCFItems m) (concat rhss0)  -- :: [(Label, [[PreCFItem]])]
  mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat

-- making sequences of CF items from every branch in a linearization
mkCFItems :: Ident -> (Label, [([Patt],Term)]) -> Err (Label, [[PreCFItem]])
mkCFItems m (lab,pts) = do
  itemss <- mapM (term2CFItems m) (map snd pts)
  return (lab, concat itemss) ---- combinations? (test!)

-- making CF rules from sequences of CF items
mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> (Label, [[PreCFItem]]) -> Err [CFRule]
mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
 where
  mkOneRule its = do
    let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
        profile  = mkProfile nonterms
	cfcat    = CFCat (redirectIdent m cat,lab)
        cffun    = CFFun (AC (CIQ m fun), profile)
        cfits    = map precf2cf its
    return (cffun,(cfcat,cfits))
  mkProfile nonterms = map mkOne args 
    where
      mkOne (A  c i) = mkOne (AB c 0 i)
      mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
        where
          mkB x = [k | (k,(j, LV y,False)) <- nonterms, j == i, y == x]

-- intermediate data structure of CFItems with information for profiles
data PreCFItem = 
    PTerm RegExp                       -- like ordinary Terminal 
  | PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg
   deriving Eq                    

precf2cf :: PreCFItem -> CFItem
precf2cf (PTerm r) = CFTerm r
precf2cf (PNonterm cm _ (L c) True) = CFNonterm (ident2CFCat cm c)
precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF


-- the main job in translating linearization rules into sequences of cf items 
term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
term2CFItems m t = errIn "forming cf items" $ case t of
   S c _ -> t2c c

   T _ cc -> do
     its  <- mapM t2c [t | Cas _ t <- cc]
     tryMkCFTerm (concat its)
   V _ cc -> do
     its  <- mapM t2c [t | t <- cc]
     tryMkCFTerm (concat its)

   C t1 t2 -> do
     its1 <- t2c t1
     its2 <- t2c t2
     return [x ++ y | x <- its1, y <- its2]

   FV ts -> do
     its <- mapM t2c ts
     tryMkCFTerm (concat its)

   P arg s -> extrR arg s

   K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]

   E -> return [[]]

   K (KP d vs) -> do
     let its  =  [PTerm (RegAlts [s]) | s <- d]
     let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
     tryMkCFTerm (its : itss)

   _ -> prtBad "no cf for" t ----

  where 

    t2c = term2CFItems m

    -- optimize the number of rules by a factorization
    tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]  
    tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
      case mapM mkOne (counterparts ii) of
        Ok tt -> return [tt]
        _ -> return ii
       where
         mkOne cfits = case mapM mkOneTerm cfits of
           Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
           _ -> mkOneNonTerm cfits
         mkOneTerm (PTerm (RegAlts t)) = return t
         mkOneTerm _ = Bad ""
         mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) = 
           if all (== n) cc 
	      then return n
	      else Bad ""
         mkOneNonTerm _ = Bad ""
         counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
    tryMkCFTerm itss = return itss

    extrR arg lab = case (arg,lab) of
      (Arg (A  cat pos),   l@(L _))  -> return [[PNonterm (cIQ cat) pos l True]]
      (Arg (A  cat pos),   l@(LV _)) -> return [[PNonterm (cIQ cat) pos l False]]
      (Arg (AB cat b pos), l@(L _))  -> return [[PNonterm (cIQ cat) pos l True]]
      (Arg (AB cat b pos), l@(LV _)) -> return [[PNonterm (cIQ cat) pos l False]]
                                     ---- ??
      _   -> prtBad "cannot extract record field from" arg
    cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c

mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where
  (ruls,preds) = if oElem lexerByNeed opts  -- option -cflexer
                   then predefLexer rules 
                   else (rules,emptyTrie)
  preds0 s = 
    [(cat,         metaCFFun)     | TM _ _ <- [s], cat <- cats] ++
    [(cat,         varCFFun x)    | TV x   <- [s], cat <- catVarCF : bindcats] ++
    [(cfCatString, stringCFFun t) | TL t   <- [s]]              ++
    [(cfCatInt,    intCFFun t)    | TI t   <- [s]]
  cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its]
  bindcats = [c | c <- cats, elem (cfCat2Ident c) binds]
  look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens

--- TODO: integrate with morphology
--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
predefLexer groups = (reverse ruls, tcompile preds) where
  (ruls,preds) = foldr mkOne ([],[]) groups
  mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
    (rule,pre) = case partition isLexical rules of
      ([],_) -> (group,[])
      (ls,rest) -> ((cat,rest), concatMap mkLexRule ls)
  isLexical (f,(c,its)) = case its of
    [CFTerm (RegAlts ws)] -> True
    _ -> False
  mkLexRule r = case r of
    (fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws]
    _ -> []