summaryrefslogtreecommitdiff
path: root/src/GF/CF/CanonToCF.hs
blob: 80ce2e79d9339e728f7586b0131416bdea0859f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
----------------------------------------------------------------------
-- |
-- Module      : CanonToCF
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:41 $ 
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.15 $
--
-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003 
-----------------------------------------------------------------------------

module GF.CF.CanonToCF (canon2cf) where

import GF.System.Tracing -- peb 8/6-04

import GF.Data.Operations
import GF.Infra.Option
import GF.Infra.Ident
import GF.Canon.AbsGFC
import GF.Grammar.LookAbs (allBindCatsOf)
import GF.Canon.GFC
import GF.Grammar.Values (isPredefCat,cPredefAbs)
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import qualified GF.Infra.Modules as M
import GF.CF.CF
import GF.CF.CFIdent
import GF.UseGrammar.Morphology
import GF.Data.Trie2
import Data.List (nub,partition)
import Control.Monad

-- | The main function: for a given cnc module 'm', build the CF grammar with all the
-- rules coming from modules that 'm' extends. The categories are qualified by
-- the abstract module name 'a' that 'm' is of.
-- The ign argument tells what rules not to generate a parser for.
canon2cf :: Options -> (Ident -> Bool) -> CanonGrammar -> Ident -> Err CF
canon2cf opts ign gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
  let ms = M.allExtends gr c
  a <- M.abstractOfConcrete gr c
  let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
  let mms  = [(a, tree2list (M.jments m)) | m <- cncs]
  cnc <- liftM M.jments $ M.lookupModMod gr c
  rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts ign cnc)) mms
  let bindcats = map snd $ allBindCatsOf gr 
  let rules = filter (not . isCircularCF) rules0 ---- temporarily here
  let grules = groupCFRules rules
  let predef = mkCFPredef opts bindcats grules
  return $ CF predef

cnc2cfCond :: Options -> (Ident -> Bool) -> BinTree Ident Info -> 
              Ident -> [(Ident,Info)] -> Err [CFRule]
cnc2cfCond opts ign cnc m gr = 
  liftM concat $ 
  mapM lin2cf [(m,fun,cat,args,lin) | 
                 (fun, CncFun cat args lin _) <- gr, notign fun, is fun] 
 where
   is f = isInBinTree f cnc
   notign = not . ign

type IFun = Ident
type ICat = CIdent

-- | all CF rules corresponding to a linearization rule
lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
  let rhss0 = allLinBranches lin          -- :: [([Label], Term)]
  rhss1  <- mapM (mkCFItems m) rhss0    -- :: [([Label], [[PreCFItem]])]
  mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat

-- | making sequences of CF items from every branch in a linearization
mkCFItems :: Ident -> ([Label], Term) -> Err ([Label], [[PreCFItem]])
mkCFItems m (labs,t) = do
  items <- term2CFItems m t
  return (labs, items)

-- | making CF rules from sequences of CF items
mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> ([Label], [[PreCFItem]]) -> Err [CFRule]
mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
 where
  mkOneRule its = do
    let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
        profile  = mkProfile nonterms
	cfcat    = labels2CFCat (redirectIdent m cat) lab
        cffun    = CFFun (AC (CIQ m fun), profile)
        cfits    = map precf2cf its
    return (cffun,(cfcat,cfits))
  mkProfile nonterms = map mkOne args 
    where
      mkOne (A  c i) = mkOne (AB c 0 i)
      mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
        where
          mkB x = [k | (k,(j, [LV y], False)) <- nonterms, j == i, y == x]

-- | intermediate data structure of CFItems with information for profiles
data PreCFItem = 
    PTerm RegExp                         -- ^ like ordinary Terminal 
  | PNonterm CIdent Integer [Label] Bool -- ^ cat, position, part\/bind, whether arg
   deriving Eq                    

precf2cf :: PreCFItem -> CFItem
precf2cf (PTerm r) = CFTerm r
precf2cf (PNonterm cm _ ls True) = CFNonterm (labels2CFCat cm ls)
precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF


-- | the main job in translating linearization rules into sequences of cf items 
term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
term2CFItems m t = errIn "forming cf items" $ case t of
   S c _ -> t2c c

   T _ cc -> do
     its  <- mapM t2c [t | Cas _ t <- cc]
     tryMkCFTerm (concat its)
   V _ cc -> do
     its  <- mapM t2c [t | t <- cc]
     tryMkCFTerm (concat its)

   C t1 t2 -> do
     its1 <- t2c t1
     its2 <- t2c t2
     return [x ++ y | x <- its1, y <- its2]

   FV ts -> do
     its <- mapM t2c ts
     tryMkCFTerm (concat its)

   P (S c _) _ -> t2c c --- w-around for bug in Compute? AR 31/1/2006

   P arg s -> extrR arg s

   K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]

   E -> return [[]]

   K (KP d vs) -> do
     let its  =  [PTerm (RegAlts [s]) | s <- d]
     let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
     tryMkCFTerm (its : itss)

   _ -> return [] ---- prtBad "no cf for" t ----

  where 

    t2c = term2CFItems m

    -- optimize the number of rules by a factorization
    tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]  
    tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
      case mapM mkOne (counterparts ii) of
        Ok tt -> return [tt]
        _ -> return ii
       where
         mkOne cfits = case mapM mkOneTerm cfits of
           Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
           _ -> mkOneNonTerm cfits
         mkOneTerm (PTerm (RegAlts t)) = return t
         mkOneTerm _ = Bad ""
         mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) = 
           if all (== n) cc 
	      then return n
	      else Bad ""
         mkOneNonTerm _ = Bad ""
         counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
    tryMkCFTerm itss = return itss

    extrR arg lab = case (arg0,labs) of
      (Arg (A  cat pos),   [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
      (Arg (AB cat b pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
      (Arg (A  cat pos),   _)  -> return [[PNonterm (cIQ cat) pos labs True]]
      (Arg (AB cat b pos), _)  -> return [[PNonterm (cIQ cat) pos labs True]]
                                     ---- ??
      _   -> prtBad "cannot extract record field from" arg
     where 
       (arg0,labs) = headProj arg [lab]

    headProj r ls = case r of
      P r0 l0 -> headProj r0 (l0:ls)
      S r0 _  -> headProj r0 ls
      _ -> (r,ls)
    cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c

mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where
  (ruls,preds) = if oElem lexerByNeed opts  -- option -cflexer
                   then predefLexer rules 
                   else (rules,emptyTrie)
  preds0 s = 
    [(cat,         metaCFFun)     | TM _ _ <- [s], cat <- cats] ++
    [(cat,         varCFFun x)    | TV x   <- [s], cat <- catVarCF : bindcats] ++
    [(cfCatString, stringCFFun t) | TL t   <- [s]]              ++
    [(cfCatInt,    intCFFun t)    | TI t   <- [s]] ++
    [(cfCatFloat,  floatCFFun t)  | TF t   <- [s]]
  cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its]
  bindcats = [c | c <- cats, elem (cfCat2Ident c) binds]
  look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens

--- TODO: integrate with morphology
--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
predefLexer groups = (reverse ruls, tcompile preds) where
  (ruls,preds) = foldr mkOne ([],[]) groups
  mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
    (rule,pre) = case partition isLexical rules of
      ([],_) -> (group,[])
      (ls,rest) -> ((cat,rest), concatMap mkLexRule ls)
  isLexical (f,(c,its)) = case its of
    [CFTerm (RegAlts ws)] -> True
    _ -> False
  mkLexRule r = case r of
    (fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws]
    _ -> []