summaryrefslogtreecommitdiff
path: root/src/GF/Conversion/SimpleToFCFG.hs
blob: e0e639800a144d356d3d03f516e16aca3b5d93bc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
----------------------------------------------------------------------
-- |
-- Maintainer  : Krasimir Angelov
-- Stability   : (stable)
-- Portability : (portable)
--
-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------


module GF.Conversion.SimpleToFCFG
    (convertGrammar) where

import GF.System.Tracing
import GF.Infra.Print
import GF.Infra.Ident

import Control.Monad

import GF.Formalism.Utilities
import GF.Formalism.FCFG
import GF.Conversion.Types
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.DataGFCC

import GF.Data.BacktrackM
import GF.Data.SortedList
import GF.Data.Utilities (updateNthM)

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
import Data.Array
import Data.Maybe

----------------------------------------------------------------------
-- main conversion function

convertGrammar :: Grammar -> [(Ident,FGrammar)]
convertGrammar g@(Grm hdr (Abs abs_defs) cncs) = [(i2i cncname,convert abs_defs conc) | cncname <- cncnames gfcc, conc <- Map.lookup cncname (concretes gfcc)]
  where
    gfcc = mkGFCC g

    i2i (CId i) = IC i

    convert :: [AbsDef] -> TermMap -> FGrammar
    convert abs_defs cnc_defs = getFRules (loop frulesEnv)
      where
        srules = [(XRule id args res (map findLinType args) (findLinType res) term) | Fun id (Typ args res) exp <- abs_defs, term <- Map.lookup id cnc_defs]
        
        findLinType (CId id) = fromJust (Map.lookup (CId ("__"++id)) cnc_defs)

        (srulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules
           where
	     helper (srulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = 
	       let srulesMap' = Map.insertWith (++) abs_res [rule] srulesMap
	           frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
	                                    frulesEnv
	                                    (mkSingletonSelectors cnc_res)
               in srulesMap' `seq` frulesEnv' `seq` (srulesMap',frulesEnv')

        loop frulesEnv =
          let (todo, frulesEnv') = takeToDoRules srulesMap frulesEnv
          in case todo of
               [] -> frulesEnv'
               _  -> loop $! List.foldl' (\env (srules,selector) -> 
                             List.foldl' (\env srule             -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo

convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv
convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
  foldBM addRule
         frulesEnv
         (convertTerm cnc_defs selector term [([],[])])
         (initialFCat cat, map (\scat -> (initialFCat scat,[])) args, ctype, ctypes)
  where
    addRule linRec (newCat', newArgs', _, _) env0 =
      let (env1, newCat)          = genFCatHead env0 newCat'
          (env2, newArgs,idxArgs) = foldr (\((fcat@(FCat _ cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) ->
                                       let xargs          = fcat:[FCat 0 cat [path] tcs | path <- reverse xpaths]
                                           (env1, xargs1) = List.mapAccumL (genFCatArg ctype) env xargs
                                       in case fcat of
                                            FCat _ _ [] _ -> (env ,        args,             all_args)
                                            _             -> (env1,xargs1++args,(idx,xargs1):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])

          newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat of {FCat _ _ rcs _ -> rcs}]

          (_,newProfile) = List.mapAccumL accumProf 0 newArgs'
            where
              accumProf nr (FCat _ _ [] _,_     ) = (nr,       Unify []          )
              accumProf nr (_            ,xpaths) = (nr+cnt+1, Unify [nr..nr+cnt])
                where cnt = length xpaths

          rule = FRule (Name fun newProfile) newArgs newCat newLinRec
      in addFCatRule env2 rule

translateLin idxArgs lbl' []                    = array (0,-1) []
translateLin idxArgs lbl' ((lbl,syms) : lins)
  | lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
  | otherwise   = translateLin idxArgs lbl' lins
  where
    instSym = symbol (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
    instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
      | nr == idx = let arg@(FCat _ _ rcs _) = xargs !! xnr
                    in FSymCat arg (index lbl rcs 0) (nr'+xnr)
      | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs

    index lbl' (lbl:lbls) idx
      | lbl' == lbl = idx
      | otherwise   = index lbl' lbls $! (idx+1)


----------------------------------------------------------------------
-- term conversion

type CnvMonad a = BacktrackM Env a

type Env     = (FCat, [(FCat,[FPath])], Term, [Term])
type LinRec  = [(FPath, [Symbol (FPath, FIndex, Int) Token])]

type TermMap = Map.Map CId Term

convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec
convertTerm cnc_defs selector (V nr)       ((lbl_path,lin) : lins) = convertArg selector nr []    lbl_path lin lins
convertTerm cnc_defs selector (C nr)       ((lbl_path,lin) : lins) = convertCon selector nr       lbl_path lin lins
convertTerm cnc_defs selector (R record)   ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins
convertTerm cnc_defs selector (P term sel)                   lins  = do nr <- evalTerm cnc_defs [] sel
                                                                        convertTerm cnc_defs (TuplePrj nr selector) term lins
convertTerm cnc_defs selector (FV vars)                      lins  = do term <- member vars
                                                                        convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (S ts)       ((lbl_path,lin) : lins) = do projectHead lbl_path
                                                                        foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs selector (K (KS str))     ((lbl_path,lin) : lins) = 
  do projectHead lbl_path
     return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (K (KP (str:_)_))((lbl_path,lin) : lins) = 
  do projectHead lbl_path
     return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (RP _ term)                    lins  = convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (F id)                         lins  = do term <- Map.lookup id cnc_defs
                                                                        convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (W s t)     ((lbl_path,lin) : lins) = do
  ss <- case t of
    R ss -> return ss
    F f -> do
      t <- Map.lookup f cnc_defs 
      case t of
        R ss -> return ss
  convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
convertTerm cnc_defs selector x lins  = error ("convertTerm ("++show x++")")


convertArg (TupleSel record)  nr path lbl_path lin lins =
  foldM (\lins (lbl,  selector) -> convertArg selector nr (lbl:path)  (lbl:lbl_path) lin lins) lins record
convertArg (TuplePrj lbl selector) nr path lbl_path lin lins = 
  convertArg selector nr (lbl:path) lbl_path lin lins
convertArg (ConSel indices) nr path lbl_path lin lins = do
  index <- member indices
  restrictHead lbl_path index
  restrictArg nr path index
  return lins
convertArg StrSel nr path lbl_path lin lins = do
  projectHead lbl_path
  xnr <- projectArg nr path
  return ((lbl_path, Cat (path, nr, xnr) : lin) : lins)

convertCon (ConSel indices) index lbl_path lin lins = do
  guard (index `elem` indices)
  restrictHead lbl_path index
  return lins

convertRec cnc_defs selector                   index []           lbl_path lin lins = return lins
convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields
  where
    select []                          = convertRec cnc_defs selector (index+1) record lbl_path lin lins
    select ((index',sub_sel) : fields)
      | index == index'                = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins)
                                            convertRec cnc_defs selector (index+1) record lbl_path lin lins
      | otherwise                      = select fields
convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do
  convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins)


------------------------------------------------------------
-- eval a term to ground terms

evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
evalTerm cnc_defs path (V nr)       = do term <- readArgCType nr
                                         unifyPType nr (reverse path) (selectTerm path term)
evalTerm cnc_defs path (C nr)       = return nr
evalTerm cnc_defs path (R record)   = case path of
                                        (index:path) -> evalTerm cnc_defs path (record !! index)
evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
                                         evalTerm cnc_defs (index:path) term
evalTerm cnc_defs path (FV terms)   = member terms >>= evalTerm cnc_defs path
evalTerm cnc_defs path (RP alias _) = evalTerm cnc_defs path alias
evalTerm cnc_defs path (F id)       = do term <- Map.lookup id cnc_defs
                                         evalTerm cnc_defs path term
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")

unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
unifyPType nr path (C max_index) =
    do (_, args, _, _) <- readState
       let (FCat _ _ _ tcs,_) = args !! nr
       case lookup path tcs of
         Just index -> return index
         Nothing    -> do index <- member [0..max_index-1]
		          restrictArg nr path index
		          return index
unifyPType nr path (RP alias _) = unifyPType nr path alias

selectTerm :: FPath -> Term -> Term
selectTerm []           term        = term
selectTerm (index:path) (R  record) = selectTerm path (record !! index)
selectTerm path         (RP _ term) = selectTerm path term

----------------------------------------------------------------------
-- FRulesEnv

data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]

type XRulesMap = Map.Map CId [XRule]
data XRule     = XRule CId     {- function -}
                       [CId]   {- argument types -}
                       CId     {- result   type  -}
                       [Term]  {- argument lin-types representation -}
                       Term    {- result   lin-type  representation -}
                       Term    {- body -}
type FCatSet   = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))


emptyFRulesEnv = FRulesEnv 0 (ins fcatString (ins fcatInt (ins fcatFloat Map.empty))) []
  where
    ins fcat@(FCat _ cat rcs tcs) fcatSet =
      Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs x_fcat) rcs tmap_s) cat rmap_s fcatSet
      where
        x_fcat = Right fcat
        tmap_s = Map.singleton tcs x_fcat
        rmap_s = Map.singleton rcs tmap_s

genFCatHead :: FRulesEnv -> FCat -> (FRulesEnv, FCat)
genFCatHead env@(FRulesEnv last_id fcatSet rules) m1@(FCat _ cat rcs tcs) =
  case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
    Just (Left  fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
    Just (Right fcat) -> (env, fcat)
    Nothing           -> let next_id = last_id+1
                             fcat    = FCat next_id cat rcs tcs
                         in (FRulesEnv next_id (ins fcat) rules, fcat)
  where
    ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs x_fcat) rcs tmap_s) cat rmap_s fcatSet
      where
        x_fcat = Right fcat
        tmap_s = Map.singleton tcs x_fcat
        rmap_s = Map.singleton rcs tmap_s

genFCatArg :: Term -> FRulesEnv -> FCat -> (FRulesEnv, FCat)
genFCatArg ctype env@(FRulesEnv last_id fcatSet rules) m1@(FCat _ cat rcs tcs) =
  case Map.lookup cat fcatSet >>= Map.lookup rcs of
    Just tmap -> case Map.lookup tcs tmap of
                   Just (Left  fcat) -> (env,   fcat)
                   Just (Right fcat) -> (env,   fcat)
                   Nothing           -> ins tmap
    Nothing   -> ins Map.empty
  where
    ins tmap =
      let next_id = last_id+1
          fcat    = FCat next_id cat rcs tcs
          (x_fcat,last_id1,tmap1,rules1)
                  = foldBM (\tcs st (x_fcat,last_id,tmap,rules) ->
                                   let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
                                       rule = FRule (Name (CId "_") [Unify [0]]) [fcat_arg] fcat
                                                    (listArray (0,length rcs-1) [listArray (0,0) [FSymCat fcat_arg lbl 0] | lbl <- [0..length rcs-1]])
                                   in if st
                                        then (Right fcat,last_id1,tmap1,rule:rules)
                                        else (x_fcat,    last_id,  tmap,     rules))
                           (Left fcat,next_id,Map.insert tcs x_fcat tmap,rules)
                           (gen_tcs ctype [] [])
                           False
          rmap1 = Map.singleton rcs tmap1
      in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
      where
        addArg tcs last_id tmap =
	  case Map.lookup tcs tmap of
	    Just (Left  fcat) -> (last_id, tmap, fcat)
	    Just (Right fcat) -> (last_id, tmap, fcat)
	    Nothing           -> let next_id = last_id+1
	                             fcat    = FCat next_id cat rcs tcs
                                 in (next_id, Map.insert tcs (Left fcat) tmap, fcat)

    gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
    gen_tcs (R record)    path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
    gen_tcs (S _)         path acc = return acc
    gen_tcs (RP _ term)   path acc = gen_tcs term path acc
    gen_tcs (C max_index) path acc =
      case List.lookup path tcs of
        Just index -> return $! addConstraint path index acc
        Nothing    -> do writeState True
                         index <- member [0..max_index-1]
                         return $! addConstraint path index acc
      where
        addConstraint path0 index0 (c@(path,index) : cs)
	  | path0 > path = c:addConstraint path0 index0 cs
        addConstraint path0 index0 cs  = (path0,index0) : cs

takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv)
takeToDoRules srulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
  where
    (todo,fcatSet') =
          Map.mapAccumWithKey (\todo cat rmap ->
                let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> 
                                          let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs x_fcat ->
                                                                   case x_fcat of
                                                                     Left  fcat -> (tcs:tcss,Right fcat)
                                                                     Right fcat -> (    tcss,    x_fcat)) [] tmap
                                          in case tcss of
                                               [] -> (                               todo,tmap )
                                               _  -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
                    mb_srules   = Map.lookup cat srulesMap
                    Just srules = mb_srules

                in case mb_srules of
                     Just srules -> (todo1,rmap1)
                     Nothing     -> (todo ,rmap1)) [] fcatSet

addFCatRule :: FRulesEnv -> FRule -> FRulesEnv
addFCatRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)

getFRules :: FRulesEnv -> [FRule]
getFRules (FRulesEnv last_id fcatSet rules) = rules


------------------------------------------------------------
-- The TermSelector

data TermSelector
  = TupleSel [(FIndex, TermSelector)]
  | TuplePrj   FIndex  TermSelector
  | ConSel   [FIndex]
  | StrSel
  deriving Show

mkSingletonSelectors :: Term               -- ^ Type representation term
                     -> [TermSelector]     -- ^ list of selectors containing just one string field
mkSingletonSelectors term = sels0
  where
    (sels0,tcss0) = loop [] ([],[]) term

    loop path st          (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record)
    loop path st          (RP _ t)   = loop path st t
    loop path (sels,tcss) (C i)      = (                          sels,map ((,) path) [0..i-1] : tcss)
    loop path (sels,tcss) (S _)      = (mkSelector [path] tcss0 : sels,                          tcss)

mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector
mkSelector rcs tcss =
  List.foldl' addRestriction (case xs of
                                (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys
  where
    xs = [ reverse path       |               path       <- rcs]
    ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs]
    
    addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector
    addRestriction (ConSel indices)  ([]          ,n_index) = ConSel (add indices)
      where
        add []                      = [n_index]
        add (index':indices)
          | n_index == index'       = index':    indices
          | otherwise               = index':add indices
    addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields)
      where
        add []                      = [(index,path2selector (ConSel [n_index]) path)]
        add (field@(index',sub_sel):fields)
          | index == index'         = (index',addRestriction sub_sel (path,n_index)):fields
          | otherwise               = field : add fields

    addProjection :: TermSelector -> FPath -> TermSelector
    addProjection StrSel            []             = StrSel
    addProjection (TupleSel fields) (index : path) = TupleSel (add fields)
      where
        add []                      = [(index,path2selector StrSel path)]
        add (field@(index',sub_sel):fields)
          | index == index'         = (index',addProjection sub_sel path):fields
          | otherwise               = field : add fields
    
    path2selector base []             = base
    path2selector base (index : path) = TupleSel [(index,path2selector base path)]

------------------------------------------------------------
-- updating the MCF rule

readArgCType :: FIndex -> CnvMonad Term
readArgCType nr = do (_, _, _, ctypes) <- readState
		     return (ctypes !! nr)

restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
restrictArg nr path index = do
  (head, args, ctype, ctypes) <- readState 
  args' <- updateNthM (\(fcat,xs) -> do fcat <- restrictFCat path index fcat
                                        return (fcat,xs)                    ) nr args
  writeState (head, args', ctype, ctypes)

projectArg :: FIndex -> FPath -> CnvMonad Int
projectArg nr path = do
  (head, args, ctype, ctypes) <- readState
  (xnr,args') <- updateArgs nr args
  writeState (head, args', ctype, ctypes)
  return xnr
  where
    updateArgs :: FIndex -> [(FCat,[FPath])] -> CnvMonad (Int,[(FCat,[FPath])])
    updateArgs 0 ((a@(FCat _ _ rcs _),xpaths) : as)
      | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as)
      | otherwise       = do a <- projectFCat path a
                             return (0,(a,xpaths):as)
    updateArgs n (a : as) = do
      (xnr,as) <- updateArgs (n-1) as
      return (xnr,a:as)

readHeadCType :: CnvMonad Term
readHeadCType = do (_, _, ctype, _) <- readState
		   return ctype

restrictHead :: FPath -> FIndex -> CnvMonad ()
restrictHead path term
    = do (head, args, ctype, ctypes) <- readState
	 head' <- restrictFCat path term head
	 writeState (head', args, ctype, ctypes)

projectHead :: FPath -> CnvMonad ()
projectHead path
    = do (head, args, ctype, ctypes) <- readState
	 head' <- projectFCat path head
	 writeState (head', args, ctype, ctypes)

restrictFCat :: FPath -> FIndex -> FCat -> CnvMonad FCat
restrictFCat path0 index0 (FCat id cat rcs tcs) = do
  tcs <- addConstraint tcs
  return (FCat id cat rcs tcs)
  where
    addConstraint (c@(path,index) : cs)
        | path0 >  path = liftM (c:) (addConstraint cs)
        | path0 == path = guard (index0 == index) >>
                          return (c : cs)
    addConstraint cs    = return ((path0,index0) : cs)

projectFCat :: FPath -> FCat -> CnvMonad FCat
projectFCat path0 (FCat id cat rcs tcs) = do
  return (FCat id cat (addConstraint rcs) tcs)
  where
    addConstraint (path : rcs)
        | path0 >  path = path  : addConstraint rcs
        | path0 == path = path  : rcs
    addConstraint rcs   = path0 : rcs