summaryrefslogtreecommitdiff
path: root/src/GF/Compile
diff options
context:
space:
mode:
Diffstat (limited to 'src/GF/Compile')
-rw-r--r--src/GF/Compile/Export.hs1
-rw-r--r--src/GF/Compile/GFCCtoJS.hs41
-rw-r--r--src/GF/Compile/GenerateFCFG.hs168
-rw-r--r--src/GF/Compile/GeneratePMCFG.hs269
-rw-r--r--src/GF/Compile/GrammarToGFCC.hs5
5 files changed, 266 insertions, 218 deletions
diff --git a/src/GF/Compile/Export.hs b/src/GF/Compile/Export.hs
index f4e5b2884..8b924113d 100644
--- a/src/GF/Compile/Export.hs
+++ b/src/GF/Compile/Export.hs
@@ -40,7 +40,6 @@ exportPGF opts fmt pgf =
FmtProlog_Abs -> multi "pl" grammar2prolog_abs
FmtBNF -> single "bnf" bnfPrinter
FmtEBNF -> single "ebnf" (ebnfPrinter opts)
- FmtFCFG -> single "fcfg" fcfgPrinter
FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts)
FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts)
FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts)
diff --git a/src/GF/Compile/GFCCtoJS.hs b/src/GF/Compile/GFCCtoJS.hs
index 3fe8b1635..12c424844 100644
--- a/src/GF/Compile/GFCCtoJS.hs
+++ b/src/GF/Compile/GFCCtoJS.hs
@@ -11,11 +11,13 @@ import GF.Data.ErrM
import GF.Infra.Option
import Control.Monad (mplus)
-import Data.Array (Array)
-import qualified Data.Array as Array
+import Data.Array.Unboxed (UArray)
+import qualified Data.Array.IArray as Array
import Data.Maybe (fromMaybe)
import Data.Map (Map)
+import qualified Data.Set as Set
import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
pgf2js :: PGF -> String
pgf2js pgf =
@@ -89,31 +91,44 @@ children = JS.Ident "cs"
-- Parser
parser2js :: String -> ParserInfo -> [JS.Expr]
parser2js start p = [new "Parser" [JS.EStr start,
- JS.EArray $ map frule2js (Array.elems (allRules p)),
- JS.EObj $ map cats (Map.assocs (startupCats p))]]
+ JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set],
+ JS.EObj $ map cats (Map.assocs (startCats p))]]
where
cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is))
-frule2js :: FRule -> JS.Expr
-frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins]
+frule2js :: ParserInfo -> FCat -> Production -> JS.Expr
+frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins]
+ where
+ FFun f ps lins = functions p Array.! funid
+frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]]
+ where
+ catLinArity :: FCat -> Int
+ catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
+
+ topdownRules cat = f cat []
+ where
+ f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p))
+
+ g (FApply funid args) rules = (functions p Array.! funid,args) : rules
+ g (FCoerce cat) rules = f cat rules
+
name2js :: (CId,[Profile]) -> JS.Expr
-name2js (f,ps) | f == wildCId = fromProfile (head ps)
- | otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
+name2js (f,ps) = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)]
where
fromProfile :: Profile -> JS.Expr
fromProfile [] = new "MetaVar" []
fromProfile [x] = daughter x
fromProfile args = new "Unify" [JS.EArray (map daughter args)]
- daughter i = new "Arg" [JS.EInt i]
+daughter i = new "Arg" [JS.EInt i]
-lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr
-lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls]
+lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr
+lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Array.! seqid)] | seqid <- Array.elems ls]
sym2js :: FSymbol -> JS.Expr
-sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l]
-sym2js (FSymTok t) = new "Terminal" [JS.EStr t]
+sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l]
+sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t]
new :: String -> [JS.Expr] -> JS.Expr
new f xs = JS.ENew (JS.Ident f) xs
diff --git a/src/GF/Compile/GenerateFCFG.hs b/src/GF/Compile/GenerateFCFG.hs
index c2854ef3d..de6c05ef1 100644
--- a/src/GF/Compile/GenerateFCFG.hs
+++ b/src/GF/Compile/GenerateFCFG.hs
@@ -25,17 +25,18 @@ import GF.Data.SortedList
import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.ByteString.Char8 as BS
-import Data.Array
+import Data.Array.IArray
import Data.Maybe
import Control.Monad
----------------------------------------------------------------------
-- main conversion function
-convertConcrete :: Abstr -> Concr -> FGrammar
+convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
@@ -91,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
-- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions.
-fixHoasFuns :: FGrammar -> FGrammar
-fixHoasFuns (rs, cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs)
+fixHoasFuns :: ParserInfo -> ParserInfo
+fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n
-convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar
-convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
+convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
+convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
where
srules = [
(XRule id args res (map findLinType args) (findLinType res) term) |
@@ -107,26 +108,26 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
- (xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules
+ (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
where
- helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
+ helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
- frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
- frulesEnv
+ grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
+ grammarEnv
(mkSingletonSelectors cnc_defs cnc_res)
- in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv')
+ in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
- loop frulesEnv =
- let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv
+ loop grammarEnv =
+ let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
in case todo of
- [] -> frulesEnv'
+ [] -> grammarEnv'
_ -> loop $! List.foldl' (\env (srules,selector) ->
- List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo
+ List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
-convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv
-convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
+convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
+convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
foldBM addRule
- frulesEnv
+ grammarEnv
(convertTerm cnc_defs selector term [([],[])])
(protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
where
@@ -137,9 +138,10 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
(env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
in case xcat of
PFCat _ [] _ -> (env , args, all_args)
- _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])
+ _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
+ (env1,[],[]) (zip3 newArgs' ctypes [0..])
- newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}]
+ (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
(_,newProfile) = List.mapAccumL accumProf 0 newArgs'
where
@@ -147,18 +149,19 @@ convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
where cnt = length xpaths
- rule = FRule fun newProfile newArgs newCat newLinRec
- in addFRule env2 rule
+ (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
-translateLin idxArgs lbl' [] = array (0,-1) []
-translateLin idxArgs lbl' ((lbl,syms) : lins)
- | lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
- | otherwise = translateLin idxArgs lbl' lins
+ in addProduction env4 newCat (FApply funid newArgs)
+
+translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
+translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
+ | lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
+ | otherwise = translateLin idxArgs lins grammarEnv lbl'
where
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
- in FSymCat (index lbl rcs 0) (nr'+xnr)
+ in FSymCat (nr'+xnr) (index lbl rcs 0)
| otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
index lbl' (lbl:lbls) idx
@@ -173,7 +176,7 @@ type CnvMonad a = BacktrackM Env a
type FPath = [FIndex]
type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
-type LinRec = [(FPath, [Either (FPath, FIndex, Int) FToken])]
+type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
type TermMap = Map.Map CId Term
@@ -190,11 +193,11 @@ convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectH
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
do projectHead lbl_path
- return ((lbl_path,Right str : lin) : lins)
+ return ((lbl_path,Right (KS str) : lin) : lins)
convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
do projectHead lbl_path
toks <- member (strs:[strs' | Alt strs' _ <- vars])
- return ((lbl_path, map Right toks ++ lin) : lins)
+ return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
@@ -273,75 +276,105 @@ selectTerm (index:path) (R record) = selectTerm path (record !! index)
----------------------------------------------------------------------
--- FRulesEnv
+-- GrammarEnv
+
-data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
+data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
+type FSeqSet = Map.Map FSeq SeqId
+type FFunSet = Map.Map FFun FunId
data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
protoFCat :: CId -> ProtoFCat
protoFCat cat = PFCat cat [] []
-emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [[0]] [] $
- ins fcatInt (mkCId "Int") [[0]] [] $
- ins fcatFloat (mkCId "Float") [[0]] [] $
- ins fcatVar (mkCId "_Var") [[0]] [] $
- Map.empty) []
+emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
where
- ins fcat cat rcs tcs fcatSet =
- Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
+ initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
+ ins fcatInt (mkCId "Int") [[0]] [] $
+ ins fcatFloat (mkCId "Float") [[0]] [] $
+ ins fcatVar (mkCId "_Var") [[0]] [] $
+ Map.empty)
+
+ ins fcat cat rcs tcs catSet =
+ Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where
right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s
-addFRule :: FRulesEnv -> FRule -> FRulesEnv
-addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
+addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
+addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p =
+ GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
-getFGrammar :: FRulesEnv -> FGrammar
-getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet)
+addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
+addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) =
+ case Map.lookup seq seqSet of
+ Just id -> (env,id)
+ Nothing -> let !last_seq = Map.size seqSet
+ in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq)
+ where
+ seq = mkArray lst
+
+addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
+addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun =
+ case Map.lookup fun funSet of
+ Just id -> (env,id)
+ Nothing -> let !last_funid = Map.size funSet
+ in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid)
+
+getParserInfo :: GrammarEnv -> ParserInfo
+getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
+ ParserInfo { functions = mkArray funSet
+ , sequences = mkArray seqSet
+ , productions = prodSet
+ , startCats = Map.map getFCatList catSet
+ }
where
+ mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
+
getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
-genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
-genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
- case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
- Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
+
+genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
+genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
+ case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
+ Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
Just (Right fcat) -> (env, fcat)
Nothing -> let fcat = last_id+1
- in (FRulesEnv fcat (ins fcat) rules, fcat)
+ in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
where
- ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
+ ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
where
right_fcat = Right fcat
tmap_s = Map.singleton tcs right_fcat
rmap_s = Map.singleton rcs tmap_s
-genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
-genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
- case Map.lookup cat fcatSet >>= Map.lookup rcs of
+genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
+genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
+ case Map.lookup cat catSet >>= Map.lookup rcs of
Just tmap -> case Map.lookup tcs tmap of
- Just (Left fcat) -> (env, fcat)
- Just (Right fcat) -> (env, fcat)
+ Just (Left fcat) -> (env, fcat)
+ Just (Right fcat) -> (env, fcat)
Nothing -> ins tmap
Nothing -> ins Map.empty
where
ins tmap =
let fcat = last_id+1
- (either_fcat,last_id1,tmap1,rules1)
- = foldBM (\tcs st (either_fcat,last_id,tmap,rules) ->
+ (either_fcat,last_id1,tmap1,prodSet1)
+ = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
- rule = FRule wildCId [[0]] [fcat_arg] fcat
- (listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
+ p = FCoerce fcat_arg
+ prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
in if st
- then (Right fcat, last_id1,tmap1,rule:rules)
- else (either_fcat,last_id, tmap, rules))
- (Left fcat,fcat,Map.insert tcs either_fcat tmap,rules)
+ then (Right fcat, last_id1,tmap1,prodSet1)
+ else (either_fcat,last_id, tmap ,prodSet ))
+ (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
(gen_tcs ctype [] [])
False
rmap1 = Map.singleton rcs tmap1
- in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
+ in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
where
addArg tcs last_id tmap =
case Map.lookup tcs tmap of
@@ -380,10 +413,11 @@ data XRule = XRule CId {- function -}
Term {- result lin-type representation -}
Term {- body -}
-takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv)
-takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
+takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
+takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
+ (todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
where
- (todo,fcatSet') =
+ (todo,catSet') =
Map.mapAccumWithKey (\todo cat rmap ->
let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
@@ -398,7 +432,7 @@ takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last
in case mb_srules of
Just srules -> (todo1,rmap1)
- Nothing -> (todo ,rmap1)) [] fcatSet
+ Nothing -> (todo ,rmap1)) [] catSet
------------------------------------------------------------
@@ -524,3 +558,5 @@ projectProtoFCat path0 (PFCat cat rcs tcs) = do
| path0 > path = path : addConstraint rcs
| path0 == path = path : rcs
addConstraint rcs = path0 : rcs
+
+mkArray lst = listArray (0,length lst-1) lst
diff --git a/src/GF/Compile/GeneratePMCFG.hs b/src/GF/Compile/GeneratePMCFG.hs
index e0343e8d6..b24a629a7 100644
--- a/src/GF/Compile/GeneratePMCFG.hs
+++ b/src/GF/Compile/GeneratePMCFG.hs
@@ -1,4 +1,4 @@
-{-# OPTIONS -fbang-patterns #-}
+{-# OPTIONS -fbang-patterns -cpp #-}
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
@@ -12,14 +12,12 @@
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
-
module GF.Compile.GeneratePMCFG
(convertConcrete) where
import PGF.CId
import PGF.Data
import PGF.Macros --hiding (prt)
-import PGF.Parsing.FCFG.Utilities
import GF.Data.BacktrackM
import GF.Data.SortedList
@@ -28,8 +26,9 @@ import GF.Data.Utilities (updateNthM, sortNub)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
+import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS
-import Data.Array
+import Data.Array.IArray
import Data.Maybe
import Control.Monad
import Debug.Trace
@@ -37,7 +36,7 @@ import Debug.Trace
----------------------------------------------------------------------
-- main conversion function
-convertConcrete :: Abstr -> Concr -> FGrammar
+convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
@@ -93,14 +92,14 @@ expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
-- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions.
-fixHoasFuns :: FGrammar -> FGrammar
-fixHoasFuns (!rs, !cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs)
+fixHoasFuns :: ParserInfo -> ParserInfo
+fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
| BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
fixName n = n
-convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar
-convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_defs) emptyFRulesEnv srules)
+convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
+convert abs_defs cnc_defs cat_defs = getParserInfo (List.foldl' (convertRule cnc_defs) (emptyFRulesEnv cnc_defs cat_defs) srules)
where
srules = [
(XRule id args res (map findLinType args) (findLinType res) term) |
@@ -109,23 +108,40 @@ convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_d
findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
-
-convertRule :: TermMap -> FRulesEnv -> XRule -> FRulesEnv
-convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
- foldBM addRule
- frulesEnv
- (convertTerm cnc_defs [] ctype term [([],[])])
- (protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)
+brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
+brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
+ case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
+ (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1
+ where
+ optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
+ where
+ ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
+ ff funid xs env
+ | product (map Set.size ys) == count =
+ case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of
+ (env,args) -> addProduction env cat (FApply funid args)
+ | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs
+ where
+ count = length xs
+ ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
+
+convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
+convertRule cnc_defs grammarEnv (XRule fun args cat ctypes ctype term) = trace (show fun) $
+ brk (\grammarEnv -> foldBM addRule
+ grammarEnv
+ (convertTerm cnc_defs [] ctype term [([],[])])
+ (protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
where
addRule linRec (newCat', newArgs') env0 =
- let (env1, newCat) = genFCatHead env0 newCat'
- (env2, newArgs) = List.mapAccumL (genFCatArg cnc_defs) env1 newArgs'
+ let [newCat] = getFCats env0 newCat'
+ (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
- newLinRec = mkArray (map (mkArray . snd) linRec)
- mkArray lst = listArray (0,length lst-1) lst
+ (env2,lins) = List.mapAccumL addFSeq env1 linRec
+ newLinRec = mkArray lins
- rule = FRule fun [] newArgs newCat newLinRec
- in addFRule env2 rule
+ (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
+
+ in addProduction env3 newCat (FApply funid newArgs)
----------------------------------------------------------------------
-- term conversion
@@ -133,7 +149,7 @@ convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) =
type CnvMonad a = BacktrackM Env a
type FPath = [FIndex]
-data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] Term
+data ProtoFCat = PFCat CId [FPath] [(FPath,[FIndex])]
type Env = (ProtoFCat, [ProtoFCat])
type LinRec = [(FPath, [FSymbol])]
data XRule = XRule CId {- function -}
@@ -144,7 +160,16 @@ data XRule = XRule CId {- function -}
Term {- body -}
protoFCat :: TermMap -> CId -> Term -> ProtoFCat
-protoFCat cnc_defs cat ctype = PFCat cat (getRCS cnc_defs ctype) [] ctype
+protoFCat cnc_defs cat ctype =
+ let (rcs,tcs) = loop [] [] [] ctype
+ in PFCat cat rcs tcs
+ where
+ loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
+ loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
+ loop path rcs tcs (S _) = (path:rcs, tcs)
+ loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
+ Just term -> loop path rcs tcs term
+ Nothing -> error ("unknown identifier: "++show id)
type TermMap = Map.Map CId Term
@@ -156,11 +181,12 @@ convertTerm cnc_defs sel ctype (P term p) lins = do nr <- e
convertTerm cnc_defs (nr:sel) ctype term lins
convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
convertTerm cnc_defs sel ctype term lins
-convertTerm cnc_defs sel ctype (S ts) ((lbl_path,lin) : lins) = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) ((lbl_path,lin) : lins) (reverse ts)
-convertTerm cnc_defs sel ctype (K (KS str)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok str : lin) : lins)
+convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
+--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
+convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
do toks <- member (strs:[strs' | Alt strs' _ <- vars])
- return ((lbl_path, map FSymTok toks ++ lin) : lins)
+ return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs sel ctype term lins
convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
@@ -183,8 +209,8 @@ convertArg (C max) nr path lbl_path lin lins = do
return lins
convertArg (S _) nr path lbl_path lin lins = do
(_, args) <- readState
- let PFCat cat rcs tcs _ = args !! nr
- return ((lbl_path, FSymCat (index path rcs 0) nr : lin) : lins)
+ let PFCat cat rcs tcs = args !! nr
+ return ((lbl_path, FSymCat nr (index path rcs 0) : lin) : lins)
where
index lbl' (lbl:lbls) idx
| lbl' == lbl = idx
@@ -210,8 +236,11 @@ convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
evalTerm cnc_defs path (V nr) = do (_, args) <- readState
- let PFCat _ _ _ ctype = args !! nr
- unifyPType nr (reverse path) (selectTerm path ctype)
+ let PFCat _ _ tcs = args !! nr
+ rpath = reverse path
+ index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
+ restrictArg nr rpath index
+ return index
evalTerm cnc_defs path (C nr) = return nr
evalTerm cnc_defs path (R record) = case path of
(index:path) -> evalTerm cnc_defs path (record !! index)
@@ -222,112 +251,80 @@ evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs
evalTerm cnc_defs path term
evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
-unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
-unifyPType nr path (C max_index) =
- do (_, args) <- readState
- let PFCat _ _ tcs _ = args !! nr
- case lookup path tcs of
- Just index -> return index
- Nothing -> do index <- member [0..max_index]
- restrictArg nr path index
- return index
-unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
-
-selectTerm :: FPath -> Term -> Term
-selectTerm [] term = term
-selectTerm (index:path) (R record) = selectTerm path (record !! index)
-
----------------------------------------------------------------------
--- FRulesEnv
+-- GrammarEnv
-data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
-type FCatSet = Map.Map CId (Map.Map [(FPath,FIndex)] FCat)
+data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
+type CatSet = Map.Map CId (FCat,FCat,[Int])
+type SeqSet = Map.Map FSeq SeqId
+type FunSet = Map.Map FFun FunId
+type CoerceSet= Map.Map [FCat] FCat
-emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [] $
- ins fcatInt (mkCId "Int") [] $
- ins fcatFloat (mkCId "Float") [] $
- ins fcatVar (mkCId "_Var") [] $
- Map.empty) []
+emptyFRulesEnv cnc_defs lincats =
+ let (last_id,catSet) = Map.mapAccum computeCatRange 0 lincats
+ in GrammarEnv last_id catSet Map.empty Map.empty Map.empty IntMap.empty
where
- ins fcat cat tcs fcatSet =
- Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
+ computeCatRange index ctype = (index+size,(index,index+size-1,poly))
where
- tmap_s = Map.singleton tcs fcat
-
-addFRule :: FRulesEnv -> FRule -> FRulesEnv
-addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
-
-getFGrammar :: FRulesEnv -> FGrammar
-getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map Map.elems fcatSet)
-
-genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
-genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs _) =
- case Map.lookup cat fcatSet >>= Map.lookup tcs of
- Just fcat -> (env, fcat)
- Nothing -> let fcat = last_id+1
- in (FRulesEnv fcat (ins fcat) rules, fcat)
+ (size,poly) = getMultipliers 1 [] ctype
+
+ getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
+ getMultipliers m ms (S _) = (m,ms)
+ getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
+ getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
+ Just term -> getMultipliers m ms term
+ Nothing -> error ("unknown identifier: "++prCId id)
+
+addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
+addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
+ GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
+
+addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
+addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
+ case Map.lookup seq seqSet of
+ Just id -> (env,id)
+ Nothing -> let !last_seq = Map.size seqSet
+ in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
where
- ins fcat = Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet
- where
- tmap_s = Map.singleton tcs fcat
-
-genFCatArg :: TermMap -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
-genFCatArg cnc_defs env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs ctype) =
- case Map.lookup cat fcatSet of
- Just tmap -> case Map.lookup tcs tmap of
- Just fcat -> (env, fcat)
- Nothing -> ins tmap
- Nothing -> ins Map.empty
+ seq = mkArray lst
+
+addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
+addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun =
+ case Map.lookup fun funSet of
+ Just id -> (env,id)
+ Nothing -> let !last_funid = Map.size funSet
+ in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid)
+
+addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
+addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats =
+ case sub_fcats of
+ [fcat] -> (env,fcat)
+ _ -> case Map.lookup sub_fcats crcSet of
+ Just fcat -> (env,fcat)
+ Nothing -> let !fcat = last_id+1
+ in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
+
+getParserInfo :: GrammarEnv -> ParserInfo
+getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
+ ParserInfo { functions = mkArray funSet
+ , sequences = mkArray seqSet
+ , productions = IntMap.union prodSet coercions
+ , startCats = Map.map (\(start,end,_) -> range (start,end)) catSet
+ }
where
- ins tmap =
- let fcat = last_id+1
- (last_id1,tmap1,rules1)
- = foldBM (\tcs st (last_id,tmap,rules) ->
- let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
- rule = FRule wildCId [[0]] [fcat_arg] fcat
- (listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]])
- in if st
- then (last_id1,tmap1,rule:rules)
- else (last_id, tmap, rules))
- (fcat,Map.insert tcs fcat tmap,rules)
- (gen_tcs ctype [] [])
- False
- in (FRulesEnv last_id1 (Map.insert cat tmap1 fcatSet) rules1, fcat)
- where
- addArg tcs last_id tmap =
- case Map.lookup tcs tmap of
- Just fcat -> (last_id, tmap, fcat)
- Nothing -> let fcat = last_id+1
- in (fcat, Map.insert tcs fcat tmap, fcat)
-
- gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
- gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
- gen_tcs (S _) path acc = return acc
- gen_tcs (C max_index) path acc =
- case List.lookup path tcs of
- Just index -> return $! addConstraint path index acc
- Nothing -> do writeState True
- index <- member [0..max_index]
- return $! addConstraint path index acc
- where
- addConstraint path0 index0 (c@(path,index) : cs)
- | path0 > path = c:addConstraint path0 index0 cs
- addConstraint path0 index0 cs = (path0,index0) : cs
- gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
- Just term -> gen_tcs term path acc
- Nothing -> error ("unknown identifier: "++prCId id)
-
-
-getRCS :: TermMap -> Term -> [FPath]
-getRCS cnc_defs = loop [] []
+ mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
+
+ coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
+
+getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
+getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat cat rcs tcs) =
+ case Map.lookup cat catSet of
+ Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
where
- loop path rcs (R record) = List.foldl' (\rcs (index,term) -> loop (index:path) rcs term) rcs (zip [0..] record)
- loop path rcs (C i) = rcs
- loop path rcs (S _) = path:rcs
- loop path rcs (F id) = case Map.lookup id cnc_defs of
- Just term -> loop path rcs term
- Nothing -> error ("unknown identifier: "++show id)
+ variants _ [] fcat = return fcat
+ variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
+ variants ms tcs ((m*index) + fcat)
------------------------------------------------------------
-- updating the MCF rule
@@ -345,12 +342,14 @@ restrictHead path term
writeState (head', args)
restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
-restrictProtoFCat path0 index0 (PFCat cat rcs tcs ctype) = do
+restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
tcs <- addConstraint tcs
- return (PFCat cat rcs tcs ctype)
+ return (PFCat cat rcs tcs)
where
- addConstraint (c@(path,index) : cs)
- | path0 > path = liftM (c:) (addConstraint cs)
- | path0 == path = guard (index0 == index) >>
- return (c : cs)
- addConstraint cs = return ((path0,index0) : cs)
+ addConstraint [] = error "restrictProtoFCat: unknown path"
+ addConstraint (c@(path,indices) : tcs)
+ | path0 == path = guard (index0 `elem` indices) >>
+ return ((path,[index0]) : tcs)
+ | otherwise = liftM (c:) (addConstraint tcs)
+
+mkArray lst = listArray (0,length lst-1) lst
diff --git a/src/GF/Compile/GrammarToGFCC.hs b/src/GF/Compile/GrammarToGFCC.hs
index f16497a0a..e57937f52 100644
--- a/src/GF/Compile/GrammarToGFCC.hs
+++ b/src/GF/Compile/GrammarToGFCC.hs
@@ -7,7 +7,6 @@ import qualified GF.Compile.GenerateFCFG as FCFG
import qualified GF.Compile.GeneratePMCFG as PMCFG
import PGF.CId
-import PGF.BuildParser (buildParserInfo)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
import qualified PGF.Data as D
@@ -54,9 +53,9 @@ mkCanon2gfcc opts cnc gr =
addParsers :: D.PGF -> D.PGF
addParsers pgf = pgf { D.concretes = Map.map conv (D.concretes pgf) }
where
- conv cnc = cnc { D.parser = Just (buildParserInfo fcfg) }
+ conv cnc = cnc { D.parser = Just pinfo }
where
- fcfg
+ pinfo
| Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on" = PMCFG.convertConcrete (D.abstract pgf) cnc
| otherwise = FCFG.convertConcrete (D.abstract pgf) cnc