summaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2009-12-14 10:54:22 +0000
committerkrasimir <krasimir@chalmers.se>2009-12-14 10:54:22 +0000
commitc036459214852ca01868f5da81408f49b22a49e9 (patch)
tree72a767680911cba272a033b07fc750c0d4f1d0d3 /src/compiler
parentfaa638d6fc5dbc47d5e3ef3d4da42449005c3a0d (diff)
remove the old parsing code and the -erasing=on flag
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/GF/Compile/GenerateFCFG.hs568
-rw-r--r--src/compiler/GF/Compile/GeneratePMCFG.hs6
-rw-r--r--src/compiler/GF/Compile/GeneratePMCFGOld.hs374
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs9
-rw-r--r--src/compiler/GF/Compile/PGFtoJS.hs5
-rw-r--r--src/compiler/GF/Infra/Option.hs5
-rw-r--r--src/compiler/GF/Speech/PGFToCFG.hs16
7 files changed, 16 insertions, 967 deletions
diff --git a/src/compiler/GF/Compile/GenerateFCFG.hs b/src/compiler/GF/Compile/GenerateFCFG.hs
deleted file mode 100644
index 52e95f686..000000000
--- a/src/compiler/GF/Compile/GenerateFCFG.hs
+++ /dev/null
@@ -1,568 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Maintainer : Krasimir Angelov
--- Stability : (stable)
--- Portability : (portable)
---
--- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
---
--- the resulting grammars might be /very large/
---
--- the conversion is only equivalent if the GFC grammar has a context-free backbone.
------------------------------------------------------------------------------
-
-
-module GF.Compile.GenerateFCFG
- (convertConcrete) where
-
-import PGF.CId
-import PGF.Data
-import PGF.Macros --hiding (prt)
-import PGF.Parsing.FCFG.Utilities
-
-import GF.Data.BacktrackM
-import GF.Data.SortedList
-import GF.Data.Utilities (updateNthM, sortNub)
-
-import qualified Data.Map as Map
-import qualified Data.IntMap as IntMap
-import qualified Data.Set as Set
-import qualified Data.List as List
-import qualified Data.ByteString.Char8 as BS
-import Data.Array.IArray
-import Data.Maybe
-import Control.Monad
-
-----------------------------------------------------------------------
--- main conversion function
-
-convertConcrete :: Abstr -> Concr -> ParserInfo
-convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
- where abs_defs = Map.assocs (funs abs)
- conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
- cats = lincats cnc
- (abs_defs',conc',cats') = expandHOAS abs_defs conc cats
-
-expandHOAS :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ([(CId,(Type,Int,[Equation]))],TermMap,TermMap)
-expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
- Map.unions [lins, hoLins, varLins],
- Map.unions [lincats, hoLincats, varLincat])
- where
- -- replace higher-order fun argument types with new categories
- funs' = [(f,(fixType ty,a,e)) | (f,(ty,a,e)) <- funs]
- where
- fixType :: Type -> Type
- fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt
-
- hoTypes :: [(Int,CId)]
- hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0]
- hoCats = sortNub (map snd hoTypes)
- -- for each Cat with N bindings, we add a new category _NCat
- -- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat
- hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),0,[])) | ty@(n,c) <- hoTypes]
- -- lincats for the new categories
- hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes]
- -- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ...
- hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes]
- where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c)
- -- for each Cat, we a add a fun _Var_Cat : _Var -> Cat
- varFuns = [(varFunName cat, (cftype [varCat] cat,0,[])) | cat <- hoCats]
- -- linearizations of the _Var_Cat functions
- varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats]
- -- lincat for the _Var category
- varLincat = Map.singleton varCat (R [S []])
-
- lincatOf c = fromMaybe (error $ "No lincat for " ++ showCId c) $ Map.lookup c lincats
-
- modifyRec :: ([Term] -> [Term]) -> Term -> Term
- modifyRec f (R xs) = R (f xs)
- modifyRec _ t = error $ "Not a record: " ++ show t
-
- varCat = mkCId "_Var"
-
- catName :: (Int,CId) -> CId
- catName (0,c) = c
- catName (n,c) = mkCId ("_" ++ show n ++ showCId c)
-
- funName :: (Int,CId) -> CId
- funName (n,c) = mkCId ("__" ++ show n ++ showCId c)
-
- varFunName :: CId -> CId
- varFunName c = mkCId ("_Var_" ++ showCId c)
-
--- replaces __NCat with _B and _Var_Cat with _.
--- the temporary names are just there to avoid name collisions.
-fixHoasFuns :: ParserInfo -> ParserInfo
-fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]}
- where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B")
- | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId
- fixName n = n
-
-convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo
-convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv)
- where
- srules = [
- (XRule id args res (map findLinType args) (findLinType res) term) |
- (id, (ty,_,_)) <- abs_defs, let (args,res) = catSkeleton ty,
- term <- maybeToList (Map.lookup id cnc_defs)]
-
- findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
-
- (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules
- where
- helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
- let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
- grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
- grammarEnv
- (mkSingletonSelectors cnc_defs cnc_res)
- in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv')
-
- loop grammarEnv =
- let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv
- in case todo of
- [] -> grammarEnv'
- _ -> loop $! List.foldl' (\env (srules,selector) ->
- List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo
-
-convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv
-convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv =
- foldBM addRule
- grammarEnv
- (convertTerm cnc_defs selector term [([],[])])
- (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
- where
- addRule linRec (newCat', newArgs', _, _) env0 =
- let (env1, newCat) = genFCatHead env0 newCat'
- (env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) ->
- let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths]
- (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
- in case xcat of
- PFCat _ [] _ -> (env , args, all_args)
- _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args))
- (env1,[],[]) (zip3 newArgs' ctypes [0..])
-
- (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs})
-
- (_,newProfile) = List.mapAccumL accumProf 0 newArgs'
- where
- accumProf nr (PFCat _ [] _,_ ) = (nr, [] )
- accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt])
- where cnt = length xpaths
-
- (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec))
-
- in addProduction env4 newCat (FApply funid newArgs)
-
-translateLin idxArgs [] grammarEnv lbl' = error "translateLin"
-translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
- | lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
- | otherwise = translateLin idxArgs lins grammarEnv lbl'
- where
- instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs)
- (\t -> case t of
- KS s -> FSymKS [s]
- KP strs vars -> FSymKP strs vars)
- instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
- | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
- in FSymCat (nr'+xnr) (index lbl rcs 0)
- | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
-
- index lbl' (lbl:lbls) idx
- | lbl' == lbl = idx
- | otherwise = index lbl' lbls $! (idx+1)
-
-
-----------------------------------------------------------------------
--- term conversion
-
-type CnvMonad a = BacktrackM Env a
-
-type FPath = [FIndex]
-type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
-type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])]
-
-type TermMap = Map.Map CId Term
-
-convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec
-convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins
-convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins
-convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins
-
-convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel
- convertTerm cnc_defs (TuplePrj nr selector) term lins
-convertTerm cnc_defs selector (FV vars) lins = do term <- member vars
- convertTerm cnc_defs selector term lins
-convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path
- foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
-convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
- do projectHead lbl_path
- return ((lbl_path,Right (KS str) : lin) : lins)
-convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
- do projectHead lbl_path
- toks <- member (strs:[strs' | Alt strs' _ <- vars])
- return ((lbl_path, map (Right . KS) toks ++ lin) : lins)
-convertTerm cnc_defs selector (F id) lins = case Map.lookup id cnc_defs of
- Just term -> convertTerm cnc_defs selector term lins
- Nothing -> mzero
-convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
- ss <- case t of
- R ss -> return ss
- F f -> case Map.lookup f cnc_defs of
- Just (R ss) -> return ss
- _ -> mzero
- convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
-convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")
-
-
-convertArg (TupleSel record) nr path lbl_path lin lins =
- foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record
-convertArg (TuplePrj lbl selector) nr path lbl_path lin lins =
- convertArg selector nr (lbl:path) lbl_path lin lins
-convertArg (ConSel indices) nr path lbl_path lin lins = do
- index <- member indices
- restrictHead lbl_path index
- restrictArg nr path index
- return lins
-convertArg StrSel nr path lbl_path lin lins = do
- projectHead lbl_path
- xnr <- projectArg nr path
- return ((lbl_path, Left (path, nr, xnr) : lin) : lins)
-
-convertCon (ConSel indices) index lbl_path lin lins = do
- guard (index `elem` indices)
- restrictHead lbl_path index
- return lins
-convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
-
-convertRec cnc_defs selector index [] lbl_path lin lins = return lins
-convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields
- where
- select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins
- select ((index',sub_sel) : fields)
- | index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins)
- convertRec cnc_defs selector (index+1) record lbl_path lin lins
- | otherwise = select fields
-convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do
- convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins)
-
-
-------------------------------------------------------------
--- eval a term to ground terms
-
-evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
-evalTerm cnc_defs path (V nr) = do term <- readArgCType nr
- unifyPType nr (reverse path) (selectTerm path term)
-evalTerm cnc_defs path (C nr) = return nr
-evalTerm cnc_defs path (R record) = case path of
- (index:path) -> evalTerm cnc_defs path (record !! index)
-evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
- evalTerm cnc_defs (index:path) term
-evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
-evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of
- Just term -> evalTerm cnc_defs path term
- Nothing -> mzero
-evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
-
-unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
-unifyPType nr path (C max_index) =
- do (_, args, _, _) <- get
- let (PFCat _ _ tcs,_) = args !! nr
- case lookup path tcs of
- Just index -> return index
- Nothing -> do index <- member [0..max_index]
- restrictArg nr path index
- return index
-unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
-
-selectTerm :: FPath -> Term -> Term
-selectTerm [] term = term
-selectTerm (index:path) (R record) = selectTerm path (record !! index)
-
-
-----------------------------------------------------------------------
--- GrammarEnv
-
-
-data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production))
-type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
-type FSeqSet = Map.Map FSeq SeqId
-type FFunSet = Map.Map FFun FunId
-
-data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
-
-protoFCat :: CId -> ProtoFCat
-protoFCat cat = PFCat cat [] []
-
-emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty
- where
- initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $
- ins fcatInt (mkCId "Int") [[0]] [] $
- ins fcatFloat (mkCId "Float") [[0]] [] $
- ins fcatVar (mkCId "_Var") [[0]] [] $
- Map.empty)
-
- ins fcat cat rcs tcs catSet =
- Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
- where
- right_fcat = Right fcat
- tmap_s = Map.singleton tcs right_fcat
- rmap_s = Map.singleton rcs tmap_s
-
-addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
-addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p =
- GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
-
-addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
-addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) =
- case Map.lookup seq seqSet of
- Just id -> (env,id)
- Nothing -> let !last_seq = Map.size seqSet
- in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq)
- where
- seq = mkArray lst
-
-addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
-addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun =
- case Map.lookup fun funSet of
- Just id -> (env,id)
- Nothing -> let !last_funid = Map.size funSet
- in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid)
-
-getParserInfo :: GrammarEnv -> ParserInfo
-getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
- ParserInfo { functions = mkArray funSet
- , sequences = mkArray seqSet
- , productions0= prodSet
- , productions = prodSet
- , startCats = Map.map getFCatList catSet
- , totalCats = last_id+1
- }
- where
- mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
-
- getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
-
-
-genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
-genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
- case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of
- Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat)
- Just (Right fcat) -> (env, fcat)
- Nothing -> let fcat = last_id+1
- in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat)
- where
- ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet
- where
- right_fcat = Right fcat
- tmap_s = Map.singleton tcs right_fcat
- rmap_s = Map.singleton rcs tmap_s
-
-genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat)
-genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) =
- case Map.lookup cat catSet >>= Map.lookup rcs of
- Just tmap -> case Map.lookup tcs tmap of
- Just (Left fcat) -> (env, fcat)
- Just (Right fcat) -> (env, fcat)
- Nothing -> ins tmap
- Nothing -> ins Map.empty
- where
- ins tmap =
- let fcat = last_id+1
- (either_fcat,last_id1,tmap1,prodSet1)
- = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) ->
- let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
- p = FCoerce fcat_arg
- prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet
- in if st
- then (Right fcat, last_id1,tmap1,prodSet1)
- else (either_fcat,last_id, tmap ,prodSet ))
- (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet)
- (gen_tcs ctype [] [])
- False
- rmap1 = Map.singleton rcs tmap1
- in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat)
- where
- addArg tcs last_id tmap =
- case Map.lookup tcs tmap of
- Just (Left fcat) -> (last_id, tmap, fcat)
- Just (Right fcat) -> (last_id, tmap, fcat)
- Nothing -> let fcat = last_id+1
- in (fcat, Map.insert tcs (Left fcat) tmap, fcat)
-
- gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
- gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
- gen_tcs (S _) path acc = return acc
- gen_tcs (C max_index) path acc =
- case List.lookup path tcs of
- Just index -> return $! addConstraint path index acc
- Nothing -> do put True
- index <- member [0..max_index]
- return $! addConstraint path index acc
- where
- addConstraint path0 index0 (c@(path,index) : cs)
- | path0 > path = c:addConstraint path0 index0 cs
- addConstraint path0 index0 cs = (path0,index0) : cs
- gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
- Just term -> gen_tcs term path acc
- Nothing -> error ("unknown identifier: "++showCId id)
-
-
-
-------------------------------------------------------------
--- TODO queue organization
-
-type XRulesMap = Map.Map CId [XRule]
-data XRule = XRule CId {- function -}
- [CId] {- argument types -}
- CId {- result type -}
- [Term] {- argument lin-types representation -}
- Term {- result lin-type representation -}
- Term {- body -}
-
-takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv)
-takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) =
- (todo,GrammarEnv last_id catSet' seqSet funSet prodSet)
- where
- (todo,catSet') =
- Map.mapAccumWithKey (\todo cat rmap ->
- let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
- let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
- case either_xcat of
- Left xcat -> (tcs:tcss,Right xcat)
- Right xcat -> ( tcss,either_xcat)) [] tmap
- in case tcss of
- [] -> ( todo,tmap )
- _ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
- mb_srules = Map.lookup cat xrulesMap
- Just srules = mb_srules
-
- in case mb_srules of
- Just srules -> (todo1,rmap1)
- Nothing -> (todo ,rmap1)) [] catSet
-
-
-------------------------------------------------------------
--- The TermSelector
-
-data TermSelector
- = TupleSel [(FIndex, TermSelector)]
- | TuplePrj FIndex TermSelector
- | ConSel [FIndex]
- | StrSel
- deriving Show
-
-mkSingletonSelectors :: TermMap
- -> Term -- ^ Type representation term
- -> [TermSelector] -- ^ list of selectors containing just one string field
-mkSingletonSelectors cnc_defs term = sels0
- where
- (sels0,tcss0) = loop [] ([],[]) term
-
- loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record)
- loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss)
- loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss)
- loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of
- Just term -> loop path (sels,tcss) term
- Nothing -> error ("unknown identifier: "++showCId id)
-
-mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector
-mkSelector rcs tcss =
- List.foldl' addRestriction (case xs of
- (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys
- where
- xs = [ reverse path | path <- rcs]
- ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs]
-
- addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector
- addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices)
- where
- add [] = [n_index]
- add (index':indices)
- | n_index == index' = index': indices
- | otherwise = index':add indices
- addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields)
- where
- add [] = [(index,path2selector (ConSel [n_index]) path)]
- add (field@(index',sub_sel):fields)
- | index == index' = (index',addRestriction sub_sel (path,n_index)):fields
- | otherwise = field : add fields
-
- addProjection :: TermSelector -> FPath -> TermSelector
- addProjection StrSel [] = StrSel
- addProjection (TupleSel fields) (index : path) = TupleSel (add fields)
- where
- add [] = [(index,path2selector StrSel path)]
- add (field@(index',sub_sel):fields)
- | index == index' = (index',addProjection sub_sel path):fields
- | otherwise = field : add fields
-
- path2selector base [] = base
- path2selector base (index : path) = TupleSel [(index,path2selector base path)]
-
-------------------------------------------------------------
--- updating the MCF rule
-
-readArgCType :: FIndex -> CnvMonad Term
-readArgCType nr = do (_, _, _, ctypes) <- get
- return (ctypes !! nr)
-
-restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
-restrictArg nr path index = do
- (head, args, ctype, ctypes) <- get
- args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat
- return (xcat,xs) ) nr args
- put (head, args', ctype, ctypes)
-
-projectArg :: FIndex -> FPath -> CnvMonad Int
-projectArg nr path = do
- (head, args, ctype, ctypes) <- get
- (xnr,args') <- updateArgs nr args
- put (head, args', ctype, ctypes)
- return xnr
- where
- updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])])
- updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as)
- | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as)
- | otherwise = do a <- projectProtoFCat path a
- return (0,(a,xpaths):as)
- updateArgs n (a : as) = do
- (xnr,as) <- updateArgs (n-1) as
- return (xnr,a:as)
-
-readHeadCType :: CnvMonad Term
-readHeadCType = do (_, _, ctype, _) <- get
- return ctype
-
-restrictHead :: FPath -> FIndex -> CnvMonad ()
-restrictHead path term
- = do (head, args, ctype, ctypes) <- get
- head' <- restrictProtoFCat path term head
- put (head', args, ctype, ctypes)
-
-projectHead :: FPath -> CnvMonad ()
-projectHead path
- = do (head, args, ctype, ctypes) <- get
- head' <- projectProtoFCat path head
- put (head', args, ctype, ctypes)
-
-restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
-restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
- tcs <- addConstraint tcs
- return (PFCat cat rcs tcs)
- where
- addConstraint (c@(path,index) : cs)
- | path0 > path = liftM (c:) (addConstraint cs)
- | path0 == path = guard (index0 == index) >>
- return (c : cs)
- addConstraint cs = return ((path0,index0) : cs)
-
-projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat
-projectProtoFCat path0 (PFCat cat rcs tcs) = do
- return (PFCat cat (addConstraint rcs) tcs)
- where
- addConstraint (path : rcs)
- | path0 > path = path : addConstraint rcs
- | path0 == path = path : rcs
- addConstraint rcs = path0 : rcs
-
-mkArray lst = listArray (0,length lst-1) lst
diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs
index 458cf3f5c..fed2521e1 100644
--- a/src/compiler/GF/Compile/GeneratePMCFG.hs
+++ b/src/compiler/GF/Compile/GeneratePMCFG.hs
@@ -123,7 +123,7 @@ convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) =
let [newCat] = getFCats env0 newCat'
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
- (env2,funid) = addFFun env1 (FFun fun [[n] | n <- [0..length newArgs-1]] (mkArray lins))
+ (env2,funid) = addFFun env1 (FFun fun (mkArray lins))
in addProduction env2 newCat (FApply funid newArgs)
@@ -394,7 +394,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
(env1,lins) = List.mapAccumL addFSeq env linRec
newLinRec = mkArray lins
- (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec)
+ (env2,funid) = addFFun env1 (FFun _B newLinRec)
env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
env2
@@ -462,7 +462,7 @@ getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
, sequences = mkArray seqSet
, productions0= productions0
, productions = filterProductions productions0
- , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
+ , startCats = maybe Map.empty (Map.map (\(start,end,_) -> (start,end))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where
diff --git a/src/compiler/GF/Compile/GeneratePMCFGOld.hs b/src/compiler/GF/Compile/GeneratePMCFGOld.hs
deleted file mode 100644
index 244ed68fe..000000000
--- a/src/compiler/GF/Compile/GeneratePMCFGOld.hs
+++ /dev/null
@@ -1,374 +0,0 @@
-{-# LANGUAGE BangPatterns, CPP #-}
-----------------------------------------------------------------------
--- |
--- Maintainer : Krasimir Angelov
--- Stability : (stable)
--- Portability : (portable)
---
--- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
---
--- the resulting grammars might be /very large/
---
--- the conversion is only equivalent if the GFC grammar has a context-free backbone.
------------------------------------------------------------------------------
-
-module GF.Compile.GeneratePMCFG
- (convertConcrete) where
-
-import PGF.CId
-import PGF.Data
-import PGF.Macros --hiding (prt)
-
-import GF.Data.BacktrackM
-import GF.Data.SortedList
-import GF.Data.Utilities (updateNthM, sortNub)
-
-import qualified Data.Map as Map
-import qualified Data.Set as Set
-import qualified Data.List as List
-import qualified Data.IntMap as IntMap
-import qualified Data.ByteString.Char8 as BS
-import Data.Array.IArray
-import Data.Maybe
-import Control.Monad
-import Debug.Trace
-
-----------------------------------------------------------------------
--- main conversion function
-
-convertConcrete :: Abstr -> Concr -> ParserInfo
-convertConcrete abs cnc = convert abs_defs conc cats
- where abs_defs = Map.assocs (funs abs)
- conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
- cats = lincats cnc
-
-convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo
-convert abs_defs cnc_defs cat_defs =
- let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs)
- in getParserInfo (List.foldl' (convertRule cnc_defs) env xrules)
- where
- xrules = [
- (XRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
- (id, (ty,_)) <- abs_defs, let (args,res) = typeSkeleton ty,
- term <- maybeToList (Map.lookup id cnc_defs)]
-
- findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
-
-brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
-brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
- case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
- (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1
- where
- optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
- where
- ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
- ff funid xs env
- | product (map Set.size ys) == count =
- case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of
- (env,args) -> addProduction env cat (FApply funid args)
- | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs
- where
- count = length xs
- ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
-
-convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv
-convertRule cnc_defs grammarEnv (XRule fun args res ctypes ctype term) =
- brk (\grammarEnv -> foldBM addRule
- grammarEnv
- (convertTerm cnc_defs [] ctype term [([],[])])
- (protoFCat cnc_defs res ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv
- where
- addRule linRec (newCat', newArgs') env0 =
- let [newCat] = getFCats env0 newCat'
- (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
-
- (env2,lins) = List.mapAccumL addFSeq env1 linRec
- newLinRec = mkArray lins
-
- (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec)
-
- in addProduction env3 newCat (FApply funid newArgs)
-
-----------------------------------------------------------------------
--- term conversion
-
-type CnvMonad a = BacktrackM Env a
-
-type FPath = [FIndex]
-data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])]
-type Env = (ProtoFCat, [ProtoFCat])
-type LinRec = [(FPath, [FSymbol])]
-data XRule = XRule CId {- function -}
- [(Int,CId)] {- argument types: context size and category -}
- (Int,CId) {- result type : context size (always 0) and category -}
- [Term] {- argument lin-types representation -}
- Term {- result lin-type representation -}
- Term {- body -}
-
-protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat
-protoFCat cnc_defs (n,cat) ctype =
- let (rcs,tcs) = loop [] [] [] ctype'
- in PFCat n cat rcs tcs
- where
- ctype' -- extend the high-order linearization type
- | n > 0 = case ctype of
- R xs -> R (xs ++ replicate n (S []))
- _ -> error $ "Not a record: " ++ show ctype
- | otherwise = ctype
-
- loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
- loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
- loop path rcs tcs (S _) = (path:rcs, tcs)
- loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
- Just term -> loop path rcs tcs term
- Nothing -> error ("unknown identifier: "++show id)
-
-type TermMap = Map.Map CId Term
-
-convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec
-convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins
-convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins
-convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins
-convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p
- convertTerm cnc_defs (nr:sel) ctype term lins
-convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars
- convertTerm cnc_defs sel ctype term lins
-convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts)
---convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins)
-convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins)
-convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) =
- do toks <- member (strs:[strs' | Alt strs' _ <- vars])
- return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins)
-convertTerm cnc_defs sel ctype (F id) lins = case Map.lookup id cnc_defs of
- Just term -> convertTerm cnc_defs sel ctype term lins
- Nothing -> mzero
-convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do
- ss <- case t of
- R ss -> return ss
- F f -> case Map.lookup f cnc_defs of
- Just (R ss) -> return ss
- _ -> mzero
- convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
-convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")")
-
-
-convertArg (R record) nr path lbl_path lin lins =
- foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record)
-convertArg (C max) nr path lbl_path lin lins = do
- index <- member [0..max]
- restrictHead lbl_path index
- restrictArg nr path index
- return lins
-convertArg (S _) nr path lbl_path lin lins = do
- (_, args) <- get
- let PFCat _ cat rcs tcs = args !! nr
- l = index path rcs 0
- sym | isLiteralCat cat = FSymLit nr l
- | otherwise = FSymCat nr l
- return ((lbl_path, sym : lin) : lins)
- where
- index lbl' (lbl:lbls) idx
- | lbl' == lbl = idx
- | otherwise = index lbl' lbls $! (idx+1)
-
-
-convertCon (C max) index [] lbl_path lin lins = do
- guard (index <= max)
- restrictHead lbl_path index
- return lins
-convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
-
-convertRec cnc_defs [] (R ctypes) record lbl_path lin lins =
- foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins))
- lins
- (zip3 [0..] ctypes record)
-convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do
- convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins)
-
-
-------------------------------------------------------------
--- eval a term to ground terms
-
-evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
-evalTerm cnc_defs path (V nr) = do (_, args) <- get
- let PFCat _ _ _ tcs = args !! nr
- rpath = reverse path
- index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs))
- restrictArg nr rpath index
- return index
-evalTerm cnc_defs path (C nr) = return nr
-evalTerm cnc_defs path (R record) = case path of
- (index:path) -> evalTerm cnc_defs path (record !! index)
-evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
- evalTerm cnc_defs (index:path) term
-evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
-evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of
- Just term -> evalTerm cnc_defs path term
- Nothing -> mzero
-evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
-
-
-----------------------------------------------------------------------
--- GrammarEnv
-
-data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
-type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int]))
-type SeqSet = Map.Map FSeq SeqId
-type FunSet = Map.Map FFun FunId
-type CoerceSet= Map.Map [FCat] FCat
-
-emptyGrammarEnv cnc_defs lincats =
- let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
- in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
- where
- computeCatRange index cat ctype
- | cat == cidString = (index, (fcatString,fcatString,[]))
- | cat == cidInt = (index, (fcatInt, fcatInt, []))
- | cat == cidFloat = (index, (fcatFloat, fcatFloat, []))
- | otherwise = (index+size,(index,index+size-1,poly))
- where
- (size,poly) = getMultipliers 1 [] ctype
-
- getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
- getMultipliers m ms (S _) = (m,ms)
- getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
- getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
- Just term -> getMultipliers m ms term
- Nothing -> error ("unknown identifier: "++prCId id)
-
-
-expandHOAS abs_defs cnc_defs lincats env =
- foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
- where
- hoTypes :: [(Int,CId)]
- hoTypes = sortNub [(n,c) | (_,(ty,_)) <- abs_defs
- , (n,c) <- fst (typeSkeleton ty), n > 0]
-
- hoCats :: [CId]
- hoCats = sortNub [c | (_,(ty,_)) <- abs_defs
- , Hyp _ ty <- case ty of {DTyp hyps val _ -> hyps}
- , c <- fst (catSkeleton ty)]
-
- -- add a range of PMCFG categories for each GF high-order category
- add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
- case IntMap.lookup 0 catSet >>= Map.lookup cat of
- Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet
- !last_id' = last_id+(end-start)+1
- in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet)
- Nothing -> env
-
- -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
- add_hoFun env (n,cat) =
- let linRec = reverse $
- [(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++
- [([],[FSymLit i 0]) | i <- [1..n]]
- (env1,lins) = List.mapAccumL addFSeq env linRec
- newLinRec = mkArray lins
-
- (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec)
-
- env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar)))
- env2
- (zip (getFCats env2 arg) (getFCats env2 res))
- in env3
- where
- (arg,res) = case Map.lookup cat lincats of
- Nothing -> error $ "No lincat for " ++ prCId cat
- Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype)
-
- -- add one PMCFG function for each high-order category: _V : Var -> Cat
- add_varFun env cat =
- let (env1,seqid) = addFSeq env ([],[FSymLit 0 0])
- lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid
- (env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins))
- env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar]))
- env2
- (getFCats env2 res)
- in env3
- where
- res = case Map.lookup cat lincats of
- Nothing -> error $ "No lincat for " ++ prCId cat
- Just ctype -> protoFCat cnc_defs (0,cat) ctype
-
- _B = mkCId "_B"
- _V = mkCId "_V"
-
-
-addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv
-addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
- GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
-
-addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId)
-addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) =
- case Map.lookup seq seqSet of
- Just id -> (env,id)
- Nothing -> let !last_seq = Map.size seqSet
- in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq)
- where
- seq = mkArray lst
-
-addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId)
-addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun =
- case Map.lookup fun funSet of
- Just id -> (env,id)
- Nothing -> let !last_funid = Map.size funSet
- in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid)
-
-addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat)
-addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats =
- case sub_fcats of
- [fcat] -> (env,fcat)
- _ -> case Map.lookup sub_fcats crcSet of
- Just fcat -> (env,fcat)
- Nothing -> let !fcat = last_id+1
- in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
-
-getParserInfo :: GrammarEnv -> ParserInfo
-getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
- ParserInfo { functions = mkArray funSet
- , sequences = mkArray seqSet
- , productions = IntMap.union prodSet coercions
- , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
- , totalCats = last_id+1
- }
- where
- mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
-
- coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
-
-getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
-getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
- case IntMap.lookup n catSet >>= Map.lookup cat of
- Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
- where
- variants _ [] fcat = return fcat
- variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
- variants ms tcs ((m*index) + fcat)
-
-------------------------------------------------------------
--- updating the MCF rule
-
-restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
-restrictArg nr path index = do
- (head, args) <- get
- args' <- updateNthM (restrictProtoFCat path index) nr args
- put (head, args')
-
-restrictHead :: FPath -> FIndex -> CnvMonad ()
-restrictHead path term
- = do (head, args) <- get
- head' <- restrictProtoFCat path term head
- put (head', args)
-
-restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
-restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do
- tcs <- addConstraint tcs
- return (PFCat n cat rcs tcs)
- where
- addConstraint [] = error "restrictProtoFCat: unknown path"
- addConstraint (c@(path,indices) : tcs)
- | path0 == path = guard (index0 `elem` indices) >>
- return ((path,[index0]) : tcs)
- | otherwise = liftM (c:) (addConstraint tcs)
-
-mkArray lst = listArray (0,length lst-1) lst
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index ce7e5be73..7cd00550a 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -2,8 +2,7 @@
module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where
import GF.Compile.Export
-import qualified GF.Compile.GenerateFCFG as FCFG
-import qualified GF.Compile.GeneratePMCFG as PMCFG
+import GF.Compile.GeneratePMCFG
import PGF.CId
import qualified PGF.Macros as CM
@@ -48,12 +47,8 @@ addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
return pgf { D.concretes = Map.fromList cncs }
where
- conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts)
- then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc
- else return $ FCFG.convertConcrete (D.abstract pgf) cnc
+ conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo })
- where
- erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"})
-- Generate PGF from GFCM.
-- this assumes a grammar translated by canon2canon
diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs
index ff75d8fe6..01be44e8c 100644
--- a/src/compiler/GF/Compile/PGFtoJS.hs
+++ b/src/compiler/GF/Compile/PGFtoJS.hs
@@ -96,13 +96,14 @@ parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray
JS.EObj $ map cats (Map.assocs (startCats p)),
JS.EInt (totalCats p)]]
where
- cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EArray (map JS.EInt is))
+ cats (c,(start,end)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
+ ,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
frule2js :: Production -> JS.Expr
frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)]
frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg]
-ffun2js (FFun f _ lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))]
+ffun2js (FFun f lins) = new "FFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt (Array.elems lins))]
seq2js :: Array.Array FIndex FSymbol -> JS.Expr
seq2js seq = JS.EArray [sym2js s | s <- Array.elems seq]
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index dc15d1929..dba40cbf3 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -172,7 +172,6 @@ data Flags = Flags {
optSpeechLanguage :: Maybe String,
optLexer :: Maybe String,
optUnlexer :: Maybe String,
- optErasing :: Bool,
optBuildParser :: BuildParser,
optWarnings :: [Warning],
optDump :: [Dump]
@@ -219,7 +218,6 @@ optionsPGF :: Options -> [(String,String)]
optionsPGF opts =
maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
- ++ (if flag optErasing opts then [("erasing","on")] else [])
++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else [])
-- Option manipulation
@@ -276,7 +274,6 @@ defaultFlags = Flags {
optSpeechLanguage = Nothing,
optLexer = Nothing,
optUnlexer = Nothing,
- optErasing = True,
optBuildParser = BuildParser,
optWarnings = [],
optDump = []
@@ -354,7 +351,6 @@ optDescr =
Option [] ["coding"] (ReqArg coding "ENCODING")
("Character encoding of the source grammar, ENCODING = "
++ concat (intersperse " | " (map fst encodings)) ++ "."),
- Option [] ["erasing"] (onOff erasing False) "Generate erasing grammar (default off).",
Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand",
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
@@ -414,7 +410,6 @@ optDescr =
coding x = case lookup x encodings of
Just c -> set $ \o -> o { optEncoding = c }
Nothing -> fail $ "Unknown character encoding: " ++ x
- erasing x = set $ \o -> o { optErasing = x }
buildParser x = do v <- case x of
"on" -> return BuildParser
"off" -> return DontBuildParser
diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs
index d22a4ea8d..a9bb20ef6 100644
--- a/src/compiler/GF/Speech/PGFToCFG.hs
+++ b/src/compiler/GF/Speech/PGFToCFG.hs
@@ -27,6 +27,7 @@ bnfPrinter = toBNF id
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
+type Profile = [Int]
pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name
@@ -42,7 +43,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
| (c,fcs) <- Map.toList (startCats pinfo),
- (fc,i) <- zip fcs [1..]]
+ (fc,i) <- zip (range fcs) [1..]]
fcatCat :: FCat -> Cat
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
@@ -53,7 +54,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
-- gets the number of fields in the lincat for the given category
catLinArity :: FCat -> Int
- catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
+ catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ rhs, _) <- topdownRules c])
topdownRules cat = f cat []
where
@@ -69,17 +70,17 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
startRules :: [CFRule]
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,fcs) <- Map.toList (startCats pinfo),
- fc <- fcs, not (isLiteralFCat fc),
+ fc <- range fcs, not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]]
fruleToCFRule :: (FCat,Production) -> [CFRule]
fruleToCFRule (c,FApply funid args) =
- [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
+ [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
| (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, not (containsLiterals row)]
where
- FFun f ps rhs = functions pinfo ! funid
+ FFun f rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = concatMap fsymbolToSymbol . Array.elems
@@ -94,11 +95,10 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
fsymbolToSymbol (FSymKS ts) = map Terminal ts
- fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
- fixProfile row = concatMap positions
+ fixProfile :: Array FPointPos FSymbol -> Int -> Profile
+ fixProfile row i = [k | (k,j) <- nts, j == i]
where
nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
- positions i = [k | (k,j) <- nts, j == i]
getPos (FSymCat j _) = [j]
getPos (FSymLit j _) = [j]