diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
| commit | e9e80fc389365e24d4300d7d5390c7d833a96c50 (patch) | |
| tree | f0b58473adaa670bd8fc52ada419d8cad470ee03 /src-3.0/GF/Compile/GeneratePMCFG.hs | |
| parent | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff) | |
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src-3.0/GF/Compile/GeneratePMCFG.hs')
| -rw-r--r-- | src-3.0/GF/Compile/GeneratePMCFG.hs | 356 |
1 files changed, 0 insertions, 356 deletions
diff --git a/src-3.0/GF/Compile/GeneratePMCFG.hs b/src-3.0/GF/Compile/GeneratePMCFG.hs deleted file mode 100644 index e0343e8d6..000000000 --- a/src-3.0/GF/Compile/GeneratePMCFG.hs +++ /dev/null @@ -1,356 +0,0 @@ -{-# OPTIONS -fbang-patterns #-} ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. --- --- the resulting grammars might be /very large/ --- --- the conversion is only equivalent if the GFC grammar has a context-free backbone. ------------------------------------------------------------------------------ - - -module GF.Compile.GeneratePMCFG - (convertConcrete) where - -import PGF.CId -import PGF.Data -import PGF.Macros --hiding (prt) -import PGF.Parsing.FCFG.Utilities - -import GF.Data.BacktrackM -import GF.Data.SortedList -import GF.Data.Utilities (updateNthM, sortNub) - -import qualified Data.Map as Map -import qualified Data.Set as Set -import qualified Data.List as List -import qualified Data.ByteString.Char8 as BS -import Data.Array -import Data.Maybe -import Control.Monad -import Debug.Trace - ----------------------------------------------------------------------- --- main conversion function - -convertConcrete :: Abstr -> Concr -> FGrammar -convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' - where abs_defs = Map.assocs (funs abs) - conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" - cats = lincats cnc - (abs_defs',conc',cats') = expandHOAS abs_defs conc cats - -expandHOAS :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ([(CId,(Type,Expr))],TermMap,TermMap) -expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns, - Map.unions [lins, hoLins, varLins], - Map.unions [lincats, hoLincats, varLincat]) - where - -- replace higher-order fun argument types with new categories - funs' = [(f,(fixType ty,e)) | (f,(ty,e)) <- funs] - where - fixType :: Type -> Type - fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt - - hoTypes :: [(Int,CId)] - hoTypes = sortNub [(n,c) | (_,(ty,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0] - hoCats = sortNub (map snd hoTypes) - -- for each Cat with N bindings, we add a new category _NCat - -- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat - hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),EEq [])) | ty@(n,c) <- hoTypes] - -- lincats for the new categories - hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes] - -- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ... - hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes] - where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c) - -- for each Cat, we a add a fun _Var_Cat : _Var -> Cat - varFuns = [(varFunName cat, (cftype [varCat] cat,EEq [])) | cat <- hoCats] - -- linearizations of the _Var_Cat functions - varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats] - -- lincat for the _Var category - varLincat = Map.singleton varCat (R [S []]) - - lincatOf c = fromMaybe (error $ "No lincat for " ++ prCId c) $ Map.lookup c lincats - - modifyRec :: ([Term] -> [Term]) -> Term -> Term - modifyRec f (R xs) = R (f xs) - modifyRec _ t = error $ "Not a record: " ++ show t - - varCat = mkCId "_Var" - - catName :: (Int,CId) -> CId - catName (0,c) = c - catName (n,c) = mkCId ("_" ++ show n ++ prCId c) - - funName :: (Int,CId) -> CId - funName (n,c) = mkCId ("__" ++ show n ++ prCId c) - - varFunName :: CId -> CId - varFunName c = mkCId ("_Var_" ++ prCId c) - --- replaces __NCat with _B and _Var_Cat with _. --- the temporary names are just there to avoid name collisions. -fixHoasFuns :: FGrammar -> FGrammar -fixHoasFuns (!rs, !cs) = ([FRule (fixName n) ps args cat lins | FRule n ps args cat lins <- rs], cs) - where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") - | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId - fixName n = n - -convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> FGrammar -convert abs_defs cnc_defs cat_defs = getFGrammar (List.foldl' (convertRule cnc_defs) emptyFRulesEnv srules) - where - srules = [ - (XRule id args res (map findLinType args) (findLinType res) term) | - (id, (ty,_)) <- abs_defs, let (args,res) = catSkeleton ty, - term <- Map.lookup id cnc_defs] - - findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) - - -convertRule :: TermMap -> FRulesEnv -> XRule -> FRulesEnv -convertRule cnc_defs frulesEnv (XRule fun args cat ctypes ctype term) = - foldBM addRule - frulesEnv - (convertTerm cnc_defs [] ctype term [([],[])]) - (protoFCat cnc_defs cat ctype, zipWith (protoFCat cnc_defs) args ctypes) - where - addRule linRec (newCat', newArgs') env0 = - let (env1, newCat) = genFCatHead env0 newCat' - (env2, newArgs) = List.mapAccumL (genFCatArg cnc_defs) env1 newArgs' - - newLinRec = mkArray (map (mkArray . snd) linRec) - mkArray lst = listArray (0,length lst-1) lst - - rule = FRule fun [] newArgs newCat newLinRec - in addFRule env2 rule - ----------------------------------------------------------------------- --- term conversion - -type CnvMonad a = BacktrackM Env a - -type FPath = [FIndex] -data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] Term -type Env = (ProtoFCat, [ProtoFCat]) -type LinRec = [(FPath, [FSymbol])] -data XRule = XRule CId {- function -} - [CId] {- argument types -} - CId {- result type -} - [Term] {- argument lin-types representation -} - Term {- result lin-type representation -} - Term {- body -} - -protoFCat :: TermMap -> CId -> Term -> ProtoFCat -protoFCat cnc_defs cat ctype = PFCat cat (getRCS cnc_defs ctype) [] ctype - -type TermMap = Map.Map CId Term - -convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec -convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins -convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins -convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins -convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p - convertTerm cnc_defs (nr:sel) ctype term lins -convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars - convertTerm cnc_defs sel ctype term lins -convertTerm cnc_defs sel ctype (S ts) ((lbl_path,lin) : lins) = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) ((lbl_path,lin) : lins) (reverse ts) -convertTerm cnc_defs sel ctype (K (KS str)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok str : lin) : lins) -convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) = - do toks <- member (strs:[strs' | Alt strs' _ <- vars]) - return ((lbl_path, map FSymTok toks ++ lin) : lins) -convertTerm cnc_defs sel ctype (F id) lins = do term <- Map.lookup id cnc_defs - convertTerm cnc_defs sel ctype term lins -convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do - ss <- case t of - R ss -> return ss - F f -> do - t <- Map.lookup f cnc_defs - case t of - R ss -> return ss - convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins -convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")") - - -convertArg (R record) nr path lbl_path lin lins = - foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record) -convertArg (C max) nr path lbl_path lin lins = do - index <- member [0..max] - restrictHead lbl_path index - restrictArg nr path index - return lins -convertArg (S _) nr path lbl_path lin lins = do - (_, args) <- readState - let PFCat cat rcs tcs _ = args !! nr - return ((lbl_path, FSymCat (index path rcs 0) nr : lin) : lins) - where - index lbl' (lbl:lbls) idx - | lbl' == lbl = idx - | otherwise = index lbl' lbls $! (idx+1) - - -convertCon (C max) index [] lbl_path lin lins = do - guard (index <= max) - restrictHead lbl_path index - return lins -convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x - -convertRec cnc_defs [] (R ctypes) record lbl_path lin lins = - foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins)) - lins - (zip3 [0..] ctypes record) -convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do - convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins) - - ------------------------------------------------------------- --- eval a term to ground terms - -evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex -evalTerm cnc_defs path (V nr) = do (_, args) <- readState - let PFCat _ _ _ ctype = args !! nr - unifyPType nr (reverse path) (selectTerm path ctype) -evalTerm cnc_defs path (C nr) = return nr -evalTerm cnc_defs path (R record) = case path of - (index:path) -> evalTerm cnc_defs path (record !! index) -evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel - evalTerm cnc_defs (index:path) term -evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path -evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs - evalTerm cnc_defs path term -evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") - -unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex -unifyPType nr path (C max_index) = - do (_, args) <- readState - let PFCat _ _ tcs _ = args !! nr - case lookup path tcs of - Just index -> return index - Nothing -> do index <- member [0..max_index] - restrictArg nr path index - return index -unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007 - -selectTerm :: FPath -> Term -> Term -selectTerm [] term = term -selectTerm (index:path) (R record) = selectTerm path (record !! index) - - ----------------------------------------------------------------------- --- FRulesEnv - -data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule] -type FCatSet = Map.Map CId (Map.Map [(FPath,FIndex)] FCat) - -emptyFRulesEnv = FRulesEnv 0 (ins fcatString (mkCId "String") [] $ - ins fcatInt (mkCId "Int") [] $ - ins fcatFloat (mkCId "Float") [] $ - ins fcatVar (mkCId "_Var") [] $ - Map.empty) [] - where - ins fcat cat tcs fcatSet = - Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet - where - tmap_s = Map.singleton tcs fcat - -addFRule :: FRulesEnv -> FRule -> FRulesEnv -addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules) - -getFGrammar :: FRulesEnv -> FGrammar -getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map Map.elems fcatSet) - -genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) -genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs _) = - case Map.lookup cat fcatSet >>= Map.lookup tcs of - Just fcat -> (env, fcat) - Nothing -> let fcat = last_id+1 - in (FRulesEnv fcat (ins fcat) rules, fcat) - where - ins fcat = Map.insertWith (\_ -> Map.insert tcs fcat) cat tmap_s fcatSet - where - tmap_s = Map.singleton tcs fcat - -genFCatArg :: TermMap -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) -genFCatArg cnc_defs env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs ctype) = - case Map.lookup cat fcatSet of - Just tmap -> case Map.lookup tcs tmap of - Just fcat -> (env, fcat) - Nothing -> ins tmap - Nothing -> ins Map.empty - where - ins tmap = - let fcat = last_id+1 - (last_id1,tmap1,rules1) - = foldBM (\tcs st (last_id,tmap,rules) -> - let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap - rule = FRule wildCId [[0]] [fcat_arg] fcat - (listArray (0,length rcs-1) [listArray (0,0) [FSymCat lbl 0] | lbl <- [0..length rcs-1]]) - in if st - then (last_id1,tmap1,rule:rules) - else (last_id, tmap, rules)) - (fcat,Map.insert tcs fcat tmap,rules) - (gen_tcs ctype [] []) - False - in (FRulesEnv last_id1 (Map.insert cat tmap1 fcatSet) rules1, fcat) - where - addArg tcs last_id tmap = - case Map.lookup tcs tmap of - Just fcat -> (last_id, tmap, fcat) - Nothing -> let fcat = last_id+1 - in (fcat, Map.insert tcs fcat tmap, fcat) - - gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] - gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) - gen_tcs (S _) path acc = return acc - gen_tcs (C max_index) path acc = - case List.lookup path tcs of - Just index -> return $! addConstraint path index acc - Nothing -> do writeState True - index <- member [0..max_index] - return $! addConstraint path index acc - where - addConstraint path0 index0 (c@(path,index) : cs) - | path0 > path = c:addConstraint path0 index0 cs - addConstraint path0 index0 cs = (path0,index0) : cs - gen_tcs (F id) path acc = case Map.lookup id cnc_defs of - Just term -> gen_tcs term path acc - Nothing -> error ("unknown identifier: "++prCId id) - - -getRCS :: TermMap -> Term -> [FPath] -getRCS cnc_defs = loop [] [] - where - loop path rcs (R record) = List.foldl' (\rcs (index,term) -> loop (index:path) rcs term) rcs (zip [0..] record) - loop path rcs (C i) = rcs - loop path rcs (S _) = path:rcs - loop path rcs (F id) = case Map.lookup id cnc_defs of - Just term -> loop path rcs term - Nothing -> error ("unknown identifier: "++show id) - ------------------------------------------------------------- --- updating the MCF rule - -restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () -restrictArg nr path index = do - (head, args) <- readState - args' <- updateNthM (restrictProtoFCat path index) nr args - writeState (head, args') - -restrictHead :: FPath -> FIndex -> CnvMonad () -restrictHead path term - = do (head, args) <- readState - head' <- restrictProtoFCat path term head - writeState (head', args) - -restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat -restrictProtoFCat path0 index0 (PFCat cat rcs tcs ctype) = do - tcs <- addConstraint tcs - return (PFCat cat rcs tcs ctype) - where - addConstraint (c@(path,index) : cs) - | path0 > path = liftM (c:) (addConstraint cs) - | path0 == path = guard (index0 == index) >> - return (c : cs) - addConstraint cs = return ((path0,index0) : cs) |
