From 992a7ffb381190ffa67f59f33d0dfadf41f84e78 Mon Sep 17 00:00:00 2001 From: krasimir Date: Fri, 18 Jun 2010 12:55:58 +0000 Subject: Yay!! Direct generation of PMCFG from GF grammar --- src/compiler/GF/Compile/GeneratePMCFG.hs | 660 +++++++++++++++++++------------ 1 file changed, 411 insertions(+), 249 deletions(-) (limited to 'src/compiler/GF/Compile/GeneratePMCFG.hs') diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index a735b7adc..b0f566cea 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses #-} +{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses, PatternGuards #-} ---------------------------------------------------------------------- -- | -- Maintainer : Krasimir Angelov @@ -13,11 +13,15 @@ module GF.Compile.GeneratePMCFG (convertConcrete) where import PGF.CId -import PGF.Data -import PGF.Macros +import PGF.Data hiding (Type) import GF.Infra.Option +import GF.Grammar hiding (Env, mkRecord, mkTable) +import qualified GF.Infra.Modules as M +import GF.Grammar.Lookup +import GF.Grammar.Predef import GF.Data.BacktrackM +import GF.Data.Operations import GF.Data.Utilities (updateNthM, updateNth, sortNub) import System.IO @@ -26,36 +30,52 @@ import qualified Data.Set as Set import qualified Data.List as List import qualified Data.IntMap as IntMap import qualified Data.ByteString.Char8 as BS +import Text.PrettyPrint hiding (Str) import Data.Array.IArray import Data.Maybe +import Data.Char (isDigit) import Control.Monad +import Control.Monad.Identity import Control.Exception ---------------------------------------------------------------------- -- main conversion function ---convertConcrete :: Options -> Abstr -> CId -> Concr -> IO Concr -convertConcrete opts lang flags printnames abs_defs cnc_defs lincats params lin_defs = do - let env0 = emptyGrammarEnv cat_defs params +convertConcrete :: Options -> SourceGrammar -> SourceModule -> SourceModule -> IO Concr +convertConcrete opts gr am cm = do + let env0 = emptyGrammarEnv gr cm when (flag optProf opts) $ do - profileGrammar lang env0 pfrules - env1 <- expandHOAS opts abs_defs cat_defs lin_defs env0 - env2 <- foldM (convertRule opts) env1 pfrules - return $ getParserInfo flags printnames env2 + profileGrammar cm env0 pfrules + env1 <- expandHOAS opts cm env0 + env2 <- foldM (convertRule gr opts) env1 pfrules + return $ getConcr flags printnames env2 where - cat_defs = Map.insert cidVar (S []) lincats + (m,mo) = cm pfrules = [ - (PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | - (id, (ty,_,_)) <- Map.toList abs_defs, let (args,res) = typeSkeleton ty, - term <- maybeToList (Map.lookup id cnc_defs)] - - findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + (PFRule id args (0,res) (map (\(_,_,ty) -> ty) cont) val term) | + (id,GF.Grammar.CncFun (Just (cat,cont,val)) (Just (L _ term)) _) <- Map.toList (M.jments mo), + let (args,res) = err error typeSkeleton (lookupFunType gr (fst am) id)] + + flags = Map.fromList [(mkCId f,LStr x) | (f,x) <- optionsPGF (M.flags mo)] + + printnames = Map.fromAscList [(i2i id, name) | (id,info) <- Map.toList (M.jments mo), name <- prn info] + where + prn (GF.Grammar.CncFun _ _ (Just (L _ tr))) = [flatten tr] + prn (GF.Grammar.CncCat _ _ (Just (L _ tr))) = [flatten tr] + prn _ = [] + + flatten (K s) = s + flatten (Alts x _) = flatten x + flatten (C x y) = flatten x +++ flatten y -profileGrammar lang (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do +i2i :: Ident -> CId +i2i = CId . ident2bs + +profileGrammar (m,mo) env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do hPutStrLn stderr "" - hPutStrLn stderr ("Language: " ++ show lang) + hPutStrLn stderr ("Language: " ++ showIdent m) hPutStrLn stderr "" hPutStrLn stderr "Categories Count" hPutStrLn stderr "--------------------------------" @@ -69,22 +89,52 @@ profileGrammar lang (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfr mapM_ profileRule pfrules hPutStrLn stderr "--------------------------------" where - profileCat (cid,(fcat1,fcat2,_,_)) = do - hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1)) + profileCat (cid,(fcat1,fcat2,_)) = do + hPutStrLn stderr (lformat 23 (showIdent cid) ++ rformat 9 (show (fcat2-fcat1+1))) profileRule (PFRule fun args res ctypes ctype term) = do - let pargs = zipWith protoFCat args ctypes - hPutStrLn stderr (lformat 23 fun ++ rformat 9 (product [length xs | PFCat _ _ _ tcs <- pargs, (_,xs) <- tcs])) - - lformat :: Show a => Int -> a -> String - lformat n x = s ++ replicate (n-length s) ' ' + let pargs = map (protoFCat env) args + hPutStrLn stderr (lformat 23 (showIdent fun) ++ rformat 9 (show (product (map (catFactor env) args)))) where - s = show x + catFactor (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,(_,cat)) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (s,e,_) -> e-s+1 + Nothing -> 0 + + lformat :: Int -> String -> String + lformat n s = s ++ replicate (n-length s) ' ' + + rformat :: Int -> String -> String + rformat n s = replicate (n-length s) ' ' ++ s + +data ProtoFRule = PFRule Ident {- function -} + [(Int,Cat)] {- argument types: context size and category -} + (Int,Cat) {- result type : context size (always 0) and category -} + [Type] {- argument lin-types representation -} + Type {- result lin-type representation -} + Term {- body -} + +convertRule :: SourceGrammar -> Options -> GrammarEnv -> ProtoFRule -> IO GrammarEnv +convertRule gr opts grammarEnv (PFRule fun args res ctypes ctype term) = do + let pres = protoFCat grammarEnv res + pargs = map (protoFCat grammarEnv) args + + b = runCnvMonad gr (unfactor term >>= convertTerm CNil ctype) (pargs,[]) + (grammarEnv1,b1) = addSequencesB grammarEnv b + grammarEnv2 = brk (\grammarEnv -> foldBM addRule + grammarEnv + (goB b1 CNil []) + (pres,pargs) ) grammarEnv1 + when (verbAtLeast opts Verbose) $ hPutStrLn stderr ("+ "++showIdent fun) + return $! grammarEnv2 + where + addRule lins (newCat', newArgs') env0 = + let [newCat] = getFCatsX env0 newCat' + (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' - rformat :: Show a => Int -> a -> String - rformat n x = replicate (n-length s) ' ' ++ s - where - s = show x + (env2,funid) = addCncFun env1 (PGF.Data.CncFun (i2i fun) (mkArray lins)) + + in addProduction env2 newCat (PApply funid newArgs) brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = @@ -103,141 +153,245 @@ brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = count = length xs ys = foldr (zipWith Set.insert) (repeat Set.empty) xs -convertRule :: Options -> GrammarEnv -> ProtoFRule -> IO GrammarEnv -convertRule opts grammarEnv (PFRule fun args res ctypes ctype term) = do - let pres = protoFCat res ctype - pargs = zipWith protoFCat args ctypes - - b = runBranchM (convertTerm [] ctype term) (pargs,[]) - (grammarEnv1,b1) = addSequences' grammarEnv b - grammarEnv2 = brk (\grammarEnv -> foldBM addRule - grammarEnv - (go' b1 [] []) - (pres,pargs) ) grammarEnv1 - when (verbAtLeast opts Verbose) $ hPutStrLn stderr ("+ "++showCId fun) - return $! grammarEnv2 +unfactor :: Term -> CnvMonad Term +unfactor t = CM (\gr c -> c (unfac gr t)) where - addRule lins (newCat', newArgs') env0 = - let [newCat] = getFCats env0 newCat' - (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' - - (env2,funid) = addCncFun env1 (CncFun fun (mkArray lins)) - - in addProduction env2 newCat (PApply funid newArgs) + unfac gr t = + case t of + T (TTyped ty) [(PV x,u)] -> V ty [restore x v (unfac gr u) | v <- err error id (allParamValues gr ty)] + _ -> composSafeOp (unfac gr) t + where + restore x u t = case t of + Vr y | y == x -> u + _ -> composSafeOp (restore x u) t ---------------------------------------------------------------------- --- Branch monad - -newtype BranchM a = BM (forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b) -> ([ProtoFCat],[Symbol]) -> Branch b) - -instance Monad BranchM where - return a = BM (\c s -> c a s) - BM m >>= k = BM (\c s -> m (\a s -> unBM (k a) c s) s) - where unBM (BM m) = m - -instance MonadState ([ProtoFCat],[Symbol]) BranchM where - get = BM (\c s -> c s s) - put s = BM (\c _ -> c () s) +-- CnvMonad monad +-- +-- The branching monad provides backtracking together with +-- recording of the choices made. We have two cases +-- when we have alternative choices: +-- +-- * when we have parameter type, then +-- we have to try all possible values +-- * when we have variants we have to try all alternatives +-- +-- The conversion monad keeps track of the choices and they are +-- returned as 'Branch' data type. -instance Functor BranchM where - fmap f (BM m) = BM (\c s -> m (c . f) s) +data Branch a + = Case Int Path [(Term,Branch a)] + | Variant [Branch a] + | Return a -runBranchM :: BranchM (Value a) -> ([ProtoFCat],[Symbol]) -> Branch a -runBranchM (BM m) s = m (\v s -> Return v) s +newtype CnvMonad a = CM {unCM :: SourceGrammar + -> forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b) + -> ([ProtoFCat],[Symbol]) + -> Branch b} -variants :: [a] -> BranchM a -variants xs = BM (\c s -> Variant [c x s | x <- xs]) +instance Monad CnvMonad where + return a = CM (\gr c s -> c a s) + CM m >>= k = CM (\gr c s -> m gr (\a s -> unCM (k a) gr c s) s) -choices :: Int -> FPath -> BranchM LIndex -choices nr path = BM (\c s -> let (args,_) = s - PFCat _ _ _ tcs = args !! nr - in case fromMaybe (error "evalTerm: wrong path") (lookup path tcs) of - [index] -> c index s - indices -> Case nr path [c i (updateEnv i s) | i <- indices]) - where - updateEnv index (args,seq) = (updateNth (restrictArg path index) nr args,seq) +instance MonadState ([ProtoFCat],[Symbol]) CnvMonad where + get = CM (\gr c s -> c s s) + put s = CM (\gr c _ -> c () s) - restrictArg path index (PFCat n cat rcs tcs) = PFCat n cat rcs (addConstraint path index tcs) +instance Functor CnvMonad where + fmap f (CM m) = CM (\gr c s -> m gr (c . f) s) - addConstraint path0 index0 [] = error "restrictProtoFCat: unknown path" - addConstraint path0 index0 (c@(path,indices) : tcs) - | path0 == path = ((path,[index0]) : tcs) - | otherwise = c : addConstraint path0 index0 tcs +runCnvMonad :: SourceGrammar -> CnvMonad a -> ([ProtoFCat],[Symbol]) -> Branch a +runCnvMonad gr (CM m) s = m gr (\v s -> Return v) s -mkRecord :: [BranchM (Value a)] -> BranchM (Value a) -mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs []) +-- | backtracking for all variants +variants :: [a] -> CnvMonad a +variants xs = CM (\gr c s -> Variant [c x s | x <- xs]) +-- | backtracking for all parameter values that a variable could take +choices :: Int -> Path -> CnvMonad Term +choices nr path = do (args,_) <- get + let PFCat _ _ schema = args !! nr + descend schema path CNil + where + descend (CRec rs) (CProj lbl path) rpath = case lookup lbl rs of + Just (Identity t) -> descend t path (CProj lbl rpath) + descend (CRec rs) CNil rpath = do rs <- mapM (\(lbl,Identity t) -> fmap (assign lbl) (descend t CNil (CProj lbl rpath))) rs + return (R rs) + descend (CTbl pt cs) (CSel trm path) rpath = case lookup trm cs of + Just (Identity t) -> descend t path (CSel trm rpath) + descend (CTbl pt cs) CNil rpath = do cs <- mapM (\(trm,Identity t) -> descend t CNil (CSel trm rpath)) cs + return (V pt cs) + descend (CPar (m,vs)) CNil rpath = case vs of + [(value,index)] -> return value + values -> let path = reversePath rpath + in CM (\gr c s -> Case nr path [(value, updateEnv path value gr c s) + | (value,index) <- values]) + + updateEnv path value gr c (args,seq) = + case updateNthM (restrictProtoFCat path value) nr args of + Just args -> c value (args,seq) + Nothing -> error "conflict in updateEnv" + +-- | the argument should be a parameter type and then +-- the function returns all possible values. +getAllParamValues :: Type -> CnvMonad [Term] +getAllParamValues ty = CM (\gr c -> c (err error id (allParamValues gr ty))) + +mkRecord :: [(Label,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c) +mkRecord xs = CM (\gr c -> foldl (\c (lbl,CM m) bs s -> c ((lbl,m gr (\v s -> Return v) s) : bs) s) (c . CRec) xs []) + +mkTable :: Type -> [(Term ,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c) +mkTable pt xs = CM (\gr c -> foldl (\c (trm,CM m) bs s -> c ((trm,m gr (\v s -> Return v) s) : bs) s) (c . CTbl pt) xs []) ---------------------------------------------------------------------- --- term conversion - -type CnvMonad a = BranchM a - -type FPath = [LIndex] -data ProtoFCat = PFCat Int CId [FPath] [(FPath,[LIndex])] +-- Term Schema +-- +-- The term schema is a term-like structure, with records, tables, +-- strings and parameters values, but in addition we could add +-- annotations of arbitrary types + +-- | Term schema +data Schema b s c + = CRec [(Label,b (Schema b s c))] + | CTbl Type [(Term, b (Schema b s c))] + | CStr s + | CPar c + +-- | Path into a term or term schema +data Path + = CProj Label Path + | CSel Term Path + | CNil + deriving (Eq,Show) + +-- | The ProtoFCat represents a linearization type as term schema. +-- The annotations are as follows: the strings are annotated with +-- their index in the PMCFG tuple, the parameters are annotated +-- with their value both as term and as index. +data ProtoFCat = PFCat Int Ident (Schema Identity Int (Int,[(Term,Int)])) type Env = (ProtoFCat, [ProtoFCat]) -data ProtoFRule = PFRule CId {- function -} - [(Int,CId)] {- argument types: context size and category -} - (Int,CId) {- result type : context size (always 0) and category -} - [Term] {- argument lin-types representation -} - Term {- result lin-type representation -} - Term {- body -} -type TermMap = Map.Map CId Term - - -protoFCat :: (Int,CId) -> Term -> ProtoFCat -protoFCat (n,cat) ctype = - let (rcs,tcs) = loop [] [] [] ctype' - in PFCat n cat rcs tcs - where - ctype' -- extend the high-order linearization type - | n > 0 = case ctype of - R xs -> R (xs ++ replicate n (S [])) - _ -> error $ "Not a record: " ++ show ctype - | otherwise = ctype - - loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) - loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) - loop path rcs tcs (S _) = (path:rcs, tcs) -data Branch a - = Case Int FPath [Branch a] - | Variant [Branch a] - | Return (Value a) - -data Value a - = Rec [Branch a] - | Str a - | Con LIndex +protoFCat :: GrammarEnv -> (Int,Cat) -> ProtoFCat +protoFCat (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,(_,cat)) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (_,_,pfcat) -> pfcat + Nothing -> error "unknown category" + +ppPath (CProj lbl path) = ppLabel lbl <+> ppPath path +ppPath (CSel trm path) = ppTerm Unqualified 5 trm <+> ppPath path +ppPath CNil = empty +reversePath path = rev CNil path + where + rev path0 CNil = path0 + rev path0 (CProj lbl path) = rev (CProj lbl path0) path + rev path0 (CSel trm path) = rev (CSel trm path0) path -go' :: Branch SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] -go' (Case nr path_ bs) path ss = do (index,b) <- member (zip [0..] bs) - restrictArg nr path_ index - go' b path ss -go' (Variant bs) path ss = do b <- member bs - go' b path ss -go' (Return v) path ss = go v path ss -go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] -go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs)) -go (Str seqid) path ss = return (seqid : ss) -go (Con i) path ss = restrictHead path i >> return ss +---------------------------------------------------------------------- +-- term conversion -addSequences' :: GrammarEnv -> Branch [Symbol] -> (GrammarEnv, Branch SeqId) -addSequences' env (Case nr path bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs +type Value a = Schema Branch a Term + +convertTerm :: Path -> Type -> Term -> CnvMonad (Value [Symbol]) +convertTerm sel ctype (Vr x) = convertArg ctype (getVarIndex x) (reversePath sel) +convertTerm sel ctype (Abs _ _ t) = convertTerm sel ctype t -- there are only top-level abstractions and we ignore them !!! +convertTerm sel ctype (R record) = convertRec sel ctype record +convertTerm sel ctype (P term l) = convertTerm (CProj l sel) ctype term +convertTerm sel ctype (V pt ts) = convertTbl sel ctype pt ts +convertTerm sel ctype (S term p) = do v <- evalTerm CNil p + convertTerm (CSel v sel) ctype term +convertTerm sel ctype (FV vars) = do term <- variants vars + convertTerm sel ctype term +convertTerm sel ctype (C t1 t2) = do v1 <- convertTerm sel ctype t1 + v2 <- convertTerm sel ctype t2 + return (CStr (concat [s | CStr s <- [v1,v2]])) +convertTerm sel ctype (K t) = return (CStr [SymKS [t]]) +convertTerm sel ctype Empty = return (CStr []) +convertTerm sel ctype (Alts s alts) + = return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]]) + where + strings (K s) = [s] + strings (C u v) = strings u ++ strings v + strings (Strs ss) = concatMap strings ss +convertTerm CNil ctype t = do v <- evalTerm CNil t + return (CPar v) +convertTerm _ _ t = error (render (text "convertTerm" <+> parens (ppTerm Unqualified 0 t))) + +convertArg :: Term -> Int -> Path -> CnvMonad (Value [Symbol]) +convertArg (RecType rs) nr path = + mkRecord (map (\(lbl,ctype) -> (lbl,convertArg ctype nr (CProj lbl path))) rs) +convertArg (Table pt vt) nr path = do + vs <- getAllParamValues pt + mkTable pt (map (\v -> (v,convertArg vt nr (CSel v path))) vs) +convertArg (Sort _) nr path = do + (args,_) <- get + let PFCat _ cat schema = args !! nr + l = index (reversePath path) schema + sym | isLiteralCat cat = SymLit nr l + | otherwise = SymCat nr l + return (CStr [sym]) + where + index (CProj lbl path) (CRec rs) = case lookup lbl rs of + Just (Identity t) -> index path t + index (CSel trm path) (CTbl _ rs) = case lookup trm rs of + Just (Identity t) -> index path t + index CNil (CStr idx) = idx +convertArg ty nr path = do + value <- choices nr (reversePath path) + return (CPar value) + +convertRec CNil (RecType rs) record = + mkRecord (map (\(lbl,ctype) -> (lbl,convertTerm CNil ctype (projectRec lbl record))) rs) +convertRec (CProj lbl path) ctype record = + convertTerm path ctype (projectRec lbl record) +convertRec _ ctype _ = error ("convertRec: "++show ctype) + +convertTbl CNil (Table _ vt) pt ts = do + vs <- getAllParamValues pt + mkTable pt (zipWith (\v t -> (v,convertTerm CNil vt t)) vs ts) +convertTbl (CSel v sub_sel) ctype pt ts = do + vs <- getAllParamValues pt + case lookup v (zip vs ts) of + Just t -> convertTerm sub_sel ctype t + Nothing -> error (render (text "convertTbl:" <+> (text "missing value" <+> ppTerm Unqualified 0 v $$ + text "among" <+> vcat (map (ppTerm Unqualified 0) vs)))) +convertTbl _ ctype _ _ = error ("convertTbl: "++show ctype) + + +goB :: Branch (Value SeqId) -> Path -> [SeqId] -> BacktrackM Env [SeqId] +goB (Case nr path bs) rpath ss = do (value,b) <- member bs + restrictArg nr path value + goB b rpath ss +goB (Variant bs) rpath ss = do b <- member bs + goB b rpath ss +goB (Return v) rpath ss = goV v rpath ss + +goV :: Value SeqId -> Path -> [SeqId] -> BacktrackM Env [SeqId] +goV (CRec xs) rpath ss = foldM (\ss (lbl,b) -> goB b (CProj lbl rpath) ss) ss (reverse xs) +goV (CTbl _ xs) rpath ss = foldM (\ss (trm,b) -> goB b (CSel trm rpath) ss) ss (reverse xs) +goV (CStr seqid) rpath ss = return (seqid : ss) +goV (CPar t) rpath ss = restrictHead (reversePath rpath) t >> return ss + +addSequencesB :: GrammarEnv -> Branch (Value [Symbol]) -> (GrammarEnv, Branch (Value SeqId)) +addSequencesB env (Case nr path bs) = let (env1,bs1) = List.mapAccumL (\env (trm,b) -> let (env',b') = addSequencesB env b + in (env',(trm,b'))) env bs in (env1,Case nr path bs1) -addSequences' env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs +addSequencesB env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequencesB env bs in (env1,Variant bs1) -addSequences' env (Return v) = let (env1,v1) = addSequences env v +addSequencesB env (Return v) = let (env1,v1) = addSequencesV env v in (env1,Return v1) -addSequences :: GrammarEnv -> Value [Symbol] -> (GrammarEnv, Value SeqId) -addSequences env (Rec vs) = let (env1,vs1) = List.mapAccumL addSequences' env vs - in (env1,Rec vs1) -addSequences env (Str lin) = let (env1,seqid) = addFSeq env (optimizeLin lin) - in (env1,Str seqid) -addSequences env (Con i) = (env,Con i) +addSequencesV :: GrammarEnv -> Value [Symbol] -> (GrammarEnv, Value SeqId) +addSequencesV env (CRec vs) = let (env1,vs1) = List.mapAccumL (\env (lbl,b) -> let (env',b') = addSequencesB env b + in (env',(lbl,b'))) env vs + in (env1,CRec vs1) +addSequencesV env (CTbl pt vs)=let (env1,vs1) = List.mapAccumL (\env (trm,b) -> let (env',b') = addSequencesB env b + in (env',(trm,b'))) env vs + in (env1,CTbl pt vs1) +addSequencesV env (CStr lin) = let (env1,seqid) = addFSeq env (optimizeLin lin) + in (env1,CStr seqid) +addSequencesV env (CPar i) = (env,CPar i) optimizeLin [] = [] @@ -251,98 +405,76 @@ optimizeLin lin@(SymKS _ : _) = optimizeLin (sym : lin) = sym : optimizeLin lin -convertTerm :: FPath -> Term -> Term -> CnvMonad (Value [Symbol]) -convertTerm sel ctype (V nr) = convertArg ctype nr (reverse sel) -convertTerm sel ctype (C nr) = convertCon ctype nr (reverse sel) -convertTerm sel ctype (R record) = convertRec sel ctype record -convertTerm sel ctype (P term p) = do nr <- evalTerm [] p - convertTerm (nr:sel) ctype term -convertTerm sel ctype (FV vars) = do term <- variants vars - convertTerm sel ctype term -convertTerm sel ctype (S ts) = do vs <- mapM (convertTerm sel ctype) ts - return (Str (concat [s | Str s <- vs])) -convertTerm sel ctype (K (KS t)) = return (Str [SymKS [t]]) -convertTerm sel ctype (K (KP s v))=return (Str [SymKP s v]) -convertTerm sel ctype (W s t) = do - ss <- case t of - R ss -> return ss - convertRec sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] -convertTerm sel ctype x = error ("convertTerm ("++show x++")") - -convertArg :: Term -> Int -> FPath -> CnvMonad (Value [Symbol]) -convertArg (R ctypes) nr path = do - mkRecord (zipWith (\lbl ctype -> convertArg ctype nr (lbl:path)) [0..] ctypes) -convertArg (C max) nr path = do - index <- choices nr path - return (Con index) -convertArg (S _) nr path = do - (args,_) <- get - let PFCat _ cat rcs tcs = args !! nr - l = index path rcs 0 - sym | isLiteralCat cat = SymLit nr l - | otherwise = SymCat nr l - return (Str [sym]) - where - index lbl' (lbl:lbls) idx - | lbl' == lbl = idx - | otherwise = index lbl' lbls $! (idx+1) - -convertCon (C max) index [] = return (Con index) -convertCon x _ _ = fail $ "SimpleToFCFG.convertCon: " ++ show x - -convertRec [] (R ctypes) record = do - mkRecord (zipWith (convertTerm []) ctypes record) -convertRec (index:sub_sel) ctype record = - convertTerm sub_sel ctype (record !! index) - - ------------------------------------------------------------ -- eval a term to ground terms -evalTerm :: FPath -> Term -> CnvMonad LIndex -evalTerm path (V nr) = choices nr (reverse path) -evalTerm path (C nr) = return nr -evalTerm path (R record) = case path of - (index:path) -> evalTerm path (record !! index) -evalTerm path (P term sel) = do index <- evalTerm [] sel - evalTerm (index:path) term +evalTerm :: Path -> Term -> CnvMonad Term +evalTerm CNil (QC f) = return (QC f) +evalTerm CNil (App x y) = do x <- evalTerm CNil x + y <- evalTerm CNil y + return (App x y) +evalTerm path (Vr x) = choices (getVarIndex x) path +evalTerm path (R rs) = case path of + (CProj lbl path) -> evalTerm path (projectRec lbl rs) + CNil -> do rs <- mapM (\(lbl,(_,t)) -> do t <- evalTerm path t + return (assign lbl t)) rs + return (R rs) +evalTerm path (P term lbl) = evalTerm (CProj lbl path) term +evalTerm path (V pt ts) = case path of + (CSel trm path) -> do vs <- getAllParamValues pt + case lookup trm (zip vs ts) of + Just t -> evalTerm path t + Nothing -> error "evalTerm: missing value" + CNil -> do ts <- mapM (evalTerm path) ts + return (V pt ts) +evalTerm path (S term sel) = do v <- evalTerm CNil sel + evalTerm (CSel v path) term evalTerm path (FV terms) = variants terms >>= evalTerm path -evalTerm path x = error ("evalTerm ("++show x++")") +evalTerm path t = error (render (text "evalTerm" <+> parens (ppTerm Unqualified 0 t))) +getVarIndex (IA _ i) = i +getVarIndex (IAV _ _ i) = i +getVarIndex (IC s) | isDigit (BS.last s) = (read . BS.unpack . snd . BS.spanEnd isDigit) s ---------------------------------------------------------------------- -- GrammarEnv data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) -type CatSet = IntMap.IntMap (Map.Map CId (FId,FId,[Int],Array LIndex String)) +type CatSet = IntMap.IntMap (Map.Map Ident (FId,FId,ProtoFCat)) type SeqSet = Map.Map Sequence SeqId type FunSet = Map.Map CncFun FunId type CoerceSet= Map.Map [FId] FId -emptyGrammarEnv lincats params = +emptyGrammarEnv gr (m,mo) = let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty where - computeCatRange index cat ctype - | cat == cidString = (index, (fcatString,fcatString,[],listArray (0,0) ["s"])) - | cat == cidInt = (index, (fcatInt, fcatInt, [],listArray (0,0) ["s"])) - | cat == cidFloat = (index, (fcatFloat, fcatFloat, [],listArray (0,0) ["s"])) - | cat == cidVar = (index, (fcatVar, fcatVar, [],listArray (0,0) ["s"])) - | otherwise = (index+size,(index,index+size-1, poly,maybe (error "missing params") (mkArray . getLabels []) (Map.lookup cat params))) + computeCatRange index cat ctype = + (index+size,(index,index+size-1,PFCat 0 cat schema)) where - (size,poly) = getMultipliers 1 [] ctype + ((_,size),schema) = compute (0,1) ctype - getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record - getMultipliers m ms (S _) = (m,ms) - getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) - - getLabels ls (R record) = concat [getLabels (l:ls) t | P (K (KS l)) t <- record] - getLabels ls (S [FV ps,t]) = concat [getLabels (l:ls) t | K (KS l) <- ps] - getLabels ls (S []) = [unwords (reverse ls)] - getLabels ls (FV _) = [] - getLabels _ t = error (show t) - -expandHOAS opts abs_defs lincats lindefs env = + compute st (RecType rs) = let (st',rs') = List.mapAccumL (\st (lbl,t) -> let (st',t') = compute st t + in (st',(lbl,Identity t'))) st rs + in (st',CRec rs') + compute st (Table pt vt) = let vs = err error id (allParamValues gr pt) + (st',cs') = List.mapAccumL (\st v -> let (st',vt') = compute st vt + in (st',(v,Identity vt'))) st vs + in (st',CTbl pt cs') + compute st (Sort s) + | s == cStr = let (index,m) = st + in ((index+1,m),CStr index) + compute st t = let vs = err error id (allParamValues gr t) + (index,m) = st + in ((index,m*length vs),CPar (m,zip vs [0..])) + + lincats = + Map.insert cVar (Sort cStr) $ + Map.fromAscList + [(c, ty) | (c,GF.Grammar.CncCat (Just (L _ ty)) _ _) <- Map.toList (M.jments mo)] + + +expandHOAS opts (m,mo) env = return env {- foldM add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats) where hoTypes :: [(Int,CId)] @@ -379,10 +511,10 @@ expandHOAS opts abs_defs lincats lindefs env = add_varFun env cat = case Map.lookup cat lindefs of Nothing -> return env - Just lindef -> convertRule opts env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) + Just lindef -> convertRule opts env (PFRule _V [(0,cVar)] (0,cat) [arg] res lindef) where arg = - case Map.lookup cidVar lincats of + case Map.lookup cVar lincats of Nothing -> error $ "No lincat for " ++ showCId cat Just ctype -> ctype @@ -390,7 +522,7 @@ expandHOAS opts abs_defs lincats lindefs env = case Map.lookup cat lincats of Nothing -> error $ "No lincat for " ++ showCId cat Just ctype -> ctype - +-} addProduction :: GrammarEnv -> FId -> Production -> GrammarEnv addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) @@ -420,57 +552,87 @@ addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fc Nothing -> let !fcat = last_id+1 in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) -getParserInfo :: Map.Map CId Literal -> Map.Map CId String -> GrammarEnv -> Concr -getParserInfo flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = +getConcr :: Map.Map CId Literal -> Map.Map CId String -> GrammarEnv -> Concr +getConcr flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = Concr { cflags = flags , printnames = printnames - , cncfuns = mkArray funSet - , sequences = mkArray seqSet + , cncfuns = mkSetArray funSet + , sequences = mkSetArray seqSet , productions = IntMap.union prodSet coercions , pproductions = IntMap.empty , lproductions = Map.empty - , cnccats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (CncCat start end lbls))) (IntMap.lookup 0 catSet) + , cnccats = Map.fromList [(i2i cat,PGF.Data.CncCat start end (mkArray (map (renderStyle style{mode=OneLineMode} . ppPath) (getStrPaths schema)))) + | (cat,(start,end,PFCat _ _ schema)) <- maybe [] Map.toList (IntMap.lookup 0 catSet)] , totalCats = last_id+1 } where - mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] coercions = IntMap.fromList [(fcat,Set.fromList (map PCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] + getStrPaths :: Schema Identity s c -> [Path] + getStrPaths = collect CNil [] + where + collect path paths (CRec rs) = foldr (\(lbl,Identity t) paths -> collect (CProj lbl path) paths t) paths rs + collect path paths (CTbl _ cs) = foldr (\(trm,Identity t) paths -> collect (CSel trm path) paths t) paths cs + collect path paths (CStr _) = reversePath path : paths + collect path paths (CPar _) = paths + + getFCats :: GrammarEnv -> ProtoFCat -> [FId] -getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = +getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat schema) = case IntMap.lookup n catSet >>= Map.lookup cat of - Just (start,end,ms,_) -> reverse (solutions (variants ms tcs start) ()) + Just (start,end,_) -> reverse (solutions (fmap (start +) $ variants schema) ()) where - variants _ [] fcat = return fcat - variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices - variants ms tcs ((m*index) + fcat) - + variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs + variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs + variants (CStr _) = return 0 + variants (CPar (m,values)) = do (value,index) <- member values + return (m*index) + +getFCatsX :: GrammarEnv -> ProtoFCat -> [FId] +getFCatsX (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat schema) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (start,end,_) -> reverse (solutions (fmap (start +) $ variants schema) ()) + where + variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs + variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs + variants (CStr _) = return 0 + variants (CPar (m,values)) = do (value,index) <- member values + return (m*index) ------------------------------------------------------------ -- updating the MCF rule -restrictArg :: LIndex -> FPath -> LIndex -> BacktrackM Env () +restrictArg :: LIndex -> Path -> Term -> BacktrackM Env () restrictArg nr path index = do (head, args) <- get - args' <- updateNthM (restrictProtoFCat path index) nr args - put (head, args') - -restrictHead :: FPath -> LIndex -> BacktrackM Env () -restrictHead path term - = do (head, args) <- get - head' <- restrictProtoFCat path term head - put (head', args) - -restrictProtoFCat :: FPath -> LIndex -> ProtoFCat -> BacktrackM Env ProtoFCat -restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do - tcs <- addConstraint tcs - return (PFCat n cat rcs tcs) + args <- updateNthM (restrictProtoFCat path index) nr args + put (head, args) + +restrictHead :: Path -> Term -> BacktrackM Env () +restrictHead path term = do + (head, args) <- get + head <- restrictProtoFCat path term head + put (head, args) + +restrictProtoFCat :: (Functor m, MonadPlus m) => Path -> Term -> ProtoFCat -> m ProtoFCat +restrictProtoFCat path v (PFCat n cat schema) = do + schema <- addConstraint path v schema + return (PFCat n cat schema) where - addConstraint [] = error "restrictProtoFCat: unknown path" - addConstraint (c@(path,indices) : tcs) - | path0 == path = guard (index0 `elem` indices) >> - return ((path,[index0]) : tcs) - | otherwise = liftM (c:) (addConstraint tcs) + addConstraint (CProj lbl path) v (CRec rs) = fmap CRec $ update lbl (addConstraint path v) rs + addConstraint (CSel trm path) v (CTbl pt cs) = fmap (CTbl pt) $ update trm (addConstraint path v) cs + addConstraint CNil v (CPar (m,vs)) = case lookup v vs of + Just index -> return (CPar (m,[(v,index)])) + Nothing -> mzero + addConstraint CNil v (CStr _) = error "restrictProtoFCat: string path" + + update k0 f [] = return [] + update k0 f (x@(k,Identity v):xs) + | k0 == k = do v <- f v + return ((k,Identity v):xs) + | otherwise = do xs <- update k0 f xs + return (x:xs) mkArray lst = listArray (0,length lst-1) lst -- cgit v1.2.3