summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Compile/GeneratePMCFG.hs
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2010-06-18 12:55:58 +0000
committerkrasimir <krasimir@chalmers.se>2010-06-18 12:55:58 +0000
commit992a7ffb381190ffa67f59f33d0dfadf41f84e78 (patch)
treef76a7b6120f4bcc92b41a17651efb51717c8f7bb /src/compiler/GF/Compile/GeneratePMCFG.hs
parent5dfc9bbc0b87d27b4ef8848a36520605fa868fe3 (diff)
Yay!! Direct generation of PMCFG from GF grammar
Diffstat (limited to 'src/compiler/GF/Compile/GeneratePMCFG.hs')
-rw-r--r--src/compiler/GF/Compile/GeneratePMCFG.hs660
1 files changed, 411 insertions, 249 deletions
diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs
index a735b7adc..b0f566cea 100644
--- a/src/compiler/GF/Compile/GeneratePMCFG.hs
+++ b/src/compiler/GF/Compile/GeneratePMCFG.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses #-}
+{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses, PatternGuards #-}
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
@@ -13,11 +13,15 @@ module GF.Compile.GeneratePMCFG
(convertConcrete) where
import PGF.CId
-import PGF.Data
-import PGF.Macros
+import PGF.Data hiding (Type)
import GF.Infra.Option
+import GF.Grammar hiding (Env, mkRecord, mkTable)
+import qualified GF.Infra.Modules as M
+import GF.Grammar.Lookup
+import GF.Grammar.Predef
import GF.Data.BacktrackM
+import GF.Data.Operations
import GF.Data.Utilities (updateNthM, updateNth, sortNub)
import System.IO
@@ -26,36 +30,52 @@ import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS
+import Text.PrettyPrint hiding (Str)
import Data.Array.IArray
import Data.Maybe
+import Data.Char (isDigit)
import Control.Monad
+import Control.Monad.Identity
import Control.Exception
----------------------------------------------------------------------
-- main conversion function
---convertConcrete :: Options -> Abstr -> CId -> Concr -> IO Concr
-convertConcrete opts lang flags printnames abs_defs cnc_defs lincats params lin_defs = do
- let env0 = emptyGrammarEnv cat_defs params
+convertConcrete :: Options -> SourceGrammar -> SourceModule -> SourceModule -> IO Concr
+convertConcrete opts gr am cm = do
+ let env0 = emptyGrammarEnv gr cm
when (flag optProf opts) $ do
- profileGrammar lang env0 pfrules
- env1 <- expandHOAS opts abs_defs cat_defs lin_defs env0
- env2 <- foldM (convertRule opts) env1 pfrules
- return $ getParserInfo flags printnames env2
+ profileGrammar cm env0 pfrules
+ env1 <- expandHOAS opts cm env0
+ env2 <- foldM (convertRule gr opts) env1 pfrules
+ return $ getConcr flags printnames env2
where
- cat_defs = Map.insert cidVar (S []) lincats
+ (m,mo) = cm
pfrules = [
- (PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
- (id, (ty,_,_)) <- Map.toList abs_defs, let (args,res) = typeSkeleton ty,
- term <- maybeToList (Map.lookup id cnc_defs)]
-
- findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
+ (PFRule id args (0,res) (map (\(_,_,ty) -> ty) cont) val term) |
+ (id,GF.Grammar.CncFun (Just (cat,cont,val)) (Just (L _ term)) _) <- Map.toList (M.jments mo),
+ let (args,res) = err error typeSkeleton (lookupFunType gr (fst am) id)]
+
+ flags = Map.fromList [(mkCId f,LStr x) | (f,x) <- optionsPGF (M.flags mo)]
+
+ printnames = Map.fromAscList [(i2i id, name) | (id,info) <- Map.toList (M.jments mo), name <- prn info]
+ where
+ prn (GF.Grammar.CncFun _ _ (Just (L _ tr))) = [flatten tr]
+ prn (GF.Grammar.CncCat _ _ (Just (L _ tr))) = [flatten tr]
+ prn _ = []
+
+ flatten (K s) = s
+ flatten (Alts x _) = flatten x
+ flatten (C x y) = flatten x +++ flatten y
-profileGrammar lang (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do
+i2i :: Ident -> CId
+i2i = CId . ident2bs
+
+profileGrammar (m,mo) env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do
hPutStrLn stderr ""
- hPutStrLn stderr ("Language: " ++ show lang)
+ hPutStrLn stderr ("Language: " ++ showIdent m)
hPutStrLn stderr ""
hPutStrLn stderr "Categories Count"
hPutStrLn stderr "--------------------------------"
@@ -69,22 +89,52 @@ profileGrammar lang (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfr
mapM_ profileRule pfrules
hPutStrLn stderr "--------------------------------"
where
- profileCat (cid,(fcat1,fcat2,_,_)) = do
- hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1))
+ profileCat (cid,(fcat1,fcat2,_)) = do
+ hPutStrLn stderr (lformat 23 (showIdent cid) ++ rformat 9 (show (fcat2-fcat1+1)))
profileRule (PFRule fun args res ctypes ctype term) = do
- let pargs = zipWith protoFCat args ctypes
- hPutStrLn stderr (lformat 23 fun ++ rformat 9 (product [length xs | PFCat _ _ _ tcs <- pargs, (_,xs) <- tcs]))
-
- lformat :: Show a => Int -> a -> String
- lformat n x = s ++ replicate (n-length s) ' '
+ let pargs = map (protoFCat env) args
+ hPutStrLn stderr (lformat 23 (showIdent fun) ++ rformat 9 (show (product (map (catFactor env) args))))
where
- s = show x
+ catFactor (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,(_,cat)) =
+ case IntMap.lookup n catSet >>= Map.lookup cat of
+ Just (s,e,_) -> e-s+1
+ Nothing -> 0
+
+ lformat :: Int -> String -> String
+ lformat n s = s ++ replicate (n-length s) ' '
+
+ rformat :: Int -> String -> String
+ rformat n s = replicate (n-length s) ' ' ++ s
+
+data ProtoFRule = PFRule Ident {- function -}
+ [(Int,Cat)] {- argument types: context size and category -}
+ (Int,Cat) {- result type : context size (always 0) and category -}
+ [Type] {- argument lin-types representation -}
+ Type {- result lin-type representation -}
+ Term {- body -}
+
+convertRule :: SourceGrammar -> Options -> GrammarEnv -> ProtoFRule -> IO GrammarEnv
+convertRule gr opts grammarEnv (PFRule fun args res ctypes ctype term) = do
+ let pres = protoFCat grammarEnv res
+ pargs = map (protoFCat grammarEnv) args
+
+ b = runCnvMonad gr (unfactor term >>= convertTerm CNil ctype) (pargs,[])
+ (grammarEnv1,b1) = addSequencesB grammarEnv b
+ grammarEnv2 = brk (\grammarEnv -> foldBM addRule
+ grammarEnv
+ (goB b1 CNil [])
+ (pres,pargs) ) grammarEnv1
+ when (verbAtLeast opts Verbose) $ hPutStrLn stderr ("+ "++showIdent fun)
+ return $! grammarEnv2
+ where
+ addRule lins (newCat', newArgs') env0 =
+ let [newCat] = getFCatsX env0 newCat'
+ (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
- rformat :: Show a => Int -> a -> String
- rformat n x = replicate (n-length s) ' ' ++ s
- where
- s = show x
+ (env2,funid) = addCncFun env1 (PGF.Data.CncFun (i2i fun) (mkArray lins))
+
+ in addProduction env2 newCat (PApply funid newArgs)
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
@@ -103,141 +153,245 @@ brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
count = length xs
ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
-convertRule :: Options -> GrammarEnv -> ProtoFRule -> IO GrammarEnv
-convertRule opts grammarEnv (PFRule fun args res ctypes ctype term) = do
- let pres = protoFCat res ctype
- pargs = zipWith protoFCat args ctypes
-
- b = runBranchM (convertTerm [] ctype term) (pargs,[])
- (grammarEnv1,b1) = addSequences' grammarEnv b
- grammarEnv2 = brk (\grammarEnv -> foldBM addRule
- grammarEnv
- (go' b1 [] [])
- (pres,pargs) ) grammarEnv1
- when (verbAtLeast opts Verbose) $ hPutStrLn stderr ("+ "++showCId fun)
- return $! grammarEnv2
+unfactor :: Term -> CnvMonad Term
+unfactor t = CM (\gr c -> c (unfac gr t))
where
- addRule lins (newCat', newArgs') env0 =
- let [newCat] = getFCats env0 newCat'
- (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
-
- (env2,funid) = addCncFun env1 (CncFun fun (mkArray lins))
-
- in addProduction env2 newCat (PApply funid newArgs)
+ unfac gr t =
+ case t of
+ T (TTyped ty) [(PV x,u)] -> V ty [restore x v (unfac gr u) | v <- err error id (allParamValues gr ty)]
+ _ -> composSafeOp (unfac gr) t
+ where
+ restore x u t = case t of
+ Vr y | y == x -> u
+ _ -> composSafeOp (restore x u) t
----------------------------------------------------------------------
--- Branch monad
-
-newtype BranchM a = BM (forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b) -> ([ProtoFCat],[Symbol]) -> Branch b)
-
-instance Monad BranchM where
- return a = BM (\c s -> c a s)
- BM m >>= k = BM (\c s -> m (\a s -> unBM (k a) c s) s)
- where unBM (BM m) = m
-
-instance MonadState ([ProtoFCat],[Symbol]) BranchM where
- get = BM (\c s -> c s s)
- put s = BM (\c _ -> c () s)
+-- CnvMonad monad
+--
+-- The branching monad provides backtracking together with
+-- recording of the choices made. We have two cases
+-- when we have alternative choices:
+--
+-- * when we have parameter type, then
+-- we have to try all possible values
+-- * when we have variants we have to try all alternatives
+--
+-- The conversion monad keeps track of the choices and they are
+-- returned as 'Branch' data type.
-instance Functor BranchM where
- fmap f (BM m) = BM (\c s -> m (c . f) s)
+data Branch a
+ = Case Int Path [(Term,Branch a)]
+ | Variant [Branch a]
+ | Return a
-runBranchM :: BranchM (Value a) -> ([ProtoFCat],[Symbol]) -> Branch a
-runBranchM (BM m) s = m (\v s -> Return v) s
+newtype CnvMonad a = CM {unCM :: SourceGrammar
+ -> forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b)
+ -> ([ProtoFCat],[Symbol])
+ -> Branch b}
-variants :: [a] -> BranchM a
-variants xs = BM (\c s -> Variant [c x s | x <- xs])
+instance Monad CnvMonad where
+ return a = CM (\gr c s -> c a s)
+ CM m >>= k = CM (\gr c s -> m gr (\a s -> unCM (k a) gr c s) s)
-choices :: Int -> FPath -> BranchM LIndex
-choices nr path = BM (\c s -> let (args,_) = s
- PFCat _ _ _ tcs = args !! nr
- in case fromMaybe (error "evalTerm: wrong path") (lookup path tcs) of
- [index] -> c index s
- indices -> Case nr path [c i (updateEnv i s) | i <- indices])
- where
- updateEnv index (args,seq) = (updateNth (restrictArg path index) nr args,seq)
+instance MonadState ([ProtoFCat],[Symbol]) CnvMonad where
+ get = CM (\gr c s -> c s s)
+ put s = CM (\gr c _ -> c () s)
- restrictArg path index (PFCat n cat rcs tcs) = PFCat n cat rcs (addConstraint path index tcs)
+instance Functor CnvMonad where
+ fmap f (CM m) = CM (\gr c s -> m gr (c . f) s)
- addConstraint path0 index0 [] = error "restrictProtoFCat: unknown path"
- addConstraint path0 index0 (c@(path,indices) : tcs)
- | path0 == path = ((path,[index0]) : tcs)
- | otherwise = c : addConstraint path0 index0 tcs
+runCnvMonad :: SourceGrammar -> CnvMonad a -> ([ProtoFCat],[Symbol]) -> Branch a
+runCnvMonad gr (CM m) s = m gr (\v s -> Return v) s
-mkRecord :: [BranchM (Value a)] -> BranchM (Value a)
-mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs [])
+-- | backtracking for all variants
+variants :: [a] -> CnvMonad a
+variants xs = CM (\gr c s -> Variant [c x s | x <- xs])
+-- | backtracking for all parameter values that a variable could take
+choices :: Int -> Path -> CnvMonad Term
+choices nr path = do (args,_) <- get
+ let PFCat _ _ schema = args !! nr
+ descend schema path CNil
+ where
+ descend (CRec rs) (CProj lbl path) rpath = case lookup lbl rs of
+ Just (Identity t) -> descend t path (CProj lbl rpath)
+ descend (CRec rs) CNil rpath = do rs <- mapM (\(lbl,Identity t) -> fmap (assign lbl) (descend t CNil (CProj lbl rpath))) rs
+ return (R rs)
+ descend (CTbl pt cs) (CSel trm path) rpath = case lookup trm cs of
+ Just (Identity t) -> descend t path (CSel trm rpath)
+ descend (CTbl pt cs) CNil rpath = do cs <- mapM (\(trm,Identity t) -> descend t CNil (CSel trm rpath)) cs
+ return (V pt cs)
+ descend (CPar (m,vs)) CNil rpath = case vs of
+ [(value,index)] -> return value
+ values -> let path = reversePath rpath
+ in CM (\gr c s -> Case nr path [(value, updateEnv path value gr c s)
+ | (value,index) <- values])
+
+ updateEnv path value gr c (args,seq) =
+ case updateNthM (restrictProtoFCat path value) nr args of
+ Just args -> c value (args,seq)
+ Nothing -> error "conflict in updateEnv"
+
+-- | the argument should be a parameter type and then
+-- the function returns all possible values.
+getAllParamValues :: Type -> CnvMonad [Term]
+getAllParamValues ty = CM (\gr c -> c (err error id (allParamValues gr ty)))
+
+mkRecord :: [(Label,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c)
+mkRecord xs = CM (\gr c -> foldl (\c (lbl,CM m) bs s -> c ((lbl,m gr (\v s -> Return v) s) : bs) s) (c . CRec) xs [])
+
+mkTable :: Type -> [(Term ,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c)
+mkTable pt xs = CM (\gr c -> foldl (\c (trm,CM m) bs s -> c ((trm,m gr (\v s -> Return v) s) : bs) s) (c . CTbl pt) xs [])
----------------------------------------------------------------------
--- term conversion
-
-type CnvMonad a = BranchM a
-
-type FPath = [LIndex]
-data ProtoFCat = PFCat Int CId [FPath] [(FPath,[LIndex])]
+-- Term Schema
+--
+-- The term schema is a term-like structure, with records, tables,
+-- strings and parameters values, but in addition we could add
+-- annotations of arbitrary types
+
+-- | Term schema
+data Schema b s c
+ = CRec [(Label,b (Schema b s c))]
+ | CTbl Type [(Term, b (Schema b s c))]
+ | CStr s
+ | CPar c
+
+-- | Path into a term or term schema
+data Path
+ = CProj Label Path
+ | CSel Term Path
+ | CNil
+ deriving (Eq,Show)
+
+-- | The ProtoFCat represents a linearization type as term schema.
+-- The annotations are as follows: the strings are annotated with
+-- their index in the PMCFG tuple, the parameters are annotated
+-- with their value both as term and as index.
+data ProtoFCat = PFCat Int Ident (Schema Identity Int (Int,[(Term,Int)]))
type Env = (ProtoFCat, [ProtoFCat])
-data ProtoFRule = PFRule CId {- function -}
- [(Int,CId)] {- argument types: context size and category -}
- (Int,CId) {- result type : context size (always 0) and category -}
- [Term] {- argument lin-types representation -}
- Term {- result lin-type representation -}
- Term {- body -}
-type TermMap = Map.Map CId Term
-
-
-protoFCat :: (Int,CId) -> Term -> ProtoFCat
-protoFCat (n,cat) ctype =
- let (rcs,tcs) = loop [] [] [] ctype'
- in PFCat n cat rcs tcs
- where
- ctype' -- extend the high-order linearization type
- | n > 0 = case ctype of
- R xs -> R (xs ++ replicate n (S []))
- _ -> error $ "Not a record: " ++ show ctype
- | otherwise = ctype
-
- loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
- loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
- loop path rcs tcs (S _) = (path:rcs, tcs)
-data Branch a
- = Case Int FPath [Branch a]
- | Variant [Branch a]
- | Return (Value a)
-
-data Value a
- = Rec [Branch a]
- | Str a
- | Con LIndex
+protoFCat :: GrammarEnv -> (Int,Cat) -> ProtoFCat
+protoFCat (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,(_,cat)) =
+ case IntMap.lookup n catSet >>= Map.lookup cat of
+ Just (_,_,pfcat) -> pfcat
+ Nothing -> error "unknown category"
+
+ppPath (CProj lbl path) = ppLabel lbl <+> ppPath path
+ppPath (CSel trm path) = ppTerm Unqualified 5 trm <+> ppPath path
+ppPath CNil = empty
+reversePath path = rev CNil path
+ where
+ rev path0 CNil = path0
+ rev path0 (CProj lbl path) = rev (CProj lbl path0) path
+ rev path0 (CSel trm path) = rev (CSel trm path0) path
-go' :: Branch SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
-go' (Case nr path_ bs) path ss = do (index,b) <- member (zip [0..] bs)
- restrictArg nr path_ index
- go' b path ss
-go' (Variant bs) path ss = do b <- member bs
- go' b path ss
-go' (Return v) path ss = go v path ss
-go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
-go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs))
-go (Str seqid) path ss = return (seqid : ss)
-go (Con i) path ss = restrictHead path i >> return ss
+----------------------------------------------------------------------
+-- term conversion
-addSequences' :: GrammarEnv -> Branch [Symbol] -> (GrammarEnv, Branch SeqId)
-addSequences' env (Case nr path bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs
+type Value a = Schema Branch a Term
+
+convertTerm :: Path -> Type -> Term -> CnvMonad (Value [Symbol])
+convertTerm sel ctype (Vr x) = convertArg ctype (getVarIndex x) (reversePath sel)
+convertTerm sel ctype (Abs _ _ t) = convertTerm sel ctype t -- there are only top-level abstractions and we ignore them !!!
+convertTerm sel ctype (R record) = convertRec sel ctype record
+convertTerm sel ctype (P term l) = convertTerm (CProj l sel) ctype term
+convertTerm sel ctype (V pt ts) = convertTbl sel ctype pt ts
+convertTerm sel ctype (S term p) = do v <- evalTerm CNil p
+ convertTerm (CSel v sel) ctype term
+convertTerm sel ctype (FV vars) = do term <- variants vars
+ convertTerm sel ctype term
+convertTerm sel ctype (C t1 t2) = do v1 <- convertTerm sel ctype t1
+ v2 <- convertTerm sel ctype t2
+ return (CStr (concat [s | CStr s <- [v1,v2]]))
+convertTerm sel ctype (K t) = return (CStr [SymKS [t]])
+convertTerm sel ctype Empty = return (CStr [])
+convertTerm sel ctype (Alts s alts)
+ = return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]])
+ where
+ strings (K s) = [s]
+ strings (C u v) = strings u ++ strings v
+ strings (Strs ss) = concatMap strings ss
+convertTerm CNil ctype t = do v <- evalTerm CNil t
+ return (CPar v)
+convertTerm _ _ t = error (render (text "convertTerm" <+> parens (ppTerm Unqualified 0 t)))
+
+convertArg :: Term -> Int -> Path -> CnvMonad (Value [Symbol])
+convertArg (RecType rs) nr path =
+ mkRecord (map (\(lbl,ctype) -> (lbl,convertArg ctype nr (CProj lbl path))) rs)
+convertArg (Table pt vt) nr path = do
+ vs <- getAllParamValues pt
+ mkTable pt (map (\v -> (v,convertArg vt nr (CSel v path))) vs)
+convertArg (Sort _) nr path = do
+ (args,_) <- get
+ let PFCat _ cat schema = args !! nr
+ l = index (reversePath path) schema
+ sym | isLiteralCat cat = SymLit nr l
+ | otherwise = SymCat nr l
+ return (CStr [sym])
+ where
+ index (CProj lbl path) (CRec rs) = case lookup lbl rs of
+ Just (Identity t) -> index path t
+ index (CSel trm path) (CTbl _ rs) = case lookup trm rs of
+ Just (Identity t) -> index path t
+ index CNil (CStr idx) = idx
+convertArg ty nr path = do
+ value <- choices nr (reversePath path)
+ return (CPar value)
+
+convertRec CNil (RecType rs) record =
+ mkRecord (map (\(lbl,ctype) -> (lbl,convertTerm CNil ctype (projectRec lbl record))) rs)
+convertRec (CProj lbl path) ctype record =
+ convertTerm path ctype (projectRec lbl record)
+convertRec _ ctype _ = error ("convertRec: "++show ctype)
+
+convertTbl CNil (Table _ vt) pt ts = do
+ vs <- getAllParamValues pt
+ mkTable pt (zipWith (\v t -> (v,convertTerm CNil vt t)) vs ts)
+convertTbl (CSel v sub_sel) ctype pt ts = do
+ vs <- getAllParamValues pt
+ case lookup v (zip vs ts) of
+ Just t -> convertTerm sub_sel ctype t
+ Nothing -> error (render (text "convertTbl:" <+> (text "missing value" <+> ppTerm Unqualified 0 v $$
+ text "among" <+> vcat (map (ppTerm Unqualified 0) vs))))
+convertTbl _ ctype _ _ = error ("convertTbl: "++show ctype)
+
+
+goB :: Branch (Value SeqId) -> Path -> [SeqId] -> BacktrackM Env [SeqId]
+goB (Case nr path bs) rpath ss = do (value,b) <- member bs
+ restrictArg nr path value
+ goB b rpath ss
+goB (Variant bs) rpath ss = do b <- member bs
+ goB b rpath ss
+goB (Return v) rpath ss = goV v rpath ss
+
+goV :: Value SeqId -> Path -> [SeqId] -> BacktrackM Env [SeqId]
+goV (CRec xs) rpath ss = foldM (\ss (lbl,b) -> goB b (CProj lbl rpath) ss) ss (reverse xs)
+goV (CTbl _ xs) rpath ss = foldM (\ss (trm,b) -> goB b (CSel trm rpath) ss) ss (reverse xs)
+goV (CStr seqid) rpath ss = return (seqid : ss)
+goV (CPar t) rpath ss = restrictHead (reversePath rpath) t >> return ss
+
+addSequencesB :: GrammarEnv -> Branch (Value [Symbol]) -> (GrammarEnv, Branch (Value SeqId))
+addSequencesB env (Case nr path bs) = let (env1,bs1) = List.mapAccumL (\env (trm,b) -> let (env',b') = addSequencesB env b
+ in (env',(trm,b'))) env bs
in (env1,Case nr path bs1)
-addSequences' env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs
+addSequencesB env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequencesB env bs
in (env1,Variant bs1)
-addSequences' env (Return v) = let (env1,v1) = addSequences env v
+addSequencesB env (Return v) = let (env1,v1) = addSequencesV env v
in (env1,Return v1)
-addSequences :: GrammarEnv -> Value [Symbol] -> (GrammarEnv, Value SeqId)
-addSequences env (Rec vs) = let (env1,vs1) = List.mapAccumL addSequences' env vs
- in (env1,Rec vs1)
-addSequences env (Str lin) = let (env1,seqid) = addFSeq env (optimizeLin lin)
- in (env1,Str seqid)
-addSequences env (Con i) = (env,Con i)
+addSequencesV :: GrammarEnv -> Value [Symbol] -> (GrammarEnv, Value SeqId)
+addSequencesV env (CRec vs) = let (env1,vs1) = List.mapAccumL (\env (lbl,b) -> let (env',b') = addSequencesB env b
+ in (env',(lbl,b'))) env vs
+ in (env1,CRec vs1)
+addSequencesV env (CTbl pt vs)=let (env1,vs1) = List.mapAccumL (\env (trm,b) -> let (env',b') = addSequencesB env b
+ in (env',(trm,b'))) env vs
+ in (env1,CTbl pt vs1)
+addSequencesV env (CStr lin) = let (env1,seqid) = addFSeq env (optimizeLin lin)
+ in (env1,CStr seqid)
+addSequencesV env (CPar i) = (env,CPar i)
optimizeLin [] = []
@@ -251,98 +405,76 @@ optimizeLin lin@(SymKS _ : _) =
optimizeLin (sym : lin) = sym : optimizeLin lin
-convertTerm :: FPath -> Term -> Term -> CnvMonad (Value [Symbol])
-convertTerm sel ctype (V nr) = convertArg ctype nr (reverse sel)
-convertTerm sel ctype (C nr) = convertCon ctype nr (reverse sel)
-convertTerm sel ctype (R record) = convertRec sel ctype record
-convertTerm sel ctype (P term p) = do nr <- evalTerm [] p
- convertTerm (nr:sel) ctype term
-convertTerm sel ctype (FV vars) = do term <- variants vars
- convertTerm sel ctype term
-convertTerm sel ctype (S ts) = do vs <- mapM (convertTerm sel ctype) ts
- return (Str (concat [s | Str s <- vs]))
-convertTerm sel ctype (K (KS t)) = return (Str [SymKS [t]])
-convertTerm sel ctype (K (KP s v))=return (Str [SymKP s v])
-convertTerm sel ctype (W s t) = do
- ss <- case t of
- R ss -> return ss
- convertRec sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss]
-convertTerm sel ctype x = error ("convertTerm ("++show x++")")
-
-convertArg :: Term -> Int -> FPath -> CnvMonad (Value [Symbol])
-convertArg (R ctypes) nr path = do
- mkRecord (zipWith (\lbl ctype -> convertArg ctype nr (lbl:path)) [0..] ctypes)
-convertArg (C max) nr path = do
- index <- choices nr path
- return (Con index)
-convertArg (S _) nr path = do
- (args,_) <- get
- let PFCat _ cat rcs tcs = args !! nr
- l = index path rcs 0
- sym | isLiteralCat cat = SymLit nr l
- | otherwise = SymCat nr l
- return (Str [sym])
- where
- index lbl' (lbl:lbls) idx
- | lbl' == lbl = idx
- | otherwise = index lbl' lbls $! (idx+1)
-
-convertCon (C max) index [] = return (Con index)
-convertCon x _ _ = fail $ "SimpleToFCFG.convertCon: " ++ show x
-
-convertRec [] (R ctypes) record = do
- mkRecord (zipWith (convertTerm []) ctypes record)
-convertRec (index:sub_sel) ctype record =
- convertTerm sub_sel ctype (record !! index)
-
-
------------------------------------------------------------
-- eval a term to ground terms
-evalTerm :: FPath -> Term -> CnvMonad LIndex
-evalTerm path (V nr) = choices nr (reverse path)
-evalTerm path (C nr) = return nr
-evalTerm path (R record) = case path of
- (index:path) -> evalTerm path (record !! index)
-evalTerm path (P term sel) = do index <- evalTerm [] sel
- evalTerm (index:path) term
+evalTerm :: Path -> Term -> CnvMonad Term
+evalTerm CNil (QC f) = return (QC f)
+evalTerm CNil (App x y) = do x <- evalTerm CNil x
+ y <- evalTerm CNil y
+ return (App x y)
+evalTerm path (Vr x) = choices (getVarIndex x) path
+evalTerm path (R rs) = case path of
+ (CProj lbl path) -> evalTerm path (projectRec lbl rs)
+ CNil -> do rs <- mapM (\(lbl,(_,t)) -> do t <- evalTerm path t
+ return (assign lbl t)) rs
+ return (R rs)
+evalTerm path (P term lbl) = evalTerm (CProj lbl path) term
+evalTerm path (V pt ts) = case path of
+ (CSel trm path) -> do vs <- getAllParamValues pt
+ case lookup trm (zip vs ts) of
+ Just t -> evalTerm path t
+ Nothing -> error "evalTerm: missing value"
+ CNil -> do ts <- mapM (evalTerm path) ts
+ return (V pt ts)
+evalTerm path (S term sel) = do v <- evalTerm CNil sel
+ evalTerm (CSel v path) term
evalTerm path (FV terms) = variants terms >>= evalTerm path
-evalTerm path x = error ("evalTerm ("++show x++")")
+evalTerm path t = error (render (text "evalTerm" <+> parens (ppTerm Unqualified 0 t)))
+getVarIndex (IA _ i) = i
+getVarIndex (IAV _ _ i) = i
+getVarIndex (IC s) | isDigit (BS.last s) = (read . BS.unpack . snd . BS.spanEnd isDigit) s
----------------------------------------------------------------------
-- GrammarEnv
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
-type CatSet = IntMap.IntMap (Map.Map CId (FId,FId,[Int],Array LIndex String))
+type CatSet = IntMap.IntMap (Map.Map Ident (FId,FId,ProtoFCat))
type SeqSet = Map.Map Sequence SeqId
type FunSet = Map.Map CncFun FunId
type CoerceSet= Map.Map [FId] FId
-emptyGrammarEnv lincats params =
+emptyGrammarEnv gr (m,mo) =
let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
where
- computeCatRange index cat ctype
- | cat == cidString = (index, (fcatString,fcatString,[],listArray (0,0) ["s"]))
- | cat == cidInt = (index, (fcatInt, fcatInt, [],listArray (0,0) ["s"]))
- | cat == cidFloat = (index, (fcatFloat, fcatFloat, [],listArray (0,0) ["s"]))
- | cat == cidVar = (index, (fcatVar, fcatVar, [],listArray (0,0) ["s"]))
- | otherwise = (index+size,(index,index+size-1, poly,maybe (error "missing params") (mkArray . getLabels []) (Map.lookup cat params)))
+ computeCatRange index cat ctype =
+ (index+size,(index,index+size-1,PFCat 0 cat schema))
where
- (size,poly) = getMultipliers 1 [] ctype
+ ((_,size),schema) = compute (0,1) ctype
- getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record
- getMultipliers m ms (S _) = (m,ms)
- getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
-
- getLabels ls (R record) = concat [getLabels (l:ls) t | P (K (KS l)) t <- record]
- getLabels ls (S [FV ps,t]) = concat [getLabels (l:ls) t | K (KS l) <- ps]
- getLabels ls (S []) = [unwords (reverse ls)]
- getLabels ls (FV _) = []
- getLabels _ t = error (show t)
-
-expandHOAS opts abs_defs lincats lindefs env =
+ compute st (RecType rs) = let (st',rs') = List.mapAccumL (\st (lbl,t) -> let (st',t') = compute st t
+ in (st',(lbl,Identity t'))) st rs
+ in (st',CRec rs')
+ compute st (Table pt vt) = let vs = err error id (allParamValues gr pt)
+ (st',cs') = List.mapAccumL (\st v -> let (st',vt') = compute st vt
+ in (st',(v,Identity vt'))) st vs
+ in (st',CTbl pt cs')
+ compute st (Sort s)
+ | s == cStr = let (index,m) = st
+ in ((index+1,m),CStr index)
+ compute st t = let vs = err error id (allParamValues gr t)
+ (index,m) = st
+ in ((index,m*length vs),CPar (m,zip vs [0..]))
+
+ lincats =
+ Map.insert cVar (Sort cStr) $
+ Map.fromAscList
+ [(c, ty) | (c,GF.Grammar.CncCat (Just (L _ ty)) _ _) <- Map.toList (M.jments mo)]
+
+
+expandHOAS opts (m,mo) env = return env {-
foldM add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where
hoTypes :: [(Int,CId)]
@@ -379,10 +511,10 @@ expandHOAS opts abs_defs lincats lindefs env =
add_varFun env cat =
case Map.lookup cat lindefs of
Nothing -> return env
- Just lindef -> convertRule opts env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
+ Just lindef -> convertRule opts env (PFRule _V [(0,cVar)] (0,cat) [arg] res lindef)
where
arg =
- case Map.lookup cidVar lincats of
+ case Map.lookup cVar lincats of
Nothing -> error $ "No lincat for " ++ showCId cat
Just ctype -> ctype
@@ -390,7 +522,7 @@ expandHOAS opts abs_defs lincats lindefs env =
case Map.lookup cat lincats of
Nothing -> error $ "No lincat for " ++ showCId cat
Just ctype -> ctype
-
+-}
addProduction :: GrammarEnv -> FId -> Production -> GrammarEnv
addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
@@ -420,57 +552,87 @@ addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fc
Nothing -> let !fcat = last_id+1
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
-getParserInfo :: Map.Map CId Literal -> Map.Map CId String -> GrammarEnv -> Concr
-getParserInfo flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
+getConcr :: Map.Map CId Literal -> Map.Map CId String -> GrammarEnv -> Concr
+getConcr flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
Concr { cflags = flags
, printnames = printnames
- , cncfuns = mkArray funSet
- , sequences = mkArray seqSet
+ , cncfuns = mkSetArray funSet
+ , sequences = mkSetArray seqSet
, productions = IntMap.union prodSet coercions
, pproductions = IntMap.empty
, lproductions = Map.empty
- , cnccats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (CncCat start end lbls))) (IntMap.lookup 0 catSet)
+ , cnccats = Map.fromList [(i2i cat,PGF.Data.CncCat start end (mkArray (map (renderStyle style{mode=OneLineMode} . ppPath) (getStrPaths schema))))
+ | (cat,(start,end,PFCat _ _ schema)) <- maybe [] Map.toList (IntMap.lookup 0 catSet)]
, totalCats = last_id+1
}
where
- mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
+ mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
coercions = IntMap.fromList [(fcat,Set.fromList (map PCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
+ getStrPaths :: Schema Identity s c -> [Path]
+ getStrPaths = collect CNil []
+ where
+ collect path paths (CRec rs) = foldr (\(lbl,Identity t) paths -> collect (CProj lbl path) paths t) paths rs
+ collect path paths (CTbl _ cs) = foldr (\(trm,Identity t) paths -> collect (CSel trm path) paths t) paths cs
+ collect path paths (CStr _) = reversePath path : paths
+ collect path paths (CPar _) = paths
+
+
getFCats :: GrammarEnv -> ProtoFCat -> [FId]
-getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
+getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat schema) =
case IntMap.lookup n catSet >>= Map.lookup cat of
- Just (start,end,ms,_) -> reverse (solutions (variants ms tcs start) ())
+ Just (start,end,_) -> reverse (solutions (fmap (start +) $ variants schema) ())
where
- variants _ [] fcat = return fcat
- variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
- variants ms tcs ((m*index) + fcat)
-
+ variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs
+ variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs
+ variants (CStr _) = return 0
+ variants (CPar (m,values)) = do (value,index) <- member values
+ return (m*index)
+
+getFCatsX :: GrammarEnv -> ProtoFCat -> [FId]
+getFCatsX (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat schema) =
+ case IntMap.lookup n catSet >>= Map.lookup cat of
+ Just (start,end,_) -> reverse (solutions (fmap (start +) $ variants schema) ())
+ where
+ variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs
+ variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs
+ variants (CStr _) = return 0
+ variants (CPar (m,values)) = do (value,index) <- member values
+ return (m*index)
------------------------------------------------------------
-- updating the MCF rule
-restrictArg :: LIndex -> FPath -> LIndex -> BacktrackM Env ()
+restrictArg :: LIndex -> Path -> Term -> BacktrackM Env ()
restrictArg nr path index = do
(head, args) <- get
- args' <- updateNthM (restrictProtoFCat path index) nr args
- put (head, args')
-
-restrictHead :: FPath -> LIndex -> BacktrackM Env ()
-restrictHead path term
- = do (head, args) <- get
- head' <- restrictProtoFCat path term head
- put (head', args)
-
-restrictProtoFCat :: FPath -> LIndex -> ProtoFCat -> BacktrackM Env ProtoFCat
-restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do
- tcs <- addConstraint tcs
- return (PFCat n cat rcs tcs)
+ args <- updateNthM (restrictProtoFCat path index) nr args
+ put (head, args)
+
+restrictHead :: Path -> Term -> BacktrackM Env ()
+restrictHead path term = do
+ (head, args) <- get
+ head <- restrictProtoFCat path term head
+ put (head, args)
+
+restrictProtoFCat :: (Functor m, MonadPlus m) => Path -> Term -> ProtoFCat -> m ProtoFCat
+restrictProtoFCat path v (PFCat n cat schema) = do
+ schema <- addConstraint path v schema
+ return (PFCat n cat schema)
where
- addConstraint [] = error "restrictProtoFCat: unknown path"
- addConstraint (c@(path,indices) : tcs)
- | path0 == path = guard (index0 `elem` indices) >>
- return ((path,[index0]) : tcs)
- | otherwise = liftM (c:) (addConstraint tcs)
+ addConstraint (CProj lbl path) v (CRec rs) = fmap CRec $ update lbl (addConstraint path v) rs
+ addConstraint (CSel trm path) v (CTbl pt cs) = fmap (CTbl pt) $ update trm (addConstraint path v) cs
+ addConstraint CNil v (CPar (m,vs)) = case lookup v vs of
+ Just index -> return (CPar (m,[(v,index)]))
+ Nothing -> mzero
+ addConstraint CNil v (CStr _) = error "restrictProtoFCat: string path"
+
+ update k0 f [] = return []
+ update k0 f (x@(k,Identity v):xs)
+ | k0 == k = do v <- f v
+ return ((k,Identity v):xs)
+ | otherwise = do xs <- update k0 f xs
+ return (x:xs)
mkArray lst = listArray (0,length lst-1) lst