diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/compiler/GF/Compile | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/compiler/GF/Compile')
26 files changed, 6581 insertions, 0 deletions
diff --git a/src/compiler/GF/Compile/Abstract/Compute.hs b/src/compiler/GF/Compile/Abstract/Compute.hs new file mode 100644 index 000000000..d5c9a163c --- /dev/null +++ b/src/compiler/GF/Compile/Abstract/Compute.hs @@ -0,0 +1,138 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Compile.Abstract.Compute +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/02 20:50:19 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.8 $ +-- +-- computation in abstract syntax w.r.t. explicit definitions. +-- +-- old GF computation; to be updated +----------------------------------------------------------------------------- + +module GF.Compile.Abstract.Compute (LookDef, + compute, + computeAbsTerm, + computeAbsTermIn, + beta + ) where + +import GF.Data.Operations + +import GF.Grammar +import GF.Grammar.Lookup + +import Debug.Trace +import Data.List(intersperse) +import Control.Monad (liftM, liftM2) +import Text.PrettyPrint + +-- for debugging +tracd m t = t +-- tracd = trace + +compute :: SourceGrammar -> Exp -> Err Exp +compute = computeAbsTerm + +computeAbsTerm :: SourceGrammar -> Exp -> Err Exp +computeAbsTerm gr = computeAbsTermIn (lookupAbsDef gr) [] + +-- | a hack to make compute work on source grammar as well +type LookDef = Ident -> Ident -> Err (Maybe Int,Maybe [Equation]) + +computeAbsTermIn :: LookDef -> [Ident] -> Exp -> Err Exp +computeAbsTermIn lookd xs e = errIn (render (text "computing" <+> ppTerm Unqualified 0 e)) $ compt xs e where + compt vv t = case t of +-- Prod x a b -> liftM2 (Prod x) (compt vv a) (compt (x:vv) b) +-- Abs x b -> liftM (Abs x) (compt (x:vv) b) + _ -> do + let t' = beta vv t + (yy,f,aa) <- termForm t' + let vv' = map snd yy ++ vv + aa' <- mapM (compt vv') aa + case look f of + Just eqs -> tracd (text "\nmatching" <+> ppTerm Unqualified 0 f) $ + case findMatch eqs aa' of + Ok (d,g) -> do + --- let (xs,ts) = unzip g + --- ts' <- alphaFreshAll vv' ts 
+ let g' = g --- zip xs ts' + d' <- compt vv' $ substTerm vv' g' d + tracd (text "by Egs:" <+> ppTerm Unqualified 0 d') $ return $ mkAbs yy $ d' + _ -> tracd (text "no match" <+> ppTerm Unqualified 0 t') $ + do + let v = mkApp f aa' + return $ mkAbs yy $ v + _ -> do + let t2 = mkAbs yy $ mkApp f aa' + tracd (text "not defined" <+> ppTerm Unqualified 0 t2) $ return t2 + + look t = case t of + (Q m f) -> case lookd m f of + Ok (_,md) -> md + _ -> Nothing + _ -> Nothing + +beta :: [Ident] -> Exp -> Exp +beta vv c = case c of + Let (x,(_,a)) b -> beta vv $ substTerm vv [(x,beta vv a)] (beta (x:vv) b) + App f a -> + let (a',f') = (beta vv a, beta vv f) in + case f' of + Abs _ x b -> beta vv $ substTerm vv [(x,a')] (beta (x:vv) b) + _ -> (if a'==a && f'==f then id else beta vv) $ App f' a' + Prod b x a t -> Prod b x (beta vv a) (beta (x:vv) t) + Abs b x t -> Abs b x (beta (x:vv) t) + _ -> c + +-- special version of pattern matching, to deal with comp under lambda + +findMatch :: [([Patt],Term)] -> [Term] -> Err (Term, Substitution) +findMatch cases terms = case cases of + [] -> Bad $ render (text "no applicable case for" <+> hcat (punctuate comma (map (ppTerm Unqualified 0) terms))) + (patts,_):_ | length patts /= length terms -> + Bad (render (text "wrong number of args for patterns :" <+> + hsep (map (ppPatt Unqualified 0) patts) <+> text "cannot take" <+> hsep (map (ppTerm Unqualified 0) terms))) + (patts,val):cc -> case mapM tryMatch (zip patts terms) of + Ok substs -> return (tracd (text "value" <+> ppTerm Unqualified 0 val) val, concat substs) + _ -> findMatch cc terms + +tryMatch :: (Patt, Term) -> Err [(Ident, Term)] +tryMatch (p,t) = do + t' <- termForm t + trym p t' + where + + trym p t' = err (\s -> tracd s (Bad s)) (\t -> tracd (prtm p t) (return t)) $ ---- + case (p,t') of + (PW, _) | notMeta t -> return [] -- optimization with wildcard + (PV x, _) | notMeta t -> return [(x,t)] + (PString s, ([],K i,[])) | s==i -> return [] + (PInt s, ([],EInt i,[])) | s==i 
-> return [] + (PFloat s,([],EFloat i,[])) | s==i -> return [] --- rounding? + (PP q p pp, ([], QC r f, tt)) | + p `eqStrIdent` f && length pp == length tt -> do + matches <- mapM tryMatch (zip pp tt) + return (concat matches) + (PP q p pp, ([], Q r f, tt)) | + p `eqStrIdent` f && length pp == length tt -> do + matches <- mapM tryMatch (zip pp tt) + return (concat matches) + (PT _ p',_) -> trym p' t' + (PAs x p',_) -> do + subst <- trym p' t' + return $ (x,t) : subst + _ -> Bad (render (text "no match in pattern" <+> ppPatt Unqualified 0 p <+> text "for" <+> ppTerm Unqualified 0 t)) + + notMeta e = case e of + Meta _ -> False + App f a -> notMeta f && notMeta a + Abs _ _ b -> notMeta b + _ -> True + + prtm p g = + ppPatt Unqualified 0 p <+> colon $$ hsep (punctuate semi [ppIdent x <+> char '=' <+> ppTerm Unqualified 0 y | (x,y) <- g]) diff --git a/src/compiler/GF/Compile/Abstract/TC.hs b/src/compiler/GF/Compile/Abstract/TC.hs new file mode 100644 index 000000000..163301838 --- /dev/null +++ b/src/compiler/GF/Compile/Abstract/TC.hs @@ -0,0 +1,294 @@ +---------------------------------------------------------------------- +-- | +-- Module : TC +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/02 20:50:19 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.11 $ +-- +-- Thierry Coquand's type checking algorithm that creates a trace +----------------------------------------------------------------------------- + +module GF.Compile.Abstract.TC (AExp(..), + Theory, + checkExp, + inferExp, + checkBranch, + eqVal, + whnf + ) where + +import GF.Data.Operations +import GF.Grammar +import GF.Grammar.Predef + +import Control.Monad +import Data.List (sortBy) +import Data.Maybe +import Text.PrettyPrint + +data AExp = + AVr Ident Val + | ACn QIdent Val + | AType + | AInt Integer + | AFloat Double + | AStr String + | AMeta MetaId Val + | AApp AExp AExp Val + | AAbs Ident Val AExp + | AProd Ident AExp AExp + | AEqs [([Exp],AExp)] 
--- not used + | ARecType [ALabelling] + | AR [AAssign] + | AP AExp Label Val + | AData Val + deriving (Eq,Show) + +type ALabelling = (Label, AExp) +type AAssign = (Label, (Val, AExp)) + +type Theory = QIdent -> Err Val + +lookupConst :: Theory -> QIdent -> Err Val +lookupConst th f = th f + +lookupVar :: Env -> Ident -> Err Val +lookupVar g x = maybe (Bad (render (text "unknown variable" <+> ppIdent x))) return $ lookup x ((IW,uVal):g) +-- wild card IW: no error produced, ?0 instead. + +type TCEnv = (Int,Env,Env) + +emptyTCEnv :: TCEnv +emptyTCEnv = (0,[],[]) + +whnf :: Val -> Err Val +whnf v = ---- errIn ("whnf" +++ prt v) $ ---- debug + case v of + VApp u w -> do + u' <- whnf u + w' <- whnf w + app u' w' + VClos env e -> eval env e + _ -> return v + +app :: Val -> Val -> Err Val +app u v = case u of + VClos env (Abs _ x e) -> eval ((x,v):env) e + _ -> return $ VApp u v + +eval :: Env -> Exp -> Err Val +eval env e = ---- errIn ("eval" +++ prt e +++ "in" +++ prEnv env) $ + case e of + Vr x -> lookupVar env x + Q m c -> return $ VCn (m,c) + QC m c -> return $ VCn (m,c) ---- == Q ? 
+ Sort c -> return $ VType --- the only sort is Type + App f a -> join $ liftM2 app (eval env f) (eval env a) + RecType xs -> do xs <- mapM (\(l,e) -> eval env e >>= \e -> return (l,e)) xs + return (VRecType xs) + _ -> return $ VClos env e + +eqVal :: Int -> Val -> Val -> Err [(Val,Val)] +eqVal k u1 u2 = ---- errIn (prt u1 +++ "<>" +++ prBracket (show k) +++ prt u2) $ + do + w1 <- whnf u1 + w2 <- whnf u2 + let v = VGen k + case (w1,w2) of + (VApp f1 a1, VApp f2 a2) -> liftM2 (++) (eqVal k f1 f2) (eqVal k a1 a2) + (VClos env1 (Abs _ x1 e1), VClos env2 (Abs _ x2 e2)) -> + eqVal (k+1) (VClos ((x1,v x1):env1) e1) (VClos ((x2,v x1):env2) e2) + (VClos env1 (Prod _ x1 a1 e1), VClos env2 (Prod _ x2 a2 e2)) -> + liftM2 (++) + (eqVal k (VClos env1 a1) (VClos env2 a2)) + (eqVal (k+1) (VClos ((x1,v x1):env1) e1) (VClos ((x2,v x1):env2) e2)) + (VGen i _, VGen j _) -> return [(w1,w2) | i /= j] + (VCn (_, i), VCn (_,j)) -> return [(w1,w2) | i /= j] + --- thus ignore qualifications; valid because inheritance cannot + --- be qualified. Simplifies annotation. 
AR 17/3/2005 + _ -> return [(w1,w2) | w1 /= w2] +-- invariant: constraints are in whnf + +checkType :: Theory -> TCEnv -> Exp -> Err (AExp,[(Val,Val)]) +checkType th tenv e = checkExp th tenv e vType + +checkExp :: Theory -> TCEnv -> Exp -> Val -> Err (AExp, [(Val,Val)]) +checkExp th tenv@(k,rho,gamma) e ty = do + typ <- whnf ty + let v = VGen k + case e of + Meta m -> return $ (AMeta m typ,[]) + + Abs _ x t -> case typ of + VClos env (Prod _ y a b) -> do + a' <- whnf $ VClos env a --- + (t',cs) <- checkExp th + (k+1,(x,v x):rho, (x,a'):gamma) t (VClos ((y,v x):env) b) + return (AAbs x a' t', cs) + _ -> Bad (render (text "function type expected for" <+> ppTerm Unqualified 0 e <+> text "instead of" <+> ppValue Unqualified 0 typ)) + + Prod _ x a b -> do + testErr (typ == vType) "expected Type" + (a',csa) <- checkType th tenv a + (b',csb) <- checkType th (k+1, (x,v x):rho, (x,VClos rho a):gamma) b + return (AProd x a' b', csa ++ csb) + + R xs -> + case typ of + VRecType ys -> do case [l | (l,_) <- ys, isNothing (lookup l xs)] of + [] -> return () + ls -> fail (render (text "no value given for label:" <+> fsep (punctuate comma (map ppLabel ls)))) + r <- mapM (checkAssign th tenv ys) xs + let (xs,css) = unzip r + return (AR xs, concat css) + _ -> Bad (render (text "record type expected for" <+> ppTerm Unqualified 0 e <+> text "instead of" <+> ppValue Unqualified 0 typ)) + + P r l -> do (r',cs) <- checkExp th tenv r (VRecType [(l,typ)]) + return (AP r' l typ,cs) + + _ -> checkInferExp th tenv e typ + +checkInferExp :: Theory -> TCEnv -> Exp -> Val -> Err (AExp, [(Val,Val)]) +checkInferExp th tenv@(k,_,_) e typ = do + (e',w,cs1) <- inferExp th tenv e + cs2 <- eqVal k w typ + return (e',cs1 ++ cs2) + +inferExp :: Theory -> TCEnv -> Exp -> Err (AExp, Val, [(Val,Val)]) +inferExp th tenv@(k,rho,gamma) e = case e of + Vr x -> mkAnnot (AVr x) $ noConstr $ lookupVar gamma x + Q m c | m == cPredefAbs && isPredefCat c + -> return (ACn (m,c) vType, vType, []) + | otherwise -> 
mkAnnot (ACn (m,c)) $ noConstr $ lookupConst th (m,c) + QC m c -> mkAnnot (ACn (m,c)) $ noConstr $ lookupConst th (m,c) ---- + EInt i -> return (AInt i, valAbsInt, []) + EFloat i -> return (AFloat i, valAbsFloat, []) + K i -> return (AStr i, valAbsString, []) + Sort _ -> return (AType, vType, []) + RecType xs -> do r <- mapM (checkLabelling th tenv) xs + let (xs,css) = unzip r + return (ARecType xs, vType, concat css) + App f t -> do + (f',w,csf) <- inferExp th tenv f + typ <- whnf w + case typ of + VClos env (Prod _ x a b) -> do + (a',csa) <- checkExp th tenv t (VClos env a) + b' <- whnf $ VClos ((x,VClos rho t):env) b + return $ (AApp f' a' b', b', csf ++ csa) + _ -> Bad (render (text "Prod expected for function" <+> ppTerm Unqualified 0 f <+> text "instead of" <+> ppValue Unqualified 0 typ)) + _ -> Bad (render (text "cannot infer type of expression" <+> ppTerm Unqualified 0 e)) + +checkLabelling :: Theory -> TCEnv -> Labelling -> Err (ALabelling, [(Val,Val)]) +checkLabelling th tenv (lbl,typ) = do + (atyp,cs) <- checkType th tenv typ + return ((lbl,atyp),cs) + +checkAssign :: Theory -> TCEnv -> [(Label,Val)] -> Assign -> Err (AAssign, [(Val,Val)]) +checkAssign th tenv@(k,rho,gamma) typs (lbl,(Just typ,exp)) = do + (atyp,cs1) <- checkType th tenv typ + val <- eval rho typ + cs2 <- case lookup lbl typs of + Nothing -> return [] + Just val0 -> eqVal k val val0 + (aexp,cs3) <- checkExp th tenv exp val + return ((lbl,(val,aexp)),cs1++cs2++cs3) +checkAssign th tenv@(k,rho,gamma) typs (lbl,(Nothing,exp)) = do + case lookup lbl typs of + Nothing -> do (aexp,val,cs) <- inferExp th tenv exp + return ((lbl,(val,aexp)),cs) + Just val -> do (aexp,cs) <- checkExp th tenv exp val + return ((lbl,(val,aexp)),cs) + +checkBranch :: Theory -> TCEnv -> Equation -> Val -> Err (([Exp],AExp),[(Val,Val)]) +checkBranch th tenv b@(ps,t) ty = errIn ("branch" +++ show b) $ + chB tenv' ps' ty + where + + (ps',_,rho2,k') = ps2ts k ps + tenv' = (k, rho2++rho, gamma) ---- k' ? 
+ (k,rho,gamma) = tenv + + chB tenv@(k,rho,gamma) ps ty = case ps of + p:ps2 -> do + typ <- whnf ty + case typ of + VClos env (Prod _ y a b) -> do + a' <- whnf $ VClos env a + (p', sigma, binds, cs1) <- checkP tenv p y a' + let tenv' = (length binds, sigma ++ rho, binds ++ gamma) + ((ps',exp),cs2) <- chB tenv' ps2 (VClos ((y,p'):env) b) + return ((p:ps',exp), cs1 ++ cs2) -- don't change the patt + _ -> Bad (render (text "Product expected for definiens" <+> ppTerm Unqualified 0 t <+> text "instead of" <+> ppValue Unqualified 0 typ)) + [] -> do + (e,cs) <- checkExp th tenv t ty + return (([],e),cs) + checkP env@(k,rho,gamma) t x a = do + (delta,cs) <- checkPatt th env t a + let sigma = [(x, VGen i x) | ((x,_),i) <- zip delta [k..]] + return (VClos sigma t, sigma, delta, cs) + + ps2ts k = foldr p2t ([],0,[],k) + p2t p (ps,i,g,k) = case p of + PW -> (Meta i : ps, i+1,g,k) + PV x -> (Vr x : ps, i, upd x k g,k+1) + PString s -> (K s : ps, i, g, k) + PInt n -> (EInt n : ps, i, g, k) + PFloat n -> (EFloat n : ps, i, g, k) + PP m c xs -> (mkApp (Q m c) xss : ps, j, g',k') + where (xss,j,g',k') = foldr p2t ([],i,g,k) xs + _ -> error $ render (text "undefined p2t case" <+> ppPatt Unqualified 0 p <+> text "in checkBranch") + + upd x k g = (x, VGen k x) : g --- hack to recognize pattern variables + + +checkPatt :: Theory -> TCEnv -> Exp -> Val -> Err (Binds,[(Val,Val)]) +checkPatt th tenv exp val = do + (aexp,_,cs) <- checkExpP tenv exp val + let binds = extrBinds aexp + return (binds,cs) + where + extrBinds aexp = case aexp of + AVr i v -> [(i,v)] + AApp f a _ -> extrBinds f ++ extrBinds a + _ -> [] -- no other cases are possible + +--- ad hoc, to find types of variables + checkExpP tenv@(k,rho,gamma) exp val = case exp of + Meta m -> return $ (AMeta m val, val, []) + Vr x -> return $ (AVr x val, val, []) + EInt i -> return (AInt i, valAbsInt, []) + EFloat i -> return (AFloat i, valAbsFloat, []) + K s -> return (AStr s, valAbsString, []) + + Q m c -> do + typ <- lookupConst th 
(m,c) + return $ (ACn (m,c) typ, typ, []) + QC m c -> do + typ <- lookupConst th (m,c) + return $ (ACn (m,c) typ, typ, []) ---- + App f t -> do + (f',w,csf) <- checkExpP tenv f val + typ <- whnf w + case typ of + VClos env (Prod _ x a b) -> do + (a',_,csa) <- checkExpP tenv t (VClos env a) + b' <- whnf $ VClos ((x,VClos rho t):env) b + return $ (AApp f' a' b', b', csf ++ csa) + _ -> Bad (render (text "Prod expected for function" <+> ppTerm Unqualified 0 f <+> text "instead of" <+> ppValue Unqualified 0 typ)) + _ -> Bad (render (text "cannot typecheck pattern" <+> ppTerm Unqualified 0 exp)) + +-- auxiliaries + +noConstr :: Err Val -> Err (Val,[(Val,Val)]) +noConstr er = er >>= (\v -> return (v,[])) + +mkAnnot :: (Val -> AExp) -> Err (Val,[(Val,Val)]) -> Err (AExp,Val,[(Val,Val)]) +mkAnnot a ti = do + (v,cs) <- ti + return (a v, v, cs) + diff --git a/src/compiler/GF/Compile/Abstract/TypeCheck.hs b/src/compiler/GF/Compile/Abstract/TypeCheck.hs new file mode 100644 index 000000000..2632c54dd --- /dev/null +++ b/src/compiler/GF/Compile/Abstract/TypeCheck.hs @@ -0,0 +1,83 @@ +---------------------------------------------------------------------- +-- | +-- Module : TypeCheck +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/15 16:22:02 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.16 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Compile.Abstract.TypeCheck (-- * top-level type checking functions; TC should not be called directly. 
+ checkContext, + checkTyp, + checkDef, + checkConstrs, + ) where + +import GF.Data.Operations + +import GF.Infra.CheckM +import GF.Grammar +import GF.Grammar.Lookup +import GF.Grammar.Unify +import GF.Compile.Refresh +import GF.Compile.Abstract.Compute +import GF.Compile.Abstract.TC + +import Text.PrettyPrint +import Control.Monad (foldM, liftM, liftM2) + +-- | invariant way of creating TCEnv from context +initTCEnv gamma = + (length gamma,[(x,VGen i x) | ((x,_),i) <- zip gamma [0..]], gamma) + +-- interface to TC type checker + +type2val :: Type -> Val +type2val = VClos [] + +cont2exp :: Context -> Exp +cont2exp c = mkProd c eType [] -- to check a context + +cont2val :: Context -> Val +cont2val = type2val . cont2exp + +-- some top-level batch-mode checkers for the compiler + +justTypeCheck :: SourceGrammar -> Exp -> Val -> Err Constraints +justTypeCheck gr e v = do + (_,constrs0) <- checkExp (grammar2theory gr) (initTCEnv []) e v + (constrs1,_) <- unifyVal constrs0 + return $ filter notJustMeta constrs1 + +notJustMeta (c,k) = case (c,k) of + (VClos g1 (Meta m1), VClos g2 (Meta m2)) -> False + _ -> True + +grammar2theory :: SourceGrammar -> Theory +grammar2theory gr (m,f) = case lookupFunType gr m f of + Ok t -> return $ type2val t + Bad s -> case lookupCatContext gr m f of + Ok cont -> return $ cont2val cont + _ -> Bad s + +checkContext :: SourceGrammar -> Context -> [Message] +checkContext st = checkTyp st . 
cont2exp + +checkTyp :: SourceGrammar -> Type -> [Message] +checkTyp gr typ = err (\x -> [text x]) ppConstrs $ justTypeCheck gr typ vType + +checkDef :: SourceGrammar -> Fun -> Type -> [Equation] -> [Message] +checkDef gr (m,fun) typ eqs = err (\x -> [text x]) ppConstrs $ do + bcs <- mapM (\b -> checkBranch (grammar2theory gr) (initTCEnv []) b (type2val typ)) eqs + let (bs,css) = unzip bcs + (constrs,_) <- unifyVal (concat css) + return $ filter notJustMeta constrs + +checkConstrs :: SourceGrammar -> Cat -> [Ident] -> [String] +checkConstrs gr cat _ = [] ---- check constructors! diff --git a/src/compiler/GF/Compile/CheckGrammar.hs b/src/compiler/GF/Compile/CheckGrammar.hs new file mode 100644 index 000000000..f4765eb26 --- /dev/null +++ b/src/compiler/GF/Compile/CheckGrammar.hs @@ -0,0 +1,284 @@ +---------------------------------------------------------------------- +-- | +-- Module : CheckGrammar +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:33 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.31 $ +-- +-- AR 4\/12\/1999 -- 1\/4\/2000 -- 8\/9\/2001 -- 15\/5\/2002 -- 27\/11\/2002 -- 18\/6\/2003 +-- +-- type checking also does the following modifications: +-- +-- - types of operations and local constants are inferred and put in place +-- +-- - both these types and linearization types are computed +-- +-- - tables are type-annotated +----------------------------------------------------------------------------- + +module GF.Compile.CheckGrammar(checkModule) where + +import GF.Infra.Ident +import GF.Infra.Modules + +import GF.Compile.Abstract.TypeCheck +import GF.Compile.Concrete.TypeCheck + +import GF.Grammar +import GF.Grammar.Lexer +import GF.Grammar.Lookup +import GF.Grammar.Predef +import GF.Grammar.PatternMatch + +import GF.Data.Operations +import GF.Infra.CheckM + +import Data.List +import qualified Data.Set as Set +import Control.Monad +import Text.PrettyPrint + +-- | checking is performed in 
the dependency order of modules +checkModule :: [SourceModule] -> SourceModule -> Check SourceModule +checkModule ms m@(name,mo) = checkIn (text "checking module" <+> ppIdent name) $ do + checkRestrictedInheritance ms m + m <- case mtype mo of + MTConcrete a -> do let gr = MGrammar (m:ms) + abs <- checkErr $ lookupModule gr a + checkCompleteGrammar gr (a,abs) m + _ -> return m + infos <- checkErr $ topoSortJments m + foldM updateCheckInfo m infos + where + updateCheckInfo (name,mo) (i,info) = do + info <- checkInfo ms (name,mo) i info + return (name,updateModule mo i info) + +-- check if restricted inheritance modules are still coherent +-- i.e. that the defs of remaining names don't depend on omitted names +checkRestrictedInheritance :: [SourceModule] -> SourceModule -> Check () +checkRestrictedInheritance mos (name,mo) = do + let irs = [ii | ii@(_,mi) <- extend mo, mi /= MIAll] -- names with restr. inh. + let mrs = [((i,m),mi) | (i,m) <- mos, Just mi <- [lookup i irs]] + -- the restr. modules themself, with restr. infos + mapM_ checkRem mrs + where + checkRem ((i,m),mi) = do + let (incl,excl) = partition (isInherited mi) (map fst (tree2list (jments m))) + let incld c = Set.member c (Set.fromList incl) + let illegal c = Set.member c (Set.fromList excl) + let illegals = [(f,is) | + (f,cs) <- allDeps, incld f, let is = filter illegal cs, not (null is)] + case illegals of + [] -> return () + cs -> checkError (text "In inherited module" <+> ppIdent i <> text ", dependence of excluded constants:" $$ + nest 2 (vcat [ppIdent f <+> text "on" <+> fsep (map ppIdent is) | (f,is) <- cs])) + allDeps = concatMap (allDependencies (const True) . jments . 
snd) mos + +checkCompleteGrammar :: SourceGrammar -> SourceModule -> SourceModule -> Check SourceModule +checkCompleteGrammar gr (am,abs) (cm,cnc) = do + let jsa = jments abs + let jsc = jments cnc + + -- check that all concrete constants are in abstract; build types for all lin + jsc <- foldM checkCnc emptyBinTree (tree2list jsc) + + -- check that all abstract constants are in concrete; build default lin and lincats + jsc <- foldM checkAbs jsc (tree2list jsa) + + return (cm,replaceJudgements cnc jsc) + where + checkAbs js i@(c,info) = + case info of + AbsFun (Just ty) _ _ -> do let mb_def = do + let (cxt,(_,i),_) = typeForm ty + info <- lookupIdent i js + info <- case info of + (AnyInd _ m) -> do (m,info) <- lookupOrigInfo gr m i + return info + _ -> return info + case info of + CncCat (Just (RecType [])) _ _ -> return (foldr (\_ -> Abs Explicit identW) (R []) cxt) + _ -> Bad "no def lin" + + case lookupIdent c js of + Ok (AnyInd _ _) -> return js + Ok (CncFun ty (Just def) pn) -> + return $ updateTree (c,CncFun ty (Just def) pn) js + Ok (CncFun ty Nothing pn) -> + case mb_def of + Ok def -> return $ updateTree (c,CncFun ty (Just def) pn) js + Bad _ -> do checkWarn $ text "no linearization of" <+> ppIdent c + return js + _ -> do + case mb_def of + Ok def -> do (cont,val) <- linTypeOfType gr cm ty + let linty = (snd (valCat ty),cont,val) + return $ updateTree (c,CncFun (Just linty) (Just def) Nothing) js + Bad _ -> do checkWarn $ text "no linearization of" <+> ppIdent c + return js + AbsCat (Just _) _ -> case lookupIdent c js of + Ok (AnyInd _ _) -> return js + Ok (CncCat (Just _) _ _) -> return js + Ok (CncCat _ mt mp) -> do + checkWarn $ + text "no linearization type for" <+> ppIdent c <> text ", inserting default {s : Str}" + return $ updateTree (c,CncCat (Just defLinType) mt mp) js + _ -> do + checkWarn $ + text "no linearization type for" <+> ppIdent c <> text ", inserting default {s : Str}" + return $ updateTree (c,CncCat (Just defLinType) Nothing Nothing) js 
+ _ -> return js + + checkCnc js i@(c,info) = + case info of + CncFun _ d pn -> case lookupOrigInfo gr am c of + Ok (_,AbsFun (Just ty) _ _) -> + do (cont,val) <- linTypeOfType gr cm ty + let linty = (snd (valCat ty),cont,val) + return $ updateTree (c,CncFun (Just linty) d pn) js + _ -> do checkWarn $ text "function" <+> ppIdent c <+> text "is not in abstract" + return js + CncCat _ _ _ -> case lookupOrigInfo gr am c of + Ok _ -> return $ updateTree i js + _ -> do checkWarn $ text "category" <+> ppIdent c <+> text "is not in abstract" + return js + _ -> return $ updateTree i js + + +-- | General Principle: only Just-values are checked. +-- A May-value has always been checked in its origin module. +checkInfo :: [SourceModule] -> SourceModule -> Ident -> Info -> Check Info +checkInfo ms (m,mo) c info = do + checkReservedId c + case info of + AbsCat (Just cont) _ -> mkCheck "category" $ + checkContext gr cont + + AbsFun (Just typ0) ma md -> do + typ <- compAbsTyp [] typ0 -- to calculate let definitions + mkCheck "type of function" $ + checkTyp gr typ + case md of + Just eqs -> mkCheck "definition of function" $ + checkDef gr (m,c) typ eqs + Nothing -> return info + return (AbsFun (Just typ) ma md) + + CncFun linty@(Just (cat,cont,val)) (Just trm) mpr -> chIn "linearization of" $ do + (trm',_) <- checkLType gr [] trm (mkFunType (map (\(_,_,ty) -> ty) cont) val) -- erases arg vars + mpr <- checkPrintname gr mpr + return (CncFun linty (Just trm') mpr) + + CncCat (Just typ) mdef mpr -> chIn "linearization type of" $ do + (typ,_) <- checkLType gr [] typ typeType + typ <- computeLType gr [] typ + mdef <- case mdef of + Just def -> do + (def,_) <- checkLType gr [] def (mkFunType [typeStr] typ) + return $ Just def + _ -> return mdef + mpr <- checkPrintname gr mpr + return (CncCat (Just typ) mdef mpr) + + ResOper pty pde -> chIn "operation" $ do + (pty', pde') <- case (pty,pde) of + (Just ty, Just de) -> do + ty' <- checkLType gr [] ty typeType >>= computeLType gr [] . 
fst + (de',_) <- checkLType gr [] de ty' + return (Just ty', Just de') + (_ , Just de) -> do + (de',ty') <- inferLType gr [] de + return (Just ty', Just de') + (_ , Nothing) -> do + checkError (text "No definition given to the operation") + return (ResOper pty' pde') + + ResOverload os tysts -> chIn "overloading" $ do + tysts' <- mapM (uncurry $ flip (checkLType gr [])) tysts -- return explicit ones + tysts0 <- checkErr $ lookupOverload gr m c -- check against inherited ones too + tysts1 <- mapM (uncurry $ flip (checkLType gr [])) + [(mkFunType args val,tr) | (args,(val,tr)) <- tysts0] + --- this can only be a partial guarantee, since matching + --- with value type is only possible if expected type is given + checkUniq $ + sort [let (xs,t) = typeFormCnc x in t : map (\(b,x,t) -> t) xs | (_,x) <- tysts1] + return (ResOverload os [(y,x) | (x,y) <- tysts']) + + ResParam (Just pcs) _ -> chIn "parameter type" $ do + ts <- checkErr $ liftM concat $ mapM mkPar pcs + return (ResParam (Just pcs) (Just ts)) + + _ -> return info + where + gr = MGrammar ((m,mo) : ms) + chIn cat = checkIn (text "Happened in" <+> text cat <+> ppIdent c <+> ppPosition mo c <> colon) + + mkPar (f,co) = do + vs <- liftM combinations $ mapM (\(_,_,ty) -> allParamValues gr ty) co + return $ map (mkApp (QC m f)) vs + + checkUniq xss = case xss of + x:y:xs + | x == y -> checkError $ text "ambiguous for type" <+> + ppType (mkFunType (tail x) (head x)) + | otherwise -> checkUniq $ y:xs + _ -> return () + + mkCheck cat ss = case ss of + [] -> return info + _ -> checkError (vcat ss $$ text "in" <+> text cat <+> ppIdent c <+> ppPosition mo c) + + compAbsTyp g t = case t of + Vr x -> maybe (checkError (text "no value given to variable" <+> ppIdent x)) return $ lookup x g + Let (x,(_,a)) b -> do + a' <- compAbsTyp g a + compAbsTyp ((x, a'):g) b + Prod b x a t -> do + a' <- compAbsTyp g a + t' <- compAbsTyp ((x,Vr x):g) t + return $ Prod b x a' t' + Abs _ _ _ -> return t + _ -> composOp (compAbsTyp g) t + + 
+checkPrintname :: SourceGrammar -> Maybe Term -> Check (Maybe Term) +checkPrintname gr (Just t) = do (t,_) <- checkLType gr [] t typeStr + return (Just t) +checkPrintname gr Nothing = return Nothing + +-- | for grammars obtained otherwise than by parsing ---- update!! +checkReservedId :: Ident -> Check () +checkReservedId x + | isReservedWord (ident2bs x) = checkWarn (text "reserved word used as identifier:" <+> ppIdent x) + | otherwise = return () + +-- auxiliaries + +-- | linearization types and defaults +linTypeOfType :: SourceGrammar -> Ident -> Type -> Check (Context,Type) +linTypeOfType cnc m typ = do + let (cont,cat) = typeSkeleton typ + val <- lookLin cat + args <- mapM mkLinArg (zip [0..] cont) + return (args, val) + where + mkLinArg (i,(n,mc@(m,cat))) = do + val <- lookLin mc + let vars = mkRecType varLabel $ replicate n typeStr + symb = argIdent n cat i + rec <- if n==0 then return val else + checkErr $ errIn (render (text "extending" $$ + nest 2 (ppTerm Unqualified 0 vars) $$ + text "with" $$ + nest 2 (ppTerm Unqualified 0 val))) $ + plusRecType vars val + return (Explicit,symb,rec) + lookLin (_,c) = checks [ --- rather: update with defLinType ? 
+ checkErr (lookupLincat cnc m c) >>= computeLType cnc [] + ,return defLinType + ] diff --git a/src/compiler/GF/Compile/Coding.hs b/src/compiler/GF/Compile/Coding.hs new file mode 100644 index 000000000..49538bd35 --- /dev/null +++ b/src/compiler/GF/Compile/Coding.hs @@ -0,0 +1,55 @@ +module GF.Compile.Coding where + +import GF.Grammar.Grammar +import GF.Grammar.Macros +import GF.Text.Coding +import GF.Infra.Modules +import GF.Infra.Option +import GF.Data.Operations + +import Data.Char + +encodeStringsInModule :: SourceModule -> SourceModule +encodeStringsInModule = codeSourceModule (encodeUnicode UTF_8) + +decodeStringsInModule :: SourceModule -> SourceModule +decodeStringsInModule mo = codeSourceModule (decodeUnicode (flag optEncoding (flagsModule mo))) mo + +codeSourceModule :: (String -> String) -> SourceModule -> SourceModule +codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo))) + where + codj (c,info) = case info of + ResOper pty pt -> ResOper (fmap (codeTerm co) pty) (fmap (codeTerm co) pt) + ResOverload es tyts -> ResOverload es [(codeTerm co ty,codeTerm co t) | (ty,t) <- tyts] + CncCat pty pt mpr -> CncCat pty (fmap (codeTerm co) pt) (fmap (codeTerm co) mpr) + CncFun mty pt mpr -> CncFun mty (fmap (codeTerm co) pt) (fmap (codeTerm co) mpr) + _ -> info + +codeTerm :: (String -> String) -> Term -> Term +codeTerm co t = case t of + K s -> K (co s) + T ty cs -> T ty [(codp p,codeTerm co v) | (p,v) <- cs] + EPatt p -> EPatt (codp p) + _ -> composSafeOp (codeTerm co) t + where + codp p = case p of --- really: composOpPatt + PR rs -> PR [(l,codp p) | (l,p) <- rs] + PString s -> PString (co s) + PChars s -> PChars (co s) + PT x p -> PT x (codp p) + PAs x p -> PAs x (codp p) + PNeg p -> PNeg (codp p) + PRep p -> PRep (codp p) + PSeq p q -> PSeq (codp p) (codp q) + PAlt p q -> PAlt (codp p) (codp q) + _ -> p + +-- | Run an encoding function on all string literals within the given string. 
-- ---------------------------------------------------------------------
-- GF/Compile/Coding.hs (last definition of the module)
-- ---------------------------------------------------------------------

-- Text outside double-quoted literals is copied unchanged.  Inside a
-- literal the function is applied to one character at a time; the
-- character following a backslash is also passed through the function,
-- while the backslash itself is kept.  An unterminated literal is a
-- run-time 'error'.
codeStringLiterals :: (String -> String) -> String -> String
codeStringLiterals _ [] = []
codeStringLiterals co ('"':cs) = '"' : inStringLiteral cs
  where inStringLiteral [] = error "codeStringLiterals: unterminated string literal"
        inStringLiteral ('"':ds) = '"' : codeStringLiterals co ds
        inStringLiteral ('\\':d:ds) = '\\' : co [d] ++ inStringLiteral ds
        inStringLiteral (d:ds) = co [d] ++ inStringLiteral ds
codeStringLiterals co (c:cs) = c : codeStringLiterals co cs

-- ---------------------------------------------------------------------
-- GF/Compile/Concrete/AppPredefined.hs
-- ---------------------------------------------------------------------

-- predefined function type signatures and definitions. AR 12/3/2003.

-- | True iff 'typPredefined' knows a type for the identifier.
--
-- The handlers were previously swapped (@err (const True) (const False)@),
-- which answered True exactly for the identifiers that are /not/ known:
-- the first argument of 'err' is the failure continuation.
isInPredefined :: Ident -> Bool
isInPredefined = err (const False) (const True) . typPredefined

-- | Type of a constant of the Predef module; 'Bad' for an unknown name.
typPredefined :: Ident -> Err Type
typPredefined f
  | f == cInt       = return typePType
  | f == cFloat     = return typePType
  | f == cErrorType = return typeType
  | f == cInts      = return $ mkFunType [typeInt] typePType
  | f == cPBool     = return typePType
  | f == cError     = return $ mkFunType [typeStr] typeError  -- non-can. of empty set
  | f == cPFalse    = return $ typePBool
  | f == cPTrue     = return $ typePBool
  | f == cDp        = return $ mkFunType [typeInt,typeTok] typeTok
  | f == cDrop      = return $ mkFunType [typeInt,typeTok] typeTok
  | f == cEqInt     = return $ mkFunType [typeInt,typeInt] typePBool
  | f == cLessInt   = return $ mkFunType [typeInt,typeInt] typePBool
  | f == cEqStr     = return $ mkFunType [typeTok,typeTok] typePBool
  | f == cLength    = return $ mkFunType [typeTok] typeInt
  | f == cOccur     = return $ mkFunType [typeTok,typeTok] typePBool
  | f == cOccurs    = return $ mkFunType [typeTok,typeTok] typePBool
  | f == cPlus      = return $ mkFunType [typeInt,typeInt] (typeInt)
----  "read"   -> (P : Type) -> Tok -> P
  | f == cShow      = return $ mkProd -- (P : PType) -> P -> Tok
      [(Explicit,varP,typePType),(Explicit,identW,Vr varP)] typeStr []
  | f == cToStr     = return $ mkProd -- (L : Type)  -> L -> Str
      [(Explicit,varL,typeType),(Explicit,identW,Vr varL)] typeStr []
  | f == cMapStr    = return $ mkProd -- (L : Type)  -> (Str -> Str) -> L -> L
      [(Explicit,varL,typeType),(Explicit,identW,mkFunType [typeStr] typeStr),(Explicit,identW,Vr varL)] (Vr varL) []
  | f == cTake      = return $ mkFunType [typeInt,typeTok] typeTok
  | f == cTk        = return $ mkFunType [typeInt,typeTok] typeTok
  | otherwise       = Bad (render (text "unknown in Predef:" <+> ppIdent f))

-- | Bound type variable @L@ used in the dependent types above.
varL :: Ident
varL = identC (BS.pack "L")

-- | Bound type variable @P@ used in the dependent types above.
varP :: Ident
varP = identC (BS.pack "P")

-- | Try to evaluate an application of a Predef function.
--
-- The arguments are themselves passed through 'appPredefined' first.
-- The result is a pair of the resulting term and a flag: 'True' means
-- no further computing is needed, 'False' (only produced for @mapStr@)
-- means the caller must compute the term further.  Applications that
-- cannot be evaluated are returned as they are, with flag 'True'.
appPredefined :: Term -> Err (Term,Bool)
appPredefined t = case t of
  App f x0 -> do
   (x,_) <- appPredefined x0
   case f of
    -- one-place functions
    Q mod f | mod == cPredef ->
      case x of
        (K s) | f == cLength -> retb $ EInt $ toInteger $ length s
        _ -> retb t

    -- two-place functions
    App (Q mod f) z0 | mod == cPredef -> do
     (z,_) <- appPredefined z0
     case (norm z, norm x) of
      (EInt i, K s) | f == cDrop -> retb $ K (drop (fi i) s)
      (EInt i, K s) | f == cTake -> retb $ K (take (fi i) s)
      (EInt i, K s) | f == cTk   -> retb $ K (take (max 0 (length s - fi i)) s)
      (EInt i, K s) | f == cDp   -> retb $ K (drop (max 0 (length s - fi i)) s)
      (K s,    K t) | f == cEqStr -> retb $ if s == t then predefTrue else predefFalse
      (K s,    K t) | f == cOccur -> retb $ if substring s t then predefTrue else predefFalse
      (K s,    K t) | f == cOccurs -> retb $ if any (flip elem t) s then predefTrue else predefFalse
      (EInt i, EInt j) | f == cEqInt -> retb $ if i==j then predefTrue else predefFalse
      (EInt i, EInt j) | f == cLessInt -> retb $ if i<j then predefTrue else predefFalse
      (EInt i, EInt j) | f == cPlus -> retb $ EInt $ i+j
      (_,      t) | f == cShow -> retb $ foldr C Empty $ map K $ words $ render (ppTerm Unqualified 0 t)
      (_,      K s) | f == cRead -> retb $ Cn (identC (BS.pack s)) --- because of K, only works for atomic tags
      (_,      t) | f == cToStr -> trm2str t >>= retb
      _ -> retb t ---- prtBad "cannot compute predefined" t

    -- three-place functions
    App (App (Q mod f) z0) y0 | mod == cPredef -> do
     (y,_) <- appPredefined y0
     (z,_) <- appPredefined z0
     case (z, y, x) of
      (ty,op,t) | f == cMapStr -> retf $ mapStr ty op t
      _ -> retb t ---- prtBad "cannot compute predefined" t

    _ -> retb t ---- prtBad "cannot compute predefined" t
  _ -> retb t
    ---- should really check the absence of arg variables
 where
   retb t = return (retc t,True)  -- no further computing needed
   retf t = return (retc t,False) -- must be computed further
   -- split a multi-word token into a concatenation of one-word tokens
   retc t = case t of
     K [] -> t
     K s  -> foldr1 C (map K (words s))
     _    -> t
   -- inverse direction: collapse a concatenation of tokens into one token
   norm t = case t of
     Empty -> K []
     C u v -> case (norm u,norm v) of
       (K x,K y) -> K (x +++ y)
       _ -> t
     _ -> t
   fi = fromInteger

-- read makes variables into constants

predefTrue  = QC cPredef cPTrue
predefFalse = QC cPredef cPFalse

-- | @substring s t@ is True iff @s@ occurs in @t@ as a contiguous
-- substring.  The empty string occurs in every string.
--
-- The previous definition continued with itself after a matching first
-- character and could therefore skip characters of @t@, so it accepted
-- any subsequence (e.g. @substring "ac" "abc"@ was True).
substring :: String -> String -> Bool
substring s t = go t
  where
    n = length s
    -- test s as a prefix of every suffix of t, including the empty one
    go u
      | take n u == s = True
      | otherwise     = case u of
          []     -> False
          (_:us) -> go us

-- | Extract a string-valued term: follows the first field of a record,
-- the first branch of a table and the table of a selection until a
-- 'C', 'K' or 'Empty' term is reached; anything else is an error.
trm2str :: Term -> Err Term
trm2str t = case t of
  R ((_,(_,s)):_) -> trm2str s
  T _ ((_,s):_)   -> trm2str s
  V _ (s:_)       -> trm2str s
  C _ _           -> return $ t
  K _             -> return $ t
  S c _           -> trm2str c
  Empty           -> return $ t
  _ -> Bad (render (text "cannot get Str from term" <+> ppTerm Unqualified 0 t))

-- simultaneous recursion on type and term: type arg is essential!
-- But simplify the task by assuming records are type-annotated
-- (this has been done in type checking)
mapStr :: Type -> Term -> Term -> Term
mapStr ty f t = case (ty,t) of
  _ | elem ty [typeStr,typeTok] -> App f t
  (_,        R ts)     -> R [(l,mapField v) | (l,v) <- ts]
  (Table a b,T ti cs)  -> T ti [(p,mapStr b f v) | (p,v) <- cs]
  _ -> t
 where
   -- record fields without a type annotation are left untouched
   mapField (mty,te) = case mty of
     Just ty -> (mty,mapStr ty f te)
     _ -> (mty,te)
-- | computation of concrete syntax terms into normal form
-- used mainly for partial evaluation
computeConcrete :: SourceGrammar -> Term -> Err Term
computeConcrete    g t = {- refreshTerm t >>= -} computeTerm g [] t
-- like 'computeConcrete', but with rec=True: definitions found by
-- lookup are themselves computed (see 'look' below)
computeConcreteRec g t = {- refreshTerm t >>= -} computeTermOpt True g [] t

-- | 'computeTermOpt' with rec=False, under a given substitution.
computeTerm :: SourceGrammar -> Substitution -> Term -> Err Term
computeTerm = computeTermOpt False

-- rec=True is used if it cannot be assumed that looked-up constants
-- have already been computed (mainly with -optimize=noexpand in .gfr)

-- | The evaluator proper.  The local function @comput full g t@ computes
-- term @t@ under substitution @g@; with @full == False@ (alias 'hnf')
-- an abstraction is returned as it is instead of being computed under
-- the binder, with @full == True@ (alias 'comp') the body is computed too.
computeTermOpt :: Bool -> SourceGrammar -> Substitution -> Term -> Err Term
computeTermOpt rec gr = comput True where

   comput full g t = ---- errIn ("subterm" +++ prt t) $ --- for debugging
              case t of

     -- constants of Predef are kept; other constants are looked up
     Q p c | p == cPredef -> return t
           | otherwise    -> look p c

     -- a variable must have a value in g; a variable bound to itself is final
     Vr x -> do
       t' <- maybe (Bad (render (text "no value given to variable" <+> ppIdent x))) return $ lookup x g
       case t' of
         _ | t == t' -> return t
         _ -> comp g t'

     -- Abs x@(IA _) b -> do
     Abs _ _ _ | full -> do
       let (xs,b1) = termFormCnc t
       b' <- comp ([(x,Vr x) | (_,x) <- xs] ++ g) b1
       return $ mkAbs xs b'
     --  b' <- comp (ext x (Vr x) g) b
     --  return $ Abs x  b'
     Abs _ _ _ -> return t -- hnf

     Let (x,(_,a)) b -> do
       a' <- comp g a
       comp (ext x a' g) b

     Prod b x a t -> do
       a' <- comp g a
       t' <- comp (ext x (Vr x) g) t
       return $ Prod b x a' t'

     -- beta-convert
     -- applications to more than one argument are handled here in one
     -- step; single-argument applications go to compApp
     App f a -> case appForm t of
      (h,as) | length as > 1 -> do
        h' <- hnf g h
        as' <- mapM (comp g) as
        case h' of
          _ | not (null [() | FV _ <- as']) -> compApp g (mkApp h' as')
          c@(QC _ _) -> do
            return $ mkApp c as'
          Q mod f | mod == cPredef -> do
            (t',b) <- appPredefined (mkApp h' as')
            if b then return t' else comp g t'

          Abs _ _ _ -> do
            let (xs,b) = termFormCnc h'
            let g' = (zip (map snd xs) as') ++ g
            -- as2: arguments left over, xs2: binders left over
            let as2 = drop (length xs) as'
            let xs2 = drop (length as') xs
            b' <- comp g' (mkAbs xs2 b)
            if null as2 then return b' else comp g (mkApp b' as2)

          _ -> compApp g (mkApp h' as')
      _ -> compApp g t

     P t l | isLockLabel l -> return $ R []
     ---- a workaround 18/2/2005: take this away and find the reason
     ---- why earlier compilation destroys the lock field


     P t l  -> do
       t' <- comp g t
       case t' of
         FV rs -> mapM (\c -> comp g (P c l)) rs >>= returnC . variants
         -- the list is reversed, so the last field with label l is chosen
         R r   -> maybe (Bad (render (text "no value for label" <+> ppLabel l))) (comp g . snd) $
                    lookup l $ reverse r

         ExtR a (R b) ->
           case comp g (P (R b) l) of
             Ok v -> return v
             _ -> comp g (P a l)

--- { - --- this is incorrect, since b can contain the proper value
         ExtR (R a) b ->                 -- NOT POSSIBLE both a and b records!
           case comp g (P (R a) l) of
             Ok v -> return v
             _ -> comp g (P b l)
--- - } ---

         -- push the projection into the branches of a selected table
         S (T i cs) e -> prawitz g i (flip P l) cs e
         S (V i cs) e -> prawitzV g i (flip P l) cs e

         _   -> returnC $ P t' l

     S t v -> do
       t'  <- compTable g t
       v'  <- comp g v
       t1  <- case t' of
----         V (RecType fs) _ -> uncurrySelect g fs t' v'
----         T (TComp (RecType fs)) _ -> uncurrySelect g fs t' v'
         _ -> return $ S t' v'
       compSelect g t1

     -- normalize away empty tokens
     K "" -> return Empty

     -- glue if you can
     Glue x0 y0 -> do
       x <- comp g x0
       y <- comp g y0
       case (x,y) of
         (FV ks,_)         -> do
           kys <- mapM (comp g . flip Glue y) ks
           return $ variants kys
         (_,FV ks)         -> do
           xks <- mapM (comp g . Glue x) ks
           return $ variants xks

         (S (T i cs) e, s) -> prawitz g i (flip Glue s) cs e
         (s, S (T i cs) e) -> prawitz g i (Glue s) cs e
         (S (V i cs) e, s) -> prawitzV g i (flip Glue s) cs e
         (s, S (V i cs) e) -> prawitzV g i (Glue s) cs e
         (_,Empty)         -> return x
         (Empty,_)         -> return y
         (K a, K b)        -> return $ K (a ++ b)
         (_, Alts (d,vs)) -> do
----         (K a, Alts (d,vs)) -> do
            let glx = Glue x
            comp g $ Alts (glx d, [(glx v,c) | (v,c) <- vs])
         -- first alternative may fail; then the Glue is kept as it is
         (Alts _, ka) -> checks [do
            y' <- strsFromTerm ka
----         (Alts _, K a) -> checks [do
            x' <- strsFromTerm x -- this may fail when compiling opers
            return $ variants [
              foldr1 C (map K (str2strings (glueStr v u))) | v <- x', u <- y']
----              foldr1 C (map K (str2strings (glueStr v (str a)))) | v <- x']
           ,return $ Glue x y
           ]
         (C u v,_) -> comp g $ C u (Glue v y)

         _ -> do
           mapM_ checkNoArgVars [x,y]
           r <- composOp (comp g) t
           returnC r

     Alts (d,aa) -> do
       d' <- comp g d
       aa' <- mapM (compInAlts g) aa
       returnC (Alts (d',aa'))

     -- remove empty
     C a b    -> do
       a' <- comp g a
       b' <- comp g b
       case (a',b') of
         (Alts _, K a) -> checks [do
            as <- strsFromTerm a' -- this may fail when compiling opers
            return $ variants [
              foldr1 C (map K (str2strings (plusStr v (str a)))) | v <- as]
           ,
            return $ C a' b'
           ]
         (Empty,_) -> returnC b'
         (_,Empty) -> returnC a'
         _     -> returnC $ C a' b'

     -- reduce free variation as much as you can
     FV ts -> mapM (comp g) ts >>= returnC . variants

     -- merge record extensions if you can
     ExtR r s -> do
       r' <- comp g r
       s' <- comp g s
       case (r',s') of
         (R rs, R ss) -> plusRecord r' s'
         (RecType rs, RecType ss) -> plusRecType r' s'
         _ -> return $ ExtR r' s'

     ELin c r -> do
       r' <- comp g r
       unlockRecord c r'

     T _ _ -> compTable g t
     V _ _ -> compTable g t

     -- otherwise go ahead
     _ -> composOp (comp g) t >>= returnC

    where

     -- application to a single argument; free variation in the function
     -- or in the argument is distributed over the application
     compApp g (App f a) = do
       f' <- hnf g f
       a' <- comp g a
       case (f',a') of
         (Abs _ x b, FV as) ->
           mapM (\c -> comp (ext x c g) b) as >>= return . variants
         (_, FV as)  -> mapM (\c -> comp g (App f' c)) as >>= return . variants
         (FV fs, _)  -> mapM (\c -> comp g (App c a')) fs >>= return . variants
         (Abs _ x b,_) -> comp (ext x a' g) b

         (QC _ _,_)  -> returnC $ App f' a'

         (S (T i cs) e,_) -> prawitz g i (flip App a') cs e
         (S (V i cs) e,_) -> prawitzV g i (flip App a') cs e

         _ -> do
           (t',b) <- appPredefined (App f' a')
           if b then return t' else comp g t'

     hnf  = comput False
     comp = comput True

     -- with rec, a looked-up definition is computed in the empty substitution
     look p c
       | rec       = lookupResDef gr p c >>= comp []
       | otherwise = lookupResDef gr p c

     ext x a g = (x,a):g

     returnC = return --- . computed

     -- duplicates are removed; a single remaining variant is not wrapped in FV
     variants ts = case nub ts of
       [t] -> t
       ts  -> FV ts

     isCan v = case v of
       Con _    -> True
       QC _ _   -> True
       App f a  -> isCan f && isCan a
       R rs     -> all (isCan . snd . snd) rs
       _        -> False

     -- replace pattern macros (PM) by the patterns they are defined as
     compPatternMacro p = case p of
       PM m c -> case look m c of
         Ok (EPatt p') -> compPatternMacro p'
         _ -> Bad (render (text "pattern expected as value of" $$ nest 2 (ppPatt Unqualified 0 p)))
       PAs x p -> do
         p' <- compPatternMacro p
         return $ PAs x p'
       PAlt p q -> do
         p' <- compPatternMacro p
         q' <- compPatternMacro q
         return $ PAlt p' q'
       PSeq p q -> do
         p' <- compPatternMacro p
         q' <- compPatternMacro q
         return $ PSeq p' q'
       PRep p -> do
         p' <- compPatternMacro p
         return $ PRep p'
       PNeg p -> do
         p' <- compPatternMacro p
         return $ PNeg p'
       PR rs -> do
         rs' <- mapPairsM compPatternMacro rs
         return $ PR rs'

       _ -> return p

     -- selection from an already computed table with a computed argument
     compSelect g (S t' v') = case v' of
       FV vs -> mapM (\c -> comp g (S t' c)) vs >>= returnC . variants
       _ -> case t' of
         FV ccs -> mapM (\c -> comp g (S c v')) ccs >>= returnC . variants

         T _ [(PW,c)] -> comp g c           --- an optimization
         T _ [(PT _ PW,c)] -> comp g c

         T _ [(PV z,c)] -> comp (ext z v' g) c  --- another optimization
         T _ [(PT _ (PV z),c)] -> comp (ext z v' g) c

         -- course-of-values table: look up by index, no pattern matching needed

         V ptyp ts -> do
           vs <- allParamValues gr ptyp
           case lookupR v' (zip vs [0 .. length vs - 1]) of
             Just i -> comp g $ ts !! i
             _ -> return $ S t' v' -- if v' is not canonical
         T _ cc -> do
           case matchPattern cc v' of
             Ok (c,g') -> comp (g' ++ g) c
             _ | isCan v' -> Bad (render (text "missing case" <+> ppTerm Unqualified 0 v' <+> text "in" <+> ppTerm Unqualified 0 t))
             _ -> return $ S t' v' -- if v' is not canonical

         S (T i cs) e -> prawitz g i (flip S v') cs e
         S (V i cs) e -> prawitzV g i (flip S v') cs e
         _    -> returnC $ S t' v'

     --- needed to match records with and without type information
     ---- todo: eliminate linear search in a list of records!
     lookupR v vs = case v of
       R rs -> lookup ([(x,y) | (x,(_,y)) <- rs])
                 [([(x,y) | (x,(_,y)) <- rs],v) | (R rs,v) <- vs]
       _ -> lookup v vs

     -- case-expand tables
     -- if already expanded, don't expand again
     compTable g t = case t of
         T i@(TComp ty) cs -> do
           -- if there are no variables, don't even go inside
           cs' <- if (null g) then return cs else mapPairsM (comp g) cs
----           return $ V ty (map snd cs')
           return $ T i cs'
         V ty cs -> do
           ty' <- comp g ty
           -- if there are no variables, don't even go inside
           cs' <- if (null g) then return cs else mapM (comp g) cs
           return $ V ty' cs'

         T i cs -> do
           pty0 <- getTableType i
           ptyp <- comp g pty0
           case allParamValues gr ptyp of
             Ok vs0 -> do
               let vs = vs0 ---- [Val v ptyp i | (v,i) <- zip vs0 [0..]]
               ps0  <- mapM (compPatternMacro . fst) cs
               cs'  <- mapM (compBranchOpt g) (zip ps0 (map snd cs))
               -- one branch per parameter value, found by pattern matching
               sts  <- mapM (matchPattern cs') vs
               ts   <- mapM (\ (c,g') -> comp (g' ++ g) c) sts
               ps   <- mapM term2patt vs
               let ps' = ps --- PT ptyp (head ps) : tail ps
----               return $ V ptyp ts -- to save space, just course of values
               return $ T (TComp ptyp) (zip ps' ts)
             _ -> do
               ps0  <- mapM (compPatternMacro . fst) cs

               cs'  <- mapM (compBranch g) (zip ps0 (map snd cs))

----               cs' <- mapM (compBranch g) cs
               return $ T i cs'  -- happens with variable types
         _ -> comp g t

     -- compute a branch body with the pattern variables bound to themselves
     compBranch g (p,v) = do
       let g' = contP p ++ g
       v' <- comp g' v
       return (p,v')

     -- like compBranch, but a branch without pattern variables is left
     -- alone, and a failing computation leaves the branch unchanged
     compBranchOpt g c@(p,v) = case contP p of
       [] -> return c
       _ -> err (const (return c)) return $ compBranch g c

     -- the variables bound by a pattern, each mapped to itself
     contP p = case p of
       PV x -> [(x,Vr x)]
       PC _ ps -> concatMap contP ps
       PP _ _ ps -> concatMap contP ps
       PT _ p -> contP p
       PR rs -> concatMap (contP . snd) rs

       PAs x p -> (x,Vr x) : contP p

       PSeq p q -> concatMap contP [p,q]
       PAlt p q -> concatMap contP [p,q]
       PRep p   -> contP p
       PNeg p   -> contP p

       _ -> []

     -- apply f to every branch value of a selected table (T and V forms)
     prawitz g i f cs e = do
       cs' <- mapM (compBranch g) [(p, f v) | (p,v) <- cs]
       return $ S (T i cs') e
     prawitzV g i f cs e = do
       cs' <- mapM (comp g) [(f v) | v <- cs]
       return $ S (V i cs') e

     -- a pattern used as the condition of a pre alternative is turned
     -- into the list of strings it stands for
     compInAlts g (v,c) = do
       v' <- comp g v
       c' <- comp g c
       c2 <- case c' of
         EPatt p -> liftM Strs $ getPatts p
         _ -> return c'
       return (v',c2)
      where
       getPatts p = case p of
         PAlt a b  -> liftM2 (++) (getPatts a) (getPatts b)
         PString s -> return [K s]
         PSeq a b  -> do
           as <- getPatts a
           bs <- getPatts b
           return [K (s ++ t) | K s <- as, K t <- bs]
         _ -> fail (render (text "not valid pattern in pre expression" <+> ppPatt Unqualified 0 p))

{- ----
     uncurrySelect g fs t v = do
       ts <- mapM (allParamValues gr . snd) fs
       vs <- mapM (comp g) [P v r | r <- map fst fs]
       return $ reorderSelect t fs ts vs

     reorderSelect t fs pss vs = case (t,fs,pss,vs) of
       (V _ ts, f:fs1, ps:pss1, v:vs1) ->
         S (V (snd f)
             [reorderSelect (V (RecType fs1) t) fs1 pss1 vs1 |
               t <- segments (length ts `div` length ps) ts]) v
       (T (TComp _) cs, f:fs1, ps:pss1, v:vs1) ->
         S (T (TComp (snd f))
             [(p,reorderSelect (T (TComp (RecType fs1)) c) fs1 pss1 vs1) |
               (ep,c) <- zip ps (segments (length cs `div` length ps) cs),
               let Ok p = term2patt ep]) v
       _ -> t

     segments i xs =
       let (x0,xs1) = splitAt i xs in x0 : takeWhile (not . null) (segments i xs1)
-}


-- | argument variables cannot be glued
-- (fails on any 'IA' or 'IAV' variable found in the term)
checkNoArgVars :: Term -> Err Term
checkNoArgVars t = case t of
  Vr (IA _ _)  -> Bad $ glueErrorMsg $ ppTerm Unqualified 0 t
  Vr (IAV _ _ _) -> Bad $ glueErrorMsg $ ppTerm Unqualified 0 t
  _ -> composOp checkNoArgVars t
-- ---------------------------------------------------------------------
-- GF/Compile/Concrete/Compute.hs (last definition of the module)
-- ---------------------------------------------------------------------

-- The argument type of a table: the type stored in a course-of-values
-- table (V) or in the TComp annotation of a T table; an error for any
-- other term.
getArgType t = case t of
  V ty _ -> return ty
  T (TComp ty) _ -> return ty
  _ -> Bad (render (text "cannot get argument type of table" $$ nest 2 (ppTerm Unqualified 0 t)))

-- ---------------------------------------------------------------------
-- GF/Compile/Concrete/TypeCheck.hs
-- ---------------------------------------------------------------------

-- | Compute a linearization type: constants are unfolded with
-- 'lookupResDef', applications of abstractions are reduced, record type
-- extensions of two record types are merged with 'plusRecType', record
-- types are sorted with 'sortRec', and @Tok@ is replaced by @Str@.
--
-- The context @g0@ is extended in front with a binding of every context
-- variable to itself.
computeLType :: SourceGrammar -> Context -> Type -> Check Type
computeLType gr g0 t = comp (reverse [(b,x, Vr x) | (b,x,_) <- g0] ++ g0) t
  where
    comp g ty = case ty of
      _ | Just _ <- isTypeInts ty -> return ty ---- shouldn't be needed
        | isPredefConstant ty     -> return ty ---- shouldn't be needed

      Q m ident -> checkIn (text "module" <+> ppIdent m) $ do
        ty' <- checkErr (lookupResDef gr m ident)
        if ty' == ty then return ty else comp g ty' --- is this necessary to test?

      Vr ident  -> checkLookup ident g -- never needed to compute!

      App f a -> do
        f' <- comp g f
        a' <- comp g a
        case f' of
          Abs b x t -> comp ((b,x,a'):g) t
          _ -> return $ App f' a'

      Prod bt x a b -> do
        a' <- comp g a
        b' <- comp ((bt,x,Vr x) : g) b
        return $ Prod bt x a' b'

      Abs bt x b -> do
        b' <- comp ((bt,x,Vr x):g) b
        return $ Abs bt x b'

      ExtR r s -> do
        r' <- comp g r
        s' <- comp g s
        case (r',s') of
          (RecType rs, RecType ss) -> checkErr (plusRecType r' s') >>= comp g
          _ -> return $ ExtR r' s'

      RecType fs -> do
        let fs' = sortRec fs
        liftM RecType $ mapPairsM (comp g) fs'

      ELincat c t -> do
        t' <- comp g t
        checkErr $ lockRecType c t' ---- locking to be removed AR 20/6/2009

      _ | ty == typeTok -> return typeStr
      -- (a second `isPredefConstant ty` alternative used to follow here;
      --  it could never fire, since the same guard is tested at the top)

      _ -> composOp (comp g) ty
+ else substituteLType [(bt,z,a')] val + return (App f' a',ty) + _ -> checkError (text "A function type is expected for" <+> ppTerm Unqualified 0 f <+> text "instead of type" <+> ppType fty) + + S f x -> do + (f', fty) <- inferLType gr g f + case fty of + Table arg val -> do + x'<- justCheck g x arg + return (S f' x', val) + _ -> checkError (text "table lintype expected for the table in" $$ nest 2 (ppTerm Unqualified 0 trm)) + + P t i -> do + (t',ty) <- inferLType gr g t --- ?? + ty' <- computeLType gr g ty + let tr2 = P t' i + termWith tr2 $ case ty' of + RecType ts -> case lookup i ts of + Nothing -> checkError (text "unknown label" <+> ppLabel i <+> text "in" $$ nest 2 (ppTerm Unqualified 0 ty')) + Just x -> return x + _ -> checkError (text "record type expected for:" <+> ppTerm Unqualified 0 t $$ + text " instead of the inferred:" <+> ppTerm Unqualified 0 ty') + + R r -> do + let (ls,fs) = unzip r + fsts <- mapM inferM fs + let ts = [ty | (Just ty,_) <- fsts] + checkCond (text "cannot infer type of record" $$ nest 2 (ppTerm Unqualified 0 trm)) (length ts == length fsts) + return $ (R (zip ls fsts), RecType (zip ls ts)) + + T (TTyped arg) pts -> do + (_,val) <- checks $ map (inferCase (Just arg)) pts + checkLType gr g trm (Table arg val) + T (TComp arg) pts -> do + (_,val) <- checks $ map (inferCase (Just arg)) pts + checkLType gr g trm (Table arg val) + T ti pts -> do -- tries to guess: good in oper type inference + let pts' = [pt | pt@(p,_) <- pts, isConstPatt p] + case pts' of + [] -> checkError (text "cannot infer table type of" <+> ppTerm Unqualified 0 trm) +---- PInt k : _ -> return $ Ints $ max [i | PInt i <- pts'] + _ -> do + (arg,val) <- checks $ map (inferCase Nothing) pts' + checkLType gr g trm (Table arg val) + V arg pts -> do + (_,val) <- checks $ map (inferLType gr g) pts + return (trm, Table arg val) + + K s -> do + if elem ' ' s + then do + let ss = foldr C Empty (map K (words s)) + ----- removed irritating warning AR 24/5/2008 + ----- checkWarn 
("token \"" ++ s ++ + ----- "\" converted to token list" ++ prt ss) + return (ss, typeStr) + else return (trm, typeStr) + + EInt i -> return (trm, typeInt) + + EFloat i -> return (trm, typeFloat) + + Empty -> return (trm, typeStr) + + C s1 s2 -> + check2 (flip (justCheck g) typeStr) C s1 s2 typeStr + + Glue s1 s2 -> + check2 (flip (justCheck g) typeStr) Glue s1 s2 typeStr ---- typeTok + +---- hack from Rename.identRenameTerm, to live with files with naming conflicts 18/6/2007 + Strs (Cn c : ts) | c == cConflict -> do + checkWarn (text "unresolved constant, could be any of" <+> hcat (map (ppTerm Unqualified 0) ts)) + inferLType gr g (head ts) + + Strs ts -> do + ts' <- mapM (\t -> justCheck g t typeStr) ts + return (Strs ts', typeStrs) + + Alts (t,aa) -> do + t' <- justCheck g t typeStr + aa' <- flip mapM aa (\ (c,v) -> do + c' <- justCheck g c typeStr + v' <- checks $ map (justCheck g v) [typeStrs, EPattType typeStr] + return (c',v')) + return (Alts (t',aa'), typeStr) + + RecType r -> do + let (ls,ts) = unzip r + ts' <- mapM (flip (justCheck g) typeType) ts + return (RecType (zip ls ts'), typeType) + + ExtR r s -> do + (r',rT) <- inferLType gr g r + rT' <- computeLType gr g rT + (s',sT) <- inferLType gr g s + sT' <- computeLType gr g sT + + let trm' = ExtR r' s' + ---- trm' <- checkErr $ plusRecord r' s' + case (rT', sT') of + (RecType rs, RecType ss) -> do + rt <- checkErr $ plusRecType rT' sT' + checkLType gr g trm' rt ---- return (trm', rt) + _ | rT' == typeType && sT' == typeType -> return (trm', typeType) + _ -> checkError (text "records or record types expected in" <+> ppTerm Unqualified 0 trm) + + Sort _ -> + termWith trm $ return typeType + + Prod bt x a b -> do + a' <- justCheck g a typeType + b' <- justCheck ((bt,x,a'):g) b typeType + return (Prod bt x a' b', typeType) + + Table p t -> do + p' <- justCheck g p typeType --- check p partype! 
+ t' <- justCheck g t typeType + return $ (Table p' t', typeType) + + FV vs -> do + (_,ty) <- checks $ map (inferLType gr g) vs +--- checkIfComplexVariantType trm ty + checkLType gr g trm ty + + EPattType ty -> do + ty' <- justCheck g ty typeType + return (EPattType ty',typeType) + EPatt p -> do + ty <- inferPatt p + return (trm, EPattType ty) + + ELin c trm -> do + (trm',ty) <- inferLType gr g trm + ty' <- checkErr $ lockRecType c ty ---- lookup c; remove lock AR 20/6/2009 + return $ (ELin c trm', ty') + + _ -> checkError (text "cannot infer lintype of" <+> ppTerm Unqualified 0 trm) + + where + isPredef m = elem m [cPredef,cPredefAbs] + + justCheck g ty te = checkLType gr g ty te >>= return . fst + + -- for record fields, which may be typed + inferM (mty, t) = do + (t', ty') <- case mty of + Just ty -> checkLType gr g ty t + _ -> inferLType gr g t + return (Just ty',t') + + inferCase mty (patt,term) = do + arg <- maybe (inferPatt patt) return mty + cont <- pattContext gr g arg patt + (_,val) <- inferLType gr (reverse cont ++ g) term + return (arg,val) + isConstPatt p = case p of + PC _ ps -> True --- all isConstPatt ps + PP _ _ ps -> True --- all isConstPatt ps + PR ps -> all (isConstPatt . snd) ps + PT _ p -> isConstPatt p + PString _ -> True + PInt _ -> True + PFloat _ -> True + PChar -> True + PChars _ -> True + PSeq p q -> isConstPatt p && isConstPatt q + PAlt p q -> isConstPatt p && isConstPatt q + PRep p -> isConstPatt p + PNeg p -> isConstPatt p + PAs _ p -> isConstPatt p + _ -> False + + inferPatt p = case p of + PP q c ps | q /= cPredef -> checkErr $ liftM valTypeCnc (lookupResType gr q c) + PAs _ p -> inferPatt p + PNeg p -> inferPatt p + PAlt p q -> checks [inferPatt p, inferPatt q] + PSeq _ _ -> return $ typeStr + PRep _ -> return $ typeStr + PChar -> return $ typeStr + PChars _ -> return $ typeStr + _ -> inferLType gr g (patt2term p) >>= return . 
-- | Resolve an application of an overloaded constant.
--
-- type inference: Nothing, type checking: Just t
-- the latter permits matching with value type
--
-- Returns 'Nothing' when the head of the application is not a constant
-- for which 'lookupOverload' succeeds.  Otherwise the argument types are
-- inferred and exactly one instance must fit: instances whose argument
-- types match exactly are preferred over instances that match only when
-- lock fields are ignored (a warning is issued for the latter); if
-- several instances remain, those whose value type is not a function
-- type are preferred.  Anything else is a type error.
getOverload :: SourceGrammar -> Context -> Maybe Type -> Term -> Check (Maybe (Term,Type))
getOverload gr g mt ot = case appForm ot of
     (f@(Q m c), ts) -> case lookupOverload gr m c of
       Ok typs -> do
         ttys <- mapM (inferLType gr g) ts
         v <- matchOverload f typs ttys
         return $ Just v
       _ -> return Nothing
     _ -> return Nothing
 where
   matchOverload f typs ttys = do
     let (tts,tys) = unzip ttys
     let vfs = lookupOverloadInstance tys typs
     let matches = [vf | vf@((v,_),_) <- vfs, matchVal mt v]

     -- first component: exact matches, second: matches modulo lock fields
     case ([vf | (vf,True) <- matches],[vf | (vf,False) <- matches]) of
       ([(val,fun)],_) -> return (mkApp fun tts, val)
       ([],[(val,fun)]) -> do
         checkWarn (text "ignoring lock fields in resolving" <+> ppTerm Unqualified 0 ot)
         return (mkApp fun tts, val)
       ([],[]) -> do
         let showTypes ty = hsep (map ppType ty)
         checkError $ text "no overload instance of" <+> ppTerm Unqualified 0 f $$
                      text "for" $$
                      nest 2 (showTypes tys) $$
                      text "among" $$
                      nest 2 (vcat [showTypes ty | (ty,_) <- typs]) $$
                      maybe empty (\x -> text "with value type" <+> ppType x) mt

       -- several candidates: retry with partial applications excluded
       (vfs1,vfs2) -> case (noProds vfs1,noProds vfs2) of
         ([(val,fun)],_) -> do
           return (mkApp fun tts, val)
         ([],[(val,fun)]) -> do
           checkWarn (text "ignoring lock fields in resolving" <+> ppTerm Unqualified 0 ot)
           return (mkApp fun tts, val)

----- unsafely exclude irritating warning AR 24/5/2008
-----           checkWarn $ "overloading of" +++ prt f +++
-----             "resolved by excluding partial applications:" ++++
-----             unlines [prtType env ty | (ty,_) <- vfs', not (noProd ty)]


         -- list the exact matches if there are any, otherwise the matches
         -- modulo lock fields (both branches used to select vfs2, so the
         -- exact matches were never shown)
         _ -> checkError $ text "ambiguous overloading of" <+> ppTerm Unqualified 0 f <+>
                           text "for" <+> hsep (map ppType tys) $$
                           text "with alternatives" $$
                           nest 2 (vcat [ppType ty | (ty,_) <- if null vfs1 then vfs2 else vfs1])

   -- no expected type, or the expected type equals the value type,
   -- possibly after removing lock fields from the value type
   matchVal mt v = elem mt [Nothing,Just v,Just (unlocked v)]

   unlocked v = case v of
     RecType fs -> RecType $ filter (not . isLockLabel . fst) fs
     _ -> v
   ---- TODO: accept subtypes
   ---- TODO: use a trie
   -- instances with at least as many arguments as were supplied; the
   -- Bool tells whether the supplied argument types match exactly
   lookupOverloadInstance tys typs =
     [((mkFunType rest val, t),isExact) |
       let lt = length tys,
       (ty,(val,t)) <- typs, length ty >= lt,
       let (pre,rest) = splitAt lt ty,
       let isExact = pre == tys,
       isExact || map unlocked pre == map unlocked tys
     ]

   noProds vfs = [(v,f) | (v,f) <- vfs, noProd v]

   noProd ty = case ty of
     Prod _ _ _ _ -> False
     _ -> True
needed? because inference may be too difficult + RecType rr -> do + let (ls,_) = unzip rr -- labels of expected type + fsts <- mapM (checkM r) rr -- check that they are found in the record + return $ (R fsts, typ) -- normalize record + + _ -> checkError (text "record type expected in type checking instead of" $$ nest 2 (ppTerm Unqualified 0 typ)) + + ExtR r s -> case typ of + _ | typ == typeType -> do + trm' <- computeLType gr g trm + case trm' of + RecType _ -> termWith trm $ return typeType + ExtR (Vr _) (RecType _) -> termWith trm $ return typeType + -- ext t = t ** ... + _ -> checkError (text "invalid record type extension" <+> nest 2 (ppTerm Unqualified 0 trm)) + RecType rr -> do + (r',ty,s') <- checks [ + do (r',ty) <- inferLType gr g r + return (r',ty,s) + , + do (s',ty) <- inferLType gr g s + return (s',ty,r) + ] + case ty of + RecType rr1 -> do + let (rr0,rr2) = recParts rr rr1 + r2 <- justCheck g r' rr0 + s2 <- justCheck g s' rr2 + return $ (ExtR r2 s2, typ) + _ -> checkError (text "record type expected in extension of" <+> ppTerm Unqualified 0 r $$ + text "but found" <+> ppTerm Unqualified 0 ty) + + ExtR ty ex -> do + r' <- justCheck g r ty + s' <- justCheck g s ex + return $ (ExtR r' s', typ) --- is this all? + + _ -> checkError (text "record extension not meaningful for" <+> ppTerm Unqualified 0 typ) + + FV vs -> do + ttys <- mapM (flip (checkLType gr g) typ) vs +--- checkIfComplexVariantType trm typ + return (FV (map fst ttys), typ) --- typ' ? 
+ + S tab arg -> checks [ do + (tab',ty) <- inferLType gr g tab + ty' <- computeLType gr g ty + case ty' of + Table p t -> do + (arg',val) <- checkLType gr g arg p + checkEqLType gr g typ t trm + return (S tab' arg', t) + _ -> checkError (text "table type expected for applied table instead of" <+> ppType ty') + , do + (arg',ty) <- inferLType gr g arg + ty' <- computeLType gr g ty + (tab',_) <- checkLType gr g tab (Table ty' typ) + return (S tab' arg', typ) + ] + Let (x,(mty,def)) body -> case mty of + Just ty -> do + (def',ty') <- checkLType gr g def ty + body' <- justCheck ((Explicit,x,ty'):g) body typ + return (Let (x,(Just ty',def')) body', typ) + _ -> do + (def',ty) <- inferLType gr g def -- tries to infer type of local constant + checkLType gr g (Let (x,(Just ty,def')) body) typ + + ELin c tr -> do + tr1 <- checkErr $ unlockRecord c tr + checkLType gr g tr1 typ + + _ -> do + (trm',ty') <- inferLType gr g trm + termWith trm' $ checkEqLType gr g typ ty' trm' + where + justCheck g ty te = checkLType gr g ty te >>= return . fst + + recParts rr t = (RecType rr1,RecType rr2) where + (rr1,rr2) = partition (flip elem (map fst t) . 
fst) rr + + checkM rms (l,ty) = case lookup l rms of + Just (Just ty0,t) -> do + checkEqLType gr g ty ty0 t + (t',ty') <- checkLType gr g t ty + return (l,(Just ty',t')) + Just (_,t) -> do + (t',ty') <- checkLType gr g t ty + return (l,(Just ty',t')) + _ -> checkError $ + if isLockLabel l + then let cat = drop 5 (showIdent (label2ident l)) + in ppTerm Unqualified 0 (R rms) <+> text "is not in the lincat of" <+> text cat <> + text "; try wrapping it with lin" <+> text cat + else text "cannot find value for label" <+> ppLabel l <+> text "in" <+> ppTerm Unqualified 0 (R rms) + + checkCase arg val (p,t) = do + cont <- pattContext gr g arg p + t' <- justCheck (reverse cont ++ g) t val + return (p,t') + +pattContext :: SourceGrammar -> Context -> Type -> Patt -> Check Context +pattContext env g typ p = case p of + PV x -> return [(Explicit,x,typ)] + PP q c ps | q /= cPredef -> do ---- why this /=? AR 6/1/2006 + t <- checkErr $ lookupResType env q c + let (cont,v) = typeFormCnc t + checkCond (text "wrong number of arguments for constructor in" <+> ppPatt Unqualified 0 p) + (length cont == length ps) + checkEqLType env g typ v (patt2term p) + mapM (\((_,_,ty),p) -> pattContext env g ty p) (zip cont ps) >>= return . concat + PR r -> do + typ' <- computeLType env g typ + case typ' of + RecType t -> do + let pts = [(ty,tr) | (l,tr) <- r, Just ty <- [lookup l t]] + ----- checkWarn $ prt p ++++ show pts ----- debug + mapM (uncurry (pattContext env g)) pts >>= return . 
concat + _ -> checkError (text "record type expected for pattern instead of" <+> ppTerm Unqualified 0 typ') + PT t p' -> do + checkEqLType env g typ t (patt2term p') + pattContext env g typ p' + + PAs x p -> do + g' <- pattContext env g typ p + return ((Explicit,x,typ):g') + + PAlt p' q -> do + g1 <- pattContext env g typ p' + g2 <- pattContext env g typ q + -- variables whose binding occurs in only one of the two alternatives + let pts = nub ([x | pt@(_,x,_) <- g1, notElem pt g2] ++ [x | pt@(_,x,_) <- g2, notElem pt g1]) + checkCond + (text "incompatible bindings of" <+> + fsep (map ppIdent pts) <+> + text "in pattern alternatives" <+> ppPatt Unqualified 0 p) (null pts) + return g1 -- must be g1 == g2 + PSeq p q -> do + g1 <- pattContext env g typ p + g2 <- pattContext env g typ q + return $ g1 ++ g2 + PRep p' -> noBind typeStr p' + PNeg p' -> noBind typ p' + + _ -> return [] ---- check types! + where + noBind typ p' = do + co <- pattContext env g typ p' + if not (null co) + then checkWarn (text "no variable bound inside pattern" <+> ppPatt Unqualified 0 p) + >> return [] + else return [] + +checkEqLType :: SourceGrammar -> Context -> Type -> Type -> Term -> Check Type +checkEqLType gr g t u trm = do + (b,t',u',s) <- checkIfEqLType gr g t u trm + case b of + True -> return t' + False -> checkError $ text s <+> text "type of" <+> ppTerm Unqualified 0 trm $$ + text "expected:" <+> ppType t $$ + text "inferred:" <+> ppType u + +checkIfEqLType :: SourceGrammar -> Context -> Type -> Type -> Term -> Check (Bool,Type,Type,String) +checkIfEqLType gr g t u trm = do + t' <- computeLType gr g t + u' <- computeLType gr g u + case t' == u' || alpha [] t' u' of + True -> return (True,t',u',[]) + -- forgive missing lock fields by only generating a warning. + --- better: use a flag to forgive? 
(AR 31/1/2006) + _ -> case missingLock [] t' u' of + Ok lo -> do + checkWarn $ text "missing lock field" <+> fsep (map ppLabel lo) + return (True,t',u',[]) + Bad s -> return (False,t',u',s) + + where + + -- t is a subtype of u + --- quick hack version of TC.eqVal + alpha g t u = case (t,u) of + + -- error (the empty type!) is subtype of any other type + (_,u) | u == typeError -> True + + -- contravariance + (Prod _ x a b, Prod _ y c d) -> alpha g c a && alpha ((x,y):g) b d + + -- record subtyping + (RecType rs, RecType ts) -> all (\ (l,a) -> + any (\ (k,b) -> alpha g a b && l == k) ts) rs + (ExtR r s, ExtR r' s') -> alpha g r r' && alpha g s s' + (ExtR r s, t) -> alpha g r t || alpha g s t + + -- the following say that Ints n is a subset of Int and of Ints m >= n + -- (m is the bound of t, n the bound of u; both sides must be Ints types for the first guard) + (t,u) | Just m <- isTypeInts t, Just n <- isTypeInts u -> m >= n + | Just _ <- isTypeInts t, u == typeInt -> True ---- check size! + | t == typeInt, Just _ <- isTypeInts u -> True ---- why this ???? AR 11/12/2005 + + ---- this should be made in Rename + (Q m a, Q n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + || m == n --- for Predef + (QC m a, QC n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + (QC m a, Q n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + (Q m a, QC n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + + (Table a b, Table c d) -> alpha g a c && alpha g b d + (Vr x, Vr y) -> x == y || elem (x,y) g || elem (y,x) g + _ -> t == u + --- the following should be one-way coercions only. 
AR 4/1/2001 + || elem t sTypes && elem u sTypes + || (t == typeType && u == typePType) + || (u == typeType && t == typePType) + + missingLock g t u = case (t,u) of + (RecType rs, RecType ts) -> + let + ls = [l | (l,a) <- rs, + not (any (\ (k,b) -> alpha g a b && l == k) ts)] + (locks,others) = partition isLockLabel ls + in case others of + _:_ -> Bad $ render (text "missing record fields:" <+> fsep (punctuate comma (map ppLabel others))) + _ -> return locks + -- contravariance + (Prod _ x a b, Prod _ y c d) -> do + ls1 <- missingLock g c a + ls2 <- missingLock g b d + return $ ls1 ++ ls2 + + _ -> Bad "" + + sTypes = [typeStr, typeTok, typeString] + +-- auxiliaries + +-- | light-weight substitution for dep. types +substituteLType :: Context -> Type -> Check Type +substituteLType g t = case t of + Vr x -> return $ maybe t id $ lookup x [(x,t) | (_,x,t) <- g] + _ -> composOp (substituteLType g) t + +termWith :: Term -> Check Type -> Check (Term, Type) +termWith t ct = do + ty <- ct + return (t,ty) + +-- | compositional check\/infer of binary operations +check2 :: (Term -> Check Term) -> (Term -> Term -> Term) -> + Term -> Term -> Type -> Check (Term,Type) +check2 chk con a b t = do + a' <- chk a + b' <- chk b + return (con a' b', t) + +-- printing a type with a lock field lock_C as C +ppType :: Type -> Doc +ppType ty = + case ty of + RecType fs -> case filter isLockLabel $ map fst fs of + [lock] -> text (drop 5 (showIdent (label2ident lock))) + _ -> ppTerm Unqualified 0 ty + Prod _ x a b -> ppType a <+> text "->" <+> ppType b + _ -> ppTerm Unqualified 0 ty + +checkLookup :: Ident -> Context -> Check Type +checkLookup x g = + case [ty | (b,y,ty) <- g, x == y] of + [] -> checkError (text "unknown variable" <+> ppIdent x) + (ty:_) -> return ty diff --git a/src/compiler/GF/Compile/Export.hs b/src/compiler/GF/Compile/Export.hs new file mode 100644 index 000000000..d03eb947e --- /dev/null +++ b/src/compiler/GF/Compile/Export.hs @@ -0,0 +1,64 @@ +module GF.Compile.Export 
where + +import PGF.CId +import PGF.Data (PGF(..)) +import GF.Compile.GFCCtoHaskell +import GF.Compile.GFCCtoProlog +import GF.Compile.GFCCtoJS +import GF.Compile.PGFPretty +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.PGFToCFG +import GF.Speech.SRGS_ABNF +import GF.Speech.SRGS_XML +import GF.Speech.JSGF +import GF.Speech.GSL +import GF.Speech.SRG +import GF.Speech.VoiceXML +import GF.Speech.SLF +import GF.Speech.PrRegExp + +import Data.Maybe +import System.FilePath + +-- top-level access to code generation + +exportPGF :: Options + -> OutputFormat + -> PGF + -> [(FilePath,String)] -- ^ List of recommended file names and contents. +exportPGF opts fmt pgf = + case fmt of + FmtPGFPretty -> multi "txt" prPGFPretty + FmtPMCFGPretty -> single "pmcfg" prPMCFGPretty + FmtJavaScript -> multi "js" pgf2js + FmtHaskell -> multi "hs" (grammar2haskell opts name) + FmtProlog -> multi "pl" grammar2prolog + FmtProlog_Abs -> multi "pl" grammar2prolog_abs + FmtBNF -> single "bnf" bnfPrinter + FmtEBNF -> single "ebnf" (ebnfPrinter opts) + FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts) + FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts) + FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts) + FmtSRGS_ABNF_NonRec -> single "gram" (srgsAbnfNonRecursivePrinter opts) + FmtJSGF -> single "jsgf" (jsgfPrinter opts) + FmtGSL -> single "gsl" (gslPrinter opts) + FmtVoiceXML -> single "vxml" grammar2vxml + FmtSLF -> single "slf" slfPrinter + FmtRegExp -> single "rexp" regexpPrinter + FmtFA -> single "dot" slfGraphvizPrinter + where + name = fromMaybe (showCId (absname pgf)) (flag optName opts) + + multi :: String -> (PGF -> String) -> [(FilePath,String)] + multi ext pr = [(name <.> ext, pr pgf)] + + single :: String -> (PGF -> CId -> String) -> [(FilePath,String)] + single ext pr = [(showCId cnc <.> ext, pr pgf cnc) | cnc <- cncnames pgf] + +-- | Get the name of the concrete syntax to generate output from. 
+-- FIXME: there should be an option to change this. +outputConcr :: PGF -> CId +outputConcr pgf = case cncnames pgf of + [] -> error "No concrete syntax." + cnc:_ -> cnc diff --git a/src/compiler/GF/Compile/GFCCtoHaskell.hs b/src/compiler/GF/Compile/GFCCtoHaskell.hs new file mode 100644 index 000000000..d44d6705c --- /dev/null +++ b/src/compiler/GF/Compile/GFCCtoHaskell.hs @@ -0,0 +1,230 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCtoHaskell +-- Maintainer : Aarne Ranta +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/06/17 12:39:07 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- to write a GF abstract grammar into a Haskell module with translations from +-- data objects into GF trees. Example: GSyntax for Agda. +-- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004 +----------------------------------------------------------------------------- + +module GF.Compile.GFCCtoHaskell (grammar2haskell) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations +import GF.Infra.Option +import GF.Text.UTF8 + +import Data.List --(isPrefixOf, find, intersperse) +import qualified Data.Map as Map + +type Prefix = String -> String + +-- | the main function +grammar2haskell :: Options + -> String -- ^ Module name. 
+ -> PGF + -> String +grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $ + pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr'] + where gr' = hSkeleton gr + gadt = haskellOption opts HaskellGADT + lexical cat = haskellOption opts HaskellLexical && isLexicalCat opts cat + gId | haskellOption opts HaskellNoPrefix = id + | otherwise = ("G"++) + pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}"] + | otherwise = [] + types | gadt = datatypesGADT gId lexical gr' + | otherwise = datatypes gId lexical gr' + +haskPreamble name = + [ + "module " ++ name ++ " where", + "", + "import PGF", + "----------------------------------------------------", + "-- automatic translation from GF to Haskell", + "----------------------------------------------------", + "", + "class Gf a where", + " gf :: a -> Tree", + " fg :: Tree -> a", + "", + predefInst "GString" "String" "unStr" "mkStr", + "", + predefInst "GInt" "Integer" "unInt" "mkInt", + "", + predefInst "GFloat" "Double" "unDouble" "mkDouble", + "", + "----------------------------------------------------", + "-- below this line machine-generated", + "----------------------------------------------------", + "" + ] + +predefInst gtyp typ destr consr = + "newtype" +++ gtyp +++ "=" +++ gtyp +++ typ +++ " deriving Show" +++++ + "instance Gf" +++ gtyp +++ "where" ++++ + " gf (" ++ gtyp +++ "x) =" +++ consr +++ "x" ++++ + " fg t =" ++++ + " case "++destr++" t of" ++++ + " Just x -> " +++ gtyp +++ "x" ++++ + " Nothing -> error (\"no" +++ gtyp +++ "\" ++ show t)" + +type OIdent = String + +type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] + +datatypes :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +datatypes gId lexical = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId lexical)) . 
snd + +gfinstances :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +gfinstances gId lexical (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId lexical m)) g + + +hDatatype :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> String +hDatatype _ _ ("Cn",_) = "" --- +hDatatype _ _ (cat,[]) = "" +hDatatype gId _ (cat,rules) | isListCat (cat,rules) = + "newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]" + +++ "deriving Show" +hDatatype gId lexical (cat,rules) = + "data" +++ gId cat +++ "=" ++ + (if length rules == 1 then "" else "\n ") +++ + foldr1 (\x y -> x ++ "\n |" +++ y) constructors ++++ + " deriving Show" + where + constructors = [gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- nonLexicalRules (lexical cat) rules] + ++ if lexical cat then [lexicalConstructor cat +++ "String"] else [] + +nonLexicalRules :: Bool -> [(OIdent, [OIdent])] -> [(OIdent, [OIdent])] +nonLexicalRules False rules = rules +nonLexicalRules True rules = [r | r@(f,t) <- rules, not (null t)] + +lexicalConstructor :: OIdent -> String +lexicalConstructor cat = "Lex" ++ cat + +-- GADT version of data types +datatypesGADT :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +datatypesGADT gId lexical (_,skel) = + unlines (concatMap (hCatTypeGADT gId) skel) + +++++ + "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . 
hDatatypeGADT gId lexical) skel) + +hCatTypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String] +hCatTypeGADT gId (cat,rules) + = ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_", + "data"+++gId cat++"_"] + +hDatatypeGADT :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> [String] +hDatatypeGADT gId lexical (cat, rules) + | isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t] + | otherwise = + [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t + | (f,args) <- nonLexicalRules (lexical cat) rules ] + ++ if lexical cat then [lexicalConstructor cat +++ ":: String ->"+++ t] else [] + where t = "Tree" +++ gId cat ++ "_" + +gfInstance :: Prefix -> (OIdent -> Bool) -> String -> (OIdent, [(OIdent, [OIdent])]) -> String +gfInstance gId lexical m crs = hInstance gId lexical m crs ++++ fInstance gId lexical m crs + +----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004 +hInstance _ _ m (cat,[]) = "" +hInstance gId lexical m (cat,rules) + | isListCat (cat,rules) = + "instance Gf" +++ gId cat +++ "where" ++++ + " gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])" + +++ "=" +++ mkRHS ("Base"++ec) baseVars ++++ + " gf (" ++ gId cat +++ "(x:xs)) = " + ++ mkRHS ("Cons"++ec) ["x",prParenth (gId cat+++"xs")] +-- no show for GADTs +-- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)" + | otherwise = + "instance Gf" +++ gId cat +++ "where\n" ++ + unlines ([mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] + ++ if lexical cat then [" gf (" ++ lexicalConstructor cat +++ "x) = mkApp (mkCId x) []"] else []) + where + ec = elemCat cat + baseVars = mkVars (baseSize (cat,rules)) + mkInst f xx = let xx' = mkVars (length xx) in " gf " ++ + (if length xx == 0 then gId f else prParenth (gId f +++ foldr1 (+++) xx')) +++ + "=" +++ mkRHS f xx' + mkVars n = ["x" ++ show i | i <- [1..n]] + mkRHS f vars = "mkApp (mkCId \"" ++ f ++ "\")" +++ + 
"[" ++ prTList ", " ["gf" +++ x | x <- vars] ++ "]" + + +----fInstance m ("Cn",_) = "" --- +-- generates the 'fg' half of a Gf instance: one case alternative per non-lexical rule, +-- an optional catch-all for lexical categories, and a final error alternative +fInstance _ _ m (cat,[]) = "" +fInstance gId lexical m (cat,rules) = + " fg t =" ++++ + " case unApp t of" ++++ + unlines [mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] ++++ + -- the alternatives produced by mkInst match on Just (i,[..]), so the lexical one needs a Just pattern too + (if lexical cat then " Just (i,[]) -> " ++ lexicalConstructor cat +++ "(prCId i)" else "") ++++ + " _ -> error (\"no" +++ cat ++ " \" ++ show t)" + where + mkInst f xx = + " Just (i," ++ + "[" ++ prTList "," xx' ++ "])" +++ + "| i == mkCId \"" ++ f ++ "\" ->" +++ mkRHS f xx' + where xx' = ["x" ++ show i | (_,i) <- zip xx [1..]] + mkRHS f vars + | isListCat (cat,rules) = + if "Base" `isPrefixOf` f then + gId cat +++ "[" ++ prTList ", " [ "fg" +++ x | x <- vars ] ++ "]" + else + let (i,t) = (init vars,last vars) + in "let" +++ gId cat +++ "xs = fg " ++ t +++ "in" +++ + gId cat +++ prParenth (prTList ":" (["fg"+++v | v <- i] ++ ["xs"])) + | otherwise = + gId f +++ + prTList " " [prParenth ("fg" +++ x) | x <- vars] + + +--type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] +hSkeleton :: PGF -> (String,HSkeleton) +hSkeleton gr = + (showCId (absname gr), + [(showCId c, [(showCId f, map showCId cs) | (f, (cs,_)) <- fs]) | + fs@((_, (_,c)):_) <- fns] + ) + where + fns = groupBy valtypg (sortBy valtyps (map jty (Map.assocs (funs (abstract gr))))) + valtyps (_, (_,x)) (_, (_,y)) = compare x y + valtypg (_, (_,x)) (_, (_,y)) = x == y + jty (f,(ty,_,_)) = (f,catSkeleton ty) + +updateSkeleton :: OIdent -> HSkeleton -> (OIdent, [OIdent]) -> HSkeleton +updateSkeleton cat skel rule = + case skel of + (cat0,rules):rr | cat0 == cat -> (cat0, rule:rules) : rr + (cat0,rules):rr -> (cat0, rules) : updateSkeleton cat rr rule + +isListCat :: (OIdent, 
[(OIdent, [OIdent])]) -> Bool +isListCat (cat,rules) = "List" `isPrefixOf` cat && length rules == 2 + && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs + where c = elemCat cat + fs = map fst rules + +-- | Gets the element category of a list category. 
+elemCat :: OIdent -> OIdent +elemCat = drop 4 + +isBaseFun :: OIdent -> Bool +isBaseFun f = "Base" `isPrefixOf` f + +isConsFun :: OIdent -> Bool +isConsFun f = "Cons" `isPrefixOf` f + +baseSize :: (OIdent, [(OIdent, [OIdent])]) -> Int +baseSize (_,rules) = length bs + where Just (_,bs) = find (("Base" `isPrefixOf`) . fst) rules diff --git a/src/compiler/GF/Compile/GFCCtoJS.hs b/src/compiler/GF/Compile/GFCCtoJS.hs new file mode 100644 index 000000000..312701e3b --- /dev/null +++ b/src/compiler/GF/Compile/GFCCtoJS.hs @@ -0,0 +1,138 @@ +module GF.Compile.GFCCtoJS (pgf2js) where + +import PGF.CId +import PGF.Data hiding (mkStr) +import qualified PGF.Macros as M +import qualified GF.JavaScript.AbsJS as JS +import qualified GF.JavaScript.PrintJS as JS + +import GF.Text.UTF8 +import GF.Data.ErrM +import GF.Infra.Option + +import Control.Monad (mplus) +import Data.Array.Unboxed (UArray) +import qualified Data.Array.IArray as Array +import Data.Maybe (fromMaybe) +import Data.Map (Map) +import qualified Data.Set as Set +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap + +pgf2js :: PGF -> String +pgf2js pgf = + encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] + where + n = showCId $ absname pgf + as = abstract pgf + cs = Map.assocs (concretes pgf) + start = showCId $ M.lookStartCat pgf + grammar = new "GFGrammar" [js_abstract, js_concrete] + js_abstract = abstract2js start as + js_concrete = JS.EObj $ map (concrete2js start n) cs + +abstract2js :: String -> Abstr -> JS.Expr +abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))] + +absdef2js :: (CId,(Type,Int,[Equation])) -> JS.Property +absdef2js (f,(typ,_,_)) = + let (args,cat) = M.catSkeleton typ in + JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)]) + +concrete2js :: String -> String -> (CId,Concr) -> 
JS.Property +concrete2js start n (c, cnc) = + JS.Prop l (new "GFConcrete" ([flags,(JS.EObj $ ((map (cncdef2js n (showCId c)) ds) ++ litslins))] ++ + maybe [] (parser2js start) (parser cnc))) + where + flags = mapToJSObj JS.EStr $ cflags cnc + l = JS.IdentPropName (JS.Ident (showCId c)) + ds = concatMap Map.assocs [lins cnc, opers cnc, lindefs cnc] + litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), + JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), + JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])] + + +cncdef2js :: String -> String -> (CId,Term) -> JS.Property +cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)]) + +term2js :: String -> String -> Term -> JS.Expr +term2js n l t = f t + where + f t = + case t of + R xs -> new "Arr" (map f xs) + P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y] + S xs -> mkSeq (map f xs) + K t -> tokn2js t + V i -> JS.EIndex (JS.EVar children) (JS.EInt i) + C i -> new "Int" [JS.EInt i] + F f -> JS.ECall (JS.EMember (JS.EIndex (JS.EMember (JS.EVar $ JS.Ident n) (JS.Ident "concretes")) (JS.EStr l)) (JS.Ident "rule")) [JS.EStr (showCId f), JS.EVar children] + FV xs -> new "Variants" (map f xs) + W str x -> new "Suffix" [JS.EStr str, f x] + TM _ -> new "Meta" [] + +tokn2js :: Tokn -> JS.Expr +tokn2js (KS s) = mkStr s +tokn2js (KP ss vs) = mkSeq (map mkStr ss) -- FIXME + +mkStr :: String -> JS.Expr +mkStr s = new "Str" [JS.EStr s] + +mkSeq :: [JS.Expr] -> JS.Expr +mkSeq [x] = x +mkSeq xs = new "Seq" xs + +argIdent :: Integer -> JS.Ident +argIdent n = JS.Ident ("x" ++ show n) + +children :: JS.Ident +children = JS.Ident "cs" + +-- Parser +parser2js :: String -> ParserInfo -> [JS.Expr] +parser2js start p = [new "Parser" 
[JS.EStr start, + JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set], + JS.EObj $ map cats (Map.assocs (startCats p))]] + where + cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EArray (map JS.EInt is)) + +frule2js :: ParserInfo -> FCat -> Production -> JS.Expr +frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins] + where + FFun f ps lins = functions p Array.! funid +frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]] + where + catLinArity :: FCat -> Int + catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + + topdownRules cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p)) + + g (FApply funid args) rules = (functions p Array.! funid,args) : rules + g (FCoerce cat) rules = f cat rules + + +name2js :: (CId,[Profile]) -> JS.Expr +name2js (f,ps) = new "FunApp" $ [JS.EStr $ showCId f, JS.EArray (map fromProfile ps)] + where + fromProfile :: Profile -> JS.Expr + fromProfile [] = new "MetaVar" [] + fromProfile [x] = daughter x + fromProfile args = new "Unify" [JS.EArray (map daughter args)] + +daughter i = new "Arg" [JS.EInt i] + +lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr +lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Array.! 
seqid)] | seqid <- Array.elems ls] + +sym2js :: FSymbol -> JS.Expr +sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymLit n l) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymKS [t]) = new "Terminal" [JS.EStr t] + +new :: String -> [JS.Expr] -> JS.Expr +new f xs = JS.ENew (JS.Ident f) xs + +mapToJSObj :: (a -> JS.Expr) -> Map CId a -> JS.Expr +mapToJSObj f m = JS.EObj [ JS.Prop (JS.IdentPropName (JS.Ident (showCId k))) (f v) | (k,v) <- Map.toList m ] diff --git a/src/compiler/GF/Compile/GFCCtoProlog.hs b/src/compiler/GF/Compile/GFCCtoProlog.hs new file mode 100644 index 000000000..702d4afe5 --- /dev/null +++ b/src/compiler/GF/Compile/GFCCtoProlog.hs @@ -0,0 +1,279 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCtoProlog +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- to write a GF grammar into a Prolog module +----------------------------------------------------------------------------- + +module GF.Compile.GFCCtoProlog (grammar2prolog, grammar2prolog_abs) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations +import GF.Text.UTF8 + +import qualified Data.Map as Map +import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord) +import Data.List (isPrefixOf,mapAccumL) + +grammar2prolog, grammar2prolog_abs :: PGF -> String +-- Most prologs have problems with UTF8 encodings, so we skip that: +grammar2prolog = {- encodeUTF8 . -} foldr (++++) [] . pgf2clauses +grammar2prolog_abs = {- encodeUTF8 . -} foldr (++++) [] . 
pgf2clauses_abs + + +pgf2clauses :: PGF -> [String] +pgf2clauses (PGF absname cncnames gflags abstract concretes) = + [":- " ++ plFact "module" [plp absname, "[]"]] ++ + clauseHeader "%% concrete(?Module)" + [plFact "concrete" [plp cncname] | cncname <- cncnames] ++ + clauseHeader "%% flag(?Flag, ?Value): global flags" + (map (plpFact2 "flag") (Map.assocs gflags)) ++ + plAbstract (absname, abstract) ++ + concatMap plConcrete (Map.assocs concretes) + +pgf2clauses_abs :: PGF -> [String] +pgf2clauses_abs (PGF absname _cncnames gflags abstract _concretes) = + [":- " ++ plFact "module" [plp absname, "[]"]] ++ + clauseHeader "%% flag(?Flag, ?Value): global flags" + (map (plpFact2 "flag") (Map.assocs gflags)) ++ + plAbstract (absname, abstract) + +clauseHeader :: String -> [String] -> [String] +clauseHeader hdr [] = [] +clauseHeader hdr clauses = "":hdr:clauses + + +---------------------------------------------------------------------- +-- abstract syntax + +plAbstract :: (CId, Abstr) -> [String] +plAbstract (name, Abstr aflags funs cats _catfuns) = + ["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", + "%% abstract module: " ++ plp name] ++ + clauseHeader "%% absflag(?Flag, ?Value): flags for abstract syntax" + (map (plpFact2 "absflag") (Map.assocs aflags)) ++ + clauseHeader "%% cat(?Type, ?[X:Type,...])" + (map plCat (Map.assocs cats)) ++ + clauseHeader "%% fun(?Fun, ?Type, ?[X:Type,...])" + (map plFun (Map.assocs funs)) ++ + clauseHeader "%% def(?Fun, ?Expr)" + (concatMap plFundef (Map.assocs funs)) + +plCat :: (CId, [Hypo]) -> String +plCat (cat, hypos) = plFact "cat" (plTypeWithHypos typ) + where ((_,subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos + args = reverse [EFun x | (_,x) <- subst] + typ = DTyp hypos' cat args + +plFun :: (CId, (Type, Int, [Equation])) -> String +plFun (fun, (typ,_,_)) = plFact "fun" (plp fun : plTypeWithHypos typ') + where typ' = snd $ alphaConvert emptyEnv typ + +plTypeWithHypos :: Type -> [String] 
+plTypeWithHypos (DTyp hypos cat args) = [plTerm (plp cat) (map plp args), plList (map (\(_,x,ty) -> plOper ":" (plp x) (plp ty)) hypos)] + +plFundef :: (CId, (Type,Int,[Equation])) -> [String] +plFundef (fun, (_,_,[])) = [] +plFundef (fun, (_,_,eqs)) = [plFact "def" [plp fun, plp fundef']] + where fundef' = snd $ alphaConvert emptyEnv eqs + + +---------------------------------------------------------------------- +-- concrete syntax + +plConcrete :: (CId, Concr) -> [String] +plConcrete (cncname, Concr cflags lins opers lincats lindefs + _printnames _paramlincats _parser) = + ["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", + "%% concrete module: " ++ plp cncname] ++ + clauseHeader "%% cncflag(?Flag, ?Value): flags for concrete syntax" + (map (mod . plpFact2 "cncflag") (Map.assocs cflags)) ++ + clauseHeader "%% lincat(?Cat, ?Linearization type)" + (map (mod . plpFact2 "lincat") (Map.assocs lincats)) ++ + clauseHeader "%% lindef(?Cat, ?Linearization default)" + (map (mod . plpFact2 "lindef") (Map.assocs lindefs)) ++ + clauseHeader "%% lin(?Fun, ?Linearization)" + (map (mod . plpFact2 "lin") (Map.assocs lins)) ++ + clauseHeader "%% oper(?Oper, ?Linearization)" + (map (mod . 
plpFact2 "oper") (Map.assocs opers)) + where mod clause = plp cncname ++ ": " ++ clause + + +---------------------------------------------------------------------- +-- prolog-printing pgf datatypes + +instance PLPrint Type where + plp (DTyp hypos cat args) | null hypos = result + | otherwise = plOper " -> " (plList (map (\(_,x,ty) -> plOper ":" (plp x) (plp ty)) hypos)) result + where result = plTerm (plp cat) (map plp args) + +instance PLPrint Expr where + plp (EFun x) = plp x + plp (EAbs _ x e)= plOper "^" (plp x) (plp e) + plp (EApp e e') = plOper " * " (plp e) (plp e') + plp (ELit lit) = plp lit + plp (EMeta n) = "Meta_" ++ show n + +instance PLPrint Patt where + plp (PVar x) = plp x + plp (PApp f ps) = plOper " * " (plp f) (plp ps) + plp (PLit lit) = plp lit + +instance PLPrint Equation where + plp (Equ patterns result) = plOper ":" (plp patterns) (plp result) + +instance PLPrint Term where + plp (S terms) = plTerm "s" [plp terms] + plp (C n) = plTerm "c" [show n] + plp (K tokn) = plTerm "k" [plp tokn] + plp (FV trms) = plTerm "fv" [plp trms] + plp (P t1 t2) = plTerm "p" [plp t1, plp t2] + plp (W s trm) = plTerm "w" [plp s, plp trm] + plp (R terms) = plTerm "r" [plp terms] + plp (F oper) = plTerm "f" [plp oper] + plp (V n) = plTerm "v" [show n] + plp (TM str) = plTerm "tm" [plp str] + +{-- more prolog-like syntax for PGF terms, but also more difficult to handle: +instance PLPrint Term where + plp (S terms) = plp terms + plp (C n) = show n + plp (K token) = plp token + plp (FV terms) = prCurlyList (map plp terms) + plp (P t1 t2) = plOper "/" (plp t1) (plp t2) + plp (W s trm) = plOper "+" (plp s) (plp trm) + plp (R terms) = plTerm "r" (map plp terms) + plp (F oper) = plTerm "f" [plp oper] + plp (V n) = plTerm "arg" [show n] + plp (TM str) = plTerm "meta" [plp str] +--} + +instance PLPrint CId where + plp cid | isLogicalVariable str || + cid == wildCId = plVar str + | otherwise = plAtom str + where str = showCId cid + +instance PLPrint Literal where + plp (LStr 
s) = plp s + plp (LInt n) = plp (show n) + plp (LFlt f) = plp (show f) + +instance PLPrint Tokn where + plp (KS tokn) = plp tokn + plp (KP strs alts) = plTerm "kp" [plp strs, plList [plOper "/" (plp ss1) (plp ss2) | + Alt ss1 ss2 <- alts]] + +---------------------------------------------------------------------- +-- basic prolog-printing + +class PLPrint a where + plp :: a -> String + plps :: [a] -> String + plps = plList . map plp + +instance PLPrint Char where + plp c = plAtom [c] + plps s = plAtom s + +instance PLPrint a => PLPrint [a] where + plp = plps + +plpFact2 :: (PLPrint a, PLPrint b) => String -> (a, b) -> String +plpFact2 fun (arg1, arg2) = plFact fun [plp arg1, plp arg2] + +plFact :: String -> [String] -> String +plFact fun args = plTerm fun args ++ "." + +plTerm :: String -> [String] -> String +plTerm fun args = plAtom fun ++ prParenth (prTList ", " args) + +plList :: [String] -> String +plList = prBracket . prTList "," + +plOper :: String -> String -> String -> String +plOper op a b = prParenth (a ++ op ++ b) + +plVar :: String -> String +plVar = varPrefix . 
concatMap changeNonAlphaNum + where varPrefix var@(c:_) | isAsciiUpper c || c=='_' = var + | otherwise = "_" ++ var + changeNonAlphaNum c | isAlphaNumUnderscore c = [c] + | otherwise = "_" ++ show (ord c) ++ "_" + +plAtom :: String -> String +plAtom "" = "''" +plAtom atom@(c:cs) | isAsciiLower c && all isAlphaNumUnderscore cs + || c == '\'' && cs /= "" && last cs == '\'' = atom + | otherwise = "'" ++ concatMap changeQuote atom ++ "'" + where changeQuote '\'' = "\\'" + changeQuote c = [c] + +isAlphaNumUnderscore :: Char -> Bool +isAlphaNumUnderscore c = isAlphaNum c || c == '_' + + +---------------------------------------------------------------------- +-- prolog variables + +createLogicalVariable :: Int -> CId +createLogicalVariable n = mkCId (logicalVariablePrefix ++ show n) + +isLogicalVariable :: String -> Bool +isLogicalVariable = isPrefixOf logicalVariablePrefix + +logicalVariablePrefix :: String +logicalVariablePrefix = "X" + +---------------------------------------------------------------------- +-- alpha convert variables to (unique) logical variables +-- * this is needed if we want to translate variables to Prolog variables +-- * used for abstract syntax, not concrete +-- * not (yet?) 
used for variables bound in pattern equations + +type ConvertEnv = (Int, [(CId,CId)]) + +emptyEnv :: ConvertEnv +emptyEnv = (0, []) + +class AlphaConvert a where + alphaConvert :: ConvertEnv -> a -> (ConvertEnv, a) + +instance AlphaConvert a => AlphaConvert [a] where + alphaConvert env [] = (env, []) + alphaConvert env (a:as) = (env'', a':as') + where (env', a') = alphaConvert env a + (env'', as') = alphaConvert env' as + +instance AlphaConvert Type where + alphaConvert env@(_,subst) (DTyp hypos cat args) + = ((ctr,subst), DTyp hypos' cat args') + where (env', hypos') = mapAccumL alphaConvertHypo env hypos + ((ctr,_), args') = alphaConvert env' args + +alphaConvertHypo env (b,x,typ) = ((ctr+1,(x,x'):subst), (b,x',typ')) + where ((ctr,subst), typ') = alphaConvert env typ + x' = createLogicalVariable ctr + +instance AlphaConvert Expr where + alphaConvert (ctr,subst) (EAbs b x e) = ((ctr',subst), EAbs b x' e') + where ((ctr',_), e') = alphaConvert (ctr+1,(x,x'):subst) e + x' = createLogicalVariable ctr + alphaConvert env (EApp e1 e2) = (env'', EApp e1' e2') + where (env', e1') = alphaConvert env e1 + (env'', e2') = alphaConvert env' e2 + alphaConvert env expr@(EFun i) = (env, maybe expr EFun (lookup i (snd env))) + alphaConvert env expr = (env, expr) + +-- pattern variables are not alpha converted +-- (but they probably should be...) +instance AlphaConvert Equation where + alphaConvert env@(_,subst) (Equ patterns result) + = ((ctr,subst), Equ patterns result') + where ((ctr,_), result') = alphaConvert env result diff --git a/src/compiler/GF/Compile/GenerateFCFG.hs b/src/compiler/GF/Compile/GenerateFCFG.hs new file mode 100644 index 000000000..52e95f686 --- /dev/null +++ b/src/compiler/GF/Compile/GenerateFCFG.hs @@ -0,0 +1,568 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. 
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------


module GF.Compile.GenerateFCFG
    (convertConcrete) where

import PGF.CId
import PGF.Data
import PGF.Macros --hiding (prt)
import PGF.Parsing.FCFG.Utilities

import GF.Data.BacktrackM
import GF.Data.SortedList
import GF.Data.Utilities (updateNthM, sortNub)

import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.ByteString.Char8 as BS
import Data.Array.IArray
import Data.Maybe
import Control.Monad

----------------------------------------------------------------------
-- main conversion function

-- | Convert one concrete syntax into parser information.  The abstract
--   functions, the linearization definitions (opers and lins merged into one
--   map) and the lincats first go through 'expandHOAS'; the expanded grammar
--   is handed to 'convert' and the result is post-processed by 'fixHoasFuns'.
convertConcrete :: Abstr -> Concr -> ParserInfo
convertConcrete abs cnc =
    let absFuns = Map.assocs (funs abs)
        linDefs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
        (absFuns', linDefs', linCats') = expandHOAS absFuns linDefs (lincats cnc)
    in  fixHoasFuns (convert absFuns' linDefs' linCats')

-- | Extend the function list, the linearizations and the lincats with the
--   auxiliary categories and functions used for higher-order arguments.
--   The result lists the original entries first and the generated ones after.
expandHOAS :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ([(CId,(Type,Int,[Equation]))],TermMap,TermMap)
expandHOAS funs lins lincats = (allFuns, allLins, allLincats)
  where
    allFuns    = flatFuns ++ binderFuns ++ coercionFuns
    allLins    = Map.unions [lins, binderLins, coercionLins]
    allLincats = Map.unions [lincats, binderLincats, varLincat]

    -- the original functions, with every higher-order argument type
    -- replaced by the name of its new category
    flatFuns = [ (f, (flatten ty, arity, eqs)) | (f, (ty, arity, eqs)) <- funs ]
      where
        flatten :: Type -> Type
        flatten ty = cftype (map catName argTypes) resType
          where (argTypes, resType) = typeSkeleton ty

    -- every (number of bindings, category) pair with at least one binding
    -- that occurs as an argument type of some function
    hoTypes :: [(Int,CId)]
    hoTypes = sortNub [ hoArg | (_, (ty, _, _)) <- funs
                              , hoArg@(n, _) <- fst (typeSkeleton ty)
                              , n > 0 ]

    -- the categories occurring in hoTypes
    hoCats = sortNub [ c | (_, c) <- hoTypes ]

    -- for each Cat with N bindings there is a new category _NCat, and
    -- each new category contains a single function
    --   __NCat : Cat -> _Var -> ... -> _Var -> _NCat
    binderFuns = map mkBinderFun hoTypes
      where
        mkBinderFun ty@(n, c) =
          (funName ty, (cftype (c : replicate n varCat) (catName ty), 0, []))

    -- lincats for the new categories: the lincat of Cat extended with
    -- N additional string fields
    binderLincats = Map.fromList (map mkBinderLincat hoTypes)
      where
        mkBinderLincat ty@(n, c) =
          (catName ty, modifyRec (++ replicate n (S [])) (lincatOf c))

    -- linearizations of the new functions: one projection from argument 0
    -- (the Cat value) for every field of Cat's lincat, followed by field 0
    -- of each of the arguments 1..N (the variables)
    binderLins = Map.fromList (map mkBinderLin hoTypes)
      where
        mkBinderLin ty@(n, c) = (funName ty, modifyRec project (lincatOf c))
          where
            project fields = map (P (V 0) . C) [0 .. length fields - 1]
                          ++ map (\i -> P (V i) (C 0)) [1 .. n]

    -- for each Cat, we add a fun _Var_Cat : _Var -> Cat
    coercionFuns = map (\cat -> (varFunName cat, (cftype [varCat] cat, 0, []))) hoCats

    -- linearizations of the _Var_Cat functions
    coercionLins = Map.fromList (map (\cat -> (varFunName cat, R [P (V 0) (C 0)])) hoCats)

    -- lincat for the _Var category
    varLincat = Map.singleton varCat (R [S []])

    -- lincat lookup; a missing lincat is a fatal error
    lincatOf c =
      case Map.lookup c lincats of
        Just lincat -> lincat
        Nothing     -> error $ "No lincat for " ++ showCId c

    -- apply a function to the fields of a record term; anything that is
    -- not a record is a fatal error
    modifyRec :: ([Term] -> [Term]) -> Term -> Term
    modifyRec f t =
      case t of
        R fields -> R (f fields)
        _        -> error $ "Not a record: " ++ show t

    varCat = mkCId "_Var"

    -- name of the category standing for Cat with N bindings (Cat itself
    -- when there are no bindings)
    catName :: (Int,CId) -> CId
    catName (n, c)
      | n == 0    = c
      | otherwise = mkCId ("_" ++ show n ++ showCId c)

    funName :: (Int,CId) -> CId
    funName (n, c) = mkCId ("__" ++ show n ++ showCId c)

    varFunName :: CId -> CId
    varFunName c = mkCId ("_Var_" ++ showCId c)

-- replaces __NCat with _B and _Var_Cat with _.
-- the temporary names are just there to avoid name collisions.
+fixHoasFuns :: ParserInfo -> ParserInfo +fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]} + where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") + | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId + fixName n = n + +convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo +convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv) + where + srules = [ + (XRule id args res (map findLinType args) (findLinType res) term) | + (id, (ty,_,_)) <- abs_defs, let (args,res) = catSkeleton ty, + term <- maybeToList (Map.lookup id cnc_defs)] + + findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + + (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules + where + helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = + let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap + grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) + grammarEnv + (mkSingletonSelectors cnc_defs cnc_res) + in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv') + + loop grammarEnv = + let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv + in case todo of + [] -> grammarEnv' + _ -> loop $! 
List.foldl' (\env (srules,selector) -> + List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo + +convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv +convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv = + foldBM addRule + grammarEnv + (convertTerm cnc_defs selector term [([],[])]) + (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes) + where + addRule linRec (newCat', newArgs', _, _) env0 = + let (env1, newCat) = genFCatHead env0 newCat' + (env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) -> + let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths] + (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs + in case xcat of + PFCat _ [] _ -> (env , args, all_args) + _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) + (env1,[],[]) (zip3 newArgs' ctypes [0..]) + + (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs}) + + (_,newProfile) = List.mapAccumL accumProf 0 newArgs' + where + accumProf nr (PFCat _ [] _,_ ) = (nr, [] ) + accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt]) + where cnt = length xpaths + + (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec)) + + in addProduction env4 newCat (FApply funid newArgs) + +translateLin idxArgs [] grammarEnv lbl' = error "translateLin" +translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl' + | lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms) + | otherwise = translateLin idxArgs lins grammarEnv lbl' + where + instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) + (\t -> case t of + KS s -> FSymKS [s] + KP strs vars -> FSymKP strs vars) + instCat lbl nr xnr nr' ((idx,xargs):idxArgs) + | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! 
xnr + in FSymCat (nr'+xnr) (index lbl rcs 0) + | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs + + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! (idx+1) + + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BacktrackM Env a + +type FPath = [FIndex] +type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term]) +type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])] + +type TermMap = Map.Map CId Term + +convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec +convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins +convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins +convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins + +convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel + convertTerm cnc_defs (TuplePrj nr selector) term lins +convertTerm cnc_defs selector (FV vars) lins = do term <- member vars + convertTerm cnc_defs selector term lins +convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path + foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) +convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = + do projectHead lbl_path + return ((lbl_path,Right (KS str) : lin) : lins) +convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) = + do projectHead lbl_path + toks <- member (strs:[strs' | Alt strs' _ <- vars]) + return ((lbl_path, map (Right . 
KS) toks ++ lin) : lins) +convertTerm cnc_defs selector (F id) lins = case Map.lookup id cnc_defs of + Just term -> convertTerm cnc_defs selector term lins + Nothing -> mzero +convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do + ss <- case t of + R ss -> return ss + F f -> case Map.lookup f cnc_defs of + Just (R ss) -> return ss + _ -> mzero + convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins +convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")") + + +convertArg (TupleSel record) nr path lbl_path lin lins = + foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record +convertArg (TuplePrj lbl selector) nr path lbl_path lin lins = + convertArg selector nr (lbl:path) lbl_path lin lins +convertArg (ConSel indices) nr path lbl_path lin lins = do + index <- member indices + restrictHead lbl_path index + restrictArg nr path index + return lins +convertArg StrSel nr path lbl_path lin lins = do + projectHead lbl_path + xnr <- projectArg nr path + return ((lbl_path, Left (path, nr, xnr) : lin) : lins) + +convertCon (ConSel indices) index lbl_path lin lins = do + guard (index `elem` indices) + restrictHead lbl_path index + return lins +convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x + +convertRec cnc_defs selector index [] lbl_path lin lins = return lins +convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields + where + select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins + select ((index',sub_sel) : fields) + | index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins) + convertRec cnc_defs selector (index+1) record lbl_path lin lins + | otherwise = select fields +convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do + convertTerm cnc_defs sub_sel (record !! 
(index'-index)) ((lbl_path,lin) : lins) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = do term <- readArgCType nr + unifyPType nr (reverse path) (selectTerm path term) +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of + Just term -> evalTerm cnc_defs path term + Nothing -> mzero +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + +unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex +unifyPType nr path (C max_index) = + do (_, args, _, _) <- get + let (PFCat _ _ tcs,_) = args !! nr + case lookup path tcs of + Just index -> return index + Nothing -> do index <- member [0..max_index] + restrictArg nr path index + return index +unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007 + +selectTerm :: FPath -> Term -> Term +selectTerm [] term = term +selectTerm (index:path) (R record) = selectTerm path (record !! 
index) + + +---------------------------------------------------------------------- +-- GrammarEnv + + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production)) +type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat))) +type FSeqSet = Map.Map FSeq SeqId +type FFunSet = Map.Map FFun FunId + +data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] + +protoFCat :: CId -> ProtoFCat +protoFCat cat = PFCat cat [] [] + +emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty + where + initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $ + ins fcatInt (mkCId "Int") [[0]] [] $ + ins fcatFloat (mkCId "Float") [[0]] [] $ + ins fcatVar (mkCId "_Var") [[0]] [] $ + Map.empty) + + ins fcat cat rcs tcs catSet = + Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet + where + right_fcat = Right fcat + tmap_s = Map.singleton tcs right_fcat + rmap_s = Map.singleton rcs tmap_s + +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) + +addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq) + where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid) + +getParserInfo :: GrammarEnv -> ParserInfo 
+getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions0= prodSet + , productions = prodSet + , startCats = Map.map getFCatList catSet + , totalCats = last_id+1 + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs + + +genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) +genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of + Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat) + Just (Right fcat) -> (env, fcat) + Nothing -> let fcat = last_id+1 + in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat) + where + ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet + where + right_fcat = Right fcat + tmap_s = Map.singleton tcs right_fcat + rmap_s = Map.singleton rcs tmap_s + +genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) +genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet >>= Map.lookup rcs of + Just tmap -> case Map.lookup tcs tmap of + Just (Left fcat) -> (env, fcat) + Just (Right fcat) -> (env, fcat) + Nothing -> ins tmap + Nothing -> ins Map.empty + where + ins tmap = + let fcat = last_id+1 + (either_fcat,last_id1,tmap1,prodSet1) + = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) -> + let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap + p = FCoerce fcat_arg + prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet + in if st + then (Right fcat, last_id1,tmap1,prodSet1) + else (either_fcat,last_id, tmap ,prodSet )) + (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet) + (gen_tcs ctype [] []) + 
False + rmap1 = Map.singleton rcs tmap1 + in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat) + where + addArg tcs last_id tmap = + case Map.lookup tcs tmap of + Just (Left fcat) -> (last_id, tmap, fcat) + Just (Right fcat) -> (last_id, tmap, fcat) + Nothing -> let fcat = last_id+1 + in (fcat, Map.insert tcs (Left fcat) tmap, fcat) + + gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] + gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) + gen_tcs (S _) path acc = return acc + gen_tcs (C max_index) path acc = + case List.lookup path tcs of + Just index -> return $! addConstraint path index acc + Nothing -> do put True + index <- member [0..max_index] + return $! addConstraint path index acc + where + addConstraint path0 index0 (c@(path,index) : cs) + | path0 > path = c:addConstraint path0 index0 cs + addConstraint path0 index0 cs = (path0,index0) : cs + gen_tcs (F id) path acc = case Map.lookup id cnc_defs of + Just term -> gen_tcs term path acc + Nothing -> error ("unknown identifier: "++showCId id) + + + +------------------------------------------------------------ +-- TODO queue organization + +type XRulesMap = Map.Map CId [XRule] +data XRule = XRule CId {- function -} + [CId] {- argument types -} + CId {- result type -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} + +takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv) +takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) = + (todo,GrammarEnv last_id catSet' seqSet funSet prodSet) + where + (todo,catSet') = + Map.mapAccumWithKey (\todo cat rmap -> + let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> + let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat -> + case either_xcat of + Left xcat -> (tcs:tcss,Right xcat) + Right 
xcat -> ( tcss,either_xcat)) [] tmap + in case tcss of + [] -> ( todo,tmap ) + _ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap + mb_srules = Map.lookup cat xrulesMap + Just srules = mb_srules + + in case mb_srules of + Just srules -> (todo1,rmap1) + Nothing -> (todo ,rmap1)) [] catSet + + +------------------------------------------------------------ +-- The TermSelector + +data TermSelector + = TupleSel [(FIndex, TermSelector)] + | TuplePrj FIndex TermSelector + | ConSel [FIndex] + | StrSel + deriving Show + +mkSingletonSelectors :: TermMap + -> Term -- ^ Type representation term + -> [TermSelector] -- ^ list of selectors containing just one string field +mkSingletonSelectors cnc_defs term = sels0 + where + (sels0,tcss0) = loop [] ([],[]) term + + loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record) + loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss) + loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss) + loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of + Just term -> loop path (sels,tcss) term + Nothing -> error ("unknown identifier: "++showCId id) + +mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector +mkSelector rcs tcss = + List.foldl' addRestriction (case xs of + (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys + where + xs = [ reverse path | path <- rcs] + ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs] + + addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector + addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices) + where + add [] = [n_index] + add (index':indices) + | n_index == index' = index': indices + | otherwise = index':add indices + addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields) + where + add [] = [(index,path2selector (ConSel [n_index]) path)] + add (field@(index',sub_sel):fields) + | index == index' = (index',addRestriction sub_sel 
(path,n_index)):fields + | otherwise = field : add fields + + addProjection :: TermSelector -> FPath -> TermSelector + addProjection StrSel [] = StrSel + addProjection (TupleSel fields) (index : path) = TupleSel (add fields) + where + add [] = [(index,path2selector StrSel path)] + add (field@(index',sub_sel):fields) + | index == index' = (index',addProjection sub_sel path):fields + | otherwise = field : add fields + + path2selector base [] = base + path2selector base (index : path) = TupleSel [(index,path2selector base path)] + +------------------------------------------------------------ +-- updating the MCF rule + +readArgCType :: FIndex -> CnvMonad Term +readArgCType nr = do (_, _, _, ctypes) <- get + return (ctypes !! nr) + +restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () +restrictArg nr path index = do + (head, args, ctype, ctypes) <- get + args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat + return (xcat,xs) ) nr args + put (head, args', ctype, ctypes) + +projectArg :: FIndex -> FPath -> CnvMonad Int +projectArg nr path = do + (head, args, ctype, ctypes) <- get + (xnr,args') <- updateArgs nr args + put (head, args', ctype, ctypes) + return xnr + where + updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])]) + updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as) + | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as) + | otherwise = do a <- projectProtoFCat path a + return (0,(a,xpaths):as) + updateArgs n (a : as) = do + (xnr,as) <- updateArgs (n-1) as + return (xnr,a:as) + +readHeadCType :: CnvMonad Term +readHeadCType = do (_, _, ctype, _) <- get + return ctype + +restrictHead :: FPath -> FIndex -> CnvMonad () +restrictHead path term + = do (head, args, ctype, ctypes) <- get + head' <- restrictProtoFCat path term head + put (head', args, ctype, ctypes) + +projectHead :: FPath -> CnvMonad () +projectHead path + = do (head, args, ctype, ctypes) <- get + head' <- projectProtoFCat path head 
+ put (head', args, ctype, ctypes) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat +restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat cat rcs tcs) + where + addConstraint (c@(path,index) : cs) + | path0 > path = liftM (c:) (addConstraint cs) + | path0 == path = guard (index0 == index) >> + return (c : cs) + addConstraint cs = return ((path0,index0) : cs) + +projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat +projectProtoFCat path0 (PFCat cat rcs tcs) = do + return (PFCat cat (addConstraint rcs) tcs) + where + addConstraint (path : rcs) + | path0 > path = path : addConstraint rcs + | path0 == path = path : rcs + addConstraint rcs = path0 : rcs + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs new file mode 100644 index 000000000..458cf3f5c --- /dev/null +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -0,0 +1,510 @@ +{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses #-} +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Convert PGF grammar to PMCFG grammar. 
+-- +----------------------------------------------------------------------------- + +module GF.Compile.GeneratePMCFG + (convertConcrete) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Infra.Option +import GF.Data.BacktrackM +import GF.Data.Utilities (updateNthM, updateNth, sortNub) + +import System.IO +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.List as List +import qualified Data.IntMap as IntMap +import qualified Data.ByteString.Char8 as BS +import Data.Array.IArray +import Data.Maybe +import Control.Monad +import Control.Exception + +---------------------------------------------------------------------- +-- main conversion function + + +convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo +convertConcrete opts abs lang cnc = do + let env0 = emptyGrammarEnv cnc_defs cat_defs + when (flag optProf opts) $ do + profileGrammar lang cnc_defs env0 pfrules + let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0 + env2 = List.foldl' (convertRule cnc_defs) env1 pfrules + return $ getParserInfo env2 + where + abs_defs = Map.assocs (funs abs) + cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cat_defs = Map.insert cidVar (S []) (lincats cnc) + lin_defs = lindefs cnc + + pfrules = [ + (PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | + (id, (ty,_,_)) <- abs_defs, let (args,res) = typeSkeleton ty, + term <- maybeToList (Map.lookup id cnc_defs)] + + findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + +profileGrammar lang cnc_defs (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do + hPutStrLn stderr "" + hPutStrLn stderr ("Language: " ++ show lang) + hPutStrLn stderr "" + hPutStrLn stderr "Categories Count" + hPutStrLn stderr "--------------------------------" + case IntMap.lookup 0 catSet of + Just cats -> mapM_ profileCat (Map.toList cats) + Nothing -> return 
()
  hPutStrLn stderr "--------------------------------"
  hPutStrLn stderr ""
  hPutStrLn stderr "Rules Count"
  hPutStrLn stderr "--------------------------------"
  mapM_ profileRule pfrules
  hPutStrLn stderr "--------------------------------"
  where
    -- one line per category: its name and the size of its category range
    profileCat (cid,(fcat1,fcat2,_)) = do
      hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1))

    -- one line per rule: its name and the product of the lengths of all
    -- index lists found in the proto-categories of its arguments
    profileRule (PFRule fun args res ctypes ctype term) = do
      let pargs = zipWith (protoFCat cnc_defs) args ctypes
      hPutStrLn stderr (lformat 23 fun ++ rformat 9 (product [length xs | PFCat _ _ _ tcs <- pargs, (_,xs) <- tcs]))

    -- 'show' the value and pad it with spaces on the right up to width n
    lformat :: Show a => Int -> a -> String
    lformat n x = s ++ replicate (n-length s) ' '
      where
        s = show x

    -- 'show' the value and pad it with spaces on the left up to width n
    rformat :: Show a => Int -> a -> String
    rformat n x = replicate (n-length s) ' ' ++ s
      where
        s = show x

-- | Run a grammar transformation against an empty production set and merge
--   the productions it creates back into the original production set in a
--   compacted form.
--
--   For every category the new 'FApply' productions are grouped by function
--   id (only 'FApply' productions are picked up from the new set).  When the
--   argument vectors of a group are exactly the cartesian product of the
--   per-position argument sets (the product of the set sizes equals the
--   number of vectors), the group is replaced by a single production whose
--   arguments come from 'addFCoercion' applied to each position's set.
--   Otherwise every production of the group is added unchanged.
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
  case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of
    (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1
  where
    optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps])
      where
        ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv
        ff funid xs env
          | product (map Set.size ys) == count =
              case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of
                (env,args) -> addProduction env cat (FApply funid args)
            -- strict left fold, as everywhere else in this module, so the
            -- environment is not left as a chain of unevaluated updates
          | otherwise = List.foldl' (\env args -> addProduction env cat (FApply funid args)) env xs
          where
            count = length xs
            -- ys !! i is the set of categories seen at argument position i
            ys = foldr (zipWith Set.insert) (repeat Set.empty) xs

convertRule :: TermMap -> GrammarEnv -> ProtoFRule -> GrammarEnv
convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) =
  let pres  = protoFCat cnc_defs res ctype
      pargs = zipWith (protoFCat cnc_defs) args ctypes

      b = 
runBranchM (convertTerm cnc_defs [] ctype term) (pargs,[]) + (grammarEnv1,b1) = addSequences' grammarEnv b + grammarEnv2 = brk (\grammarEnv -> foldBM addRule + grammarEnv + (go' b1 [] []) + (pres,pargs) ) grammarEnv1 + in grammarEnv2 + where + addRule lins (newCat', newArgs') env0 = + let [newCat] = getFCats env0 newCat' + (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' + + (env2,funid) = addFFun env1 (FFun fun [[n] | n <- [0..length newArgs-1]] (mkArray lins)) + + in addProduction env2 newCat (FApply funid newArgs) + +---------------------------------------------------------------------- +-- Branch monad + +newtype BranchM a = BM (forall b . (a -> ([ProtoFCat],[FSymbol]) -> Branch b) -> ([ProtoFCat],[FSymbol]) -> Branch b) + +instance Monad BranchM where + return a = BM (\c s -> c a s) + BM m >>= k = BM (\c s -> m (\a s -> unBM (k a) c s) s) + where unBM (BM m) = m + +instance MonadState ([ProtoFCat],[FSymbol]) BranchM where + get = BM (\c s -> c s s) + put s = BM (\c _ -> c () s) + +instance Functor BranchM where + fmap f (BM m) = BM (\c s -> m (c . f) s) + +runBranchM :: BranchM (Value a) -> ([ProtoFCat],[FSymbol]) -> Branch a +runBranchM (BM m) s = m (\v s -> Return v) s + +variants :: [a] -> BranchM a +variants xs = BM (\c s -> Variant (go xs c s)) + where + go [] c s = [] + go (x:xs) c s = c x s : go xs c s + +choices :: Int -> FPath -> BranchM FIndex +choices nr path = BM (\c s -> let (args,_) = s + PFCat _ _ _ tcs = args !! 
nr + in case fromMaybe (error "evalTerm: wrong path") (lookup path tcs) of + [index] -> c index s + indices -> Case nr path (go indices c s)) + where + go [] c s = [] + go (i:is) c s = (c i (updateEnv i s)) : go is c s + + updateEnv index (args,seq) = (updateNth (restrictArg path index) nr args,seq) + + restrictArg path index (PFCat n cat rcs tcs) = PFCat n cat rcs (addConstraint path index tcs) + + addConstraint path0 index0 [] = error "restrictProtoFCat: unknown path" + addConstraint path0 index0 (c@(path,indices) : tcs) + | path0 == path = ((path,[index0]) : tcs) + | otherwise = c : addConstraint path0 index0 tcs + +mkRecord :: [BranchM (Value a)] -> BranchM (Value a) +mkRecord xs = BM (\c -> go xs (c . Rec)) + where + go [] c s = c [] s + go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s + +-- cutBranch :: BranchM (Value a) -> BranchM (Branch a) +-- cutBranch (BM m) = BM (\c e -> c (m (\v e -> Return v) e) e) + + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BranchM a + +type FPath = [FIndex] +data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])] +type Env = (ProtoFCat, [ProtoFCat]) +data ProtoFRule = PFRule CId {- function -} + [(Int,CId)] {- argument types: context size and category -} + (Int,CId) {- result type : context size (always 0) and category -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} +type TermMap = Map.Map CId Term + + +protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat +protoFCat cnc_defs (n,cat) ctype = + let (rcs,tcs) = loop [] [] [] ctype' + in PFCat n cat rcs tcs + where + ctype' -- extend the high-order linearization type + | n > 0 = case ctype of + R xs -> R (xs ++ replicate n (S [])) + _ -> error $ "Not a record: " ++ show ctype + | otherwise = ctype + + loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] 
record) + loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) + loop path rcs tcs (S _) = (path:rcs, tcs) + loop path rcs tcs (F id) = case Map.lookup id cnc_defs of + Just term -> loop path rcs tcs term + Nothing -> error ("unknown identifier: "++show id) + +data Branch a + = Case Int FPath [Branch a] + | Variant [Branch a] + | Return (Value a) + +data Value a + = Rec [Branch a] + | Str a + | Con FIndex + + +go' :: Branch SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] +go' (Case nr path_ bs) path ss = do (index,b) <- member (zip [0..] bs) + restrictArg nr path_ index + go' b path ss +go' (Variant bs) path ss = do b <- member bs + go' b path ss +go' (Return v) path ss = go v path ss + +go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] +go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs) +go (Str seqid) path ss = return (seqid : ss) +go (Con i) path ss = restrictHead path i >> return ss + +addSequences' :: GrammarEnv -> Branch [FSymbol] -> (GrammarEnv, Branch SeqId) +addSequences' env (Case nr path bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs + in (env1,Case nr path bs1) +addSequences' env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs + in (env1,Variant bs1) +addSequences' env (Return v) = let (env1,v1) = addSequences env v + in (env1,Return v1) + +addSequences :: GrammarEnv -> Value [FSymbol] -> (GrammarEnv, Value SeqId) +addSequences env (Rec vs) = let (env1,vs1) = List.mapAccumL addSequences' env vs + in (env1,Rec vs1) +addSequences env (Str lin) = let (env1,seqid) = addFSeq env (optimizeLin lin) + in (env1,Str seqid) +addSequences env (Con i) = (env,Con i) + + +optimizeLin [] = [] +optimizeLin lin@(FSymKS _ : _) = + let (ts,lin') = getRest lin + in FSymKS ts : optimizeLin lin' + where + getRest (FSymKS ts : lin) = let (ts1,lin') = getRest lin + in (ts++ts1,lin') + getRest lin = ([],lin) +optimizeLin (sym : lin) = sym : optimizeLin lin + + +convertTerm :: TermMap -> FPath -> Term 
-> Term -> CnvMonad (Value [FSymbol]) +convertTerm cnc_defs sel ctype (V nr) = convertArg ctype nr (reverse sel) +convertTerm cnc_defs sel ctype (C nr) = convertCon ctype nr (reverse sel) +convertTerm cnc_defs sel ctype (R record) = convertRec cnc_defs sel ctype record +convertTerm cnc_defs sel ctype (P term p) = do nr <- evalTerm cnc_defs [] p + convertTerm cnc_defs (nr:sel) ctype term +convertTerm cnc_defs sel ctype (FV vars) = do term <- variants vars + convertTerm cnc_defs sel ctype term +convertTerm cnc_defs sel ctype (S ts) = do vs <- mapM (convertTerm cnc_defs sel ctype) ts + return (Str (concat [s | Str s <- vs])) +convertTerm cnc_defs sel ctype (K (KS t)) = return (Str [FSymKS [t]]) +convertTerm cnc_defs sel ctype (K (KP s v))=return (Str [FSymKP s v]) +convertTerm cnc_defs sel ctype (F id) = case Map.lookup id cnc_defs of + Just term -> convertTerm cnc_defs sel ctype term + Nothing -> error ("unknown id " ++ showCId id) +convertTerm cnc_defs sel ctype (W s t) = do + ss <- case t of + R ss -> return ss + F f -> case Map.lookup f cnc_defs of + Just (R ss) -> return ss + _ -> error ("unknown id " ++ showCId f) + convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] +convertTerm cnc_defs sel ctype x = error ("convertTerm ("++show x++")") + +convertArg :: Term -> Int -> FPath -> CnvMonad (Value [FSymbol]) +convertArg (R ctypes) nr path = do + mkRecord (zipWith (\lbl ctype -> convertArg ctype nr (lbl:path)) [0..] ctypes) +convertArg (C max) nr path = do + index <- choices nr path + return (Con index) +convertArg (S _) nr path = do + (args,_) <- get + let PFCat _ cat rcs tcs = args !! nr + l = index path rcs 0 + sym | isLiteralCat cat = FSymLit nr l + | otherwise = FSymCat nr l + return (Str [sym]) + where + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! 
(idx+1) + +convertCon (C max) index [] = return (Con index) +convertCon x _ _ = fail $ "SimpleToFCFG.convertCon: " ++ show x + +convertRec cnc_defs [] (R ctypes) record = do + mkRecord (zipWith (convertTerm cnc_defs []) ctypes record) +convertRec cnc_defs (index:sub_sel) ctype record = + convertTerm cnc_defs sub_sel ctype (record !! index) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = choices nr (reverse path) +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = variants terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of + Just term -> evalTerm cnc_defs path term + Nothing -> error ("unknown id " ++ showCId id) +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + + +---------------------------------------------------------------------- +-- GrammarEnv + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) +type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int])) +type SeqSet = Map.Map FSeq SeqId +type FunSet = Map.Map FFun FunId +type CoerceSet= Map.Map [FCat] FCat + +emptyGrammarEnv cnc_defs lincats = + let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats + in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty + where + computeCatRange index cat ctype + | cat == cidString = (index, (fcatString,fcatString,[])) + | cat == cidInt = (index, (fcatInt, fcatInt, [])) + | cat == cidFloat = (index, (fcatFloat, fcatFloat, [])) + | cat == cidVar = (index, (fcatVar, fcatVar, [])) + | otherwise = 
(index+size,(index,index+size-1,poly)) + where + (size,poly) = getMultipliers 1 [] ctype + + getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (S _) = (m,ms) + getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) + getMultipliers m ms (F id) = case Map.lookup id cnc_defs of + Just term -> getMultipliers m ms term + Nothing -> error ("unknown identifier: "++showCId id) + +expandHOAS abs_defs cnc_defs lincats lindefs env = + foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats + where + hoTypes :: [(Int,CId)] + hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs + , (n,c) <- fst (typeSkeleton ty), n > 0] + + hoCats :: [CId] + hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs + , h <- case ty of {DTyp hyps val _ -> hyps} + , let ty = typeOfHypo h + , c <- fst (catSkeleton ty)] + + -- add a range of PMCFG categories for each GF high-order category + add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = + case IntMap.lookup 0 catSet >>= Map.lookup cat of + Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet + !last_id' = last_id+(end-start)+1 + in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) + Nothing -> env + + -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... 
-> Var -> HoCat + add_hoFun env (n,cat) = + let linRec = reverse $ + [[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ + [[FSymLit i 0] | i <- [1..n]] + (env1,lins) = List.mapAccumL addFSeq env linRec + newLinRec = mkArray lins + + (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec) + + env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar))) + env2 + (zip (getFCats env2 arg) (getFCats env2 res)) + in env3 + where + (arg,res) = case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ showCId cat + Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype) + + -- add one PMCFG function for each high-order category: _V : Var -> Cat + add_varFun env cat = + convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) + where + lindef = + case Map.lookup cat lindefs of + Nothing -> error $ "No lindef for " ++ showCId cat + Just def -> def + + arg = + case Map.lookup cidVar lincats of + Nothing -> error $ "No lincat for " ++ showCId cat + Just ctype -> ctype + + res = + case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ showCId cat + Just ctype -> ctype + + _B = mkCId "_B" + _V = mkCId "_V" + +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) + +addFSeq :: GrammarEnv -> [FSymbol] -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) lst = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq) + where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet 
prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid) + +addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat) +addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats = + case sub_fcats of + [fcat] -> (env,fcat) + _ -> case Map.lookup sub_fcats crcSet of + Just fcat -> (env,fcat) + Nothing -> let !fcat = last_id+1 + in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions0= productions0 + , productions = filterProductions productions0 + , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) + , totalCats = last_id+1 + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + productions0 = IntMap.union prodSet coercions + coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] + +getFCats :: GrammarEnv -> ProtoFCat -> [FCat] +getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) + where + variants _ [] fcat = return fcat + variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices + variants ms tcs ((m*index) + fcat) + + +------------------------------------------------------------ +-- updating the MCF rule + +restrictArg :: FIndex -> FPath -> FIndex -> BacktrackM Env () +restrictArg nr path index = do + (head, args) <- get + args' <- updateNthM (restrictProtoFCat path index) nr args + put (head, args') + +restrictHead :: FPath -> 
FIndex -> BacktrackM Env () +restrictHead path term + = do (head, args) <- get + head' <- restrictProtoFCat path term head + put (head', args) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> BacktrackM Env ProtoFCat +restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat n cat rcs tcs) + where + addConstraint [] = error "restrictProtoFCat: unknown path" + addConstraint (c@(path,indices) : tcs) + | path0 == path = guard (index0 `elem` indices) >> + return ((path,[index0]) : tcs) + | otherwise = liftM (c:) (addConstraint tcs) + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GeneratePMCFGOld.hs b/src/compiler/GF/Compile/GeneratePMCFGOld.hs new file mode 100644 index 000000000..244ed68fe --- /dev/null +++ b/src/compiler/GF/Compile/GeneratePMCFGOld.hs @@ -0,0 +1,374 @@ +{-# LANGUAGE BangPatterns, CPP #-} +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. 
+----------------------------------------------------------------------------- + +module GF.Compile.GeneratePMCFG + (convertConcrete) where + +import PGF.CId +import PGF.Data +import PGF.Macros --hiding (prt) + +import GF.Data.BacktrackM +import GF.Data.SortedList +import GF.Data.Utilities (updateNthM, sortNub) + +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.List as List +import qualified Data.IntMap as IntMap +import qualified Data.ByteString.Char8 as BS +import Data.Array.IArray +import Data.Maybe +import Control.Monad +import Debug.Trace + +---------------------------------------------------------------------- +-- main conversion function + +convertConcrete :: Abstr -> Concr -> ParserInfo +convertConcrete abs cnc = convert abs_defs conc cats + where abs_defs = Map.assocs (funs abs) + conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cats = lincats cnc + +convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo +convert abs_defs cnc_defs cat_defs = + let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs) + in getParserInfo (List.foldl' (convertRule cnc_defs) env xrules) + where + xrules = [ + (XRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | + (id, (ty,_)) <- abs_defs, let (args,res) = typeSkeleton ty, + term <- maybeToList (Map.lookup id cnc_defs)] + + findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + +brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) +brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of + (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1 + where + optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- 
Set.toList ps]) + where + ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv + ff funid xs env + | product (map Set.size ys) == count = + case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of + (env,args) -> addProduction env cat (FApply funid args) + | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs + where + count = length xs + ys = foldr (zipWith Set.insert) (repeat Set.empty) xs + +convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv +convertRule cnc_defs grammarEnv (XRule fun args res ctypes ctype term) = + brk (\grammarEnv -> foldBM addRule + grammarEnv + (convertTerm cnc_defs [] ctype term [([],[])]) + (protoFCat cnc_defs res ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv + where + addRule linRec (newCat', newArgs') env0 = + let [newCat] = getFCats env0 newCat' + (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' + + (env2,lins) = List.mapAccumL addFSeq env1 linRec + newLinRec = mkArray lins + + (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec) + + in addProduction env3 newCat (FApply funid newArgs) + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BacktrackM Env a + +type FPath = [FIndex] +data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])] +type Env = (ProtoFCat, [ProtoFCat]) +type LinRec = [(FPath, [FSymbol])] +data XRule = XRule CId {- function -} + [(Int,CId)] {- argument types: context size and category -} + (Int,CId) {- result type : context size (always 0) and category -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} + +protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat +protoFCat cnc_defs (n,cat) ctype = + let (rcs,tcs) = loop [] [] [] ctype' + in PFCat n cat rcs tcs + where + ctype' -- extend the high-order linearization type + | n > 0 = case ctype of + R xs -> 
R (xs ++ replicate n (S [])) + _ -> error $ "Not a record: " ++ show ctype + | otherwise = ctype + + loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) + loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) + loop path rcs tcs (S _) = (path:rcs, tcs) + loop path rcs tcs (F id) = case Map.lookup id cnc_defs of + Just term -> loop path rcs tcs term + Nothing -> error ("unknown identifier: "++show id) + +type TermMap = Map.Map CId Term + +convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec +convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins +convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins +convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins +convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p + convertTerm cnc_defs (nr:sel) ctype term lins +convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars + convertTerm cnc_defs sel ctype term lins +convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts) +--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins) +convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins) +convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) = + do toks <- member (strs:[strs' | Alt strs' _ <- vars]) + return ((lbl_path, map (FSymTok . 
KS) toks ++ lin) : lins) +convertTerm cnc_defs sel ctype (F id) lins = case Map.lookup id cnc_defs of + Just term -> convertTerm cnc_defs sel ctype term lins + Nothing -> mzero +convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do + ss <- case t of + R ss -> return ss + F f -> case Map.lookup f cnc_defs of + Just (R ss) -> return ss + _ -> mzero + convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins +convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")") + + +convertArg (R record) nr path lbl_path lin lins = + foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record) +convertArg (C max) nr path lbl_path lin lins = do + index <- member [0..max] + restrictHead lbl_path index + restrictArg nr path index + return lins +convertArg (S _) nr path lbl_path lin lins = do + (_, args) <- get + let PFCat _ cat rcs tcs = args !! nr + l = index path rcs 0 + sym | isLiteralCat cat = FSymLit nr l + | otherwise = FSymCat nr l + return ((lbl_path, sym : lin) : lins) + where + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! (idx+1) + + +convertCon (C max) index [] lbl_path lin lins = do + guard (index <= max) + restrictHead lbl_path index + return lins +convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x + +convertRec cnc_defs [] (R ctypes) record lbl_path lin lins = + foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins)) + lins + (zip3 [0..] ctypes record) +convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do + convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = do (_, args) <- get + let PFCat _ _ _ tcs = args !! 
nr + rpath = reverse path + index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs)) + restrictArg nr rpath index + return index +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of + Just term -> evalTerm cnc_defs path term + Nothing -> mzero +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + + +---------------------------------------------------------------------- +-- GrammarEnv + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) +type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int])) +type SeqSet = Map.Map FSeq SeqId +type FunSet = Map.Map FFun FunId +type CoerceSet= Map.Map [FCat] FCat + +emptyGrammarEnv cnc_defs lincats = + let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats + in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty + where + computeCatRange index cat ctype + | cat == cidString = (index, (fcatString,fcatString,[])) + | cat == cidInt = (index, (fcatInt, fcatInt, [])) + | cat == cidFloat = (index, (fcatFloat, fcatFloat, [])) + | otherwise = (index+size,(index,index+size-1,poly)) + where + (size,poly) = getMultipliers 1 [] ctype + + getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (S _) = (m,ms) + getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) + getMultipliers m ms (F id) = case Map.lookup id cnc_defs of + Just term -> getMultipliers m ms term + Nothing -> error ("unknown identifier: "++prCId id) + + +expandHOAS abs_defs cnc_defs lincats env = + foldl add_varFun 
(foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats + where + hoTypes :: [(Int,CId)] + hoTypes = sortNub [(n,c) | (_,(ty,_)) <- abs_defs + , (n,c) <- fst (typeSkeleton ty), n > 0] + + hoCats :: [CId] + hoCats = sortNub [c | (_,(ty,_)) <- abs_defs + , Hyp _ ty <- case ty of {DTyp hyps val _ -> hyps} + , c <- fst (catSkeleton ty)] + + -- add a range of PMCFG categories for each GF high-order category + add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = + case IntMap.lookup 0 catSet >>= Map.lookup cat of + Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet + !last_id' = last_id+(end-start)+1 + in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) + Nothing -> env + + -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat + add_hoFun env (n,cat) = + let linRec = reverse $ + [(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ + [([],[FSymLit i 0]) | i <- [1..n]] + (env1,lins) = List.mapAccumL addFSeq env linRec + newLinRec = mkArray lins + + (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec) + + env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar))) + env2 + (zip (getFCats env2 arg) (getFCats env2 res)) + in env3 + where + (arg,res) = case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ prCId cat + Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype) + + -- add one PMCFG function for each high-order category: _V : Var -> Cat + add_varFun env cat = + let (env1,seqid) = addFSeq env ([],[FSymLit 0 0]) + lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid + (env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins)) + env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar])) + env2 + (getFCats env2 res) + in env3 + where + res = 
case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ prCId cat + Just ctype -> protoFCat cnc_defs (0,cat) ctype + + _B = mkCId "_B" + _V = mkCId "_V" + + +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) + +addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq) + where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid) + +addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat) +addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats = + case sub_fcats of + [fcat] -> (env,fcat) + _ -> case Map.lookup sub_fcats crcSet of + Just fcat -> (env,fcat) + Nothing -> let !fcat = last_id+1 + in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions = IntMap.union prodSet coercions + , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) + , totalCats = last_id+1 + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + coercions = 
IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] + +getFCats :: GrammarEnv -> ProtoFCat -> [FCat] +getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) + where + variants _ [] fcat = return fcat + variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices + variants ms tcs ((m*index) + fcat) + +------------------------------------------------------------ +-- updating the MCF rule + +restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () +restrictArg nr path index = do + (head, args) <- get + args' <- updateNthM (restrictProtoFCat path index) nr args + put (head, args') + +restrictHead :: FPath -> FIndex -> CnvMonad () +restrictHead path term + = do (head, args) <- get + head' <- restrictProtoFCat path term head + put (head', args) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat +restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat n cat rcs tcs) + where + addConstraint [] = error "restrictProtoFCat: unknown path" + addConstraint (c@(path,indices) : tcs) + | path0 == path = guard (index0 `elem` indices) >> + return ((path,[index0]) : tcs) + | otherwise = liftM (c:) (addConstraint tcs) + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs new file mode 100644 index 000000000..c85f9588f --- /dev/null +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -0,0 +1,52 @@ +---------------------------------------------------------------------- +-- | +-- Module : GetGrammar +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/15 17:56:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.16 $ +-- +-- this module builds the internal GF grammar that is 
sent to the type checker +----------------------------------------------------------------------------- + +module GF.Compile.GetGrammar (getSourceModule, addOptionsToModule) where + +import GF.Data.Operations + +import GF.Infra.UseIO +import GF.Infra.Modules +import GF.Infra.Option +import GF.Grammar.Lexer +import GF.Grammar.Parser +import GF.Grammar.Grammar + +import GF.Compile.ReadFiles + +import Data.Char (toUpper) +import Data.List (nub) +import qualified Data.ByteString.Char8 as BS +import Control.Monad (foldM) +import System.Cmd (system) + +getSourceModule :: Options -> FilePath -> IOE SourceModule +getSourceModule opts file0 = ioe $ + catch (do file <- foldM runPreprocessor file0 (flag optPreprocessors opts) + content <- BS.readFile file + case runP pModDef content of + Left (Pn l c,msg) -> return (Bad (file++":"++show l++":"++show c++": "++msg)) + Right mo -> return (Ok (addOptionsToModule opts mo))) + (\e -> return (Bad (show e))) + +addOptionsToModule :: Options -> SourceModule -> SourceModule +addOptionsToModule opts = mapSourceModule (\m -> m { flags = flags m `addOptions` opts }) + +-- FIXME: should use System.IO.openTempFile +runPreprocessor :: FilePath -> String -> IO FilePath +runPreprocessor file0 p = do + let tmp = "_gf_preproc.tmp" + cmd = p +++ file0 ++ ">" ++ tmp + system cmd + return tmp diff --git a/src/compiler/GF/Compile/GrammarToGFCC.hs b/src/compiler/GF/Compile/GrammarToGFCC.hs new file mode 100644 index 000000000..fb92ef74c --- /dev/null +++ b/src/compiler/GF/Compile/GrammarToGFCC.hs @@ -0,0 +1,587 @@ +{-# LANGUAGE PatternGuards #-} +module GF.Compile.GrammarToGFCC (mkCanon2gfcc,addParsers) where + +import GF.Compile.Export +import qualified GF.Compile.GenerateFCFG as FCFG +import qualified GF.Compile.GeneratePMCFG as PMCFG + +import PGF.CId +import qualified PGF.Macros as CM +import qualified PGF.Data as C +import qualified PGF.Data as D +import GF.Grammar.Predef +import GF.Grammar.Printer +import GF.Grammar.Grammar +import qualified 
GF.Grammar.Lookup as Look +import qualified GF.Grammar as A +import qualified GF.Grammar.Macros as GM +import qualified GF.Compile.Concrete.Compute as Compute ---- +import qualified GF.Infra.Modules as M +import qualified GF.Infra.Option as O + +import GF.Infra.Ident +import GF.Infra.Option +import GF.Data.Operations + +import Data.List +import Data.Char (isDigit,isSpace) +import qualified Data.Map as Map +import qualified Data.ByteString.Char8 as BS +import Text.PrettyPrint +import Debug.Trace ---- + +-- when developing, swap commenting +--traceD s t = trace s t +traceD s t = t + + +-- the main function: generate PGF from GF. +mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF) +mkCanon2gfcc opts cnc gr = + (showIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon opts abs) gr) + where + abs = err (const c) id $ M.abstractOfConcrete gr c where c = identC (BS.pack cnc) + pars = mkParamLincat gr + +-- Adds parsers for all concretes +addParsers :: Options -> D.PGF -> IO D.PGF +addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] + return pgf { D.concretes = Map.fromList cncs } + where + conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts) + then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc + else return $ FCFG.convertConcrete (D.abstract pgf) cnc + return (lang,cnc { D.parser = Just pinfo }) + where + erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"}) + +-- Generate PGF from GFCM. +-- this assumes a grammar translated by canon2canon + +canon2gfcc :: Options -> (Ident -> Ident -> C.Term) -> SourceGrammar -> D.PGF +canon2gfcc opts pars cgr@(M.MGrammar ((a,abm):cms)) = + (if dump opts DumpCanon then trace (render (vcat (map (ppModule Qualified) (M.modules cgr)))) else id) $ + D.PGF an cns gflags abs cncs + where + -- abstract + an = (i2i a) + cns = map (i2i . 
fst) cms + abs = D.Abstr aflags funs cats catfuns + gflags = Map.empty + aflags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags abm)] + + mkDef (Just eqs) = [C.Equ ps' (mkExp scope' e) | (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps] + mkDef Nothing = [] + + mkArrity (Just a) = a + mkArrity Nothing = 0 + + -- concretes + lfuns = [(f', (mkType [] ty, mkArrity ma, mkDef pty)) | + (f,AbsFun (Just ty) ma pty) <- tree2list (M.jments abm), let f' = i2i f] + funs = Map.fromAscList lfuns + lcats = [(i2i c, snd (mkContext [] cont)) | + (c,AbsCat (Just cont) _) <- tree2list (M.jments abm)] + cats = Map.fromAscList lcats + catfuns = Map.fromList + [(cat,[f | (f, (C.DTyp _ c _,_,_)) <- lfuns, c==cat]) | (cat,_) <- lcats] + + cncs = Map.fromList [mkConcr lang (i2i lang) mo | (lang,mo) <- cms] + mkConcr lang0 lang mo = + (lang,D.Concr flags lins opers lincats lindefs printnames params fcfg) + where + js = tree2list (M.jments mo) + flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags mo)] + opers = Map.fromAscList [] -- opers will be created as optimization + utf = id -- trace (show lang0 +++ show flags) $ + -- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8 + -- then id else id + ---- then (trace "decode" D.convertStringsInTerm decodeUTF8) else id + umkTerm = utf . 
mkTerm
      lins = Map.fromAscList
        [(f', umkTerm tr)  | (f,CncFun _ (Just tr) _) <- js,
                             let f' = i2i f, exists f'] -- eliminating lins without fun
      -- needed even here because of restricted inheritance
      lincats = Map.fromAscList
        [(i2i c, mkCType ty) | (c,CncCat (Just ty) _ _) <- js]
      lindefs = Map.fromAscList
        [(i2i c, umkTerm tr)  | (c,CncCat _ (Just tr) _) <- js]
      printnames = Map.union
        (Map.fromAscList [(i2i f, umkTerm tr) | (f,CncFun _ _ (Just tr)) <- js])
        (Map.fromAscList [(i2i f, umkTerm tr) | (f,CncCat _ _ (Just tr)) <- js])
      params = Map.fromAscList
        [(i2i c, pars lang0 c) | (c,CncCat (Just ty) _ _) <- js]
      fcfg = Nothing

      exists f = Map.member f funs

-- | Convert a source identifier to a PGF identifier.
i2i :: Ident -> CId
i2i = CId . ident2bs

-- | Convert a source binding type to the corresponding PGF binding type.
b2b :: A.BindType -> C.BindType
b2b A.Explicit = C.Explicit
b2b A.Implicit = C.Implicit

-- | Translate an abstract-syntax type to a PGF type.
--
-- The hypotheses are translated left to right with 'mkContext', which also
-- extends @scope@ with the variables they bind; the arguments of the result
-- category are then translated in that extended scope.
mkType :: [Ident] -> A.Type -> C.Type
mkType scope t =
  case GM.typeForm t of
    (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
                           in C.DTyp hyps' (i2i cat) (map (mkExp scope') args)

-- | Translate an abstract-syntax term to a PGF expression.
--
-- @scope@ lists the variables bound around @t@, innermost first; a bound
-- variable is emitted as 'C.EVar' carrying its position in that list, and a
-- variable that is not in scope becomes @EMeta 0@.
--
-- The term is split by 'GM.termForm' into leading binders, a head and the
-- arguments of the head.  Head and arguments both sit under the binders, so
-- both are translated in the scope extended with the binders.  (Before this
-- fix only the head saw the extended scope: a lambda-bound variable in
-- argument position was looked up in the outer scope, and outer variables
-- were not shifted past the new binders.)
mkExp :: [Ident] -> A.Term -> C.Expr
mkExp scope t = case GM.termForm t of
    Ok (xs,c,args) -> let scope' = map snd (reverse xs) ++ scope
                      in mkAbs xs (mkApp scope' c (map (mkExp scope') args))
    Bad msg        -> error ("mkExp: " ++ msg)
  where
    -- re-wrap the body in the binders that termForm stripped off
    mkAbs xs t = foldr (\(b,v) -> C.EAbs (b2b b) (i2i v)) t xs
    -- translate the head and apply it to the already translated arguments;
    -- literal and metavariable heads ignore the arguments
    mkApp scope c args = case c of
      Q _ c    -> foldl C.EApp (C.EFun (i2i c)) args
      QC _ c   -> foldl C.EApp (C.EFun (i2i c)) args
      Vr x     -> case lookup x (zip scope [0..]) of
                    Just i  -> foldl C.EApp (C.EVar  i) args
                    Nothing -> foldl C.EApp (C.EMeta 0) args
      EInt i   -> C.ELit (C.LInt i)
      EFloat f -> C.ELit (C.LFlt f)
      K s      -> C.ELit (C.LStr s)
      Meta i   -> C.EMeta i
      _        -> C.EMeta 0

-- Translate a source pattern to a PGF pattern, threading the scope: every
-- pattern variable is pushed onto the front of the scope that is returned.
-- Only the pattern forms listed here are handled.
mkPatt scope p =
  case p of
    A.PP _ c ps -> let (scope',ps') = mapAccumL mkPatt scope ps
                   in (scope',C.PApp (i2i c) ps')
    A.PV x      -> (x:scope,C.PVar (i2i x))
    A.PW        -> (  scope,C.PWild)
    A.PInt i    -> (  scope,C.PLit (C.LInt i))
    A.PFloat f  -> (  scope,C.PLit (C.LFlt f))
    A.PString s -> (  scope,C.PLit (C.LStr s))


mkContext :: [Ident] -> 
A.Context -> ([Ident],[C.Hypo])
-- Translates the hypotheses left to right.  Each hypothesis type is
-- translated in the scope built so far; the bound variable is then pushed
-- onto the scope unless it is the wildcard identifier.
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
                                                      in if x == identW
                                                           then (  scope,(b2b bt,i2i x,ty'))
                                                           else (x:scope,(b2b bt,i2i x,ty'))) scope hyps

-- | Translate a linearization term (already brought to canonical form) to a
-- PGF term.  Records and tables become arrays, argument variables become
-- 'C.V', integers become 'C.C'.  Terms that should not occur at this stage
-- are reported through 'prtTrace' and replaced by recognisable dummy values
-- (the 6666x numbers) instead of aborting.
mkTerm :: Term -> C.Term
mkTerm tr = case tr of
  Vr (IA _ i) -> C.V i
  Vr (IAV _ _ i) -> C.V i
  -- the emptiness check keeps BS.last total; an empty identifier falls
  -- through to the catch-all debugging case below
  Vr (IC s) | not (BS.null s) && isDigit (BS.last s) ->
    C.V ((read . BS.unpack . snd . BS.spanEnd isDigit) s)
    ---- from gf parser of gfc
  EInt i -> C.C $ fromInteger i
  R rs -> C.R [mkTerm t | (_, (_,t)) <- rs]
  P t l -> C.P (mkTerm t) (C.C (mkLab l))
  T _ cs -> C.R [mkTerm t | (_,t) <- cs] ------
  V _ cs -> C.R [mkTerm t | t <- cs]
  S t p -> C.P (mkTerm t) (mkTerm p)
  C s t -> C.S $ concatMap flats [mkTerm x | x <- [s,t]]
  FV ts -> C.FV [mkTerm t | t <- ts]
  K s -> C.K (C.KS s)
----- K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants
  Empty -> C.S []
  App _ _ -> prtTrace tr $ C.C 66661 ---- for debugging
  Abs _ _ t -> mkTerm t ---- only on toplevel
  Alts (td,tvs) ->
    C.K (C.KP (strings td) [C.Alt (strings u) (strings v) | (u,v) <- tvs])
  _ -> prtTrace tr $ C.S [C.K (C.KS (render (A.ppTerm Unqualified 0 tr <+> int 66662)))] ---- for debugging
  where
    -- labels have the form "_<n>" at this stage; <n> is the array index
    mkLab (LIdent l) = case BS.unpack l of
      '_':ds -> (read ds) :: Int
      _ -> prtTrace tr $ 66663
    -- collect the string tokens of a (concatenated) string term
    strings t = case t of
      K s -> [s]
      C u v -> strings u ++ strings v
      Strs ss -> concatMap strings ss
      _ -> prtTrace tr $ ["66660"]
    -- flatten nested sequences into one list
    flats t = case t of
      C.S ts -> concatMap flats ts
      _ -> [t]

-- encoding PGF-internal lincats as terms
--
-- Records become arrays of their field types, a table becomes an array with
-- one copy of the value type per parameter value, Str becomes the empty
-- sequence and a parameter type (an integer at this stage) becomes 'C.C'.
-- Any other type is a compiler error and is reported as "mkCType <type>".
mkCType :: Type -> C.Term
mkCType t = case t of
  EInt i      -> C.C $ fromInteger i
  RecType rs  -> C.R [mkCType t | (_, t) <- rs]
  Table pt vt -> case pt of
    EInt i                         -> C.R $ replicate (1 + fromInteger i) $ mkCType vt
    RecType rs                     -> mkCType $ foldr Table vt (map snd rs)
    _ | Just i <- GM.isTypeInts pt -> C.R $ replicate (fromInteger i) $ mkCType vt
      -- an inner case does not fall through to the outer alternatives, so
      -- an unsupported parameter type needs its own error branch
      | otherwise                  -> error $ "mkCType " ++ show t

  Sort s | s == cStr -> C.S [] --- Str only
  _ | Just i <- GM.isTypeInts t -> C.C $ fromInteger i
  _ -> error $ 
"mkCType " ++ show t + +-- encoding showable lincats (as in source gf) as terms +mkParamLincat :: SourceGrammar -> Ident -> Ident -> C.Term +mkParamLincat sgr lang cat = errVal (C.R [C.S []]) $ do + typ <- Look.lookupLincat sgr lang cat + mkPType typ + where + mkPType typ = case typ of + RecType lts -> do + ts <- mapM (mkPType . snd) lts + return $ C.R [ C.P (kks $ showIdent (label2ident l)) t | ((l,_),t) <- zip lts ts] + Table (RecType lts) v -> do + ps <- mapM (mkPType . snd) lts + v' <- mkPType v + return $ foldr (\p v -> C.S [p,v]) v' ps + Table p v -> do + p' <- mkPType p + v' <- mkPType v + return $ C.S [p',v'] + Sort s | s == cStr -> return $ C.S [] + _ -> return $ + C.FV $ map (kks . filter showable . render . ppTerm Unqualified 0) $ + errVal [] $ Look.allParamValues sgr typ + showable c = not (isSpace c) ---- || (c == ' ') -- to eliminate \n in records + kks = C.K . C.KS + +-- return just one module per language + +reorder :: Ident -> SourceGrammar -> SourceGrammar +reorder abs cg = M.MGrammar $ + (abs, M.ModInfo M.MTAbstract M.MSComplete aflags [] Nothing [] [] adefs poss): + [(c, M.ModInfo (M.MTConcrete abs) M.MSComplete fs [] Nothing [] [] (sorted2tree js) poss) + | (c,(fs,js)) <- cncs] + where + poss = emptyBinTree -- positions no longer needed + mos = M.modules cg + adefs = sorted2tree $ sortIds $ + predefADefs ++ Look.allOrigInfos cg abs + predefADefs = + [(c, AbsCat (Just []) Nothing) | c <- [cFloat,cInt,cString]] + aflags = + concatOptions [M.flags mo | (_,mo) <- M.modules cg, M.isModAbs mo] + + cncs = sortIds [(lang, concr lang) | lang <- M.allConcretes cg abs] + concr la = (flags, + sortIds (predefCDefs ++ jments)) where + jments = Look.allOrigInfos cg la + flags = concatOptions + [M.flags mo | + (i,mo) <- mos, M.isModCnc mo, + Just r <- [lookup i (M.allExtendSpecs cg la)]] + + predefCDefs = + [(c, CncCat (Just GM.defLinType) Nothing Nothing) | c <- [cInt,cFloat,cString]] + + sortIds = sortBy (\ (f,_) (g,_) -> compare f g) + + +-- one grammar per 
language - needed for symtab generation +repartition :: Ident -> SourceGrammar -> [SourceGrammar] +repartition abs cg = + [M.partOfGrammar cg (lang,mo) | + let mos = M.modules cg, + lang <- case M.allConcretes cg abs of + [] -> [abs] -- to make pgf nonempty even when there are no concretes + cncs -> cncs, + let mo = errVal + (error (render (text "no module found for" <+> A.ppIdent lang))) $ M.lookupModule cg lang + ] + +-- translate tables and records to arrays, parameters and labels to indices + +canon2canon :: Options -> Ident -> SourceGrammar -> SourceGrammar +canon2canon opts abs cg0 = + (recollect . map cl2cl . repartition abs . purgeGrammar abs) cg0 + where + recollect = M.MGrammar . nubBy (\ (i,_) (j,_) -> i==j) . concatMap M.modules + cl2cl = M.MGrammar . js2js . map (c2c p2p) . M.modules + + js2js ms = map (c2c (j2j (M.MGrammar ms))) ms + + c2c f2 (c,mo) = (c, M.replaceJudgements mo $ mapTree f2 (M.jments mo)) + + j2j cg (f,j) = + let debug = if verbAtLeast opts Verbose then trace ("+ " ++ showIdent f) else id in + case j of + CncFun x (Just tr) z -> CncFun x (Just (debug (t2t (unfactor cg0 tr)))) z + CncCat (Just ty) (Just x) y -> CncCat (Just (ty2ty ty)) (Just (t2t (unfactor cg0 x))) y + _ -> j + where + cg1 = cg + t2t = term2term f cg1 pv + ty2ty = type2type cg1 pv + pv@(labels,untyps,typs) = trs $ paramValues cg1 + + unfactor :: SourceGrammar -> Term -> Term + unfactor gr t = case t of + T (TTyped ty) [(PV x,u)] -> V ty [restore x v (unfac u) | v <- vals ty] + _ -> GM.composSafeOp unfac t + where + unfac = unfactor gr + vals = err error id . 
Look.allParamValues gr + restore x u t = case t of + Vr y | y == x -> u + _ -> GM.composSafeOp (restore x u) t + + -- flatten record arguments of param constructors + p2p (f,j) = case j of + ResParam (Just ps) (Just vs) -> + ResParam (Just [(c,concatMap unRec cont) | (c,cont) <- ps]) (Just (map unrec vs)) + _ -> j + unRec (bt,x,ty) = case ty of + RecType fs -> [ity | (_,typ) <- fs, ity <- unRec (Explicit,identW,typ)] + _ -> [(bt,x,ty)] + unrec t = case t of + App f (R fs) -> GM.mkApp (unrec f) [unrec u | (_,(_,u)) <- fs] + _ -> GM.composSafeOp unrec t + + +---- + trs v = traceD (render (tr v)) v + + tr (labels,untyps,typs) = + (text "LABELS:" <+> + vcat [A.ppIdent c <> char '.' <> hsep (map A.ppLabel l) <+> char '=' <+> text (show i) | ((c,l),i) <- Map.toList labels]) $$ + (text "UNTYPS:" <+> + vcat [A.ppTerm Unqualified 0 t <+> char '=' <+> text (show i) | (t,i) <- Map.toList untyps]) $$ + (text "TYPS: " <+> + vcat [A.ppTerm Unqualified 0 t <+> char '=' <+> text (show (Map.assocs i)) | (t,i) <- Map.toList typs]) +---- + +purgeGrammar :: Ident -> SourceGrammar -> SourceGrammar +purgeGrammar abstr gr = + (M.MGrammar . list . filter complete . purge . M.modules) gr + where + list ms = traceD (render (text "MODULES" <+> hsep (punctuate comma (map (ppIdent . fst) ms)))) ms + purge = nubBy (\x y -> fst x == fst y) . filter (flip elem needed . fst) + needed = nub $ concatMap (requiredCanModules isSingle gr) acncs + acncs = abstr : M.allConcretes gr abstr + isSingle = True + complete (i,m) = M.isCompleteModule m --- not . 
isIncompleteCanon + +type ParamEnv = + (Map.Map (Ident,[Label]) (Type,Integer), -- numbered labels + Map.Map Term Integer, -- untyped terms to values + Map.Map Type (Map.Map Term Integer)) -- types to their terms to values + +--- gathers those param types that are actually used in lincats and lin terms +paramValues :: SourceGrammar -> ParamEnv +paramValues cgr = (labels,untyps,typs) where + partyps = nub $ + --- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt + [ty | + (_,(_,CncCat (Just ty0) _ _)) <- jments, + ty <- typsFrom ty0 + ] ++ [ + Q m ty | + (m,(ty,ResParam _ _)) <- jments + ] ++ [ty | + (_,(_,CncFun _ (Just tr) _)) <- jments, + ty <- err (const []) snd $ appSTM (typsFromTrm tr) [] + ] + params = [(ty, errVal (traceD ("UNKNOWN PARAM TYPE" +++ show ty) []) $ + Look.allParamValues cgr ty) | ty <- partyps] + typsFrom ty = (if isParam ty then (ty:) else id) $ case ty of + Table p t -> typsFrom p ++ typsFrom t + RecType ls -> concat [typsFrom t | (_, t) <- ls] + _ -> [] + + isParam ty = case ty of + Q _ _ -> True + QC _ _ -> True + RecType rs -> all isParam (map snd rs) + _ -> False + + typsFromTrm :: Term -> STM [Type] Term + typsFromTrm tr = case tr of + R fs -> mapM_ (typsFromField . snd) fs >> return tr + where + typsFromField (mty, t) = case mty of + Just x -> updateSTM (x:) >> typsFromTrm t + _ -> typsFromTrm t + V ty ts -> updateSTM (ty:) >> mapM_ typsFromTrm ts >> return tr + T (TTyped ty) cs -> + updateSTM (ty:) >> mapM_ typsFromTrm [t | (_, t) <- cs] >> return tr + T (TComp ty) cs -> + updateSTM (ty:) >> mapM_ typsFromTrm [t | (_, t) <- cs] >> return tr + _ -> GM.composOp typsFromTrm tr + + mods = traceD (render (hsep (map (ppIdent . 
fst) ms))) ms where ms = M.modules cgr + + jments = + [(m,j) | (m,mo) <- mods, j <- tree2list $ M.jments mo] + typs = + Map.fromList [(ci,Map.fromList (zip vs [0..])) | (ci,vs) <- params] + untyps = + Map.fromList $ concatMap Map.toList [typ | (_,typ) <- Map.toList typs] + lincats = + [(cat,[f | let RecType fs = GM.defLinType, f <- fs]) | cat <- [cInt,cFloat, cString]] ++ + reverse ---- TODO: really those lincats that are reached + ---- reverse is enough to expel overshadowed ones... + [(cat,ls) | (_,(cat,CncCat (Just ty) _ _)) <- jments, + RecType ls <- [unlockTy ty]] + labels = Map.fromList $ concat + [((cat,[lab]),(typ,i)): + [((cat,[LVar v]),(typ,toInteger (mx + v))) | v <- [0,1]] ++ ---- 1 or 2 vars + [((cat,[lab,lab2]),(ty,j)) | + rs <- getRec typ, ((lab2, ty),j) <- zip rs [0..]] + | + (cat,ls) <- lincats, ((lab, typ),i) <- zip ls [0..], let mx = length ls] + -- go to tables recursively + ---- TODO: even go to deeper records + where + getRec typ = case typ of + RecType rs -> [rs] ---- [unlockTyp rs] -- (sort (unlockTyp ls)) + Table _ t -> getRec t + _ -> [] + +type2type :: SourceGrammar -> ParamEnv -> Type -> Type +type2type cgr env@(labels,untyps,typs) ty = case ty of + RecType rs -> + RecType [(mkLab i, t2t t) | (i,(l, t)) <- zip [0..] (unlockTyp rs)] + Table pt vt -> Table (t2t pt) (t2t vt) + QC _ _ -> look ty + _ -> ty + where + t2t = type2type cgr env + look ty = EInt $ (+ (-1)) $ toInteger $ case Map.lookup ty typs of + Just vs -> length $ Map.assocs vs + _ -> trace ("unknown partype " ++ show ty) 66669 + +term2term :: Ident -> SourceGrammar -> ParamEnv -> Term -> Term +term2term fun cgr env@(labels,untyps,typs) tr = case tr of + App _ _ -> mkValCase (unrec tr) + QC _ _ -> mkValCase tr + R rs -> R [(mkLab i, (Nothing, t2t t)) | + (i,(l,(_,t))) <- zip [0..] 
(GM.sortRec (unlock rs))] + P t l -> r2r tr + + T (TWild _) _ -> error $ (render (text "wild" <+> ppTerm Qualified 0 tr)) + T (TComp ty) cs -> t2t $ V ty $ map snd cs ---- should be elim'ed in tc + T (TTyped ty) cs -> t2t $ V ty $ map snd cs ---- should be elim'ed in tc + V ty ts -> mkCurry $ V ty [t2t t | t <- ts] + S t p -> mkCurrySel (t2t t) (t2t p) + + _ -> GM.composSafeOp t2t tr + where + t2t = term2term fun cgr env + + unrec t = case t of + App f (R fs) -> GM.mkApp (unrec f) [unrec u | (_,(_,u)) <- fs] + _ -> GM.composSafeOp unrec t + + mkValCase tr = case appSTM (doVar tr) [] of + Ok (tr', st@(_:_)) -> t2t $ comp $ foldr mkCase tr' st + _ -> valNum $ comp tr + + --- this is mainly needed for parameter record projections + ---- was: + comp t = errVal t $ Compute.computeConcreteRec cgr t + + doVar :: Term -> STM [((Type,[Term]),(Term,Term))] Term + doVar tr = case getLab tr of + Ok (cat, lab) -> do + k <- readSTM >>= return . length + let tr' = Vr $ identC $ (BS.pack (show k)) ----- + + let tyvs = case Map.lookup (cat,lab) labels of + Just (ty,_) -> case Map.lookup ty typs of + Just vs -> (ty,[t | + (t,_) <- sortBy (\x y -> compare (snd x) (snd y)) + (Map.assocs vs)]) + _ -> error $ render (text "doVar1" <+> A.ppTerm Unqualified 0 ty) + _ -> error $ render (text "doVar2" <+> A.ppTerm Unqualified 0 tr <+> text (show (cat,lab))) ---- debug + updateSTM ((tyvs, (tr', tr)):) + return tr' + _ -> GM.composOp doVar tr + + r2r tr@(P (S (V ty ts) v) l) = t2t $ S (V ty [comp (P t l) | t <- ts]) v + + r2r tr@(P p _) = case getLab tr of + Ok (cat,labs) -> P (t2t p) . 
mkLab $ + maybe (prtTrace tr $ 66664) snd $ + Map.lookup (cat,labs) labels + _ -> K (render (A.ppTerm Unqualified 0 tr <+> prtTrace tr (int 66665))) + + -- this goes recursively into tables (ignored) and records (accumulated) + getLab tr = case tr of + Vr (IA cat _) -> return (identC cat,[]) + Vr (IAV cat _ _) -> return (identC cat,[]) + Vr (IC s) -> return (identC cat,[]) where + cat = BS.takeWhile (/='_') s ---- also to match IAVs; no _ in a cat tolerated + ---- init (reverse (dropWhile (/='_') (reverse s))) ---- from gf parser +---- Vr _ -> error $ "getLab " ++ show tr + P p lab2 -> do + (cat,labs) <- getLab p + return (cat,labs++[lab2]) + S p _ -> getLab p + _ -> Bad "getLab" + + + mkCase ((ty,vs),(x,p)) tr = + S (V ty [mkBranch x v tr | v <- vs]) p + mkBranch x t tr = case tr of + _ | tr == x -> t + _ -> GM.composSafeOp (mkBranch x t) tr + + valNum tr = maybe (valNumFV $ tryFV tr) EInt $ Map.lookup tr untyps + where + tryFV tr = case GM.appForm tr of + (c@(QC _ _), ts) -> [GM.mkApp c ts' | ts' <- combinations (map tryFV ts)] + (FV ts,_) -> ts + _ -> [tr] + valNumFV ts = case ts of + [tr] -> let msg = render (text "DEBUG" <+> ppIdent fun <> text ": error in valNum" <+> ppTerm Qualified 0 tr) in + trace msg $ error (showIdent fun) + _ -> FV $ map valNum ts + + mkCurry trm = case trm of + V (RecType [(_,ty)]) ts -> V ty ts + V (RecType ((_,ty):ltys)) ts -> + V ty [mkCurry (V (RecType ltys) cs) | + cs <- chop (product (map (lengthtyp . 
snd) ltys)) ts] + _ -> trm + lengthtyp ty = case Map.lookup ty typs of + Just m -> length (Map.assocs m) + _ -> error $ "length of type " ++ show ty + chop i xs = case splitAt i xs of + (xs1,[]) -> [xs1] + (xs1,xs2) -> xs1:chop i xs2 + + + mkCurrySel t p = S t p -- done properly in CheckGFCC + + +mkLab k = LIdent (BS.pack ("_" ++ show k)) + +-- remove lock fields; in fact, any empty records and record types +unlock = filter notlock where + notlock (l,(_, t)) = case t of --- need not look at l + R [] -> False + RecType [] -> False + _ -> True + +unlockTyp = filter notlock + +notlock (l, t) = case t of --- need not look at l + RecType [] -> False + _ -> True + +unlockTy ty = case ty of + RecType ls -> RecType $ GM.sortRec [(l, unlockTy t) | (l,t) <- ls, notlock (l,t)] + _ -> GM.composSafeOp unlockTy ty + + +prtTrace tr n = + trace (render (text "-- INTERNAL COMPILER ERROR" <+> A.ppTerm Unqualified 0 tr $$ text (show n))) n +prTrace tr n = trace (render (text "-- OBSERVE" <+> A.ppTerm Unqualified 0 tr <+> text (show n) <+> text (show tr))) n + + +-- | this function finds out what modules are really needed in the canonical gr. 
+-- its argument is typically a concrete module name +requiredCanModules :: (Ord i, Show i) => Bool -> M.MGrammar i a -> i -> [i] +requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where + exts = M.allExtends gr c + ops = if isSingle + then map fst (M.modules gr) + else iterFix (concatMap more) $ exts + more i = errVal [] $ do + m <- M.lookupModule gr i + return $ M.extends m ++ [o | o <- map M.openedModule (M.opens m)] + notReuse i = errVal True $ do + m <- M.lookupModule gr i + return $ M.isModRes m -- to exclude reused Cnc and Abs from required diff --git a/src/compiler/GF/Compile/ModDeps.hs b/src/compiler/GF/Compile/ModDeps.hs new file mode 100644 index 000000000..1e689aabc --- /dev/null +++ b/src/compiler/GF/Compile/ModDeps.hs @@ -0,0 +1,145 @@ +---------------------------------------------------------------------- +-- | +-- Module : ModDeps +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:34 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.14 $ +-- +-- Check correctness of module dependencies. Incomplete. +-- +-- AR 13\/5\/2003 +----------------------------------------------------------------------------- + +module GF.Compile.ModDeps (mkSourceGrammar, + moduleDeps, + openInterfaces, + requiredCanModules + ) where + +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Option +import GF.Grammar.Printer +import GF.Compile.Update +import GF.Grammar.Lookup +import GF.Infra.Modules + +import GF.Data.Operations + +import Control.Monad +import Data.List + +-- | to check uniqueness of module names and import names, the +-- appropriateness of import and extend types, +-- to build a dependency graph of modules, and to sort them topologically +mkSourceGrammar :: [SourceModule] -> Err SourceGrammar +mkSourceGrammar ms = do + let ns = map fst ms + checkUniqueErr ns + mapM (checkUniqueImportNames ns . 
snd) ms + deps <- moduleDeps ms + deplist <- either + return + (\ms -> Bad $ "circular modules" +++ unwords (map show ms)) $ + topoTest deps + return $ MGrammar [(m, maybe undefined id $ lookup m ms) | IdentM m _ <- deplist] + +checkUniqueErr :: (Show i, Eq i) => [i] -> Err () +checkUniqueErr ms = do + let msg = checkUnique ms + if null msg then return () else Bad $ unlines msg + +-- | check that import names don't clash with module names +checkUniqueImportNames :: [Ident] -> SourceModInfo -> Err () +checkUniqueImportNames ns mo = test [n | OQualif n v <- opens mo, n /= v] + where + test ms = testErr (all (`notElem` ns) ms) + ("import names clashing with module names among" +++ unwords (map prt ms)) + +type Dependencies = [(IdentM Ident,[IdentM Ident])] + +-- | to decide what modules immediately depend on what, and check if the +-- dependencies are appropriate +moduleDeps :: [SourceModule] -> Err Dependencies +moduleDeps ms = mapM deps ms where + deps (c,m) = errIn ("checking dependencies of module" +++ prt c) $ case mtype m of + MTConcrete a -> do + aty <- lookupModuleType gr a + testErr (aty == MTAbstract) "the of-module is not an abstract syntax" + chDep (IdentM c (MTConcrete a)) + (extends m) (MTConcrete a) (opens m) MTResource + t -> chDep (IdentM c t) (extends m) t (opens m) t + + chDep it es ety os oty = do + ests <- mapM (lookupModuleType gr) es + testErr (all (compatMType ety) ests) "inappropriate extension module type" +---- osts <- mapM (lookupModuleType gr . 
openedModule) os +---- testErr (all (compatOType oty) osts) "inappropriate open module type" + let ab = case it of + IdentM _ (MTConcrete a) -> [IdentM a MTAbstract] + _ -> [] ---- + return (it, ab ++ + [IdentM e ety | e <- es] ++ + [IdentM (openedModule o) oty | o <- os]) + + -- check for superficial compatibility, not submodule relation etc: what can be extended + compatMType mt0 mt = case (mt0,mt) of + (MTResource, MTConcrete _) -> True + (MTInstance _, MTConcrete _) -> True + (MTInterface, MTAbstract) -> True + (MTConcrete _, MTConcrete _) -> True + (MTInstance _, MTInstance _) -> True + (MTInstance _, MTResource) -> True + (MTResource, MTInstance _) -> True + ---- some more? + _ -> mt0 == mt + -- in the same way; this defines what can be opened + compatOType mt0 mt = case mt0 of + MTAbstract -> mt == MTAbstract + _ -> case mt of + MTResource -> True + MTInterface -> True + MTInstance _ -> True + _ -> False + + gr = MGrammar ms --- hack + +openInterfaces :: Dependencies -> Ident -> Err [Ident] +openInterfaces ds m = do + let deps = [(i,ds) | (IdentM i _,ds) <- ds] + let more (c,_) = [(i,mt) | Just is <- [lookup c deps], IdentM i mt <- is] + let mods = iterFix (concatMap more) (more (m,undefined)) + return $ [i | (i,MTInterface) <- mods] + +-- | this function finds out what modules are really needed in the canonical gr. 
+-- its argument is typically a concrete module name +requiredCanModules :: (Ord i, Show i) => Bool -> MGrammar i a -> i -> [i] +requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where + exts = allExtends gr c + ops = if isSingle + then map fst (modules gr) + else iterFix (concatMap more) $ exts + more i = errVal [] $ do + m <- lookupModule gr i + return $ extends m ++ [o | o <- map openedModule (opens m)] + notReuse i = errVal True $ do + m <- lookupModule gr i + return $ isModRes m -- to exclude reused Cnc and Abs from required + + +{- +-- to test +exampleDeps = [ + (ir "Nat",[ii "Gen", ir "Adj"]), + (ir "Adj",[ii "Num", ii "Gen", ir "Nou"]), + (ir "Nou",[ii "Cas"]) + ] + +ii s = IdentM (IC s) MTInterface +ir s = IdentM (IC s) MTResource +-} + diff --git a/src/compiler/GF/Compile/Optimize.hs b/src/compiler/GF/Compile/Optimize.hs new file mode 100644 index 000000000..2c556b36f --- /dev/null +++ b/src/compiler/GF/Compile/Optimize.hs @@ -0,0 +1,228 @@ +{-# LANGUAGE PatternGuards #-} +---------------------------------------------------------------------- +-- | +-- Module : Optimize +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/16 13:56:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.18 $ +-- +-- Top-level partial evaluation for GF source modules. 
+----------------------------------------------------------------------------- + +module GF.Compile.Optimize (optimizeModule) where + +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Grammar.Printer +import GF.Grammar.Macros +import GF.Grammar.Lookup +import GF.Grammar.Predef +import GF.Compile.Refresh +import GF.Compile.Concrete.Compute +import GF.Compile.CheckGrammar +import GF.Compile.Update + +import GF.Data.Operations +import GF.Infra.CheckM +import GF.Infra.Option + +import Control.Monad +import Data.List +import qualified Data.Set as Set +import Text.PrettyPrint +import Debug.Trace +import qualified Data.ByteString.Char8 as BS + + +-- | partial evaluation of concrete syntax. AR 6\/2001 -- 16\/5\/2003 -- 5\/2\/2005. + +optimizeModule :: Options -> [SourceModule] -> SourceModule -> Err SourceModule +optimizeModule opts ms m@(name,mi) + | mstatus mi == MSComplete = do + ids <- topoSortJments m + mi <- foldM updateEvalInfo mi ids + return (name,mi) + | otherwise = return m + where + oopts = opts `addOptions` flagsModule m + + updateEvalInfo mi (i,info) = do + info' <- evalInfo oopts ms (name,mi) i info + return (updateModule mi i info') + +evalInfo :: Options -> [SourceModule] -> SourceModule -> Ident -> Info -> Err Info +evalInfo opts ms m c info = do + + (if verbAtLeast opts Verbose then trace (" " ++ showIdent c) else id) return () + + errIn ("optimizing " ++ showIdent c) $ case info of + + CncCat ptyp pde ppr -> do + pde' <- case (ptyp,pde) of + (Just typ, Just de) -> do + de <- partEval opts gr ([(Explicit, varStr, typeStr)], typ) de + return (Just (factor param c 0 de)) + (Just typ, Nothing) -> do + de <- mkLinDefault gr typ + de <- partEval opts gr ([(Explicit, varStr, typeStr)], typ) de + return (Just (factor param c 0 de)) + _ -> return pde -- indirection + + ppr' <- liftM Just $ evalPrintname gr c ppr (Just $ K $ showIdent c) + + return (CncCat ptyp pde' ppr') + + CncFun (mt@(Just (_,cont,val))) pde ppr -> --trace 
(prt c) $ + eIn (text "linearization in type" <+> ppTerm Unqualified 0 (mkProd cont val []) $$ text "of function") $ do + pde' <- case pde of + Just de -> do de <- partEval opts gr (cont,val) de + return (Just (factor param c 0 de)) + Nothing -> return pde + ppr' <- liftM Just $ evalPrintname gr c ppr pde' + return $ CncFun mt pde' ppr' -- only cat in type actually needed + + ResOper pty pde + | OptExpand `Set.member` optim -> do + pde' <- case pde of + Just de -> do de <- computeConcrete gr de + return (Just (factor param c 0 de)) + Nothing -> return Nothing + return $ ResOper pty pde' + + _ -> return info + where + gr = MGrammar (m : ms) + optim = flag optOptimizations opts + param = OptParametrize `Set.member` optim + eIn cat = errIn (render (text "Error optimizing" <+> cat <+> ppIdent c <+> colon)) + +-- | the main function for compiling linearizations +partEval :: Options -> SourceGrammar -> (Context,Type) -> Term -> Err Term +partEval opts gr (context, val) trm = errIn (render (text "partial evaluation" <+> ppTerm Qualified 0 trm)) $ do + let vars = map (\(bt,x,t) -> x) context + args = map Vr vars + subst = [(v, Vr v) | v <- vars] + trm1 = mkApp trm args + trm2 <- computeTerm gr subst trm1 + trm3 <- if rightType trm2 + then computeTerm gr subst trm2 + else recordExpand val trm2 >>= computeTerm gr subst + return $ mkAbs [(Explicit,v) | v <- vars] trm3 + where + -- don't eta expand records of right length (correct by type checking) + rightType (R rs) = case val of + RecType ts -> length rs == length ts + _ -> False + rightType _ = False + + + + +-- here we must be careful not to reduce +-- variants {{s = "Auto" ; g = N} ; {s = "Wagen" ; g = M}} +-- {s = variants {"Auto" ; "Wagen"} ; g = variants {N ; M}} ; + +recordExpand :: Type -> Term -> Err Term +recordExpand typ trm = case typ of + RecType tys -> case trm of + FV rs -> return $ FV [R [assign lab (P r lab) | (lab,_) <- tys] | r <- rs] + _ -> return $ R [assign lab (P trm lab) | (lab,_) <- tys] + _ -> 
return trm + + +-- | auxiliaries for compiling the resource + +mkLinDefault :: SourceGrammar -> Type -> Err Term +mkLinDefault gr typ = liftM (Abs Explicit varStr) $ mkDefField typ + where + mkDefField typ = case typ of + Table p t -> do + t' <- mkDefField t + let T _ cs = mkWildCases t' + return $ T (TWild p) cs + Sort s | s == cStr -> return $ Vr varStr + QC q p -> do vs <- lookupParamValues gr q p + case vs of + v:_ -> return v + _ -> Bad (render (text "no parameter values given to type" <+> ppIdent p)) + RecType r -> do + let (ls,ts) = unzip r + ts <- mapM mkDefField ts + return $ R (zipWith assign ls ts) + _ | Just _ <- isTypeInts typ -> return $ EInt 0 -- exists in all as first val + _ -> Bad (render (text "linearization type field cannot be" <+> ppTerm Unqualified 0 typ)) + +-- | Form the printname: if given, compute. If not, use the computed +-- lin for functions, cat name for cats (dispatch made in evalCncDef above). +--- We cannot use linearization at this stage, since we do not know the +--- defaults we would need for question marks - and we're not yet in canon. 
+evalPrintname :: SourceGrammar -> Ident -> Maybe Term -> Maybe Term -> Err Term +evalPrintname gr c ppr lin = + case ppr of + Just pr -> comp pr + Nothing -> case lin of + Just t -> return $ K $ clean $ render (ppTerm Unqualified 0 (oneBranch t)) + Nothing -> return $ K $ showIdent c ---- + where + comp = computeConcrete gr + + oneBranch t = case t of + Abs _ _ b -> oneBranch b + R (r:_) -> oneBranch $ snd $ snd r + T _ (c:_) -> oneBranch $ snd c + V _ (c:_) -> oneBranch c + FV (t:_) -> oneBranch t + C x y -> C (oneBranch x) (oneBranch y) + S x _ -> oneBranch x + P x _ -> oneBranch x + Alts (d,_) -> oneBranch d + _ -> t + + --- very unclean cleaner + clean s = case s of + '+':'+':' ':cs -> clean cs + '"':cs -> clean cs + c:cs -> c: clean cs + _ -> s + + +-- do even more: factor parametric branches + +factor :: Bool -> Ident -> Int -> Term -> Term +factor param c i t = + case t of + T (TComp ty) cs -> factors ty [(p, factor param c (i+1) v) | (p, v) <- cs] + _ -> composSafeOp (factor param c i) t + where + factors ty pvs0 + | not param = V ty (map snd pvs0) + factors ty [] = V ty [] + factors ty pvs0@[(p,v)] = V ty [v] + factors ty pvs0@(pv:pvs) = + let t = mkFun pv + ts = map mkFun pvs + in if all (==t) ts + then T (TTyped ty) (mkCases t) + else V ty (map snd pvs0) + + --- we hope this will be fresh and don't check... 
in GFC would be safe + qvar = identC (BS.pack ("q_" ++ showIdent c ++ "__" ++ show i)) + + mkFun (patt, val) = replace (patt2term patt) (Vr qvar) val + mkCases t = [(PV qvar, t)] + +-- we need to replace subterms +replace :: Term -> Term -> Term -> Term +replace old new trm = + case trm of + -- these are the important cases, since they can correspond to patterns + QC _ _ | trm == old -> new + App _ _ | trm == old -> new + R _ | trm == old -> new + App x y -> App (replace old new x) (replace old new y) + _ -> composSafeOp (replace old new) trm diff --git a/src/compiler/GF/Compile/OptimizeGFCC.hs b/src/compiler/GF/Compile/OptimizeGFCC.hs new file mode 100644 index 000000000..2a218e1bb --- /dev/null +++ b/src/compiler/GF/Compile/OptimizeGFCC.hs @@ -0,0 +1,121 @@ +module GF.Compile.OptimizeGFCC where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations + +import Data.List +import qualified Data.Map as Map + + +-- back-end optimization: +-- suffix analysis followed by common subexpression elimination + +optPGF :: PGF -> PGF +optPGF = cseOptimize . suffixOptimize + +suffixOptimize :: PGF -> PGF +suffixOptimize = mapConcretes opt + where + opt cnc = cnc { + lins = Map.map optTerm (lins cnc), + lindefs = Map.map optTerm (lindefs cnc), + printnames = Map.map optTerm (printnames cnc) + } + +cseOptimize :: PGF -> PGF +cseOptimize = mapConcretes subex + +-- analyse word form lists into prefix + suffixes +-- suffix sets can later be shared by subex elim + +optTerm :: Term -> Term +optTerm tr = case tr of + R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts] + R ts -> R $ map optTerm ts + P t v -> P (optTerm t) v + _ -> tr + where + optToks ss = prf : suffs where + prf = pref (head ss) (tail ss) + suffs = map (drop (length prf)) ss + pref cand ss = case ss of + s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss + _ -> cand + isK t = case t of + K (KS _) -> True + _ -> False + mkSuff ("":ws) = R (map (K . 
KS) ws) + mkSuff (p:ws) = W p (R (map (K . KS) ws)) + + +-- common subexpression elimination + +---subex :: [(CId,Term)] -> [(CId,Term)] +subex :: Concr -> Concr +subex cnc = err error id $ do + (tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0) + return $ addSubexpConsts tree cnc + +type TermList = Map.Map Term (Int,Int) -- number of occs, id +type TermM a = STM (TermList,Int) a + +addSubexpConsts :: TermList -> Concr -> Concr +addSubexpConsts tree cnc = cnc { + opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops], + lins = rec lins, + lindefs = rec lindefs, + printnames = rec printnames + } + where + ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree] + mkOne (f,trm) = (f, recomp f trm) + recomp f t = case Map.lookup t tree of + Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself + _ -> case t of + R ts -> R $ map (recomp f) ts + S ts -> S $ map (recomp f) ts + W s t -> W s (recomp f t) + P t p -> P (recomp f t) (recomp f p) + _ -> t + fid n = mkCId $ "_" ++ show n + rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)] + + +getSubtermsMod :: Concr -> TermM TermList +getSubtermsMod cnc = do + mapM getSubterms (Map.assocs (lins cnc)) + mapM getSubterms (Map.assocs (lindefs cnc)) + mapM getSubterms (Map.assocs (printnames cnc)) + (tree0,_) <- readSTM + return $ Map.filter (\ (nu,_) -> nu > 1) tree0 + where + getSubterms (f,trm) = collectSubterms trm >> return () + +collectSubterms :: Term -> TermM () +collectSubterms t = case t of + R ts -> do + mapM collectSubterms ts + add t + S ts -> do + mapM collectSubterms ts + add t + W s u -> do + collectSubterms u + add t + P p u -> do + collectSubterms p + collectSubterms u + add t + _ -> return () + where + add t = do + (ts,i) <- readSTM + let + ((count,id),next) = case Map.lookup t ts of + Just (nu,id) -> ((nu+1,id), i) + _ -> ((1, i ), i+1) + writeSTM (Map.insert t (count,id) ts, next) + diff --git a/src/compiler/GF/Compile/PGFPretty.hs 
b/src/compiler/GF/Compile/PGFPretty.hs new file mode 100644 index 000000000..679714db5 --- /dev/null +++ b/src/compiler/GF/Compile/PGFPretty.hs @@ -0,0 +1,93 @@ +-- | Print a part of a PGF grammar on the human-readable format used in +-- the paper "PGF: A Portable Run-Time Format for Type-Theoretical Grammars". +module GF.Compile.PGFPretty (prPGFPretty, prPMCFGPretty) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.PMCFG + +import GF.Data.Operations + +import Data.Map (Map) +import qualified Data.Map as Map +import Text.PrettyPrint.HughesPJ + + +prPGFPretty :: PGF -> String +prPGFPretty pgf = render $ prAbs (abstract pgf) $$ prAll (prCnc (abstract pgf)) (concretes pgf) + +prPMCFGPretty :: PGF -> CId -> String +prPMCFGPretty pgf lang = render $ + case lookParser pgf lang of + Nothing -> empty + Just pinfo -> text "language" <+> ppCId lang $$ ppPMCFG pinfo + + +prAbs :: Abstr -> Doc +prAbs a = prAll prCat (cats a) $$ prAll prFun (funs a) + +prCat :: CId -> [Hypo] -> Doc +prCat c h | isLiteralCat c = empty + | otherwise = text "cat" <+> ppCId c + +prFun :: CId -> (Type,Int,[Equation]) -> Doc +prFun f (t,_,_) = text "fun" <+> ppCId f <+> text ":" <+> prType t + +prType :: Type -> Doc +prType t = parens (hsep (punctuate (text ",") (map ppCId cs))) <+> text "->" <+> ppCId c + where (cs,c) = catSkeleton t + + +-- FIXME: show concrete name +-- FIXME: inline opers first +prCnc :: Abstr -> CId -> Concr -> Doc +prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c)) + where + prLinCat :: CId -> Term -> Doc + prLinCat c t | isLiteralCat c = empty + | otherwise = text "lincat" <+> ppCId c <+> text "=" <+> pr 0 t + where + pr p (R ts) = prec p 1 (hsep (punctuate (text " *") (map (pr 1) ts))) + pr _ (S []) = text "Str" + pr _ (C n) = text "Int_" <> text (show (n+1)) + + prLin :: CId -> Term -> Doc + prLin f t = text "lin" <+> ppCId f <+> text "=" <+> pr 0 t + where + pr :: Int -> Term -> Doc + pr p (R ts) = text "<" <+> hsep 
(punctuate (text ",") (map (pr 0) ts)) <+> text ">" + pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2) + pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts))) + pr p (K (KS t)) = doubleQuotes (text t) + pr p (V i) = text ("argv_" ++ show (i+1)) + pr p (C i) = text (show (i+1)) + pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts))) + pr _ t = error $ "PGFPretty.prLin " ++ show t + +linCat :: Concr -> CId -> Term +linCat cnc c = Map.findWithDefault (error $ "lincat: " ++ showCId c) c (lincats cnc) + +prec :: Int -> Int -> Doc -> Doc +prec p m | p >= m = parens + | otherwise = id + +expand :: Concr -> Concr +expand cnc = cnc { lins = Map.map (f "") (lins cnc) } + where + -- FIXME: handle KP + f :: String -> Term -> Term + f w (R ts) = R (map (f w) ts) + f w (P t1 t2) = P (f w t1) (f w t2) + f w (S []) = S (if null w then [] else [K (KS w)]) + f w (S (t:ts)) = S (f w t : map (f "") ts) + f w (FV ts) = FV (map (f w) ts) + f w (W s t) = f (w++s) t + f w (K (KS t)) = K (KS (w++t)) + f w (F o) = f w (Map.findWithDefault (error $ "Bad oper: " ++ showCId o) o (opers cnc)) + f w t = t + +-- Utilities + +prAll :: (a -> b -> Doc) -> Map a b -> Doc +prAll p m = vcat [ p k v | (k,v) <- Map.toList m]
\ No newline at end of file diff --git a/src/compiler/GF/Compile/ReadFiles.hs b/src/compiler/GF/Compile/ReadFiles.hs new file mode 100644 index 000000000..b96d3127b --- /dev/null +++ b/src/compiler/GF/Compile/ReadFiles.hs @@ -0,0 +1,220 @@ +---------------------------------------------------------------------- +-- | +-- Module : ReadFiles +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:34 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.26 $ +-- +-- Decide what files to read as function of dependencies and time stamps. +-- +-- make analysis for GF grammar modules. AR 11\/6\/2003--24\/2\/2004 +-- +-- to find all files that have to be read, put them in dependency order, and +-- decide which files need recompilation. Name @file.gf@ is returned for them, +-- and @file.gfo@ otherwise. +----------------------------------------------------------------------------- + +module GF.Compile.ReadFiles + ( getAllFiles,ModName,ModEnv,importsOfModule, + gfoFile,gfFile,isGFO,gf2gfo, + getOptionsFromFile) where + +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Data.Operations +import GF.Grammar.Lexer +import GF.Grammar.Parser +import GF.Grammar.Grammar +import GF.Grammar.Binary + +import Control.Monad +import Data.Char +import Data.List +import Data.Maybe(isJust) +import qualified Data.ByteString.Char8 as BS +import qualified Data.Map as Map +import System.Time +import System.Directory +import System.FilePath +import Text.PrettyPrint + +type ModName = String +type ModEnv = Map.Map ModName (ClockTime,[ModName]) + + +-- | Returns a list of all files to be compiled in topological order i.e. +-- the low level (leaf) modules are first. 
+getAllFiles :: Options -> [InitPath] -> ModEnv -> FileName -> IOE [FullPath] +getAllFiles opts ps env file = do + -- read module headers from all files recursively + ds <- liftM reverse $ get [] [] (justModuleName file) + ioeIO $ putIfVerb opts $ "all modules:" +++ show [name | (name,_,_,_,_) <- ds] + return $ paths ds + where + -- construct list of paths to read + paths ds = concatMap mkFile ds + where + mkFile (f,st,gfTime,gfoTime,p) = + case st of + CSComp -> [p </> gfFile f] + CSRead | isJust gfTime -> [gf2gfo opts (p </> gfFile f)] + | otherwise -> [p </> gfoFile f] + CSEnv -> [] + + -- | traverses the dependency graph and returns a topologicaly sorted + -- list of ModuleInfo. An error is raised if there is circular dependency + get :: [ModName] -- ^ keeps the current path in the dependency graph to avoid cycles + -> [ModuleInfo] -- ^ a list of already traversed modules + -> ModName -- ^ the current module + -> IOE [ModuleInfo] -- ^ the final + get trc ds name + | name `elem` trc = ioeErr $ Bad $ "circular modules" +++ unwords trc + | (not . null) [n | (n,_,_,_,_) <- ds, name == n] --- file already read + = return ds + | otherwise = do + (name,st0,t0,imps,p) <- findModule name + ds <- foldM (get (name:trc)) ds imps + let (st,t) | (not . null) [f | (f,_,t1,_,_) <- ds, elem f imps && liftM2 (>=) t0 t1 /= Just True] + = (CSComp,Nothing) + | otherwise = (st0,t0) + return ((name,st,t,imps,p):ds) + + -- searches for module in the search path and if it is found + -- returns 'ModuleInfo'. 
It fails if there is no such module + findModule :: ModName -> IOE ModuleInfo + findModule name = do + (file,gfTime,gfoTime) <- do + mb_gfFile <- ioeIO $ getFilePath ps (gfFile name) + case mb_gfFile of + Just gfFile -> do gfTime <- ioeIO $ getModificationTime gfFile + mb_gfoTime <- ioeIO $ catch (liftM Just $ getModificationTime (gf2gfo opts gfFile)) + (\_->return Nothing) + return (gfFile, Just gfTime, mb_gfoTime) + Nothing -> do mb_gfoFile <- ioeIO $ getFilePath (maybe id (:) (flag optGFODir opts) ps) (gfoFile name) + case mb_gfoFile of + Just gfoFile -> do gfoTime <- ioeIO $ getModificationTime gfoFile + return (gfoFile, Nothing, Just gfoTime) + Nothing -> ioeErr $ Bad (render (text "File" <+> text (gfFile name) <+> text "does not exist." $$ + text "searched in:" <+> vcat (map text ps))) + + + let mb_envmod = Map.lookup name env + (st,t) = selectFormat opts (fmap fst mb_envmod) gfTime gfoTime + + (mname,imps) <- case st of + CSEnv -> return (name, maybe [] snd mb_envmod) + CSRead -> ioeIO $ fmap importsOfModule (decodeModHeader ((if isGFO file then id else gf2gfo opts) file)) + CSComp -> do s <- ioeIO $ BS.readFile file + case runP pModHeader s of + Left (Pn l c,msg) -> ioeBad (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) + Right mo -> return (importsOfModule mo) + ioeErr $ testErr (mname == name) + ("module name" +++ mname +++ "differs from file name" +++ name) + return (name,st,t,imps,dropFileName file) + +isGFO :: FilePath -> Bool +isGFO = (== ".gfo") . 
takeExtensions + +gfoFile :: FilePath -> FilePath +gfoFile f = addExtension f "gfo" + +gfFile :: FilePath -> FilePath +gfFile f = addExtension f "gf" + +gf2gfo :: Options -> FilePath -> FilePath +gf2gfo opts file = maybe (gfoFile (dropExtension file)) + (\dir -> dir </> gfoFile (dropExtension (takeFileName file))) + (flag optGFODir opts) + +-- From the given Options and the time stamps computes +-- whether the module have to be computed, read from .gfo or +-- the environment version have to be used +selectFormat :: Options -> Maybe ClockTime -> Maybe ClockTime -> Maybe ClockTime -> (CompStatus,Maybe ClockTime) +selectFormat opts mtenv mtgf mtgfo = + case (mtenv,mtgfo,mtgf) of + (_,_,Just tgf) | fromSrc -> (CSComp,Nothing) + (Just tenv,_,_) | fromComp -> (CSEnv, Just tenv) + (_,Just tgfo,_) | fromComp -> (CSRead,Just tgfo) + (Just tenv,_,Just tgf) | tenv > tgf -> (CSEnv, Just tenv) + (_,Just tgfo,Just tgf) | tgfo > tgf -> (CSRead,Just tgfo) + (Just tenv,_,Nothing) -> (CSEnv,Just tenv) -- source does not exist + (_,Just tgfo,Nothing) -> (CSRead,Just tgfo) -- source does not exist + _ -> (CSComp,Nothing) + where + fromComp = flag optRecomp opts == NeverRecomp + fromSrc = flag optRecomp opts == AlwaysRecomp + + +-- internal module dep information + + +data CompStatus = + CSComp -- compile: read gf + | CSRead -- read gfo + | CSEnv -- gfo is in env + deriving Eq + +type ModuleInfo = (ModName,CompStatus,Maybe ClockTime,[ModName],InitPath) + +importsOfModule :: SourceModule -> (ModName,[ModName]) +importsOfModule (m,mi) = (modName m,depModInfo mi []) + where + depModInfo mi = + depModType (mtype mi) . + depExtends (extend mi) . + depWith (mwith mi) . + depExDeps (mexdeps mi). 
+ depOpens (opens mi) + + depModType (MTAbstract) xs = xs + depModType (MTResource) xs = xs + depModType (MTInterface) xs = xs + depModType (MTConcrete m2) xs = modName m2:xs + depModType (MTInstance m2) xs = modName m2:xs + + depExtends es xs = foldr depInclude xs es + + depWith (Just (m,_,is)) xs = modName m : depInsts is xs + depWith Nothing xs = xs + + depExDeps eds xs = map modName eds ++ xs + + depOpens os xs = foldr depOpen xs os + + depInsts is xs = foldr depInst xs is + + depInclude (m,_) xs = modName m:xs + + depOpen (OSimple n ) xs = modName n:xs + depOpen (OQualif _ n) xs = modName n:xs + + depInst (m,n) xs = modName m:modName n:xs + + modName = showIdent + +-- | options can be passed to the compiler by comments in @--#@, in the main file +getOptionsFromFile :: FilePath -> IOE Options +getOptionsFromFile file = do + s <- ioe $ catch (fmap Ok $ BS.readFile file) + (\_ -> return (Bad $ "File " ++ file ++ " does not exist")) + let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s + fs = map (BS.unpack . BS.unwords . BS.words . 
BS.drop 3) ls + ioeErr $ parseModuleOptions fs + +getFilePath :: [FilePath] -> String -> IO (Maybe FilePath) +getFilePath paths file = get paths + where + get [] = return Nothing + get (p:ps) = do + let pfile = p </> file + exist <- doesFileExist pfile + if not exist + then get ps + else do pfile <- canonicalizePath pfile + return (Just pfile) diff --git a/src/compiler/GF/Compile/Refresh.hs b/src/compiler/GF/Compile/Refresh.hs new file mode 100644 index 000000000..04800fcce --- /dev/null +++ b/src/compiler/GF/Compile/Refresh.hs @@ -0,0 +1,133 @@ +---------------------------------------------------------------------- +-- | +-- Module : Refresh +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:27 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Compile.Refresh (refreshTerm, refreshTermN, + refreshModule + ) where + +import GF.Data.Operations +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Grammar.Macros +import Control.Monad + +refreshTerm :: Term -> Err Term +refreshTerm = refreshTermN 0 + +refreshTermN :: Int -> Term -> Err Term +refreshTermN i e = liftM snd $ refreshTermKN i e + +refreshTermKN :: Int -> Term -> Err (Int,Term) +refreshTermKN i e = liftM (\ (t,(_,i)) -> (i,t)) $ + appSTM (refresh e) (initIdStateN i) + +refresh :: Term -> STM IdState Term +refresh e = case e of + + Vr x -> liftM Vr (lookVar x) + Abs b x t -> liftM2 (Abs b) (refVarPlus x) (refresh t) + + Prod b x a t -> do + a' <- refresh a + x' <- refVar x + t' <- refresh t + return $ Prod b x' a' t' + + Let (x,(mt,a)) b -> do + a' <- refresh a + mt' <- case mt of + Just t -> refresh t >>= (return . 
Just) + _ -> return mt + x' <- refVar x + b' <- refresh b + return (Let (x',(mt',a')) b') + + R r -> liftM R $ refreshRecord r + + ExtR r s -> liftM2 ExtR (refresh r) (refresh s) + + T i cc -> liftM2 T (refreshTInfo i) (mapM refreshCase cc) + + _ -> composOp refresh e + +refreshCase :: (Patt,Term) -> STM IdState (Patt,Term) +refreshCase (p,t) = liftM2 (,) (refreshPatt p) (refresh t) + +refreshPatt p = case p of + PV x -> liftM PV (refVar x) + PC c ps -> liftM (PC c) (mapM refreshPatt ps) + PP q c ps -> liftM (PP q c) (mapM refreshPatt ps) + PR r -> liftM PR (mapPairsM refreshPatt r) + PT t p' -> liftM2 PT (refresh t) (refreshPatt p') + + PAs x p' -> liftM2 PAs (refVar x) (refreshPatt p') + + PSeq p' q' -> liftM2 PSeq (refreshPatt p') (refreshPatt q') + PAlt p' q' -> liftM2 PAlt (refreshPatt p') (refreshPatt q') + PRep p' -> liftM PRep (refreshPatt p') + PNeg p' -> liftM PNeg (refreshPatt p') + + _ -> return p + +refreshRecord r = case r of + [] -> return r + (x,(mt,a)):b -> do + a' <- refresh a + mt' <- case mt of + Just t -> refresh t >>= (return . Just) + _ -> return mt + b' <- refreshRecord b + return $ (x,(mt',a')) : b' + +refreshTInfo i = case i of + TTyped t -> liftM TTyped $ refresh t + TComp t -> liftM TComp $ refresh t + TWild t -> liftM TWild $ refresh t + _ -> return i + +-- for abstract syntax + +refreshEquation :: Equation -> Err ([Patt],Term) +refreshEquation pst = err Bad (return . fst) (appSTM (refr pst) initIdState) where + refr (ps,t) = liftM2 (,) (mapM refreshPatt ps) (refresh t) + +-- for concrete and resource in grammar, before optimizing + +refreshGrammar :: SourceGrammar -> Err SourceGrammar +refreshGrammar = liftM (MGrammar . snd) . foldM refreshModule (0,[]) . 
modules + +refreshModule :: (Int,[SourceModule]) -> SourceModule -> Err (Int,[SourceModule]) +refreshModule (k,ms) mi@(i,mo) + | isModCnc mo || isModRes mo = do + (k',js') <- foldM refreshRes (k,[]) $ tree2list $ jments mo + return (k', (i, replaceJudgements mo (buildTree js')) : ms) + | otherwise = return (k, mi:ms) + where + refreshRes (k,cs) ci@(c,info) = case info of + ResOper ptyp (Just trm) -> do ---- refresh ptyp + (k',trm') <- refreshTermKN k trm + return $ (k', (c, ResOper ptyp (Just trm')):cs) + ResOverload os tyts -> do + (k',tyts') <- liftM (\ (t,(_,i)) -> (i,t)) $ + appSTM (mapPairsM refresh tyts) (initIdStateN k) + return $ (k', (c, ResOverload os tyts'):cs) + CncCat mt (Just trm) pn -> do ---- refresh mt, pn + (k',trm') <- refreshTermKN k trm + return $ (k', (c, CncCat mt (Just trm') pn):cs) + CncFun mt (Just trm) pn -> do ---- refresh pn + (k',trm') <- refreshTermKN k trm + return $ (k', (c, CncFun mt (Just trm') pn):cs) + _ -> return (k, ci:cs) + diff --git a/src/compiler/GF/Compile/Rename.hs b/src/compiler/GF/Compile/Rename.hs new file mode 100644 index 000000000..30616b4cb --- /dev/null +++ b/src/compiler/GF/Compile/Rename.hs @@ -0,0 +1,313 @@ +---------------------------------------------------------------------- +-- | +-- Module : Rename +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/30 18:39:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.19 $ +-- +-- AR 14\/5\/2003 +-- The top-level function 'renameGrammar' does several things: +-- +-- - extends each module symbol table by indirections to extended module +-- +-- - changes unqualified and as-qualified imports to absolutely qualified +-- +-- - goes through the definitions and resolves names +-- +-- Dependency analysis between modules has been performed before this pass. +-- Hence we can proceed by @fold@ing "from left to right". 
+----------------------------------------------------------------------------- + +module GF.Compile.Rename ( + renameSourceTerm, + renameModule + ) where + +import GF.Grammar.Grammar +import GF.Grammar.Values +import GF.Grammar.Predef +import GF.Infra.Modules +import GF.Infra.Ident +import GF.Infra.CheckM +import GF.Grammar.Macros +import GF.Grammar.Printer +import GF.Grammar.Lookup +import GF.Grammar.Printer +import GF.Data.Operations + +import Control.Monad +import Data.List (nub) +import Text.PrettyPrint + +-- | this gives top-level access to renaming term input in the cc command +renameSourceTerm :: SourceGrammar -> Ident -> Term -> Check Term +renameSourceTerm g m t = do + mo <- checkErr $ lookupModule g m + status <- buildStatus g m mo + renameTerm status [] t + +renameModule :: [SourceModule] -> SourceModule -> Check SourceModule +renameModule ms (name,mo) = checkIn (text "renaming module" <+> ppIdent name) $ do + let js1 = jments mo + status <- buildStatus (MGrammar ms) name mo + js2 <- checkMap (renameInfo mo status) js1 + return (name, mo {opens = map forceQualif (opens mo), jments = js2}) + +type Status = (StatusTree, [(OpenSpec Ident, StatusTree)]) + +type StatusTree = BinTree Ident StatusInfo + +type StatusInfo = Ident -> Term + +renameIdentTerm :: Status -> Term -> Check Term +renameIdentTerm env@(act,imps) t = + checkIn (text "atomic term" <+> ppTerm Qualified 0 t $$ text "given" <+> hsep (punctuate comma (map (ppIdent . 
fst) qualifs))) $ + case t of + Vr c -> ident predefAbs c + Cn c -> ident (\_ s -> checkError s) c + Q m' c | m' == cPredef {- && isInPredefined c -} -> return t + Q m' c -> do + m <- checkErr (lookupErr m' qualifs) + f <- lookupTree showIdent c m + return $ f c + QC m' c | m' == cPredef {- && isInPredefined c -} -> return t + QC m' c -> do + m <- checkErr (lookupErr m' qualifs) + f <- lookupTree showIdent c m + return $ f c + _ -> return t + where + opens = [st | (OSimple _,st) <- imps] + qualifs = [(m, st) | (OQualif m _, st) <- imps] ++ + [(m, st) | (OSimple m, st) <- imps] -- qualif is always possible + + -- this facility is mainly for BWC with GF1: you need not import PredefAbs + predefAbs c s + | isPredefCat c = return $ Q cPredefAbs c + | otherwise = checkError s + + ident alt c = case lookupTree showIdent c act of + Ok f -> return $ f c + _ -> case lookupTreeManyAll showIdent opens c of + [f] -> return $ f c + [] -> alt c (text "constant not found:" <+> ppIdent c) + fs -> case nub [f c | f <- fs] of + [tr] -> return tr + ts@(t:_) -> do checkWarn (text "conflict" <+> hsep (punctuate comma (map (ppTerm Qualified 0) ts))) + return t + -- a warning will be generated in CheckGrammar, and the head returned + -- in next V: + -- Bad $ "conflicting imports:" +++ unwords (map prt ts) + +info2status :: Maybe Ident -> (Ident,Info) -> StatusInfo +info2status mq (c,i) = case i of + AbsFun _ _ Nothing -> maybe Con QC mq + ResValue _ -> maybe Con QC mq + ResParam _ _ -> maybe Con QC mq + AnyInd True m -> maybe Con (const (QC m)) mq + AnyInd False m -> maybe Cn (const (Q m)) mq + _ -> maybe Cn Q mq + +tree2status :: OpenSpec Ident -> BinTree Ident Info -> BinTree Ident StatusInfo +tree2status o = case o of + OSimple i -> mapTree (info2status (Just i)) + OQualif i j -> mapTree (info2status (Just j)) + +buildStatus :: SourceGrammar -> Ident -> SourceModInfo -> Check Status +buildStatus gr c mo = let mo' = self2status c mo in do + let gr1 = MGrammar ((c,mo) : modules gr) + ops 
= [OSimple e | e <- allExtends gr1 c] ++ opens mo + mods <- checkErr $ mapM (lookupModule gr1 . openedModule) ops + let sts = map modInfo2status $ zip ops mods + return $ if isModCnc mo + then (emptyBinTree, reverse sts) -- the module itself does not define any names + else (mo',reverse sts) -- so the empty ident is not needed + +modInfo2status :: (OpenSpec Ident,SourceModInfo) -> (OpenSpec Ident, StatusTree) +modInfo2status (o,mo) = (o,tree2status o (jments mo)) + +self2status :: Ident -> SourceModInfo -> StatusTree +self2status c m = mapTree (info2status (Just c)) (jments m) + +forceQualif o = case o of + OSimple i -> OQualif i i + OQualif _ i -> OQualif i i + +renameInfo :: SourceModInfo -> Status -> Ident -> Info -> Check Info +renameInfo mo status i info = checkIn + (text "renaming definition of" <+> ppIdent i <+> ppPosition mo i) $ + case info of + AbsCat pco pfs -> liftM2 AbsCat (renPerh (renameContext status) pco) + (renPerh (mapM rent) pfs) + AbsFun pty pa ptr -> liftM3 AbsFun (ren pty) (return pa) (renPerh (mapM (renameEquation status [])) ptr) + ResOper pty ptr -> liftM2 ResOper (ren pty) (ren ptr) + ResOverload os tysts -> + liftM (ResOverload os) (mapM (pairM rent) tysts) + + ResParam (Just pp) m -> do + pp' <- mapM (renameParam status) pp + return (ResParam (Just pp') m) + ResValue t -> do + t <- rent t + return (ResValue t) + CncCat pty ptr ppr -> liftM3 CncCat (ren pty) (ren ptr) (ren ppr) + CncFun mt ptr ppr -> liftM2 (CncFun mt) (ren ptr) (ren ppr) + _ -> return info + where + ren = renPerh rent + rent = renameTerm status [] + +renPerh ren (Just t) = liftM Just $ ren t +renPerh ren Nothing = return Nothing + +renameTerm :: Status -> [Ident] -> Term -> Check Term +renameTerm env vars = ren vars where + ren vs trm = case trm of + Abs b x t -> liftM (Abs b x) (ren (x:vs) t) + Prod bt x a b -> liftM2 (Prod bt x) (ren vs a) (ren (x:vs) b) + Typed a b -> liftM2 Typed (ren vs a) (ren vs b) + Vr x + | elem x vs -> return trm + | otherwise -> renid trm + 
Cn _ -> renid trm + Con _ -> renid trm + Q _ _ -> renid trm + QC _ _ -> renid trm + T i cs -> do + i' <- case i of + TTyped ty -> liftM TTyped $ ren vs ty -- the only annotation in source + _ -> return i + liftM (T i') $ mapM (renCase vs) cs + + Let (x,(m,a)) b -> do + m' <- case m of + Just ty -> liftM Just $ ren vs ty + _ -> return m + a' <- ren vs a + b' <- ren (x:vs) b + return $ Let (x,(m',a')) b' + + P t@(Vr r) l -- Here we have $r.l$ and this is ambiguous it could be either + -- record projection from variable or constant $r$ or qualified expression with module $r$ + | elem r vs -> return trm -- try var proj first .. + | otherwise -> checks [ renid (Q r (label2ident l)) -- .. and qualified expression second. + , renid t >>= \t -> return (P t l) -- try as a constant at the end + , checkError (text "unknown qualified constant" <+> ppTerm Unqualified 0 trm) + ] + + EPatt p -> do + (p',_) <- renpatt p + return $ EPatt p' + + _ -> composOp (ren vs) trm + + renid = renameIdentTerm env + renCase vs (p,t) = do + (p',vs') <- renpatt p + t' <- ren (vs' ++ vs) t + return (p',t') + renpatt = renamePattern env + +-- | vars not needed in env, since patterns always overshadow old vars +renamePattern :: Status -> Patt -> Check (Patt,[Ident]) +renamePattern env patt = case patt of + + PMacro c -> do + c' <- renid $ Vr c + case c' of + Q p d -> renp $ PM p d + _ -> checkError (text "unresolved pattern" <+> ppPatt Unqualified 0 patt) + + PC c ps -> do + c' <- renid $ Cn c + case c' of + QC m c -> do psvss <- mapM renp ps + let (ps,vs) = unzip psvss + return (PP m c ps, concat vs) + Q _ _ -> checkError (text "data constructor expected but" <+> ppTerm Qualified 0 c' <+> text "is found instead") + _ -> checkError (text "unresolved data constructor" <+> ppTerm Qualified 0 c') + + PP p c ps -> do + (QC p' c') <- renid (QC p c) + psvss <- mapM renp ps + let (ps',vs) = unzip psvss + return (PP p' c' ps', concat vs) + + PM p c -> do + x <- renid (Q p c) + (p',c') <- case x of + (Q p' 
c') -> return (p',c') + _ -> checkError (text "not a pattern macro" <+> ppPatt Qualified 0 patt) + return (PM p' c', []) + + PV x -> checks [ renid (Vr x) >>= \t' -> case t' of + QC m c -> return (PP m c [],[]) + _ -> checkError (text "not a constructor") + , return (patt, [x]) + ] + + PR r -> do + let (ls,ps) = unzip r + psvss <- mapM renp ps + let (ps',vs') = unzip psvss + return (PR (zip ls ps'), concat vs') + + PAlt p q -> do + (p',vs) <- renp p + (q',ws) <- renp q + return (PAlt p' q', vs ++ ws) + + PSeq p q -> do + (p',vs) <- renp p + (q',ws) <- renp q + return (PSeq p' q', vs ++ ws) + + PRep p -> do + (p',vs) <- renp p + return (PRep p', vs) + + PNeg p -> do + (p',vs) <- renp p + return (PNeg p', vs) + + PAs x p -> do + (p',vs) <- renp p + return (PAs x p', x:vs) + + _ -> return (patt,[]) + + where + renp = renamePattern env + renid = renameIdentTerm env + +renameParam :: Status -> (Ident, Context) -> Check (Ident, Context) +renameParam env (c,co) = do + co' <- renameContext env co + return (c,co') + +renameContext :: Status -> Context -> Check Context +renameContext b = renc [] where + renc vs cont = case cont of + (bt,x,t) : xts + | isWildIdent x -> do + t' <- ren vs t + xts' <- renc vs xts + return $ (bt,x,t') : xts' + | otherwise -> do + t' <- ren vs t + let vs' = x:vs + xts' <- renc vs' xts + return $ (bt,x,t') : xts' + _ -> return cont + ren = renameTerm b + +-- | vars not needed in env, since patterns always overshadow old vars +renameEquation :: Status -> [Ident] -> Equation -> Check Equation +renameEquation b vs (ps,t) = do + (ps',vs') <- liftM unzip $ mapM (renamePattern b) ps + t' <- renameTerm b (concat vs' ++ vs) t + return (ps',t') diff --git a/src/compiler/GF/Compile/SubExOpt.hs b/src/compiler/GF/Compile/SubExOpt.hs new file mode 100644 index 000000000..c7dbb5d3d --- /dev/null +++ b/src/compiler/GF/Compile/SubExOpt.hs @@ -0,0 +1,142 @@ +---------------------------------------------------------------------- +-- | +-- Module : SubExOpt +-- 
Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- This module implements a simple common subexpression elimination +-- for .gfo grammars, to factor out shared subterms in lin rules. +-- It works in three phases: +-- +-- (1) collectSubterms collects recursively all subterms of forms table and (P x..y) +-- from lin definitions (experience shows that only these forms +-- tend to get shared) and counts how many times they occur +-- (2) addSubexpConsts takes those subterms t that occur more than once +-- and creates definitions of form "oper A''n = t" where n is a +-- fresh number; notice that we assume no ids of this form are in +-- scope otherwise +-- (3) elimSubtermsMod goes through lins and the created opers by replacing largest +-- possible subterms by the newly created identifiers +-- +----------------------------------------------------------------------------- + +module GF.Compile.SubExOpt (subexpModule,unsubexpModule) where + +import GF.Grammar.Grammar +import GF.Grammar.Lookup +import GF.Infra.Ident +import qualified GF.Grammar.Macros as C +import qualified GF.Infra.Modules as M +import GF.Data.Operations + +import Control.Monad +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.ByteString.Char8 as BS +import Data.List + +subexpModule :: SourceModule -> SourceModule +subexpModule (n,mo) = errVal (n,mo) $ do + let ljs = tree2list (M.jments mo) + (tree,_) <- appSTM (getSubtermsMod n ljs) (Map.empty,0) + js2 <- liftM buildTree $ addSubexpConsts n tree $ ljs + return (n,M.replaceJudgements mo js2) + +unsubexpModule :: SourceModule -> SourceModule +unsubexpModule sm@(i,mo) + | hasSub ljs = (i,M.replaceJudgements mo (rebuild (map unparInfo ljs))) + | otherwise = sm + where + ljs = tree2list (M.jments mo) + + -- perform this iff the module has opers + hasSub ljs = not $ null [c | (c,ResOper _ _) <- ljs] + unparInfo (c,info) = case info of + CncFun xs (Just t) m -> [(c, CncFun xs (Just (unparTerm t)) m)] + 
ResOper (Just (EInt 8)) _ -> [] -- subexp-generated opers + ResOper pty (Just t) -> [(c, ResOper pty (Just (unparTerm t)))] + _ -> [(c,info)] + unparTerm t = case t of + Q m c | isOperIdent c -> --- name convention of subexp opers + errVal t $ liftM unparTerm $ lookupResDef gr m c + _ -> C.composSafeOp unparTerm t + gr = M.MGrammar [sm] + rebuild = buildTree . concat + +-- implementation + +type TermList = Map Term (Int,Int) -- number of occs, id +type TermM a = STM (TermList,Int) a + +addSubexpConsts :: + Ident -> Map Term (Int,Int) -> [(Ident,Info)] -> Err [(Ident,Info)] +addSubexpConsts mo tree lins = do + let opers = [oper id trm | (trm,(_,id)) <- list] + mapM mkOne $ opers ++ lins + where + mkOne (f,def) = case def of + CncFun xs (Just trm) pn -> do + trm' <- recomp f trm + return (f,CncFun xs (Just trm') pn) + ResOper ty (Just trm) -> do + trm' <- recomp f trm + return (f,ResOper ty (Just trm')) + _ -> return (f,def) + recomp f t = case Map.lookup t tree of + Just (_,id) | operIdent id /= f -> return $ Q mo (operIdent id) + _ -> C.composOp (recomp f) t + + list = Map.toList tree + + oper id trm = (operIdent id, ResOper (Just (EInt 8)) (Just trm)) + --- impossible type encoding generated opers + +getSubtermsMod :: Ident -> [(Ident,Info)] -> TermM (Map Term (Int,Int)) +getSubtermsMod mo js = do + mapM (getInfo (collectSubterms mo)) js + (tree0,_) <- readSTM + return $ Map.filter (\ (nu,_) -> nu > 1) tree0 + where + getInfo get fi@(f,i) = case i of + CncFun xs (Just trm) pn -> do + get trm + return $ fi + ResOper ty (Just trm) -> do + get trm + return $ fi + _ -> return fi + +collectSubterms :: Ident -> Term -> TermM Term +collectSubterms mo t = case t of + App f a -> do + collect f + collect a + add t + T ty cs -> do + let (_,ts) = unzip cs + mapM collect ts + add t + V ty ts -> do + mapM collect ts + add t +---- K (KP _ _) -> add t + _ -> C.composOp (collectSubterms mo) t + where + collect = collectSubterms mo + add t = do + (ts,i) <- readSTM + let + 
((count,id),next) = case Map.lookup t ts of + Just (nu,id) -> ((nu+1,id), i) + _ -> ((1, i ), i+1) + writeSTM (Map.insert t (count,id) ts, next) + return t --- only because of composOp + +operIdent :: Int -> Ident +operIdent i = identC (operPrefix `BS.append` (BS.pack (show i))) --- + +isOperIdent :: Ident -> Bool +isOperIdent id = BS.isPrefixOf operPrefix (ident2bs id) + +operPrefix = BS.pack ("A''") diff --git a/src/compiler/GF/Compile/Update.hs b/src/compiler/GF/Compile/Update.hs new file mode 100644 index 000000000..1e39a2e03 --- /dev/null +++ b/src/compiler/GF/Compile/Update.hs @@ -0,0 +1,226 @@ +---------------------------------------------------------------------- +-- | +-- Module : Update +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/30 18:39:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.8 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Compile.Update (buildAnyTree, extendModule, rebuildModule) where + +import GF.Infra.Ident +import GF.Grammar.Grammar +import GF.Grammar.Printer +import GF.Grammar.Lookup +import GF.Infra.Modules +import GF.Infra.Option + +import GF.Data.Operations + +import Data.List +import qualified Data.Map as Map +import Control.Monad +import Text.PrettyPrint + +-- | combine a list of definitions into a balanced binary search tree +buildAnyTree :: Ident -> [(Ident,Info)] -> Err (BinTree Ident Info) +buildAnyTree m = go Map.empty + where + go map [] = return map + go map ((c,j):is) = do + case Map.lookup c map of + Just i -> case unifyAnyInfo m i j of + Ok k -> go (Map.insert c k map) is + Bad _ -> fail $ render (text "cannot unify the informations" $$ + nest 4 (ppJudgement Qualified (c,i)) $$ + text "and" $+$ + nest 4 (ppJudgement Qualified (c,j)) $$ + text "in module" <+> ppIdent m) + Nothing -> go (Map.insert c j map) is + +extendModule :: SourceGrammar -> SourceModule -> Err SourceModule 
extendModule gr (name,m)
  ---- Just to allow inheritance in incomplete concrete (which are not
  ---- compiled anyway), extensions are not built for them.
  ---- Should be replaced by real control. AR 4/2/2005
  | mstatus m == MSIncomplete && isModCnc m = return (name,m)
  | otherwise = liftM ((,) name) (foldM extOne m (extend m))
  where
    -- Fold step: merge the judgements of one extended module @n@ into the
    -- module accumulated so far.
    extOne acc (n,cond) = do
      parent <- lookupModule gr n

      -- test that the module types match
      -- NOTE(review): this compares the extending module with the accumulator
      -- (which starts out as that same module), not with the looked-up
      -- parent -- confirm that this is the intended check.
      testErr (sameMType (mtype m) (mtype acc))
              ("illegal extension type to module" +++ showIdent name)

      -- the way the extension is built depends on whether the old module
      -- is complete
      let parentComplete = isCompleteModule parent
      merged <- extendMod gr parentComplete (n, isInherited cond) name
                          (jments parent) (jments acc)

      return (finish parentComplete n merged acc)

    -- Store the merged judgements; if the parent is incomplete, the extension
    -- information is thrown away and the parent is recorded in 'mexdeps'.
    finish True  _ merged acc = acc {jments = merged}
    finish False n merged acc =
      acc { extend  = filter ((/=n) . fst) (extend acc)
          , mexdeps = nub (n : mexdeps acc)
          , jments  = merged
          }

-- | Rebuild instance modules (by merging in their interface) and "with"
-- modules (by instantiating the module they are built from), prior to
-- renaming. Modules of any other kind are returned unchanged.
-- AR 24/10/2003
rebuildModule :: SourceGrammar -> SourceModule -> Err SourceModule
rebuildModule gr (i,mi@(ModInfo mt stat fs_ me mw ops_ med_ js_ ps_)) =
  liftM ((,) i) (maybe fromInterface withInstances mw)
  where
    ---- deps <- moduleDeps ms
    ---- is   <- openInterfaces deps i
    is = [] ---- the method above is buggy: try "i -src" for two grs. AR 8/3/2005

    -- no "with" clause: add the information given in interface into an
    -- instance module
    fromInterface = do
      testErr (null is || mstatus mi == MSIncomplete)
              ("module" +++ showIdent i +++
               "has open interfaces and must therefore be declared incomplete")
      case mt of
        MTInstance i0 -> instantiate i0
        _             -> return mi

    -- merge the judgements of the interface i0 into the instance
    instantiate i0 = do
      m1 <- lookupModule gr i0
      testErr (isModRes m1) ("interface expected instead of" +++ showIdent i0)
      js' <- extendMod gr False (i0,const True) i (jments m1) (jments mi)
      --- to avoid double inclusions, in instance I of I0 = J0 ** ...
      case extends mi of
        []  -> return $ replaceJudgements mi js'
        j0s -> do
          m0s <- mapM (lookupModule gr) j0s
          let inExtended c = any (isInBinTree c . jments) m0s
              js2          = filterBinTree (\c _ -> not (inExtended c)) js'
          return $ (replaceJudgements mi js2)
            {positions = Map.union (positions m1) (positions mi)}

    -- add the instance opens to an incomplete module "with" instances
    withInstances (ext,incl,ops) = do
      let (infs,insts) = unzip ops
          stat'        = ifNull MSComplete (const MSIncomplete)
                                (filter (`notElem` infs) is)
      testErr (stat' == MSComplete || stat == MSIncomplete)
              ("module" +++ showIdent i +++ "remains incomplete")
      ModInfo mt0 _ fs _ _ ops0 _ js ps0 <- lookupModule gr ext
      let ops1 = nub $ concat
                   [ ops_ -- N.B. js has been name-resolved already
                   , [OQualif inf inst | (inf,inst) <- ops]
                   , [o | o <- ops0, notElem (openedModule o) infs]
                   , [OQualif inst inst | inst <- insts]
                   , map OSimple insts
                   ]

          --- check if me is incomplete
          fs1  = fs `addOptions` fs_ -- new flags have priority
          js0  = filter (isInherited incl . fst) (tree2list js)
          js1  = buildTree (tree2list js_ ++ js0)
          ps1  = Map.union ps_ ps0
          med1 = nub (ext : infs ++ insts ++ med_)
      return $ ModInfo mt0 stat' fs1 me Nothing ops1 med1 js1 ps1

-- | When extending a complete module: new information is inserted,
-- and the process is interrupted if unification fails.
-- If the extended module is incomplete, its judgements are just copied.
extendMod :: SourceGrammar ->
             Bool -> (Ident,Ident -> Bool) -> Ident ->
             BinTree Ident Info -> BinTree Ident Info ->
             Err (BinTree Ident Info)
extendMod gr isCompl (name,cond) base old new = foldM merge new (Map.toList old)
  where
    -- Fold step: bring the judgement (c,i) of the extended module into the
    -- tree accumulated so far.
    merge acc (c,i)
      | not (cond c) = return acc -- c is not selected by the condition
      | otherwise    = maybe inherit unify (Map.lookup c acc)
      where
        -- c is not defined in the extending module yet
        inherit
          | isCompl   = return $ updateTree (c,indirInfo name i) acc
          | otherwise = return $ updateTree (c,i) acc

        -- c is already defined as j: the two judgements must unify
        unify j = case unifyAnyInfo name i j of
          Ok k  -> return $ updateTree (c,k) acc
          Bad _ -> do
            (newMod,newInfo) <- origin base j
            (oldMod,oldInfo) <- origin name i
            fail $ render (text "cannot unify the information" $$
                           nest 4 (ppJudgement Qualified (c,oldInfo)) $$
                           text "in module" <+> ppIdent oldMod <+> text "with" $$
                           nest 4 (ppJudgement Qualified (c,newInfo)) $$
                           text "in module" <+> ppIdent newMod)

        -- for the error message: follow an indirection to the original
        -- judgement, otherwise keep the given module name and judgement
        origin _       (AnyInd _ m) = lookupOrigInfo gr m c
        origin modName info         = return (modName,info)

    -- the indirection that stands for a judgement of module n
    indirInfo :: Ident -> Info -> Info
    indirInfo n info = uncurry AnyInd target
      where
        target = case info of
          ResValue _         -> (True,n)
          ResParam _ _       -> (True,n)
          AbsFun _ _ Nothing -> (True,n)
          AnyInd b k         -> (b,k)
          _                  -> (False,n) ---- canonical in Abs

-- | Unify two judgements of the same constant; the first argument is a
-- module name, consulted only in the 'ResOverload' case. Fails if the two
-- judgements are of different kinds or carry conflicting parts.
unifyAnyInfo :: Ident -> Info -> Info -> Err Info
unifyAnyInfo _ (AbsCat mc1 mf1) (AbsCat mc2 mf2) =
  liftM2 AbsCat (unifMaybe mc1 mc2) (unifConstrs mf1 mf2) -- adding constrs
unifyAnyInfo _ (AbsFun mt1 ma1 md1) (AbsFun mt2 ma2 md2) =
  liftM3 AbsFun (unifMaybe mt1 mt2) (unifAbsArrity ma1 ma2) (unifAbsDefs md1 md2) -- adding defs
unifyAnyInfo _ (ResParam mt1 mv1) (ResParam mt2 mv2) =
  liftM2 ResParam (unifMaybe mt1 mt2) (unifMaybe mv1 mv2)
unifyAnyInfo _ (ResValue t1) (ResValue t2)
  | t1==t2    = return (ResValue t1)
  | otherwise = fail ""
unifyAnyInfo m _ (ResOverload ms t)
  | elem m ms = return $ ResOverload ms t
unifyAnyInfo _ (ResOper mt1 m1) (ResOper mt2 m2) =
  liftM2 ResOper (unifMaybe mt1 mt2) (unifMaybe m1 m2)
unifyAnyInfo _ (CncCat mc1 mf1 mp1) (CncCat mc2 mf2 mp2) =
  liftM3 CncCat (unifMaybe mc1 mc2) (unifMaybe mf1 mf2) (unifMaybe mp1 mp2)
unifyAnyInfo _ (CncFun mx1 mt1 md1) (CncFun _ mt2 md2) =
  -- the first field is taken from the first judgement without unification
  liftM2 (CncFun mx1) (unifMaybe mt1 mt2) (unifMaybe md1 md2) ---- adding defs
unifyAnyInfo _ i@(AnyInd b1 m1) (AnyInd b2 m2) = do
  testErr (b1 == b2) $ "indirection status"
  testErr (m1 == m2) $ "different sources of indirection"
  return i
unifyAnyInfo _ _ _ = fail "informations"

-- | this is what happens when matching two values in the same module:
-- a missing value gives way to a present one, and two present values
-- must be equal
unifMaybe :: Eq a => Maybe a -> Maybe a -> Err (Maybe a)
unifMaybe mx my = case (mx,my) of
  (Just x, Just y) | not (x==y) -> fail ""
  (Nothing, _)                  -> return my
  _                             -> return mx

-- | arities unify exactly like any other optional value
unifAbsArrity :: Maybe Int -> Maybe Int -> Err (Maybe Int)
unifAbsArrity = unifMaybe

-- | equation lists are concatenated when both sides have one; it is an
-- error if only one side has one
unifAbsDefs :: Maybe [Equation] -> Maybe [Equation] -> Err (Maybe [Equation])
unifAbsDefs md1 md2 = case (md1,md2) of
  (Nothing, Nothing) -> return Nothing
  (Just xs, Just ys) -> return (Just (xs ++ ys))
  _                  -> fail ""

-- | constructor lists are concatenated; a missing list gives way to the
-- other one. This never fails.
unifConstrs :: Maybe [Term] -> Maybe [Term] -> Err (Maybe [Term])
unifConstrs Nothing   p2        = return p2
unifConstrs p1        Nothing   = return p1
unifConstrs (Just bs) (Just ds) = return (Just (bs ++ ds))
