From ac00f77dadd4d447803dd7cab5a36f47365325d0 Mon Sep 17 00:00:00 2001 From: peb Date: Mon, 11 Apr 2005 12:57:45 +0000 Subject: "Committed_by_peb" --- src/GF/Conversion/SimpleToMCFG/Coercions.hs | 62 +++++++++ src/GF/Conversion/SimpleToMCFG/Nondet.hs | 203 ++++++++++++++++++++++++++++ src/GF/Conversion/SimpleToMCFG/Strict.hs | 128 ++++++++++++++++++ 3 files changed, 393 insertions(+) create mode 100644 src/GF/Conversion/SimpleToMCFG/Coercions.hs create mode 100644 src/GF/Conversion/SimpleToMCFG/Nondet.hs create mode 100644 src/GF/Conversion/SimpleToMCFG/Strict.hs (limited to 'src/GF/Conversion/SimpleToMCFG') diff --git a/src/GF/Conversion/SimpleToMCFG/Coercions.hs b/src/GF/Conversion/SimpleToMCFG/Coercions.hs new file mode 100644 index 000000000..c1dc5b07c --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG/Coercions.hs @@ -0,0 +1,62 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Coercions + (addCoercions) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Conversion.Types +import GF.Data.SortedList +import List (groupBy) + +---------------------------------------------------------------------- + +addCoercions :: MGrammar -> MGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule (Abs head args _) (Cnc lbls _ _) <- rules ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#MCFG coercions" (prt . length) $ + concat $ + tracePrt "#MCFG coercions per category" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mcat2cat $ fst $ head heads) (mcat2cat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args + = [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) | + (head@(MCat _ headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@(MCat _ argCns) <- args, + argCns `subset` headCns ] + + + diff --git a/src/GF/Conversion/SimpleToMCFG/Nondet.hs b/src/GF/Conversion/SimpleToMCFG/Nondet.hs new file mode 100644 index 000000000..b98b368ff --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG/Nondet.hs @@ -0,0 +1,203 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically. +-- Afterwards, the grammar has to be extended with coercion functions, +-- from the module 'GF.Conversion.SimpleToMCFG.Coercions' +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Nondet + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM + + +------------------------------------------------------------ +-- type declarations + +type CnvMonad a = BacktrackM Env a + +type Env = (MCat, [MCat], LinRec, [LinType]) +type LinRec = [Lin Cat MLabel Token] + + +---------------------------------------------------------------------- +-- main conversion function + +convertGrammar :: SimpleGrammar -> MGrammar +convertGrammar rules = tracePrt "Nondet conversion: #MCFG rules" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SimpleRule -> CnvMonad MRule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + writeState (initialMCat cat, map initialMCat args, [], ctypes) + rterm <- simplifyTerm term + reduceTerm ctype emptyPath rterm + (newCat, newArgs, linRec, _) <- readState + let newLinRec = map (instantiateArgs newArgs) linRec + catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) +convertRule _ = failure + + +---------------------------------------------------------------------- +-- term simplification + +simplifyTerm :: Term -> CnvMonad Term +simplifyTerm (term :! sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + Tbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm ssel + return (sterm +! sel') +simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms +simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record +simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term +simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table +simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms +simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2) +simplifyTerm term = return term +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + +simplifyAssign :: (Label, Term) -> CnvMonad (Label, Term) +simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term + +simplifyCase :: (Term, Term) -> CnvMonad (Term, Term) +simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term) + + +------------------------------------------------------------ +-- reducing simplified terms, collecting MCF rules + +reduceTerm :: LinType -> Path -> Term -> CnvMonad () +reduceTerm ctype path (Variants terms) + = member terms >>= reduceTerm ctype path +reduceTerm (StrT) path term = updateLin (path, term) +reduceTerm (ConT _ _) path term = do pat <- expandTerm term + updateHead (path, pat) +reduceTerm (RecT rtype) path term + = sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | + (lbl, ctype) <- rtype ] +reduceTerm (TblT ptype vtype) path table + = sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | + pat <- enumeratePatterns ptype ] + + +------------------------------------------------------------ +-- expanding a term to ground terms + +expandTerm :: Term -> CnvMonad Term +expandTerm arg@(Arg nr _ path) + = do ctypes <- readArgCTypes + pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr + pat =?= arg + return pat +expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms +expandTerm (Rec record) = liftM Rec $ mapM expandAssign record +expandTerm (Variants terms) = member terms >>= expandTerm +expandTerm term = error $ "expandTerm: " ++ prt term + +expandAssign :: (Label, Term) -> CnvMonad (Label, Term) +expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term + + +------------------------------------------------------------ +-- unification of patterns and selection terms + +(=?=) :: Term -> Term -> CnvMonad () +Wildcard =?= _ = return () +Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= Arg nr _ path = updateArg nr (path, pat) +(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term + + +------------------------------------------------------------ +-- updating the MCF rule + +readArgCTypes :: CnvMonad [LinType] +readArgCTypes = do (_, _, _, env) <- readState + return env + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins, env) <- readState + args' <- updateNth (addToMCat cn) arg args + writeState (head, args', lins, env) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins, env) <- readState + head' <- addToMCat cn head + writeState (head', args, lins, env) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins, env) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins', env) + +term2lins :: Term -> [[Symbol (Cat, Path, Int) Token]] +term2lins (Arg nr cat path) = return [Cat (cat, path, nr)] +term2lins (Token str) = return [Tok str] +term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (Empty) = return [] +term2lins (Variants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToMCat :: Constraint -> MCat -> CnvMonad MCat +addToMCat cn (MCat cat cns) = liftM (MCat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + + diff --git a/src/GF/Conversion/SimpleToMCFG/Strict.hs b/src/GF/Conversion/SimpleToMCFG/Strict.hs new file mode 100644 index 000000000..17c2293ec --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG/Strict.hs @@ -0,0 +1,128 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, deterministic. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Strict where -- (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM +import GF.Data.SortedList + +---------------------------------------------------------------------- +-- main conversion function + +type CnvMonad a = BacktrackM () a + +convertGrammar :: SimpleGrammar -> MGrammar +convertGrammar rules = tracePrt "Strict conversion: #MCFG rules" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SimpleRule -> CnvMonad MRule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + args_ctypes = zip3 [0..] args ctypes + instArgs <- mapM enumerateArg args_ctypes + let instTerm = substitutePaths instArgs term + newCat <- extractMCat cat ctype instTerm + newArgs <- mapM (extractArg instArgs) args_ctypes + let linRec = strPaths ctype instTerm >>= extractLin newArgs + let newLinRec = map (instantiateArgs newArgs) linRec + catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) +convertRule _ = failure + +---------------------------------------------------------------------- +-- category extraction + +extractArg :: [Term] -> (Int, Cat, LinType) -> CnvMonad MCat +extractArg args (nr, cat, ctype) = extractMCat cat ctype (args !! nr) + +extractMCat :: Cat -> LinType -> Term -> CnvMonad MCat +extractMCat cat ctype term = member $ map (MCat cat) $ parPaths ctype term + +enumerateArg :: (Int, Cat, LinType) -> CnvMonad Term +enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype + +---------------------------------------------------------------------- +-- Substitute each instantiated parameter path for its instantiation + +substitutePaths :: [Term] -> Term -> Term +substitutePaths arguments = subst + where subst (Arg nr _ path) = termFollowPath path (arguments !! nr) + subst (con :^ terms) = con :^ map subst terms + subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ] + subst (term :. lbl) = subst term +. lbl + subst (Tbl table) = Tbl [ (pat, subst term) | + (pat, term) <- table ] + subst (term :! select) = subst term +! subst select + subst (term :++ term') = subst term ?++ subst term' + subst (Variants terms) = Variants $ map subst terms + subst term = term + +---------------------------------------------------------------------- +-- term paths extaction + +termPaths :: LinType -> Term -> [(Path, (LinType, Term))] +termPaths ctype (Variants terms) = terms >>= termPaths ctype +termPaths (RecT rtype) (Rec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let Just ctype = lookup lbl rtype, + (path, value) <- termPaths ctype term ] +termPaths (TblT _ ctype) (Tbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths ctype term ] +termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: LinType -> Term -> [[(Path, Term)]] +parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $ + nubsort [ (path, value) | + (path, (ConT _ _, value)) <- termPaths ctype term ] + +strPaths :: LinType -> Term -> [(Path, Term)] +strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ] + +---------------------------------------------------------------------- +-- linearization extraction + +extractLin :: [MCat] -> (Path, Term) -> [Lin MCat MLabel Token] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (Empty) = [[]] + convertLin (Token tok) = [[Tok tok]] + convertLin (Variants terms) = concatMap convertLin terms + convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]] + convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path) + -- cgit v1.2.3