diff options
Diffstat (limited to 'src/GF/Conversion/SimpleToMCFG')
| -rw-r--r-- | src/GF/Conversion/SimpleToMCFG/Coercions.hs | 63 | ||||
| -rw-r--r-- | src/GF/Conversion/SimpleToMCFG/Nondet.hs | 256 | ||||
| -rw-r--r-- | src/GF/Conversion/SimpleToMCFG/Strict.hs | 129 |
3 files changed, 0 insertions, 448 deletions
diff --git a/src/GF/Conversion/SimpleToMCFG/Coercions.hs b/src/GF/Conversion/SimpleToMCFG/Coercions.hs deleted file mode 100644 index 319b99dcb..000000000 --- a/src/GF/Conversion/SimpleToMCFG/Coercions.hs +++ /dev/null @@ -1,63 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/09 09:28:44 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.5 $ --- --- Adding coercion functions to a MCFG if necessary. ------------------------------------------------------------------------------ - - -module GF.Conversion.SimpleToMCFG.Coercions - (addCoercions) where - -import GF.System.Tracing -import GF.Infra.Print - -import GF.Formalism.Utilities -import GF.Formalism.GCFG -import GF.Formalism.MCFG -import GF.Conversion.Types -import GF.Data.SortedList -import Data.List (groupBy) - ----------------------------------------------------------------------- - -addCoercions :: EGrammar -> EGrammar -addCoercions rules = coercions ++ rules - where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | - Rule (Abs head args _) (Cnc lbls _ _) <- rules ] - allHeadSet = nubsort allHeads - allArgSet = union allArgs <\\> map fst allHeadSet - coercions = tracePrt "SimpleToMCFG.Coercions - MCFG coercions" (prt . length) $ - concat $ - tracePrt "SimpleToMCFG.Coercions - MCFG coercions per category" - (prtList . map length) $ - combineCoercions - (groupBy sameECatFst allHeadSet) - (groupBy sameECat allArgSet) - sameECatFst a b = sameECat (fst a) (fst b) - - -combineCoercions [] _ = [] -combineCoercions _ [] = [] -combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) - = case compare (ecat2scat $ fst $ head heads) (ecat2scat $ head args) of - LT -> combineCoercions allHeads allArgs' - GT -> combineCoercions allHeads' allArgs - EQ -> makeCoercion heads args : combineCoercions allHeads allArgs - - -makeCoercion heads args - = [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) | - (head@(ECat _ headCns), lbls) <- heads, - let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], - arg@(ECat _ argCns) <- args, - argCns `subset` headCns ] - - - diff --git a/src/GF/Conversion/SimpleToMCFG/Nondet.hs b/src/GF/Conversion/SimpleToMCFG/Nondet.hs deleted file mode 100644 index d6ff052f5..000000000 --- a/src/GF/Conversion/SimpleToMCFG/Nondet.hs +++ /dev/null @@ -1,256 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/08/17 08:27:29 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.7 $ --- --- Converting SimpleGFC grammars to MCFG grammars, nondeterministically. --- Afterwards, the grammar has to be extended with coercion functions, --- from the module 'GF.Conversion.SimpleToMCFG.Coercions' --- --- the resulting grammars might be /very large/ --- --- the conversion is only equivalent if the GFC grammar has a context-free backbone. ------------------------------------------------------------------------------ - - -module GF.Conversion.SimpleToMCFG.Nondet - (convertGrammar) where - -import GF.System.Tracing -import GF.Infra.Print - -import Control.Monad - -import GF.Formalism.Utilities -import GF.Formalism.GCFG -import GF.Formalism.MCFG -import GF.Formalism.SimpleGFC -import GF.Conversion.Types - -import GF.Data.BacktrackM -import GF.Data.Utilities (notLongerThan, updateNthM) - ------------------------------------------------------------- --- type declarations - -type CnvMonad a = BacktrackM Env a - -type Env = (ECat, [ECat], LinRec, [SLinType]) -- variable bindings: [(Var, STerm)] -type LinRec = [Lin SCat MLabel Token] - - ----------------------------------------------------------------------- --- main conversion function - -maxNrRules :: Int -maxNrRules = 5000 - -convertGrammar :: SGrammar -> EGrammar -convertGrammar rules = traceCalcFirst rules' $ - tracePrt "SimpleToMCFG.Nondet - MCFG rules" (prt . length) $ - rules' - where rules' = rules >>= convertRule --- solutions conversion undefined --- where conversion = member rules >>= convertRule - -convertRule :: SRule -> [ERule] -- CnvMonad ERule -convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) = --- | prt(name2fun fun) `elem` --- words "UseCl PosTP TPast ASimul SPredV IndefOneNP DefOneNP UseN2 mother_N2 jump_V" = - if notLongerThan maxNrRules rules - then tracePrt ("SimpeToMCFG.Nondet - MCFG rules for " ++ prt fun) (prt . length) $ - rules - else trace2 "SimpeToMCFG.Nondet - TOO MANY RULES, function not converted" - ("More than " ++ show maxNrRules ++ " MCFG rules for " ++ prt fun) $ - [] - where rules = flip solutions undefined $ - do let cat : args = map decl2cat (decl : decls) - writeState (initialECat cat, map initialECat args, [], ctypes) - rterm <- simplifyTerm term - reduceTerm ctype emptyPath rterm - (newCat, newArgs, linRec, _) <- readState - let newLinRec = map (instantiateArgs newArgs) linRec - catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) - -- checkLinRec argsPaths catPaths newLinRec - return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) -convertRule _ = [] -- failure - - ----------------------------------------------------------------------- --- "type-checking" the resulting linearization --- should not be necessary, if the algorithms (type-checking and conversion) are correct - -checkLinRec args lbls = mapM (checkLin args lbls) - -checkLin args lbls (Lin lbl lin) - | lbl `elem` lbls = mapM (symbol (checkArg args) (const (return ()))) lin - | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" "Label mismatch" $ - failure - -checkArg args (_cat, lbl, nr) - | lbl `elem` (args !! nr) = return () --- | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" ("Label mismatch in arg " ++ prt nr) $ --- failure - | otherwise = trace2 ("SimpleToMCFG.Nondet - ERROR: Label mismatch in arg " ++ prt nr) - (prt lbl ++ " `notElem` " ++ prt (args!!nr)) $ - failure - - ----------------------------------------------------------------------- --- term simplification - -simplifyTerm :: STerm -> CnvMonad STerm -simplifyTerm (term :! sel) - = do sterm <- simplifyTerm term - ssel <- simplifyTerm sel - case sterm of - Tbl table -> do (pat, val) <- member table - pat =?= ssel - return val - _ -> do sel' <- expandTerm ssel - return (sterm +! sel') --- simplifyTerm (Var x) = readBinding x -simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms -simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record -simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term -simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table -simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms -simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2) -simplifyTerm term = return term - -simplifyAssign :: (Label, STerm) -> CnvMonad (Label, STerm) -simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term - -simplifyCase :: (STerm, STerm) -> CnvMonad (STerm, STerm) -simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term) - - ------------------------------------------------------------- --- reducing simplified terms, collecting MCF rules - -reduceTerm :: SLinType -> SPath -> STerm -> CnvMonad () ---reduceTerm ctype path (Variants terms) --- = member terms >>= reduceTerm ctype path -reduceTerm (StrT) path term = updateLin (path, term) -reduceTerm (ConT _) path term = do pat <- expandTerm term - updateHead (path, pat) -reduceTerm (RecT rtype) path term - = sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | (lbl, ctype) <- rtype ] -reduceTerm (TblT pats vtype) path table - = sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | pat <- pats ] - - ------------------------------------------------------------- --- expanding a term to ground terms - -expandTerm :: STerm -> CnvMonad STerm -expandTerm arg@(Arg nr _ path) - = do ctypes <- readArgCTypes - unifyPType arg $ lintypeFollowPath path $ ctypes !! nr --- expandTerm arg@(Arg nr _ path) --- = do ctypes <- readArgCTypes --- pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr --- pat =?= arg --- return pat -expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms -expandTerm (Rec record) = liftM Rec $ mapM expandAssign record ---expandTerm (Variants terms) = liftM Variants $ mapM expandTerm terms -expandTerm (Variants terms) = member terms >>= expandTerm -expandTerm term = error $ "expandTerm: " ++ prt term - -expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm) -expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term - -unifyPType :: STerm -> SLinType -> CnvMonad STerm -unifyPType arg (RecT prec) = - liftM Rec $ - sequence [ liftM ((,) lbl) $ - unifyPType (arg +. lbl) ptype | - (lbl, ptype) <- prec ] -unifyPType (Arg nr _ path) (ConT terms) = - do (_, args, _, _) <- readState - case lookup path (ecatConstraints (args !! nr)) of - Just term -> return term - Nothing -> do term <- member terms - updateArg nr (path, term) - return term - ------------------------------------------------------------- --- unification of patterns and selection terms - -(=?=) :: STerm -> STerm -> CnvMonad () --- Wildcard =?= _ = return () --- Var x =?= term = addBinding x term -Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | - (lbl, pat) <- precord ] -pat =?= Arg nr _ path = updateArg nr (path, pat) -(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms) - sequence_ $ zipWith (=?=) pats terms -Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm | - (lbl, pat) <- precord, - let mterm = lookup lbl record ] --- variants are not allowed in patterns, but in selection terms: -term =?= Variants terms = member terms >>= (term =?=) -pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term - ----------------------------------------------------------------------- --- variable bindings (does not work correctly) -{- -addBinding x term = do (a, b, c, d, bindings) <- readState - writeState (a, b, c, d, (x,term):bindings) - -readBinding x = do (_, _, _, _, bindings) <- readState - return $ maybe (Var x) id $ lookup x bindings --} - ------------------------------------------------------------- --- updating the MCF rule - -readArgCTypes :: CnvMonad [SLinType] -readArgCTypes = do (_, _, _, env) <- readState - return env - -updateArg :: Int -> Constraint -> CnvMonad () -updateArg arg cn - = do (head, args, lins, env) <- readState - args' <- updateNthM (addToECat cn) arg args - writeState (head, args', lins, env) - -updateHead :: Constraint -> CnvMonad () -updateHead cn - = do (head, args, lins, env) <- readState - head' <- addToECat cn head - writeState (head', args, lins, env) - -updateLin :: Constraint -> CnvMonad () -updateLin (path, term) - = do let newLins = term2lins term - (head, args, lins, env) <- readState - let lins' = lins ++ map (Lin path) newLins - writeState (head, args, lins', env) - -term2lins :: STerm -> [[Symbol (SCat, SPath, Int) Token]] -term2lins (Arg nr cat path) = return [Cat (cat, path, nr)] -term2lins (Token str) = return [Tok str] -term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2) -term2lins (Empty) = return [] -term2lins (Variants terms) = terms >>= term2lins -term2lins term = error $ "term2lins: " ++ show term - -addToECat :: Constraint -> ECat -> CnvMonad ECat -addToECat cn (ECat cat cns) = liftM (ECat cat) $ addConstraint cn cns - -addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] -addConstraint cn0 (cn : cns) - | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) - | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> - return (cn : cns) -addConstraint cn0 cns = return (cn0 : cns) - - - diff --git a/src/GF/Conversion/SimpleToMCFG/Strict.hs b/src/GF/Conversion/SimpleToMCFG/Strict.hs deleted file mode 100644 index a5519fcd8..000000000 --- a/src/GF/Conversion/SimpleToMCFG/Strict.hs +++ /dev/null @@ -1,129 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/09 09:28:44 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.5 $ --- --- Converting SimpleGFC grammars to MCFG grammars, deterministic. --- --- the resulting grammars might be /very large/ --- --- the conversion is only equivalent if the GFC grammar has a context-free backbone. ------------------------------------------------------------------------------ - - -module GF.Conversion.SimpleToMCFG.Strict - (convertGrammar) where - -import GF.System.Tracing -import GF.Infra.Print - -import Control.Monad - -import GF.Formalism.Utilities -import GF.Formalism.GCFG -import GF.Formalism.MCFG -import GF.Formalism.SimpleGFC -import GF.Conversion.Types - -import GF.Data.BacktrackM -import GF.Data.SortedList - ----------------------------------------------------------------------- --- main conversion function - -type CnvMonad a = BacktrackM () a - -convertGrammar :: SGrammar -> EGrammar -convertGrammar rules = tracePrt "SimpleToMCFG.Strict - MCFG rules" (prt . length) $ - solutions conversion undefined - where conversion = member rules >>= convertRule - -convertRule :: SRule -> CnvMonad ERule -convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) - = do let cat : args = map decl2cat (decl : decls) - args_ctypes = zip3 [0..] args ctypes - instArgs <- mapM enumerateArg args_ctypes - let instTerm = substitutePaths instArgs term - newCat <- extractECat cat ctype instTerm - newArgs <- mapM (extractArg instArgs) args_ctypes - let linRec = strPaths ctype instTerm >>= extractLin newArgs - let newLinRec = map (instantiateArgs newArgs) linRec - catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) - return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) -convertRule _ = failure - ----------------------------------------------------------------------- --- category extraction - -extractArg :: [STerm] -> (Int, SCat, SLinType) -> CnvMonad ECat -extractArg args (nr, cat, ctype) = extractECat cat ctype (args !! nr) - -extractECat :: SCat -> SLinType -> STerm -> CnvMonad ECat -extractECat cat ctype term = member $ map (ECat cat) $ parPaths ctype term - -enumerateArg :: (Int, SCat, SLinType) -> CnvMonad STerm -enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype - ----------------------------------------------------------------------- --- Substitute each instantiated parameter path for its instantiation - -substitutePaths :: [STerm] -> STerm -> STerm -substitutePaths arguments = subst - where subst (Arg nr _ path) = termFollowPath path (arguments !! nr) - subst (con :^ terms) = con :^ map subst terms - subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ] - subst (term :. lbl) = subst term +. lbl - subst (Tbl table) = Tbl [ (pat, subst term) | - (pat, term) <- table ] - subst (term :! select) = subst term +! subst select - subst (term :++ term') = subst term ?++ subst term' - subst (Variants terms) = Variants $ map subst terms - subst term = term - ----------------------------------------------------------------------- --- term paths extaction - -termPaths :: SLinType -> STerm -> [(SPath, (SLinType, STerm))] -termPaths ctype (Variants terms) = terms >>= termPaths ctype -termPaths (RecT rtype) (Rec record) - = [ (path ++. lbl, value) | - (lbl, term) <- record, - let Just ctype = lookup lbl rtype, - (path, value) <- termPaths ctype term ] -termPaths (TblT _ ctype) (Tbl table) - = [ (path ++! pat, value) | - (pat, term) <- table, - (path, value) <- termPaths ctype term ] -termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ] - -{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): -{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} -[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] --} - -parPaths :: SLinType -> STerm -> [[(SPath, STerm)]] -parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $ - nubsort [ (path, value) | - (path, (ConT _, value)) <- termPaths ctype term ] - -strPaths :: SLinType -> STerm -> [(SPath, STerm)] -strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ] - where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ] - ----------------------------------------------------------------------- --- linearization extraction - -extractLin :: [ECat] -> (SPath, STerm) -> [Lin ECat MLabel Token] -extractLin args (path, term) = map (Lin path) (convertLin term) - where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2) - convertLin (Empty) = [[]] - convertLin (Token tok) = [[Tok tok]] - convertLin (Variants terms) = concatMap convertLin terms - convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]] - convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path) - |
