summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Conversion/SimpleToMCFG
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Conversion/SimpleToMCFG
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Conversion/SimpleToMCFG')
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs63
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs256
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs129
3 files changed, 448 insertions, 0 deletions
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs
new file mode 100644
index 000000000..319b99dcb
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs
@@ -0,0 +1,63 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:44 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Adding coercion functions to a MCFG if necessary.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToMCFG.Coercions
+ (addCoercions) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Conversion.Types
+import GF.Data.SortedList
+import Data.List (groupBy)
+
+----------------------------------------------------------------------
+
+addCoercions :: EGrammar -> EGrammar
+addCoercions rules = coercions ++ rules
+ where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) |
+ Rule (Abs head args _) (Cnc lbls _ _) <- rules ]
+ allHeadSet = nubsort allHeads
+ allArgSet = union allArgs <\\> map fst allHeadSet
+ coercions = tracePrt "SimpleToMCFG.Coercions - MCFG coercions" (prt . length) $
+ concat $
+ tracePrt "SimpleToMCFG.Coercions - MCFG coercions per category"
+ (prtList . map length) $
+ combineCoercions
+ (groupBy sameECatFst allHeadSet)
+ (groupBy sameECat allArgSet)
+ sameECatFst a b = sameECat (fst a) (fst b)
+
+
+combineCoercions [] _ = []
+combineCoercions _ [] = []
+combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs)
+ = case compare (ecat2scat $ fst $ head heads) (ecat2scat $ head args) of
+ LT -> combineCoercions allHeads allArgs'
+ GT -> combineCoercions allHeads' allArgs
+ EQ -> makeCoercion heads args : combineCoercions allHeads allArgs
+
+
+makeCoercion heads args
+ = [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) |
+ (head@(ECat _ headCns), lbls) <- heads,
+ let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ],
+ arg@(ECat _ argCns) <- args,
+ argCns `subset` headCns ]
+
+
+
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs
new file mode 100644
index 000000000..d6ff052f5
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs
@@ -0,0 +1,256 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/17 08:27:29 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.7 $
+--
+-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically.
+-- Afterwards, the grammar has to be extended with coercion functions,
+-- from the module 'GF.Conversion.SimpleToMCFG.Coercions'
+--
+-- the resulting grammars might be /very large/
+--
+-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToMCFG.Nondet
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.SimpleGFC
+import GF.Conversion.Types
+
+import GF.Data.BacktrackM
+import GF.Data.Utilities (notLongerThan, updateNthM)
+
+------------------------------------------------------------
+-- type declarations
+
+type CnvMonad a = BacktrackM Env a
+
+type Env = (ECat, [ECat], LinRec, [SLinType]) -- variable bindings: [(Var, STerm)]
+type LinRec = [Lin SCat MLabel Token]
+
+
+----------------------------------------------------------------------
+-- main conversion function
+
+maxNrRules :: Int
+maxNrRules = 5000
+
+convertGrammar :: SGrammar -> EGrammar
+convertGrammar rules = traceCalcFirst rules' $
+ tracePrt "SimpleToMCFG.Nondet - MCFG rules" (prt . length) $
+ rules'
+ where rules' = rules >>= convertRule
+-- solutions conversion undefined
+-- where conversion = member rules >>= convertRule
+
+convertRule :: SRule -> [ERule] -- CnvMonad ERule
+convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) =
+-- | prt(name2fun fun) `elem`
+-- words "UseCl PosTP TPast ASimul SPredV IndefOneNP DefOneNP UseN2 mother_N2 jump_V" =
+ if notLongerThan maxNrRules rules
+ then tracePrt ("SimpeToMCFG.Nondet - MCFG rules for " ++ prt fun) (prt . length) $
+ rules
+ else trace2 "SimpeToMCFG.Nondet - TOO MANY RULES, function not converted"
+ ("More than " ++ show maxNrRules ++ " MCFG rules for " ++ prt fun) $
+ []
+ where rules = flip solutions undefined $
+ do let cat : args = map decl2cat (decl : decls)
+ writeState (initialECat cat, map initialECat args, [], ctypes)
+ rterm <- simplifyTerm term
+ reduceTerm ctype emptyPath rterm
+ (newCat, newArgs, linRec, _) <- readState
+ let newLinRec = map (instantiateArgs newArgs) linRec
+ catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes)
+ -- checkLinRec argsPaths catPaths newLinRec
+ return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec)
+convertRule _ = [] -- failure
+
+
+----------------------------------------------------------------------
+-- "type-checking" the resulting linearization
+-- should not be necessary, if the algorithms (type-checking and conversion) are correct
+
+checkLinRec args lbls = mapM (checkLin args lbls)
+
+checkLin args lbls (Lin lbl lin)
+ | lbl `elem` lbls = mapM (symbol (checkArg args) (const (return ()))) lin
+ | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" "Label mismatch" $
+ failure
+
+checkArg args (_cat, lbl, nr)
+ | lbl `elem` (args !! nr) = return ()
+-- | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" ("Label mismatch in arg " ++ prt nr) $
+-- failure
+ | otherwise = trace2 ("SimpleToMCFG.Nondet - ERROR: Label mismatch in arg " ++ prt nr)
+ (prt lbl ++ " `notElem` " ++ prt (args!!nr)) $
+ failure
+
+
+----------------------------------------------------------------------
+-- term simplification
+
+simplifyTerm :: STerm -> CnvMonad STerm
+simplifyTerm (term :! sel)
+ = do sterm <- simplifyTerm term
+ ssel <- simplifyTerm sel
+ case sterm of
+ Tbl table -> do (pat, val) <- member table
+ pat =?= ssel
+ return val
+ _ -> do sel' <- expandTerm ssel
+ return (sterm +! sel')
+-- simplifyTerm (Var x) = readBinding x
+simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms
+simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record
+simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term
+simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table
+simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms
+simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2)
+simplifyTerm term = return term
+
+simplifyAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
+simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term
+
+simplifyCase :: (STerm, STerm) -> CnvMonad (STerm, STerm)
+simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term)
+
+
+------------------------------------------------------------
+-- reducing simplified terms, collecting MCF rules
+
+reduceTerm :: SLinType -> SPath -> STerm -> CnvMonad ()
+--reduceTerm ctype path (Variants terms)
+-- = member terms >>= reduceTerm ctype path
+reduceTerm (StrT) path term = updateLin (path, term)
+reduceTerm (ConT _) path term = do pat <- expandTerm term
+ updateHead (path, pat)
+reduceTerm (RecT rtype) path term
+ = sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | (lbl, ctype) <- rtype ]
+reduceTerm (TblT pats vtype) path table
+ = sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | pat <- pats ]
+
+
+------------------------------------------------------------
+-- expanding a term to ground terms
+
+expandTerm :: STerm -> CnvMonad STerm
+expandTerm arg@(Arg nr _ path)
+ = do ctypes <- readArgCTypes
+ unifyPType arg $ lintypeFollowPath path $ ctypes !! nr
+-- expandTerm arg@(Arg nr _ path)
+-- = do ctypes <- readArgCTypes
+-- pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr
+-- pat =?= arg
+-- return pat
+expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms
+expandTerm (Rec record) = liftM Rec $ mapM expandAssign record
+--expandTerm (Variants terms) = liftM Variants $ mapM expandTerm terms
+expandTerm (Variants terms) = member terms >>= expandTerm
+expandTerm term = error $ "expandTerm: " ++ prt term
+
+expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
+expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term
+
+unifyPType :: STerm -> SLinType -> CnvMonad STerm
+unifyPType arg (RecT prec) =
+ liftM Rec $
+ sequence [ liftM ((,) lbl) $
+ unifyPType (arg +. lbl) ptype |
+ (lbl, ptype) <- prec ]
+unifyPType (Arg nr _ path) (ConT terms) =
+ do (_, args, _, _) <- readState
+ case lookup path (ecatConstraints (args !! nr)) of
+ Just term -> return term
+ Nothing -> do term <- member terms
+ updateArg nr (path, term)
+ return term
+
+------------------------------------------------------------
+-- unification of patterns and selection terms
+
+(=?=) :: STerm -> STerm -> CnvMonad ()
+-- Wildcard =?= _ = return ()
+-- Var x =?= term = addBinding x term
+Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) |
+ (lbl, pat) <- precord ]
+pat =?= Arg nr _ path = updateArg nr (path, pat)
+(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms)
+ sequence_ $ zipWith (=?=) pats terms
+Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm |
+ (lbl, pat) <- precord,
+ let mterm = lookup lbl record ]
+-- variants are not allowed in patterns, but in selection terms:
+term =?= Variants terms = member terms >>= (term =?=)
+pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term
+
+----------------------------------------------------------------------
+-- variable bindings (does not work correctly)
+{-
+addBinding x term = do (a, b, c, d, bindings) <- readState
+ writeState (a, b, c, d, (x,term):bindings)
+
+readBinding x = do (_, _, _, _, bindings) <- readState
+ return $ maybe (Var x) id $ lookup x bindings
+-}
+
+------------------------------------------------------------
+-- updating the MCF rule
+
+readArgCTypes :: CnvMonad [SLinType]
+readArgCTypes = do (_, _, _, env) <- readState
+ return env
+
+updateArg :: Int -> Constraint -> CnvMonad ()
+updateArg arg cn
+ = do (head, args, lins, env) <- readState
+ args' <- updateNthM (addToECat cn) arg args
+ writeState (head, args', lins, env)
+
+updateHead :: Constraint -> CnvMonad ()
+updateHead cn
+ = do (head, args, lins, env) <- readState
+ head' <- addToECat cn head
+ writeState (head', args, lins, env)
+
+updateLin :: Constraint -> CnvMonad ()
+updateLin (path, term)
+ = do let newLins = term2lins term
+ (head, args, lins, env) <- readState
+ let lins' = lins ++ map (Lin path) newLins
+ writeState (head, args, lins', env)
+
+term2lins :: STerm -> [[Symbol (SCat, SPath, Int) Token]]
+term2lins (Arg nr cat path) = return [Cat (cat, path, nr)]
+term2lins (Token str) = return [Tok str]
+term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2)
+term2lins (Empty) = return []
+term2lins (Variants terms) = terms >>= term2lins
+term2lins term = error $ "term2lins: " ++ show term
+
+addToECat :: Constraint -> ECat -> CnvMonad ECat
+addToECat cn (ECat cat cns) = liftM (ECat cat) $ addConstraint cn cns
+
+addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint]
+addConstraint cn0 (cn : cns)
+ | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns)
+ | fst cn0 == fst cn = guard (snd cn0 == snd cn) >>
+ return (cn : cns)
+addConstraint cn0 cns = return (cn0 : cns)
+
+
+
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs
new file mode 100644
index 000000000..a5519fcd8
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs
@@ -0,0 +1,129 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:44 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Converting SimpleGFC grammars to MCFG grammars, deterministic.
+--
+-- the resulting grammars might be /very large/
+--
+-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToMCFG.Strict
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.SimpleGFC
+import GF.Conversion.Types
+
+import GF.Data.BacktrackM
+import GF.Data.SortedList
+
+----------------------------------------------------------------------
+-- main conversion function
+
+type CnvMonad a = BacktrackM () a
+
+convertGrammar :: SGrammar -> EGrammar
+convertGrammar rules = tracePrt "SimpleToMCFG.Strict - MCFG rules" (prt . length) $
+ solutions conversion undefined
+ where conversion = member rules >>= convertRule
+
+convertRule :: SRule -> CnvMonad ERule
+convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term)))
+ = do let cat : args = map decl2cat (decl : decls)
+ args_ctypes = zip3 [0..] args ctypes
+ instArgs <- mapM enumerateArg args_ctypes
+ let instTerm = substitutePaths instArgs term
+ newCat <- extractECat cat ctype instTerm
+ newArgs <- mapM (extractArg instArgs) args_ctypes
+ let linRec = strPaths ctype instTerm >>= extractLin newArgs
+ let newLinRec = map (instantiateArgs newArgs) linRec
+ catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes)
+ return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec)
+convertRule _ = failure
+
+----------------------------------------------------------------------
+-- category extraction
+
+extractArg :: [STerm] -> (Int, SCat, SLinType) -> CnvMonad ECat
+extractArg args (nr, cat, ctype) = extractECat cat ctype (args !! nr)
+
+extractECat :: SCat -> SLinType -> STerm -> CnvMonad ECat
+extractECat cat ctype term = member $ map (ECat cat) $ parPaths ctype term
+
+enumerateArg :: (Int, SCat, SLinType) -> CnvMonad STerm
+enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype
+
+----------------------------------------------------------------------
+-- Substitute each instantiated parameter path for its instantiation
+
+substitutePaths :: [STerm] -> STerm -> STerm
+substitutePaths arguments = subst
+ where subst (Arg nr _ path) = termFollowPath path (arguments !! nr)
+ subst (con :^ terms) = con :^ map subst terms
+ subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ]
+ subst (term :. lbl) = subst term +. lbl
+ subst (Tbl table) = Tbl [ (pat, subst term) |
+ (pat, term) <- table ]
+ subst (term :! select) = subst term +! subst select
+ subst (term :++ term') = subst term ?++ subst term'
+ subst (Variants terms) = Variants $ map subst terms
+ subst term = term
+
+----------------------------------------------------------------------
+-- term paths extaction
+
+termPaths :: SLinType -> STerm -> [(SPath, (SLinType, STerm))]
+termPaths ctype (Variants terms) = terms >>= termPaths ctype
+termPaths (RecT rtype) (Rec record)
+ = [ (path ++. lbl, value) |
+ (lbl, term) <- record,
+ let Just ctype = lookup lbl rtype,
+ (path, value) <- termPaths ctype term ]
+termPaths (TblT _ ctype) (Tbl table)
+ = [ (path ++! pat, value) |
+ (pat, term) <- table,
+ (path, value) <- termPaths ctype term ]
+termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ]
+
+{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt):
+{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2}
+[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2]
+-}
+
+parPaths :: SLinType -> STerm -> [[(SPath, STerm)]]
+parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $
+ nubsort [ (path, value) |
+ (path, (ConT _, value)) <- termPaths ctype term ]
+
+strPaths :: SLinType -> STerm -> [(SPath, STerm)]
+strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ]
+ where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ]
+
+----------------------------------------------------------------------
+-- linearization extraction
+
+extractLin :: [ECat] -> (SPath, STerm) -> [Lin ECat MLabel Token]
+extractLin args (path, term) = map (Lin path) (convertLin term)
+ where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2)
+ convertLin (Empty) = [[]]
+ convertLin (Token tok) = [[Tok tok]]
+ convertLin (Variants terms) = concatMap convertLin terms
+ convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]]
+ convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path)
+