diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Conversion | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Conversion')
| -rw-r--r-- | src-3.0/GF/Conversion/GFC.hs | 157 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/GFCtoSimple.hs | 175 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/Haskell.hs | 71 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/MCFGtoCFG.hs | 53 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/MCFGtoFCFG.hs | 51 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/Prolog.hs | 205 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/RemoveEpsilon.hs | 46 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/RemoveErasing.hs | 113 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/RemoveSingletons.hs | 82 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/SimpleToFCFG.hs | 536 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/SimpleToFinite.hs | 178 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/SimpleToMCFG.hs | 26 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs | 63 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs | 256 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs | 129 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/TypeGraph.hs | 58 | ||||
| -rw-r--r-- | src-3.0/GF/Conversion/Types.hs | 146 |
17 files changed, 2345 insertions, 0 deletions
diff --git a/src-3.0/GF/Conversion/GFC.hs b/src-3.0/GF/Conversion/GFC.hs new file mode 100644 index 000000000..354bdea65 --- /dev/null +++ b/src-3.0/GF/Conversion/GFC.hs @@ -0,0 +1,157 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/01 09:53:18 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.14 $ +-- +-- All conversions from GFC +----------------------------------------------------------------------------- + +module GF.Conversion.GFC + (module GF.Conversion.GFC, + SGrammar, EGrammar, MGrammar, CGrammar) where + +import GF.Infra.Option +import GF.Canon.GFC (CanonGrammar) +import GF.Infra.Ident (Ident, identC) +import qualified GF.Infra.Modules as M + +import GF.Formalism.GCFG (Rule(..), Abstract(..)) +import GF.Formalism.SimpleGFC (decl2cat) +import GF.Formalism.CFG (CFRule(..)) +import GF.Formalism.Utilities (symbol, name2fun) +import GF.Conversion.Types + +import qualified GF.Conversion.GFCtoSimple as G2S +import qualified GF.Conversion.SimpleToFinite as S2Fin +import qualified GF.Conversion.RemoveSingletons as RemSing +import qualified GF.Conversion.RemoveErasing as RemEra +import qualified GF.Conversion.RemoveEpsilon as RemEps +import qualified GF.Conversion.SimpleToMCFG as S2M +import qualified GF.Conversion.MCFGtoCFG as M2C + +import GF.Infra.Print + +import GF.System.Tracing + +---------------------------------------------------------------------- +-- * GFC -> MCFG & CFG, using options to decide which conversion is used + +convertGFC :: Options -> (CanonGrammar, Ident) + -> (SGrammar, (EGrammar, (MGrammar, CGrammar))) +convertGFC opts = \g -> let s = g2s g + e = s2e s + m = e2m e + in trace2 "Options" (show opts) (s, (e, (m, e2c e))) + where e2c = M2C.convertGrammar + e2m = case getOptVal opts firstCat of + Just cat -> flip erasing [identC cat] + Nothing -> flip erasing [] + s2e = case getOptVal opts gfcConversion of + Just "strict" -> strict + Just "finite-strict" -> strict + Just "epsilon" -> epsilon . nondet + _ -> nondet + g2s = case getOptVal opts gfcConversion of + Just "finite" -> finite . simple + Just "finite2" -> finite . finite . simple + Just "finite3" -> finite . finite . finite . simple + Just "singletons" -> single . simple + Just "finite-singletons" -> single . finite . simple + Just "finite-strict" -> finite . simple + _ -> simple + + simple = G2S.convertGrammar + strict = S2M.convertGrammarStrict + nondet = S2M.convertGrammarNondet + epsilon = RemEps.convertGrammar + finite = S2Fin.convertGrammar + single = RemSing.convertGrammar + erasing = RemEra.convertGrammar + +gfc2simple :: Options -> (CanonGrammar, Ident) -> SGrammar +gfc2simple opts = fst . convertGFC opts + +gfc2mcfg :: Options -> (CanonGrammar, Ident) -> MGrammar +gfc2mcfg opts g = mcfg + where + (mcfg, _) = snd (snd (convertGFC opts g)) + +gfc2cfg :: Options -> (CanonGrammar, Ident) -> CGrammar +gfc2cfg opts g = cfg + where + (_, cfg) = snd (snd (convertGFC opts g)) + + +---------------------------------------------------------------------- +-- * single step conversions + +{- +gfc2simple :: (CanonGrammar, Ident) -> SGrammar +gfc2simple = G2S.convertGrammar + +simple2finite :: SGrammar -> SGrammar +simple2finite = S2Fin.convertGrammar + +removeSingletons :: SGrammar -> SGrammar +removeSingletons = RemSing.convertGrammar + +simple2mcfg_nondet :: SGrammar -> EGrammar +simple2mcfg_nondet = + +simple2mcfg_strict :: SGrammar -> EGrammar +simple2mcfg_strict = S2M.convertGrammarStrict + +mcfg2cfg :: EGrammar -> CGrammar +mcfg2cfg = M2C.convertGrammar + +removeErasing :: EGrammar -> [SCat] -> MGrammar +removeErasing = RemEra.convertGrammar + +removeEpsilon :: EGrammar -> EGrammar +removeEpsilon = RemEps.convertGrammar +-} + +---------------------------------------------------------------------- +-- * converting to some obscure formats + +gfc2abstract :: (CanonGrammar, Ident) -> [Abstract SCat Fun] +gfc2abstract gr = [ Abs (decl2cat decl) (map decl2cat decls) (name2fun name) | + Rule (Abs decl decls name) _ <- G2S.convertGrammar gr ] + +abstract2skvatt :: [Abstract SCat Fun] -> String +abstract2skvatt gr = skvatt_hdr ++ concatMap abs2pl gr + where abs2pl (Abs cat [] fun) = prtQuoted cat ++ " ---> " ++ + "\"" ++ prt fun ++ "\".\n" + abs2pl (Abs cat cats fun) = + prtQuoted cat ++ " ---> " ++ + "\"(" ++ prt fun ++ "\"" ++ + prtBefore ", \" \", " (map prtQuoted cats) ++ ", \")\".\n" + +cfg2skvatt :: CGrammar -> String +cfg2skvatt gr = skvatt_hdr ++ concatMap cfg2pl gr + where cfg2pl (CFRule cat syms _name) = + prtQuoted cat ++ " ---> " ++ + if null syms then "\"\".\n" else + prtSep ", " (map (symbol prtQuoted prTok) syms) ++ ".\n" + prTok tok = "\"" ++ tok ++ " \"" + +skvatt_hdr = ":- use_module(library(skvatt)).\n" ++ + ":- use_module(library(utils), [repeat/1]).\n" ++ + "corpus(File, StartCat, Depth, Size) :- \n" ++ + " set_flag(gendepth, Depth),\n" ++ + " tell(File), repeat(Size),\n" ++ + " generate_words(StartCat, String), format('~s~n~n', [String]),\n" ++ + " write(user_error, '.'),\n" ++ + " fail ; told.\n\n" + +prtQuoted :: Print a => a -> String +prtQuoted a = "'" ++ prt a ++ "'" + + + + diff --git a/src-3.0/GF/Conversion/GFCtoSimple.hs b/src-3.0/GF/Conversion/GFCtoSimple.hs new file mode 100644 index 000000000..b6a34a8ce --- /dev/null +++ b/src-3.0/GF/Conversion/GFCtoSimple.hs @@ -0,0 +1,175 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/07 11:24:51 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.15 $ +-- +-- Converting GFC to SimpleGFC +-- +-- the conversion might fail if the GFC grammar has dependent or higher-order types, +-- or if the grammar contains bound pattern variables +-- (use -optimize=values/share/none when importing) +-- +-- TODO: lift all functions to the 'Err' monad +----------------------------------------------------------------------------- + +module GF.Conversion.GFCtoSimple + (convertGrammar) where + +import qualified GF.Canon.AbsGFC as A +import qualified GF.Infra.Ident as I +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.Utilities +import GF.Conversion.Types + +import GF.UseGrammar.Linear (expandLinTables) +import GF.Canon.GFC (CanonGrammar) +import GF.Canon.MkGFC (grammar2canon) +import GF.Canon.Subexpressions (unSubelimCanon) +import qualified GF.Canon.Look as Look (lookupLin, allParamValues, lookupLincat) +import qualified GF.Canon.CMacros as CMacros (defLinType) +import GF.Data.Operations (err, errVal) +--import qualified Modules as M + +import GF.System.Tracing +import GF.Infra.Print + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, I.Ident) + +convertGrammar :: Env -> SGrammar +convertGrammar (g,i) = trace2 "GFCtoSimple - concrete language" (prt (snd gram)) $ + tracePrt "GFCtoSimple - simpleGFC rules" (prt . length) $ + [ convertAbsFun gram fun typing | + A.Mod (A.MTAbs modname) _ _ _ defs <- modules, + A.AbsDFun fun typing _ <- defs ] + where A.Gr modules = grammar2canon (fst gram) + gram = (unSubelimCanon g,i) + +convertAbsFun :: Env -> I.Ident -> A.Exp -> SRule +convertAbsFun gram fun typing = -- trace2 "GFCtoSimple - converting function" (prt fun) $ + Rule abs cnc + where abs = convertAbstract [] fun typing + cnc = convertConcrete gram abs + +---------------------------------------------------------------------- +-- abstract definitions + +convertAbstract :: [SDecl] -> Fun -> A.Exp -> Abstract SDecl Name +convertAbstract env fun (A.EProd x a b) + = convertAbstract (convertAbsType x' [] a : env) fun b + where x' = if x==I.identC "h_" then anyVar else x +convertAbstract env fun a + = Abs (convertAbsType anyVar [] a) (reverse env) name + where name = Name fun [ Unify [n] | n <- [0 .. length env-1] ] + +convertAbsType :: Var -> [FOType SCat] -> A.Exp -> SDecl +convertAbsType x args (A.EProd _ a b) = convertAbsType x (convertType [] a : args) b +convertAbsType x args a = Decl x (reverse args ::--> convertType [] a) + +convertType :: [TTerm] -> A.Exp -> FOType SCat +convertType args (A.EApp a b) = convertType (convertExp [] b : args) a +convertType args (A.EAtom at) = convertCat at ::@ reverse args +convertType args (A.EProd _ _ b) = convertType args b ---- AR 7/10 workaround +convertType args exp = error $ "GFCtoSimple.convertType: " ++ prt exp + +{- Exp from GF/Canon/GFC.cf: +EApp. Exp1 ::= Exp1 Exp2 ; +EProd. Exp ::= "(" Ident ":" Exp ")" "->" Exp ; +EAbs. Exp ::= "\\" Ident "->" Exp ; +EAtom. Exp2 ::= Atom ; +EData. Exp2 ::= "data" ; +-} + +convertExp :: [TTerm] -> A.Exp -> TTerm +convertExp args (A.EAtom at) = convertAtom args at +convertExp args (A.EApp a b) = convertExp (convertExp [] b : args) a +convertExp args exp = error $ "GFCtoSimple.convertExp: " ++ prt exp + +convertAtom :: [TTerm] -> A.Atom -> TTerm +convertAtom args (A.AC con) = con :@ reverse args +-- A.AD: is this correct??? +convertAtom args (A.AD con) = con :@ args +convertAtom [] (A.AV var) = TVar var +convertAtom args atom = error $ "GFCtoSimple.convertAtom: " ++ prt args ++ " " ++ show atom + +convertCat :: A.Atom -> SCat +convertCat (A.AC (A.CIQ _ cat)) = cat +convertCat atom = error $ "GFCtoSimple.convertCat: " ++ show atom + +---------------------------------------------------------------------- +-- concrete definitions + +convertConcrete :: Env -> Abstract SDecl Name -> Concrete SLinType (Maybe STerm) +convertConcrete gram (Abs decl args name) = Cnc ltyp largs term + where term = fmap (convertTerm gram . expandTerm gram) $ lookupLin gram $ name2fun name + ltyp : largs = map (convertCType gram . lookupCType gram) (decl : args) + +expandTerm :: Env -> A.Term -> A.Term +expandTerm gram term = -- tracePrt "expanded term" prt $ + err error id $ expandLinTables (fst gram) $ + -- tracePrt "initial term" prt $ + term + +convertCType :: Env -> A.CType -> SLinType +convertCType gram (A.RecType rec) = RecT [ (lbl, convertCType gram ctype) | A.Lbg lbl ctype <- rec ] +convertCType gram (A.Table pt vt) = TblT (enumerateTerms Nothing (convertCType gram pt)) (convertCType gram vt) +convertCType gram ct@(A.Cn con) = ConT $ map (convertTerm gram) $ groundTerms gram ct +convertCType gram (A.TStr) = StrT +convertCType gram (A.TInts n) = error "GFCtoSimple.convertCType: cannot handle 'TInts' constructor" + +convertTerm :: Env -> A.Term -> STerm +convertTerm gram (A.Arg arg) = convertArgVar arg +convertTerm gram (A.Par con terms) = con :^ map (convertTerm gram) terms +-- convertTerm gram (A.LI var) = Var var +convertTerm gram (A.R rec) = Rec [ (lbl, convertTerm gram term) | A.Ass lbl term <- rec ] +convertTerm gram (A.P term lbl) = convertTerm gram term +. lbl +convertTerm gram (A.V ctype terms) = Tbl [ (convertTerm gram pat, convertTerm gram term) | + (pat, term) <- zip (groundTerms gram ctype) terms ] +convertTerm gram (A.T ctype tbl) = Tbl [ (convertPatt pat, convertTerm gram term) | + A.Cas pats term <- tbl, pat <- pats ] +convertTerm gram (A.S term sel) = convertTerm gram term :! convertTerm gram sel +convertTerm gram (A.C term1 term2) = convertTerm gram term1 ?++ convertTerm gram term2 +convertTerm gram (A.FV terms) = variants (map (convertTerm gram) terms) +convertTerm gram (A.E) = Empty +convertTerm gram (A.K (A.KS tok)) = Token tok +-- 'pre' tokens are converted to variants (over-generating): +convertTerm gram (A.K (A.KP strs vars)) + = variants $ map conc $ strs : [ vs | A.Var vs _ <- vars ] + where conc [] = Empty + conc ts = foldr1 (?++) $ map Token ts +convertTerm gram (A.I con) = error "GFCtoSimple.convertTerm: cannot handle 'I' constructor" +convertTerm gram (A.EInt int) = error "GFCtoSimple.convertTerm: cannot handle 'EInt' constructor" + +convertArgVar :: A.ArgVar -> STerm +convertArgVar (A.A cat nr) = Arg (fromInteger nr) cat emptyPath +convertArgVar (A.AB cat bindings nr) = Arg (fromInteger nr) cat emptyPath + +convertPatt (A.PC con pats) = con :^ map convertPatt pats +-- convertPatt (A.PV x) = Var x +-- convertPatt (A.PW) = Wildcard +convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ] +convertPatt (A.PI n) = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor" +convertPatt p = error $ "GFCtoSimple.convertPatt: cannot handle " ++ show p + +---------------------------------------------------------------------- + +lookupLin :: Env -> Fun -> Maybe A.Term +lookupLin gram fun = err fail Just $ + Look.lookupLin (fst gram) (A.CIQ (snd gram) fun) + +lookupCType :: Env -> SDecl -> A.CType +lookupCType env decl + = errVal CMacros.defLinType $ + Look.lookupLincat (fst env) (A.CIQ (snd env) (decl2cat decl)) + +groundTerms :: Env -> A.CType -> [A.Term] +groundTerms gram ctype = err error id $ + Look.allParamValues (fst gram) ctype + diff --git a/src-3.0/GF/Conversion/Haskell.hs b/src-3.0/GF/Conversion/Haskell.hs new file mode 100644 index 000000000..abe651e1e --- /dev/null +++ b/src-3.0/GF/Conversion/Haskell.hs @@ -0,0 +1,71 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/11 14:11:46 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting/Printing different grammar formalisms in Haskell-readable format +----------------------------------------------------------------------------- + + +module GF.Conversion.Haskell where + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.MCFG +import GF.Formalism.CFG +import GF.Formalism.Utilities +import GF.Conversion.Types +import GF.Data.Operations ((++++), (+++++)) +import GF.Infra.Print + +import Data.List (intersperse) + +-- | SimpleGFC to Haskell +prtSGrammar :: SGrammar -> String +prtSGrammar rules = "-- Simple GFC grammar as a Haskell file" ++++ + "-- Autogenerated from the Grammatical Framework" +++++ + "import GF.Formalism.GCFG" ++++ + "import GF.Formalism.SimpleGFC" ++++ + "import GF.Formalism.Utilities" ++++ + "import GF.Canon.AbsGFC (CIdent(..), Label(..))" ++++ + "import GF.Infra.Ident (Ident(..))" +++++ + "grammar :: SimpleGrammar Ident (NameProfile Ident) String" ++++ + "grammar = \n\t[ " ++ + concat (intersperse "\n\t, " (map show rules)) ++ "\n\t]\n\n" + +-- | MCFG to Haskell +prtMGrammar :: MGrammar -> String +prtMGrammar rules = "-- Multiple context-free grammar as a Haskell file" ++++ + "-- Autogenerated from the Grammatical Framework" +++++ + "import GF.Formalism.GCFG" ++++ + "import GF.Formalism.MCFG" ++++ + "import GF.Formalism.Utilities" +++++ + "grammar :: MCFGrammar String (NameProfile String) String String" ++++ + "grammar = \n\t[ " ++ + concat (intersperse "\n\t, " (map prtMRule rules)) ++ "\n\t]\n\n" + where prtMRule (Rule (Abs cat cats (Name fun profiles)) (Cnc lcat lcats lins)) + = show (Rule (Abs (prt cat) (map prt cats) (Name (prt fun) (map cnvProfile profiles))) + (Cnc (map prt lcat) (map (map prt) lcats) (map cnvLin lins))) + cnvLin (Lin lbl syms) = Lin (prt lbl) (map (mapSymbol prtMArg id) syms) + prtMArg (cat, lbl, nr) = (prt cat, prt lbl, nr) + +-- | CFG to Haskell +prtCGrammar :: CGrammar -> String +prtCGrammar rules = "-- Context-free grammar as a Haskell file" ++++ + "-- autogenerated from the Grammatical Framework" +++++ + "import GF.Formalism.CFG" ++++ + "import GF.Formalism.Utilities" ++++ + "\ngrammar :: CFGrammar String (NameProfile String) String" ++++ + "grammar = \n\t[ " ++ + concat (intersperse "\n\t, " (map prtCRule rules)) ++ "\n\t]\n\n" + where prtCRule (CFRule cat syms (Name fun profiles)) + = show (CFRule (prt cat) (map (mapSymbol prt id) syms) + (Name (prt fun) (map cnvProfile profiles))) + +cnvProfile (Unify args) = Unify args +cnvProfile (Constant forest) = Constant (fmap prt forest) diff --git a/src-3.0/GF/Conversion/MCFGtoCFG.hs b/src-3.0/GF/Conversion/MCFGtoCFG.hs new file mode 100644 index 000000000..a58c31d37 --- /dev/null +++ b/src-3.0/GF/Conversion/MCFGtoCFG.hs @@ -0,0 +1,53 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Converting MCFG grammars to (possibly overgenerating) CFG +----------------------------------------------------------------------------- + + +module GF.Conversion.MCFGtoCFG + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Control.Monad +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.CFG +import GF.Conversion.Types + +---------------------------------------------------------------------- +-- * converting (possibly erasing) MCFG grammars + +convertGrammar :: EGrammar -> CGrammar +convertGrammar gram = tracePrt "MCFGtoCFG - context-free rules" (prt.length) $ + concatMap convertRule gram + +convertRule :: ERule -> [CRule] +convertRule (Rule (Abs cat args (Name fun mprofile)) (Cnc _ _ record)) + = [ CFRule (CCat cat lbl) rhs (Name fun profile) | + Lin lbl lin <- record, + let rhs = map (mapSymbol convertArg id) lin, + let cprofile = map (Unify . argPlaces lin) [0 .. length args-1], + let profile = mprofile `composeProfiles` cprofile + ] + +convertArg :: (ECat, ELabel, Int) -> CCat +convertArg (cat, lbl, _) = CCat cat lbl + +argPlaces :: [Symbol (cat, lbl, Int) tok] -> Int -> [Int] +argPlaces lin nr = [ place | (nr', place) <- zip linArgs [0..], nr == nr' ] + where linArgs = [ nr' | (_, _, nr') <- filterCats lin ] + + + + diff --git a/src-3.0/GF/Conversion/MCFGtoFCFG.hs b/src-3.0/GF/Conversion/MCFGtoFCFG.hs new file mode 100644 index 000000000..70aa4644d --- /dev/null +++ b/src-3.0/GF/Conversion/MCFGtoFCFG.hs @@ -0,0 +1,51 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Converting MCFG grammars to equivalent optimized FCFG +----------------------------------------------------------------------------- + + +module GF.Conversion.MCFGtoFCFG + (convertGrammar) where + +import Control.Monad +import List (elemIndex) +import Array + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.FCFG +import GF.Conversion.Types +import GF.Data.SortedList (nubsort) + +import GF.Infra.Print + +---------------------------------------------------------------------- +-- * converting MCFG to optimized FCFG + +convertGrammar :: MGrammar -> FGrammar +convertGrammar gram = [ FRule (Abs (fcat cat) (map fcat cats) name) (fcnc cnc) | + Rule (Abs cat cats name) cnc <- gram ] + where mcats = nubsort [ mc | Rule (Abs mcat mcats _) _ <- gram, mc <- mcat:mcats ] + + fcat mcat@(MCat (ECat scat ecns) mlbls) + = case elemIndex mcat mcats of + Just catid -> FCat catid scat mlbls ecns + Nothing -> error ("MCFGtoFCFG.fcat " ++ prt mcat) + + fcnc (Cnc _ arglbls lins) = listArray (0, length lins-1) (map flin lins) + where flin (Lin _ syms) = listArray (0, length syms-1) (map fsym syms) + fsym (Tok tok) = FSymTok tok + fsym (Cat (cat,lbl,arg)) = FSymCat (fcat cat) (flbl arg lbl) arg + flbl arg lbl = case elemIndex lbl (arglbls !! arg) of + Just lblid -> lblid + Nothing -> error ("MCFGtoFCFG.flbl " ++ prt arg ++ " " ++ prt lbl) + diff --git a/src-3.0/GF/Conversion/Prolog.hs b/src-3.0/GF/Conversion/Prolog.hs new file mode 100644 index 000000000..b930cb476 --- /dev/null +++ b/src-3.0/GF/Conversion/Prolog.hs @@ -0,0 +1,205 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/14 09:51:18 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.4 $ +-- +-- Converting/Printing different grammar formalisms in Prolog-readable format +----------------------------------------------------------------------------- + + +module GF.Conversion.Prolog (prtSGrammar, prtSMulti, prtSHeader, prtSRule, + prtMGrammar, prtMMulti, prtMHeader, prtMRule, + prtCGrammar, prtCMulti, prtCHeader, prtCRule) where + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.MCFG +import GF.Formalism.CFG +import GF.Formalism.Utilities +import GF.Conversion.Types +import qualified GF.Conversion.GFC as Cnv + +import GF.Data.Operations ((++++), (+++++)) +import GF.Infra.Print +import qualified GF.Infra.Modules as Mod +import qualified GF.Infra.Option as Option +import GF.Data.Operations (okError) +import GF.Canon.AbsGFC (Flag(..)) +import GF.Canon.GFC (CanonGrammar) +import GF.Infra.Ident (Ident(..)) + +import Data.Maybe (maybeToList, listToMaybe) +import Data.Char (isLower, isAlphaNum) + +import GF.System.Tracing + +---------------------------------------------------------------------- +-- | printing multiple languages at the same time + +prtSMulti, prtMMulti, prtCMulti :: Option.Options -> CanonGrammar -> String +prtSMulti = prtMulti prtSHeader prtSRule Cnv.gfc2simple "gfc_" +prtMMulti = prtMulti prtMHeader prtMRule Cnv.gfc2mcfg "mcfg_" +prtCMulti = prtMulti prtCHeader prtCRule Cnv.gfc2cfg "cfg_" + +-- code and ideas stolen from GF.CFGM.PrintCFGrammar + +prtMulti prtHeader prtRule conversion prefix opts gr + = prtHeader ++++ unlines + [ "\n\n" ++ prtLine ++++ + "%% Language module: " ++ prtQ langmod +++++ + unlines (map (prtRule langmod) rules) | + lang <- maybe [] (Mod.allConcretes gr) (Mod.greatestAbstract gr), + let Mod.ModMod (Mod.Module{Mod.flags=fs}) = okError (Mod.lookupModule gr lang), + let cnvopts = Option.Opts $ map Option.gfcConversion $ getFlag fs "conversion", + let rules = conversion cnvopts (gr, lang), + let langmod = (let IC lg = lang in prefix ++ lg) ] + +getFlag :: [Flag] -> String -> [String] +getFlag fs x = [v | Flg (IC k) (IC v) <- fs, k == x] + +---------------------------------------------------------------------- +-- | SimpleGFC to Prolog +-- +-- assumes that the profiles in the Simple GFC names are trivial +prtSGrammar :: SGrammar -> String +prtSGrammar rules = prtSHeader +++++ unlines (map (prtSRule "") rules) + +prtSHeader :: String +prtSHeader = prtLine ++++ + "%% Simple GFC grammar in Prolog-readable format" ++++ + "%% Autogenerated from the Grammatical Framework" +++++ + "%% The following predicate is defined:" ++++ + "%% \t rule(Fun, Cat, c(Cat,...), LinTerm)" + +prtSRule :: String -> SRule -> String +prtSRule lang (Rule (Abs cat cats (Name fun _prof)) (Cnc _ _ mterm)) + = (if null lang then "" else prtQ lang ++ " : ") ++ + prtFunctor "rule" [plfun, plcat, plcats, plcnc] ++ "." + where plfun = prtQ fun + plcat = prtSDecl cat + plcats = prtFunctor "c" (map prtSDecl cats) + plcnc = "\n\t" ++ prtSTerm (maybe Empty id mterm) + +prtSTerm (Arg n c p) = prtFunctor "arg" [prtQ c, prt (n+1), prtSPath p] +-- prtSTerm (c :^ []) = prtQ c +prtSTerm (c :^ ts) = prtOper "^" (prtQ c) (prtPList (map prtSTerm ts)) +prtSTerm (Rec rec) = prtFunctor "rec" [prtPList [ prtOper "=" (prtQ l) (prtSTerm t) | (l, t) <- rec ]] +prtSTerm (Tbl tbl) = prtFunctor "tbl" [prtPList [ prtOper "=" (prtSTerm p) (prtSTerm t) | (p, t) <- tbl ]] +prtSTerm (Variants ts) = prtFunctor "variants" [prtPList (map prtSTerm ts)] +prtSTerm (t1 :++ t2) = prtOper "+" (prtSTerm t1) (prtSTerm t2) +prtSTerm (Token t) = prtFunctor "tok" [prtQ t] +prtSTerm (Empty) = "empty" +prtSTerm (term :. lbl) = prtOper "*" (prtSTerm term) (prtQ lbl) +prtSTerm (term :! sel) = prtOper "/" (prtSTerm term) (prtSTerm sel) +-- prtSTerm (Wildcard) = "wildcard" +-- prtSTerm (Var var) = prtFunctor "var" [prtQ var] + +prtSPath (Path path) = prtPList (map (either prtQ prtSTerm) path) + +prtSDecl (Decl var typ) | var == anyVar = prtSAbsType typ + | otherwise = "_" ++ prtVar var ++ ":" ++ prtSAbsType typ + + +prtSAbsType ([] ::--> typ) = prtSFOType typ +prtSAbsType (args ::--> typ) = prtOper ":->" (prtPList (map prtSFOType args)) (prtSFOType typ) + +prtSFOType (cat ::@ args) = prtFunctor (prtQ cat) (map prtSTTerm args) + +prtSTTerm (con :@ args) = prtFunctor (prtQ con) (map prtSTTerm args) +prtSTTerm (TVar var) = "_" ++ prtVar var + +---------------------------------------------------------------------- +-- | MCFG to Prolog +prtMGrammar :: MGrammar -> String +prtMGrammar rules = prtMHeader +++++ unlines (map (prtMRule "") rules) + +prtMHeader :: String +prtMHeader = prtLine ++++ + "%% Multiple context-free grammar in Prolog-readable format" ++++ + "%% Autogenerated from the Grammatical Framework" +++++ + "%% The following predicate is defined:" ++++ + "%% \t rule(Profile, Cat, c(Cat,...), [Lbl=Symbols,...])" + +prtMRule :: String -> MRule -> String +prtMRule lang (Rule (Abs cat cats name) (Cnc _lcat _lcats lins)) + = (if null lang then "" else prtQ lang ++ " : ") ++ + prtFunctor "rule" [plname, plcat, plcats, pllins] ++ "." + where plname = prtName name + plcat = prtQ cat + plcats = prtFunctor "c" (map prtQ cats) + pllins = "\n\t[ " ++ prtSep "\n\t, " (map prtMLin lins) ++ " ]" + +prtMLin (Lin lbl lin) = prtOper "=" (prtQ lbl) (prtPList (map prtMSymbol lin)) + +prtMSymbol (Cat (cat, lbl, nr)) = prtFunctor "arg" [prtQ cat, show (nr+1), prtQ lbl] +prtMSymbol (Tok tok) = prtFunctor "tok" [prtQ tok] + +---------------------------------------------------------------------- +-- | CFG to Prolog +prtCGrammar :: CGrammar -> String +prtCGrammar rules = prtCHeader +++++ unlines (map (prtCRule "") rules) + +prtCHeader :: String +prtCHeader = prtLine ++++ + "%% Context-free grammar in Prolog-readable format" ++++ + "%% Autogenerated from the Grammatical Framework" +++++ + "%% The following predicate is defined:" ++++ + "%% \t rule(Profile, Cat, [Symbol,...])" + +prtCRule :: String -> CRule -> String +prtCRule lang (CFRule cat syms name) + = (if null lang then "" else prtQ lang ++ " : ") ++ + prtFunctor "cfgrule" [plname, plcat, plsyms] ++ "." + where plname = prtName name + plcat = prtQ cat + plsyms = prtPList (map prtCSymbol syms) + +prtCSymbol (Cat cat) = prtFunctor "cat" [prtQ cat] +prtCSymbol (Tok tok) = prtFunctor "tok" [prtQ tok] + +---------------------------------------------------------------------- +-- profiles, quoted strings and more + +prtFunctor f xs = f ++ if null xs then "" else "(" ++ prtSep ", " xs ++ ")" +prtPList xs = "[" ++ prtSep ", " xs ++ "]" +prtOper f x y = "(" ++ x ++ " " ++ f ++ " " ++ y ++ ")" + +prtName name@(Name fun profiles) + | name == coercionName = "1" + | and (zipWith (==) profiles (map (Unify . return) [0..])) = prtQ fun + | otherwise = prtFunctor (prtQ fun) (map prtProfile profiles) + +prtProfile (Unify []) = " ? " +prtProfile (Unify args) = foldr1 (prtOper "=") (map (show . succ) args) +prtProfile (Constant forest) = prtForest forest + +prtForest (FMeta) = " ? " +prtForest (FNode fun [fs]) = prtFunctor (prtQ fun) (map prtForest fs) +prtForest (FNode fun fss) = prtPList [ prtFunctor (prtQ fun) (map prtForest fs) | + fs <- fss ] + +prtQ atom = prtQStr (prt atom) + +prtQStr atom@(x:xs) + | isLower x && all isAlphaNumUnder xs = atom + where isAlphaNumUnder '_' = True + isAlphaNumUnder x = isAlphaNum x +prtQStr atom = "'" ++ concatMap esc (prt atom) ++ "'" + where esc '\'' = "\\'" + esc '\n' = "\\n" + esc '\t' = "\\t" + esc c = [c] + +prtVar var = reprime (prt var) + where reprime "" = "" + reprime ('\'' : cs) = "_0" ++ reprime cs + reprime (c:cs) = c : reprime cs + +prtLine = replicate 70 '%' + + diff --git a/src-3.0/GF/Conversion/RemoveEpsilon.hs b/src-3.0/GF/Conversion/RemoveEpsilon.hs new file mode 100644 index 000000000..0e5dafb38 --- /dev/null +++ b/src-3.0/GF/Conversion/RemoveEpsilon.hs @@ -0,0 +1,46 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/30 08:11:32 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.3 $ +-- +-- Removing epsilon linearizations from MCF grammars +----------------------------------------------------------------------------- + + +module GF.Conversion.RemoveEpsilon where +-- (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Control.Monad +import Data.List (mapAccumL) +import Data.Maybe (mapMaybe) +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Conversion.Types +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.GeneralDeduction + +convertGrammar :: EGrammar -> EGrammar +convertGrammar grammar = trace2 "RemoveEpsilon: initialEmpties" (prt initialEmpties) $ + trace2 "RemoveEpsilon: emptyCats" (prt emptyCats) $ + grammar + where initialEmpties = nubsort [ (cat, lbl) | + Rule (Abs cat _ _) (Cnc _ _ lins) <- grammar, + Lin lbl [] <- lins ] + emptyCats = limitEmpties initialEmpties + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ (cat, lbl) | Rule (Abs cat _ _) (Cnc _ _ lins) <- grammar, + Lin lbl rhs <- lins, + all (symbol (\(c,l,n) -> (c,l) `elem` es) (const False)) rhs ] + + + diff --git a/src-3.0/GF/Conversion/RemoveErasing.hs b/src-3.0/GF/Conversion/RemoveErasing.hs new file mode 100644 index 000000000..1dc2560fc --- /dev/null +++ b/src-3.0/GF/Conversion/RemoveErasing.hs @@ -0,0 +1,113 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.3 $ +-- +-- Removing erasingness from MCFG grammars (as in Ljunglöf 2004, sec 4.5.1) +----------------------------------------------------------------------------- + + +module GF.Conversion.RemoveErasing + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Control.Monad +import Data.List (mapAccumL) +import Data.Maybe (mapMaybe) +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Conversion.Types +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.GeneralDeduction + +convertGrammar :: EGrammar -> [SCat] -> MGrammar +convertGrammar grammar starts = newGrammar + where newGrammar = tracePrt "RemoveErasing - nonerasing rules" (prt . length) $ + [ rule | NR rule <- chartLookup finalChart True ] + finalChart = tracePrt "RemoveErasing - nonerasing cats" + (prt . length . flip chartLookup False) $ + buildChart keyof [newRules rulesByCat] $ + tracePrt "RemoveErasing - initial ne-cats" (prt . length) $ + initialCats + initialCats = trace2 "RemoveErasing - starting categories" (prt starts) $ + if null starts + then trace2 "RemoveErasing" "initialCatsBU" $ + initialCatsBU rulesByCat + else trace2 "RemoveErasing" ("initialCatsTD: " ++ prt starts) $ + initialCatsTD rulesByCat starts + rulesByCat = trace2 "RemoveErasing - erasing rules" (prt $ length grammar) $ + accumAssoc id [ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- grammar ] + +data Item r c = NR r | NC c deriving (Eq, Ord, Show) + +keyof (NR _) = True +keyof (NC _) = False + +newRules grammar chart (NR (Rule (Abs _ cats _) _)) + = [ NC cat | cat@(MCat _ lbls) <- cats, not (null lbls) ] +newRules grammar chart (NC newCat@(MCat cat lbls)) + = do Rule (Abs _ args (Name fun profile)) (Cnc _ _ lins0) <- grammar ? cat + + lins <- selectLins lins0 lbls + -- let lins = [ lin | lin@(Lin lbl _) <- lins0, + -- lbl `elem` lbls ] + + let argsInLin = listAssoc $ + map (\((n,c),l) -> (n, MCat c l)) $ + groupPairs $ nubsort $ + [ ((nr, cat), lbl) | + Lin _ lin <- lins, + Cat (cat, lbl, nr) <- lin ] + + newArgs = mapMaybe (lookupAssoc argsInLin) [0 .. length args-1] + argLbls = [ lbls | MCat _ lbls <- newArgs ] + + newLins = [ Lin lbl newLin | Lin lbl lin <- lins, + let newLin = map (mapSymbol cnvCat id) lin ] + cnvCat (cat, lbl, nr) = (mcat, lbl, nr') + where Just mcat = lookupAssoc argsInLin nr + Unify [nr'] = newProfile !! nr + nonEmptyCat (Cat (MCat _ [], _, _)) = False + nonEmptyCat _ = True + + newProfile = snd $ mapAccumL accumProf 0 $ + map (lookupAssoc argsInLin) [0 .. length args-1] + accumProf nr = maybe (nr, Unify []) $ const (nr+1, Unify [nr]) + newName = -- tracePrt "newName" (prtNewName profile newProfile) $ + Name fun (profile `composeProfiles` newProfile) + + guard $ all (not . null) argLbls + return $ NR (Rule (Abs newCat newArgs newName) (Cnc lbls argLbls newLins)) + +selectLins lins0 = mapM selectLbl + where selectLbl lbl = [ lin | lin@(Lin lbl' _) <- lins0, lbl == lbl' ] + + +prtNewName :: [Profile (SyntaxForest Fun)] -> [Profile (SyntaxForest Fun)] -> Name -> String +prtNewName p p' n = prt p ++ " .o. " ++ prt p' ++ " : " ++ prt n + + +initialCatsTD grammar starts = + [ cat | cat@(NC (MCat (ECat start _) _)) <- initialCatsBU grammar, + start `elem` starts ] + +initialCatsBU grammar + = [ NC (MCat cat [lbl]) | (cat, rules) <- aAssocs grammar, + let Rule _ (Cnc lbls _ _) = head rules, + lbl <- lbls ] + + + + + + + diff --git a/src-3.0/GF/Conversion/RemoveSingletons.hs b/src-3.0/GF/Conversion/RemoveSingletons.hs new file mode 100644 index 000000000..4b9992a4d --- /dev/null +++ b/src-3.0/GF/Conversion/RemoveSingletons.hs @@ -0,0 +1,82 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/11 10:28:16 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- Instantiating all types which only have one single element. +-- +-- Should be merged into 'GF.Conversion.FiniteToSimple' +----------------------------------------------------------------------------- + +module GF.Conversion.RemoveSingletons where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.SortedList +import GF.Data.Assoc + +import Data.List (mapAccumL) + +convertGrammar :: SGrammar -> SGrammar +convertGrammar grammar = if singles == emptyAssoc then grammar + else tracePrt "RemoveSingletons - non-singleton rules" (prt . length) $ + map (convertRule singles) grammar + where singles = calcSingletons grammar + +convertRule :: Assoc SCat (SyntaxForest Fun, Maybe STerm) -> SRule -> SRule +convertRule singles rule@(Rule (Abs _ decls _) _) + = if all (Nothing ==) singleArgs then rule + else instantiateSingles singleArgs rule + where singleArgs = map (lookupAssoc singles . decl2cat) decls + +instantiateSingles :: [Maybe (SyntaxForest Fun, Maybe STerm)] -> SRule -> SRule +instantiateSingles singleArgs (Rule (Abs decl decls (Name fun profile)) (Cnc lcat lcats lterm)) + = Rule (Abs decl decls' (Name fun profile')) (Cnc lcat lcats' lterm') + where (decls', lcats') = unzip [ (d, l) | (Nothing, d, l) <- zip3 singleArgs decls lcats ] + profile' = map (fmap fst) exProfile `composeProfiles` profile + newArgs = map (fmap snd) exProfile + lterm' = fmap (instantiateLin newArgs) lterm + exProfile = snd $ mapAccumL mkProfile 0 singleArgs + mkProfile nr (Just trm) = (nr, Constant trm) + mkProfile nr (Nothing) = (nr+1, Unify [nr]) + +instantiateLin :: [Profile (Maybe STerm)] -> STerm -> STerm +instantiateLin newArgs = inst + where inst (Arg nr cat path) + = case newArgs !! nr of + Unify [nr'] -> Arg nr' cat path + Constant (Just term) -> termFollowPath path term + Constant Nothing -> error "RemoveSingletons.instantiateLin: This should not happen (argument has no linearization)" + inst (cn :^ terms) = cn :^ map inst terms + inst (Rec rec) = Rec [ (lbl, inst term) | (lbl, term) <- rec ] + inst (term :. lbl) = inst term +. lbl + inst (Tbl tbl) = Tbl [ (pat, inst term) | (pat, term) <- tbl ] + inst (term :! sel) = inst term +! inst sel + inst (Variants ts) = variants (map inst ts) + inst (t1 :++ t2) = inst t1 ?++ inst t2 + inst term = term + +---------------------------------------------------------------------- + +calcSingletons :: SGrammar -> Assoc SCat (SyntaxForest Fun, Maybe STerm) +calcSingletons rules = listAssoc singleCats + where singleCats = tracePrt "RemoveSingletons - singleton cats" (prtSep " ") $ + [ (cat, (constantNameToForest name, lin)) | + (cat, [([], name, lin)]) <- rulesByCat ] + rulesByCat = groupPairs $ nubsort + [ (decl2cat cat, (args, name, lin)) | + Rule (Abs cat args name) (Cnc _ _ lin) <- rules ] + + + diff --git a/src-3.0/GF/Conversion/SimpleToFCFG.hs b/src-3.0/GF/Conversion/SimpleToFCFG.hs new file mode 100644 index 000000000..4ff5781f9 --- /dev/null +++ b/src-3.0/GF/Conversion/SimpleToFCFG.hs @@ -0,0 +1,536 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToFCFG + (convertConcrete) where + +import GF.Infra.PrintClass + +import Control.Monad + +import GF.Formalism.Utilities +import GF.Formalism.FCFG + +import GF.GFCC.Macros --hiding (prt) +import GF.GFCC.DataGFCC +import GF.GFCC.CId + +import GF.Data.BacktrackM +import GF.Data.SortedList +import GF.Data.Utilities (updateNthM, sortNub) + +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.List as List +import Data.Array +import Data.Maybe + +---------------------------------------------------------------------- +-- main conversion function + +convertConcrete :: Abstr -> Concr -> FGrammar +convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' + where abs_defs = Map.assocs (funs abs) + conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cats = lincats cnc + (abs_defs',conc',cats') = expandHOAS abs_defs conc cats + +expandHOAS :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> ([(CId,(Type,Exp))],TermMap,TermMap) +expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns, + Map.unions [lins, hoLins, varLins], + Map.unions [lincats, hoLincats, varLincat]) + where + -- replace higher-order fun argument types with new categories + funs' = [(f,(fixType ty,e)) | (f,(ty,e)) <- funs] + where + fixType :: Type -> Type + fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt + + hoTypes :: [(Int,CId)] + hoTypes = sortNub [(n,c) | (_,(ty,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0] + hoCats = sortNub (map snd hoTypes) + -- for each Cat with N bindings, we add a new category _NCat + -- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat + hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),EEq [])) | ty@(n,c) <- hoTypes] + -- lincats for the new categories + hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes] + -- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ... + hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes] + where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c) + -- for each Cat, we a add a fun _Var_Cat : _Var -> Cat + varFuns = [(varFunName cat, (cftype [varCat] cat,EEq [])) | cat <- hoCats] + -- linearizations of the _Var_Cat functions + varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats] + -- lincat for the _Var category + varLincat = Map.singleton varCat (R [S []]) + + lincatOf c = fromMaybe (error $ "No lincat for " ++ prt c) $ Map.lookup c lincats + + modifyRec :: ([Term] -> [Term]) -> Term -> Term + modifyRec f (R xs) = R (f xs) + modifyRec _ t = error $ "Not a record: " ++ show t + + varCat = CId "_Var" + + catName :: (Int,CId) -> CId + catName (0,c) = c + catName (n,CId c) = CId ("_" ++ show n ++ c) + + funName :: (Int,CId) -> CId + funName (n,CId c) = CId ("__" ++ show n ++ c) + + varFunName :: CId -> CId + varFunName (CId c) = CId ("_Var_" ++ c) + +-- replaces __NCat with _B and _Var_Cat with _. +-- the temporary names are just there to avoid name collisions. +fixHoasFuns :: FGrammar -> FGrammar +fixHoasFuns (rs, cs) = ([FRule (fixName n) args cat lins | FRule n args cat lins <- rs], cs) + where fixName (Name (CId ('_':'_':_)) p) = Name (CId "_B") p + fixName (Name (CId n) p) | "_Var_" `List.isPrefixOf` n = Name wildCId p + fixName n = n + +convert :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> FGrammar +convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv) + where + srules = [ + (XRule id args res (map findLinType args) (findLinType res) term) | + (id, (ty,_)) <- abs_defs, let (args,res) = catSkeleton ty, + term <- Map.lookup id cnc_defs] + + findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + + (xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules + where + helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = + let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap + frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) + frulesEnv + (mkSingletonSelectors cnc_defs cnc_res) + in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv') + + loop frulesEnv = + let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv + in case todo of + [] -> frulesEnv' + _ -> loop $! List.foldl' (\env (srules,selector) -> + List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo + +convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv +convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv = + foldBM addRule + frulesEnv + (convertTerm cnc_defs selector term [([],[])]) + (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes) + where + addRule linRec (newCat', newArgs', _, _) env0 = + let (env1, newCat) = genFCatHead env0 newCat' + (env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) -> + let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths] + (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs + in case xcat of + PFCat _ [] _ -> (env , args, all_args) + _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..]) + + newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}] + + (_,newProfile) = List.mapAccumL accumProf 0 newArgs' + where + accumProf nr (PFCat _ [] _,_ ) = (nr, Unify [] ) + accumProf nr (_ ,xpaths) = (nr+cnt+1, Unify [nr..nr+cnt]) + where cnt = length xpaths + + rule = FRule (Name fun newProfile) newArgs newCat newLinRec + in addFRule env2 rule + +translateLin idxArgs lbl' [] = array (0,-1) [] +translateLin idxArgs lbl' ((lbl,syms) : lins) + | lbl' == lbl = listArray (0,length syms-1) (map instSym syms) + | otherwise = translateLin idxArgs lbl' lins + where + instSym = symbol (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok + instCat lbl nr xnr nr' ((idx,xargs):idxArgs) + | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr + in FSymCat fcat (index lbl rcs 0) (nr'+xnr) + | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs + + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! (idx+1) + + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BacktrackM Env a + +type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term]) +type LinRec = [(FPath, [Symbol (FPath, FIndex, Int) FToken])] + +type TermMap = Map.Map CId Term + +convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec +convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins +convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins +convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins + +convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel + convertTerm cnc_defs (TuplePrj nr selector) term lins +convertTerm cnc_defs selector (FV vars) lins = do term <- member vars + convertTerm cnc_defs selector term lins +convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path + foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) +convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = + do projectHead lbl_path + return ((lbl_path,Tok str : lin) : lins) +convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) = + do projectHead lbl_path + toks <- member (strs:[strs' | Var strs' _ <- vars]) + return ((lbl_path, map Tok toks ++ lin) : lins) +convertTerm cnc_defs selector (RP _ term) lins = convertTerm cnc_defs selector term lins +convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs + convertTerm cnc_defs selector term lins +convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do + ss <- case t of + R ss -> return ss + F f -> do + t <- Map.lookup f cnc_defs + case t of + R ss -> return ss + convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins +convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")") + + +convertArg (TupleSel record) nr path lbl_path lin lins = + foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record +convertArg (TuplePrj lbl selector) nr path lbl_path lin lins = + convertArg selector nr (lbl:path) lbl_path lin lins +convertArg (ConSel indices) nr path lbl_path lin lins = do + index <- member indices + restrictHead lbl_path index + restrictArg nr path index + return lins +convertArg StrSel nr path lbl_path lin lins = do + projectHead lbl_path + xnr <- projectArg nr path + return ((lbl_path, GF.Formalism.Utilities.Cat (path, nr, xnr) : lin) : lins) + +convertCon (ConSel indices) index lbl_path lin lins = do + guard (index `elem` indices) + restrictHead lbl_path index + return lins +convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x + +convertRec cnc_defs selector index [] lbl_path lin lins = return lins +convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields + where + select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins + select ((index',sub_sel) : fields) + | index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins) + convertRec cnc_defs selector (index+1) record lbl_path lin lins + | otherwise = select fields +convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do + convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = do term <- readArgCType nr + unifyPType nr (reverse path) (selectTerm path term) +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (RP alias _) = evalTerm cnc_defs path alias +evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs + evalTerm cnc_defs path term +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + +unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex +unifyPType nr path (C max_index) = + do (_, args, _, _) <- readState + let (PFCat _ _ tcs,_) = args !! nr + case lookup path tcs of + Just index -> return index + Nothing -> do index <- member [0..max_index] + restrictArg nr path index + return index +unifyPType nr path (RP alias _) = unifyPType nr path alias + +unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007 + +selectTerm :: FPath -> Term -> Term +selectTerm [] term = term +selectTerm (index:path) (R record) = selectTerm path (record !! index) +selectTerm path (RP _ term) = selectTerm path term + + +---------------------------------------------------------------------- +-- FRulesEnv + +data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule] +type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat))) + +data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] + +protoFCat :: CId -> ProtoFCat +protoFCat cat = PFCat cat [] [] + +emptyFRulesEnv = FRulesEnv 0 (ins fcatString (CId "String") [[0]] [] $ + ins fcatInt (CId "Int") [[0]] [] $ + ins fcatFloat (CId "Float") [[0]] [] $ + ins fcatVar (CId "_Var") [[0]] [] $ + Map.empty) [] + where + ins fcat cat rcs tcs fcatSet = + Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet + where + right_fcat = Right fcat + tmap_s = Map.singleton tcs right_fcat + rmap_s = Map.singleton rcs tmap_s + +addFRule :: FRulesEnv -> FRule -> FRulesEnv +addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules) + +getFGrammar :: FRulesEnv -> FGrammar +getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet) + where + getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs + +genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) +genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) = + case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of + Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat) + Just (Right fcat) -> (env, fcat) + Nothing -> let fcat = last_id+1 + in (FRulesEnv fcat (ins fcat) rules, fcat) + where + ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet + where + right_fcat = Right fcat + tmap_s = Map.singleton tcs right_fcat + rmap_s = Map.singleton rcs tmap_s + +genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat) +genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) = + case Map.lookup cat fcatSet >>= Map.lookup rcs of + Just tmap -> case Map.lookup tcs tmap of + Just (Left fcat) -> (env, fcat) + Just (Right fcat) -> (env, fcat) + Nothing -> ins tmap + Nothing -> ins Map.empty + where + ins tmap = + let fcat = last_id+1 + (either_fcat,last_id1,tmap1,rules1) + = foldBM (\tcs st (either_fcat,last_id,tmap,rules) -> + let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap + rule = FRule (Name (CId "_") [Unify [0]]) [fcat_arg] fcat + (listArray (0,length rcs-1) [listArray (0,0) [FSymCat fcat_arg lbl 0] | lbl <- [0..length rcs-1]]) + in if st + then (Right fcat, last_id1,tmap1,rule:rules) + else (either_fcat,last_id, tmap, rules)) + (Left fcat,fcat,Map.insert tcs either_fcat tmap,rules) + (gen_tcs ctype [] []) + False + rmap1 = Map.singleton rcs tmap1 + in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat) + where + addArg tcs last_id tmap = + case Map.lookup tcs tmap of + Just (Left fcat) -> (last_id, tmap, fcat) + Just (Right fcat) -> (last_id, tmap, fcat) + Nothing -> let fcat = last_id+1 + in (fcat, Map.insert tcs (Left fcat) tmap, fcat) + + gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] + gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) + gen_tcs (S _) path acc = return acc + gen_tcs (RP _ term) path acc = gen_tcs term path acc + gen_tcs (C max_index) path acc = + case List.lookup path tcs of + Just index -> return $! addConstraint path index acc + Nothing -> do writeState True + index <- member [0..max_index] + return $! addConstraint path index acc + where + addConstraint path0 index0 (c@(path,index) : cs) + | path0 > path = c:addConstraint path0 index0 cs + addConstraint path0 index0 cs = (path0,index0) : cs + gen_tcs (F id) path acc = case Map.lookup id cnc_defs of + Just term -> gen_tcs term path acc + Nothing -> error ("unknown identifier: "++prt id) + + + +------------------------------------------------------------ +-- TODO queue organization + +type XRulesMap = Map.Map CId [XRule] +data XRule = XRule CId {- function -} + [CId] {- argument types -} + CId {- result type -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} + +takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv) +takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules) + where + (todo,fcatSet') = + Map.mapAccumWithKey (\todo cat rmap -> + let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> + let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat -> + case either_xcat of + Left xcat -> (tcs:tcss,Right xcat) + Right xcat -> ( tcss,either_xcat)) [] tmap + in case tcss of + [] -> ( todo,tmap ) + _ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap + mb_srules = Map.lookup cat xrulesMap + Just srules = mb_srules + + in case mb_srules of + Just srules -> (todo1,rmap1) + Nothing -> (todo ,rmap1)) [] fcatSet + + +------------------------------------------------------------ +-- The TermSelector + +data TermSelector + = TupleSel [(FIndex, TermSelector)] + | TuplePrj FIndex TermSelector + | ConSel [FIndex] + | StrSel + deriving Show + +mkSingletonSelectors :: TermMap + -> Term -- ^ Type representation term + -> [TermSelector] -- ^ list of selectors containing just one string field +mkSingletonSelectors cnc_defs term = sels0 + where + (sels0,tcss0) = loop [] ([],[]) term + + loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record) + loop path st (RP _ t) = loop path st t + loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss) + loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss) + loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of + Just term -> loop path (sels,tcss) term + Nothing -> error ("unknown identifier: "++prt id) + +mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector +mkSelector rcs tcss = + List.foldl' addRestriction (case xs of + (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys + where + xs = [ reverse path | path <- rcs] + ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs] + + addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector + addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices) + where + add [] = [n_index] + add (index':indices) + | n_index == index' = index': indices + | otherwise = index':add indices + addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields) + where + add [] = [(index,path2selector (ConSel [n_index]) path)] + add (field@(index',sub_sel):fields) + | index == index' = (index',addRestriction sub_sel (path,n_index)):fields + | otherwise = field : add fields + + addProjection :: TermSelector -> FPath -> TermSelector + addProjection StrSel [] = StrSel + addProjection (TupleSel fields) (index : path) = TupleSel (add fields) + where + add [] = [(index,path2selector StrSel path)] + add (field@(index',sub_sel):fields) + | index == index' = (index',addProjection sub_sel path):fields + | otherwise = field : add fields + + path2selector base [] = base + path2selector base (index : path) = TupleSel [(index,path2selector base path)] + +------------------------------------------------------------ +-- updating the MCF rule + +readArgCType :: FIndex -> CnvMonad Term +readArgCType nr = do (_, _, _, ctypes) <- readState + return (ctypes !! nr) + +restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () +restrictArg nr path index = do + (head, args, ctype, ctypes) <- readState + args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat + return (xcat,xs) ) nr args + writeState (head, args', ctype, ctypes) + +projectArg :: FIndex -> FPath -> CnvMonad Int +projectArg nr path = do + (head, args, ctype, ctypes) <- readState + (xnr,args') <- updateArgs nr args + writeState (head, args', ctype, ctypes) + return xnr + where + updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])]) + updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as) + | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as) + | otherwise = do a <- projectProtoFCat path a + return (0,(a,xpaths):as) + updateArgs n (a : as) = do + (xnr,as) <- updateArgs (n-1) as + return (xnr,a:as) + +readHeadCType :: CnvMonad Term +readHeadCType = do (_, _, ctype, _) <- readState + return ctype + +restrictHead :: FPath -> FIndex -> CnvMonad () +restrictHead path term + = do (head, args, ctype, ctypes) <- readState + head' <- restrictProtoFCat path term head + writeState (head', args, ctype, ctypes) + +projectHead :: FPath -> CnvMonad () +projectHead path + = do (head, args, ctype, ctypes) <- readState + head' <- projectProtoFCat path head + writeState (head', args, ctype, ctypes) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat +restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat cat rcs tcs) + where + addConstraint (c@(path,index) : cs) + | path0 > path = liftM (c:) (addConstraint cs) + | path0 == path = guard (index0 == index) >> + return (c : cs) + addConstraint cs = return ((path0,index0) : cs) + +projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat +projectProtoFCat path0 (PFCat cat rcs tcs) = do + return (PFCat cat (addConstraint rcs) tcs) + where + addConstraint (path : rcs) + | path0 > path = path : addConstraint rcs + | path0 == path = path : rcs + addConstraint rcs = path0 : rcs diff --git a/src-3.0/GF/Conversion/SimpleToFinite.hs b/src-3.0/GF/Conversion/SimpleToFinite.hs new file mode 100644 index 000000000..bbd3ae355 --- /dev/null +++ b/src-3.0/GF/Conversion/SimpleToFinite.hs @@ -0,0 +1,178 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/01 09:53:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.7 $ +-- +-- Calculating the finiteness of each type in a grammar +----------------------------------------------------------------------------- + +module GF.Conversion.SimpleToFinite + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.Utilities +import GF.Conversion.Types + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.BacktrackM +import GF.Data.Utilities (lookupList) + +import GF.Infra.Ident (Ident(..)) + +type CnvMonad a = BacktrackM () a + +convertGrammar :: SGrammar -> SGrammar +convertGrammar rules = tracePrt "SimpleToFinie - nr. 'finite' rules" (prt . length) $ + solutions cnvMonad () + where split = calcSplitable rules + cnvMonad = member rules >>= convertRule split + +convertRule :: Splitable -> SRule -> CnvMonad SRule +convertRule split (Rule abs cnc) + = do newAbs <- convertAbstract split abs + return $ Rule newAbs cnc + +{- +-- old code +convertAbstract :: Splitable -> Abstract SDecl Name + -> CnvMonad (Abstract SDecl Name) +convertAbstract split (Abs decl decls name) + = case splitableFun split (name2fun name) of + Just cat' -> return $ Abs (Decl anyVar (mergeFun (name2fun name) cat') []) decls name + Nothing -> expandTyping split name [] decl decls [] + + +expandTyping :: Splitable -> Name -> [(Var, SCat)] -> SDecl -> [SDecl] -> [SDecl] + -> CnvMonad (Abstract SDecl Name) +expandTyping split name env (Decl x cat args) [] decls + = return $ Abs decl (reverse decls) name + where decl = substArgs split x env cat args [] +expandTyping split name env typ (Decl x xcat xargs : declsToDo) declsDone + = do (x', xcat', env') <- calcNewEnv + let decl = substArgs split x' env xcat' xargs [] + expandTyping split name env' typ declsToDo (decl : declsDone) + where calcNewEnv = case splitableCat split xcat of + Just newFuns -> do newFun <- member newFuns + let newCat = mergeFun newFun xcat + -- Just newCats -> do newCat <- member newCats + return (anyVar, newCat, (x,newCat) : env) + Nothing -> return (x, xcat, env) +-} + +-- new code +convertAbstract :: Splitable -> Abstract SDecl Name + -> CnvMonad (Abstract SDecl Name) +convertAbstract split (Abs decl decls name) + = case splitableFun split fun of + Just cat' -> return $ Abs (Decl anyVar ([] ::--> (mergeFun fun cat' ::@ []))) decls name + Nothing -> expandTyping split [] fun profiles [] decl decls [] + where Name fun profiles = name + +expandTyping :: Splitable -> [(Var, SCat)] + -> Fun -> [Profile (SyntaxForest Fun)] -> [Profile (SyntaxForest Fun)] + -> SDecl -> [SDecl] -> [SDecl] + -> CnvMonad (Abstract SDecl Name) +expandTyping split env fun [] profiles (Decl x (typargs ::--> (cat ::@ args))) [] decls + = return $ Abs decl (reverse decls) (Name fun (reverse profiles)) + where decl = substArgs split x env typargs cat args [] +expandTyping split env fun (prof:profiles) profsDone typ + (Decl x (xtypargs ::--> (xcat ::@ xargs)) : declsToDo) declsDone + = do (x', xcat', env', prof') <- calcNewEnv + let decl = substArgs split x' env xtypargs xcat' xargs [] + expandTyping split env' fun profiles (prof' : profsDone) typ declsToDo (decl : declsDone) + where calcNewEnv = case splitableCat split xcat of + Nothing -> return (x, xcat, env, prof) + Just newFuns -> do newFun <- member newFuns + let newCat = mergeFun newFun xcat + newProf = Constant (FNode newFun [[]]) + -- should really be using some kind of + -- "profile unification" + return (anyVar, newCat, (x,newCat) : env, newProf) + +substArgs :: Splitable -> Var -> [(Var, SCat)] -> [FOType SCat] + -> SCat -> [TTerm] -> [TTerm] -> SDecl +substArgs split x env typargs cat [] args = Decl x (typargs ::--> (cat ::@ reverse args)) +substArgs split x env typargs cat (arg:argsToDo) argsDone + = case argLookup split env arg of + Just newCat -> substArgs split x env typargs (mergeArg cat newCat) argsToDo argsDone + Nothing -> substArgs split x env typargs cat argsToDo (arg : argsDone) + +argLookup split env (TVar x) = lookup x env +argLookup split env (con :@ _) = fmap (mergeFun fun) (splitableFun split fun) + where fun = constr2fun con + + +---------------------------------------------------------------------- +-- splitable categories (finite, no dependencies) +-- they should also be used as some dependency + +type Splitable = (Assoc SCat [Fun], Assoc Fun SCat) + +splitableCat :: Splitable -> SCat -> Maybe [Fun] +splitableCat = lookupAssoc . fst + +splitableFun :: Splitable -> Fun -> Maybe SCat +splitableFun = lookupAssoc . snd + +calcSplitable :: [SRule] -> Splitable +calcSplitable rules = (listAssoc splitableCat2Funs, listAssoc splitableFun2Cat) + where splitableCat2Funs = groupPairs $ nubsort splitableCatFuns + + splitableFun2Cat = nubsort + [ (fun, cat) | (cat, fun) <- splitableCatFuns ] + + -- cat-fun pairs that are splitable + splitableCatFuns = tracePrt "SimpleToFinite - splitable functions" prt $ + [ (cat, name2fun name) | + Rule (Abs (Decl _ ([] ::--> (cat ::@ []))) [] name) _ <- rules, + splitableCats ?= cat ] + + -- all cats that are splitable + splitableCats = listSet $ + tracePrt "SimpleToFinite - finite categories to split" prt $ + (nondepCats <**> depCats) <\\> resultCats + + -- all result cats for some pure function + resultCats = tracePrt "SimpleToFinite - result cats" prt $ + nubsort [ cat | Rule (Abs (Decl _ (_ ::--> (cat ::@ _))) decls _) _ <- rules, + not (null decls) ] + + -- all cats in constants without dependencies + nondepCats = tracePrt "SimpleToFinite - nondep cats" prt $ + nubsort [ cat | Rule (Abs (Decl _ ([] ::--> (cat ::@ []))) [] _) _ <- rules ] + + -- all cats occurring as some dependency of another cat + depCats = tracePrt "SimpleToFinite - dep cats" prt $ + nubsort [ cat | Rule (Abs decl decls _) _ <- rules, + cat <- varCats [] (decls ++ [decl]) ] + + varCats _ [] = [] + varCats env (Decl x (xargs ::--> xtyp@(xcat ::@ _)) : decls) + = varCats ((x,xcat) : env) decls ++ + [ cat | (_::@args) <- (xtyp:xargs), arg <- args, + y <- varsInTTerm arg, cat <- lookupList y env ] + + +---------------------------------------------------------------------- +-- utilities +-- mergeing categories + +mergeCats :: String -> String -> String -> SCat -> SCat -> SCat +mergeCats before middle after (IC cat) (IC arg) + = IC (before ++ cat ++ middle ++ arg ++ after) + +mergeFun, mergeArg :: SCat -> SCat -> SCat +mergeFun = mergeCats "{" ":" "}" +mergeArg = mergeCats "" "" "" + + diff --git a/src-3.0/GF/Conversion/SimpleToMCFG.hs b/src-3.0/GF/Conversion/SimpleToMCFG.hs new file mode 100644 index 000000000..8f23c905d --- /dev/null +++ b/src-3.0/GF/Conversion/SimpleToMCFG.hs @@ -0,0 +1,26 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/18 14:55:32 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.3 $ +-- +-- All different conversions from SimpleGFC to MCFG +----------------------------------------------------------------------------- + +module GF.Conversion.SimpleToMCFG where + +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import qualified GF.Conversion.SimpleToMCFG.Strict as Strict +import qualified GF.Conversion.SimpleToMCFG.Nondet as Nondet +import qualified GF.Conversion.SimpleToMCFG.Coercions as Coerce + +convertGrammarNondet, convertGrammarStrict :: SGrammar -> EGrammar +convertGrammarNondet = Coerce.addCoercions . Nondet.convertGrammar +convertGrammarStrict = Strict.convertGrammar + diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs new file mode 100644 index 000000000..319b99dcb --- /dev/null +++ b/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs @@ -0,0 +1,63 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Coercions + (addCoercions) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Conversion.Types +import GF.Data.SortedList +import Data.List (groupBy) + +---------------------------------------------------------------------- + +addCoercions :: EGrammar -> EGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule (Abs head args _) (Cnc lbls _ _) <- rules ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "SimpleToMCFG.Coercions - MCFG coercions" (prt . length) $ + concat $ + tracePrt "SimpleToMCFG.Coercions - MCFG coercions per category" + (prtList . map length) $ + combineCoercions + (groupBy sameECatFst allHeadSet) + (groupBy sameECat allArgSet) + sameECatFst a b = sameECat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (ecat2scat $ fst $ head heads) (ecat2scat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args + = [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) | + (head@(ECat _ headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@(ECat _ argCns) <- args, + argCns `subset` headCns ] + + + diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs new file mode 100644 index 000000000..d6ff052f5 --- /dev/null +++ b/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs @@ -0,0 +1,256 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/17 08:27:29 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.7 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically. +-- Afterwards, the grammar has to be extended with coercion functions, +-- from the module 'GF.Conversion.SimpleToMCFG.Coercions' +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Nondet + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Control.Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM +import GF.Data.Utilities (notLongerThan, updateNthM) + +------------------------------------------------------------ +-- type declarations + +type CnvMonad a = BacktrackM Env a + +type Env = (ECat, [ECat], LinRec, [SLinType]) -- variable bindings: [(Var, STerm)] +type LinRec = [Lin SCat MLabel Token] + + +---------------------------------------------------------------------- +-- main conversion function + +maxNrRules :: Int +maxNrRules = 5000 + +convertGrammar :: SGrammar -> EGrammar +convertGrammar rules = traceCalcFirst rules' $ + tracePrt "SimpleToMCFG.Nondet - MCFG rules" (prt . length) $ + rules' + where rules' = rules >>= convertRule +-- solutions conversion undefined +-- where conversion = member rules >>= convertRule + +convertRule :: SRule -> [ERule] -- CnvMonad ERule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) = +-- | prt(name2fun fun) `elem` +-- words "UseCl PosTP TPast ASimul SPredV IndefOneNP DefOneNP UseN2 mother_N2 jump_V" = + if notLongerThan maxNrRules rules + then tracePrt ("SimpeToMCFG.Nondet - MCFG rules for " ++ prt fun) (prt . length) $ + rules + else trace2 "SimpeToMCFG.Nondet - TOO MANY RULES, function not converted" + ("More than " ++ show maxNrRules ++ " MCFG rules for " ++ prt fun) $ + [] + where rules = flip solutions undefined $ + do let cat : args = map decl2cat (decl : decls) + writeState (initialECat cat, map initialECat args, [], ctypes) + rterm <- simplifyTerm term + reduceTerm ctype emptyPath rterm + (newCat, newArgs, linRec, _) <- readState + let newLinRec = map (instantiateArgs newArgs) linRec + catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) + -- checkLinRec argsPaths catPaths newLinRec + return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) +convertRule _ = [] -- failure + + +---------------------------------------------------------------------- +-- "type-checking" the resulting linearization +-- should not be necessary, if the algorithms (type-checking and conversion) are correct + +checkLinRec args lbls = mapM (checkLin args lbls) + +checkLin args lbls (Lin lbl lin) + | lbl `elem` lbls = mapM (symbol (checkArg args) (const (return ()))) lin + | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" "Label mismatch" $ + failure + +checkArg args (_cat, lbl, nr) + | lbl `elem` (args !! nr) = return () +-- | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" ("Label mismatch in arg " ++ prt nr) $ +-- failure + | otherwise = trace2 ("SimpleToMCFG.Nondet - ERROR: Label mismatch in arg " ++ prt nr) + (prt lbl ++ " `notElem` " ++ prt (args!!nr)) $ + failure + + +---------------------------------------------------------------------- +-- term simplification + +simplifyTerm :: STerm -> CnvMonad STerm +simplifyTerm (term :! sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + Tbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm ssel + return (sterm +! sel') +-- simplifyTerm (Var x) = readBinding x +simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms +simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record +simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term +simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table +simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms +simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2) +simplifyTerm term = return term + +simplifyAssign :: (Label, STerm) -> CnvMonad (Label, STerm) +simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term + +simplifyCase :: (STerm, STerm) -> CnvMonad (STerm, STerm) +simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term) + + +------------------------------------------------------------ +-- reducing simplified terms, collecting MCF rules + +reduceTerm :: SLinType -> SPath -> STerm -> CnvMonad () +--reduceTerm ctype path (Variants terms) +-- = member terms >>= reduceTerm ctype path +reduceTerm (StrT) path term = updateLin (path, term) +reduceTerm (ConT _) path term = do pat <- expandTerm term + updateHead (path, pat) +reduceTerm (RecT rtype) path term + = sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | (lbl, ctype) <- rtype ] +reduceTerm (TblT pats vtype) path table + = sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | pat <- pats ] + + +------------------------------------------------------------ +-- expanding a term to ground terms + +expandTerm :: STerm -> CnvMonad STerm +expandTerm arg@(Arg nr _ path) + = do ctypes <- readArgCTypes + unifyPType arg $ lintypeFollowPath path $ ctypes !! nr +-- expandTerm arg@(Arg nr _ path) +-- = do ctypes <- readArgCTypes +-- pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr +-- pat =?= arg +-- return pat +expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms +expandTerm (Rec record) = liftM Rec $ mapM expandAssign record +--expandTerm (Variants terms) = liftM Variants $ mapM expandTerm terms +expandTerm (Variants terms) = member terms >>= expandTerm +expandTerm term = error $ "expandTerm: " ++ prt term + +expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm) +expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term + +unifyPType :: STerm -> SLinType -> CnvMonad STerm +unifyPType arg (RecT prec) = + liftM Rec $ + sequence [ liftM ((,) lbl) $ + unifyPType (arg +. lbl) ptype | + (lbl, ptype) <- prec ] +unifyPType (Arg nr _ path) (ConT terms) = + do (_, args, _, _) <- readState + case lookup path (ecatConstraints (args !! nr)) of + Just term -> return term + Nothing -> do term <- member terms + updateArg nr (path, term) + return term + +------------------------------------------------------------ +-- unification of patterns and selection terms + +(=?=) :: STerm -> STerm -> CnvMonad () +-- Wildcard =?= _ = return () +-- Var x =?= term = addBinding x term +Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= Arg nr _ path = updateArg nr (path, pat) +(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +-- variants are not allowed in patterns, but in selection terms: +term =?= Variants terms = member terms >>= (term =?=) +pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term + +---------------------------------------------------------------------- +-- variable bindings (does not work correctly) +{- +addBinding x term = do (a, b, c, d, bindings) <- readState + writeState (a, b, c, d, (x,term):bindings) + +readBinding x = do (_, _, _, _, bindings) <- readState + return $ maybe (Var x) id $ lookup x bindings +-} + +------------------------------------------------------------ +-- updating the MCF rule + +readArgCTypes :: CnvMonad [SLinType] +readArgCTypes = do (_, _, _, env) <- readState + return env + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins, env) <- readState + args' <- updateNthM (addToECat cn) arg args + writeState (head, args', lins, env) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins, env) <- readState + head' <- addToECat cn head + writeState (head', args, lins, env) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins, env) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins', env) + +term2lins :: STerm -> [[Symbol (SCat, SPath, Int) Token]] +term2lins (Arg nr cat path) = return [Cat (cat, path, nr)] +term2lins (Token str) = return [Tok str] +term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (Empty) = return [] +term2lins (Variants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToECat :: Constraint -> ECat -> CnvMonad ECat +addToECat cn (ECat cat cns) = liftM (ECat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + + diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs new file mode 100644 index 000000000..a5519fcd8 --- /dev/null +++ b/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs @@ -0,0 +1,129 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, deterministic. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Strict + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Control.Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM +import GF.Data.SortedList + +---------------------------------------------------------------------- +-- main conversion function + +type CnvMonad a = BacktrackM () a + +convertGrammar :: SGrammar -> EGrammar +convertGrammar rules = tracePrt "SimpleToMCFG.Strict - MCFG rules" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SRule -> CnvMonad ERule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + args_ctypes = zip3 [0..] args ctypes + instArgs <- mapM enumerateArg args_ctypes + let instTerm = substitutePaths instArgs term + newCat <- extractECat cat ctype instTerm + newArgs <- mapM (extractArg instArgs) args_ctypes + let linRec = strPaths ctype instTerm >>= extractLin newArgs + let newLinRec = map (instantiateArgs newArgs) linRec + catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) +convertRule _ = failure + +---------------------------------------------------------------------- +-- category extraction + +extractArg :: [STerm] -> (Int, SCat, SLinType) -> CnvMonad ECat +extractArg args (nr, cat, ctype) = extractECat cat ctype (args !! nr) + +extractECat :: SCat -> SLinType -> STerm -> CnvMonad ECat +extractECat cat ctype term = member $ map (ECat cat) $ parPaths ctype term + +enumerateArg :: (Int, SCat, SLinType) -> CnvMonad STerm +enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype + +---------------------------------------------------------------------- +-- Substitute each instantiated parameter path for its instantiation + +substitutePaths :: [STerm] -> STerm -> STerm +substitutePaths arguments = subst + where subst (Arg nr _ path) = termFollowPath path (arguments !! nr) + subst (con :^ terms) = con :^ map subst terms + subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ] + subst (term :. lbl) = subst term +. lbl + subst (Tbl table) = Tbl [ (pat, subst term) | + (pat, term) <- table ] + subst (term :! select) = subst term +! subst select + subst (term :++ term') = subst term ?++ subst term' + subst (Variants terms) = Variants $ map subst terms + subst term = term + +---------------------------------------------------------------------- +-- term paths extaction + +termPaths :: SLinType -> STerm -> [(SPath, (SLinType, STerm))] +termPaths ctype (Variants terms) = terms >>= termPaths ctype +termPaths (RecT rtype) (Rec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let Just ctype = lookup lbl rtype, + (path, value) <- termPaths ctype term ] +termPaths (TblT _ ctype) (Tbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths ctype term ] +termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: SLinType -> STerm -> [[(SPath, STerm)]] +parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $ + nubsort [ (path, value) | + (path, (ConT _, value)) <- termPaths ctype term ] + +strPaths :: SLinType -> STerm -> [(SPath, STerm)] +strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ] + +---------------------------------------------------------------------- +-- linearization extraction + +extractLin :: [ECat] -> (SPath, STerm) -> [Lin ECat MLabel Token] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (Empty) = [[]] + convertLin (Token tok) = [[Tok tok]] + convertLin (Variants terms) = concatMap convertLin terms + convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]] + convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path) + diff --git a/src-3.0/GF/Conversion/TypeGraph.hs b/src-3.0/GF/Conversion/TypeGraph.hs new file mode 100644 index 000000000..62ee9726e --- /dev/null +++ b/src-3.0/GF/Conversion/TypeGraph.hs @@ -0,0 +1,58 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/16 10:21:21 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.2 $ +-- +-- Printing the type hierarchy of an abstract module in GraphViz format +----------------------------------------------------------------------------- + + +module GF.Conversion.TypeGraph (prtTypeGraph, prtFunctionGraph) where + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.Utilities +import GF.Conversion.Types + +import GF.Data.Operations ((++++), (+++++)) +import GF.Infra.Print + +---------------------------------------------------------------------- +-- | SimpleGFC to TypeGraph +-- +-- assumes that the profiles in the Simple GFC names are trivial + +prtTypeGraph :: SGrammar -> String +prtTypeGraph rules = "digraph TypeGraph {" ++++ + "concentrate=true;" ++++ + "node [shape=ellipse];" +++++ + unlines (map prtTypeGraphRule rules) +++++ + "}" + +prtTypeGraphRule :: SRule -> String +prtTypeGraphRule (Rule abs@(Abs cat cats (Name fun _prof)) _) + = "// " ++ prt abs ++++ + unlines [ prtSCat c ++ " -> " ++ prtSCat cat ++ ";" | c <- cats ] + +prtFunctionGraph :: SGrammar -> String +prtFunctionGraph rules = "digraph FunctionGraph {" ++++ + "node [shape=ellipse];" +++++ + unlines (map prtFunctionGraphRule rules) +++++ + "}" + +prtFunctionGraphRule :: SRule -> String +prtFunctionGraphRule (Rule abs@(Abs cat cats (Name fun _prof)) _) + = "// " ++ prt abs ++++ + pfun ++ " [label=\"" ++ prt fun ++ "\", shape=box, style=dashed];" ++++ + pfun ++ " -> " ++ prtSCat cat ++ ";" ++++ + unlines [ prtSCat c ++ " -> " ++ pfun ++ ";" | c <- cats ] + where pfun = "GF_FUNCTION_" ++ prt fun + +prtSCat decl = prt (decl2cat decl) + + diff --git a/src-3.0/GF/Conversion/Types.hs b/src-3.0/GF/Conversion/Types.hs new file mode 100644 index 000000000..97c2ace05 --- /dev/null +++ b/src-3.0/GF/Conversion/Types.hs @@ -0,0 +1,146 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/11 14:11:46 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.10 $ +-- +-- All possible instantiations of different grammar formats used in conversion from GFC +----------------------------------------------------------------------------- + + +module GF.Conversion.Types where + +---import GF.Conversion.FTypes + +import qualified GF.Infra.Ident as Ident (Ident(..), wildIdent, isWildIdent) +import qualified GF.Canon.AbsGFC as AbsGFC (CIdent(..), Label(..)) +import qualified GF.GFCC.CId +import qualified GF.Grammar.Grammar as Grammar (Term) + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.MCFG +import GF.Formalism.FCFG +import GF.Formalism.CFG +import GF.Formalism.Utilities +import GF.Infra.Print +import GF.Data.Assoc + +import Control.Monad (foldM) +import Data.Array + +---------------------------------------------------------------------- +-- * basic (leaf) types + +-- ** input tokens + +type Token = String + +-- ** function names + +type Fun = Ident.Ident +type Name = NameProfile Fun + + +---------------------------------------------------------------------- +-- * Simple GFC + +type SCat = Ident.Ident + +constr2fun :: Constr -> Fun +constr2fun (AbsGFC.CIQ _ fun) = fun + +-- ** grammar types + +type SGrammar = SimpleGrammar SCat Name Token +type SRule = SimpleRule SCat Name Token + +type SPath = Path SCat Token +type STerm = Term SCat Token +type SLinType = LinType SCat Token +type SDecl = Decl SCat + +---------------------------------------------------------------------- +-- * erasing MCFG + +type EGrammar = MCFGrammar ECat Name ELabel Token +type ERule = MCFRule ECat Name ELabel Token +data ECat = ECat SCat [Constraint] deriving (Eq, Ord, Show) +type ELabel = SPath + +type Constraint = (SPath, STerm) + +-- ** type coercions etc + +initialECat :: SCat -> ECat +initialECat cat = ECat cat [] + +ecat2scat :: ECat -> SCat +ecat2scat (ECat cat _) = cat + +ecatConstraints :: ECat -> [Constraint] +ecatConstraints (ECat _ cns) = cns + +sameECat :: ECat -> ECat -> Bool +sameECat ec1 ec2 = ecat2scat ec1 == ecat2scat ec2 + +coercionName :: Name +coercionName = Name Ident.wildIdent [Unify [0]] + +isCoercion :: Name -> Bool +isCoercion (Name fun [Unify [0]]) = Ident.isWildIdent fun +isCoercion _ = False + +---------------------------------------------------------------------- +-- * nonerasing MCFG + +type MGrammar = MCFGrammar MCat Name MLabel Token +type MRule = MCFRule MCat Name MLabel Token +data MCat = MCat ECat [ELabel] deriving (Eq, Ord, Show) +type MLabel = ELabel + +mcat2ecat :: MCat -> ECat +mcat2ecat (MCat cat _) = cat + +mcat2scat :: MCat -> SCat +mcat2scat = ecat2scat . mcat2ecat + +---------------------------------------------------------------------- +-- * fast nonerasing MCFG + +---- moved to FTypes by AR 20/9/2007 + + +---------------------------------------------------------------------- +-- * CFG + +type CGrammar = CFGrammar CCat Name Token +type CRule = CFRule CCat Name Token +data CCat = CCat ECat ELabel deriving (Eq, Ord, Show) + +ccat2ecat :: CCat -> ECat +ccat2ecat (CCat cat _) = cat + +ccat2scat :: CCat -> SCat +ccat2scat = ecat2scat . ccat2ecat + +---------------------------------------------------------------------- +-- * pretty-printing + +instance Print ECat where + prt (ECat cat constrs) = prt cat ++ "{" ++ + concat [ prt path ++ "=" ++ prt term ++ ";" | + (path, term) <- constrs ] ++ "}" + +instance Print MCat where + prt (MCat cat labels) = prt cat ++ prt labels + +instance Print CCat where + prt (CCat cat label) = prt cat ++ prt label + +---- instance Print FCat where ---- FCat + |
