summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Conversion
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Conversion
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Conversion')
-rw-r--r--src-3.0/GF/Conversion/GFC.hs157
-rw-r--r--src-3.0/GF/Conversion/GFCtoSimple.hs175
-rw-r--r--src-3.0/GF/Conversion/Haskell.hs71
-rw-r--r--src-3.0/GF/Conversion/MCFGtoCFG.hs53
-rw-r--r--src-3.0/GF/Conversion/MCFGtoFCFG.hs51
-rw-r--r--src-3.0/GF/Conversion/Prolog.hs205
-rw-r--r--src-3.0/GF/Conversion/RemoveEpsilon.hs46
-rw-r--r--src-3.0/GF/Conversion/RemoveErasing.hs113
-rw-r--r--src-3.0/GF/Conversion/RemoveSingletons.hs82
-rw-r--r--src-3.0/GF/Conversion/SimpleToFCFG.hs536
-rw-r--r--src-3.0/GF/Conversion/SimpleToFinite.hs178
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG.hs26
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs63
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs256
-rw-r--r--src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs129
-rw-r--r--src-3.0/GF/Conversion/TypeGraph.hs58
-rw-r--r--src-3.0/GF/Conversion/Types.hs146
17 files changed, 2345 insertions, 0 deletions
diff --git a/src-3.0/GF/Conversion/GFC.hs b/src-3.0/GF/Conversion/GFC.hs
new file mode 100644
index 000000000..354bdea65
--- /dev/null
+++ b/src-3.0/GF/Conversion/GFC.hs
@@ -0,0 +1,157 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/09/01 09:53:18 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.14 $
+--
+-- All conversions from GFC
+-----------------------------------------------------------------------------
+
+module GF.Conversion.GFC
+ (module GF.Conversion.GFC,
+ SGrammar, EGrammar, MGrammar, CGrammar) where
+
+import GF.Infra.Option
+import GF.Canon.GFC (CanonGrammar)
+import GF.Infra.Ident (Ident, identC)
+import qualified GF.Infra.Modules as M
+
+import GF.Formalism.GCFG (Rule(..), Abstract(..))
+import GF.Formalism.SimpleGFC (decl2cat)
+import GF.Formalism.CFG (CFRule(..))
+import GF.Formalism.Utilities (symbol, name2fun)
+import GF.Conversion.Types
+
+import qualified GF.Conversion.GFCtoSimple as G2S
+import qualified GF.Conversion.SimpleToFinite as S2Fin
+import qualified GF.Conversion.RemoveSingletons as RemSing
+import qualified GF.Conversion.RemoveErasing as RemEra
+import qualified GF.Conversion.RemoveEpsilon as RemEps
+import qualified GF.Conversion.SimpleToMCFG as S2M
+import qualified GF.Conversion.MCFGtoCFG as M2C
+
+import GF.Infra.Print
+
+import GF.System.Tracing
+
+----------------------------------------------------------------------
+-- * GFC -> MCFG & CFG, using options to decide which conversion is used
+
+convertGFC :: Options -> (CanonGrammar, Ident)
+ -> (SGrammar, (EGrammar, (MGrammar, CGrammar)))
+convertGFC opts = \g -> let s = g2s g
+ e = s2e s
+ m = e2m e
+ in trace2 "Options" (show opts) (s, (e, (m, e2c e)))
+ where e2c = M2C.convertGrammar
+ e2m = case getOptVal opts firstCat of
+ Just cat -> flip erasing [identC cat]
+ Nothing -> flip erasing []
+ s2e = case getOptVal opts gfcConversion of
+ Just "strict" -> strict
+ Just "finite-strict" -> strict
+ Just "epsilon" -> epsilon . nondet
+ _ -> nondet
+ g2s = case getOptVal opts gfcConversion of
+ Just "finite" -> finite . simple
+ Just "finite2" -> finite . finite . simple
+ Just "finite3" -> finite . finite . finite . simple
+ Just "singletons" -> single . simple
+ Just "finite-singletons" -> single . finite . simple
+ Just "finite-strict" -> finite . simple
+ _ -> simple
+
+ simple = G2S.convertGrammar
+ strict = S2M.convertGrammarStrict
+ nondet = S2M.convertGrammarNondet
+ epsilon = RemEps.convertGrammar
+ finite = S2Fin.convertGrammar
+ single = RemSing.convertGrammar
+ erasing = RemEra.convertGrammar
+
+gfc2simple :: Options -> (CanonGrammar, Ident) -> SGrammar
+gfc2simple opts = fst . convertGFC opts
+
+gfc2mcfg :: Options -> (CanonGrammar, Ident) -> MGrammar
+gfc2mcfg opts g = mcfg
+ where
+ (mcfg, _) = snd (snd (convertGFC opts g))
+
+gfc2cfg :: Options -> (CanonGrammar, Ident) -> CGrammar
+gfc2cfg opts g = cfg
+ where
+ (_, cfg) = snd (snd (convertGFC opts g))
+
+
+----------------------------------------------------------------------
+-- * single step conversions
+
+{-
+gfc2simple :: (CanonGrammar, Ident) -> SGrammar
+gfc2simple = G2S.convertGrammar
+
+simple2finite :: SGrammar -> SGrammar
+simple2finite = S2Fin.convertGrammar
+
+removeSingletons :: SGrammar -> SGrammar
+removeSingletons = RemSing.convertGrammar
+
+simple2mcfg_nondet :: SGrammar -> EGrammar
+simple2mcfg_nondet =
+
+simple2mcfg_strict :: SGrammar -> EGrammar
+simple2mcfg_strict = S2M.convertGrammarStrict
+
+mcfg2cfg :: EGrammar -> CGrammar
+mcfg2cfg = M2C.convertGrammar
+
+removeErasing :: EGrammar -> [SCat] -> MGrammar
+removeErasing = RemEra.convertGrammar
+
+removeEpsilon :: EGrammar -> EGrammar
+removeEpsilon = RemEps.convertGrammar
+-}
+
+----------------------------------------------------------------------
+-- * converting to some obscure formats
+
+gfc2abstract :: (CanonGrammar, Ident) -> [Abstract SCat Fun]
+gfc2abstract gr = [ Abs (decl2cat decl) (map decl2cat decls) (name2fun name) |
+ Rule (Abs decl decls name) _ <- G2S.convertGrammar gr ]
+
+abstract2skvatt :: [Abstract SCat Fun] -> String
+abstract2skvatt gr = skvatt_hdr ++ concatMap abs2pl gr
+ where abs2pl (Abs cat [] fun) = prtQuoted cat ++ " ---> " ++
+ "\"" ++ prt fun ++ "\".\n"
+ abs2pl (Abs cat cats fun) =
+ prtQuoted cat ++ " ---> " ++
+ "\"(" ++ prt fun ++ "\"" ++
+ prtBefore ", \" \", " (map prtQuoted cats) ++ ", \")\".\n"
+
+cfg2skvatt :: CGrammar -> String
+cfg2skvatt gr = skvatt_hdr ++ concatMap cfg2pl gr
+ where cfg2pl (CFRule cat syms _name) =
+ prtQuoted cat ++ " ---> " ++
+ if null syms then "\"\".\n" else
+ prtSep ", " (map (symbol prtQuoted prTok) syms) ++ ".\n"
+ prTok tok = "\"" ++ tok ++ " \""
+
+skvatt_hdr = ":- use_module(library(skvatt)).\n" ++
+ ":- use_module(library(utils), [repeat/1]).\n" ++
+ "corpus(File, StartCat, Depth, Size) :- \n" ++
+ " set_flag(gendepth, Depth),\n" ++
+ " tell(File), repeat(Size),\n" ++
+ " generate_words(StartCat, String), format('~s~n~n', [String]),\n" ++
+ " write(user_error, '.'),\n" ++
+ " fail ; told.\n\n"
+
+prtQuoted :: Print a => a -> String
+prtQuoted a = "'" ++ prt a ++ "'"
+
+
+
+
diff --git a/src-3.0/GF/Conversion/GFCtoSimple.hs b/src-3.0/GF/Conversion/GFCtoSimple.hs
new file mode 100644
index 000000000..b6a34a8ce
--- /dev/null
+++ b/src-3.0/GF/Conversion/GFCtoSimple.hs
@@ -0,0 +1,175 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/10/07 11:24:51 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.15 $
+--
+-- Converting GFC to SimpleGFC
+--
+-- the conversion might fail if the GFC grammar has dependent or higher-order types,
+-- or if the grammar contains bound pattern variables
+-- (use -optimize=values/share/none when importing)
+--
+-- TODO: lift all functions to the 'Err' monad
+-----------------------------------------------------------------------------
+
+module GF.Conversion.GFCtoSimple
+ (convertGrammar) where
+
+import qualified GF.Canon.AbsGFC as A
+import qualified GF.Infra.Ident as I
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+
+import GF.UseGrammar.Linear (expandLinTables)
+import GF.Canon.GFC (CanonGrammar)
+import GF.Canon.MkGFC (grammar2canon)
+import GF.Canon.Subexpressions (unSubelimCanon)
+import qualified GF.Canon.Look as Look (lookupLin, allParamValues, lookupLincat)
+import qualified GF.Canon.CMacros as CMacros (defLinType)
+import GF.Data.Operations (err, errVal)
+--import qualified Modules as M
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+
+type Env = (CanonGrammar, I.Ident)
+
+convertGrammar :: Env -> SGrammar
+convertGrammar (g,i) = trace2 "GFCtoSimple - concrete language" (prt (snd gram)) $
+ tracePrt "GFCtoSimple - simpleGFC rules" (prt . length) $
+ [ convertAbsFun gram fun typing |
+ A.Mod (A.MTAbs modname) _ _ _ defs <- modules,
+ A.AbsDFun fun typing _ <- defs ]
+ where A.Gr modules = grammar2canon (fst gram)
+ gram = (unSubelimCanon g,i)
+
+convertAbsFun :: Env -> I.Ident -> A.Exp -> SRule
+convertAbsFun gram fun typing = -- trace2 "GFCtoSimple - converting function" (prt fun) $
+ Rule abs cnc
+ where abs = convertAbstract [] fun typing
+ cnc = convertConcrete gram abs
+
+----------------------------------------------------------------------
+-- abstract definitions
+
+convertAbstract :: [SDecl] -> Fun -> A.Exp -> Abstract SDecl Name
+convertAbstract env fun (A.EProd x a b)
+ = convertAbstract (convertAbsType x' [] a : env) fun b
+ where x' = if x==I.identC "h_" then anyVar else x
+convertAbstract env fun a
+ = Abs (convertAbsType anyVar [] a) (reverse env) name
+ where name = Name fun [ Unify [n] | n <- [0 .. length env-1] ]
+
+convertAbsType :: Var -> [FOType SCat] -> A.Exp -> SDecl
+convertAbsType x args (A.EProd _ a b) = convertAbsType x (convertType [] a : args) b
+convertAbsType x args a = Decl x (reverse args ::--> convertType [] a)
+
+convertType :: [TTerm] -> A.Exp -> FOType SCat
+convertType args (A.EApp a b) = convertType (convertExp [] b : args) a
+convertType args (A.EAtom at) = convertCat at ::@ reverse args
+convertType args (A.EProd _ _ b) = convertType args b ---- AR 7/10 workaround
+convertType args exp = error $ "GFCtoSimple.convertType: " ++ prt exp
+
+{- Exp from GF/Canon/GFC.cf:
+EApp. Exp1 ::= Exp1 Exp2 ;
+EProd. Exp ::= "(" Ident ":" Exp ")" "->" Exp ;
+EAbs. Exp ::= "\\" Ident "->" Exp ;
+EAtom. Exp2 ::= Atom ;
+EData. Exp2 ::= "data" ;
+-}
+
+convertExp :: [TTerm] -> A.Exp -> TTerm
+convertExp args (A.EAtom at) = convertAtom args at
+convertExp args (A.EApp a b) = convertExp (convertExp [] b : args) a
+convertExp args exp = error $ "GFCtoSimple.convertExp: " ++ prt exp
+
+convertAtom :: [TTerm] -> A.Atom -> TTerm
+convertAtom args (A.AC con) = con :@ reverse args
+-- A.AD: is this correct???
+convertAtom args (A.AD con) = con :@ args
+convertAtom [] (A.AV var) = TVar var
+convertAtom args atom = error $ "GFCtoSimple.convertAtom: " ++ prt args ++ " " ++ show atom
+
+convertCat :: A.Atom -> SCat
+convertCat (A.AC (A.CIQ _ cat)) = cat
+convertCat atom = error $ "GFCtoSimple.convertCat: " ++ show atom
+
+----------------------------------------------------------------------
+-- concrete definitions
+
+convertConcrete :: Env -> Abstract SDecl Name -> Concrete SLinType (Maybe STerm)
+convertConcrete gram (Abs decl args name) = Cnc ltyp largs term
+ where term = fmap (convertTerm gram . expandTerm gram) $ lookupLin gram $ name2fun name
+ ltyp : largs = map (convertCType gram . lookupCType gram) (decl : args)
+
+expandTerm :: Env -> A.Term -> A.Term
+expandTerm gram term = -- tracePrt "expanded term" prt $
+ err error id $ expandLinTables (fst gram) $
+ -- tracePrt "initial term" prt $
+ term
+
+convertCType :: Env -> A.CType -> SLinType
+convertCType gram (A.RecType rec) = RecT [ (lbl, convertCType gram ctype) | A.Lbg lbl ctype <- rec ]
+convertCType gram (A.Table pt vt) = TblT (enumerateTerms Nothing (convertCType gram pt)) (convertCType gram vt)
+convertCType gram ct@(A.Cn con) = ConT $ map (convertTerm gram) $ groundTerms gram ct
+convertCType gram (A.TStr) = StrT
+convertCType gram (A.TInts n) = error "GFCtoSimple.convertCType: cannot handle 'TInts' constructor"
+
+convertTerm :: Env -> A.Term -> STerm
+convertTerm gram (A.Arg arg) = convertArgVar arg
+convertTerm gram (A.Par con terms) = con :^ map (convertTerm gram) terms
+-- convertTerm gram (A.LI var) = Var var
+convertTerm gram (A.R rec) = Rec [ (lbl, convertTerm gram term) | A.Ass lbl term <- rec ]
+convertTerm gram (A.P term lbl) = convertTerm gram term +. lbl
+convertTerm gram (A.V ctype terms) = Tbl [ (convertTerm gram pat, convertTerm gram term) |
+ (pat, term) <- zip (groundTerms gram ctype) terms ]
+convertTerm gram (A.T ctype tbl) = Tbl [ (convertPatt pat, convertTerm gram term) |
+ A.Cas pats term <- tbl, pat <- pats ]
+convertTerm gram (A.S term sel) = convertTerm gram term :! convertTerm gram sel
+convertTerm gram (A.C term1 term2) = convertTerm gram term1 ?++ convertTerm gram term2
+convertTerm gram (A.FV terms) = variants (map (convertTerm gram) terms)
+convertTerm gram (A.E) = Empty
+convertTerm gram (A.K (A.KS tok)) = Token tok
+-- 'pre' tokens are converted to variants (over-generating):
+convertTerm gram (A.K (A.KP strs vars))
+ = variants $ map conc $ strs : [ vs | A.Var vs _ <- vars ]
+ where conc [] = Empty
+ conc ts = foldr1 (?++) $ map Token ts
+convertTerm gram (A.I con) = error "GFCtoSimple.convertTerm: cannot handle 'I' constructor"
+convertTerm gram (A.EInt int) = error "GFCtoSimple.convertTerm: cannot handle 'EInt' constructor"
+
+convertArgVar :: A.ArgVar -> STerm
+convertArgVar (A.A cat nr) = Arg (fromInteger nr) cat emptyPath
+convertArgVar (A.AB cat bindings nr) = Arg (fromInteger nr) cat emptyPath
+
+convertPatt (A.PC con pats) = con :^ map convertPatt pats
+-- convertPatt (A.PV x) = Var x
+-- convertPatt (A.PW) = Wildcard
+convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ]
+convertPatt (A.PI n) = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor"
+convertPatt p = error $ "GFCtoSimple.convertPatt: cannot handle " ++ show p
+
+----------------------------------------------------------------------
+
+lookupLin :: Env -> Fun -> Maybe A.Term
+lookupLin gram fun = err fail Just $
+ Look.lookupLin (fst gram) (A.CIQ (snd gram) fun)
+
+lookupCType :: Env -> SDecl -> A.CType
+lookupCType env decl
+ = errVal CMacros.defLinType $
+ Look.lookupLincat (fst env) (A.CIQ (snd env) (decl2cat decl))
+
+groundTerms :: Env -> A.CType -> [A.Term]
+groundTerms gram ctype = err error id $
+ Look.allParamValues (fst gram) ctype
+
diff --git a/src-3.0/GF/Conversion/Haskell.hs b/src-3.0/GF/Conversion/Haskell.hs
new file mode 100644
index 000000000..abe651e1e
--- /dev/null
+++ b/src-3.0/GF/Conversion/Haskell.hs
@@ -0,0 +1,71 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/11 14:11:46 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Converting/Printing different grammar formalisms in Haskell-readable format
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.Haskell where
+
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Formalism.MCFG
+import GF.Formalism.CFG
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+import GF.Data.Operations ((++++), (+++++))
+import GF.Infra.Print
+
+import Data.List (intersperse)
+
+-- | SimpleGFC to Haskell
+prtSGrammar :: SGrammar -> String
+prtSGrammar rules = "-- Simple GFC grammar as a Haskell file" ++++
+ "-- Autogenerated from the Grammatical Framework" +++++
+ "import GF.Formalism.GCFG" ++++
+ "import GF.Formalism.SimpleGFC" ++++
+ "import GF.Formalism.Utilities" ++++
+ "import GF.Canon.AbsGFC (CIdent(..), Label(..))" ++++
+ "import GF.Infra.Ident (Ident(..))" +++++
+ "grammar :: SimpleGrammar Ident (NameProfile Ident) String" ++++
+ "grammar = \n\t[ " ++
+ concat (intersperse "\n\t, " (map show rules)) ++ "\n\t]\n\n"
+
+-- | MCFG to Haskell
+prtMGrammar :: MGrammar -> String
+prtMGrammar rules = "-- Multiple context-free grammar as a Haskell file" ++++
+ "-- Autogenerated from the Grammatical Framework" +++++
+ "import GF.Formalism.GCFG" ++++
+ "import GF.Formalism.MCFG" ++++
+ "import GF.Formalism.Utilities" +++++
+ "grammar :: MCFGrammar String (NameProfile String) String String" ++++
+ "grammar = \n\t[ " ++
+ concat (intersperse "\n\t, " (map prtMRule rules)) ++ "\n\t]\n\n"
+ where prtMRule (Rule (Abs cat cats (Name fun profiles)) (Cnc lcat lcats lins))
+ = show (Rule (Abs (prt cat) (map prt cats) (Name (prt fun) (map cnvProfile profiles)))
+ (Cnc (map prt lcat) (map (map prt) lcats) (map cnvLin lins)))
+ cnvLin (Lin lbl syms) = Lin (prt lbl) (map (mapSymbol prtMArg id) syms)
+ prtMArg (cat, lbl, nr) = (prt cat, prt lbl, nr)
+
+-- | CFG to Haskell
+prtCGrammar :: CGrammar -> String
+prtCGrammar rules = "-- Context-free grammar as a Haskell file" ++++
+ "-- autogenerated from the Grammatical Framework" +++++
+ "import GF.Formalism.CFG" ++++
+ "import GF.Formalism.Utilities" ++++
+ "\ngrammar :: CFGrammar String (NameProfile String) String" ++++
+ "grammar = \n\t[ " ++
+ concat (intersperse "\n\t, " (map prtCRule rules)) ++ "\n\t]\n\n"
+ where prtCRule (CFRule cat syms (Name fun profiles))
+ = show (CFRule (prt cat) (map (mapSymbol prt id) syms)
+ (Name (prt fun) (map cnvProfile profiles)))
+
+cnvProfile (Unify args) = Unify args
+cnvProfile (Constant forest) = Constant (fmap prt forest)
diff --git a/src-3.0/GF/Conversion/MCFGtoCFG.hs b/src-3.0/GF/Conversion/MCFGtoCFG.hs
new file mode 100644
index 000000000..a58c31d37
--- /dev/null
+++ b/src-3.0/GF/Conversion/MCFGtoCFG.hs
@@ -0,0 +1,53 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:43 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.6 $
+--
+-- Converting MCFG grammars to (possibly overgenerating) CFG
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.MCFGtoCFG
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.CFG
+import GF.Conversion.Types
+
+----------------------------------------------------------------------
+-- * converting (possibly erasing) MCFG grammars
+
+convertGrammar :: EGrammar -> CGrammar
+convertGrammar gram = tracePrt "MCFGtoCFG - context-free rules" (prt.length) $
+ concatMap convertRule gram
+
+convertRule :: ERule -> [CRule]
+convertRule (Rule (Abs cat args (Name fun mprofile)) (Cnc _ _ record))
+ = [ CFRule (CCat cat lbl) rhs (Name fun profile) |
+ Lin lbl lin <- record,
+ let rhs = map (mapSymbol convertArg id) lin,
+ let cprofile = map (Unify . argPlaces lin) [0 .. length args-1],
+ let profile = mprofile `composeProfiles` cprofile
+ ]
+
+convertArg :: (ECat, ELabel, Int) -> CCat
+convertArg (cat, lbl, _) = CCat cat lbl
+
+argPlaces :: [Symbol (cat, lbl, Int) tok] -> Int -> [Int]
+argPlaces lin nr = [ place | (nr', place) <- zip linArgs [0..], nr == nr' ]
+ where linArgs = [ nr' | (_, _, nr') <- filterCats lin ]
+
+
+
+
diff --git a/src-3.0/GF/Conversion/MCFGtoFCFG.hs b/src-3.0/GF/Conversion/MCFGtoFCFG.hs
new file mode 100644
index 000000000..70aa4644d
--- /dev/null
+++ b/src-3.0/GF/Conversion/MCFGtoFCFG.hs
@@ -0,0 +1,51 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:43 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.6 $
+--
+-- Converting MCFG grammars to equivalent optimized FCFG
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.MCFGtoFCFG
+ (convertGrammar) where
+
+import Control.Monad
+import List (elemIndex)
+import Array
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.FCFG
+import GF.Conversion.Types
+import GF.Data.SortedList (nubsort)
+
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- * converting MCFG to optimized FCFG
+
+convertGrammar :: MGrammar -> FGrammar
+convertGrammar gram = [ FRule (Abs (fcat cat) (map fcat cats) name) (fcnc cnc) |
+ Rule (Abs cat cats name) cnc <- gram ]
+ where mcats = nubsort [ mc | Rule (Abs mcat mcats _) _ <- gram, mc <- mcat:mcats ]
+
+ fcat mcat@(MCat (ECat scat ecns) mlbls)
+ = case elemIndex mcat mcats of
+ Just catid -> FCat catid scat mlbls ecns
+ Nothing -> error ("MCFGtoFCFG.fcat " ++ prt mcat)
+
+ fcnc (Cnc _ arglbls lins) = listArray (0, length lins-1) (map flin lins)
+ where flin (Lin _ syms) = listArray (0, length syms-1) (map fsym syms)
+ fsym (Tok tok) = FSymTok tok
+ fsym (Cat (cat,lbl,arg)) = FSymCat (fcat cat) (flbl arg lbl) arg
+ flbl arg lbl = case elemIndex lbl (arglbls !! arg) of
+ Just lblid -> lblid
+ Nothing -> error ("MCFGtoFCFG.flbl " ++ prt arg ++ " " ++ prt lbl)
+
diff --git a/src-3.0/GF/Conversion/Prolog.hs b/src-3.0/GF/Conversion/Prolog.hs
new file mode 100644
index 000000000..b930cb476
--- /dev/null
+++ b/src-3.0/GF/Conversion/Prolog.hs
@@ -0,0 +1,205 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/09/14 09:51:18 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.4 $
+--
+-- Converting/Printing different grammar formalisms in Prolog-readable format
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.Prolog (prtSGrammar, prtSMulti, prtSHeader, prtSRule,
+ prtMGrammar, prtMMulti, prtMHeader, prtMRule,
+ prtCGrammar, prtCMulti, prtCHeader, prtCRule) where
+
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Formalism.MCFG
+import GF.Formalism.CFG
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+import qualified GF.Conversion.GFC as Cnv
+
+import GF.Data.Operations ((++++), (+++++))
+import GF.Infra.Print
+import qualified GF.Infra.Modules as Mod
+import qualified GF.Infra.Option as Option
+import GF.Data.Operations (okError)
+import GF.Canon.AbsGFC (Flag(..))
+import GF.Canon.GFC (CanonGrammar)
+import GF.Infra.Ident (Ident(..))
+
+import Data.Maybe (maybeToList, listToMaybe)
+import Data.Char (isLower, isAlphaNum)
+
+import GF.System.Tracing
+
+----------------------------------------------------------------------
+-- | printing multiple languages at the same time
+
+prtSMulti, prtMMulti, prtCMulti :: Option.Options -> CanonGrammar -> String
+prtSMulti = prtMulti prtSHeader prtSRule Cnv.gfc2simple "gfc_"
+prtMMulti = prtMulti prtMHeader prtMRule Cnv.gfc2mcfg "mcfg_"
+prtCMulti = prtMulti prtCHeader prtCRule Cnv.gfc2cfg "cfg_"
+
+-- code and ideas stolen from GF.CFGM.PrintCFGrammar
+
+prtMulti prtHeader prtRule conversion prefix opts gr
+ = prtHeader ++++ unlines
+ [ "\n\n" ++ prtLine ++++
+ "%% Language module: " ++ prtQ langmod +++++
+ unlines (map (prtRule langmod) rules) |
+ lang <- maybe [] (Mod.allConcretes gr) (Mod.greatestAbstract gr),
+ let Mod.ModMod (Mod.Module{Mod.flags=fs}) = okError (Mod.lookupModule gr lang),
+ let cnvopts = Option.Opts $ map Option.gfcConversion $ getFlag fs "conversion",
+ let rules = conversion cnvopts (gr, lang),
+ let langmod = (let IC lg = lang in prefix ++ lg) ]
+
+getFlag :: [Flag] -> String -> [String]
+getFlag fs x = [v | Flg (IC k) (IC v) <- fs, k == x]
+
+----------------------------------------------------------------------
+-- | SimpleGFC to Prolog
+--
+-- assumes that the profiles in the Simple GFC names are trivial
+prtSGrammar :: SGrammar -> String
+prtSGrammar rules = prtSHeader +++++ unlines (map (prtSRule "") rules)
+
+prtSHeader :: String
+prtSHeader = prtLine ++++
+ "%% Simple GFC grammar in Prolog-readable format" ++++
+ "%% Autogenerated from the Grammatical Framework" +++++
+ "%% The following predicate is defined:" ++++
+ "%% \t rule(Fun, Cat, c(Cat,...), LinTerm)"
+
+prtSRule :: String -> SRule -> String
+prtSRule lang (Rule (Abs cat cats (Name fun _prof)) (Cnc _ _ mterm))
+ = (if null lang then "" else prtQ lang ++ " : ") ++
+ prtFunctor "rule" [plfun, plcat, plcats, plcnc] ++ "."
+ where plfun = prtQ fun
+ plcat = prtSDecl cat
+ plcats = prtFunctor "c" (map prtSDecl cats)
+ plcnc = "\n\t" ++ prtSTerm (maybe Empty id mterm)
+
+prtSTerm (Arg n c p) = prtFunctor "arg" [prtQ c, prt (n+1), prtSPath p]
+-- prtSTerm (c :^ []) = prtQ c
+prtSTerm (c :^ ts) = prtOper "^" (prtQ c) (prtPList (map prtSTerm ts))
+prtSTerm (Rec rec) = prtFunctor "rec" [prtPList [ prtOper "=" (prtQ l) (prtSTerm t) | (l, t) <- rec ]]
+prtSTerm (Tbl tbl) = prtFunctor "tbl" [prtPList [ prtOper "=" (prtSTerm p) (prtSTerm t) | (p, t) <- tbl ]]
+prtSTerm (Variants ts) = prtFunctor "variants" [prtPList (map prtSTerm ts)]
+prtSTerm (t1 :++ t2) = prtOper "+" (prtSTerm t1) (prtSTerm t2)
+prtSTerm (Token t) = prtFunctor "tok" [prtQ t]
+prtSTerm (Empty) = "empty"
+prtSTerm (term :. lbl) = prtOper "*" (prtSTerm term) (prtQ lbl)
+prtSTerm (term :! sel) = prtOper "/" (prtSTerm term) (prtSTerm sel)
+-- prtSTerm (Wildcard) = "wildcard"
+-- prtSTerm (Var var) = prtFunctor "var" [prtQ var]
+
+prtSPath (Path path) = prtPList (map (either prtQ prtSTerm) path)
+
+prtSDecl (Decl var typ) | var == anyVar = prtSAbsType typ
+ | otherwise = "_" ++ prtVar var ++ ":" ++ prtSAbsType typ
+
+
+prtSAbsType ([] ::--> typ) = prtSFOType typ
+prtSAbsType (args ::--> typ) = prtOper ":->" (prtPList (map prtSFOType args)) (prtSFOType typ)
+
+prtSFOType (cat ::@ args) = prtFunctor (prtQ cat) (map prtSTTerm args)
+
+prtSTTerm (con :@ args) = prtFunctor (prtQ con) (map prtSTTerm args)
+prtSTTerm (TVar var) = "_" ++ prtVar var
+
+----------------------------------------------------------------------
+-- | MCFG to Prolog
+prtMGrammar :: MGrammar -> String
+prtMGrammar rules = prtMHeader +++++ unlines (map (prtMRule "") rules)
+
+prtMHeader :: String
+prtMHeader = prtLine ++++
+ "%% Multiple context-free grammar in Prolog-readable format" ++++
+ "%% Autogenerated from the Grammatical Framework" +++++
+ "%% The following predicate is defined:" ++++
+ "%% \t rule(Profile, Cat, c(Cat,...), [Lbl=Symbols,...])"
+
+prtMRule :: String -> MRule -> String
+prtMRule lang (Rule (Abs cat cats name) (Cnc _lcat _lcats lins))
+ = (if null lang then "" else prtQ lang ++ " : ") ++
+ prtFunctor "rule" [plname, plcat, plcats, pllins] ++ "."
+ where plname = prtName name
+ plcat = prtQ cat
+ plcats = prtFunctor "c" (map prtQ cats)
+ pllins = "\n\t[ " ++ prtSep "\n\t, " (map prtMLin lins) ++ " ]"
+
+prtMLin (Lin lbl lin) = prtOper "=" (prtQ lbl) (prtPList (map prtMSymbol lin))
+
+prtMSymbol (Cat (cat, lbl, nr)) = prtFunctor "arg" [prtQ cat, show (nr+1), prtQ lbl]
+prtMSymbol (Tok tok) = prtFunctor "tok" [prtQ tok]
+
+----------------------------------------------------------------------
+-- | CFG to Prolog
+prtCGrammar :: CGrammar -> String
+prtCGrammar rules = prtCHeader +++++ unlines (map (prtCRule "") rules)
+
+prtCHeader :: String
+prtCHeader = prtLine ++++
+ "%% Context-free grammar in Prolog-readable format" ++++
+ "%% Autogenerated from the Grammatical Framework" +++++
+ "%% The following predicate is defined:" ++++
+ "%% \t rule(Profile, Cat, [Symbol,...])"
+
+prtCRule :: String -> CRule -> String
+prtCRule lang (CFRule cat syms name)
+ = (if null lang then "" else prtQ lang ++ " : ") ++
+ prtFunctor "cfgrule" [plname, plcat, plsyms] ++ "."
+ where plname = prtName name
+ plcat = prtQ cat
+ plsyms = prtPList (map prtCSymbol syms)
+
+prtCSymbol (Cat cat) = prtFunctor "cat" [prtQ cat]
+prtCSymbol (Tok tok) = prtFunctor "tok" [prtQ tok]
+
+----------------------------------------------------------------------
+-- profiles, quoted strings and more
+
+prtFunctor f xs = f ++ if null xs then "" else "(" ++ prtSep ", " xs ++ ")"
+prtPList xs = "[" ++ prtSep ", " xs ++ "]"
+prtOper f x y = "(" ++ x ++ " " ++ f ++ " " ++ y ++ ")"
+
+prtName name@(Name fun profiles)
+ | name == coercionName = "1"
+ | and (zipWith (==) profiles (map (Unify . return) [0..])) = prtQ fun
+ | otherwise = prtFunctor (prtQ fun) (map prtProfile profiles)
+
+prtProfile (Unify []) = " ? "
+prtProfile (Unify args) = foldr1 (prtOper "=") (map (show . succ) args)
+prtProfile (Constant forest) = prtForest forest
+
+prtForest (FMeta) = " ? "
+prtForest (FNode fun [fs]) = prtFunctor (prtQ fun) (map prtForest fs)
+prtForest (FNode fun fss) = prtPList [ prtFunctor (prtQ fun) (map prtForest fs) |
+ fs <- fss ]
+
+prtQ atom = prtQStr (prt atom)
+
+prtQStr atom@(x:xs)
+ | isLower x && all isAlphaNumUnder xs = atom
+ where isAlphaNumUnder '_' = True
+ isAlphaNumUnder x = isAlphaNum x
+prtQStr atom = "'" ++ concatMap esc (prt atom) ++ "'"
+ where esc '\'' = "\\'"
+ esc '\n' = "\\n"
+ esc '\t' = "\\t"
+ esc c = [c]
+
+prtVar var = reprime (prt var)
+ where reprime "" = ""
+ reprime ('\'' : cs) = "_0" ++ reprime cs
+ reprime (c:cs) = c : reprime cs
+
+prtLine = replicate 70 '%'
+
+
diff --git a/src-3.0/GF/Conversion/RemoveEpsilon.hs b/src-3.0/GF/Conversion/RemoveEpsilon.hs
new file mode 100644
index 000000000..0e5dafb38
--- /dev/null
+++ b/src-3.0/GF/Conversion/RemoveEpsilon.hs
@@ -0,0 +1,46 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/30 08:11:32 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.3 $
+--
+-- Removing epsilon linearizations from MCF grammars
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.RemoveEpsilon where
+-- (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+import Data.List (mapAccumL)
+import Data.Maybe (mapMaybe)
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Conversion.Types
+import GF.Data.Assoc
+import GF.Data.SortedList
+import GF.Data.GeneralDeduction
+
+convertGrammar :: EGrammar -> EGrammar
+convertGrammar grammar = trace2 "RemoveEpsilon: initialEmpties" (prt initialEmpties) $
+ trace2 "RemoveEpsilon: emptyCats" (prt emptyCats) $
+ grammar
+ where initialEmpties = nubsort [ (cat, lbl) |
+ Rule (Abs cat _ _) (Cnc _ _ lins) <- grammar,
+ Lin lbl [] <- lins ]
+ emptyCats = limitEmpties initialEmpties
+ limitEmpties es = if es==es' then es else limitEmpties es'
+ where es' = nubsort [ (cat, lbl) | Rule (Abs cat _ _) (Cnc _ _ lins) <- grammar,
+ Lin lbl rhs <- lins,
+ all (symbol (\(c,l,n) -> (c,l) `elem` es) (const False)) rhs ]
+
+
+
diff --git a/src-3.0/GF/Conversion/RemoveErasing.hs b/src-3.0/GF/Conversion/RemoveErasing.hs
new file mode 100644
index 000000000..1dc2560fc
--- /dev/null
+++ b/src-3.0/GF/Conversion/RemoveErasing.hs
@@ -0,0 +1,113 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:44 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.3 $
+--
+-- Removing erasingness from MCFG grammars (as in Ljunglöf 2004, sec 4.5.1)
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.RemoveErasing
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+import Data.List (mapAccumL)
+import Data.Maybe (mapMaybe)
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Conversion.Types
+import GF.Data.Assoc
+import GF.Data.SortedList
+import GF.Data.GeneralDeduction
+
+convertGrammar :: EGrammar -> [SCat] -> MGrammar
+convertGrammar grammar starts = newGrammar
+ where newGrammar = tracePrt "RemoveErasing - nonerasing rules" (prt . length) $
+ [ rule | NR rule <- chartLookup finalChart True ]
+ finalChart = tracePrt "RemoveErasing - nonerasing cats"
+ (prt . length . flip chartLookup False) $
+ buildChart keyof [newRules rulesByCat] $
+ tracePrt "RemoveErasing - initial ne-cats" (prt . length) $
+ initialCats
+ initialCats = trace2 "RemoveErasing - starting categories" (prt starts) $
+ if null starts
+ then trace2 "RemoveErasing" "initialCatsBU" $
+ initialCatsBU rulesByCat
+ else trace2 "RemoveErasing" ("initialCatsTD: " ++ prt starts) $
+ initialCatsTD rulesByCat starts
+ rulesByCat = trace2 "RemoveErasing - erasing rules" (prt $ length grammar) $
+ accumAssoc id [ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- grammar ]
+
+data Item r c = NR r | NC c deriving (Eq, Ord, Show)
+
+keyof (NR _) = True
+keyof (NC _) = False
+
+newRules grammar chart (NR (Rule (Abs _ cats _) _))
+ = [ NC cat | cat@(MCat _ lbls) <- cats, not (null lbls) ]
+newRules grammar chart (NC newCat@(MCat cat lbls))
+ = do Rule (Abs _ args (Name fun profile)) (Cnc _ _ lins0) <- grammar ? cat
+
+ lins <- selectLins lins0 lbls
+ -- let lins = [ lin | lin@(Lin lbl _) <- lins0,
+ -- lbl `elem` lbls ]
+
+ let argsInLin = listAssoc $
+ map (\((n,c),l) -> (n, MCat c l)) $
+ groupPairs $ nubsort $
+ [ ((nr, cat), lbl) |
+ Lin _ lin <- lins,
+ Cat (cat, lbl, nr) <- lin ]
+
+ newArgs = mapMaybe (lookupAssoc argsInLin) [0 .. length args-1]
+ argLbls = [ lbls | MCat _ lbls <- newArgs ]
+
+ newLins = [ Lin lbl newLin | Lin lbl lin <- lins,
+ let newLin = map (mapSymbol cnvCat id) lin ]
+ cnvCat (cat, lbl, nr) = (mcat, lbl, nr')
+ where Just mcat = lookupAssoc argsInLin nr
+ Unify [nr'] = newProfile !! nr
+ nonEmptyCat (Cat (MCat _ [], _, _)) = False
+ nonEmptyCat _ = True
+
+ newProfile = snd $ mapAccumL accumProf 0 $
+ map (lookupAssoc argsInLin) [0 .. length args-1]
+ accumProf nr = maybe (nr, Unify []) $ const (nr+1, Unify [nr])
+ newName = -- tracePrt "newName" (prtNewName profile newProfile) $
+ Name fun (profile `composeProfiles` newProfile)
+
+ guard $ all (not . null) argLbls
+ return $ NR (Rule (Abs newCat newArgs newName) (Cnc lbls argLbls newLins))
+
+selectLins lins0 = mapM selectLbl
+ where selectLbl lbl = [ lin | lin@(Lin lbl' _) <- lins0, lbl == lbl' ]
+
+
+prtNewName :: [Profile (SyntaxForest Fun)] -> [Profile (SyntaxForest Fun)] -> Name -> String
+prtNewName p p' n = prt p ++ " .o. " ++ prt p' ++ " : " ++ prt n
+
+
+initialCatsTD grammar starts =
+ [ cat | cat@(NC (MCat (ECat start _) _)) <- initialCatsBU grammar,
+ start `elem` starts ]
+
+initialCatsBU grammar
+ = [ NC (MCat cat [lbl]) | (cat, rules) <- aAssocs grammar,
+ let Rule _ (Cnc lbls _ _) = head rules,
+ lbl <- lbls ]
+
+
+
+
+
+
+
diff --git a/src-3.0/GF/Conversion/RemoveSingletons.hs b/src-3.0/GF/Conversion/RemoveSingletons.hs
new file mode 100644
index 000000000..4b9992a4d
--- /dev/null
+++ b/src-3.0/GF/Conversion/RemoveSingletons.hs
@@ -0,0 +1,82 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/11 10:28:16 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Instantiating all types which only have one single element.
+--
+-- Should be merged into 'GF.Conversion.FiniteToSimple'
+-----------------------------------------------------------------------------
+
+module GF.Conversion.RemoveSingletons where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Conversion.Types
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+
+import Data.List (mapAccumL)
+
+convertGrammar :: SGrammar -> SGrammar
+convertGrammar grammar = if singles == emptyAssoc then grammar
+ else tracePrt "RemoveSingletons - non-singleton rules" (prt . length) $
+ map (convertRule singles) grammar
+ where singles = calcSingletons grammar
+
+convertRule :: Assoc SCat (SyntaxForest Fun, Maybe STerm) -> SRule -> SRule
+convertRule singles rule@(Rule (Abs _ decls _) _)
+ = if all (Nothing ==) singleArgs then rule
+ else instantiateSingles singleArgs rule
+ where singleArgs = map (lookupAssoc singles . decl2cat) decls
+
+instantiateSingles :: [Maybe (SyntaxForest Fun, Maybe STerm)] -> SRule -> SRule
+instantiateSingles singleArgs (Rule (Abs decl decls (Name fun profile)) (Cnc lcat lcats lterm))
+ = Rule (Abs decl decls' (Name fun profile')) (Cnc lcat lcats' lterm')
+ where (decls', lcats') = unzip [ (d, l) | (Nothing, d, l) <- zip3 singleArgs decls lcats ]
+ profile' = map (fmap fst) exProfile `composeProfiles` profile
+ newArgs = map (fmap snd) exProfile
+ lterm' = fmap (instantiateLin newArgs) lterm
+ exProfile = snd $ mapAccumL mkProfile 0 singleArgs
+ mkProfile nr (Just trm) = (nr, Constant trm)
+ mkProfile nr (Nothing) = (nr+1, Unify [nr])
+
+instantiateLin :: [Profile (Maybe STerm)] -> STerm -> STerm
+instantiateLin newArgs = inst
+ where inst (Arg nr cat path)
+ = case newArgs !! nr of
+ Unify [nr'] -> Arg nr' cat path
+ Constant (Just term) -> termFollowPath path term
+ Constant Nothing -> error "RemoveSingletons.instantiateLin: This should not happen (argument has no linearization)"
+ inst (cn :^ terms) = cn :^ map inst terms
+ inst (Rec rec) = Rec [ (lbl, inst term) | (lbl, term) <- rec ]
+ inst (term :. lbl) = inst term +. lbl
+ inst (Tbl tbl) = Tbl [ (pat, inst term) | (pat, term) <- tbl ]
+ inst (term :! sel) = inst term +! inst sel
+ inst (Variants ts) = variants (map inst ts)
+ inst (t1 :++ t2) = inst t1 ?++ inst t2
+ inst term = term
+
+----------------------------------------------------------------------
+
+calcSingletons :: SGrammar -> Assoc SCat (SyntaxForest Fun, Maybe STerm)
+calcSingletons rules = listAssoc singleCats
+ where singleCats = tracePrt "RemoveSingletons - singleton cats" (prtSep " ") $
+ [ (cat, (constantNameToForest name, lin)) |
+ (cat, [([], name, lin)]) <- rulesByCat ]
+ rulesByCat = groupPairs $ nubsort
+ [ (decl2cat cat, (args, name, lin)) |
+ Rule (Abs cat args name) (Cnc _ _ lin) <- rules ]
+
+
+
diff --git a/src-3.0/GF/Conversion/SimpleToFCFG.hs b/src-3.0/GF/Conversion/SimpleToFCFG.hs
new file mode 100644
index 000000000..4ff5781f9
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToFCFG.hs
@@ -0,0 +1,536 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar.
+--
+-- the resulting grammars might be /very large/
+--
+-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToFCFG
+ (convertConcrete) where
+
+import GF.Infra.PrintClass
+
+import Control.Monad
+
+import GF.Formalism.Utilities
+import GF.Formalism.FCFG
+
+import GF.GFCC.Macros --hiding (prt)
+import GF.GFCC.DataGFCC
+import GF.GFCC.CId
+
+import GF.Data.BacktrackM
+import GF.Data.SortedList
+import GF.Data.Utilities (updateNthM, sortNub)
+
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.List as List
+import Data.Array
+import Data.Maybe
+
+----------------------------------------------------------------------
+-- main conversion function
+
+convertConcrete :: Abstr -> Concr -> FGrammar
+convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats'
+ where abs_defs = Map.assocs (funs abs)
+ conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
+ cats = lincats cnc
+ (abs_defs',conc',cats') = expandHOAS abs_defs conc cats
+
+expandHOAS :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> ([(CId,(Type,Exp))],TermMap,TermMap)
+expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns,
+ Map.unions [lins, hoLins, varLins],
+ Map.unions [lincats, hoLincats, varLincat])
+ where
+ -- replace higher-order fun argument types with new categories
+ funs' = [(f,(fixType ty,e)) | (f,(ty,e)) <- funs]
+ where
+ fixType :: Type -> Type
+ fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt
+
+ hoTypes :: [(Int,CId)]
+ hoTypes = sortNub [(n,c) | (_,(ty,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0]
+ hoCats = sortNub (map snd hoTypes)
+ -- for each Cat with N bindings, we add a new category _NCat
+ -- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat
+ hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),EEq [])) | ty@(n,c) <- hoTypes]
+ -- lincats for the new categories
+ hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes]
+ -- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ...
+ hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes]
+ where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c)
+ -- for each Cat, we a add a fun _Var_Cat : _Var -> Cat
+ varFuns = [(varFunName cat, (cftype [varCat] cat,EEq [])) | cat <- hoCats]
+ -- linearizations of the _Var_Cat functions
+ varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats]
+ -- lincat for the _Var category
+ varLincat = Map.singleton varCat (R [S []])
+
+ lincatOf c = fromMaybe (error $ "No lincat for " ++ prt c) $ Map.lookup c lincats
+
+ modifyRec :: ([Term] -> [Term]) -> Term -> Term
+ modifyRec f (R xs) = R (f xs)
+ modifyRec _ t = error $ "Not a record: " ++ show t
+
+ varCat = CId "_Var"
+
+ catName :: (Int,CId) -> CId
+ catName (0,c) = c
+ catName (n,CId c) = CId ("_" ++ show n ++ c)
+
+ funName :: (Int,CId) -> CId
+ funName (n,CId c) = CId ("__" ++ show n ++ c)
+
+ varFunName :: CId -> CId
+ varFunName (CId c) = CId ("_Var_" ++ c)
+
+-- replaces __NCat with _B and _Var_Cat with _.
+-- the temporary names are just there to avoid name collisions.
+fixHoasFuns :: FGrammar -> FGrammar
+fixHoasFuns (rs, cs) = ([FRule (fixName n) args cat lins | FRule n args cat lins <- rs], cs)
+ where fixName (Name (CId ('_':'_':_)) p) = Name (CId "_B") p
+ fixName (Name (CId n) p) | "_Var_" `List.isPrefixOf` n = Name wildCId p
+ fixName n = n
+
+convert :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> FGrammar
+convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
+ where
+ srules = [
+ (XRule id args res (map findLinType args) (findLinType res) term) |
+ (id, (ty,_)) <- abs_defs, let (args,res) = catSkeleton ty,
+ term <- Map.lookup id cnc_defs]
+
+ findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
+
+ (xrulesMap,frulesEnv) = List.foldl' helper (Map.empty,emptyFRulesEnv) srules
+ where
+ helper (xrulesMap,frulesEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) =
+ let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap
+ frulesEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env)
+ frulesEnv
+ (mkSingletonSelectors cnc_defs cnc_res)
+ in xrulesMap' `seq` frulesEnv' `seq` (xrulesMap',frulesEnv')
+
+ loop frulesEnv =
+ let (todo, frulesEnv') = takeToDoRules xrulesMap frulesEnv
+ in case todo of
+ [] -> frulesEnv'
+ _ -> loop $! List.foldl' (\env (srules,selector) ->
+ List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) frulesEnv' todo
+
+convertRule :: TermMap -> TermSelector -> XRule -> FRulesEnv -> FRulesEnv
+convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) frulesEnv =
+ foldBM addRule
+ frulesEnv
+ (convertTerm cnc_defs selector term [([],[])])
+ (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes)
+ where
+ addRule linRec (newCat', newArgs', _, _) env0 =
+ let (env1, newCat) = genFCatHead env0 newCat'
+ (env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) ->
+ let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths]
+ (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs
+ in case xcat of
+ PFCat _ [] _ -> (env , args, all_args)
+ _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) (env1,[],[]) (zip3 newArgs' ctypes [0..])
+
+ newLinRec = listArray (0,length linRec-1) [translateLin idxArgs path linRec | path <- case newCat' of {PFCat _ rcs _ -> rcs}]
+
+ (_,newProfile) = List.mapAccumL accumProf 0 newArgs'
+ where
+ accumProf nr (PFCat _ [] _,_ ) = (nr, Unify [] )
+ accumProf nr (_ ,xpaths) = (nr+cnt+1, Unify [nr..nr+cnt])
+ where cnt = length xpaths
+
+ rule = FRule (Name fun newProfile) newArgs newCat newLinRec
+ in addFRule env2 rule
+
+translateLin idxArgs lbl' [] = array (0,-1) []
+translateLin idxArgs lbl' ((lbl,syms) : lins)
+ | lbl' == lbl = listArray (0,length syms-1) (map instSym syms)
+ | otherwise = translateLin idxArgs lbl' lins
+ where
+ instSym = symbol (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
+ instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
+ | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
+ in FSymCat fcat (index lbl rcs 0) (nr'+xnr)
+ | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs
+
+ index lbl' (lbl:lbls) idx
+ | lbl' == lbl = idx
+ | otherwise = index lbl' lbls $! (idx+1)
+
+
+----------------------------------------------------------------------
+-- term conversion
+
+type CnvMonad a = BacktrackM Env a
+
+type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term])
+type LinRec = [(FPath, [Symbol (FPath, FIndex, Int) FToken])]
+
+type TermMap = Map.Map CId Term
+
+convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec
+convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins
+convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins
+convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins
+
+convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel
+ convertTerm cnc_defs (TuplePrj nr selector) term lins
+convertTerm cnc_defs selector (FV vars) lins = do term <- member vars
+ convertTerm cnc_defs selector term lins
+convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path
+ foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
+convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
+ do projectHead lbl_path
+ return ((lbl_path,Tok str : lin) : lins)
+convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) =
+ do projectHead lbl_path
+ toks <- member (strs:[strs' | Var strs' _ <- vars])
+ return ((lbl_path, map Tok toks ++ lin) : lins)
+convertTerm cnc_defs selector (RP _ term) lins = convertTerm cnc_defs selector term lins
+convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
+ convertTerm cnc_defs selector term lins
+convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
+ ss <- case t of
+ R ss -> return ss
+ F f -> do
+ t <- Map.lookup f cnc_defs
+ case t of
+ R ss -> return ss
+ convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
+convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")
+
+
+convertArg (TupleSel record) nr path lbl_path lin lins =
+ foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record
+convertArg (TuplePrj lbl selector) nr path lbl_path lin lins =
+ convertArg selector nr (lbl:path) lbl_path lin lins
+convertArg (ConSel indices) nr path lbl_path lin lins = do
+ index <- member indices
+ restrictHead lbl_path index
+ restrictArg nr path index
+ return lins
+convertArg StrSel nr path lbl_path lin lins = do
+ projectHead lbl_path
+ xnr <- projectArg nr path
+ return ((lbl_path, GF.Formalism.Utilities.Cat (path, nr, xnr) : lin) : lins)
+
+convertCon (ConSel indices) index lbl_path lin lins = do
+ guard (index `elem` indices)
+ restrictHead lbl_path index
+ return lins
+convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x
+
+convertRec cnc_defs selector index [] lbl_path lin lins = return lins
+convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields
+ where
+ select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins
+ select ((index',sub_sel) : fields)
+ | index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins)
+ convertRec cnc_defs selector (index+1) record lbl_path lin lins
+ | otherwise = select fields
+convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do
+ convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins)
+
+
+------------------------------------------------------------
+-- eval a term to ground terms
+
+evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex
+evalTerm cnc_defs path (V nr) = do term <- readArgCType nr
+ unifyPType nr (reverse path) (selectTerm path term)
+evalTerm cnc_defs path (C nr) = return nr
+evalTerm cnc_defs path (R record) = case path of
+ (index:path) -> evalTerm cnc_defs path (record !! index)
+evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel
+ evalTerm cnc_defs (index:path) term
+evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path
+evalTerm cnc_defs path (RP alias _) = evalTerm cnc_defs path alias
+evalTerm cnc_defs path (F id) = do term <- Map.lookup id cnc_defs
+ evalTerm cnc_defs path term
+evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
+
+unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex
+unifyPType nr path (C max_index) =
+ do (_, args, _, _) <- readState
+ let (PFCat _ _ tcs,_) = args !! nr
+ case lookup path tcs of
+ Just index -> return index
+ Nothing -> do index <- member [0..max_index]
+ restrictArg nr path index
+ return index
+unifyPType nr path (RP alias _) = unifyPType nr path alias
+
+unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007
+
+selectTerm :: FPath -> Term -> Term
+selectTerm [] term = term
+selectTerm (index:path) (R record) = selectTerm path (record !! index)
+selectTerm path (RP _ term) = selectTerm path term
+
+
+----------------------------------------------------------------------
+-- FRulesEnv
+
+data FRulesEnv = FRulesEnv {-# UNPACK #-} !Int FCatSet [FRule]
+type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat)))
+
+data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)]
+
+protoFCat :: CId -> ProtoFCat
+protoFCat cat = PFCat cat [] []
+
+emptyFRulesEnv = FRulesEnv 0 (ins fcatString (CId "String") [[0]] [] $
+ ins fcatInt (CId "Int") [[0]] [] $
+ ins fcatFloat (CId "Float") [[0]] [] $
+ ins fcatVar (CId "_Var") [[0]] [] $
+ Map.empty) []
+ where
+ ins fcat cat rcs tcs fcatSet =
+ Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
+ where
+ right_fcat = Right fcat
+ tmap_s = Map.singleton tcs right_fcat
+ rmap_s = Map.singleton rcs tmap_s
+
+addFRule :: FRulesEnv -> FRule -> FRulesEnv
+addFRule (FRulesEnv last_id fcatSet rules) rule = FRulesEnv last_id fcatSet (rule:rules)
+
+getFGrammar :: FRulesEnv -> FGrammar
+getFGrammar (FRulesEnv last_id fcatSet rules) = (rules, Map.map getFCatList fcatSet)
+ where
+ getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs
+
+genFCatHead :: FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
+genFCatHead env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
+ case Map.lookup cat fcatSet >>= Map.lookup rcs >>= Map.lookup tcs of
+ Just (Left fcat) -> (FRulesEnv last_id (ins fcat) rules, fcat)
+ Just (Right fcat) -> (env, fcat)
+ Nothing -> let fcat = last_id+1
+ in (FRulesEnv fcat (ins fcat) rules, fcat)
+ where
+ ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s fcatSet
+ where
+ right_fcat = Right fcat
+ tmap_s = Map.singleton tcs right_fcat
+ rmap_s = Map.singleton rcs tmap_s
+
+genFCatArg :: TermMap -> Term -> FRulesEnv -> ProtoFCat -> (FRulesEnv, FCat)
+genFCatArg cnc_defs ctype env@(FRulesEnv last_id fcatSet rules) (PFCat cat rcs tcs) =
+ case Map.lookup cat fcatSet >>= Map.lookup rcs of
+ Just tmap -> case Map.lookup tcs tmap of
+ Just (Left fcat) -> (env, fcat)
+ Just (Right fcat) -> (env, fcat)
+ Nothing -> ins tmap
+ Nothing -> ins Map.empty
+ where
+ ins tmap =
+ let fcat = last_id+1
+ (either_fcat,last_id1,tmap1,rules1)
+ = foldBM (\tcs st (either_fcat,last_id,tmap,rules) ->
+ let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap
+ rule = FRule (Name (CId "_") [Unify [0]]) [fcat_arg] fcat
+ (listArray (0,length rcs-1) [listArray (0,0) [FSymCat fcat_arg lbl 0] | lbl <- [0..length rcs-1]])
+ in if st
+ then (Right fcat, last_id1,tmap1,rule:rules)
+ else (either_fcat,last_id, tmap, rules))
+ (Left fcat,fcat,Map.insert tcs either_fcat tmap,rules)
+ (gen_tcs ctype [] [])
+ False
+ rmap1 = Map.singleton rcs tmap1
+ in (FRulesEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 fcatSet) rules1, fcat)
+ where
+ addArg tcs last_id tmap =
+ case Map.lookup tcs tmap of
+ Just (Left fcat) -> (last_id, tmap, fcat)
+ Just (Right fcat) -> (last_id, tmap, fcat)
+ Nothing -> let fcat = last_id+1
+ in (fcat, Map.insert tcs (Left fcat) tmap, fcat)
+
+ gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)]
+ gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record)
+ gen_tcs (S _) path acc = return acc
+ gen_tcs (RP _ term) path acc = gen_tcs term path acc
+ gen_tcs (C max_index) path acc =
+ case List.lookup path tcs of
+ Just index -> return $! addConstraint path index acc
+ Nothing -> do writeState True
+ index <- member [0..max_index]
+ return $! addConstraint path index acc
+ where
+ addConstraint path0 index0 (c@(path,index) : cs)
+ | path0 > path = c:addConstraint path0 index0 cs
+ addConstraint path0 index0 cs = (path0,index0) : cs
+ gen_tcs (F id) path acc = case Map.lookup id cnc_defs of
+ Just term -> gen_tcs term path acc
+ Nothing -> error ("unknown identifier: "++prt id)
+
+
+
+------------------------------------------------------------
+-- TODO queue organization
+
+type XRulesMap = Map.Map CId [XRule]
+data XRule = XRule CId {- function -}
+ [CId] {- argument types -}
+ CId {- result type -}
+ [Term] {- argument lin-types representation -}
+ Term {- result lin-type representation -}
+ Term {- body -}
+
+takeToDoRules :: XRulesMap -> FRulesEnv -> ([([XRule], TermSelector)], FRulesEnv)
+takeToDoRules xrulesMap (FRulesEnv last_id fcatSet rules) = (todo,FRulesEnv last_id fcatSet' rules)
+ where
+ (todo,fcatSet') =
+ Map.mapAccumWithKey (\todo cat rmap ->
+ let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap ->
+ let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat ->
+ case either_xcat of
+ Left xcat -> (tcs:tcss,Right xcat)
+ Right xcat -> ( tcss,either_xcat)) [] tmap
+ in case tcss of
+ [] -> ( todo,tmap )
+ _ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap
+ mb_srules = Map.lookup cat xrulesMap
+ Just srules = mb_srules
+
+ in case mb_srules of
+ Just srules -> (todo1,rmap1)
+ Nothing -> (todo ,rmap1)) [] fcatSet
+
+
+------------------------------------------------------------
+-- The TermSelector
+
+data TermSelector
+ = TupleSel [(FIndex, TermSelector)]
+ | TuplePrj FIndex TermSelector
+ | ConSel [FIndex]
+ | StrSel
+ deriving Show
+
+mkSingletonSelectors :: TermMap
+ -> Term -- ^ Type representation term
+ -> [TermSelector] -- ^ list of selectors containing just one string field
+mkSingletonSelectors cnc_defs term = sels0
+ where
+ (sels0,tcss0) = loop [] ([],[]) term
+
+ loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record)
+ loop path st (RP _ t) = loop path st t
+ loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss)
+ loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss)
+ loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of
+ Just term -> loop path (sels,tcss) term
+ Nothing -> error ("unknown identifier: "++prt id)
+
+mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector
+mkSelector rcs tcss =
+ List.foldl' addRestriction (case xs of
+ (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys
+ where
+ xs = [ reverse path | path <- rcs]
+ ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs]
+
+ addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector
+ addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices)
+ where
+ add [] = [n_index]
+ add (index':indices)
+ | n_index == index' = index': indices
+ | otherwise = index':add indices
+ addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields)
+ where
+ add [] = [(index,path2selector (ConSel [n_index]) path)]
+ add (field@(index',sub_sel):fields)
+ | index == index' = (index',addRestriction sub_sel (path,n_index)):fields
+ | otherwise = field : add fields
+
+ addProjection :: TermSelector -> FPath -> TermSelector
+ addProjection StrSel [] = StrSel
+ addProjection (TupleSel fields) (index : path) = TupleSel (add fields)
+ where
+ add [] = [(index,path2selector StrSel path)]
+ add (field@(index',sub_sel):fields)
+ | index == index' = (index',addProjection sub_sel path):fields
+ | otherwise = field : add fields
+
+ path2selector base [] = base
+ path2selector base (index : path) = TupleSel [(index,path2selector base path)]
+
+------------------------------------------------------------
+-- updating the MCF rule
+
+readArgCType :: FIndex -> CnvMonad Term
+readArgCType nr = do (_, _, _, ctypes) <- readState
+ return (ctypes !! nr)
+
+restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad ()
+restrictArg nr path index = do
+ (head, args, ctype, ctypes) <- readState
+ args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat
+ return (xcat,xs) ) nr args
+ writeState (head, args', ctype, ctypes)
+
+projectArg :: FIndex -> FPath -> CnvMonad Int
+projectArg nr path = do
+ (head, args, ctype, ctypes) <- readState
+ (xnr,args') <- updateArgs nr args
+ writeState (head, args', ctype, ctypes)
+ return xnr
+ where
+ updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])])
+ updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as)
+ | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as)
+ | otherwise = do a <- projectProtoFCat path a
+ return (0,(a,xpaths):as)
+ updateArgs n (a : as) = do
+ (xnr,as) <- updateArgs (n-1) as
+ return (xnr,a:as)
+
+readHeadCType :: CnvMonad Term
+readHeadCType = do (_, _, ctype, _) <- readState
+ return ctype
+
+restrictHead :: FPath -> FIndex -> CnvMonad ()
+restrictHead path term
+ = do (head, args, ctype, ctypes) <- readState
+ head' <- restrictProtoFCat path term head
+ writeState (head', args, ctype, ctypes)
+
+projectHead :: FPath -> CnvMonad ()
+projectHead path
+ = do (head, args, ctype, ctypes) <- readState
+ head' <- projectProtoFCat path head
+ writeState (head', args, ctype, ctypes)
+
+restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat
+restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do
+ tcs <- addConstraint tcs
+ return (PFCat cat rcs tcs)
+ where
+ addConstraint (c@(path,index) : cs)
+ | path0 > path = liftM (c:) (addConstraint cs)
+ | path0 == path = guard (index0 == index) >>
+ return (c : cs)
+ addConstraint cs = return ((path0,index0) : cs)
+
+projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat
+projectProtoFCat path0 (PFCat cat rcs tcs) = do
+ return (PFCat cat (addConstraint rcs) tcs)
+ where
+ addConstraint (path : rcs)
+ | path0 > path = path : addConstraint rcs
+ | path0 == path = path : rcs
+ addConstraint rcs = path0 : rcs
diff --git a/src-3.0/GF/Conversion/SimpleToFinite.hs b/src-3.0/GF/Conversion/SimpleToFinite.hs
new file mode 100644
index 000000000..bbd3ae355
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToFinite.hs
@@ -0,0 +1,178 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/09/01 09:53:19 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.7 $
+--
+-- Calculating the finiteness of each type in a grammar
+-----------------------------------------------------------------------------
+
+module GF.Conversion.SimpleToFinite
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Data.BacktrackM
+import GF.Data.Utilities (lookupList)
+
+import GF.Infra.Ident (Ident(..))
+
+type CnvMonad a = BacktrackM () a
+
+convertGrammar :: SGrammar -> SGrammar
+convertGrammar rules = tracePrt "SimpleToFinie - nr. 'finite' rules" (prt . length) $
+ solutions cnvMonad ()
+ where split = calcSplitable rules
+ cnvMonad = member rules >>= convertRule split
+
+convertRule :: Splitable -> SRule -> CnvMonad SRule
+convertRule split (Rule abs cnc)
+ = do newAbs <- convertAbstract split abs
+ return $ Rule newAbs cnc
+
+{-
+-- old code
+convertAbstract :: Splitable -> Abstract SDecl Name
+ -> CnvMonad (Abstract SDecl Name)
+convertAbstract split (Abs decl decls name)
+ = case splitableFun split (name2fun name) of
+ Just cat' -> return $ Abs (Decl anyVar (mergeFun (name2fun name) cat') []) decls name
+ Nothing -> expandTyping split name [] decl decls []
+
+
+expandTyping :: Splitable -> Name -> [(Var, SCat)] -> SDecl -> [SDecl] -> [SDecl]
+ -> CnvMonad (Abstract SDecl Name)
+expandTyping split name env (Decl x cat args) [] decls
+ = return $ Abs decl (reverse decls) name
+ where decl = substArgs split x env cat args []
+expandTyping split name env typ (Decl x xcat xargs : declsToDo) declsDone
+ = do (x', xcat', env') <- calcNewEnv
+ let decl = substArgs split x' env xcat' xargs []
+ expandTyping split name env' typ declsToDo (decl : declsDone)
+ where calcNewEnv = case splitableCat split xcat of
+ Just newFuns -> do newFun <- member newFuns
+ let newCat = mergeFun newFun xcat
+ -- Just newCats -> do newCat <- member newCats
+ return (anyVar, newCat, (x,newCat) : env)
+ Nothing -> return (x, xcat, env)
+-}
+
+-- new code
+convertAbstract :: Splitable -> Abstract SDecl Name
+ -> CnvMonad (Abstract SDecl Name)
+convertAbstract split (Abs decl decls name)
+ = case splitableFun split fun of
+ Just cat' -> return $ Abs (Decl anyVar ([] ::--> (mergeFun fun cat' ::@ []))) decls name
+ Nothing -> expandTyping split [] fun profiles [] decl decls []
+ where Name fun profiles = name
+
+expandTyping :: Splitable -> [(Var, SCat)]
+ -> Fun -> [Profile (SyntaxForest Fun)] -> [Profile (SyntaxForest Fun)]
+ -> SDecl -> [SDecl] -> [SDecl]
+ -> CnvMonad (Abstract SDecl Name)
+expandTyping split env fun [] profiles (Decl x (typargs ::--> (cat ::@ args))) [] decls
+ = return $ Abs decl (reverse decls) (Name fun (reverse profiles))
+ where decl = substArgs split x env typargs cat args []
+expandTyping split env fun (prof:profiles) profsDone typ
+ (Decl x (xtypargs ::--> (xcat ::@ xargs)) : declsToDo) declsDone
+ = do (x', xcat', env', prof') <- calcNewEnv
+ let decl = substArgs split x' env xtypargs xcat' xargs []
+ expandTyping split env' fun profiles (prof' : profsDone) typ declsToDo (decl : declsDone)
+ where calcNewEnv = case splitableCat split xcat of
+ Nothing -> return (x, xcat, env, prof)
+ Just newFuns -> do newFun <- member newFuns
+ let newCat = mergeFun newFun xcat
+ newProf = Constant (FNode newFun [[]])
+ -- should really be using some kind of
+ -- "profile unification"
+ return (anyVar, newCat, (x,newCat) : env, newProf)
+
+substArgs :: Splitable -> Var -> [(Var, SCat)] -> [FOType SCat]
+ -> SCat -> [TTerm] -> [TTerm] -> SDecl
+substArgs split x env typargs cat [] args = Decl x (typargs ::--> (cat ::@ reverse args))
+substArgs split x env typargs cat (arg:argsToDo) argsDone
+ = case argLookup split env arg of
+ Just newCat -> substArgs split x env typargs (mergeArg cat newCat) argsToDo argsDone
+ Nothing -> substArgs split x env typargs cat argsToDo (arg : argsDone)
+
+argLookup split env (TVar x) = lookup x env
+argLookup split env (con :@ _) = fmap (mergeFun fun) (splitableFun split fun)
+ where fun = constr2fun con
+
+
+----------------------------------------------------------------------
+-- splitable categories (finite, no dependencies)
+-- they should also be used as some dependency
+
+type Splitable = (Assoc SCat [Fun], Assoc Fun SCat)
+
+splitableCat :: Splitable -> SCat -> Maybe [Fun]
+splitableCat = lookupAssoc . fst
+
+splitableFun :: Splitable -> Fun -> Maybe SCat
+splitableFun = lookupAssoc . snd
+
+calcSplitable :: [SRule] -> Splitable
+calcSplitable rules = (listAssoc splitableCat2Funs, listAssoc splitableFun2Cat)
+ where splitableCat2Funs = groupPairs $ nubsort splitableCatFuns
+
+ splitableFun2Cat = nubsort
+ [ (fun, cat) | (cat, fun) <- splitableCatFuns ]
+
+ -- cat-fun pairs that are splitable
+ splitableCatFuns = tracePrt "SimpleToFinite - splitable functions" prt $
+ [ (cat, name2fun name) |
+ Rule (Abs (Decl _ ([] ::--> (cat ::@ []))) [] name) _ <- rules,
+ splitableCats ?= cat ]
+
+ -- all cats that are splitable
+ splitableCats = listSet $
+ tracePrt "SimpleToFinite - finite categories to split" prt $
+ (nondepCats <**> depCats) <\\> resultCats
+
+ -- all result cats for some pure function
+ resultCats = tracePrt "SimpleToFinite - result cats" prt $
+ nubsort [ cat | Rule (Abs (Decl _ (_ ::--> (cat ::@ _))) decls _) _ <- rules,
+ not (null decls) ]
+
+ -- all cats in constants without dependencies
+ nondepCats = tracePrt "SimpleToFinite - nondep cats" prt $
+ nubsort [ cat | Rule (Abs (Decl _ ([] ::--> (cat ::@ []))) [] _) _ <- rules ]
+
+ -- all cats occurring as some dependency of another cat
+ depCats = tracePrt "SimpleToFinite - dep cats" prt $
+ nubsort [ cat | Rule (Abs decl decls _) _ <- rules,
+ cat <- varCats [] (decls ++ [decl]) ]
+
+ varCats _ [] = []
+ varCats env (Decl x (xargs ::--> xtyp@(xcat ::@ _)) : decls)
+ = varCats ((x,xcat) : env) decls ++
+ [ cat | (_::@args) <- (xtyp:xargs), arg <- args,
+ y <- varsInTTerm arg, cat <- lookupList y env ]
+
+
+----------------------------------------------------------------------
+-- utilities
+-- mergeing categories
+
+mergeCats :: String -> String -> String -> SCat -> SCat -> SCat
+mergeCats before middle after (IC cat) (IC arg)
+ = IC (before ++ cat ++ middle ++ arg ++ after)
+
+mergeFun, mergeArg :: SCat -> SCat -> SCat
+mergeFun = mergeCats "{" ":" "}"
+mergeArg = mergeCats "" "" ""
+
+
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG.hs b/src-3.0/GF/Conversion/SimpleToMCFG.hs
new file mode 100644
index 000000000..8f23c905d
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG.hs
@@ -0,0 +1,26 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/18 14:55:32 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.3 $
+--
+-- All different conversions from SimpleGFC to MCFG
+-----------------------------------------------------------------------------
+
+module GF.Conversion.SimpleToMCFG where
+
+import GF.Formalism.SimpleGFC
+import GF.Conversion.Types
+
+import qualified GF.Conversion.SimpleToMCFG.Strict as Strict
+import qualified GF.Conversion.SimpleToMCFG.Nondet as Nondet
+import qualified GF.Conversion.SimpleToMCFG.Coercions as Coerce
+
+convertGrammarNondet, convertGrammarStrict :: SGrammar -> EGrammar
+convertGrammarNondet = Coerce.addCoercions . Nondet.convertGrammar
+convertGrammarStrict = Strict.convertGrammar
+
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs
new file mode 100644
index 000000000..319b99dcb
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG/Coercions.hs
@@ -0,0 +1,63 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:44 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Adding coercion functions to a MCFG if necessary.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToMCFG.Coercions
+ (addCoercions) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Conversion.Types
+import GF.Data.SortedList
+import Data.List (groupBy)
+
+----------------------------------------------------------------------
+
+addCoercions :: EGrammar -> EGrammar
+addCoercions rules = coercions ++ rules
+ where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) |
+ Rule (Abs head args _) (Cnc lbls _ _) <- rules ]
+ allHeadSet = nubsort allHeads
+ allArgSet = union allArgs <\\> map fst allHeadSet
+ coercions = tracePrt "SimpleToMCFG.Coercions - MCFG coercions" (prt . length) $
+ concat $
+ tracePrt "SimpleToMCFG.Coercions - MCFG coercions per category"
+ (prtList . map length) $
+ combineCoercions
+ (groupBy sameECatFst allHeadSet)
+ (groupBy sameECat allArgSet)
+ sameECatFst a b = sameECat (fst a) (fst b)
+
+
+combineCoercions [] _ = []
+combineCoercions _ [] = []
+combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs)
+ = case compare (ecat2scat $ fst $ head heads) (ecat2scat $ head args) of
+ LT -> combineCoercions allHeads allArgs'
+ GT -> combineCoercions allHeads' allArgs
+ EQ -> makeCoercion heads args : combineCoercions allHeads allArgs
+
+
+makeCoercion heads args
+ = [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) |
+ (head@(ECat _ headCns), lbls) <- heads,
+ let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ],
+ arg@(ECat _ argCns) <- args,
+ argCns `subset` headCns ]
+
+
+
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs
new file mode 100644
index 000000000..d6ff052f5
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG/Nondet.hs
@@ -0,0 +1,256 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/17 08:27:29 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.7 $
+--
+-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically.
+-- Afterwards, the grammar has to be extended with coercion functions,
+-- from the module 'GF.Conversion.SimpleToMCFG.Coercions'
+--
+-- the resulting grammars might be /very large/
+--
+-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToMCFG.Nondet
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.SimpleGFC
+import GF.Conversion.Types
+
+import GF.Data.BacktrackM
+import GF.Data.Utilities (notLongerThan, updateNthM)
+
+------------------------------------------------------------
+-- type declarations
+
+type CnvMonad a = BacktrackM Env a
+
+type Env = (ECat, [ECat], LinRec, [SLinType]) -- variable bindings: [(Var, STerm)]
+type LinRec = [Lin SCat MLabel Token]
+
+
+----------------------------------------------------------------------
+-- main conversion function
+
+maxNrRules :: Int
+maxNrRules = 5000
+
+convertGrammar :: SGrammar -> EGrammar
+convertGrammar rules = traceCalcFirst rules' $
+ tracePrt "SimpleToMCFG.Nondet - MCFG rules" (prt . length) $
+ rules'
+ where rules' = rules >>= convertRule
+-- solutions conversion undefined
+-- where conversion = member rules >>= convertRule
+
+convertRule :: SRule -> [ERule] -- CnvMonad ERule
+convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) =
+-- | prt(name2fun fun) `elem`
+-- words "UseCl PosTP TPast ASimul SPredV IndefOneNP DefOneNP UseN2 mother_N2 jump_V" =
+ if notLongerThan maxNrRules rules
+ then tracePrt ("SimpeToMCFG.Nondet - MCFG rules for " ++ prt fun) (prt . length) $
+ rules
+ else trace2 "SimpeToMCFG.Nondet - TOO MANY RULES, function not converted"
+ ("More than " ++ show maxNrRules ++ " MCFG rules for " ++ prt fun) $
+ []
+ where rules = flip solutions undefined $
+ do let cat : args = map decl2cat (decl : decls)
+ writeState (initialECat cat, map initialECat args, [], ctypes)
+ rterm <- simplifyTerm term
+ reduceTerm ctype emptyPath rterm
+ (newCat, newArgs, linRec, _) <- readState
+ let newLinRec = map (instantiateArgs newArgs) linRec
+ catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes)
+ -- checkLinRec argsPaths catPaths newLinRec
+ return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec)
+convertRule _ = [] -- failure
+
+
+----------------------------------------------------------------------
+-- "type-checking" the resulting linearization
+-- should not be necessary, if the algorithms (type-checking and conversion) are correct
+
+checkLinRec args lbls = mapM (checkLin args lbls)
+
+checkLin args lbls (Lin lbl lin)
+ | lbl `elem` lbls = mapM (symbol (checkArg args) (const (return ()))) lin
+ | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" "Label mismatch" $
+ failure
+
+checkArg args (_cat, lbl, nr)
+ | lbl `elem` (args !! nr) = return ()
+-- | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" ("Label mismatch in arg " ++ prt nr) $
+-- failure
+ | otherwise = trace2 ("SimpleToMCFG.Nondet - ERROR: Label mismatch in arg " ++ prt nr)
+ (prt lbl ++ " `notElem` " ++ prt (args!!nr)) $
+ failure
+
+
+----------------------------------------------------------------------
+-- term simplification
+
+simplifyTerm :: STerm -> CnvMonad STerm
+simplifyTerm (term :! sel)
+ = do sterm <- simplifyTerm term
+ ssel <- simplifyTerm sel
+ case sterm of
+ Tbl table -> do (pat, val) <- member table
+ pat =?= ssel
+ return val
+ _ -> do sel' <- expandTerm ssel
+ return (sterm +! sel')
+-- simplifyTerm (Var x) = readBinding x
+simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms
+simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record
+simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term
+simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table
+simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms
+simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2)
+simplifyTerm term = return term
+
+simplifyAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
+simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term
+
+simplifyCase :: (STerm, STerm) -> CnvMonad (STerm, STerm)
+simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term)
+
+
+------------------------------------------------------------
+-- reducing simplified terms, collecting MCF rules
+
+reduceTerm :: SLinType -> SPath -> STerm -> CnvMonad ()
+--reduceTerm ctype path (Variants terms)
+-- = member terms >>= reduceTerm ctype path
+reduceTerm (StrT) path term = updateLin (path, term)
+reduceTerm (ConT _) path term = do pat <- expandTerm term
+ updateHead (path, pat)
+reduceTerm (RecT rtype) path term
+ = sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | (lbl, ctype) <- rtype ]
+reduceTerm (TblT pats vtype) path table
+ = sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | pat <- pats ]
+
+
+------------------------------------------------------------
+-- expanding a term to ground terms
+
+expandTerm :: STerm -> CnvMonad STerm
+expandTerm arg@(Arg nr _ path)
+ = do ctypes <- readArgCTypes
+ unifyPType arg $ lintypeFollowPath path $ ctypes !! nr
+-- expandTerm arg@(Arg nr _ path)
+-- = do ctypes <- readArgCTypes
+-- pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr
+-- pat =?= arg
+-- return pat
+expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms
+expandTerm (Rec record) = liftM Rec $ mapM expandAssign record
+--expandTerm (Variants terms) = liftM Variants $ mapM expandTerm terms
+expandTerm (Variants terms) = member terms >>= expandTerm
+expandTerm term = error $ "expandTerm: " ++ prt term
+
+expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
+expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term
+
+unifyPType :: STerm -> SLinType -> CnvMonad STerm
+unifyPType arg (RecT prec) =
+ liftM Rec $
+ sequence [ liftM ((,) lbl) $
+ unifyPType (arg +. lbl) ptype |
+ (lbl, ptype) <- prec ]
+unifyPType (Arg nr _ path) (ConT terms) =
+ do (_, args, _, _) <- readState
+ case lookup path (ecatConstraints (args !! nr)) of
+ Just term -> return term
+ Nothing -> do term <- member terms
+ updateArg nr (path, term)
+ return term
+
+------------------------------------------------------------
+-- unification of patterns and selection terms
+
+(=?=) :: STerm -> STerm -> CnvMonad ()
+-- Wildcard =?= _ = return ()
+-- Var x =?= term = addBinding x term
+Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) |
+ (lbl, pat) <- precord ]
+pat =?= Arg nr _ path = updateArg nr (path, pat)
+(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms)
+ sequence_ $ zipWith (=?=) pats terms
+Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm |
+ (lbl, pat) <- precord,
+ let mterm = lookup lbl record ]
+-- variants are not allowed in patterns, but in selection terms:
+term =?= Variants terms = member terms >>= (term =?=)
+pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term
+
+----------------------------------------------------------------------
+-- variable bindings (does not work correctly)
+{-
+addBinding x term = do (a, b, c, d, bindings) <- readState
+ writeState (a, b, c, d, (x,term):bindings)
+
+readBinding x = do (_, _, _, _, bindings) <- readState
+ return $ maybe (Var x) id $ lookup x bindings
+-}
+
+------------------------------------------------------------
+-- updating the MCF rule
+
+readArgCTypes :: CnvMonad [SLinType]
+readArgCTypes = do (_, _, _, env) <- readState
+ return env
+
+updateArg :: Int -> Constraint -> CnvMonad ()
+updateArg arg cn
+ = do (head, args, lins, env) <- readState
+ args' <- updateNthM (addToECat cn) arg args
+ writeState (head, args', lins, env)
+
+updateHead :: Constraint -> CnvMonad ()
+updateHead cn
+ = do (head, args, lins, env) <- readState
+ head' <- addToECat cn head
+ writeState (head', args, lins, env)
+
+updateLin :: Constraint -> CnvMonad ()
+updateLin (path, term)
+ = do let newLins = term2lins term
+ (head, args, lins, env) <- readState
+ let lins' = lins ++ map (Lin path) newLins
+ writeState (head, args, lins', env)
+
+term2lins :: STerm -> [[Symbol (SCat, SPath, Int) Token]]
+term2lins (Arg nr cat path) = return [Cat (cat, path, nr)]
+term2lins (Token str) = return [Tok str]
+term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2)
+term2lins (Empty) = return []
+term2lins (Variants terms) = terms >>= term2lins
+term2lins term = error $ "term2lins: " ++ show term
+
+addToECat :: Constraint -> ECat -> CnvMonad ECat
+addToECat cn (ECat cat cns) = liftM (ECat cat) $ addConstraint cn cns
+
+addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint]
+addConstraint cn0 (cn : cns)
+ | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns)
+ | fst cn0 == fst cn = guard (snd cn0 == snd cn) >>
+ return (cn : cns)
+addConstraint cn0 cns = return (cn0 : cns)
+
+
+
diff --git a/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs b/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs
new file mode 100644
index 000000000..a5519fcd8
--- /dev/null
+++ b/src-3.0/GF/Conversion/SimpleToMCFG/Strict.hs
@@ -0,0 +1,129 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:44 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Converting SimpleGFC grammars to MCFG grammars, deterministic.
+--
+-- the resulting grammars might be /very large/
+--
+-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.SimpleToMCFG.Strict
+ (convertGrammar) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Control.Monad
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.SimpleGFC
+import GF.Conversion.Types
+
+import GF.Data.BacktrackM
+import GF.Data.SortedList
+
+----------------------------------------------------------------------
+-- main conversion function
+
+type CnvMonad a = BacktrackM () a
+
+convertGrammar :: SGrammar -> EGrammar
+convertGrammar rules = tracePrt "SimpleToMCFG.Strict - MCFG rules" (prt . length) $
+ solutions conversion undefined
+ where conversion = member rules >>= convertRule
+
+convertRule :: SRule -> CnvMonad ERule
+convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term)))
+ = do let cat : args = map decl2cat (decl : decls)
+ args_ctypes = zip3 [0..] args ctypes
+ instArgs <- mapM enumerateArg args_ctypes
+ let instTerm = substitutePaths instArgs term
+ newCat <- extractECat cat ctype instTerm
+ newArgs <- mapM (extractArg instArgs) args_ctypes
+ let linRec = strPaths ctype instTerm >>= extractLin newArgs
+ let newLinRec = map (instantiateArgs newArgs) linRec
+ catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes)
+ return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec)
+convertRule _ = failure
+
+----------------------------------------------------------------------
+-- category extraction
+
+extractArg :: [STerm] -> (Int, SCat, SLinType) -> CnvMonad ECat
+extractArg args (nr, cat, ctype) = extractECat cat ctype (args !! nr)
+
+extractECat :: SCat -> SLinType -> STerm -> CnvMonad ECat
+extractECat cat ctype term = member $ map (ECat cat) $ parPaths ctype term
+
+enumerateArg :: (Int, SCat, SLinType) -> CnvMonad STerm
+enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype
+
+----------------------------------------------------------------------
+-- Substitute each instantiated parameter path for its instantiation
+
+substitutePaths :: [STerm] -> STerm -> STerm
+substitutePaths arguments = subst
+ where subst (Arg nr _ path) = termFollowPath path (arguments !! nr)
+ subst (con :^ terms) = con :^ map subst terms
+ subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ]
+ subst (term :. lbl) = subst term +. lbl
+ subst (Tbl table) = Tbl [ (pat, subst term) |
+ (pat, term) <- table ]
+ subst (term :! select) = subst term +! subst select
+ subst (term :++ term') = subst term ?++ subst term'
+ subst (Variants terms) = Variants $ map subst terms
+ subst term = term
+
+----------------------------------------------------------------------
+-- term paths extaction
+
+termPaths :: SLinType -> STerm -> [(SPath, (SLinType, STerm))]
+termPaths ctype (Variants terms) = terms >>= termPaths ctype
+termPaths (RecT rtype) (Rec record)
+ = [ (path ++. lbl, value) |
+ (lbl, term) <- record,
+ let Just ctype = lookup lbl rtype,
+ (path, value) <- termPaths ctype term ]
+termPaths (TblT _ ctype) (Tbl table)
+ = [ (path ++! pat, value) |
+ (pat, term) <- table,
+ (path, value) <- termPaths ctype term ]
+termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ]
+
+{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt):
+{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2}
+[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2]
+-}
+
+parPaths :: SLinType -> STerm -> [[(SPath, STerm)]]
+parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $
+ nubsort [ (path, value) |
+ (path, (ConT _, value)) <- termPaths ctype term ]
+
+strPaths :: SLinType -> STerm -> [(SPath, STerm)]
+strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ]
+ where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ]
+
+----------------------------------------------------------------------
+-- linearization extraction
+
+extractLin :: [ECat] -> (SPath, STerm) -> [Lin ECat MLabel Token]
+extractLin args (path, term) = map (Lin path) (convertLin term)
+ where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2)
+ convertLin (Empty) = [[]]
+ convertLin (Token tok) = [[Tok tok]]
+ convertLin (Variants terms) = concatMap convertLin terms
+ convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]]
+ convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path)
+
diff --git a/src-3.0/GF/Conversion/TypeGraph.hs b/src-3.0/GF/Conversion/TypeGraph.hs
new file mode 100644
index 000000000..62ee9726e
--- /dev/null
+++ b/src-3.0/GF/Conversion/TypeGraph.hs
@@ -0,0 +1,58 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/16 10:21:21 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.2 $
+--
+-- Printing the type hierarchy of an abstract module in GraphViz format
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.TypeGraph (prtTypeGraph, prtFunctionGraph) where
+
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+
+import GF.Data.Operations ((++++), (+++++))
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- | SimpleGFC to TypeGraph
+--
+-- assumes that the profiles in the Simple GFC names are trivial
+
+prtTypeGraph :: SGrammar -> String
+prtTypeGraph rules = "digraph TypeGraph {" ++++
+ "concentrate=true;" ++++
+ "node [shape=ellipse];" +++++
+ unlines (map prtTypeGraphRule rules) +++++
+ "}"
+
+prtTypeGraphRule :: SRule -> String
+prtTypeGraphRule (Rule abs@(Abs cat cats (Name fun _prof)) _)
+ = "// " ++ prt abs ++++
+ unlines [ prtSCat c ++ " -> " ++ prtSCat cat ++ ";" | c <- cats ]
+
+prtFunctionGraph :: SGrammar -> String
+prtFunctionGraph rules = "digraph FunctionGraph {" ++++
+ "node [shape=ellipse];" +++++
+ unlines (map prtFunctionGraphRule rules) +++++
+ "}"
+
+prtFunctionGraphRule :: SRule -> String
+prtFunctionGraphRule (Rule abs@(Abs cat cats (Name fun _prof)) _)
+ = "// " ++ prt abs ++++
+ pfun ++ " [label=\"" ++ prt fun ++ "\", shape=box, style=dashed];" ++++
+ pfun ++ " -> " ++ prtSCat cat ++ ";" ++++
+ unlines [ prtSCat c ++ " -> " ++ pfun ++ ";" | c <- cats ]
+ where pfun = "GF_FUNCTION_" ++ prt fun
+
+prtSCat decl = prt (decl2cat decl)
+
+
diff --git a/src-3.0/GF/Conversion/Types.hs b/src-3.0/GF/Conversion/Types.hs
new file mode 100644
index 000000000..97c2ace05
--- /dev/null
+++ b/src-3.0/GF/Conversion/Types.hs
@@ -0,0 +1,146 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/11 14:11:46 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.10 $
+--
+-- All possible instantiations of different grammar formats used in conversion from GFC
+-----------------------------------------------------------------------------
+
+
+module GF.Conversion.Types where
+
+---import GF.Conversion.FTypes
+
+import qualified GF.Infra.Ident as Ident (Ident(..), wildIdent, isWildIdent)
+import qualified GF.Canon.AbsGFC as AbsGFC (CIdent(..), Label(..))
+import qualified GF.GFCC.CId
+import qualified GF.Grammar.Grammar as Grammar (Term)
+
+import GF.Formalism.GCFG
+import GF.Formalism.SimpleGFC
+import GF.Formalism.MCFG
+import GF.Formalism.FCFG
+import GF.Formalism.CFG
+import GF.Formalism.Utilities
+import GF.Infra.Print
+import GF.Data.Assoc
+
+import Control.Monad (foldM)
+import Data.Array
+
+----------------------------------------------------------------------
+-- * basic (leaf) types
+
+-- ** input tokens
+
+type Token = String
+
+-- ** function names
+
+type Fun = Ident.Ident
+type Name = NameProfile Fun
+
+
+----------------------------------------------------------------------
+-- * Simple GFC
+
+type SCat = Ident.Ident
+
+constr2fun :: Constr -> Fun
+constr2fun (AbsGFC.CIQ _ fun) = fun
+
+-- ** grammar types
+
+type SGrammar = SimpleGrammar SCat Name Token
+type SRule = SimpleRule SCat Name Token
+
+type SPath = Path SCat Token
+type STerm = Term SCat Token
+type SLinType = LinType SCat Token
+type SDecl = Decl SCat
+
+----------------------------------------------------------------------
+-- * erasing MCFG
+
+type EGrammar = MCFGrammar ECat Name ELabel Token
+type ERule = MCFRule ECat Name ELabel Token
+data ECat = ECat SCat [Constraint] deriving (Eq, Ord, Show)
+type ELabel = SPath
+
+type Constraint = (SPath, STerm)
+
+-- ** type coercions etc
+
+initialECat :: SCat -> ECat
+initialECat cat = ECat cat []
+
+ecat2scat :: ECat -> SCat
+ecat2scat (ECat cat _) = cat
+
+ecatConstraints :: ECat -> [Constraint]
+ecatConstraints (ECat _ cns) = cns
+
+sameECat :: ECat -> ECat -> Bool
+sameECat ec1 ec2 = ecat2scat ec1 == ecat2scat ec2
+
+coercionName :: Name
+coercionName = Name Ident.wildIdent [Unify [0]]
+
+isCoercion :: Name -> Bool
+isCoercion (Name fun [Unify [0]]) = Ident.isWildIdent fun
+isCoercion _ = False
+
+----------------------------------------------------------------------
+-- * nonerasing MCFG
+
+type MGrammar = MCFGrammar MCat Name MLabel Token
+type MRule = MCFRule MCat Name MLabel Token
+data MCat = MCat ECat [ELabel] deriving (Eq, Ord, Show)
+type MLabel = ELabel
+
+mcat2ecat :: MCat -> ECat
+mcat2ecat (MCat cat _) = cat
+
+mcat2scat :: MCat -> SCat
+mcat2scat = ecat2scat . mcat2ecat
+
+----------------------------------------------------------------------
+-- * fast nonerasing MCFG
+
+---- moved to FTypes by AR 20/9/2007
+
+
+----------------------------------------------------------------------
+-- * CFG
+
+type CGrammar = CFGrammar CCat Name Token
+type CRule = CFRule CCat Name Token
+data CCat = CCat ECat ELabel deriving (Eq, Ord, Show)
+
+ccat2ecat :: CCat -> ECat
+ccat2ecat (CCat cat _) = cat
+
+ccat2scat :: CCat -> SCat
+ccat2scat = ecat2scat . ccat2ecat
+
+----------------------------------------------------------------------
+-- * pretty-printing
+
+instance Print ECat where
+ prt (ECat cat constrs) = prt cat ++ "{" ++
+ concat [ prt path ++ "=" ++ prt term ++ ";" |
+ (path, term) <- constrs ] ++ "}"
+
+instance Print MCat where
+ prt (MCat cat labels) = prt cat ++ prt labels
+
+instance Print CCat where
+ prt (CCat cat label) = prt cat ++ prt label
+
+---- instance Print FCat where ---- FCat
+