diff options
| author | peb <unknown> | 2005-04-11 12:57:45 +0000 |
|---|---|---|
| committer | peb <unknown> | 2005-04-11 12:57:45 +0000 |
| commit | ac00f77dadd4d447803dd7cab5a36f47365325d0 (patch) | |
| tree | 2fd02b19234f8d1fcc20ee67a2367d4d4eebfcd8 /src/GF/OldParsing | |
| parent | f6273f7033b85eea9a8d0cc7d31e9697ba95d5b7 (diff) | |
"Committed_by_peb"
Diffstat (limited to 'src/GF/OldParsing')
30 files changed, 3990 insertions, 0 deletions
diff --git a/src/GF/OldParsing/CFGrammar.hs b/src/GF/OldParsing/CFGrammar.hs new file mode 100644 index 000000000..6c6269626 --- /dev/null +++ b/src/GF/OldParsing/CFGrammar.hs @@ -0,0 +1,153 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFGrammar +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of context-free grammars, +-- parser information and chart conversion +---------------------------------------------------------------------- + +module GF.OldParsing.CFGrammar + (-- * Type definitions + Grammar, + Rule(..), + CFParser, + -- * Parser information + pInfo, + PInfo(..), + -- * Building parse charts + edges2chart, + -- * Grammar checking + checkGrammar + ) where + +import GF.System.Tracing + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import qualified CF +-- parser modules: +import GF.OldParsing.Utilities +import GF.Printing.PrintParser + + +------------------------------------------------------------ +-- type definitions + +type Grammar n c t = [Rule n c t] +data Rule n c t = Rule c [Symbol c t] n + deriving (Eq, Ord, Show) + + +type CFParser n c t = PInfo n c t -> [c] -> Input t -> [Edge (Rule n c t)] +-- - - - - - - - - - - - - - - - - - ^^^ possible starting categories + + +------------------------------------------------------------ +-- parser information + +pInfo :: (Ord n, Ord c, Ord t) => Grammar n c t -> PInfo n c t + +data PInfo n c t + = PInfo { grammarTokens :: SList t, + nameRules :: Assoc n (SList (Rule n c t)), + topdownRules :: Assoc c (SList (Rule n c t)), + bottomupRules :: Assoc (Symbol c t) (SList (Rule n c t)), + emptyLeftcornerRules :: Assoc c (SList (Rule n c t)), + emptyCategories :: Set c, + cyclicCategories :: SList c, + -- ^^ONLY FOR DIRECT CYCLIC RULES!!! + leftcornerTokens :: Assoc c (SList t) + -- ^^DOES NOT WORK WITH EMPTY RULES!!! + } + +-- this is not permanent... +pInfo grammar = pInfo' (filter (not.isCyclic) grammar) + +pInfo' grammar = tracePrt "#parserInfo" prt $ + PInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks + where grToks = union [ nubsort [ tok | Tok tok <- rhs ] | Rule _ rhs _ <- grammar ] + nmRules = accumAssoc id [ (name, rule) | rule@(Rule _ _ name) <- grammar ] + tdRules = accumAssoc id [ (cat, rule) | rule@(Rule cat _ _) <- grammar ] + buRules = accumAssoc id [ (next, rule) | rule@(Rule _ (next:_) _) <- grammar ] + elcRules = accumAssoc id $ limit lc emptyRules + leftToks = accumAssoc id $ limit lc $ + nubsort [ (cat, token) | Rule cat (Tok token:_) _ <- grammar ] + lc (left, res) = nubsort [ (cat, res) | Rule cat _ _ <- buRules ? Cat left ] + emptyRules = nubsort [ (cat, rule) | rule@(Rule cat [] _) <- grammar ] + emptyCats = listSet $ limitEmpties $ map fst emptyRules + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ cat | Rule cat rhs _ <- grammar, + all (symbol (`elem` es) (const False)) rhs ] + cyclicCats = nubsort [ cat | Rule cat [Cat cat'] _ <- grammar, cat == cat' ] + +isCyclic (Rule cat [Cat cat'] _) = cat==cat' +isCyclic _ = False + +------------------------------------------------------------ +-- building parse charts + +edges2chart :: (Ord n, Ord c, Ord t) => Input t -> + [Edge (Rule n c t)] -> ParseChart n (Edge c) + +---------- + +edges2chart input edges + = accumAssoc id [ (Edge i k cat, (name, children i k rhs)) | + Edge i k (Rule cat rhs name) <- edges ] + where children i k [] = [ [] | i == k ] + children i k (Tok tok:rhs) = [ rest | i <= k, + j <- (inputFrom input ! i) ? tok, + rest <- children j k rhs ] + children i k (Cat cat:rhs) = [ Edge i j cat : rest | i <= k, + j <- echart ? (i, cat), + rest <- children j k rhs ] + echart = accumAssoc id [ ((i, cat), j) | Edge i j (Rule cat _ _) <- edges ] + + +------------------------------------------------------------ +-- grammar checking + +checkGrammar :: (Ord n, Ord c, Ord t, Print n, Print c, Print t) => + Grammar n c t -> [String] + +---------- + +checkGrammar rules = [ "rhs category does not exist: " ++ prt cat ++ "\n" ++ + " in rule: " ++ prt rule | + rule@(Rule _ rhs _) <- rules, + Cat cat <- rhs, cat `notElem` cats ] + where cats = nubsort [ cat | Rule cat _ _ <- rules ] + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print n, Print c, Print t) => Print (Rule n c t) where + prt (Rule cat rhs name) = prt name ++ ". " ++ prt cat ++ " -> " ++ prt rhs ++ + (if null rhs then ".\n" else "\n") + prtList = concatMap prt + + +instance (Ord n, Ord c, Ord t) => Print (PInfo n c t) where + prt pI = "[ tokens=" ++ show (length (grammarTokens pI)) ++ + "; names=" ++ sla nameRules ++ + "; tdCats=" ++ sla topdownRules ++ + "; buCats=" ++ sla bottomupRules ++ + "; elcCats=" ++ sla emptyLeftcornerRules ++ + "; eCats=" ++ sla emptyCategories ++ + "; cCats=" ++ show (length (cyclicCategories pI)) ++ + -- "; lctokCats=" ++ sla leftcornerTokens ++ + " ]" + where sla f = show $ length $ aElems $ f pI + + diff --git a/src/GF/OldParsing/ConvertFiniteGFC.hs b/src/GF/OldParsing/ConvertFiniteGFC.hs new file mode 100644 index 000000000..61486023e --- /dev/null +++ b/src/GF/OldParsing/ConvertFiniteGFC.hs @@ -0,0 +1,283 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Calculating the finiteness of each type in a grammar +----------------------------------------------------------------------------- + +module GF.OldParsing.ConvertFiniteGFC where + +import Operations +import GFC +import MkGFC +import AbsGFC +import Ident (Ident(..)) +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.BacktrackM + +type Cat = Ident +type Name = Ident + +type CnvMonad a = BacktrackM () a + +convertGrammar :: CanonGrammar -> CanonGrammar +convertGrammar = canon2grammar . convertCanon . grammar2canon + +convertCanon :: Canon -> Canon +convertCanon (Gr modules) = Gr (map (convertModule split) modules) + where split = calcSplitable modules + +convertModule :: Splitable -> Module -> Module +convertModule split (Mod mtyp ext op fl defs) + = Mod mtyp ext op fl newDefs + where newDefs = solutions defMonad () + defMonad = member defs >>= convertDef split + +---------------------------------------------------------------------- +-- the main conversion function +convertDef :: Splitable -> Def -> CnvMonad Def + +-- converting abstract "cat" definitions +convertDef split (AbsDCat cat decls cidents) + = case splitableCat split cat of + Just newCats -> do newCat <- member newCats + return $ AbsDCat newCat decls cidents + Nothing -> do (newCat, newDecls) <- expandDecls cat decls + return $ AbsDCat newCat newDecls cidents + where expandDecls cat [] = return (cat, []) + expandDecls cat (decl@(Decl var typ) : decls) + = do (newCat, newDecls) <- expandDecls cat decls + let argCat = resultCat typ + case splitableCat split argCat of + Nothing -> return (newCat, decl : newDecls) + Just newArgs -> do newArg <- member newArgs + return (mergeArg newCat newArg, newDecls) + +-- converting abstract "fun" definitions +convertDef split (AbsDFun fun typ@(EAtom (AC (CIQ mod cat))) def) + = case splitableFun split fun of + Just newCat -> return (AbsDFun fun (EAtom (AC (CIQ mod newCat))) def) + Nothing -> do newTyp <- expandType split [] typ + return (AbsDFun fun newTyp def) +convertDef split (AbsDFun fun typ def) + = do newTyp <- expandType split [] typ + return (AbsDFun fun newTyp def) + +-- converting concrete "lincat" definitions +convertDef split (CncDCat cat ctype x y) + = case splitableCat split cat of + Just newCats -> do newCat <- member newCats + return $ CncDCat newCat ctype x y + Nothing -> return $ CncDCat cat ctype x y + +-- converting concrete "lin" definitions +convertDef split (CncDFun fun (CIQ mod cat) args linterm x) + = case splitableFun split fun of + Just newCat -> return $ CncDFun fun (CIQ mod newCat) args linterm x + Nothing -> return $ CncDFun fun (CIQ mod cat) args linterm x + +convertDef _ def = return def + +---------------------------------------------------------------------- +-- expanding type expressions + +expandType :: Splitable -> [(Ident, Cat)] -> Exp -> CnvMonad Exp +expandType split env (EProd x a@(EAtom (AC (CIQ mod cat))) b) + = case splitableCat split cat of + Nothing -> do b' <- expandType split env b + return (EProd x a b') + Just newCats -> do newCat <- member newCats + b' <- expandType split ((x,newCat):env) b + return (EProd x (EAtom (AC (CIQ mod newCat))) b') +expandType split env (EProd x a b) + = do a' <- expandType split env a + b' <- expandType split env b + return (EProd x a' b') +expandType split env app + = expandApp split env [] app + +expandApp :: Splitable -> [(Ident, Cat)] -> [Cat] -> Exp -> CnvMonad Exp +expandApp split env addons (EAtom (AC (CIQ mod cat))) + = return (EAtom (AC (CIQ mod (foldl mergeArg cat addons)))) +expandApp split env addons (EApp exp arg@(EAtom (AC (CIQ mod fun)))) + = case splitableFun split fun of + Just newCat -> expandApp split env (newCat:addons) exp + Nothing -> do exp' <- expandApp split env addons exp + return (EApp exp' arg) +expandApp split env addons (EApp exp arg@(EAtom (AV x))) + = case lookup x env of + Just newCat -> expandApp split env (newCat:addons) exp + Nothing -> do exp' <- expandApp split env addons exp + return (EApp exp' arg) + +---------------------------------------------------------------------- +-- splitable categories (finite, no dependencies) +-- they should also be used as some dependency + +type Splitable = (Assoc Cat [Cat], Assoc Name Cat) + +splitableCat :: Splitable -> Cat -> Maybe [Cat] +splitableCat = lookupAssoc . fst + +splitableFun :: Splitable -> Name -> Maybe Cat +splitableFun = lookupAssoc . snd + +calcSplitable :: [Module] -> Splitable +calcSplitable modules = (listAssoc splitableCats, listAssoc splitableFuns) + where splitableCats = tracePrt "splitableCats" (prtSep " ") $ + groupPairs $ nubsort + [ (cat, mergeFun fun cat) | (cat, fun) <- constantCats ] + + splitableFuns = tracePrt "splitableFuns" (prtSep " ") $ + nubsort + [ (fun, mergeFun fun cat) | (cat, fun) <- constantCats ] + + constantCats = tracePrt "constantCats" (prtSep " ") $ + [ (cat, fun) | + AbsDFun fun (EAtom (AC (CIQ _ cat))) _ <- absDefs, + dependentConstants ?= cat ] + + dependentConstants = listSet $ + tracePrt "dep consts" prt $ + dependentCats <\\> funCats + + funCats = tracePrt "fun cats" prt $ + nubsort [ resultCat typ | + AbsDFun _ typ@(EProd _ _ _) _ <- absDefs ] + + dependentCats = tracePrt "dep cats" prt $ + nubsort [ cat | AbsDCat _ decls _ <- absDefs, + Decl _ (EAtom (AC (CIQ _ cat))) <- decls ] + + absDefs = concat [ defs | Mod (MTAbs _) _ _ _ defs <- modules ] + + +---------------------------------------------------------------------- +-- utilities + +-- the main result category of a type expression +resultCat :: Exp -> Cat +resultCat (EProd _ _ b) = resultCat b +resultCat (EApp a _) = resultCat a +resultCat (EAtom (AC (CIQ _ cat))) = cat + +-- mergeing categories +mergeCats :: String -> String -> String -> Cat -> Cat -> Cat +mergeCats before middle after (IC cat) (IC arg) + = IC (before ++ cat ++ middle ++ arg ++ after) + +mergeFun, mergeArg :: Cat -> Cat -> Cat +mergeFun = mergeCats "{" ":" "}" +mergeArg = mergeCats "" "" "" + +---------------------------------------------------------------------- +-- obsolete? + +{- +type FiniteCats = Assoc Cat Integer + +calculateFiniteness :: Canon -> FiniteCats +calculateFiniteness canon@(Gr modules) + = trace2 "#typeInfo" (prt tInfo) $ + finiteCats + + where finiteCats = listAssoc [ (cat, fin) | (cat, Just fin) <- finiteInfo ] + finiteInfo = map finInfo groups + + finInfo :: (Cat, [[Cat]]) -> (Cat, Maybe Integer) + finInfo (cat, ctxts) + | cyclicCats ?= cat = (cat, Nothing) + | otherwise = (cat, fmap (sum . map product) $ + sequence (map (sequence . map lookFinCat) ctxts)) + + lookFinCat :: Cat -> Maybe Integer + lookFinCat cat = maybe (error "lookFinCat: Nothing") id $ + lookup cat finiteInfo + + cyclicCats :: Set Cat + cyclicCats = listSet $ + tracePrt "cyclic cats" prt $ + union $ map nubsort $ cyclesIn dependencies + + dependencies :: [(Cat, [Cat])] + dependencies = tracePrt "dependencies" (prtAfter "\n") $ + mapSnd (union . nubsort) groups + + groups :: [(Cat, [[Cat]])] + groups = tracePrt "groups" (prtAfter "\n") $ + mapSnd (map snd) $ groupPairs (nubsort allFuns) + + allFuns = tracePrt "all funs" (prtAfter "\n") $ + [ (cat, (fun, ctxt)) | + Mod (MTAbs _) _ _ _ defs <- modules, + AbsDFun fun typ _ <- defs, + let (cat, ctxt) = err error id $ typeForm typ ] + + tInfo = calculateTypeInfo 30 finiteCats (splitDefs canon) + +-- | stolen from 'Macros.qTypeForm', converted to GFC, and severely simplified +typeForm :: Monad m => Exp -> m (Cat, [Cat]) +typeForm t = case t of + EProd x a b -> do + (cat, ctxt) <- typeForm b + a' <- stripType a + return (cat, a':ctxt) + EApp c a -> do + (cat, _) <- typeForm c + return (cat, []) + EAtom (AC (CIQ _ con)) -> + return (con, []) + _ -> + fail $ "no normal form of type: " ++ prt t + +stripType :: Monad m => Exp -> m Cat +stripType (EApp c a) = stripType c +stripType (EAtom (AC (CIQ _ con))) = return con +stripType t = fail $ "can't strip type: " ++ prt t + +mapSnd f xs = [ (a, f b) | (a, b) <- xs ] +-} + +---------------------------------------------------------------------- +-- obsolete? + +{- +type SplitDefs = ([Def], [Def], [Def], [Def]) +----- AbsDCat AbsDFun CncDCat CncDFun + +splitDefs :: Canon -> SplitDefs +splitDefs (Gr modules) = foldr splitDef ([], [], [], []) $ + concat [ defs | Mod _ _ _ _ defs <- modules ] + +splitDef :: Def -> SplitDefs -> SplitDefs +splitDef ac@(AbsDCat _ _ _) (acs, afs, ccs, cfs) = (ac:acs, afs, ccs, cfs) +splitDef af@(AbsDFun _ _ _) (acs, afs, ccs, cfs) = (acs, af:afs, ccs, cfs) +splitDef cc@(CncDCat _ _ _ _) (acs, afs, ccs, cfs) = (acs, afs, cc:ccs, cfs) +splitDef cf@(CncDFun _ _ _ _ _) (acs, afs, ccs, cfs) = (acs, afs, ccs, cf:cfs) +splitDef _ sd = sd + +--calculateTypeInfo :: Integer -> FiniteCats -> SplitDefs -> ? +calculateTypeInfo maxFin allFinCats (acs, afs, ccs, cfs) + = (depCatsToExpand, catsToSplit) + where absDefsToExpand = tracePrt "absDefsToExpand" prt $ + [ ((cat, fin), cats) | + AbsDCat cat args _ <- acs, + not (null args), + cats <- mapM catOfDecl args, + fin <- lookupAssoc allFinCats cat, + fin <= maxFin + ] + (depCatsToExpand, argsCats') = unzip absDefsToExpand + catsToSplit = union (map nubsort argsCats') + catOfDecl (Decl _ exp) = err fail return $ stripType exp +-} diff --git a/src/GF/OldParsing/ConvertFiniteSimple.hs b/src/GF/OldParsing/ConvertFiniteSimple.hs new file mode 100644 index 000000000..7aac39cb2 --- /dev/null +++ b/src/GF/OldParsing/ConvertFiniteSimple.hs @@ -0,0 +1,121 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Calculating the finiteness of each type in a grammar +----------------------------------------------------------------------------- + +module GF.OldParsing.ConvertFiniteSimple + (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import Operations +import Ident (Ident(..)) +import GF.OldParsing.SimpleGFC +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.BacktrackM + +type CnvMonad a = BacktrackM () a + +convertGrammar :: Grammar -> Grammar +convertGrammar rules = solutions cnvMonad () + where split = calcSplitable rules + cnvMonad = member rules >>= convertRule split + +convertRule :: Splitable -> Rule -> CnvMonad Rule +convertRule split (Rule name typing term) + = do newTyping <- convertTyping split name typing + return $ Rule name newTyping term + +convertTyping :: Splitable -> Name -> Typing -> CnvMonad Typing +convertTyping split name (typ, decls) + = case splitableFun split name of + Just newCat -> return (newCat :@ [], decls) + Nothing -> expandTyping split [] typ decls [] + + +expandTyping :: Splitable -> [(Var, Cat)] -> Type -> [Decl] -> [Decl] -> CnvMonad Typing +expandTyping split env (cat :@ atoms) [] decls + = return (substAtoms split env cat atoms [], reverse decls) +expandTyping split env typ ((x ::: (xcat :@ xatoms)) : declsToDo) declsDone + = do env' <- calcNewEnv + expandTyping split env' typ declsToDo (decl : declsDone) + where decl = x ::: substAtoms split env xcat xatoms [] + calcNewEnv = case splitableCat split xcat of + Just newCats -> do newCat <- member newCats + return ((x,newCat) : env) + Nothing -> return env + +substAtoms :: Splitable -> [(Var, Cat)] -> Cat -> [Atom] -> [Atom] -> Type +substAtoms split env cat [] atoms = cat :@ reverse atoms +substAtoms split env cat (atom:atomsToDo) atomsDone + = case atomLookup split env atom of + Just newCat -> substAtoms split env (mergeArg cat newCat) atomsToDo atomsDone + Nothing -> substAtoms split env cat atomsToDo (atom : atomsDone) + +atomLookup split env (AVar x) = lookup x env +atomLookup split env (ACon con) = splitableFun split (constr2name con) + + +---------------------------------------------------------------------- +-- splitable categories (finite, no dependencies) +-- they should also be used as some dependency + +type Splitable = (Assoc Cat [Cat], Assoc Name Cat) + +splitableCat :: Splitable -> Cat -> Maybe [Cat] +splitableCat = lookupAssoc . fst + +splitableFun :: Splitable -> Name -> Maybe Cat +splitableFun = lookupAssoc . snd + +calcSplitable :: [Rule] -> Splitable +calcSplitable rules = (listAssoc splitableCats, listAssoc splitableFuns) + where splitableCats = tracePrt "splitableCats" (prtSep " ") $ + groupPairs $ nubsort + [ (cat, mergeFun fun cat) | (cat, fun) <- constantCats ] + + splitableFuns = tracePrt "splitableFuns" (prtSep " ") $ + nubsort + [ (fun, mergeFun fun cat) | (cat, fun) <- constantCats ] + + constantCats = tracePrt "constantCats" (prtSep " ") $ + [ (cat, fun) | + Rule fun (cat :@ [], []) _ <- rules, + dependentConstants ?= cat ] + + dependentConstants = listSet $ + tracePrt "dep consts" prt $ + dependentCats <\\> funCats + + funCats = tracePrt "fun cats" prt $ + nubsort [ cat | Rule _ (cat :@ _, decls) _ <- rules, + not (null decls) ] + + dependentCats = tracePrt "dep cats" prt $ + nubsort [ cat | Rule _ (cat :@ [], []) _ <- rules ] + + +---------------------------------------------------------------------- +-- utilities + +-- mergeing categories +mergeCats :: String -> String -> String -> Cat -> Cat -> Cat +mergeCats before middle after (IC cat) (IC arg) + = IC (before ++ cat ++ middle ++ arg ++ after) + +mergeFun, mergeArg :: Cat -> Cat -> Cat +mergeFun = mergeCats "{" ":" "}" +mergeArg = mergeCats "" "" "" + + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG.hs b/src/GF/OldParsing/ConvertGFCtoMCFG.hs new file mode 100644 index 000000000..1a9bc1a75 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG.hs @@ -0,0 +1,34 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All different conversions from GFC to MCFG +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG + (convertGrammar) where + +import GFC (CanonGrammar) +import GF.OldParsing.GrammarTypes +import Ident (Ident(..)) +import Option +import GF.System.Tracing + +import qualified GF.OldParsing.ConvertGFCtoMCFG.Old as Old +import qualified GF.OldParsing.ConvertGFCtoMCFG.Nondet as Nondet +import qualified GF.OldParsing.ConvertGFCtoMCFG.Strict as Strict +import qualified GF.OldParsing.ConvertGFCtoMCFG.Coercions as Coerce + +convertGrammar :: String -> (CanonGrammar, Ident) -> MCFGrammar +convertGrammar "nondet" = Coerce.addCoercions . Nondet.convertGrammar +convertGrammar "strict" = Strict.convertGrammar +convertGrammar "old" = Old.convertGrammar + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs new file mode 100644 index 000000000..650f8b646 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs @@ -0,0 +1,71 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Coercions +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Coercions (addCoercions) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import qualified Ident +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList +import List (groupBy) -- , transpose) + +---------------------------------------------------------------------- + +addCoercions :: MCFGrammar -> MCFGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule head args lins _ <- rules, + let lbls = [ lbl | Lin lbl _ <- lins ] ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#coercions total" (prt . length) $ + concat $ + tracePrt "#coercions per cat" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mainCat $ fst $ head heads) (mainCat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args = [ Rule arg [head] lins coercionName | + (head@(MCFCat _ headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@(MCFCat _ argCns) <- args, + argCns `subset` headCns ] + + +coercionName = Ident.IW + +mainCat (MCFCat c _) = c + +sameCat mc1 mc2 = mainCat mc1 == mainCat mc2 + + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs new file mode 100644 index 000000000..d27e240bc --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs @@ -0,0 +1,281 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Nondet +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Nondet (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, Ident) + +convertGrammar :: Env -- ^ the canonical grammar, together with the selected language + -> MCFGrammar -- ^ the resulting MCF grammar +convertGrammar gram = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= convertDef gram + convertModule _ = failure + +convertDef :: Env -> Def -> CnvMonad MCFRule +convertDef env (CncDFun fun (CIQ _ cat) args term _) + | trace2 "converting function" (prt fun) True + = do let iCat : iArgs = map initialMCat (cat : map catOfArg args) + writeState (iCat, iArgs, []) + convertTerm env cat term + (newCat, newArgs, linRec) <- readState + let newTerm = map (instLin newArgs) linRec + return (Rule newCat newArgs newTerm fun) +convertDef _ _ = failure + +instLin newArgs (Lin lbl lin) = Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, arg) = (newArgs !! arg, lbl, arg) + +convertTerm :: Env -> Cat -> Term -> CnvMonad () +convertTerm env cat term = do rterm <- simplTerm env term + let ctype = lookupCType env cat + reduceT env ctype rterm emptyPath + +------------------------------------------------------------ + +type CnvMonad a = BacktrackM CMRule a + +type CMRule = (MCFCat, [MCFCat], LinRec) +type LinRec = [Lin Cat Path Tokn] + +initialMCat :: Cat -> MCFCat +initialMCat cat = MCFCat cat [] + +---------------------------------------------------------------------- + +simplTerm :: Env -> Term -> CnvMonad STerm +simplTerm env = simplifyTerm + where + simplifyTerm :: Term -> CnvMonad STerm + simplifyTerm (Arg (A cat nr)) = return (SArg (fromInteger nr) cat emptyPath) + simplifyTerm (Con con terms) = liftM (SCon con) $ mapM simplifyTerm terms + simplifyTerm (R record) = liftM SRec $ mapM simplifyAssign record + simplifyTerm (P term lbl) = liftM (+. lbl) $ simplifyTerm term + simplifyTerm (T ct table) = liftM STbl $ sequence $ concatMap simplifyCase table + simplifyTerm (V ct terms) + = liftM STbl $ sequence [ liftM ((,) pat) (simplifyTerm term) | + (pat, term) <- zip (groundTerms env ct) terms ] + simplifyTerm (S term sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + STbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm env ssel + return (sterm +! sel') + simplifyTerm (FV terms) = liftM SVariants $ mapM simplifyTerm terms + simplifyTerm (term1 `C` term2) = liftM2 (SConcat) (simplifyTerm term1) (simplifyTerm term2) + simplifyTerm (K tokn) = return $ SToken tokn + simplifyTerm (E) = return $ SEmpty + simplifyTerm x = error $ "simplifyTerm: " ++ show x +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + + simplifyAssign :: Assign -> CnvMonad (Label, STerm) + simplifyAssign (Ass lbl term) = liftM ((,) lbl) $ simplifyTerm term + + simplifyCase :: Case -> [CnvMonad (STerm, STerm)] + simplifyCase (Cas pats term) = [ liftM2 (,) (simplifyPattern pat) (simplifyTerm term) | + pat <- pats ] + + simplifyPattern :: Patt -> CnvMonad STerm + simplifyPattern (PC con pats) = liftM (SCon con) $ mapM simplifyPattern pats + simplifyPattern (PW) = return SWildcard + simplifyPattern (PR record) = do record' <- mapM simplifyPattAssign record + case filter (\row -> snd row /= SWildcard) record' of + [] -> return SWildcard + record'' -> return (SRec record') + simplifyPattern x = error $ "simplifyPattern: " ++ show x +-- error constructors: +-- (PV Ident) - pattern variable + + simplifyPattAssign :: PattAssign -> CnvMonad (Label, STerm) + simplifyPattAssign (PAss lbl pat) = liftM ((,) lbl) $ simplifyPattern pat + + +------------------------------------------------------------ +-- reducing simplified terms, collecting mcf rules + +reduceT :: Env -> CType -> STerm -> Path -> CnvMonad () +reduceT env = reduce + where + reduce :: CType -> STerm -> Path -> CnvMonad () + reduce TStr term path = updateLin (path, term) + reduce (Cn _) term path + = do pat <- expandTerm env term + updateHead (path, pat) + reduce ctype (SVariants terms) path + = do term <- member terms + reduce ctype term path + reduce (RecType rtype) term path + = sequence_ [ reduce ctype (term +. lbl) (path ++. lbl) | + Lbg lbl ctype <- rtype ] + reduce (Table _ ctype) (STbl table) path + = sequence_ [ reduce ctype term (path ++! pat) | + (pat, term) <- table ] + reduce (Table ptype vtype) arg@(SArg _ _ _) path + = sequence_ [ reduce vtype (arg +! pat) (path ++! pat) | + pat <- groundTerms env ptype ] + reduce ctype term path = error ("reduce:\n ctype = (" ++ show ctype ++ + ")\n term = (" ++ show term ++ + ")\n path = (" ++ show path ++ ")\n") + + +------------------------------------------------------------ +-- expanding a term to ground terms + +expandTerm :: Env -> STerm -> CnvMonad STerm +expandTerm env arg@(SArg _ _ _) + = do pat <- member $ groundTerms env $ cTypeForArg env arg + pat =?= arg + return pat +expandTerm env (SCon con terms) = liftM (SCon con) $ mapM (expandTerm env) terms +expandTerm env (SRec record) = liftM SRec $ mapM (expandAssign env) record +expandTerm env (SVariants terms) = member terms >>= expandTerm env +expandTerm env term = error $ "expandTerm: " ++ show term + +expandAssign :: Env -> (Label, STerm) -> CnvMonad (Label, STerm) +expandAssign env (lbl, term) = liftM ((,) lbl) $ expandTerm env term + +------------------------------------------------------------ +-- unification of patterns and selection terms + +(=?=) :: STerm -> STerm -> CnvMonad () +SWildcard =?= _ = return () +SRec precord =?= arg@(SArg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= SArg arg _ path = updateArg arg (path, pat) +SCon con pats =?= SCon con' terms = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +SRec precord =?= SRec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ show pat ++ " =?= " ++ show term + + +------------------------------------------------------------ +-- updating the mcf rule + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins) <- readState + args' <- updateNth (addToMCFCat cn) arg args + writeState (head, args', lins) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins) <- readState + head' <- addToMCFCat cn head + writeState (head', args, lins) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins') + +term2lins :: STerm -> [[Symbol (Cat, Path, Int) Tokn]] +term2lins (SArg arg cat path) = return [Cat (cat, path, arg)] +term2lins (SToken str) = return [Tok str] +term2lins (SConcat t1 t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (SEmpty) = return [] +term2lins (SVariants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToMCFCat :: Constraint -> MCFCat -> CnvMonad MCFCat +addToMCFCat cn (MCFCat cat cns) = liftM (MCFCat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + +catOfArg (A aCat _) = aCat +catOfArg (AB aCat _ _) = aCat + +lookupCType :: Env -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +groundTerms :: Env -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +cTypeForArg :: Env -> STerm -> CType +cTypeForArg env (SArg nr cat (Path path)) + = follow path $ lookupCType env cat + where follow [] ctype = ctype + follow (Right pat : path) (Table _ ctype) = follow path ctype + follow (Left lbl : path) (RecType rec) + = case [ ctype | Lbg lbl' ctype <- rec, lbl == lbl' ] of + [ctype] -> follow path ctype + err -> error $ "follow: " ++ show rec ++ " . " ++ show lbl ++ + " results in " ++ show err + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Old.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Old.hs new file mode 100644 index 000000000..d0869c8f5 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Old.hs @@ -0,0 +1,277 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Old +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars. (Old variant) +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Old (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +--import PrintGFC +import qualified PrGrammar as PG + +import Monad (liftM, liftM2, guard) +-- import Maybe (listToMaybe) +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList (nubsort, groupPairs) +import Maybe (listToMaybe) +import List (groupBy, transpose) + +---------------------------------------------------------------------- +-- old style types + +data XMCFCat = XMCFCat Cat [(XPath, Term)] deriving (Eq, Ord, Show) +type XMCFLabel = XPath + +cnvXMCFCat :: XMCFCat -> MCFCat +cnvXMCFCat (XMCFCat cat constrs) = MCFCat cat [ (cnvXPath path, cnvTerm term) | + (path, term) <- constrs ] + +cnvXMCFLabel :: XMCFLabel -> MCFLabel +cnvXMCFLabel = cnvXPath + +cnvXMCFLin :: Lin XMCFCat XMCFLabel Tokn -> Lin MCFCat MCFLabel Tokn +cnvXMCFLin (Lin lbl lin) = Lin (cnvXMCFLabel lbl) $ + map (mapSymbol cnvSym id) lin + where cnvSym (cat, lbl, nr) = (cnvXMCFCat cat, cnvXMCFLabel lbl, nr) + +-- Term -> STerm + +cnvTerm (R rec) = SRec [ (lbl, cnvTerm term) | Ass lbl term <- rec ] +cnvTerm (T _ tbl) = STbl [ (cnvPattern pat, cnvTerm term) | + Cas pats term <- tbl, pat <- pats ] +cnvTerm (Con con terms) = SCon con $ map cnvTerm terms +cnvTerm term + | isArgPath term = cnvArgPath term + +cnvPattern (PR rec) = SRec [ (lbl, cnvPattern term) | PAss lbl term <- rec ] +cnvPattern (PC con pats) = SCon con $ map cnvPattern pats +cnvPattern (PW) = SWildcard + +isArgPath (Arg _) = True +isArgPath (P _ _) = True +isArgPath (S _ _) = True +isArgPath _ = False + +cnvArgPath (Arg (A cat nr)) = SArg (fromInteger nr) cat emptyPath +cnvArgPath (term `P` lbl) = cnvArgPath term +. lbl +cnvArgPath (term `S` sel) = cnvArgPath term +! cnvTerm sel + +-- old style paths + +newtype XPath = XPath [Either Label Term] deriving (Eq, Ord, Show) + +cnvXPath :: XPath -> Path +cnvXPath (XPath path) = Path (map (either Left (Right . cnvTerm)) (reverse path)) + +emptyXPath :: XPath +emptyXPath = XPath [] + +(++..) :: XPath -> Label -> XPath +XPath path ++.. lbl = XPath (Left lbl : path) + +(++!!) :: XPath -> Term -> XPath +XPath path ++!! sel = XPath (Right sel : path) + +---------------------------------------------------------------------- + +-- | combining alg. 1 and alg. 2 from Ljunglöf's PhD thesis +convertGrammar :: (CanonGrammar, Ident) -> MCFGrammar +convertGrammar (gram, lng) = trace2 "language" (prt lng) $ + trace2 "modules" (prtSep " " modnames) $ + trace2 "#lin-terms" (prt (length cncdefs)) $ + tracePrt "#mcf-rules total" (prt.length) $ + concat $ + tracePrt "#mcf-rules per fun" + (\rs -> concat [" "++show n++"="++show (length r) | + (n, r) <- zip [1..] rs]) $ + map (convertDef gram lng) cncdefs + where Gr mods = grammar2canon gram + cncdefs = [ def | Mod (MTCnc modname _) _ _ _ defs <- mods, + modname `elem` modnames, + def@(CncDFun _ _ _ _ _) <- defs ] + modnames = M.allExtends gram lng + + +convertDef :: CanonGrammar -> Ident -> Def -> [MCFRule] +convertDef gram lng (CncDFun fun (CIQ _ cat) args term _) + = [ Rule (cnvXMCFCat newCat) (map cnvXMCFCat newArgs) (map cnvXMCFLin newTerm) fun | + let ctype = lookupCType gram lng cat, + instArgs <- mapM (enumerateInsts gram lng) args, + let instTerm = substitutePaths gram lng instArgs term, + newCat <- emcfCat gram lng cat instTerm, + newArgs <- mapM (extractArg gram lng instArgs) args, + let newTerm = concatMap (extractLin newArgs) $ strPaths gram lng ctype instTerm + ] + + +-- gammalt skräp: +-- mergeArgs = zipWith mergeRec +-- mergeRec (R r1) (R r2) = R (r1 ++ r2) + +extractArg :: CanonGrammar -> Ident -> [Term] -> ArgVar -> [XMCFCat] +extractArg gram lng args (A cat nr) = emcfCat gram lng cat (args !!! nr) + + +emcfCat :: CanonGrammar -> Ident -> Ident -> Term -> [XMCFCat] +emcfCat gram lng cat = map (XMCFCat cat) . parPaths gram lng (lookupCType gram lng cat) + + +extractLin :: [XMCFCat] -> (XPath, Term) -> [Lin XMCFCat XMCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `C` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (E) = [[]] + convertLin (K tok) = [[Tok tok]] + convertLin (FV terms) = concatMap convertLin terms + convertLin term = map (return . Cat) $ flattenTerm emptyXPath term + flattenTerm path (Arg (A _ nr)) = [(args !!! nr, path, fromInteger nr)] + flattenTerm path (term `P` lbl) = flattenTerm (path ++.. lbl) term + flattenTerm path (term `S` sel) = flattenTerm (path ++!! sel) term + flattenTerm path (FV terms) = concatMap (flattenTerm path) terms + flattenTerm path term = error $ "flattenTerm: \n " ++ show path ++ "\n " ++ prt term + + +enumerateInsts :: CanonGrammar -> Ident -> ArgVar -> [Term] +enumerateInsts gram lng arg@(A argCat _) = enumerate (Arg arg) (lookupCType gram lng argCat) + where enumerate path (TStr) = [ path ] + enumerate path (Cn con) = okError $ lookupParamValues gram con + enumerate path (RecType r) + = map R $ sequence [ map (lbl `Ass`) $ + enumerate (path `P` lbl) ctype | + lbl `Lbg` ctype <- r ] + enumerate path (Table s t) + = map (T s) $ sequence [ map ([term2pattern sel] `Cas`) $ + enumerate (path `S` sel) t | + sel <- enumerate (error "enumerate") s ] + + + +termPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, (CType, Term))] +termPaths gr l (TStr) term = [ (emptyXPath, (TStr, term)) ] +termPaths gr l (RecType rtype) (R record) + = [ (path ++.. lbl, value) | + lbl `Ass` term <- record, + let ctype = okError $ maybeErr "termPaths/record" $ lookupLabelling lbl rtype, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (T _ table) + = [ (path ++!! pattern2term pat, value) | + pats `Cas` term <- table, pat <- pats, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (V ptype table) + = [ (path ++!! pat, value) | + (pat, term) <- zip (okError $ allParamValues gr ptype) table, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l ctype (FV terms) + = concatMap (termPaths gr l ctype) terms +termPaths gr l (Cn pc) term = [ (emptyXPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: CanonGrammar -> Ident -> CType -> Term -> [[(XPath, Term)]] +parPaths gr l ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths gr l ctype term ] + +strPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, Term)] +strPaths gr l ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths gr l ctype term ] + + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: CanonGrammar -> Ident -> [Term] -> Term -> Term +substitutePaths gr l arguments trm = subst trm + where subst (con `Con` terms) = con `Con` map subst terms + subst (R record) = R $ map substAss record + subst (term `P` lbl) = subst term `evalP` lbl + subst (T ptype table) = T ptype $ map substCas table + subst (V ptype table) = T ptype [ [term2pattern pat] `Cas` subst term | + (pat, term) <- zip (okError $ allParamValues gr ptype) table ] + subst (term `S` select) = subst term `evalS` subst select + subst (term `C` term') = subst term `C` subst term' + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !!! arg + subst term = term + + substAss (l `Ass` term) = l `Ass` subst term + substCas (p `Cas` term) = p `Cas` subst term + + +evalP (R record) lbl = okError $ maybeErr errStr $ lookupAssign lbl record + where errStr = "evalP: " ++ prt (R record `P` lbl) +evalP (FV terms) lbl = evalFV [ evalP term lbl | term <- terms ] +evalP term lbl = term `P` lbl + +evalS t@(T _ tbl) sel = maybe (t `S` sel) id $ lookupCase sel tbl +evalS (FV terms) sel = evalFV [ term `evalS` sel | term <- terms ] +evalS term (FV sels)= evalFV [ term `evalS` sel | sel <- sels ] +evalS term sel = term `S` sel + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> FV terms + where flattenFV (FV ts) = ts + flattenFV t = [t] + + +---------------------------------------------------------------------- +-- utilities + +-- lookup a CType for an Ident +lookupCType :: CanonGrammar -> Ident -> Ident -> CType +lookupCType gr lng c = errVal defLinType $ lookupLincat gr (CIQ lng c) + +-- lookup a label in a (record / record ctype / table) +lookupAssign :: Label -> [Assign] -> Maybe Term +lookupLabelling :: Label -> [Labelling] -> Maybe CType +lookupCase :: Term -> [Case] -> Maybe Term + +lookupAssign lbl rec = listToMaybe [ term | lbl' `Ass` term <- rec, lbl == lbl' ] +lookupLabelling lbl rtyp = listToMaybe [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] +lookupCase sel tbl = listToMaybe [ term | pats `Cas` term <- tbl, sel `matchesPats` pats ] + +matchesPats :: Term -> [Patt] -> Bool +matchesPats term patterns = or [ term == pattern2term pattern | pattern <- patterns ] + +-- converting between patterns and terms +pattern2term :: Patt -> Term +term2pattern :: Term -> Patt + +pattern2term (con `PC` patterns) = con `Con` map pattern2term patterns +pattern2term (PR record) = R [ lbl `Ass` pattern2term pattern | + lbl `PAss` pattern <- record ] + +term2pattern (con `Con` terms) = con `PC` map term2pattern terms +term2pattern (R record) = PR [ lbl `PAss` term2pattern term | + lbl `Ass` term <- record ] + +-- list lookup for Integers instead of Ints +(!!!) :: [a] -> Integer -> a +xs !!! n = xs !! fromInteger n diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Strict.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Strict.hs new file mode 100644 index 000000000..604fb460b --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Strict.hs @@ -0,0 +1,189 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Strict +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Strict (convertGrammar) where + +import GF.System.Tracing +-- import IOExts (unsafePerformIO) +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, Ident) + +convertGrammar :: Env -- ^ the canonical grammar, together with the selected language + -> MCFGrammar -- ^ the resulting MCF grammar +convertGrammar gram = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= convertDef gram + convertModule _ = failure + +convertDef :: Env -> Def -> CnvMonad MCFRule +convertDef env (CncDFun fun (CIQ _ cat) args term _) + | trace2 "converting function" (prt fun) True + = do let ctype = lookupCType env cat + instArgs <- mapM (enumerateArg env) args + let instTerm = substitutePaths env instArgs term + newCat <- emcfCat env cat instTerm + newArgs <- mapM (extractArg env instArgs) args + let newTerm = strPaths env ctype instTerm >>= extractLin newArgs + return (Rule newCat newArgs newTerm fun) +convertDef _ _ = failure + +------------------------------------------------------------ + +type CnvMonad a = BacktrackM () a + +---------------------------------------------------------------------- +-- strict conversion + +extractArg :: Env -> [STerm] -> ArgVar -> CnvMonad MCFCat +extractArg env args (A cat nr) = emcfCat env cat (args !! fromInteger nr) + +emcfCat :: Env -> Cat -> STerm -> CnvMonad MCFCat +emcfCat env cat term = member $ map (MCFCat cat) $ parPaths env (lookupCType env cat) term + +enumerateArg :: Env -> ArgVar -> CnvMonad STerm +enumerateArg env (A cat nr) = let ctype = lookupCType env cat + in enumerate (SArg (fromInteger nr) cat emptyPath) ctype + where enumerate arg (TStr) = return arg + enumerate arg ctype@(Cn _) = member $ groundTerms env ctype + enumerate arg (RecType rtype) + = liftM SRec $ sequence [ liftM ((,) lbl) $ + enumerate (arg +. lbl) ctype | + lbl `Lbg` ctype <- rtype ] + enumerate arg (Table stype ctype) + = do state <- readState + liftM STbl $ sequence [ liftM ((,) sel) $ + enumerate (arg +! sel) ctype | + sel <- solutions (enumerate err stype) state ] + where err = error "enumerate: parameter type should not be string" + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: Env -> [STerm] -> Term -> STerm +substitutePaths env arguments trm = subst trm + where subst (con `Con` terms) = con `SCon` map subst terms + subst (R record) = SRec [ (lbl, subst term) | lbl `Ass` term <- record ] + subst (term `P` lbl) = subst term +. lbl + subst (T ptype table) = STbl [ (pattern2sterm pat, subst term) | + pats `Cas` term <- table, pat <- pats ] + subst (V ptype table) = STbl [ (pat, subst term) | + (pat, term) <- zip (groundTerms env ptype) table ] + subst (term `S` select) = subst term +! subst select + subst (term `C` term') = subst term `SConcat` subst term' + subst (K str) = SToken str + subst (E) = SEmpty + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !! fromInteger arg + + +termPaths :: Env -> CType -> STerm -> [(Path, (CType, STerm))] +termPaths env (TStr) term = [ (emptyPath, (TStr, term)) ] +termPaths env (RecType rtype) (SRec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let ctype = lookupLabelling lbl rtype, + (path, value) <- termPaths env ctype term ] +termPaths env (Table _ ctype) (STbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths env ctype term ] +termPaths env ctype (SVariants terms) + = terms >>= termPaths env ctype +termPaths env (Cn pc) term = [ (emptyPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: Env -> CType -> STerm -> [[(Path, STerm)]] +parPaths env ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths env ctype term ] + +strPaths :: Env -> CType -> STerm -> [(Path, STerm)] +strPaths env ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths env ctype term ] + +extractLin :: [MCFCat] -> (Path, STerm) -> [Lin MCFCat MCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `SConcat` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (SEmpty) = [[]] + convertLin (SToken tok) = [[Tok tok]] + convertLin (SVariants terms) = concatMap convertLin terms + convertLin (SArg nr _ path) = [[Cat (args !! nr, path, nr)]] + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> SVariants terms + where flattenFV (SVariants ts) = ts + flattenFV t = [t] + +---------------------------------------------------------------------- +-- utilities + +lookupCType :: Env -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +lookupLabelling :: Label -> [Labelling] -> CType +lookupLabelling lbl rtyp = case [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] of + [ctyp] -> ctyp + err -> error $ "lookupLabelling:" ++ show err + +groundTerms :: Env -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + +pattern2sterm :: Patt -> STerm +pattern2sterm (con `PC` patterns) = con `SCon` map pattern2sterm patterns +pattern2sterm (PR record) = SRec [ (lbl, pattern2sterm pattern) | + lbl `PAss` pattern <- record ] + diff --git a/src/GF/OldParsing/ConvertGFCtoSimple.hs b/src/GF/OldParsing/ConvertGFCtoSimple.hs new file mode 100644 index 000000000..a14fa90b6 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoSimple.hs @@ -0,0 +1,122 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC to SimpleGFC +-- +-- the conversion might fail if the GFC grammar has dependent or higher-order types +----------------------------------------------------------------------------- + +module GF.OldParsing.ConvertGFCtoSimple where + +import qualified AbsGFC as A +import qualified Ident as I +import GF.OldParsing.SimpleGFC + +import GFC +import MkGFC (grammar2canon) +import qualified Look (lookupLin, allParamValues, lookupLincat) +import qualified CMacros (defLinType) +import Operations (err, errVal) +import qualified Modules as M + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, I.Ident) + +convertGrammar :: Env -> Grammar +convertGrammar gram = trace2 "language" (show (snd gram)) $ + tracePrt "#simple-rules total" (show . length) $ + [ convertAbsFun gram fun typing | + A.Mod (A.MTAbs modname) _ _ _ defs <- modules, + A.AbsDFun fun typing _ <- defs ] + where A.Gr modules = grammar2canon (fst gram) + +convertAbsFun :: Env -> I.Ident -> A.Exp -> Rule +convertAbsFun gram fun aTyping + = -- trace2 "absFun" (show fun) $ + Rule fun sTyping sTerm + where sTyping = convertTyping [] aTyping + sTerm = do lin <- lookupLin gram fun + return (convertTerm gram lin, convertCType gram cType) + cType = lookupCType gram sTyping + +convertTyping :: [Decl] -> A.Exp -> Typing +-- convertTyping env tp | trace2 "typing" (prt env ++ " / " ++ prt tp) False = undefined +convertTyping env (A.EProd x a b) + = convertTyping ((x ::: convertType [] a) : env) b +convertTyping env a = (convertType [] a, reverse env) + +convertType :: [Atom] -> A.Exp -> Type +-- convertType args tp | trace2 "type" (prt args ++ " / " ++ prt tp) False = undefined +convertType args (A.EApp a (A.EAtom at)) = convertType (convertAtom at : args) a +convertType args (A.EAtom at) = convertCat at :@ args + +convertAtom :: A.Atom -> Atom +convertAtom (A.AC con) = ACon con +convertAtom (A.AV var) = AVar var + +convertCat :: A.Atom -> Cat +convertCat (A.AC (A.CIQ _ cat)) = cat +convertCat at = error $ "convertCat: " ++ show at + +convertCType :: Env -> A.CType -> CType +convertCType gram (A.RecType rec) + = RecT [ (lbl, convertCType gram ctype) | A.Lbg lbl ctype <- rec ] +convertCType gram (A.Table ptype vtype) + = TblT (convertCType gram ptype) (convertCType gram vtype) +convertCType gram ct@(A.Cn con) = ConT con $ map (convertTerm gram) $ groundTerms gram ct +convertCType gram (A.TStr) = StrT +convertCType gram (A.TInts n) = error "convertCType: cannot handle 'TInts' constructor" + +convertTerm :: Env -> A.Term -> Term +convertTerm gram (A.Arg arg) = convertArgVar arg +convertTerm gram (A.Con con terms) = con :^ map (convertTerm gram) terms +convertTerm gram (A.LI var) = Var var +convertTerm gram (A.R rec) = Rec [ (lbl, convertTerm gram term) | A.Ass lbl term <- rec ] +convertTerm gram (A.P term lbl) = convertTerm gram term +. lbl +convertTerm gram (A.V ctype terms) = Tbl [ (convertTerm gram pat, convertTerm gram term) | + (pat, term) <- zip (groundTerms gram ctype) terms ] +convertTerm gram (A.T ctype tbl) = Tbl [ (convertPatt pat, convertTerm gram term) | + A.Cas pats term <- tbl, pat <- pats ] +convertTerm gram (A.S term sel) = convertTerm gram term +! convertTerm gram sel +convertTerm gram (A.C term1 term2) = convertTerm gram term1 ?++ convertTerm gram term2 +convertTerm gram (A.FV terms) = Variants (map (convertTerm gram) terms) +convertTerm gram (A.K tok) = Token tok +convertTerm gram (A.E) = Empty +convertTerm gram (A.I con) = error "convertTerm: cannot handle 'I' constructor" +convertTerm gram (A.EInt int) = error "convertTerm: cannot handle 'EInt' constructor" + +convertArgVar :: A.ArgVar -> Term +convertArgVar (A.A cat nr) = Arg (fromInteger nr) cat emptyPath +convertArgVar (A.AB cat bindings nr) = Arg (fromInteger nr) cat emptyPath + +convertPatt (A.PC con pats) = con :^ map convertPatt pats +convertPatt (A.PV x) = Var x +convertPatt (A.PW) = Wildcard +convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ] +convertPatt (A.PI n) = error "convertPatt: cannot handle 'PI' constructor" + +---------------------------------------------------------------------- + +lookupLin gram fun = err fail Just $ + Look.lookupLin (fst gram) (A.CIQ (snd gram) fun) + +--lookupCType :: Env -> Typing -> CType +lookupCType env (cat :@ _, _) = errVal CMacros.defLinType $ + Look.lookupLincat (fst env) (A.CIQ (snd env) cat) + +groundTerms :: Env -> A.CType -> [A.Term] +groundTerms gram ctype = err error id $ + Look.allParamValues (fst gram) ctype + diff --git a/src/GF/OldParsing/ConvertGrammar.hs b/src/GF/OldParsing/ConvertGrammar.hs new file mode 100644 index 000000000..474834081 --- /dev/null +++ b/src/GF/OldParsing/ConvertGrammar.hs @@ -0,0 +1,44 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGrammar +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All (?) grammar conversions which are used in GF +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGrammar + (pInfo, emptyPInfo, + module GF.OldParsing.GrammarTypes + ) where + +import GFC (CanonGrammar) +import MkGFC (grammar2canon) +import GF.OldParsing.GrammarTypes +import Ident (Ident(..)) +import Option +import GF.System.Tracing + +-- import qualified GF.OldParsing.FiniteTypes.Calc as Fin +import qualified GF.OldParsing.ConvertGFCtoMCFG as G2M +import qualified GF.OldParsing.ConvertMCFGtoCFG as M2C +import qualified GF.OldParsing.MCFGrammar as MCFG +import qualified GF.OldParsing.CFGrammar as CFG + +pInfo :: Options -> CanonGrammar -> Ident -> PInfo +pInfo opts canon lng = PInfo mcfg cfg mcfp cfp + where mcfg = G2M.convertGrammar cnv (canon, lng) + cnv = maybe "nondet" id $ getOptVal opts gfcConversion + cfg = M2C.convertGrammar mcfg + mcfp = MCFG.pInfo mcfg + cfp = CFG.pInfo cfg + +emptyPInfo :: PInfo +emptyPInfo = PInfo [] [] (MCFG.pInfo []) (CFG.pInfo []) + diff --git a/src/GF/OldParsing/ConvertMCFGtoCFG.hs b/src/GF/OldParsing/ConvertMCFGtoCFG.hs new file mode 100644 index 000000000..06965994c --- /dev/null +++ b/src/GF/OldParsing/ConvertMCFGtoCFG.hs @@ -0,0 +1,52 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertMCFGtoCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting MCFG grammars to (possibly overgenerating) CFG +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertMCFGtoCFG + (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser + +import Monad +import GF.OldParsing.Utilities +import qualified GF.OldParsing.MCFGrammar as MCFG +import qualified GF.OldParsing.CFGrammar as CFG +import GF.OldParsing.GrammarTypes + +convertGrammar :: MCFGrammar -> CFGrammar +convertGrammar gram = tracePrt "#cf-rules" (prt.length) $ + concatMap convertRule gram + +convertRule :: MCFRule -> [CFRule] +convertRule (MCFG.Rule cat args record name) + = [ CFG.Rule (CFCat cat lbl) rhs (CFName name profile) | + MCFG.Lin lbl lin <- record, + let rhs = map (mapSymbol convertArg id) lin, + let profile = map (argPlaces lin) [0 .. length args-1] + ] + +convertArg (cat, lbl, _arg) = CFCat cat lbl + +argPlaces lin arg = [ place | ((_cat, _lbl, arg'), place) <- + zip (filterCats lin) [0::Int ..], arg == arg' ] + +filterCats syms = [ cat | Cat cat <- syms ] + + + + + + + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG.hs b/src/GF/OldParsing/ConvertSimpleToMCFG.hs new file mode 100644 index 000000000..e111444f9 --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG.hs @@ -0,0 +1,30 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All different conversions from SimpleGFC to MCFG +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertSimpleToMCFG + (convertGrammar) where + +import qualified GF.OldParsing.SimpleGFC as S +--import GF.OldParsing.GrammarTypes + +import qualified GF.OldParsing.ConvertFiniteSimple as Fin +import qualified GF.OldParsing.ConvertSimpleToMCFG.Nondet as Nondet +--import qualified GF.OldParsing.ConvertSimpleToMCFG.Strict as Strict +import qualified GF.OldParsing.ConvertSimpleToMCFG.Coercions as Coerce + +--convertGrammar :: String -> S.Grammar -> MCFGrammar +convertGrammar ('f':'i':'n':'-':cnv) = convertGrammar cnv . Fin.convertGrammar +convertGrammar "nondet" = Coerce.addCoercions . Nondet.convertGrammar +--convertGrammar "strict" = Strict.convertGrammar + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs new file mode 100644 index 000000000..58a39b7f4 --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs @@ -0,0 +1,70 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertSimpleToMCFG.Coercions (addCoercions) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import qualified Ident +import GF.OldParsing.Utilities +--import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList +import List (groupBy) -- , transpose) + +---------------------------------------------------------------------- + +--addCoercions :: MCFGrammar -> MCFGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule head args lins _ <- rules, + let lbls = [ lbl | Lin lbl _ <- lins ] ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#coercions total" (prt . length) $ + concat $ + tracePrt "#coercions per cat" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mainCat $ fst $ head heads) (mainCat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args = [ Rule arg [head] lins coercionName | + (head@({-MCFCat-}(_, headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@({-MCFCat-} (_, argCns) <- args, + argCns `subset` headCns ] + + +coercionName = Ident.IW + +mainCat ({-MCFCat-} (c, _) = c + +sameCat mc1 mc2 = mainCat mc1 == mainCat mc2 + + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs new file mode 100644 index 000000000..da7511eaf --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs @@ -0,0 +1,245 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertSimpleToMCFG.Nondet (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +-- import Ident (Ident(..)) +import qualified AbsGFC +-- import GFC +import Look +import Operations +-- import qualified Modules as M +import CMacros (defLinType) +-- import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +-- import GF.OldParsing.GrammarTypes +import GF.Data.SortedList +import qualified GF.OldParsing.MCFGrammar as MCF (Grammar, Rule(..), Lin(..)) +import GF.OldParsing.SimpleGFC +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +--convertGrammar :: Grammar -> MCF.Grammar +convertGrammar rules = tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion rules undefined + where conversion = member rules >>= convertRule + +--convertRule :: Rule -> CnvMonad MCF.Rule +convertRule (Rule fun (cat :@ _, decls) (Just (term, ctype))) + = do let args = [ arg | _ ::: (arg :@ _) <- decls ] + writeState (initialMCat cat, map initialMCat args, []) + convertTerm cat term + (newCat, newArgs, linRec) <- readState + let newTerm = map (instLin newArgs) linRec + return (MCF.Rule newCat newArgs newTerm fun) +convertRule _ = failure + +instLin newArgs (MCF.Lin lbl lin) = MCF.Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, arg) = (newArgs !! arg, lbl, arg) + +--convertTerm :: Cat -> Term -> CnvMonad () +convertTerm cat term = do rterm <- simplifyTerm term + env <- readEnv + let ctype = lookupCType env cat + reduce ctype rterm emptyPath + +------------------------------------------------------------ + +{- +type CnvMonad a = BacktrackM Grammar CMRule a + +type CMRule = (MCFCat, [MCFCat], LinRec) +type LinRec = [Lin Cat Path Tokn] +-} + +--initialMCat :: Cat -> MCFCat +initialMCat cat = (cat, []) --MCFCat cat [] + +---------------------------------------------------------------------- + +--simplifyTerm :: Term -> CnvMonad STerm +simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms +simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record +simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term +simplifyTerm (Tbl table) = Tbl $ mapM simplifyCase table +simplifyTerm (term :! sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + Tbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm ssel + return (sterm +! sel') +simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms +simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2) +simplifyTerm term = return term +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + +--simplifyAssign :: Assign -> CnvMonad (Label, STerm) +simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term + +--simplifyCase :: Case -> [CnvMonad (STerm, STerm)] +simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term) + + +------------------------------------------------------------ +-- reducing simplified terms, collecting mcf rules + +--reduce :: CType -> STerm -> Path -> CnvMonad () +reduce StrT term path = updateLin (path, term) +reduce (ConT _) term path + = do pat <- expandTerm term + updateHead (path, pat) +reduce ctype (Variants terms) path + = do term <- member terms + reduce ctype term path +reduce (RecT rtype) term path + = sequence_ [ reduce ctype (term +. lbl) (path ++. lbl) | + (lbl, ctype) <- rtype ] +reduce (TblT _ ctype) (Tbl table) path + = sequence_ [ reduce ctype term (path ++! pat) | + (pat, term) <- table ] +reduce (TblT ptype vtype) arg@(Arg _ _ _) path + = do env <- readEnv + sequence_ [ reduce vtype (arg +! pat) (path ++! pat) | + pat <- groundTerms ptype ] +reduce ctype term path = error ("reduce:\n ctype = (" ++ show ctype ++ + ")\n term = (" ++ show term ++ + ")\n path = (" ++ show path ++ ")\n") + + +------------------------------------------------------------ +-- expanding a term to ground terms + +--expandTerm :: STerm -> CnvMonad STerm +expandTerm arg@(Arg _ _ _) + = do env <- readEnv + pat <- member $ groundTerms $ cTypeForArg env arg + pat =?= arg + return pat +expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms +expandTerm (Rec record) = liftM Rec $ mapM expandAssign record +expandTerm (Variants terms) = member terms >>= expandTerm +expandTerm term = error $ "expandTerm: " ++ show term + +--expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm) +expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term + +------------------------------------------------------------ +-- unification of patterns and selection terms + +--(=?=) :: STerm -> STerm -> CnvMonad () +Wildcard =?= _ = return () +Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= Arg arg _ path = updateArg arg (path, pat) +(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ show pat ++ " =?= " ++ show term + + +------------------------------------------------------------ +-- updating the mcf rule + +--updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins) <- readState + args' <- updateNth (addToMCFCat cn) arg args + writeState (head, args', lins) + +--updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins) <- readState + head' <- addToMCFCat cn head + writeState (head', args, lins) + +--updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins) <- readState + let lins' = lins ++ map (MCF.Lin path) newLins + writeState (head, args, lins') + +--term2lins :: STerm -> [[Symbol (Cat, Path, Int) Tokn]] +term2lins (Arg arg cat path) = return [Cat (cat, path, arg)] +term2lins (Token str) = return [Tok str] +term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (Empty) = return [] +term2lins (Variants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +--addToMCFCat :: Constraint -> MCFCat -> CnvMonad MCFCat +addToMCFCat cn ({-MCFCat-} cat, cns) = liftM ({-MCFCat-} (,) cat) $ addConstraint cn cns + +--addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + +--lookupCType :: GrammarEnv -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (AbsGFC.CIQ (snd env) cat) + +--groundTerms :: GrammarEnv -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +--cTypeForArg :: GrammarEnv -> STerm -> CType +cTypeForArg env (Arg nr cat (Path path)) + = follow path $ lookupCType env cat + where follow [] ctype = ctype + follow (Right pat : path) (TblT _ ctype) = follow path ctype + follow (Left lbl : path) (RecT rec) + = case [ ctype | (lbl', ctype) <- rec, lbl == lbl' ] of + [ctype] -> follow path ctype + err -> error $ "follow: " ++ show rec ++ " . " ++ show lbl ++ + " results in " ++ show err + +term2spattern (AbsGFC.R rec) = Rec [ (lbl, term2spattern term) | + AbsGFC.Ass lbl term <- rec ] +term2spattern (AbsGFC.Con con terms) = con :^ map term2spattern terms + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Old.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Old.hs new file mode 100644 index 000000000..88a459625 --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Old.hs @@ -0,0 +1,277 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Old +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars. (Old variant) +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Old (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +--import PrintGFC +import qualified PrGrammar as PG + +import Monad (liftM, liftM2, guard) +-- import Maybe (listToMaybe) +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList (nubsort, groupPairs) +import Maybe (listToMaybe) +import List (groupBy, transpose) + +---------------------------------------------------------------------- +-- old style types + +data XMCFCat = XMCFCat Cat [(XPath, Term)] deriving (Eq, Ord, Show) +type XMCFLabel = XPath + +cnvXMCFCat :: XMCFCat -> MCFCat +cnvXMCFCat (XMCFCat cat constrs) = MCFCat cat [ (cnvXPath path, cnvTerm term) | + (path, term) <- constrs ] + +cnvXMCFLabel :: XMCFLabel -> MCFLabel +cnvXMCFLabel = cnvXPath + +cnvXMCFLin :: Lin XMCFCat XMCFLabel Tokn -> Lin MCFCat MCFLabel Tokn +cnvXMCFLin (Lin lbl lin) = Lin (cnvXMCFLabel lbl) $ + map (mapSymbol cnvSym id) lin + where cnvSym (cat, lbl, nr) = (cnvXMCFCat cat, cnvXMCFLabel lbl, nr) + +-- Term -> STerm + +cnvTerm (R rec) = SRec [ (lbl, cnvTerm term) | Ass lbl term <- rec ] +cnvTerm (T _ tbl) = STbl [ (cnvPattern pat, cnvTerm term) | + Cas pats term <- tbl, pat <- pats ] +cnvTerm (Con con terms) = SCon con $ map cnvTerm terms +cnvTerm term + | isArgPath term = cnvArgPath term + +cnvPattern (PR rec) = SRec [ (lbl, cnvPattern term) | PAss lbl term <- rec ] +cnvPattern (PC con pats) = SCon con $ map cnvPattern pats +cnvPattern (PW) = SWildcard + +isArgPath (Arg _) = True +isArgPath (P _ _) = True +isArgPath (S _ _) = True +isArgPath _ = False + +cnvArgPath (Arg (A cat nr)) = SArg (fromInteger nr) cat emptyPath +cnvArgPath (term `P` lbl) = cnvArgPath term +. lbl +cnvArgPath (term `S` sel) = cnvArgPath term +! cnvTerm sel + +-- old style paths + +newtype XPath = XPath [Either Label Term] deriving (Eq, Ord, Show) + +cnvXPath :: XPath -> Path +cnvXPath (XPath path) = Path (map (either Left (Right . cnvTerm)) (reverse path)) + +emptyXPath :: XPath +emptyXPath = XPath [] + +(++..) :: XPath -> Label -> XPath +XPath path ++.. lbl = XPath (Left lbl : path) + +(++!!) :: XPath -> Term -> XPath +XPath path ++!! sel = XPath (Right sel : path) + +---------------------------------------------------------------------- + +-- | combining alg. 1 and alg. 2 from Ljunglöf's PhD thesis +convertGrammar :: (CanonGrammar, Ident) -> MCFGrammar +convertGrammar (gram, lng) = trace2 "language" (prt lng) $ + trace2 "modules" (prtSep " " modnames) $ + trace2 "#lin-terms" (prt (length cncdefs)) $ + tracePrt "#mcf-rules total" (prt.length) $ + concat $ + tracePrt "#mcf-rules per fun" + (\rs -> concat [" "++show n++"="++show (length r) | + (n, r) <- zip [1..] rs]) $ + map (convertDef gram lng) cncdefs + where Gr mods = grammar2canon gram + cncdefs = [ def | Mod (MTCnc modname _) _ _ _ defs <- mods, + modname `elem` modnames, + def@(CncDFun _ _ _ _ _) <- defs ] + modnames = M.allExtends gram lng + + +convertDef :: CanonGrammar -> Ident -> Def -> [MCFRule] +convertDef gram lng (CncDFun fun (CIQ _ cat) args term _) + = [ Rule (cnvXMCFCat newCat) (map cnvXMCFCat newArgs) (map cnvXMCFLin newTerm) fun | + let ctype = lookupCType gram lng cat, + instArgs <- mapM (enumerateInsts gram lng) args, + let instTerm = substitutePaths gram lng instArgs term, + newCat <- emcfCat gram lng cat instTerm, + newArgs <- mapM (extractArg gram lng instArgs) args, + let newTerm = concatMap (extractLin newArgs) $ strPaths gram lng ctype instTerm + ] + + +-- gammalt skräp: +-- mergeArgs = zipWith mergeRec +-- mergeRec (R r1) (R r2) = R (r1 ++ r2) + +extractArg :: CanonGrammar -> Ident -> [Term] -> ArgVar -> [XMCFCat] +extractArg gram lng args (A cat nr) = emcfCat gram lng cat (args !!! nr) + + +emcfCat :: CanonGrammar -> Ident -> Ident -> Term -> [XMCFCat] +emcfCat gram lng cat = map (XMCFCat cat) . parPaths gram lng (lookupCType gram lng cat) + + +extractLin :: [XMCFCat] -> (XPath, Term) -> [Lin XMCFCat XMCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `C` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (E) = [[]] + convertLin (K tok) = [[Tok tok]] + convertLin (FV terms) = concatMap convertLin terms + convertLin term = map (return . Cat) $ flattenTerm emptyXPath term + flattenTerm path (Arg (A _ nr)) = [(args !!! nr, path, fromInteger nr)] + flattenTerm path (term `P` lbl) = flattenTerm (path ++.. lbl) term + flattenTerm path (term `S` sel) = flattenTerm (path ++!! sel) term + flattenTerm path (FV terms) = concatMap (flattenTerm path) terms + flattenTerm path term = error $ "flattenTerm: \n " ++ show path ++ "\n " ++ prt term + + +enumerateInsts :: CanonGrammar -> Ident -> ArgVar -> [Term] +enumerateInsts gram lng arg@(A argCat _) = enumerate (Arg arg) (lookupCType gram lng argCat) + where enumerate path (TStr) = [ path ] + enumerate path (Cn con) = okError $ lookupParamValues gram con + enumerate path (RecType r) + = map R $ sequence [ map (lbl `Ass`) $ + enumerate (path `P` lbl) ctype | + lbl `Lbg` ctype <- r ] + enumerate path (Table s t) + = map (T s) $ sequence [ map ([term2pattern sel] `Cas`) $ + enumerate (path `S` sel) t | + sel <- enumerate (error "enumerate") s ] + + + +termPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, (CType, Term))] +termPaths gr l (TStr) term = [ (emptyXPath, (TStr, term)) ] +termPaths gr l (RecType rtype) (R record) + = [ (path ++.. lbl, value) | + lbl `Ass` term <- record, + let ctype = okError $ maybeErr "termPaths/record" $ lookupLabelling lbl rtype, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (T _ table) + = [ (path ++!! pattern2term pat, value) | + pats `Cas` term <- table, pat <- pats, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (V ptype table) + = [ (path ++!! pat, value) | + (pat, term) <- zip (okError $ allParamValues gr ptype) table, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l ctype (FV terms) + = concatMap (termPaths gr l ctype) terms +termPaths gr l (Cn pc) term = [ (emptyXPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: CanonGrammar -> Ident -> CType -> Term -> [[(XPath, Term)]] +parPaths gr l ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths gr l ctype term ] + +strPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, Term)] +strPaths gr l ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths gr l ctype term ] + + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: CanonGrammar -> Ident -> [Term] -> Term -> Term +substitutePaths gr l arguments trm = subst trm + where subst (con `Con` terms) = con `Con` map subst terms + subst (R record) = R $ map substAss record + subst (term `P` lbl) = subst term `evalP` lbl + subst (T ptype table) = T ptype $ map substCas table + subst (V ptype table) = T ptype [ [term2pattern pat] `Cas` subst term | + (pat, term) <- zip (okError $ allParamValues gr ptype) table ] + subst (term `S` select) = subst term `evalS` subst select + subst (term `C` term') = subst term `C` subst term' + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !!! arg + subst term = term + + substAss (l `Ass` term) = l `Ass` subst term + substCas (p `Cas` term) = p `Cas` subst term + + +evalP (R record) lbl = okError $ maybeErr errStr $ lookupAssign lbl record + where errStr = "evalP: " ++ prt (R record `P` lbl) +evalP (FV terms) lbl = evalFV [ evalP term lbl | term <- terms ] +evalP term lbl = term `P` lbl + +evalS t@(T _ tbl) sel = maybe (t `S` sel) id $ lookupCase sel tbl +evalS (FV terms) sel = evalFV [ term `evalS` sel | term <- terms ] +evalS term (FV sels)= evalFV [ term `evalS` sel | sel <- sels ] +evalS term sel = term `S` sel + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> FV terms + where flattenFV (FV ts) = ts + flattenFV t = [t] + + +---------------------------------------------------------------------- +-- utilities + +-- lookup a CType for an Ident +lookupCType :: CanonGrammar -> Ident -> Ident -> CType +lookupCType gr lng c = errVal defLinType $ lookupLincat gr (CIQ lng c) + +-- lookup a label in a (record / record ctype / table) +lookupAssign :: Label -> [Assign] -> Maybe Term +lookupLabelling :: Label -> [Labelling] -> Maybe CType +lookupCase :: Term -> [Case] -> Maybe Term + +lookupAssign lbl rec = listToMaybe [ term | lbl' `Ass` term <- rec, lbl == lbl' ] +lookupLabelling lbl rtyp = listToMaybe [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] +lookupCase sel tbl = listToMaybe [ term | pats `Cas` term <- tbl, sel `matchesPats` pats ] + +matchesPats :: Term -> [Patt] -> Bool +matchesPats term patterns = or [ term == pattern2term pattern | pattern <- patterns ] + +-- converting between patterns and terms +pattern2term :: Patt -> Term +term2pattern :: Term -> Patt + +pattern2term (con `PC` patterns) = con `Con` map pattern2term patterns +pattern2term (PR record) = R [ lbl `Ass` pattern2term pattern | + lbl `PAss` pattern <- record ] + +term2pattern (con `Con` terms) = con `PC` map term2pattern terms +term2pattern (R record) = PR [ lbl `PAss` term2pattern term | + lbl `Ass` term <- record ] + +-- list lookup for Integers instead of Ints +(!!!) :: [a] -> Integer -> a +xs !!! n = xs !! fromInteger n diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Strict.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Strict.hs new file mode 100644 index 000000000..a1be8af4e --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Strict.hs @@ -0,0 +1,139 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, deterministic. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Strict (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM + +{- +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM +-} + +---------------------------------------------------------------------- + +convertGrammar :: SimpleGrammar -> MGrammar +convertGrammar rules = tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SimpleRule -> CnvMonad MRule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + args_ctypes = zip3 [0..] args ctypes + instArgs <- mapM enumerateArg args_ctypes + let instTerm = substitutePaths instArgs term + newCat <- extractMCat cat ctype instTerm + newArgs <- mapM (extractArg instArgs) args + let newLinRec = strPaths ctype instTerm >>= extractLin newArgs + lintype : lintypes = map (convertLinType emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc lintype lintypes newLinRec) +convertRule _ = failure + +---------------------------------------------------------------------- + +type CnvMonad a = BacktrackM () a + +---------------------------------------------------------------------- +-- strict conversion + +--extractArg :: [Term] -> (Int, Cat, LinType) -> CnvMonad MCat +extractArg args (nr, cat, ctype) = emcfCat cat ctype (args !! nr) + +--emcfCat :: Cat -> LinType -> Term -> CnvMonad MCat +extractMCat cat ctype term = map (MCat cat) $ parPaths ctype term + +--enumerateArg :: (Int, Cat, LinType) -> CnvMonad Term +enumerateArg (nr, cat, ctype) = enumerateTerms (Arg nr cat emptyPath) ctype + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: [Term] -> Term -> Term +substitutePaths arguments = subst + where subst (Arg nr _ path) = followPath path (arguments !! nr) + subst (con :^ terms) = con :^ map subst terms + subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ] + subst (term :. lbl) = subst term +. lbl + subst (Tbl table) = Tbl [ (pat, subst term) | + (pat, term) <- table ] + subst (term :! select) = subst term +! subst select + subst (term :++ term') = subst term ?++ subst term' + subst (Variants terms) = Variants $ map subst terms + subst term = term + + +--termPaths :: CType -> STerm -> [(Path, (CType, STerm))] +termPaths ctype (Variants terms) = terms >>= termPaths ctype +termPaths (StrT) term = [ (emptyPath, (StrT, term)) ] +termPaths (RecT rtype) (Rec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let Just ctype = lookup lbl rtype, + (path, value) <- termPaths ctype term ] +termPaths (TblT _ ctype) (Tbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths ctype term ] +termPaths (ConT pc _) term = [ (emptyPath, (ConT pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +--parPaths :: CType -> STerm -> [[(Path, STerm)]] +parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $ + nubsort [ (path, value) | + (path, (ConT _, value)) <- termPaths ctype term ] + +--strPaths :: CType -> STerm -> [(Path, STerm)] +strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ] + +--extractLin :: [MCFCat] -> (Path, STerm) -> [Lin MCFCat MCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (Empty) = [[]] + convertLin (Token tok) = [[Tok tok]] + convertLin (Variants terms) = concatMap convertLin terms + convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]] + diff --git a/src/GF/OldParsing/GCFG.hs b/src/GF/OldParsing/GCFG.hs new file mode 100644 index 000000000..33a710e5d --- /dev/null +++ b/src/GF/OldParsing/GCFG.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplistic GFC format +----------------------------------------------------------------------------- + +module GF.OldParsing.GCFG where + +import GF.Printing.PrintParser + +---------------------------------------------------------------------- + +type Grammar c n l t = [Rule c n l t] +data Rule c n l t = Rule (Abstract c n) (Concrete l t) + deriving (Eq, Ord, Show) + +data Abstract cat name = Abs cat [cat] name + deriving (Eq, Ord, Show) +data Concrete lin term = Cnc lin [lin] term + deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +instance (Print c, Print n, Print l, Print t) => Print (Rule n c l t) where + prt (Rule abs cnc) = prt abs ++ " := " ++ prt cnc ++ "\n" + prtList = concatMap prt + +instance (Print c, Print n) => Print (Abstract c n) where + prt (Abs cat args name) = prt name ++ ". " ++ prt cat ++ + ( if null args then "" + else " -> " ++ prtSep " " args ) + +instance (Print l, Print t) => Print (Concrete l t) where + prt (Cnc lcat args term) = prt term ++ " : " ++ prt lcat ++ + ( if null args then "" + else " [ " ++ prtSep " " args ++ " ]" ) diff --git a/src/GF/OldParsing/GeneralChart.hs b/src/GF/OldParsing/GeneralChart.hs new file mode 100644 index 000000000..1d51da025 --- /dev/null +++ b/src/GF/OldParsing/GeneralChart.hs @@ -0,0 +1,86 @@ +---------------------------------------------------------------------- +-- | +-- Module : GeneralChart +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simple implementation of deductive chart parsing +----------------------------------------------------------------------------- + + +module GF.OldParsing.GeneralChart + (-- * Type definition + Chart, + -- * Main functions + chartLookup, + buildChart, + -- * Probably not needed + emptyChart, + chartMember, + chartInsert, + chartList, + addToChart + ) where + +-- import Trace + +import GF.Data.RedBlackSet + +-- main functions + +chartLookup :: (Ord item, Ord key) => Chart item key -> key -> [item] +buildChart :: (Ord item, Ord key) => (item -> key) -> + [Chart item key -> item -> [item]] -> [item] -> [item] + +buildChart keyof rules axioms = chartList (addItems axioms emptyChart) + where addItems [] = id + addItems (item:items) = addItems items . addItem item + + -- addItem item | trace ("+ "++show item++"\n") False = undefined + addItem item = addToChart item (keyof item) + (\chart -> foldr (consequence item) chart rules) + + consequence item rule chart = addItems (rule chart item) chart + +-- probably not needed + +emptyChart :: (Ord item, Ord key) => Chart item key +chartMember :: (Ord item, Ord key) => Chart item key -> item -> key -> Bool +chartInsert :: (Ord item, Ord key) => Chart item key -> item -> key -> Maybe (Chart item key) +chartList :: (Ord item, Ord key) => Chart item key -> [item] +addToChart :: (Ord item, Ord key) => item -> key -> (Chart item key -> Chart item key) -> Chart item key -> Chart item key + +addToChart item key after chart = maybe chart after (chartInsert chart item key) + + +-------------------------------------------------------------------------------- +-- key charts as red/black trees + +newtype Chart item key = KC (RedBlackMap key item) + deriving Show + +emptyChart = KC rbmEmpty +chartMember (KC tree) item key = rbmElem key item tree +chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree) +chartLookup (KC tree) key = rbmLookup key tree +chartList (KC tree) = concatMap snd (rbmList tree) +--------------------------------------------------------------------------------} + + +{-------------------------------------------------------------------------------- +-- key charts as unsorted association lists -- OBSOLETE! + +newtype Chart item key = SC [(key, item)] + +emptyChart = SC [] +chartMember (SC chart) item key = (key,item) `elem` chart +chartInsert (SC chart) item key = if (key,item) `elem` chart then Nothing else Just (SC ((key,item):chart)) +chartLookup (SC chart) key = [ item | (key',item) <- chart, key == key' ] +chartList (SC chart) = map snd chart +--------------------------------------------------------------------------------} + diff --git a/src/GF/OldParsing/GrammarTypes.hs b/src/GF/OldParsing/GrammarTypes.hs new file mode 100644 index 000000000..af2832bdf --- /dev/null +++ b/src/GF/OldParsing/GrammarTypes.hs @@ -0,0 +1,148 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All possible instantiations of different grammar formats used for parsing +-- +-- Plus some helper types and utilities +----------------------------------------------------------------------------- + + +module GF.OldParsing.GrammarTypes + (-- * Main parser information + PInfo(..), + -- * Multiple context-free grammars + MCFGrammar, MCFRule, MCFPInfo, + MCFCat(..), MCFLabel, + Constraint, + -- * Context-free grammars + CFGrammar, CFRule, CFPInfo, + CFProfile, CFName(..), CFCat(..), + -- * Assorted types + Cat, Name, Constr, Label, Tokn, + -- * Simplified terms + STerm(..), (+.), (+!), + -- * Record\/table paths + Path(..), emptyPath, + (++.), (++!) + ) where + +import Ident (Ident(..)) +import AbsGFC +-- import qualified GF.OldParsing.FiniteTypes.Calc as Fin +import qualified GF.OldParsing.CFGrammar as CFG +import qualified GF.OldParsing.MCFGrammar as MCFG +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import qualified GF.OldParsing.ConvertGFCtoSimple + +---------------------------------------------------------------------- + +data PInfo = PInfo { mcfg :: MCFGrammar, + cfg :: CFGrammar, + mcfPInfo :: MCFPInfo, + cfPInfo :: CFPInfo } + +type MCFGrammar = MCFG.Grammar Name MCFCat MCFLabel Tokn +type MCFRule = MCFG.Rule Name MCFCat MCFLabel Tokn +type MCFPInfo = MCFG.PInfo Name MCFCat MCFLabel Tokn + +data MCFCat = MCFCat Cat [Constraint] deriving (Eq, Ord, Show) +type MCFLabel = Path + +type Constraint = (Path, STerm) + +type CFGrammar = CFG.Grammar CFName CFCat Tokn +type CFRule = CFG.Rule CFName CFCat Tokn +type CFPInfo = CFG.PInfo CFName CFCat Tokn + +type CFProfile = [[Int]] +data CFName = CFName Name CFProfile deriving (Eq, Ord, Show) +data CFCat = CFCat MCFCat MCFLabel deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +type Cat = Ident +type Name = Ident +type Constr = CIdent + +data STerm = SArg Int Cat Path -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | SCon Constr [STerm] -- ^ constructor + | SRec [(Label, STerm)] -- ^ record + | STbl [(STerm, STerm)] -- ^ table of patterns\/terms + | SVariants [STerm] -- ^ variants + | SConcat STerm STerm -- ^ concatenation + | SToken Tokn -- ^ single token + | SEmpty -- ^ empty string + | SWildcard -- ^ wildcard pattern variable + + -- SRes CIdent -- resource identifier + -- SVar Ident -- bound pattern variable + -- SInt Integer -- integer + deriving (Eq, Ord, Show) + +(+.) :: STerm -> Label -> STerm +SRec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.), label not in record: " ++ show (SRec record) ++ " +. " ++ show lbl +SArg arg cat path +. lbl = SArg arg cat (path ++. lbl) +SVariants terms +. lbl = SVariants $ map (+. lbl) terms +sterm +. lbl = error $ "(+.): " ++ show sterm ++ " +. " ++ show lbl + +(+!) :: STerm -> STerm -> STerm +STbl table +! pat = maybe err id $ lookup pat table + where err = error $ "(+!), pattern not in table: " ++ show (STbl table) ++ " +! " ++ show pat +SArg arg cat path +! pat = SArg arg cat (path ++! pat) +SVariants terms +! pat = SVariants $ map (+! pat) terms +term +! SVariants pats = SVariants $ map (term +!) pats +sterm +! pat = error $ "(+!): " ++ show sterm ++ " +! " ++ show pat + +---------------------------------------------------------------------- + +newtype Path = Path [Either Label STerm] deriving (Eq, Ord, Show) + +emptyPath :: Path +emptyPath = Path [] + +(++.) :: Path -> Label -> Path +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path -> STerm -> Path +Path path ++! sel = Path (Right sel : path) + +------------------------------------------------------------ + +instance Print STerm where + prt (SArg n c p) = prt c ++ "@" ++ prt n ++ prt p + prt (SCon c []) = prt c + prt (SCon c ts) = prt c ++ prtList ts + prt (SRec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ ";" | (l,t) <- rec ] ++ "}" + prt (STbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ ";" | (p,t) <- tbl ] ++ "}" + prt (SVariants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (SConcat t1 t2) = prt t1 ++ "++" ++ prt t2 + prt (SToken t) = prt t + prt (SEmpty) = "[]" + prt (SWildcard) = "_" + +instance Print MCFCat where + prt (MCFCat cat params) + = prt cat ++ "{" ++ concat [ prt path ++ "=" ++ prt term ++ ";" | + (path, term) <- params ] ++ "}" + +instance Print CFName where + prt (CFName name profile) = prt name ++ prt profile + +instance Print CFCat where + prt (CFCat cat lbl) = prt cat ++ prt lbl + +instance Print Path where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src/GF/OldParsing/IncrementalChart.hs b/src/GF/OldParsing/IncrementalChart.hs new file mode 100644 index 000000000..2a941ec84 --- /dev/null +++ b/src/GF/OldParsing/IncrementalChart.hs @@ -0,0 +1,50 @@ +---------------------------------------------------------------------- +-- | +-- Module : IncrementalChart +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Implementation of /incremental/ deductive parsing, +-- i.e. parsing one word at the time. +----------------------------------------------------------------------------- + + +module GF.OldParsing.IncrementalChart + (-- * Type definitions + IncrementalChart, + -- * Functions + buildChart, + chartList + ) where + +import Array +import GF.Data.SortedList +import GF.Data.Assoc + +buildChart :: (Ord item, Ord key) => (item -> key) -> + (Int -> item -> SList item) -> + (Int -> SList item) -> + (Int, Int) -> IncrementalChart item key + +chartList :: (Ord item, Ord key) => (Int -> item -> edge) -> IncrementalChart item key -> [edge] + +type IncrementalChart item key = Array Int (Assoc key (SList item)) + +---------- + +buildChart keyof rules axioms bounds = finalChartArray + where buildState k = limit (rules k) $ axioms k + finalChartList = map buildState [fst bounds .. snd bounds] + finalChartArray = listArray bounds $ map stateAssoc finalChartList + stateAssoc state = accumAssoc id [ (keyof item, item) | item <- state ] + +chartList combine chart = [ combine k item | + (k, state) <- assocs chart, + item <- concatMap snd $ aAssocs state ] + + diff --git a/src/GF/OldParsing/MCFGrammar.hs b/src/GF/OldParsing/MCFGrammar.hs new file mode 100644 index 000000000..350c574a7 --- /dev/null +++ b/src/GF/OldParsing/MCFGrammar.hs @@ -0,0 +1,206 @@ +---------------------------------------------------------------------- +-- | +-- Module : MCFGrammar +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of multiple context-free grammars, +-- parser information and chart conversion +----------------------------------------------------------------------------- + +module GF.OldParsing.MCFGrammar + (-- * Type definitions + Grammar, + Rule(..), + Lin(..), + -- * Parser information + MCFParser, + MEdge, + edges2chart, + PInfo, + pInfo, + -- * Ranges + Range(..), + makeRange, + concatRange, + unifyRange, + unionRange, + failRange, + -- * Utilities + select, + updateIndex + ) where + +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parser modules: +import GF.OldParsing.Utilities +import GF.Printing.PrintParser + + + +select :: [a] -> [(a, [a])] +select [] = [] +select (x:xs) = (x,xs) : [ (y,x:ys) | (y,ys) <- select xs ] + +updateIndex :: Functor f => Int -> [a] -> (a -> f a) -> f [a] +updateIndex 0 (a:as) f = fmap (:as) $ f a +updateIndex n (a:as) f = fmap (a:) $ updateIndex (n-1) as f +updateIndex _ _ _ = error "ParserUtils.updateIndex: Index out of range" + + +------------------------------------------------------------ +-- grammar types + +type Grammar n c l t = [Rule n c l t] +data Rule n c l t = Rule c [c] [Lin c l t] n + deriving (Eq, Ord, Show) +data Lin c l t = Lin l [Symbol (c, l, Int) t] + deriving (Eq, Ord, Show) + +-- variants is simply several linearizations with the same label + + +------------------------------------------------------------ +-- parser information + +type PInfo n c l t = Grammar n c l t + +pInfo :: Grammar n c l t -> PInfo n c l t +pInfo = id + +type MCFParser n c l t = PInfo n c l t -> [c] -> Input t -> ParseChart n (MEdge c l) + +type MEdge c l = (c, [(l, Range)]) + +edges2chart :: (Ord n, Ord c, Ord l) => + [(n, MEdge c l, [MEdge c l])] -> ParseChart n (MEdge c l) +edges2chart edges = fmap groupPairs $ accumAssoc id $ + [ (medge, (name, medges)) | (name, medge, medges) <- edges ] + + +------------------------------------------------------------ +-- ranges as sets of int-pairs + +newtype Range = Rng (SList (Int, Int)) deriving (Eq, Ord, Show) + +makeRange :: SList (Int, Int) -> Range +makeRange rho = Rng rho + +concatRange :: Range -> Range -> Range +concatRange (Rng rho) (Rng rho') = Rng $ nubsort [ (i,k) | (i,j) <- rho, (j',k) <- rho', j==j' ] + +unifyRange :: Range -> Range -> Range +unifyRange (Rng rho) (Rng rho') = Rng $ rho <**> rho' + +unionRange :: Range -> Range -> Range +unionRange (Rng rho) (Rng rho') = Rng $ rho <++> rho' + +failRange :: Range +failRange = Rng [] + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Rule n c l t) where + prt (Rule cat args record name) + = prt name ++ ". " ++ prt cat ++ " -> " ++ prtSep " " args ++ "\n" ++ prt record + prtList = concatMap prt + +instance (Print c, Print l, Print t) => Print (Lin c l t) where + prt (Lin lbl lin) = prt lbl ++ " = " ++ prtSep " " (map (symbol prArg (show.prt)) lin) + where prArg (cat, lbl, arg) = prt cat ++ "@" ++ prt arg ++ "." ++ prt lbl + prtList = prtBeforeAfter "\t" "\n" + +instance Print Range where + prt (Rng rho) = "(" ++ prtSep "|" [ show i ++ "-" ++ show j | (i,j) <- rho ] ++ ")" + +{- +------------------------------------------------------------ +-- items & forests + +data Item n c l = Item n (MEdge c l) [[MEdge c l]] + deriving (Eq, Ord, Show) +type MEdge c l = (c, [Edge l]) + +items2forests :: (Ord n, Ord c, Ord l) => Edge ((c, l) -> Bool) -> [Item n c l] -> [ParseForest n] + +---------- + +items2forests (Edge i0 k0 startCat) items + = concatMap edge2forests $ filter checkEdge $ aElems chart + where edge2forests (cat, []) = [FMeta] + edge2forests edge = filter checkForest $ map item2forest (chart ? edge) + + item2forest (Item name _ children) = FNode name [ forests | edges <- children, + forests <- mapM edge2forests edges ] + + checkEdge (cat, [Edge i k lbl]) = i == i0 && k == k0 && startCat (cat, lbl) + checkEdge _ = False + + checkForest (FNode _ children) = not (null children) + + chart = accumAssoc id [ (edge, item) | item@(Item _ edge _) <- items ] +-} + + +------------------------------------------------------------ +-- grammar checking +{- +--checkGrammar :: (Ord c, Ord l, Print n, Print c, Print l, Print t) => Grammar n c l t -> [String] + +checkGrammar rules + = do rule@(Rule cat rhs record name) <- rules + if null record + then [ "empty linearization record in rule: " ++ prt rule ] + else [ "category does not exist: " ++ prt rcat ++ "\n" ++ + " - in rule: " ++ prt rule | + rcat <- rhs, rcat `notElem` lhsCats ] ++ + do Lin _ lin <- record + Cat (arg, albl) <- lin + if arg<0 || arg>=length rhs + then [ "argument index out of range: " ++ show arg ++ "/" ++ prt albl ++ "\n" ++ + " - in rule: " ++ prt rule ] + else [ "label does not exist: " ++ prt albl ++ "\n" ++ + " - from rule: " ++ prt rule ++ + " - in rule: " ++ prt arule | + arule@(Rule _ acat _ arecord) <- rules, + acat == rhs !! arg, + albl `notElem` [ lbl | Lin lbl _ <- arecord ] ] + where lhsCats = nubsort [ cat | Rule _ cat _ _ <- rules ] +-} + + + + + +{----- +------------------------------------------------------------ +-- simplifications + +splitMRule :: (Ord n, Ord c, Ord l, Ord t) => Grammar n c l t -> Rule n c l t -> [Rule n c l t] +splitMRule rules (Rule name cat args record) = nubsort [ (Rule name cat args splitrec) | + (cat', lbls) <- rhsCats, cat == cat', + let splitrec = [ lin | lin@(Lin lbl _) <- record, lbl `elem` lbls ] ] + where rhsCats = limit rhsC lhsCats + lhsCats = nubsort [ (cat, [lbl]) | Rule _ cat _ record <- rules, Lin lbl _ <- record ] + rhsC (cat, lbls) = nubsort [ (rcat, rlbls) | + Rule _ cat' rhs lins <- rules, cat == cat', + (arg, rcat) <- zip [0..] rhs, + let rlbls = nubsort [ rlbl | Lin lbl lin <- lins, lbl `elem` lbls, + Cat (arg', rlbl) <- lin, arg == arg' ], + not $ null rlbls + ] + + +----} + + + diff --git a/src/GF/OldParsing/ParseCF.hs b/src/GF/OldParsing/ParseCF.hs new file mode 100644 index 000000000..0ed19c786 --- /dev/null +++ b/src/GF/OldParsing/ParseCF.hs @@ -0,0 +1,82 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCF +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Chart parsing of grammars in CF format +----------------------------------------------------------------------------- + +module GF.OldParsing.ParseCF (parse, alternatives) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import GF.Data.SortedList (nubsort) +import GF.Data.Assoc +import qualified CF +import qualified CFIdent as CFI +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import qualified GF.OldParsing.ParseCFG as P + +type Token = CFI.CFTok +type Name = CFI.CFFun +type Category = CFI.CFCat + +alternatives :: [(String, [String])] +alternatives = [ ("gb", ["G","GB","_gen","_genBU"]), + ("gt", ["GT","_genTD"]), + ("ibn", ["","I","B","IB","IBN","_inc","BU","_incBU"]), + ("ibb", ["BB","IBB","BU_BUF","_incBU_BUF"]), + ("ibt", ["BT","IBT","BU_TDF","_incBU_TDF"]), + ("iba", ["BA","IBA","BU_BTF","BU_TBF","_incBU_BTF","_incBU_TBF"]), + ("itn", ["T","IT","ITN","TD","_incTD"]), + ("itb", ["TB","ITB","TD_BUF","_incTD_BUF"]) + ] + +parse :: String -> CF.CF -> Category -> CF.CFParser +parse = buildParser . P.parse + +buildParser :: CFParser Name Category Token -> CF.CF -> Category -> CF.CFParser +buildParser parser cf start tokens = trace "ParseCF" $ + (parseResults, parseInformation) + where parseInformation = prtSep "\n" trees + parseResults = {-take maxTake-} [ (tree2cfTree t, []) | t <- trees ] + theInput = input tokens + edges = tracePrt "#edges" (prt.length) $ + parser pInf [start] theInput + chart = tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + edges2chart theInput $ map (fmap addCategory) edges + forests = tracePrt "#forests" (prt.length) $ + chart2forests chart (const False) $ + uncurry Edge (inputBounds theInput) start + trees = tracePrt "#trees" (prt.length) $ + concatMap forest2trees forests + pInf = pInfo $ cf2grammar cf (nubsort tokens) + + +addCategory (Rule cat rhs name) = Rule cat rhs (name, cat) + +tree2cfTree (TNode (name, cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees)) + +cf2grammar :: CF.CF -> [Token] -> Grammar Name Category Token +cf2grammar cf tokens = [ Rule cat rhs name | + (name, (cat, rhs0)) <- cfRules, + rhs <- mapM item2symbol rhs0 ] + where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++ + CF.rulesOfCF cf + item2symbol (CF.CFNonterm cat) = [Cat cat] + item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens + +-- maxTake :: Int +-- maxTake = 500 +-- maxTake = maxBound + + diff --git a/src/GF/OldParsing/ParseCFG.hs b/src/GF/OldParsing/ParseCFG.hs new file mode 100644 index 000000000..7cba41175 --- /dev/null +++ b/src/GF/OldParsing/ParseCFG.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main parsing module for context-free grammars +----------------------------------------------------------------------------- + + +module GF.OldParsing.ParseCFG (parse) where + +import Char (toLower) +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import qualified GF.OldParsing.ParseCFG.General as PGen +import qualified GF.OldParsing.ParseCFG.Incremental as PInc + + +parse :: (Ord n, Ord c, Ord t, Show t) => + String -> CFParser n c t +parse = decodeParser . map toLower + +decodeParser ['g',s] = PGen.parse (decodeStrategy s) +decodeParser ['i',s,f] = PInc.parse (decodeStrategy s, decodeFilter f) +decodeParser _ = decodeParser "ibn" + +decodeStrategy 'b' = (True, False) +decodeStrategy 't' = (False, True) + +decodeFilter 'a' = (True, True) +decodeFilter 'b' = (True, False) +decodeFilter 't' = (False, True) +decodeFilter 'n' = (False, False) + + + + diff --git a/src/GF/OldParsing/ParseCFG/General.hs b/src/GF/OldParsing/ParseCFG/General.hs new file mode 100644 index 000000000..7ac395ba3 --- /dev/null +++ b/src/GF/OldParsing/ParseCFG/General.hs @@ -0,0 +1,83 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG.General +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:57 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Several implementations of CFG chart parsing +----------------------------------------------------------------------------- + +module GF.OldParsing.ParseCFG.General + (parse, Strategy) where + +import GF.System.Tracing + +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import GF.OldParsing.GeneralChart +import GF.Data.Assoc + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser n c t +parse strategy grammar start = extract . process strategy grammar start + +type Strategy = (Bool, Bool) -- (isBottomup, isTopdown) + +extract :: [Item n (Symbol c t)] -> [Edge (Rule n c t)] +extract edges = + edges' + where edges' = [ Edge j k (Rule cat (reverse found) name) | + Edge j k (Cat cat, found, [], Just name) <- edges ] + +process :: (Ord n, Ord c, Ord t) => Strategy -> PInfo n c t -> + [c] -> Input t -> [Item n (Symbol c t)] +process (isBottomup, isTopdown) grammar start + = trace2 "CFParserGeneral" ((if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = Chart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (Rule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src/GF/OldParsing/ParseCFG/Incremental.hs b/src/GF/OldParsing/ParseCFG/Incremental.hs new file mode 100644 index 000000000..882fad26e --- /dev/null +++ b/src/GF/OldParsing/ParseCFG/Incremental.hs @@ -0,0 +1,142 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG.Incremental +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:57 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Incremental chart parsing for context-free grammars +----------------------------------------------------------------------------- + + + +module GF.OldParsing.ParseCFG.Incremental + (parse, Strategy) where + +import GF.System.Tracing +import GF.Printing.PrintParser + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import Operations +-- parser modules: +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import GF.OldParsing.IncrementalChart + + +type Strategy = ((Bool, Bool), (Bool, Bool)) -- (predict:(BU, TD), filter:(BU, TD)) + +parse :: (Ord n, Ord c, Ord t, Show t) => + Strategy -> CFParser n c t +parse ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input = + trace2 "CFParserIncremental" + ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + finalEdges + where finalEdges = [ Edge j k (Rule cat (reverse found) name) | + (k, state) <- + tracePrt "#passiveChart" + (prt . map (length . (?Passive) . snd)) $ + tracePrt "#activeChart" + (prt . map (length . concatMap snd . aAssocs . snd)) $ + assocs finalChart, + Item j (Rule cat _Nil name) found <- state ? Passive ] + + finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = --tracePrt ("axioms 0") (prtSep "\n") $ + union $ map (tdInfer 0) start + axioms k = --tracePrt ("axioms "++show k) (prtSep "\n") $ + union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (Rule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(Rule _ (Cat next:_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (Cat next:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = --tracePrt ("buInfer "++show(j,k)++" "++prt next) (prtSep "\n") $ + buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ --tracePrt ("buRules "++prt next) (prtSep "\n") $ + bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (Rule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (Rule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | Rule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +-- type declarations, items & keys +data Item n c t = Item Int (Rule n c t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive + deriving (Eq, Ord, Show) + +keyof :: Item n c t -> IKey c t +keyof (Item _ (Rule _ (next:_) _) _) = Active next +keyof (Item _ (Rule _ [] _) _) = Passive + +forward :: Rule n c t -> Rule n c t +forward (Rule cat (_:rest) name) = Rule cat rest name + + +instance (Print n, Print c, Print t) => Print (Item n c t) where + prt (Item k (Rule cat rhs name) syms) + = "<" ++show k++ ": "++prt name++". "++ + prt cat++" -> "++prt rhs++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive) = "!" + + diff --git a/src/GF/OldParsing/ParseGFC.hs b/src/GF/OldParsing/ParseGFC.hs new file mode 100644 index 000000000..ebd4dc782 --- /dev/null +++ b/src/GF/OldParsing/ParseGFC.hs @@ -0,0 +1,177 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseGFC +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.OldParsing.ParseGFC (newParser) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import qualified PrGrammar + +-- Haskell modules +import Monad +-- import Ratio ((%)) +-- GF modules +import qualified Grammar as GF +import Values +import qualified Macros +import qualified Modules as Mods +import qualified AbsGFC +import qualified Ident +import qualified ShellState as SS +import Operations +import GF.Data.SortedList +-- Conversion and parser modules +import GF.Data.Assoc +import GF.OldParsing.Utilities +-- import ConvertGrammar +import GF.OldParsing.GrammarTypes +import qualified GF.OldParsing.MCFGrammar as M +import qualified GF.OldParsing.CFGrammar as C +import qualified GF.OldParsing.ParseMCFG as PM +import qualified GF.OldParsing.ParseCFG as PC +--import MCFRange + +newParser :: String -> SS.StateGrammar -> GF.Cat -> String -> Err [GF.Term] + +-- parsing via MCFG +newParser (m:strategy) gr (_, startCat) inString + | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms + where terms = map (ptree2term abstract) trees + trees = --tracePrt "trees" (prtBefore "\n") $ + tracePrt "#trees" (prt . length) $ + concatMap forest2trees forests + forests = --tracePrt "forests" (prtBefore "\n") $ + tracePrt "#forests" (prt . length) $ + concatMap (chart2forests chart isMeta) finalEdges + isMeta = null . snd + finalEdges = tracePrt "finalEdges" (prtBefore "\n") $ + filter isFinalEdge $ aElems chart +-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) | +-- let (i, j) = inputBounds inTokens, +-- E.Rule cat _ [E.Lin lbl _] _ <- pInf, +-- isStartCat cat ] + isFinalEdge (cat, rows) + = isStartCat cat && + inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ] + chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + PM.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $ + mcfPInfo $ SS.statePInfoOld gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ] + isStartCat (MCFCat cat _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + +-- parsing via CFG +newParser (c:strategy) gr (_, startCat) inString + | c=='c' || c=='C' = trace2 "Parser" "CFG" $ Ok terms + where terms = -- tracePrt "terms" (unlines . map PrGrammar.prt) $ + map (ptree2term abstract) trees + trees = tracePrt "#trees" (prt . length) $ + --tracePrt "trees" (prtSep "\n") $ + concatMap forest2trees forests + forests = tracePrt "$cfForests" (prt) $ -- . length) $ + tracePrt "forests" (unlines . map prt) $ + concatMap convertFromCFForest cfForests + cfForests= tracePrt "cfForests" (unlines . map prt) $ + concatMap (chart2forests chart (const False)) finalEdges + finalEdges = tracePrt "finalChartEdges" prt $ + map (uncurry Edge (inputBounds inTokens)) starters + chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + C.edges2chart inTokens edges + edges = --tracePrt "finalEdges" + --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $ + tracePrt "#edges" (prt . length) $ + PC.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = cfPInfo $ SS.statePInfoOld gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ map fst $ aAssocs $ C.topdownRules pInf + isStartCat (CFCat (MCFCat cat _) _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + --ifNull (Ident.identC "ABS") last $ + --[i | (i, Mods.ModMod m) <- Mods.modules (SS.grammar gr), Mods.isModAbs m] + +newParser "" gr start inString = newParser "c" gr start inString + +newParser opt gr (_,cat) _ = + Bad ("new-parser '" ++ opt ++ "' not defined yet") + +ptree2term :: Ident.Ident -> ParseTree Name -> GF.Term +ptree2term a (TNode f ts) = Macros.mkApp (Macros.qq (a,f)) (map (ptree2term a) ts) +ptree2term a (TMeta) = GF.Meta (GF.MetaSymb 0) + +---------------------------------------------------------------------- +-- conversion and unification of forests + +convertFromCFForest :: ParseForest CFName -> [ParseForest Name] +convertFromCFForest (FNode (CFName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] + checkProfile forests = unifyManyForests . map (forests !!) + -- foldM unifyForests FMeta . map (forests !!) + +isCoercion Ident.IW = True +isCoercion _ = False + +unifyManyForests :: Eq n => [ParseForest n] -> [ParseForest n] +unifyManyForests [] = [FMeta] +unifyManyForests [f] = [f] +unifyManyForests (f:g:fs) = do h <- unifyForests f g + unifyManyForests (h:fs) + +unifyForests :: Eq n => ParseForest n -> ParseForest n -> [ParseForest n] +unifyForests FMeta forest = [forest] +unifyForests forest FMeta = [forest] +unifyForests (FNode name1 children1) (FNode name2 children2) + = [ FNode name1 children | name1 == name2, not (null children) ] + where children = [ forests | forests1 <- children1, forests2 <- children2, + forests <- zipWithM unifyForests forests1 forests2 ] + + + +{- +---------------------------------------------------------------------- +-- conversion and unification for parse trees instead of forests + +convertFromCFTree :: ParseTree CFName -> [ParseTree Name] +convertFromCFTree (TNode (CFName name profile) children0) + = [ TNode name children | + children1 <- mapM convertFromCFTree children0, + children <- mapM (checkProfile children1) profile ] + where checkProfile trees = unifyManyTrees . map (trees !!) + +unifyManyTrees :: Eq n => [ParseTree n] -> [ParseTree n] +unifyManyTrees [] = [TMeta] +unifyManyTrees [f] = [f] +unifyManyTrees (f:g:fs) = do h <- unifyTrees f g + unifyManyTrees (h:fs) + +unifyTrees TMeta tree = [tree] +unifyTrees tree TMeta = [tree] +unifyTrees (TNode name1 children1) (TNode name2 children2) + = [ TNode name1 children | name1 == name2, + children <- zipWithM unifyTrees children1 children2 ] + +-} + diff --git a/src/GF/OldParsing/ParseMCFG.hs b/src/GF/OldParsing/ParseMCFG.hs new file mode 100644 index 000000000..ad29e5f2f --- /dev/null +++ b/src/GF/OldParsing/ParseMCFG.hs @@ -0,0 +1,37 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main module for MCFG parsing +----------------------------------------------------------------------------- + + +module GF.OldParsing.ParseMCFG (parse) where + +import Char (toLower) +import GF.OldParsing.Utilities +import GF.OldParsing.MCFGrammar +import qualified GF.OldParsing.ParseMCFG.Basic as PBas +import GF.Printing.PrintParser +---- import qualified MCFParserBasic2 as PBas2 -- file not found AR + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + String -> MCFParser n c l t +parse str = decodeParser (map toLower str) + +decodeParser "b" = PBas.parse +---- decodeParser "c" = PBas2.parse +decodeParser _ = decodeParser "b" + + + + diff --git a/src/GF/OldParsing/ParseMCFG/Basic.hs b/src/GF/OldParsing/ParseMCFG/Basic.hs new file mode 100644 index 000000000..7b0d01dde --- /dev/null +++ b/src/GF/OldParsing/ParseMCFG/Basic.hs @@ -0,0 +1,156 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseMCFG.Basic +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:57 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplest possible implementation of MCFG chart parsing +----------------------------------------------------------------------------- + +module GF.OldParsing.ParseMCFG.Basic + (parse) where + +import GF.System.Tracing + +import Ix +import GF.OldParsing.Utilities +import GF.OldParsing.MCFGrammar +import GF.OldParsing.GeneralChart +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Printing.PrintParser + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + MCFParser n c l t +parse grammar start = edges2chart . extract . process grammar + + +extract :: [Item n c l t] -> [(n, MEdge c l, [MEdge c l])] +extract items = tracePrt "#passives" (prt.length) $ + --trace2 "passives" (prtAfter "\n" [ i | i@(PItem _) <- items ]) $ + [ item | PItem item <- items ] + + +process :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + Grammar n c l t -> Input t -> [Item n c l t] +process grammar input = buildChart keyof rules axioms + where axioms = initial + rules = [combine, scan, predict] + + -- axioms + initial = traceItems "axiom" [] $ + [ nextLin name tofind (addNull cat) (map addNull args) | + Rule cat args tofind name <- grammar ] + + addNull a = (a, []) + + -- predict + predict chart i1@(Item name tofind rho (Lin lbl []) (cat, found0) children) + = traceItems "predict" [i1] + [ nextLin name tofind (cat, found) children | + let found = insertRow lbl rho found0 ] + predict _ _ = [] + + -- combine + combine chart active@(Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) + = do passive <- chartLookup chart (Passive cat) + combineItems active passive + combine chart passive@(PItem (_, (cat, _), _)) + = do active <- chartLookup chart (Active cat) + combineItems active passive + combine _ _ = [] + + combineItems i1@(Item name tofind rho0 (Lin lbl (Cat(_,lbl',nr):rest)) found children0) + i2@(PItem (_, found', _)) + = traceItems "combine" [i1,i2] + [ Item name tofind rho (Lin lbl rest) found children | + rho1 <- lookupLbl lbl' found', + let rho = concatRange rho0 rho1, + children <- updateChild nr children0 (snd found') ] + + -- scan + scan chart i1@(Item name tofind rho0 (Lin lbl (Tok tok:rest)) found children) + = traceItems "scan" [i1] + [ Item name tofind rho (Lin lbl rest) found children | + let rho = concatRange rho0 (rangeOfToken tok) ] + scan _ _ = [] + + -- utilities + rangeOfToken tok = makeRange $ inputToken input ? tok + + zeroRange = makeRange $ map (\i -> (i,i)) $ range $ inputBounds input + + nextLin name [] found children = PItem (name, found, children) + nextLin name (lin : tofind) found children + = Item name tofind zeroRange lin found children + +lookupLbl a = map snd . filter (\b -> a == fst b) . snd +updateChild nr children found = updateIndex nr children $ + \child -> if null (snd child) + then [ (fst child, found) ] + else [ child | snd child == found ] + +insertRow lbl rho [] = [(lbl, rho)] +insertRow lbl rho rows'@(row@(lbl', rho') : rows) + = case compare lbl lbl' of + LT -> row : insertRow lbl rho rows + GT -> (lbl, rho) : rows' + EQ -> (lbl, unionRange rho rho') : rows + + +-- internal representation of parse items + +data Item n c l t + = Item n [Lin c l t] -- tofind + Range (Lin c l t) -- current row + (MEdge c l) -- found rows + [MEdge c l] -- found children + | PItem (n, MEdge c l, [MEdge c l]) + deriving (Eq, Ord, Show) + +data IKey c = Passive c | Active c | AnyItem + deriving (Eq, Ord, Show) + +keyof (PItem (_, (cat, _), _)) = Passive cat +keyof (Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) = Active cat +keyof _ = AnyItem + + +-- tracing + +--type TraceItem = Item String String Char String +traceItems :: (Print n, Print l, Print c, Print t) => + String -> [Item n c l t] -> [Item n c l t] -> [Item n c l t] +traceItems rule trigs items + | null items || True = items + | otherwise = trace ("\n" ++ rule ++ ":" ++ + unlines [ "\t" ++ prt i | i <- trigs ] ++ "=>" ++ + unlines [ "\t" ++ prt i | i <- items ]) items + +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Item n c l t) where + prt (Item name tofind rho lin (cat, found) children) + = prt name ++ ". " ++ prt cat ++ prtRhs (map fst children) ++ + " { " ++ prt rho ++ prt lin ++ " ; " ++ + concat [ prt lbl ++ "=" ++ prt ln ++ " " | + Lin lbl ln <- tofind ] ++ "; " ++ + concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl, rho) <- found ] ++ "} " ++ + concat [ "[ " ++ concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl,rho) <- child ] ++ "] " | + child <- map snd children ] + prt (PItem (name, edge, edges)) + = prt name ++ ". " ++ prt edge ++ prtRhs edges + +prtRhs [] = "" +prtRhs rhs = " -> " ++ prtSep " " rhs + diff --git a/src/GF/OldParsing/SimpleGFC.hs b/src/GF/OldParsing/SimpleGFC.hs new file mode 100644 index 000000000..456c44685 --- /dev/null +++ b/src/GF/OldParsing/SimpleGFC.hs @@ -0,0 +1,161 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplistic GFC format +----------------------------------------------------------------------------- + +module GF.OldParsing.SimpleGFC where + +import qualified AbsGFC +import qualified Ident + +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import Operations (ifNull) + +---------------------------------------------------------------------- + +type Name = Ident.Ident +type Cat = Ident.Ident +type Constr = AbsGFC.CIdent +type Var = Ident.Ident +type Token = AbsGFC.Tokn +type Label = AbsGFC.Label + +constr2name :: Constr -> Name +constr2name (AbsGFC.CIQ _ name) = name + +---------------------------------------------------------------------- + +type Grammar = [Rule] +data Rule = Rule Name Typing (Maybe (Term, CType)) + deriving (Eq, Ord, Show) + +type Typing = (Type, [Decl]) + +data Decl = Var ::: Type + deriving (Eq, Ord, Show) +data Type = Cat :@ [Atom] + deriving (Eq, Ord, Show) +data Atom = ACon Constr + | AVar Var + deriving (Eq, Ord, Show) + +data CType = RecT [(Label, CType)] + | TblT CType CType + | ConT Constr [Term] + | StrT + deriving (Eq, Ord, Show) + + +data Term = Arg Int Cat Path -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | Constr :^ [Term] -- ^ constructor + | Rec [(Label, Term)] -- ^ record + | Term :. Label -- ^ record projection + | Tbl [(Term, Term)] -- ^ table of patterns\/terms + | Term :! Term -- ^ table selection + | Variants [Term] -- ^ variants + | Term :++ Term -- ^ concatenation + | Token Token -- ^ single token + | Empty -- ^ empty string + | Wildcard -- ^ wildcard pattern variable + | Var Var -- ^ bound pattern variable + + -- Res CIdent -- resource identifier + -- Int Integer -- integer + deriving (Eq, Ord, Show) + + +---------------------------------------------------------------------- + +(+.) :: Term -> Label -> Term +Variants terms +. lbl = Variants $ map (+. lbl) terms +Rec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.), label not in record: " ++ show (Rec record) ++ " +. " ++ show lbl +Arg arg cat path +. lbl = Arg arg cat (path ++. lbl) +term +. lbl = term :. lbl + +(+!) :: Term -> Term -> Term +Variants terms +! pat = Variants $ map (+! pat) terms +term +! Variants pats = Variants $ map (term +!) pats +Tbl table +! pat = maybe err id $ lookup pat table + where err = error $ "(+!), pattern not in table: " ++ show (Tbl table) ++ " +! " ++ show pat +Arg arg cat path +! pat = Arg arg cat (path ++! pat) +term +! pat = term :! pat + +(?++) :: Term -> Term -> Term +Variants terms ?++ term = Variants $ map (?++ term) terms +term ?++ Variants terms = Variants $ map (term ?++) terms +Empty ?++ term = term +term ?++ Empty = term +term1 ?++ term2 = term1 :++ term2 + +---------------------------------------------------------------------- + +newtype Path = Path [Either Label Term] deriving (Eq, Ord, Show) + +emptyPath :: Path +emptyPath = Path [] + +(++.) :: Path -> Label -> Path +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path -> Term -> Path +Path path ++! sel = Path (Right sel : path) + +---------------------------------------------------------------------- + +instance Print Rule where + prt (Rule name (typ, args) term) + = prt name ++ " : " ++ + prtAfter " " args ++ + (if null args then "" else "-> ") ++ + prt typ ++ + maybe "" (\(t,c) -> " := " ++ prt t ++ " : " ++ prt c) term ++ + "\n" + prtList = concatMap prt + +instance Print Decl where + prt (var ::: typ) = "(" ++ prt var ++ ":" ++ prt typ ++ ")" + +instance Print Type where + prt (cat :@ ats) = prt cat ++ prtList ats + +instance Print Atom where + prt (ACon con) = prt con + prt (AVar var) = "?" ++ prt var + +instance Print CType where + prt (RecT rec) = "{" ++ concat [ prt l ++ ":" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}" + prt (TblT t1 t2) = "(" ++ prt t1 ++ " => " ++ prt t2 ++ ")" + prt (ConT t ts) = prt t ++ "(|" ++ prtSep "|" ts ++ "|)" + prt (StrT) = "Str" + +instance Print Term where + prt (Arg n c p) = prt c ++ "@" ++ prt n ++ prt p + prt (c :^ []) = prt c + prt (c :^ ts) = prt c ++ prtList ts + prt (Rec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}" + prt (Tbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ "; " | (p,t) <- tbl ] ++ "}" + prt (Variants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (t1 :++ t2) = prt t1 ++ "++" ++ prt t2 + prt (Token t) = prt t + prt (Empty) = "[]" + prt (Wildcard) = "_" + prt (term :. lbl) = prt term ++ "." ++ prt lbl + prt (term :! sel) = prt term ++ " ! " ++ prt sel + prt (Var var) = "?" ++ prt var + +instance Print Path where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src/GF/OldParsing/Utilities.hs b/src/GF/OldParsing/Utilities.hs new file mode 100644 index 000000000..22d168973 --- /dev/null +++ b/src/GF/OldParsing/Utilities.hs @@ -0,0 +1,188 @@ +---------------------------------------------------------------------- +-- | +-- Module : Parsing.Utilities +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions to be used when parsing +----------------------------------------------------------------------------- + + +module GF.OldParsing.Utilities + ( -- * Symbols + Symbol(..), symbol, mapSymbol, + -- * Edges + Edge(..), + -- * Parser input + Input(..), makeInput, input, inputMany, + -- * charts, parse forests & trees + ParseChart, ParseForest(..), ParseTree(..), + chart2forests, forest2trees + ) where + +-- haskell modules: +import Monad +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parsing modules: +import GF.Printing.PrintParser + +------------------------------------------------------------ +-- symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u + +---------- + +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + + +------------------------------------------------------------ +-- edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- parser input + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- charts, parse forests & trees + +type ParseChart n e = Assoc e [(n, [[e]])] + +data ParseForest n = FNode n [[ParseForest n]] | FMeta + deriving (Eq, Ord, Show) + +data ParseTree n = TNode n [ParseTree n] | TMeta + deriving (Eq, Ord, Show) + +chart2forests :: Ord e => ParseChart n e -> (e -> Bool) -> e -> [ParseForest n] + +--filterCoercions :: (n -> Bool) -> ParseForest n -> [ParseForest n] + +forest2trees :: ParseForest n -> [ParseTree n] + +instance Functor ParseTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap f (TMeta) = TMeta + +instance Functor ParseForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap f (FMeta) = FMeta + +---------- + +chart2forests chart isMeta = edge2forests + where item2forest (name, children) = FNode name $ + do edges <- children + mapM edge2forests edges + edge2forests edge + | isMeta edge = [FMeta] + | otherwise = filter checkForest $ map item2forest $ chart ? edge + checkForest (FNode _ children) = not (null children) + +-- filterCoercions _ (FMeta) = [FMeta] +-- filterCoercions isCoercion (FNode s forests) +-- | isCoercion s = do [forest] <- forests ; filterCoercions isCoercion forest +-- | otherwise = FNode s $ do children <- forests ; mapM (filterCoercions isCoercion) + +forest2trees (FNode s forests) = map (TNode s) $ forests >>= mapM forest2trees +forest2trees (FMeta) = [TMeta] + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow.prt) + prtList = prtSep " " + +simpleShow :: String -> String +simpleShow s = "\"" ++ concatMap mkEsc s ++ "\"" + where + mkEsc :: Char -> String + mkEsc c = case c of + _ | elem c "\\\"" -> '\\' : [c] + '\n' -> "\\n" + '\t' -> "\\t" + _ -> [c] + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (ParseTree s) where + prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}" + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (ParseForest s) where + prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}" + prt (FMeta) = "?" + prtList = prtAfter "\n" + + |
