diff options
| author | peb <unknown> | 2005-04-11 12:57:45 +0000 |
|---|---|---|
| committer | peb <unknown> | 2005-04-11 12:57:45 +0000 |
| commit | ac00f77dadd4d447803dd7cab5a36f47365325d0 (patch) | |
| tree | 2fd02b19234f8d1fcc20ee67a2367d4d4eebfcd8 /src/GF/Parsing | |
| parent | f6273f7033b85eea9a8d0cc7d31e9697ba95d5b7 (diff) | |
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing')
| -rw-r--r-- | src/GF/Parsing/CFG.hs | 44 | ||||
| -rw-r--r-- | src/GF/Parsing/CFG/General.hs | 101 | ||||
| -rw-r--r-- | src/GF/Parsing/CFG/Incremental.hs | 148 | ||||
| -rw-r--r-- | src/GF/Parsing/CFG/PInfo.hs | 95 | ||||
| -rw-r--r-- | src/GF/Parsing/GFC.hs | 187 |
5 files changed, 575 insertions, 0 deletions
diff --git a/src/GF/Parsing/CFG.hs b/src/GF/Parsing/CFG.hs new file mode 100644 index 000000000..6af1de8ac --- /dev/null +++ b/src/GF/Parsing/CFG.hs @@ -0,0 +1,44 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG parsing +----------------------------------------------------------------------------- + +module GF.NewParsing.CFG + (parseCF, module GF.NewParsing.CFG.PInfo) where + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.NewParsing.CFG.PInfo + +import qualified GF.NewParsing.CFG.Incremental as Inc +import qualified GF.NewParsing.CFG.General as Gen + +---------------------------------------------------------------------- +-- parsing + +--parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t +parseCF "gb" = Gen.parse bottomup +parseCF "gt" = Gen.parse topdown +parseCF "ib" = Inc.parse (bottomup, noFilter) +parseCF "it" = Inc.parse (topdown, noFilter) +parseCF "ibFT" = Inc.parse (bottomup, topdown) +parseCF "ibFB" = Inc.parse (bottomup, bottomup) +parseCF "ibFTB" = Inc.parse (bottomup, bothFilters) +parseCF "itF" = Inc.parse (topdown, bottomup) +-- default parser: +parseCF _ = parseCF "gb" + +bottomup = (True, False) +topdown = (False, True) +noFilter = (False, False) +bothFilters = (True, True) + + diff --git a/src/GF/Parsing/CFG/General.hs b/src/GF/Parsing/CFG/General.hs new file mode 100644 index 000000000..ea67ec94f --- /dev/null +++ b/src/GF/Parsing/CFG/General.hs @@ -0,0 +1,101 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG parsing with a general chart +----------------------------------------------------------------------------- + +module GF.NewParsing.CFG.General + (parse, Strategy) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.NewParsing.CFG.PInfo +import GF.NewParsing.GeneralChart +import GF.Data.Assoc +import Monad + +--parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t +parse strategy grammar start = extract . + tracePrt "#internal chart" (prt . length . chartList) . + process strategy grammar start + +type Strategy = (Bool, Bool) -- ^ (isBottomup, isTopdown) + +extract :: (Ord n, Ord c, Ord t) => + IChart n (Symbol c t) -> CFChart c n t +extract chart = [ CFRule (Edge j k cat) daughters name | + Edge j k (Cat cat, found, [], Just name) <- chartList chart, + daughters <- path j k (reverse found) ] + where path i k [] = [ [] | i==k ] + path i k (Tok tok : found) + = [ Tok tok : daughters | + daughters <- path (i+1) k found ] + path i k (Cat cat : found) + = [ Cat (Edge i j cat) : daughters | + Edge _i j _cat <- chartLookup chart (Passive (Cat cat) i), + daughters <- path j k found ] + + +process :: (Ord n, Ord c, Ord t) => + Strategy -- ^ (isBottomup, isTopdown) :: (Bool, Bool) + -> CFPInfo c n t -- ^ parser information (= grammar) + -> [c] -- ^ list of starting categories + -> Input t -- ^ input string + -> IChart n (Symbol c t) +process (isBottomup, isTopdown) grammar start + = trace2 "CFParserGeneral" ((if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = ParseChart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k + = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (CFRule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src/GF/Parsing/CFG/Incremental.hs b/src/GF/Parsing/CFG/Incremental.hs new file mode 100644 index 000000000..af0f79bf0 --- /dev/null +++ b/src/GF/Parsing/CFG/Incremental.hs @@ -0,0 +1,148 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Incremental chart parsing for CFG +----------------------------------------------------------------------------- + + +module GF.NewParsing.CFG.Incremental + (parse, Strategy) where + +import GF.System.Tracing +import GF.Infra.Print + +import Array + +import Operations +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.NewParsing.CFG.PInfo +import GF.NewParsing.IncrementalChart + + +type Strategy = ((Bool, Bool), (Bool, Bool)) -- ^ (predict:(BU, TD), filter:(BU, TD)) + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t +parse strategy grammar start = extract . + tracePrt "#internal chart" (prt . length . flip chartList const) . + process strategy grammar start + +extract :: (Ord n, Ord c, Ord t) => + IChart c n t -> CFChart c n t +extract finalChart = [ CFRule (Edge j k cat) daughters name | + (k, Item j (CFRule cat [] name) found) <- chartList finalChart (,), + daughters <- path j k (reverse found) ] + where path i k [] = [ [] | i==k ] + path i k (Tok tok : found) + = [ Tok tok : daughters | + daughters <- path (i+1) k found ] + path i k (Cat cat : found) + = [ Cat (Edge i j cat) : daughters | + Item j _ _ <- chartLookup finalChart i (Passive cat), + daughters <- path j k found ] + +process :: (Ord n, Ord c, Ord t) => + Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t +process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input + = trace2 "CFParserIncremental" ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + finalChart + where finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = union $ map (tdInfer 0) start + axioms k = union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (CFRule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(CFRule _ (sym@(Cat next):_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (sym:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules, see rules above and tdPredict below + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (CFRule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (CFRule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | CFRule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +---------------------------------------------------------------------- +-- type declarations, items & keys + +data Item c n t = Item Int (CFRule c n t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive c + deriving (Eq, Ord, Show) + +type IChart c n t = IncrementalChart (Item c n t) (IKey c t) + +keyof :: Item c n t -> IKey c t +keyof (Item _ (CFRule _ (next:_) _) _) = Active next +keyof (Item _ (CFRule cat [] _) _) = Passive cat + +forward :: CFRule c n t -> CFRule c n t +forward (CFRule cat (_:rest) name) = CFRule cat rest name + +---------------------------------------------------------------------- + +instance (Print n, Print c, Print t) => Print (Item c n t) where + prt (Item k rule syms) + = "<"++show k++ ": "++ prt rule++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive cat) = "!" ++ prt cat + + diff --git a/src/GF/Parsing/CFG/PInfo.hs b/src/GF/Parsing/CFG/PInfo.hs new file mode 100644 index 000000000..eff0767c1 --- /dev/null +++ b/src/GF/Parsing/CFG/PInfo.hs @@ -0,0 +1,95 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.NewParsing.CFG.PInfo where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.Data.SortedList +import GF.Data.Assoc + +---------------------------------------------------------------------- +-- type declarations + +type CFParser c n t = CFPInfo c n t + -> [c] -- ^ possible starting categories + -> Input t -- ^ the input tokens + -> CFChart c n t + +------------------------------------------------------------ +-- parser information + +data CFPInfo c n t + = CFPInfo { grammarTokens :: SList t, + nameRules :: Assoc n (SList (CFRule c n t)), + topdownRules :: Assoc c (SList (CFRule c n t)), + bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)), + emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)), + emptyCategories :: Set c, + cyclicCategories :: SList c, + -- ^ ONLY FOR DIRECT CYCLIC RULES!!! + leftcornerTokens :: Assoc c (SList t) + -- ^ DOES NOT WORK WITH EMPTY RULES!!! + } + +--buildCFPInfo :: (Ord n, Ord c, Ord t) => CFGrammar c n t -> CFPInfo c n t + +-- this is not permanent... +buildCFPInfo grammar = traceCalcFirst grammar $ + tracePrt "cf parser info" (prt) $ + pInfo' (filter (not . isCyclic) grammar) + +pInfo' grammar = CFPInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks + where grToks = union [ nubsort [ tok | Tok tok <- rhs ] | + CFRule _ rhs _ <- grammar ] + nmRules = accumAssoc id [ (name, rule) | + rule@(CFRule _ _ name) <- grammar ] + tdRules = accumAssoc id [ (cat, rule) | + rule@(CFRule cat _ _) <- grammar ] + buRules = accumAssoc id [ (next, rule) | + rule@(CFRule _ (next:_) _) <- grammar ] + elcRules = accumAssoc id $ limit lc emptyRules + leftToks = accumAssoc id $ limit lc $ + nubsort [ (cat, token) | + CFRule cat (Tok token:_) _ <- grammar ] + lc (left, res) = nubsort [ (cat, res) | + CFRule cat _ _ <- buRules ? Cat left ] + emptyRules = nubsort [ (cat, rule) | + rule@(CFRule cat [] _) <- grammar ] + emptyCats = listSet $ limitEmpties $ map fst emptyRules + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ cat | CFRule cat rhs _ <- grammar, + all (symbol (\e -> e `elem` es) (const False)) rhs ] + cyclicCats = nubsort [ cat | CFRule cat [Cat cat'] _ <- grammar, cat == cat' ] + +isCyclic (CFRule cat [Cat cat'] _) = cat==cat' +isCyclic _ = False + + +---------------------------------------------------------------------- + +instance (Ord n, Ord c, Ord t) => Print (CFPInfo n c t) where + prt pI = "[ tokens=" ++ sl grammarTokens ++ + "; names=" ++ sla nameRules ++ + "; tdCats=" ++ sla topdownRules ++ + "; buCats=" ++ sla bottomupRules ++ + "; elcCats=" ++ sla emptyLeftcornerRules ++ + "; eCats=" ++ sla emptyCategories ++ + "; cCats=" ++ sl cyclicCategories ++ + "; lctokCats=" ++ sla leftcornerTokens ++ + " ]" + where sla f = show $ length $ aElems $ f pI + sl f = show $ length $ f pI diff --git a/src/GF/Parsing/GFC.hs b/src/GF/Parsing/GFC.hs new file mode 100644 index 000000000..11fdbbe04 --- /dev/null +++ b/src/GF/Parsing/GFC.hs @@ -0,0 +1,187 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.NewParsing.GFC + (parse, PInfo(..), buildPInfo) where + +import GF.System.Tracing +import GF.Infra.Print +import qualified PrGrammar + +import Monad + +import qualified Grammar +-- import Values +import qualified Macros +-- import qualified Modules +import qualified AbsGFC +import qualified Ident +import Operations +import CFIdent (CFCat, cfCat2Ident, CFTok, prCFTok) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities +import GF.Conversion.Types +import GF.Formalism.SimpleGFC +import qualified GF.Formalism.MCFG as M +import qualified GF.Formalism.CFG as C +-- import qualified GF.NewParsing.MCFG as PM +import qualified GF.NewParsing.CFG as PC +--import qualified GF.Conversion.FromGFC as From + +---------------------------------------------------------------------- +-- parsing information + +data PInfo = PInfo { mcfPInfo :: (), -- ^ not implemented yet + cfPInfo :: PC.CFPInfo CCat CName Token } + +buildPInfo :: MGrammar -> CGrammar -> PInfo +buildPInfo mcfg cfg = PInfo { mcfPInfo = (), + cfPInfo = PC.buildCFPInfo cfg } + + +---------------------------------------------------------------------- +-- main parsing function + +parse :: String -- ^ parsing strategy + -> PInfo -- ^ compiled grammars (mcfg and cfg) + -> Ident.Ident -- ^ abstract module name + -> CFCat -- ^ starting category + -> [CFTok] -- ^ input tokens + -> [Grammar.Term] -- ^ resulting GF terms + +-- parsing via CFG +parse (c:strategy) pinfo abs startCat + | c=='c' || c=='C' = map (tree2term abs) . + parseCFG strategy pinfo startCats . + map prCFTok + where startCats = tracePrt "startCats" prt $ + filter isStartCat $ map fst $ aAssocs $ PC.topdownRules $ cfPInfo pinfo + isStartCat (CCat (MCat cat _) _) = cat == cfCat2Ident startCat + +-- default parser +parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start + + +---------------------------------------------------------------------- + +parseCFG :: String -> PInfo -> [CCat] -> [Token] -> [SyntaxTree Name] +parseCFG strategy pInfo startCats inString = trace2 "Parser" "CFG" $ + trees + where trees = tracePrt "#trees" (prt . length) $ + nubsort $ forests >>= forest2trees + -- compactFs >>= forest2trees + + -- compactFs = tracePrt "#compactForests" (prt . length) $ + -- tracePrt "compactForests" (prtBefore "\n") $ + -- compactForests forests + + forests = tracePrt "#forests" (prt . length) $ + cfForests >>= convertFromCFForest + cfForests= tracePrt "#cfForests" (prt . length) $ + chart2forests chart (const False) finalEdges + + finalEdges = tracePrt "finalChartEdges" prt $ + map (uncurry Edge (inputBounds inTokens)) startCats + chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + C.grammar2chart cfChart + cfChart = --tracePrt "finalEdges" + --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $ + tracePrt "#cfChart" (prt . length) $ + PC.parseCF strategy (cfPInfo pInfo) startCats inTokens + + inTokens = input inString + + +{- +-- parsing via MCFG +newParser (m:strategy) gr (_, startCat) inString + | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms + where terms = map (tree2term abstract) trees + trees = --tracePrt "trees" (prtBefore "\n") $ + tracePrt "#trees" (prt . length) $ + concatMap forest2trees forests + forests = --tracePrt "forests" (prtBefore "\n") $ + tracePrt "#forests" (prt . length) $ + concatMap (chart2forests chart isMeta) finalEdges + isMeta = null . snd + finalEdges = tracePrt "finalEdges" (prtBefore "\n") $ + filter isFinalEdge $ aElems chart +-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) | +-- let (i, j) = inputBounds inTokens, +-- E.Rule cat _ [E.Lin lbl _] _ <- pInf, +-- isStartCat cat ] + isFinalEdge (cat, rows) + = isStartCat cat && + inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ] + chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + PM.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $ + mcfPInfo $ SS.statePInfo gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ] + isStartCat (MCFCat cat _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr +-} + + +---------------------------------------------------------------------- +-- parse trees to GF terms + +tree2term :: Ident.Ident -> SyntaxTree Name -> Grammar.Term +tree2term abs (TNode f ts) = Macros.mkApp (Macros.qq (abs,f)) (map (tree2term abs) ts) +tree2term abs (TMeta) = Macros.mkMeta 0 + + +---------------------------------------------------------------------- +-- conversion and unification of forests + +convertFromCFForest :: SyntaxForest CName -> [SyntaxForest Name] + +-- simplest implementation +convertFromCFForest (FNode (CName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] + +{- +-- more intelligent(?) implementation +convertFromCFForest (FNode (CName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] +-} + +checkProfile forests = unifyManyForests . map (forests !!) + + +---------------------------------------------------------------------- +-- conversion and unification for parse trees instead of forests + +convertFromCFTree :: SyntaxTree CName -> [SyntaxTree Name] +convertFromCFTree (TNode (CName name profile) children0) + = [ TNode name children | + children1 <- mapM convertFromCFTree children0, + children <- mapM (checkProfile children1) profile ] + where checkProfile trees = unifyManyTrees . map (trees !!) + |
