diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Parsing | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Parsing')
| -rw-r--r-- | src-3.0/GF/Parsing/CF.hs | 66 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/CFG.hs | 51 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/CFG/General.hs | 103 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/CFG/Incremental.hs | 150 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/CFG/PInfo.hs | 98 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG.hs | 100 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/Active.hs | 179 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/Incremental.hs | 107 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/PInfo.hs | 121 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/Range.hs | 50 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/GFC.hs | 208 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG.hs | 68 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/Active.hs | 318 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/Active2.hs | 237 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/FastActive.hs | 176 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/Incremental.hs | 178 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/Incremental2.hs | 157 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/Naive.hs | 142 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/PInfo.hs | 162 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/Range.hs | 206 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/MCFG/ViaCFG.hs | 186 |
21 files changed, 3063 insertions, 0 deletions
diff --git a/src-3.0/GF/Parsing/CF.hs b/src-3.0/GF/Parsing/CF.hs new file mode 100644 index 000000000..1a65f6caf --- /dev/null +++ b/src-3.0/GF/Parsing/CF.hs @@ -0,0 +1,66 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:04 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.4 $ +-- +-- Chart parsing of grammars in CF format +----------------------------------------------------------------------------- + +module GF.Parsing.CF (parse) where + +import GF.Data.Operations (errVal) + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Data.SortedList (nubsort) +import GF.Data.Assoc +import qualified GF.CF.CF as CF +import qualified GF.CF.CFIdent as CFI +import GF.Formalism.Utilities +import GF.Formalism.CFG +import qualified GF.Parsing.CFG as P + +type Token = CFI.CFTok +type Name = CFI.CFFun +type Category = CFI.CFCat + +parse :: String -> CF.CF -> Category -> CF.CFParser +parse = buildParser . errVal (errVal undefined (P.parseCF "")) . P.parseCF + +buildParser :: P.CFParser Category Name Token -> CF.CF -> Category -> CF.CFParser +buildParser parser cf start tokens = (parseResults, parseInformation) + where parseInformation = prtSep "\n" trees + parseResults = [ (tree2cfTree t, []) | t <- trees ] + theInput = input tokens + edges = tracePrt "Parsing.CF - nr. edges" (prt.length) $ + parser pInf [start] theInput + chart = tracePrt "Parsing.CF - sz. chart" (prt . map (length.snd) . aAssocs) $ + grammar2chart $ map addCategory edges + forests = tracePrt "Parsing.CF - nr. forests" (prt.length) $ + chart2forests chart (const False) + [ uncurry Edge (inputBounds theInput) start ] + trees = tracePrt "Parsing.CF - nr. trees" (prt.length) $ + concatMap forest2trees forests + pInf = P.buildCFPInfo $ cf2grammar cf (nubsort tokens) + + +addCategory (CFRule cat rhs name) = CFRule cat rhs (name, cat) + +tree2cfTree (TNode (name, Edge _ _ cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees)) + +cf2grammar :: CF.CF -> [Token] -> CFGrammar Category Name Token +cf2grammar cf tokens = [ CFRule cat rhs name | + (name, (cat, rhs0)) <- cfRules, + rhs <- mapM item2symbol rhs0 ] + where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++ + CF.rulesOfCF cf + item2symbol (CF.CFNonterm cat) = [Cat cat] + item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens + + diff --git a/src-3.0/GF/Parsing/CFG.hs b/src-3.0/GF/Parsing/CFG.hs new file mode 100644 index 000000000..f64ce55f1 --- /dev/null +++ b/src-3.0/GF/Parsing/CFG.hs @@ -0,0 +1,51 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/11 10:28:16 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- CFG parsing +----------------------------------------------------------------------------- + +module GF.Parsing.CFG + (parseCF, module GF.Parsing.CFG.PInfo) where + +import GF.Data.Operations (Err(..)) + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.Parsing.CFG.PInfo + +import qualified GF.Parsing.CFG.Incremental as Inc +import qualified GF.Parsing.CFG.General as Gen + +---------------------------------------------------------------------- +-- parsing + +parseCF :: (Ord n, Ord c, Ord t) => String -> Err (CFParser c n t) + +parseCF "bottomup" = Ok $ Gen.parse bottomup +parseCF "topdown" = Ok $ Gen.parse topdown + +parseCF "gb" = Ok $ Gen.parse bottomup +parseCF "gt" = Ok $ Gen.parse topdown +parseCF "ib" = Ok $ Inc.parse (bottomup, noFilter) +parseCF "it" = Ok $ Inc.parse (topdown, noFilter) +parseCF "ibFT" = Ok $ Inc.parse (bottomup, topdown) +parseCF "ibFB" = Ok $ Inc.parse (bottomup, bottomup) +parseCF "ibFTB" = Ok $ Inc.parse (bottomup, bothFilters) +parseCF "itF" = Ok $ Inc.parse (topdown, bottomup) + +-- error parser: +parseCF prs = Bad $ "CFG parsing strategy not defined: " ++ prs + +bottomup = (True, False) +topdown = (False, True) +noFilter = (False, False) +bothFilters = (True, True) + + diff --git a/src-3.0/GF/Parsing/CFG/General.hs b/src-3.0/GF/Parsing/CFG/General.hs new file mode 100644 index 000000000..4f5959a85 --- /dev/null +++ b/src-3.0/GF/Parsing/CFG/General.hs @@ -0,0 +1,103 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:08 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.4 $ +-- +-- CFG parsing with a general chart +----------------------------------------------------------------------------- + +module GF.Parsing.CFG.General + (parse, Strategy) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.Parsing.CFG.PInfo +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import Control.Monad + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t +parse strategy grammar start = extract . + tracePrt "Parsing.CFG.General - size internal of chart" + (prt . length . chartList) . + process strategy grammar start + +-- | parsing strategy: (isBottomup, isTopdown) +type Strategy = (Bool, Bool) + +extract :: (Ord n, Ord c, Ord t) => + IChart n (Symbol c t) -> CFChart c n t +extract chart = [ CFRule (Edge j k cat) daughters name | + Edge j k (Cat cat, found, [], Just name) <- chartList chart, + daughters <- path j k (reverse found) ] + where path i k [] = [ [] | i==k ] + path i k (Tok tok : found) + = [ Tok tok : daughters | + daughters <- path (i+1) k found ] + path i k (Cat cat : found) + = [ Cat (Edge i j cat) : daughters | + Edge _i j _cat <- chartLookup chart (Passive (Cat cat) i), + daughters <- path j k found ] + + +process :: (Ord n, Ord c, Ord t) => + Strategy -- ^ (isBottomup, isTopdown) :: (Bool, Bool) + -> CFPInfo c n t -- ^ parser information (= grammar) + -> [c] -- ^ list of starting categories + -> Input t -- ^ input string + -> IChart n (Symbol c t) +process (isBottomup, isTopdown) grammar start + = trace2 "Parsing.CFG.General - strategy" ((if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = ParseChart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k + = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (CFRule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src-3.0/GF/Parsing/CFG/Incremental.hs b/src-3.0/GF/Parsing/CFG/Incremental.hs new file mode 100644 index 000000000..adab2b73c --- /dev/null +++ b/src-3.0/GF/Parsing/CFG/Incremental.hs @@ -0,0 +1,150 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:09 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.4 $ +-- +-- Incremental chart parsing for CFG +----------------------------------------------------------------------------- + + +module GF.Parsing.CFG.Incremental + (parse, Strategy) where + +import GF.System.Tracing +import GF.Infra.Print + +import Data.Array + +import GF.Data.Operations +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.Parsing.CFG.PInfo +import GF.Data.IncrementalDeduction + + +-- | parsing strategy: (predict:(BU, TD), filter:(BU, TD)) +type Strategy = ((Bool, Bool), (Bool, Bool)) + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t +parse strategy grammar start = extract . + tracePrt "Parsing.CFG.Incremental - size of internal chart" + (prt . length . flip chartList const) . + process strategy grammar start + +extract :: (Ord n, Ord c, Ord t) => + IChart c n t -> CFChart c n t +extract finalChart = [ CFRule (Edge j k cat) daughters name | + (k, Item j (CFRule cat [] name) found) <- chartList finalChart (,), + daughters <- path j k (reverse found) ] + where path i k [] = [ [] | i==k ] + path i k (Tok tok : found) + = [ Tok tok : daughters | + daughters <- path (i+1) k found ] + path i k (Cat cat : found) + = [ Cat (Edge i j cat) : daughters | + Item j _ _ <- chartLookup finalChart i (Passive cat), + daughters <- path j k found ] + +process :: (Ord n, Ord c, Ord t) => + Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t +process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input + = trace2 "Parsing.CFG.Incremental - strategy" ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + finalChart + where finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = union $ map (tdInfer 0) start + axioms k = union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (CFRule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(CFRule _ (sym@(Cat next):_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (sym:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules, see rules above and tdPredict below + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (CFRule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (CFRule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | CFRule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +---------------------------------------------------------------------- +-- type declarations, items & keys + +data Item c n t = Item Int (CFRule c n t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive c + deriving (Eq, Ord, Show) + +type IChart c n t = IncrementalChart (Item c n t) (IKey c t) + +keyof :: Item c n t -> IKey c t +keyof (Item _ (CFRule _ (next:_) _) _) = Active next +keyof (Item _ (CFRule cat [] _) _) = Passive cat + +forward :: CFRule c n t -> CFRule c n t +forward (CFRule cat (_:rest) name) = CFRule cat rest name + +---------------------------------------------------------------------- + +instance (Print n, Print c, Print t) => Print (Item c n t) where + prt (Item k rule syms) + = "<"++show k++ ": "++ prt rule++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive cat) = "!" ++ prt cat + + diff --git a/src-3.0/GF/Parsing/CFG/PInfo.hs b/src-3.0/GF/Parsing/CFG/PInfo.hs new file mode 100644 index 000000000..f877b225e --- /dev/null +++ b/src-3.0/GF/Parsing/CFG/PInfo.hs @@ -0,0 +1,98 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:45 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- CFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.Parsing.CFG.PInfo + (CFParser, CFPInfo(..), buildCFPInfo) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.Data.SortedList +import GF.Data.Assoc + +---------------------------------------------------------------------- +-- type declarations + +-- | the list of categories = possible starting categories +type CFParser c n t = CFPInfo c n t + -> [c] + -> Input t + -> CFChart c n t + +------------------------------------------------------------ +-- parser information + +data CFPInfo c n t + = CFPInfo { grammarTokens :: SList t, + nameRules :: Assoc n (SList (CFRule c n t)), + topdownRules :: Assoc c (SList (CFRule c n t)), + bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)), + emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)), + emptyCategories :: Set c, + cyclicCategories :: SList c, + -- ^ ONLY FOR DIRECT CYCLIC RULES!!! + leftcornerTokens :: Assoc c (SList t) + -- ^ DOES NOT WORK WITH EMPTY RULES!!! + } + +buildCFPInfo :: (Ord c, Ord n, Ord t) => CFGrammar c n t -> CFPInfo c n t + +-- this is not permanent... +buildCFPInfo grammar = traceCalcFirst grammar $ + tracePrt "CFG.PInfo - parser info" (prt) $ + pInfo' (filter (not . isCyclic) grammar) + +pInfo' grammar = CFPInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks + where grToks = union [ nubsort [ tok | Tok tok <- rhs ] | + CFRule _ rhs _ <- grammar ] + nmRules = accumAssoc id [ (name, rule) | + rule@(CFRule _ _ name) <- grammar ] + tdRules = accumAssoc id [ (cat, rule) | + rule@(CFRule cat _ _) <- grammar ] + buRules = accumAssoc id [ (next, rule) | + rule@(CFRule _ (next:_) _) <- grammar ] + elcRules = accumAssoc id $ limit lc emptyRules + leftToks = accumAssoc id $ limit lc $ + nubsort [ (cat, token) | + CFRule cat (Tok token:_) _ <- grammar ] + lc (left, res) = nubsort [ (cat, res) | + CFRule cat _ _ <- buRules ? Cat left ] + emptyRules = nubsort [ (cat, rule) | + rule@(CFRule cat [] _) <- grammar ] + emptyCats = listSet $ limitEmpties $ map fst emptyRules + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ cat | CFRule cat rhs _ <- grammar, + all (symbol (\e -> e `elem` es) (const False)) rhs ] + cyclicCats = nubsort [ cat | CFRule cat [Cat cat'] _ <- grammar, cat == cat' ] + +isCyclic (CFRule cat [Cat cat'] _) = cat==cat' +isCyclic _ = False + + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance (Ord c, Ord n, Ord t) => Print (CFPInfo c n t) where + prt pI = "[ tokens=" ++ sl grammarTokens ++ + "; names=" ++ sla nameRules ++ + "; tdCats=" ++ sla topdownRules ++ + "; buCats=" ++ sla bottomupRules ++ + "; elcCats=" ++ sla emptyLeftcornerRules ++ + "; eCats=" ++ sla emptyCategories ++ + -- "; cCats=" ++ sl cyclicCategories ++ + -- "; lctokCats=" ++ sla leftcornerTokens ++ + " ]" + where sla f = show $ length $ aElems $ f pI + sl f = show $ length $ f pI diff --git a/src-3.0/GF/Parsing/FCFG.hs b/src-3.0/GF/Parsing/FCFG.hs new file mode 100644 index 000000000..30a7801c8 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG.hs @@ -0,0 +1,100 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG + (parseFCF,buildFCFPInfo,FCFPInfo(..),makeFinalEdge) where + +import GF.Data.SortedList +import GF.Data.Assoc + +import GF.Infra.PrintClass + +import GF.Formalism.FCFG +import GF.Formalism.Utilities + +import qualified GF.Parsing.FCFG.Active as Active +import GF.Parsing.FCFG.PInfo + +import GF.GFCC.DataGFCC +import GF.GFCC.CId +import GF.GFCC.Macros +import GF.Data.ErrM + +import qualified Data.Map as Map + +---------------------------------------------------------------------- +-- parsing + +-- main parsing function + +parseFCF :: + String -> -- ^ parsing strategy + FCFPInfo -> -- ^ compiled grammar (fcfg) + CId -> -- ^ starting category + [String] -> -- ^ input tokens + Err [Exp] -- ^ resulting GF terms +parseFCF strategy pinfo startCat inString = + do let inTokens = input inString + startCats <- Map.lookup startCat (startupCats pinfo) + fcfParser <- {- trace lctree $ -} parseFCF strategy + let chart = fcfParser pinfo startCats inTokens + (i,j) = inputBounds inTokens + finalEdges = [makeFinalEdge cat i j | cat <- startCats] + forests = map cnv_forests $ chart2forests chart (const False) finalEdges + filteredForests = forests >>= applyProfileToForest + trees = nubsort $ filteredForests >>= forest2trees + return $ map tree2term trees + where + parseFCF :: String -> Err (FCFParser) + parseFCF "bottomup" = Ok $ Active.parse "b" + parseFCF "topdown" = Ok $ Active.parse "t" + parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat + + +cnv_forests FMeta = FMeta +cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss) +cnv_forests (FString x) = FString x +cnv_forests (FInt x) = FInt x +cnv_forests (FFloat x) = FFloat x + +cnv_profile (Unify x) = Unify x +cnv_profile (Constant x) = Constant (cnv_forests2 x) + +cnv_forests2 FMeta = FMeta +cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss) +cnv_forests2 (FString x) = FString x +cnv_forests2 (FInt x) = FInt x +cnv_forests2 (FFloat x) = FFloat x + +---------------------------------------------------------------------- +-- parse trees to GFCC terms + +tree2term :: SyntaxTree CId -> Exp +tree2term (TNode f ts) = tree (AC f) (map tree2term ts) + +tree2term (TString s) = tree (AS s) [] +tree2term (TInt n) = tree (AI n) [] +tree2term (TFloat f) = tree (AF f) [] +tree2term (TMeta) = exp0 + +---------------------------------------------------------------------- +-- conversion and unification of forests + +-- simplest implementation +applyProfileToForest :: SyntaxForest FName -> [SyntaxForest CId] +applyProfileToForest (FNode name@(Name fun profile) children) + | isCoercionF name = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ applyProfileM unifyManyForests profile forests | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] diff --git a/src-3.0/GF/Parsing/FCFG/Active.hs b/src-3.0/GF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..df55793f8 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/Active.hs @@ -0,0 +1,179 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Active (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.Utilities + +import GF.Formalism.FCFG +import GF.Formalism.Utilities + +import GF.Infra.PrintClass + +import GF.Parsing.FCFG.Range +import GF.Parsing.FCFG.PInfo + +import Control.Monad (guard) + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +parse :: String -> FCFParser +parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo + where chart = process strategy pinfo toks axioms emptyXChart + axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks + | isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec +emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) + where + FRule _ rhs _ _ = allRules pinfo ! ruleid + +process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat +process strategy pinfo toks [] chart = chart +process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart + where + univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat c r d -> case recs !! d of + [] -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c + rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) + ++ + do guard (isTD strategy) + ruleid <- topdownRules pinfo ? c + return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) + in process strategy pinfo toks items chart + found' -> let items = do rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) node) + in process strategy pinfo toks items chart + FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok + rng' <- concatRange rng (makeRange i j) + return (cat, Active found rng' lbl (ppos+1) node) + in process strategy pinfo toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart + else univRule cat (Final (reverse (rng:found)) node) chart + where + (FRule fn _ cat lins) = allRules pinfo ! ruleid + lin = lins ! lbl + univRule cat item@(Final found' node) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat + let FRule _ _ _ lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! l ! ppos + rng <- concatRange rng (found' !! r) + return (cat, Active found rng l (ppos+1) (updateChildren node d found')) + ++ + do guard (isBU strategy) + ruleid <- leftcornerCats pinfo ? cat + let FRule _ _ _ lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! 0 ! 0 + return (cat, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) + + updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec + updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs + in process strategy pinfo toks items chart + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active RangeRec + Range + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !FPointPos + (SyntaxNode RuleId RangeRec) + | Final RangeRec (SyntaxNode RuleId RangeRec) + deriving (Eq, Ord) + +data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec) +xchart2syntaxchart (XChart actives finals) pinfo = + accumAssoc groupSyntaxNodes $ + [ case node of + SNode ruleid rrecs -> let FRule fun rhs cat _ = allRules pinfo ! ruleid + in ((cat,found), SNode fun (zip rhs rrecs)) + SString s -> ((cat,found), SString s) + SInt n -> ((cat,found), SInt n) + SFloat f -> ((cat,found), SFloat f) + | (cat, Final found node) <- chartAssocs finals + ] + +literals :: FCFPInfo -> Input FToken -> [(FCat,Item)] +literals pinfo toks = + [let (c,node) = lexer t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)] + where + lexer t = + case reads t of + [(n,"")] -> (fcatInt, SInt (n::Integer)) + _ -> case reads t of + [(f,"")] -> (fcatFloat, SFloat (f::Double)) + _ -> (fcatString,SString t) + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD pinfo starts toks = + do cat <- starts + ruleid <- topdownRules pinfo ? cat + return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)] +initialBU pinfo toks = + do (tok,rngs) <- aAssocs (inputToken toks) + ruleid <- leftcornerTokens pinfo ? tok + let FRule _ _ cat _ = allRules pinfo ! ruleid + (i,j) <- rngs + return (cat,Active [] (makeRange i j) 0 1 (emptyChildren ruleid pinfo)) + ++ + do ruleid <- epsilonRules pinfo + let FRule _ _ cat _ = allRules pinfo ! ruleid + return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) diff --git a/src-3.0/GF/Parsing/FCFG/Incremental.hs b/src-3.0/GF/Parsing/FCFG/Incremental.hs new file mode 100644 index 000000000..5ee77a061 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/Incremental.hs @@ -0,0 +1,107 @@ +module GF.Parsing.FCFG.Incremental where
+
+import Data.Array
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.Assoc
+import GF.Data.GeneralDeduction
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+import GF.Parsing.FCFG.PInfo
+import GF.Parsing.FCFG.Range
+import GF.GFCC.CId
+import Debug.Trace
+
+initState :: FCFPInfo -> CId -> State
+initState pinfo start =
+ let items = do
+ starts <- Map.lookup start (startupCats pinfo)
+ c <- starts
+ ruleid <- topdownRules pinfo ? c
+ let (FRule fn args cat lins) = allRules pinfo ! ruleid
+ lbl <- indices lins
+ return (Active 0 lbl 0 ruleid args cat)
+
+ forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ args cat _) <- assocs (allRules pinfo)]
+
+ max_fid = case IntMap.maxViewWithKey forest of
+ Just ((fid,_), _) -> fid+1
+ Nothing -> 0
+
+ in process pinfo items (State emptyChart [] emptyChart Map.empty forest max_fid 0)
+
+nextState :: FCFPInfo -> FToken -> State -> State
+nextState pinfo t state =
+ process pinfo (chartLookup (tokens state) t) state{ chart=emptyChart
+ , charts=chart state : charts state
+ , tokens=emptyChart
+ , passive=Map.empty
+ , currOffset=currOffset state+1
+ }
+
+getCompletions :: State -> FToken -> [FToken]
+getCompletions state w =
+ [t | t <- chartKeys (tokens state), take (length w) t == w]
+
+process pinfo [] state = state
+process pinfo (item@(Active j lbl ppos ruleid args fid0):xitems) state
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat _ r d -> let fid = args !! d
+ in case chartInsert (chart state) item (fid,r) of
+ Nothing -> process pinfo xitems state
+ Just actCat -> let items = do exprs <- IntMap.lookup fid (forest state)
+ (Passive ruleid args) <- Set.toList exprs
+ return (Active k r 0 ruleid args fid)
+ `mplus`
+ do id <- Map.lookup (fid,r,k) (passive state)
+ return (Active j lbl (ppos+1) ruleid (updateAt d id args) fid0)
+ in process pinfo (xitems++items) state{chart=actCat}
+ FSymTok tok -> case chartInsert (tokens state) (Active j lbl (ppos+1) ruleid args fid0) tok of
+ Nothing -> process pinfo xitems state
+ Just actTok -> process pinfo xitems state{tokens=actTok}
+ | otherwise = case Map.lookup (fid0, lbl, j) (passive state) of
+ Nothing -> let fid = nextId state
+ items = do Active j' lbl ppos ruleid args fidc <- chartLookup ((chart state:charts state) !! (k-j)) (fid0,lbl)
+ let FSymCat _ _ d = rhs ruleid lbl ! ppos
+ return (Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc)
+ in process pinfo (xitems++items) state{passive=Map.insert (fid0, lbl, j) fid (passive state)
+ ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
+ ,nextId =nextId state+1
+ }
+ Just id -> process pinfo xitems state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
+ where
+ lin = rhs ruleid lbl
+ k = currOffset state
+
+ rhs ruleid lbl = lins ! lbl
+ where
+ (FRule _ _ cat lins) = allRules pinfo ! ruleid
+
+ updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+
+data Active
+ = Active Int FIndex FPointPos RuleId [FCat] FCat
+ deriving (Eq,Show,Ord)
+data Passive
+ = Passive RuleId [FCat]
+ deriving (Eq,Ord,Show)
+
+
+data State
+ = State
+ { chart :: Chart
+ , charts :: [Chart]
+ , tokens :: ParseChart Active FToken
+ , passive :: Map.Map (FCat, FIndex, Int) FCat
+ , forest :: IntMap.IntMap (Set.Set Passive)
+ , nextId :: FCat
+ , currOffset :: Int
+ }
+ deriving Show
+
+type Chart = ParseChart Active (FCat, FIndex)
diff --git a/src-3.0/GF/Parsing/FCFG/PInfo.hs b/src-3.0/GF/Parsing/FCFG/PInfo.hs new file mode 100644 index 000000000..8b288f2f1 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/PInfo.hs @@ -0,0 +1,121 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.PInfo where + +import GF.Infra.PrintClass +import GF.Formalism.Utilities +import GF.Formalism.FCFG +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Parsing.FCFG.Range +import qualified GF.GFCC.CId as AbsGFCC + +import Data.Array +import Data.Maybe +import qualified Data.Map as Map +import qualified Data.Set as Set +import Debug.Trace + +---------------------------------------------------------------------- +-- type declarations + +-- | the list of categories = possible starting categories +type FCFParser = FCFPInfo + -> [FCat] + -> Input FToken + -> SyntaxChart FName (FCat,RangeRec) + +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +------------------------------------------------------------ +-- parser information + +type RuleId = Int + +data FCFPInfo + = FCFPInfo { allRules :: Array RuleId FRule + , topdownRules :: Assoc FCat (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + -- , emptyRules :: [RuleId] + , epsilonRules :: [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , leftcornerCats :: Assoc FCat (SList RuleId) + , leftcornerTokens :: Assoc FToken (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: SList FCat + , grammarToks :: SList FToken + , startupCats :: Map.Map AbsGFCC.CId [FCat] + } + + +getLeftCornerTok lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymTok tok -> Just tok + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +getLeftCornerCat lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat c _ _ -> Just c + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +buildFCFPInfo :: FGrammar -> FCFPInfo +buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ + FCFPInfo { allRules = allrules + , topdownRules = topdownrules + -- , emptyRules = emptyrules + , epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + , grammarToks = grammartoks + , startupCats = startup + } + + where allrules = listArray (0,length grammar-1) grammar + topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ cat _) <- assocs allrules] + -- emptyrules = [ruleid | (ruleid, FRule _ [] _ _) <- assocs allrules] + epsilonrules = [ ruleid | (ruleid, FRule _ _ _ lins) <- assocs allrules, + not (inRange (bounds (lins ! 0)) 0) ] + leftcorncats = accumAssoc id + [ (fromJust (getLeftCornerCat lins), ruleid) | + (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ] + leftcorntoks = accumAssoc id + [ (fromJust (getLeftCornerTok lins), ruleid) | + (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ] + grammarcats = aElems topdownrules + grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] + +fcfPInfoToFGrammar :: FCFPInfo -> FGrammar +fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo) + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance Print FCFPInfo where + prt pI = "[ allRules=" ++ sl (elems . allRules) ++ + "; tdRules=" ++ sla topdownRules ++ + -- "; emptyRules=" ++ sl emptyRules ++ + "; epsilonRules=" ++ sl epsilonRules ++ + "; lcCats=" ++ sla leftcornerCats ++ + "; lcTokens=" ++ sla leftcornerTokens ++ + "; categories=" ++ sl grammarCats ++ + " ]" + + where sl f = show $ length $ f pI + sla f = let (as, bs) = unzip $ aAssocs $ f pI + in show (length as) ++ "/" ++ show (length (concat bs)) + diff --git a/src-3.0/GF/Parsing/FCFG/Range.hs b/src-3.0/GF/Parsing/FCFG/Range.hs new file mode 100644 index 000000000..24674f58b --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/Range.hs @@ -0,0 +1,50 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Definitions of ranges, and operations on ranges +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Range + ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange, + ) where + + +-- GF modules +import GF.Formalism.Utilities +import GF.Infra.PrintClass + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +rangeEdge :: a -> Range -> Edge a +rangeEdge a (Range i j) = Edge i j a + +edgeRange :: Edge a -> Range +edgeRange (Edge i j _) = Range i j + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + +instance Print Range where + prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")" + prt (EmptyRange) = "(?)" diff --git a/src-3.0/GF/Parsing/GFC.hs b/src-3.0/GF/Parsing/GFC.hs new file mode 100644 index 000000000..9f1328a50 --- /dev/null +++ b/src-3.0/GF/Parsing/GFC.hs @@ -0,0 +1,208 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.9 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.Parsing.GFC + (parse, PInfo(..), buildPInfo) where + +import GF.System.Tracing +import GF.Infra.Print +import qualified GF.Grammar.PrGrammar as PrGrammar + +import GF.Data.ErrM + +import qualified GF.Grammar.Grammar as Grammar +import qualified GF.Grammar.Macros as Macros +import qualified GF.Canon.AbsGFC as AbsGFC +import qualified GF.GFCC.DataGFCC as AbsGFCC +import GF.GFCC.CId +import qualified GF.Infra.Ident as Ident +import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok, prCFTok) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities +import GF.Conversion.Types + +import qualified GF.Formalism.GCFG as G +import qualified GF.Formalism.SimpleGFC as S +import qualified GF.Formalism.MCFG as M +import GF.Formalism.FCFG +import qualified GF.Formalism.CFG as C +import qualified GF.Parsing.MCFG as PM +import qualified GF.Parsing.FCFG as PF +import qualified GF.Parsing.CFG as PC + +---------------------------------------------------------------------- +-- parsing information + +data PInfo = PInfo { mcfPInfo :: MCFPInfo + , fcfPInfo :: PF.FCFPInfo + , cfPInfo :: CFPInfo + } + +type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token +type CFPInfo = PC.CFPInfo CCat Name Token + +buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo +buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg + , fcfPInfo = PF.buildFCFPInfo fcfg + , cfPInfo = PC.buildCFPInfo cfg + } + +instance Print PInfo where + prt (PInfo m f c) = prt m ++ "\n" ++ prt c + +---------------------------------------------------------------------- +-- main parsing function + +parse :: String -- ^ parsing algorithm (mcfg or cfg) + -> String -- ^ parsing strategy + -> PInfo -- ^ compiled grammars (mcfg and cfg) + -> Ident.Ident -- ^ abstract module name + -> CFCat -- ^ starting category + -> [CFTok] -- ^ input tokens + -> Err [Grammar.Term] -- ^ resulting GF terms + + +-- parsing via CFG +parse "c" strategy pinfo abs startCat inString + = do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $ + inputMany (map wordsCFTok inString) + let startCats = tracePrt "Parsing.GFC - starting CF categories" prt $ + filter isStart $ map fst $ aAssocs $ PC.topdownRules cfpi + isStart cat = ccat2scat cat == cfCat2Ident startCat + cfpi = cfPInfo pinfo + cfParser <- PC.parseCF strategy + let cfChart = tracePrt "Parsing.GFC - CF chart" (prt . length) $ + cfParser cfpi startCats inTokens + chart = tracePrt "Parsing.GFC - chart" (prt . map (length.snd) . aAssocs) $ + C.grammar2chart cfChart + finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $ + map (uncurry Edge (inputBounds inTokens)) startCats + forests = chart2forests chart (const False) finalEdges + traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests)) + traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees))) + let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $ + forests >>= applyProfileToForest + -- compactFs = tracePrt "#compactForests" (prt . length) $ + -- tracePrt "compactForests" (prtBefore "\n") $ + -- compactForests forests + trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $ + nubsort $ filteredForests >>= forest2trees + -- compactFs >>= forest2trees + return $ map (tree2term abs) trees + + +-- parsing via MCFG +parse "m" strategy pinfo abs startCat inString + = do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $ + inputMany (map wordsCFTok inString) + let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $ + filter isStart $ PM.grammarCats mcfpi + isStart cat = mcat2scat cat == cfCat2Ident startCat + mcfpi = mcfPInfo pinfo + mcfParser <- PM.parseMCF strategy + let chart = mcfParser mcfpi startCats inTokens + finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $ + [ PM.makeFinalEdge cat lbl (inputBounds inTokens) | + cat@(MCat _ [lbl]) <- startCats ] + forests = chart2forests chart (const False) finalEdges + traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests)) + traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees))) + let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $ + forests >>= applyProfileToForest + -- compactFs = tracePrt "#compactForests" (prt . length) $ + -- tracePrt "compactForests" (prtBefore "\n") $ + -- compactForests forests + trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $ + nubsort $ filteredForests >>= forest2trees + -- compactFs >>= forest2trees + return $ map (tree2term abs) trees + + +-- parsing via FCFG +parse "f" strategy pinfo abs startCat inString = + let Ident.IC x = cfCat2Ident startCat + cat' = CId x + in case PF.parseFCF strategy (fcfPInfo pinfo) cat' (map prCFTok inString) of + Ok es -> Ok (map (exp2term abs) es) + Bad msg -> Bad msg + + +-- error parser: +selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy + +cnv_forests FMeta = FMeta +cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (Ident.IC n) (map cnv_profile p)) (map (map cnv_forests) fss) +cnv_forests (FString x) = FString x +cnv_forests (FInt x) = FInt x +cnv_forests (FFloat x) = FFloat x + +cnv_profile (Unify x) = Unify x +cnv_profile (Constant x) = Constant (cnv_forests2 x) + +cnv_forests2 FMeta = FMeta +cnv_forests2 (FNode (CId n) fss) = FNode (Ident.IC n) (map (map cnv_forests2) fss) +cnv_forests2 (FString x) = FString x +cnv_forests2 (FInt x) = FInt x +cnv_forests2 (FFloat x) = FFloat x + +---------------------------------------------------------------------- +-- parse trees to GF terms + +tree2term :: Ident.Ident -> SyntaxTree Fun -> Grammar.Term +tree2term abs (TNode f ts) = Macros.mkApp (Macros.qq (abs,f)) (map (tree2term abs) ts) +tree2term abs (TString s) = Macros.string2term s +tree2term abs (TInt n) = Macros.int2term n +tree2term abs (TFloat f) = Macros.float2term f +tree2term abs (TMeta) = Macros.mkMeta 0 + +exp2term :: Ident.Ident -> AbsGFCC.Exp -> Grammar.Term +exp2term abs (AbsGFCC.DTr _ a es) = ---- TODO: bindings + Macros.mkApp (atom2term abs a) (map (exp2term abs) es) + +atom2term :: Ident.Ident -> AbsGFCC.Atom -> Grammar.Term +atom2term abs (AbsGFCC.AC (CId f)) = Macros.qq (abs,Ident.IC f) +atom2term abs (AbsGFCC.AS s) = Macros.string2term s +atom2term abs (AbsGFCC.AI n) = Macros.int2term n +atom2term abs (AbsGFCC.AF f) = Macros.float2term f +atom2term abs (AbsGFCC.AM i) = Macros.mkMeta (fromInteger i) + +---------------------------------------------------------------------- +-- conversion and unification of forests + +-- simplest implementation +applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun] +applyProfileToForest (FNode name@(Name fun profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ applyProfileM unifyManyForests profile forests | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] + +{- +-- more intelligent(?) implementation +applyProfileToForest (FNode (Name name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +-} + + diff --git a/src-3.0/GF/Parsing/MCFG.hs b/src-3.0/GF/Parsing/MCFG.hs new file mode 100644 index 000000000..bda3af675 --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG.hs @@ -0,0 +1,68 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/11 10:28:16 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG + (parseMCF, module GF.Parsing.MCFG.PInfo) where + +import GF.Data.Operations (Err(..)) + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Parsing.MCFG.PInfo + +import qualified GF.Parsing.MCFG.Naive as Naive +import qualified GF.Parsing.MCFG.Active as Active +import qualified GF.Parsing.MCFG.FastActive as FastActive +-- import qualified GF.Parsing.MCFG.Active2 as Active2 +import qualified GF.Parsing.MCFG.Incremental as Incremental +-- import qualified GF.Parsing.MCFG.Incremental2 as Incremental2 + +---------------------------------------------------------------------- +-- parsing + +parseMCF :: (Ord c, Ord n, Ord l, Ord t) => String -> Err (MCFParser c n l t) +parseMCF prs | prs `elem` strategies = Ok $ parseMCF' prs + | otherwise = Bad $ "MCFG parsing strategy not defined: " ++ prs + + +strategies = words "bottomup topdown n an ab at i rn ran rab rat ri ft fb" + + +parseMCF' :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t + +parseMCF' "bottomup" pinfo starts toks = parseMCF' "fb" pinfo starts toks +parseMCF' "topdown" pinfo starts toks = parseMCF' "ft" pinfo starts toks + +parseMCF' "n" pinfo starts toks = Naive.parse pinfo starts toks +parseMCF' "an" pinfo starts toks = Active.parse "n" pinfo starts toks +parseMCF' "ab" pinfo starts toks = Active.parse "b" pinfo starts toks +parseMCF' "at" pinfo starts toks = Active.parse "t" pinfo starts toks +parseMCF' "i" pinfo starts toks = Incremental.parse pinfo starts toks + +-- parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks +-- parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks +-- parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks +-- parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks + +parseMCF' "rn" pinfo starts toks = Naive.parseR (rrP pinfo toks) starts +parseMCF' "ran" pinfo starts toks = Active.parseR "n" (rrP pinfo toks) starts +parseMCF' "rab" pinfo starts toks = Active.parseR "b" (rrP pinfo toks) starts +parseMCF' "rat" pinfo starts toks = Active.parseR "t" (rrP pinfo toks) starts +parseMCF' "ri" pinfo starts toks = Incremental.parseR (rrP pinfo toks) starts ntoks + where ntoks = snd (inputBounds toks) + +parseMCF' "fb" pinfo starts toks = FastActive.parse "b" (rrP pinfo toks) starts +parseMCF' "ft" pinfo starts toks = FastActive.parse "t" (rrP pinfo toks) starts + +rrP pi = rangeRestrictPInfo pi diff --git a/src-3.0/GF/Parsing/MCFG/Active.hs b/src-3.0/GF/Parsing/MCFG/Active.hs new file mode 100644 index 000000000..c6e9c6b06 --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/Active.hs @@ -0,0 +1,318 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.Active (parse, parseR) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc + +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities + +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo + +import GF.System.Tracing + +import Control.Monad (guard) + +import GF.Infra.Print + +---------------------------------------------------------------------- +-- * parsing + +parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t +parse strategy pinfo starts toks = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ] + where chart = process strategy pinfo starts toks + +-- parseR :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t +parseR strategy pinfo starts = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ] + where chart = processR strategy pinfo starts + +process :: (Ord n, Ord c, Ord l, Ord t) => + String -> MCFPInfo c n l t -> [c] -> Input t -> AChart c n l +process strategy pinfo starts toks + = tracePrt "MCFG.Active - chart size" prtSizes $ + buildChart keyof (complete : combine : convert : rules) axioms + where rules | isNil strategy = [scan] + | isBU strategy = [scan, predictKilbury pinfo toks] + | isTD strategy = [scan, predictEarley pinfo toks] + axioms | isNil strategy = predict pinfo toks + | isBU strategy = {- terminal pinfo toks ++ -} initialScan pinfo toks + | isTD strategy = initial pinfo starts toks + +--processR :: (Ord n, Ord c, Ord l) => +-- String -> MCFPInfo c n l Range -> [c] -> AChart c n l +processR strategy pinfo starts + = tracePrt "MCFG.Active Range - chart size" prtSizes $ + -- tracePrt "MCFG.Active Range - final chart" prtChart $ + buildChart keyof (complete : combine : convert : rules) axioms + where rules | isNil strategy = [scan] + | isBU strategy = [scan, predictKilburyR pinfo] + | isTD strategy = [scan, predictEarleyR pinfo] + axioms | isNil strategy = predictR pinfo + | isBU strategy = {- terminalR pinfo ++ -} initialScanR pinfo + | isTD strategy = initialR pinfo starts + +isNil s = s=="n" +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: Abstract c n -> [RangeRec l] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +makeMaxRange (Range (_, j)) = Range (j, j) +makeMaxRange EmptyRange = EmptyRange + + +---------------------------------------------------------------------- +-- * inference rules + +-- completion +complete :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +complete _ (Active rule found rng (Lin l []) (lin:lins) recs) = + return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs +complete _ _ = [] + +-- scanning +scan :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) = + do rng'' <- concatRange rng rng' + return $ Active rule found rng'' (Lin l syms) lins recs +scan _ _ = [] + +-- | Creates an Active Item every time it is possible to combine +-- an Active Item from the agenda with a Passive Item from the Chart +combine :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +combine chart item@(Active _ _ _ (Lin _ (Cat (c,_,_):_)) _ _) = + do Passive _c found <- chartLookup chart (Pass c) + combine2 chart found item +combine chart (Passive c found) = + do item <- chartLookup chart (Act c) + combine2 chart found item +combine _ _ = [] + +combine2 chart found' (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) = + do rng' <- projection r found' + rng'' <- concatRange rng rng' + recs' <- unifyRec recs d found' + return $ Active rule found rng'' (Lin l syms) lins recs' + +-- | Active Items with nothing to find are converted to Final items, +-- which in turn are converted to Passive Items +convert :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +convert _ (Active rule found rng (Lin lbl []) [] recs) = + return $ Final rule (found ++ [(lbl,rng)]) recs +convert _ (Final (Abs cat _ _) found _) = + return $ Passive cat found +convert _ _ = [] + + +---------------------------------------------------------------------- +-- Naive -- + +predict :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] +predict pinfo toks = tracePrt "MCFG.Active (Naive) - predicted rules" (prt . length) $ + do (Rule abs (Cnc _ _ lins)) <- rulesMatchingInput pinfo toks + (lin':lins') <- rangeRestRec toks lins + return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs) + + +---------------------------------------------------------------------- +-- NaiveR -- + +predictR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] +predictR pinfo = tracePrt "MCFG.Active (Naive Range) - predicted rules" (prt . length) $ + do (Rule abs (Cnc _ _ (lin:lins))) <- allRules pinfo + return $ Active abs [] EmptyRange lin lins (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Earley -- + +-- anropas med alla startkategorier +initial :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> [c] -> Input t -> [Item c n l] +initial pinfo starts toks = + tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $ + do cat <- starts + Rule abs (Cnc _ _ lins) <- topdownRules pinfo ? cat + lin' : lins' <- rangeRestRec toks lins + return $ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) + +predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t + -> AChart c n l -> Item c n l -> [Item c n l] +predictEarley pinfo toks _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) = + topdownRules pinfo ? cat >>= predictEarley2 toks rng +predictEarley _ _ _ _ = [] + +predictEarley2 :: (Ord c, Ord n, Ord l, Ord t) => Input t -> Range -> MCFRule c n l t -> [Item c n l] +predictEarley2 toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) = + do lins' <- rangeRestRec toks lins + return $ Final abs (makeRangeRec lins') [] +predictEarley2 toks rng (Rule abs (Cnc _ _ lins)) = + do lin' : lins' <- rangeRestRec toks lins + return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Earley Range -- + +initialR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l] +initialR pinfo starts = + tracePrt "MCFG.Active (Earley Range) - initial rules" (prt . length) $ + do cat <- starts + Rule abs (Cnc _ _ (lin : lins)) <- topdownRules pinfo ? cat + return $ Active abs [] (Range (0, 0)) lin lins (emptyChildren abs) + +predictEarleyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range + -> AChart c n l -> Item c n l -> [Item c n l] +predictEarleyR pinfo _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) = + topdownRules pinfo ? cat >>= predictEarleyR2 rng +predictEarleyR _ _ _ = [] + +predictEarleyR2 :: (Ord c, Ord n, Ord l) => Range -> MCFRule c n l Range -> [Item c n l] +predictEarleyR2 _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) = + return $ Final abs (makeRangeRec lins) [] +predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) = + return $ Active abs [] EmptyRange lin lins (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Kilbury -- + +-- terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] +-- terminal pinfo toks = +-- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $ +-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo +-- lins' <- rangeRestRec toks lins +-- return $ Final abs (makeRangeRec lins') [] + +initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] +initialScan pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial scanned rules + epsilon rules" (prt . length) $ + do tok <- aElems (inputToken toks) + Rule abs (Cnc _ _ lins) <- + leftcornerTokens pinfo ? tok ++ + epsilonRules pinfo + lin' : lins' <- rangeRestRec toks lins + return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs) + +predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t + -> AChart c n l -> Item c n l -> [Item c n l] +predictKilbury pinfo toks _ (Passive cat found) = + do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat + lin' : lins' <- rangeRestRec toks (Lin l syms : lins) + rng <- projection r found + children <- unifyRec (emptyChildren abs) i found + return $ Active abs [] rng lin' lins' children +predictKilbury _ _ _ _ = [] + + + +---------------------------------------------------------------------- +-- KilburyR -- + +-- terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] +-- terminalR pinfo = +-- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $ +-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo +-- return $ Final abs (makeRangeRec lins) [] + +initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] +initialScanR pinfo = + tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $ + do Rule abs (Cnc _ _ (lin : lins)) <- + concatMap snd (aAssocs (leftcornerTokens pinfo)) ++ + epsilonRules pinfo + return $ Active abs [] EmptyRange lin lins (emptyChildren abs) + +predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range + -> AChart c n l -> Item c n l -> [Item c n l] +predictKilburyR pinfo _ (Passive cat found) = + do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat + rng <- projection r found + children <- unifyRec (emptyChildren abs) i found + return $ Active abs [] rng (Lin l syms) lins children +predictKilburyR _ _ _ = [] + + +---------------------------------------------------------------------- +-- * type definitions + +type AChart c n l = ParseChart (Item c n l) (AKey c) + +data Item c n l = Active (Abstract c n) + (RangeRec l) + Range + (Lin c l Range) + (LinRec c l Range) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] + | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +data AKey c = Act c + | Pass c + | Useless + | Fin + deriving (Eq, Ord, Show) + + +keyof :: Item c n l -> AKey c +keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next +keyof (Final _ _ _) = Fin +keyof (Passive cat _) = Pass cat +keyof _ = Useless + + +---------------------------------------------------------------------- +-- for tracing purposes + +prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++ + ", passive=" ++ show (sum [length (chartLookup chart k) | + k@(Pass _) <- chartKeys chart ]) ++ + ", active=" ++ show (sum [length (chartLookup chart k) | + k@(Act _) <- chartKeys chart ]) ++ + ", useless=" ++ show (length (chartLookup chart Useless)) + +prtChart chart = concat [ "\n*** KEY: " ++ prt k ++ + prtBefore "\n " (chartLookup chart k) | + k <- chartKeys chart ] + +prtFinals chart = prtBefore "\n " (chartLookup chart Fin) + +instance (Print c, Print n, Print l) => Print (Item c n l) where + prt (Active abs found rng lin tofind children) = + "? " ++ prt abs ++ ";\n\t" ++ + "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++ + prt lin ++ " {" ++ prtSep " " tofind ++ "}" ++ + ( if null children then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" ) + prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}" + prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++ + ( if null rrs then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" ) + +instance Print c => Print (AKey c) where + prt (Act c) = "Active " ++ prt c + prt (Pass c) = "Passive " ++ prt c + prt (Fin) = "Final" + prt (Useless) = "Useless" diff --git a/src-3.0/GF/Parsing/MCFG/Active2.hs b/src-3.0/GF/Parsing/MCFG/Active2.hs new file mode 100644 index 000000000..7ad8627bc --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/Active2.hs @@ -0,0 +1,237 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.2 $ +-- +-- MCFG parsing, the active algorithm (alternative version) +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.Active2 (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc + +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities + +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo + +import GF.System.Tracing + +import Control.Monad (guard) + +import GF.Infra.Print + +---------------------------------------------------------------------- +-- * parsing + +--parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t +parse strategy pinfo starts toks = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ] + where chart = process strategy pinfo starts toks + +process :: (Ord n, Ord c, Ord l, Ord t) => + String -> MCFPInfo c n l t -> [c] -> Input t -> AChart c n l t +process strategy pinfo starts toks + = tracePrt "MCFG.Active - chart size" prtSizes $ + buildChart keyof (complete : combine : convert : rules) axioms + where rules | isNil strategy = [scan toks] + | isBU strategy = [scan toks, predictKilbury pinfo toks] + | isTD strategy = [scan toks, predictEarley pinfo toks] + axioms | isNil strategy = predict pinfo toks + | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks + | isTD strategy = initial pinfo starts toks + +isNil s = s=="n" +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: Abstract c n -> [RangeRec l] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +makeMaxRange (Range (_, j)) = Range (j, j) +makeMaxRange EmptyRange = EmptyRange + + +---------------------------------------------------------------------- +-- * inference rules + +-- completion +complete :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t] +complete _ (Active rule found rng (Lin l []) (lin:lins) recs) = + return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs +complete _ _ = [] + +-- scanning +--scan :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t] +scan inp _ (Active rule found rng (Lin l (Tok tok:syms)) lins recs) = + do rng' <- map makeRange (inputToken inp ? tok) + rng'' <- concatRange rng rng' + return $ Active rule found rng'' (Lin l syms) lins recs +scan _ _ _ = [] + +-- | Creates an Active Item every time it is possible to combine +-- an Active Item from the agenda with a Passive Item from the Chart +combine :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t] +combine chart item@(Active _ _ _ (Lin _ (Cat (c,_,_):_)) _ _) = + do Passive _c found <- chartLookup chart (Pass c) + combine2 chart found item +combine chart (Passive c found) = + do item <- chartLookup chart (Act c) + combine2 chart found item +combine _ _ = [] + +combine2 chart found' (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) = + do rng' <- projection r found' + rng'' <- concatRange rng rng' + recs' <- unifyRec recs d found' + return $ Active rule found rng'' (Lin l syms) lins recs' + +-- | Active Items with nothing to find are converted to Final items, +-- which in turn are converted to Passive Items +convert :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t] +convert _ (Active rule found rng (Lin lbl []) [] recs) = + return $ Final rule (found ++ [(lbl,rng)]) recs +convert _ (Final (Abs cat _ _) found _) = + return $ Passive cat found +convert _ _ = [] + + +---------------------------------------------------------------------- +-- Naive -- + +predict :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t] +predict pinfo toks = tracePrt "MCFG.Active (Naive) - predicted rules" (prt . length) $ + do Rule abs (Cnc _ _ (lin:lins)) <- rulesMatchingInput pinfo toks + return $ Active abs [] EmptyRange lin lins (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Earley -- + +-- anropas med alla startkategorier +initial :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> [c] -> Input t -> [Item c n l t] +initial pinfo starts toks = + tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $ + do cat <- starts + Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? cat + return $ Active abs [] (Range (0, 0)) lin lins (emptyChildren abs) + +predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t + -> AChart c n l t -> Item c n l t -> [Item c n l t] +predictEarley pinfo toks _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) = + topdownRules pinfo ? cat >>= predictEarley2 toks rng +predictEarley _ _ _ _ = [] + +predictEarley2 :: (Ord c, Ord n, Ord l, Ord t) => Input t -> Range -> MCFRule c n l t -> [Item c n l t] +predictEarley2 toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) = + do lins' <- rangeRestRec toks lins + return $ Final abs (makeRangeRec lins') [] +predictEarley2 toks rng (Rule abs (Cnc _ _ (lin:lins))) = + return $ Active abs [] EmptyRange lin lins (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Kilbury -- + +terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t] +terminal pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $ + do Rule abs (Cnc _ _ lins) <- emptyRules pinfo + lins' <- rangeRestRec toks lins + return $ Final abs (makeRangeRec lins') [] + +initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t] +initialScan pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $ + do tok <- aElems (inputToken toks) + Rule abs (Cnc _ _ (lin:lins)) <- leftcornerTokens pinfo ? tok + return $ Active abs [] EmptyRange lin lins (emptyChildren abs) + +predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t + -> AChart c n l t -> Item c n l t -> [Item c n l t] +predictKilbury pinfo toks _ (Passive cat found) = + do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat + rng <- projection r found + children <- unifyRec (emptyChildren abs) i found + return $ Active abs [] rng (Lin l syms) lins children +predictKilbury _ _ _ _ = [] + + +---------------------------------------------------------------------- +-- * type definitions + +type AChart c n l t = ParseChart (Item c n l t) (AKey c t) + +data Item c n l t = Active (Abstract c n) + (RangeRec l) + Range + (Lin c l t) + (LinRec c l t) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] + | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +data AKey c t = Act c + | ActTok t + | Pass c + | Useless + | Fin + deriving (Eq, Ord, Show) + + +keyof :: Item c n l t -> AKey c t +keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next +keyof (Active _ _ _ (Lin _ (Tok tok:_)) _ _) = ActTok tok +keyof (Final _ _ _) = Fin +keyof (Passive cat _) = Pass cat +keyof _ = Useless + + +---------------------------------------------------------------------- +-- for tracing purposes + +prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++ + ", passive=" ++ show (sum [length (chartLookup chart k) | + k@(Pass _) <- chartKeys chart ]) ++ + ", active=" ++ show (sum [length (chartLookup chart k) | + k@(Act _) <- chartKeys chart ]) ++ + ", active-tok=" ++ show (sum [length (chartLookup chart k) | + k@(ActTok _) <- chartKeys chart ]) ++ + ", useless=" ++ show (length (chartLookup chart Useless)) + +prtChart chart = concat [ "\n*** KEY: " ++ prt k ++ + prtBefore "\n " (chartLookup chart k) | + k <- chartKeys chart ] + +prtFinals chart = prtBefore "\n " (chartLookup chart Fin) + +instance (Print c, Print n, Print l, Print t) => Print (Item c n l t) where + prt (Active abs found rng lin tofind children) = + "? " ++ prt abs ++ ";\n\t" ++ + "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++ + prt lin ++ " {" ++ prtSep " " tofind ++ "}" ++ + ( if null children then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" ) + prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}" + prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++ + ( if null rrs then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" ) + +instance (Print c, Print t) => Print (AKey c t) where + prt (Act c) = "Active " ++ prt c + prt (ActTok t) = "Active-Tok " ++ prt t + prt (Pass c) = "Passive " ++ prt c + prt (Fin) = "Final" + prt (Useless) = "Useless" diff --git a/src-3.0/GF/Parsing/MCFG/FastActive.hs b/src-3.0/GF/Parsing/MCFG/FastActive.hs new file mode 100644 index 000000000..0a8e24b55 --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/FastActive.hs @@ -0,0 +1,176 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm, optimized version +-- structure stolen from Krasimir Angelov's GF.Parsing.FCFG.Active +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.FastActive (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.Utilities + +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities + +import GF.Infra.Ident + +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo + +import GF.System.Tracing + +import Control.Monad (guard) + +import GF.Infra.Print + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +-- parse :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t +parse strategy pinfo starts = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + Final (Abs cat rhs fun) found rrecs <- listXChartFinal chart ] + where chart = process strategy pinfo axioms emptyXChart + + -- axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks + axioms | isBU strategy = initialBU pinfo + | isTD strategy = initialTD pinfo starts + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: Abstract c n -> [RangeRec l] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +updateChildren :: Eq l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]] +updateChildren recs i rec = updateNthM update i recs + where update rec' = do guard (null rec' || rec' == rec) + return rec + +process :: (Ord c, Ord n, Ord l) => String -> MCFPInfo c n l Range -> [Item c n l] -> XChart c n l -> XChart c n l +process strategy pinfo [] chart = chart +process strategy pinfo (item:items) chart = process strategy pinfo items $! univRule item chart + where + univRule item@(Active abs found rng (Lin l syms) lins recs) chart + = case syms of + Cat(c,r,d) : syms' -> + case insertXChart chart item c of + Nothing -> chart + Just chart -> + let items = -- predict topdown + [ Active abs [] EmptyRange lin lins (emptyChildren abs) | + isTD strategy, + Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? c ] ++ + + -- combine + [ Active abs found rng'' (Lin l syms') lins recs' | + Final _ found' _ <- lookupXChartFinal chart c, + rng' <- projection r found', + rng'' <- concatRange rng rng', + recs' <- updateChildren recs d found' ] + in process strategy pinfo items chart + + -- scan + Tok rng' : syms' -> + let items = [ Active abs found rng'' (Lin l syms') lins recs | + rng'' <- concatRange rng rng' ] + in process strategy pinfo items chart + + -- complete + [] -> case lins of + (lin':lins') -> univRule (Active abs ((l,rng):found) EmptyRange lin' lins' recs) chart + [] -> univRule (Final abs (reverse ((l,rng):found)) recs) chart + + univRule item@(Final abs@(Abs cat _ _) found' recs) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> + let items = -- predict bottomup + [ Active abs [] rng (Lin l syms') lins children | + isBU strategy, + Rule abs (Cnc _ _ (Lin l (Cat(c,r,d):syms') : lins)) <- leftcornerCats pinfo ? cat, + -- lin' : lins' <- rangeRestRec toks (Lin l syms' : lins), + rng <- projection r found', + children <- unifyRec (emptyChildren abs) d found' ] ++ + + -- combine + [ Active abs found rng'' (Lin l syms') lins recs' | + Active abs found rng (Lin l (Cat(c,r,d):syms')) lins recs <- lookupXChartAct chart cat, + rng' <- projection r found', + rng'' <- concatRange rng rng', + recs' <- updateChildren recs d found' ] + in process strategy pinfo items chart + +---------------------------------------------------------------------- +-- * XChart + +data XChart c n l = XChart !(AChart c n l) !(AChart c n l) +type AChart c n l = ParseChart (Item c n l) c + +data Item c n l = Active (Abstract c n) + (RangeRec l) + Range + (Lin c l Range) + (LinRec c l Range) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] +-- | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +emptyXChart :: (Ord c, Ord n, Ord l) => XChart c n l +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +listXChartAct (XChart actives finals) = chartList actives +listXChartFinal (XChart actives finals) = chartList finals + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l] +initialTD pinfo starts = + [ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) | + cat <- starts, + Rule abs (Cnc _ _ (lin':lins')) <- topdownRules pinfo ? cat ] + -- lin' : lins' <- rangeRestRec toks lins + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] +initialBU pinfo = + [ Active abs [] EmptyRange lin' lins' (emptyChildren abs) | + -- do tok <- aElems (inputToken toks) + Rule abs (Cnc _ _ (lin':lins')) <- + concatMap snd (aAssocs (leftcornerTokens pinfo)) ++ + -- leftcornerTokens pinfo ? tok ++ + epsilonRules pinfo ] + -- lin' : lins' <- rangeRestRec toks lins diff --git a/src-3.0/GF/Parsing/MCFG/Incremental.hs b/src-3.0/GF/Parsing/MCFG/Incremental.hs new file mode 100644 index 000000000..bd5b4114d --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/Incremental.hs @@ -0,0 +1,178 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.4 $ +-- +-- MCFG parsing, the incremental algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.Incremental (parse, parseR) where + +import Data.List +import Control.Monad (guard) + +import GF.Data.Utilities (select) +import GF.Data.GeneralDeduction +import GF.Data.Assoc + +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities + +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo + +import GF.System.Tracing +import GF.Infra.Print + +---------------------------------------------------------------------- +-- parsing + +parse :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t +parse pinfo starts toks = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ] + where chart = process pinfo toks ntoks + ntoks = snd (inputBounds toks) + +-- parseR :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t +parseR pinfo starts ntoks = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ] + where chart = processR pinfo ntoks + +process :: (Ord n, Ord c, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> Int -> IChart c n l +process pinfo toks ntoks + = tracePrt "MCFG.Incremental - chart size" prtSizes $ + buildChart keyof [complete ntoks, scan, combine, convert] (predict pinfo toks ntoks) + +processR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> Int -> IChart c n l +processR pinfo ntoks + = tracePrt "MCFG.Incremental Range - chart size" prtSizes $ + buildChart keyof [complete ntoks, scan, combine, convert] (predictR pinfo ntoks) + +complete :: (Ord n, Ord c, Ord l) => Int -> IChart c n l -> Item c n l -> [Item c n l] +complete ntoks _ (Active rule found rng (Lin l []) lins recs) = + do (lin, lins') <- select lins + k <- [minRange rng .. ntoks] + return $ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs +complete _ _ _ = [] + + +predict :: (Ord n, Ord c, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> Int -> [Item c n l] +predict pinfo toks n = + tracePrt "MCFG.Incremental - predicted rules" (prt . length) $ + do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- rulesMatchingInput pinfo toks + let daughters = replicate (length rhs) [] + lins' <- rangeRestRec toks lins + (lin', lins'') <- select lins' + k <- [0..n] + return $ Active abs [] (Range (k,k)) lin' lins'' daughters + + +predictR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> Int -> [Item c n l] +predictR pinfo n = + tracePrt "MCFG.Incremental Range - predicted rules" (prt . length) $ + do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- allRules pinfo + let daughters = replicate (length rhs) [] + (lin, lins') <- select lins + k <- [0..n] + return $ Active abs [] (Range (k,k)) lin lins' daughters + + +scan :: (Ord n, Ord c, Ord l) => IChart c n l -> Item c n l -> [Item c n l] +scan _ (Active abs found rng (Lin l (Tok rng':syms)) lins recs) = + do rng'' <- concatRange rng rng' + return $ Active abs found rng'' (Lin l syms) lins recs +scan _ _ = [] + + +combine :: (Ord n, Ord c, Ord l) => IChart c n l -> Item c n l -> [Item c n l] +combine chart active@(Active _ _ rng (Lin _ (Cat (c,l,_):_)) _ _) = + do passive <- chartLookup chart (Pass c l (maxRange rng)) + combine2 active passive +combine chart passive@(Active (Abs c _ _) _ rng (Lin l []) _ _) = + do active <- chartLookup chart (Act c l (minRange rng)) + combine2 active passive +combine _ _ = [] + +combine2 (Active abs found rng (Lin l (Cat (c,l',d):syms)) lins recs) + (Active _ found' rng' _ _ _) + = do rng'' <- concatRange rng rng' + recs' <- unifyRec recs d found'' + return $ Active abs found rng'' (Lin l syms) lins recs' + where found'' = found' ++ [(l',rng')] + + +convert _ (Active rule found rng (Lin lbl []) [] recs) = + return $ Final rule (found ++ [(lbl,rng)]) recs +convert _ _ = [] + +---------------------------------------------------------------------- +-- type definitions + +type IChart c n l = ParseChart (Item c n l) (IKey c l) + +data Item c n l = Active (Abstract c n) + (RangeRec l) + Range + (Lin c l Range) + (LinRec c l Range) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] +-- | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +data IKey c l = Act c l Int + | Pass c l Int + | Useless + | Fin + deriving (Eq, Ord, Show) + +keyof :: Item c n l -> IKey c l +keyof (Active _ _ rng (Lin _ (Cat (next,lbl,_):_)) _ _) + = Act next lbl (maxRange rng) +keyof (Active (Abs cat _ _) found rng (Lin lbl []) _ _) + = Pass cat lbl (minRange rng) +keyof (Final _ _ _) = Fin +keyof _ + = Useless + + +---------------------------------------------------------------------- +-- for tracing purposes +prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++ + ", passive=" ++ show (sum [length (chartLookup chart k) | + k@(Pass _ _ _) <- chartKeys chart ]) ++ + ", active=" ++ show (sum [length (chartLookup chart k) | + k@(Act _ _ _) <- chartKeys chart ]) ++ + ", useless=" ++ show (length (chartLookup chart Useless)) + +prtChart chart = concat [ "\n*** KEY: " ++ prt k ++ + prtBefore "\n " (chartLookup chart k) | + k <- chartKeys chart ] + +instance (Print c, Print n, Print l) => Print (Item c n l) where + prt (Active abs found rng lin tofind children) = + "? " ++ prt abs ++ ";\n\t" ++ + "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++ + prt lin ++ "{" ++ prtSep " " tofind ++ "}" ++ + ( if null children then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" ) +-- prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}" + prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++ + ( if null rrs then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" ) + +instance (Print c, Print l) => Print (IKey c l) where + prt (Act c l i) = "Active " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i + prt (Pass c l i) = "Passive " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i + prt (Fin) = "Final" + prt (Useless) = "Useless" diff --git a/src-3.0/GF/Parsing/MCFG/Incremental2.hs b/src-3.0/GF/Parsing/MCFG/Incremental2.hs new file mode 100644 index 000000000..db6c3084e --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/Incremental2.hs @@ -0,0 +1,157 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.3 $ +-- +-- MCFG parsing, the incremental algorithm (alternative version) +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.Incremental2 (parse) where + +import Data.List +import Data.Array +import Control.Monad (guard) + +import GF.Data.Utilities (select) +import GF.Data.Assoc +import GF.Data.IncrementalDeduction + +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities + +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo + +import GF.System.Tracing +import GF.Infra.Print + +---------------------------------------------------------------------- +-- parsing + +-- parseR :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t +parse pinfo starts inp = + accumAssoc groupSyntaxNodes $ + [ ((cat, found), SNode fun (zip rhs rrecs)) | + k <- uncurry enumFromTo (inputBounds inp), + Final (Abs cat rhs fun) found rrecs <- chartLookup chart k Fin ] + where chart = process pinfo inp + +--process :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> (Int, Int) -> IChart c n l +process pinfo inp + = tracePrt "MCFG.Incremental - chart size" + (prt . map (prtSizes finalChart . fst) . assocs) $ + finalChart + where finalChart = buildChart keyof rules axioms inBounds + axioms k = tracePrt ("MCFG.Incremental - axioms for " ++ show k) (prt . length) $ + predict k ++ scan k ++ complete1 k + rules k item = complete2 k item ++ combine k item ++ convert k item + inBounds = inputBounds inp + + -- axioms: predict + scan + complete + predict k = do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- rulesMatchingInput pinfo inp + let daughters = replicate (length rhs) [] + (lin, lins') <- select lins + return $ Active abs [] k lin lins' daughters + + scan k = do (tok, js) <- aAssocs (inputTo inp ! k) + j <- js + Active abs found i (Lin l (Tok _tok:syms)) lins recs <- + chartLookup finalChart j (ActTok tok) + return $ Active abs found i (Lin l syms) lins recs + + complete1 k = do j <- [fst inBounds .. k-1] + Active abs found i (Lin l _Nil) lins recs <- + chartLookup finalChart j Pass + let found' = found ++ [(l, makeRange (i,j))] + (lin, lins') <- select lins + return $ Active abs found' k lin lins' recs + + -- rules: convert + combine + complete + convert k (Active rule found j (Lin lbl []) [] recs) = + let found' = found ++ [(lbl, makeRange (j,k))] + in return $ Final rule found' recs + convert _ _ = [] + + combine k (Active (Abs cat _ _) found' j (Lin lbl []) _ _) = + do guard (j < k) ---- cannot handle epsilon-rules + Active abs found i (Lin l (Cat (_cat,_lbl,nr):syms)) lins recs <- + chartLookup finalChart j (Act cat lbl) + let found'' = found' ++ [(lbl, makeRange (j,k))] + recs' <- unifyRec recs nr found'' + return $ Active abs found i (Lin l syms) lins recs' + combine _ _ = [] + + complete2 k (Active abs found i (Lin l []) lins recs) = + do let found' = found ++ [(l, makeRange (i,k))] + (lin, lins') <- select lins + return $ Active abs found' k lin lins' recs + complete2 _ _ = [] + +---------------------------------------------------------------------- +-- type definitions + +type IChart c n l t = IncrementalChart (Item c n l t) (IKey c l t) + +data Item c n l t = Active (Abstract c n) + (RangeRec l) + Int + (Lin c l t) + (LinRec c l t) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] + ---- | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +data IKey c l t = Act c l + | ActTok t + ---- | Useless + | Pass + | Fin + deriving (Eq, Ord, Show) + +keyof :: Item c n l t -> IKey c l t +keyof (Active _ _ _ (Lin _ (Cat (next,lbl,_):_)) _ _) = Act next lbl +keyof (Active _ _ _ (Lin _ (Tok tok:_)) _ _) = ActTok tok +keyof (Active _ _ _ (Lin _ []) _ _) = Pass +keyof (Final _ _ _) = Fin +-- keyof _ = Useless + + +---------------------------------------------------------------------- +-- for tracing purposes +prtSizes chart k = "f=" ++ show (length (chartLookup chart k Fin)) ++ + " p=" ++ show (length (chartLookup chart k Pass)) ++ + " a=" ++ show (sum [length (chartLookup chart k key) | + key@(Act _ _) <- chartKeys chart k ]) ++ + " t=" ++ show (sum [length (chartLookup chart k key) | + key@(ActTok _) <- chartKeys chart k ]) + -- " u=" ++ show (length (chartLookup chart k Useless)) + +-- prtChart chart = concat [ "\n*** KEY: " ++ prt k ++ +-- prtBefore "\n " (chartLookup chart k) | +-- k <- chartKeys chart ] + +instance (Print c, Print n, Print l, Print t) => Print (Item c n l t) where + prt (Active abs found rng lin tofind children) = + "? " ++ prt abs ++ ";\n\t" ++ + "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++ + prt lin ++ "{" ++ prtSep " " tofind ++ "}" ++ + ( if null children then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" ) + -- prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}" + prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++ + ( if null rrs then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" ) + +instance (Print c, Print l, Print t) => Print (IKey c l t) where + prt (Act c l) = "Active " ++ prt c ++ " " ++ prt l + prt (ActTok t) = "ActiveTok " ++ prt t + -- prt (Pass c l i) = "Passive " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i + prt (Fin) = "Final" + -- prt (Useless) = "Useless" diff --git a/src-3.0/GF/Parsing/MCFG/Naive.hs b/src-3.0/GF/Parsing/MCFG/Naive.hs new file mode 100644 index 000000000..7d1fa0a8a --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/Naive.hs @@ -0,0 +1,142 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, the naive algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.Naive (parse, parseR) where + +import Control.Monad (guard) + +-- GF modules +import GF.Data.GeneralDeduction +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo +import GF.Data.SortedList +import GF.Data.Assoc +import GF.System.Tracing + +import GF.Infra.Print + +---------------------------------------------------------------------- +-- * parsing + +-- | Builds a chart from the initial agenda, given by prediction, and the inference rules +parse :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t +parse pinfo starts toks + = accumAssoc groupSyntaxNodes $ + [ ((cat, makeRangeRec lins), SNode fun (zip rhs rrecs)) | + Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ] + where chart = process pinfo toks + +-- | Builds a chart from the initial agenda, given by prediction, and the inference rules +-- parseR :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t +parseR pinfo starts + = accumAssoc groupSyntaxNodes $ + [ ((cat, makeRangeRec lins), SNode fun (zip rhs rrecs)) | + Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ] + where chart = processR pinfo + +process :: (Ord t, Ord n, Ord c, Ord l) => MCFPInfo c n l t -> Input t -> NChart c n l +process pinfo toks + = tracePrt "MCFG.Naive - chart size" prtSizes $ + buildChart keyof [convert, combine] (predict pinfo toks) + +processR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> NChart c n l +processR pinfo + = tracePrt "MCFG.Naive Range - chart size" prtSizes $ + buildChart keyof [convert, combine] (predictR pinfo) + + +---------------------------------------------------------------------- +-- * inference rules + +-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda +predict :: (Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] +predict pinfo toks = tracePrt "MCFG.Naive - predicted rules" (prt . length) $ + do Rule abs (Cnc _ _ lins) <- rulesMatchingInput pinfo toks + lins' <- rangeRestRec toks lins + return $ Active (abs, []) lins' [] + +-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda +predictR :: (Ord l) => MCFPInfo c n l Range -> [Item c n l] +predictR pinfo = tracePrt "MCFG.Naive Range - predicted rules" (prt . length) $ + do Rule abs (Cnc _ _ lins) <- allRules pinfo + return $ Active (abs, []) lins [] + +-- | Creates an Active Item every time it is possible to combine +-- an Active Item from the agenda with a Passive Item from the Chart +combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l] +combine chart item@(Active (Abs _ (c:_) _, _) _ _) = + do Passive _c rrec <- chartLookup chart (Pass c) + combine2 chart rrec item +combine chart (Passive c rrec) = + do item <- chartLookup chart (Act c) + combine2 chart rrec item +combine _ _ = [] + +combine2 chart rrec (Active (Abs nt (c:find) f, found) lins rrecs) = + do lins' <- substArgRec (length found) rrec lins + return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec]) + +-- | Active Items with nothing to find are converted to Passive Items +convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l] +convert _ (Active (Abs cat [] fun, _) lins _) = [Passive cat (makeRangeRec lins)] +convert _ _ = [] + + +---------------------------------------------------------------------- +-- * type definitions + +type NChart c n l = ParseChart (Item c n l) (NKey c) + +data Item c n l = Active (DottedRule c n) (LinRec c l Range) [RangeRec l] + | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +type DottedRule c n = (Abstract c n, [c]) + +data NKey c = Act c + | Pass c + | Final + deriving (Eq, Ord, Show) + +keyof :: Item c n l -> NKey c +keyof (Active (Abs _ (next:_) _, _) _ _) = Act next +keyof (Passive cat _) = Pass cat +keyof _ = Final + +-- for tracing purposes +prtSizes chart = "final=" ++ show (length (chartLookup chart Final)) ++ + ", passive=" ++ show (sum [length (chartLookup chart k) | + k@(Pass _) <- chartKeys chart ]) ++ + ", active=" ++ show (sum [length (chartLookup chart k) | + k@(Act _) <- chartKeys chart ]) + +prtChart chart = concat [ "\n*** KEY: " ++ prt k ++ + prtBefore "\n " (chartLookup chart k) | + k <- chartKeys chart ] + +instance (Print c, Print n, Print l) => Print (Item c n l) where + prt (Active (abs, cs) lrec rrecs) = "? " ++ prt abs ++ " . " ++ prtSep " " cs ++ ";\n\t" ++ + "{" ++ prtSep " " lrec ++ "}" ++ + ( if null rrecs then ";" else ";\n\t" ++ + "{" ++ prtSep "} {" (map (prtSep " ") rrecs) ++ "}" ) + prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}" + +instance Print c => Print (NKey c) where + prt (Act c) = "Active " ++ prt c + prt (Pass c) = "Passive " ++ prt c + prt (Final) = "Final" + + diff --git a/src-3.0/GF/Parsing/MCFG/PInfo.hs b/src-3.0/GF/Parsing/MCFG/PInfo.hs new file mode 100644 index 000000000..56119dcec --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/PInfo.hs @@ -0,0 +1,162 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.PInfo where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Parsing.MCFG.Range + +---------------------------------------------------------------------- +-- type declarations + +-- | the list of categories = possible starting categories +type MCFParser c n l t = MCFPInfo c n l t + -> [c] + -> Input t + -> SyntaxChart n (c, RangeRec l) + +makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l) +makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)]) + + +------------------------------------------------------------ +-- parser information + +data MCFPInfo c n l t + = MCFPInfo { grammarTokens :: SList t + , nameRules :: Assoc n (SList (MCFRule c n l t)) + , topdownRules :: Assoc c (SList (MCFRule c n l t)) + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + , epsilonRules :: [MCFRule c n l t] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , leftcornerCats :: Assoc c (SList (MCFRule c n l t)) + , leftcornerTokens :: Assoc t (SList (MCFRule c n l t)) + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: SList c + -- ^ used when calculating starting categories + , rulesByToken :: Assoc t (SList (MCFRule c n l t, SList t)) + , rulesWithoutTokens :: SList (MCFRule c n l t) + -- ^ used by 'rulesMatchingInput' + , allRules :: MCFGrammar c n l t + -- ^ used by any unoptimized algorithm + + --bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)), + --emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)), + --emptyCategories :: Set c, + } + + +rangeRestrictPInfo :: (Ord c, Ord n, Ord l, Ord t) => + MCFPInfo c n l t -> Input t -> MCFPInfo c n l Range +rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp = + tracePrt "MCFG.PInfo - Restricting the parser information" (prt . grammarTokens) + MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp)) + , nameRules = rrAssoc (nameRules pinfo) + , topdownRules = rrAssoc (topdownRules pinfo) + , epsilonRules = rrRules (epsilonRules pinfo) + , leftcornerCats = rrAssoc (leftcornerCats pinfo) + , leftcornerTokens = lctokens + , grammarCats = grammarCats pinfo + , rulesByToken = emptyAssoc -- error "MCFG.PInfo.rulesByToken - no range restriction" + , rulesWithoutTokens = [] -- error "MCFG.PInfo.rulesByToken - no range restriction" + , allRules = allrules -- rrRules (allRules pinfo) + } + + where lctokens = accumAssoc id + [ (rng, rule) | (tok, rules) <- aAssocs (leftcornerTokens pinfo), + inputToken inp ?= tok, + rule@(Rule _ (Cnc _ _ (Lin _ (Tok rng:_) : _))) + <- concatMap (rangeRestrictRule inp) rules ] + + allrules = rrRules $ rulesMatchingInput pinfo inp + + rrAssoc assoc = filterNull $ fmap rrRules assoc + filterNull assoc = assocFilter (not . null) assoc + rrRules rules = concatMap (rangeRestrictRule inp) rules + + +buildMCFPInfo :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t +buildMCFPInfo grammar = + traceCalcFirst grammar $ + tracePrt "MCFG.PInfo - parser info" (prt) $ + MCFPInfo { grammarTokens = grammartokens + , nameRules = namerules + , topdownRules = topdownrules + , epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + , rulesByToken = rulesbytoken + , rulesWithoutTokens = ruleswithouttokens + , allRules = allrules + } + + where allrules = concatMap expandVariants grammar + grammartokens = union (map fst ruletokens) + namerules = accumAssoc id + [ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ] + topdownrules = accumAssoc id + [ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ] + epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ] + leftcorncats = accumAssoc id + [ (cat, rule) | + rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ] + leftcorntoks = accumAssoc id + [ (tok, rule) | + rule@(Rule _ (Cnc _ _ (Lin _ (Tok tok:_) : _))) <- allrules ] + grammarcats = aElems topdownrules + ruletokens = [ (toksoflins lins, rule) | + rule@(Rule _ (Cnc _ _ lins)) <- allrules ] + toksoflins lins = nubsort [ tok | Lin _ syms <- lins, Tok tok <- syms ] + rulesbytoken = accumAssoc id + [ (tok, (rule, toks)) | (tok:toks, rule) <- ruletokens ] + ruleswithouttokens = nubsort [ rule | ([], rule) <- ruletokens ] + + +-- | return only the rules for which all tokens are in the input string +rulesMatchingInput :: Ord t => MCFPInfo c n l t -> Input t -> [MCFRule c n l t] +rulesMatchingInput pinfo inp = + [ rule | tok <- toks, + (rule, ruletoks) <- rulesByToken pinfo ? tok, + ruletoks `subset` toks ] + ++ rulesWithoutTokens pinfo + where toks = aElems (inputToken inp) + + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where + prt pI = "[ tokens=" ++ sl grammarTokens ++ + "; categories=" ++ sl grammarCats ++ + "; nameRules=" ++ sla nameRules ++ + "; tdRules=" ++ sla topdownRules ++ + "; epsilonRules=" ++ sl epsilonRules ++ + "; lcCats=" ++ sla leftcornerCats ++ + "; lcTokens=" ++ sla leftcornerTokens ++ + "; byToken=" ++ sla rulesByToken ++ + "; noTokens=" ++ sl rulesWithoutTokens ++ + "; allRules=" ++ sl allRules ++ + " ]" + + where sl f = show $ length $ f pI + sla f = let (as, bs) = unzip $ aAssocs $ f pI + in show (length as) ++ "/" ++ show (length (concat bs)) + diff --git a/src-3.0/GF/Parsing/MCFG/Range.hs b/src-3.0/GF/Parsing/MCFG/Range.hs new file mode 100644 index 000000000..91671fa00 --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/Range.hs @@ -0,0 +1,206 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- Definitions of ranges, and operations on ranges +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.Range + ( Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange, + LinRec, RangeRec, + makeRangeRec, rangeRestRec, rangeRestrictRule, + projection, unifyRec, substArgRec + ) where + + +-- Haskell +import Data.List +import Control.Monad + +-- GF modules +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities +import GF.Infra.Print +import GF.Data.Assoc ((?)) +import GF.Data.Utilities (updateNthM) + +------------------------------------------------------------ +-- ranges as single pairs + +data Range = Range (Int, Int) + | EmptyRange + deriving (Eq, Ord, Show) + +makeRange :: (Int, Int) -> Range +concatRange :: Range -> Range -> [Range] +rangeEdge :: a -> Range -> Edge a +edgeRange :: Edge a -> Range +minRange :: Range -> Int +maxRange :: Range -> Int + +makeRange = Range +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range(i,j)) (Range(j',k)) = [ Range(i,k) | j==j'] +rangeEdge a (Range(i,j)) = Edge i j a +edgeRange (Edge i j _) = Range (i,j) +minRange (Range rho) = fst rho +maxRange (Range rho) = snd rho + +instance Print Range where + prt (Range (i,j)) = "(" ++ show i ++ "-" ++ show j ++ ")" + prt (EmptyRange) = "(?)" + +{-- Types -------------------------------------------------------------------- + Linearization- and Range records implemented as lists +-----------------------------------------------------------------------------} + +type LinRec c l t = [Lin c l t] + +type RangeRec l = [(l, Range)] + + +{-- Functions ---------------------------------------------------------------- + Concatenation : Concatenation of Ranges, Symbols and Linearizations + and records of Linearizations + Record transformation : Makes a Range record from a fully instantiated + Linearization record + Record projection : Given a label, returns the corresponding Range + Range restriction : Range restriction of Tokens, Symbols, + Linearizations and Records given a list of Tokens + Record replacment : Substitute a record for another in a list of Range + records + Argument substitution : Substitution of a Cat c to a Tok Range, where + Range is the cover of c + Note: The argument is still a Symbol c Range + Subsumation : Checks if a Range record subsumes another Range + record + Record unification : Unification of two Range records +-----------------------------------------------------------------------------} + + +--- Concatenation ------------------------------------------------------------ + + +concSymbols :: [Symbol c Range] -> [[Symbol c Range]] +concSymbols (Tok rng:Tok rng':toks) = do rng'' <- concatRange rng rng' + concSymbols (Tok rng'':toks) +concSymbols (sym:syms) = do syms' <- concSymbols syms + return (sym:syms') +concSymbols [] = return [] + + +concLin :: Lin c l Range -> [Lin c l Range] +concLin (Lin lbl syms) = do syms' <- concSymbols syms + return (Lin lbl syms') + + +concLinRec :: LinRec c l Range -> [LinRec c l Range] +concLinRec = mapM concLin + + +--- Record transformation ---------------------------------------------------- + +makeRangeRec :: LinRec c l Range -> RangeRec l +makeRangeRec lins = map convLin lins + where convLin (Lin lbl [Tok rng]) = (lbl, rng) + convLin (Lin lbl []) = (lbl, EmptyRange) + convLin _ = error "makeRangeRec" + + +--- Record projection -------------------------------------------------------- + +projection :: Ord l => l -> RangeRec l -> [Range] +projection l rec = maybe (fail "projection") return $ lookup l rec + + +--- Range restriction -------------------------------------------------------- + +rangeRestTok :: Ord t => Input t -> t -> [Range] +rangeRestTok toks tok = do rng <- inputToken toks ? tok + return (makeRange rng) + + +rangeRestSym :: Ord t => Input t -> Symbol a t -> [Symbol a Range] +rangeRestSym toks (Tok tok) = do rng <- rangeRestTok toks tok + return (Tok rng) +rangeRestSym _ (Cat c) = return (Cat c) + + +rangeRestLin :: Ord t => Input t -> Lin c l t -> [Lin c l Range] +rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms + concLin (Lin lbl syms') + -- return (Lin lbl syms') + + +rangeRestRec :: Ord t => Input t -> LinRec c l t -> [LinRec c l Range] +rangeRestRec toks = mapM (rangeRestLin toks) + + +rangeRestrictRule :: Ord t => Input t -> MCFRule c n l t -> [MCFRule c n l Range] +rangeRestrictRule toks (Rule abs (Cnc l ls lins)) = liftM (Rule abs . Cnc l ls) $ + rangeRestRec toks lins + +--- Argument substitution ---------------------------------------------------- + +substArgSymbol :: Ord l => Int -> RangeRec l -> Symbol (c, l, Int) Range + -> Symbol (c, l, Int) Range +substArgSymbol i rec tok@(Tok rng) = tok +substArgSymbol i rec cat@(Cat (c, l, j)) + | i==j = maybe err Tok $ lookup l rec + | otherwise = cat + where err = error "substArg: Label not in range-record" + +substArgLin :: Ord l => Int -> RangeRec l -> Lin c l Range + -> [Lin c l Range] +substArgLin i rec (Lin lbl syms) = + concLin (Lin lbl (map (substArgSymbol i rec) syms)) + + +substArgRec :: Ord l => Int -> RangeRec l -> LinRec c l Range + -> [LinRec c l Range] +substArgRec i rec lins = mapM (substArgLin i rec) lins + + +-- Record unification & replacment --------------------------------------------------------- + +unifyRec :: Ord l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]] +unifyRec recs i rec = updateNthM update i recs + where update rec' = guard (subsumes rec' rec) >> return rec + +-- unifyRec recs i rec = do guard $ subsumes (recs !! i) rec +-- return $ replaceRec recs i rec + +replaceRec :: [RangeRec l] -> Int -> RangeRec l -> [RangeRec l] +replaceRec recs i rec = before ++ (rec : after) + where (before, _ : after) = splitAt i recs + +subsumes :: Ord l => RangeRec l -> RangeRec l -> Bool +subsumes rec rec' = and [r `elem` rec' | r <- rec] +-- subsumes rec rec' = all (`elem` rec') rec + + +{- +--- Record unification ------------------------------------------------------- +unifyRangeRecs :: Ord l => [RangeRec l] -> [RangeRec l] -> [[RangeRec l]] +unifyRangeRecs recs recs' = zipWithM unify recs recs' + where unify :: Ord l => RangeRec l -> RangeRec l -> [RangeRec l] + unify rec [] = return rec + unify [] rec = return rec + unify rec1'@(p1@(l1, r1):rec1) rec2'@(p2@(l2, r2):rec2) + = case compare l1 l2 of + LT -> do rec3 <- unify rec1 rec2' + return (p1:rec3) + GT -> do rec3 <- unify rec1' rec2 + return (p2:rec3) + EQ -> do guard (r1 == r2) + rec3 <- unify rec1 rec2 + return (p1:rec3) +-} diff --git a/src-3.0/GF/Parsing/MCFG/ViaCFG.hs b/src-3.0/GF/Parsing/MCFG/ViaCFG.hs new file mode 100644 index 000000000..9204ea9f1 --- /dev/null +++ b/src-3.0/GF/Parsing/MCFG/ViaCFG.hs @@ -0,0 +1,186 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.4 $ +-- +-- MCFG parsing, through context-free approximation +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.ViaCFG where + + +-- Haskell modules +import Data.List +import Control.Monad + +-- GF modules +import ConvertMCFGtoDecoratedCFG +import qualified DecoratedCFParser as CFP +import qualified DecoratedGrammar as CFG +import Examples +import GF.OldParsing.GeneralChart +import qualified GF.OldParsing.MCFGrammar as MCFG +import MCFParser +import Nondet +import Parser +import GF.Parsing.MCFG.Range + + +{-- Datatypes ----------------------------------------------------------------- +Chart +Item +Key + + + Item : Four different Items are used. PreMCFG for MCFG Pre Items, Pre are + the Items returned by the pre-Functions and Mark are the + corresponding Items for the mark-Functions. For convenience correctly + marked Mark Items are converted to Passive Items. +I use dottedrule for convenience to keep track of wich daughter's RangeRec to look for. + AChart: A RedBlackMap with Items and Keys + AKey : +------------------------------------------------------------------------------} + +--Ev ta bort några typer av Item och bara nyckla på det som är unikt för den typen... +data Item n c l = PreMCFG (n, c) (RangeRec l) [RangeRec l] + | Pre (n, c) (RangeRec l) [l] [RangeRec l] + | Mark (n, c) (RangeRec l) (RangeRec l) [RangeRec l] + | Passive (n, c) (RangeRec l) (RangeRec l) + deriving (Eq, Ord, Show) + +type AChart n c l = ParseChart (Item n c l) (AKey n c l) + +data AKey n c l = Pr (n, c) l + | Pm (n, c) l + | Mk (RangeRec l) + | Ps (RangeRec l) + | Useless + deriving (Eq, Ord, Show) + + +{-- Parsing ------------------------------------------------------------------- + recognize: + parse : The Agenda consists of the Passive Items from context-free + approximation (as PreMCFG Items) and the Pre Items inferred by + pre-prediction. + keyof : Given an Item returns an appropriate Key for the Chart +------------------------------------------------------------------------------} + +recognize strategy mcfg toks = chartMember (parse strategy mcfg toks) + (Passive ("f", S) + [("s" , MCFG.Range (0, n))] + [("p" , MCFG.Range (0, n2)), ("q", MCFG.Range (n2, n))]) + (Ps [("s" , MCFG.Range (0, n))]) + where n = length toks + n2 = n `div` 2 + + +--parse :: (Ord n, Ord NT, Ord String, Eq t) => CFP.Strategy -> MCFG.Grammar n NT String t -> [t] +-- -> AChart n NT String +parse strategy mcfg toks + = buildChart keyof + [preCombine, markPredict, markCombine, convert] + (makePreItems (CFP.parse strategy (CFG.pInfo (convertGrammar mcfg)) [(S, "s")] toks) ++ + (prePredict mcfg)) + + +keyof :: Item n c l -> AKey n c l +keyof (PreMCFG head [(lbl, rng)] _) = Pm head lbl +keyof (Pre head _ (lbl:lbls) _) = Pr head lbl +keyof (Mark _ _ _ (rec:recs)) = Mk rec +keyof (Passive _ rec _) = Ps rec +keyof _ = Useless + + +{-- Initializing agenda ------------------------------------------------------- + makePreItems: +------------------------------------------------------------------------------} + +makePreItems :: (Eq c, Ord i) => CFG.Grammar n (Edge (c, l)) i t -> [Item n c l] +makePreItems cfchart + = [ PreMCFG (fun, cat) [(lbl, MCFG.makeRange (i, j))] (symToRec beta) | + CFG.Rule (Edge i j (cat,lbl)) beta fun <- cfchart ] + + +prePredict :: (Ord n, Ord c, Ord l) => MCFG.Grammar n c l t -> [Item n c l] +prePredict mcfg = + [ Pre (f, nt) [] (getLables lins) (replicate (nrOfCats (head lins)) []) | + MCFG.Rule nt nts lins f <- mcfg ] + + +{-- Inference rules --------------------------------------------------------- + prePredict : + preCombine : + markPredict: + markCombine: + convert : +----------------------------------------------------------------------------} + +preCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +preCombine chart (Pre head rec (l:ls) recs) = + [ Pre head (rec ++ [(l, r)]) ls recs'' | + PreMCFG head [(l, r)] recs' <- chartLookup chart (Pm head l), + recs'' <- solutions (unifyRangeRecs recs recs') ] +preCombine chart (PreMCFG head [(l, r)] recs) = + [ Pre head (rec ++ [(l, r)]) ls recs'' | + Pre head rec (l:ls) recs' <- chartLookup chart (Pr head l), + recs'' <- solutions (unifyRangeRecs recs recs') ] +preCombine _ _ = [] + + +markPredict :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +markPredict _ (Pre (n, c) rec [] recs) = [Mark (n, c) rec [] recs] +markPredict _ _ = [] + + +markCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +markCombine chart (Mark (f, c) rec mRec (r:recs)) = + [ Mark (f, c) rec (mRec ++ r) recs | + Passive _ r _ <- chartLookup chart (Ps r)] +markCombine chart (Passive _ r _) = + [ Mark (f, c) rec (mRec++r) recs | + Mark (f, c) rec mRec (r:recs) <- chartLookup chart (Mk r) ] +markCombine _ _ = [] + + +convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +convert _ (Mark (f, c) r rec []) = [Passive (f, c) r rec] +convert _ _ = [] + + +{-- Help functions ---------------------------------------------------------------- + getRHS : + getLables: + symToRec : +----------------------------------------------------------------------------------} + +-- FULKOD ! +nrOfCats :: Eq c => MCFG.Lin c l t -> Int +nrOfCats (MCFG.Lin l syms) = length $ nub [(c, i) | Cat (c, l, i) <- syms] + + +-- +getLables :: LinRec c l t -> [l] +getLables lins = [l | MCFG.Lin l syms <- lins] + + +-- +symToRec :: Ord i => [Symbol (Edge (c, l), i) d] -> [[(l, MCFG.Range)]] +symToRec beta = map makeLblRng $ groupBy (\(_, d) (_, d') -> (d == d')) + $ sortBy sBd [(Edge i j (c, l) , d) | Cat (Edge i j (c, l), d) + <- beta] + where makeLblRng edges = [(l, (MCFG.makeRange (i, j))) | (Edge i j (_, l), _) + <- edges] + sBd (_, d) (_, d') + | d < d' = LT + | d > d' = GT + | otherwise = EQ |
