summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Parsing
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Parsing
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Parsing')
-rw-r--r--src-3.0/GF/Parsing/CF.hs66
-rw-r--r--src-3.0/GF/Parsing/CFG.hs51
-rw-r--r--src-3.0/GF/Parsing/CFG/General.hs103
-rw-r--r--src-3.0/GF/Parsing/CFG/Incremental.hs150
-rw-r--r--src-3.0/GF/Parsing/CFG/PInfo.hs98
-rw-r--r--src-3.0/GF/Parsing/FCFG.hs100
-rw-r--r--src-3.0/GF/Parsing/FCFG/Active.hs179
-rw-r--r--src-3.0/GF/Parsing/FCFG/Incremental.hs107
-rw-r--r--src-3.0/GF/Parsing/FCFG/PInfo.hs121
-rw-r--r--src-3.0/GF/Parsing/FCFG/Range.hs50
-rw-r--r--src-3.0/GF/Parsing/GFC.hs208
-rw-r--r--src-3.0/GF/Parsing/MCFG.hs68
-rw-r--r--src-3.0/GF/Parsing/MCFG/Active.hs318
-rw-r--r--src-3.0/GF/Parsing/MCFG/Active2.hs237
-rw-r--r--src-3.0/GF/Parsing/MCFG/FastActive.hs176
-rw-r--r--src-3.0/GF/Parsing/MCFG/Incremental.hs178
-rw-r--r--src-3.0/GF/Parsing/MCFG/Incremental2.hs157
-rw-r--r--src-3.0/GF/Parsing/MCFG/Naive.hs142
-rw-r--r--src-3.0/GF/Parsing/MCFG/PInfo.hs162
-rw-r--r--src-3.0/GF/Parsing/MCFG/Range.hs206
-rw-r--r--src-3.0/GF/Parsing/MCFG/ViaCFG.hs186
21 files changed, 3063 insertions, 0 deletions
diff --git a/src-3.0/GF/Parsing/CF.hs b/src-3.0/GF/Parsing/CF.hs
new file mode 100644
index 000000000..1a65f6caf
--- /dev/null
+++ b/src-3.0/GF/Parsing/CF.hs
@@ -0,0 +1,66 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:23:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.4 $
+--
+-- Chart parsing of grammars in CF format
+-----------------------------------------------------------------------------
+
+module GF.Parsing.CF (parse) where
+
+import GF.Data.Operations (errVal)
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Data.SortedList (nubsort)
+import GF.Data.Assoc
+import qualified GF.CF.CF as CF
+import qualified GF.CF.CFIdent as CFI
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import qualified GF.Parsing.CFG as P
+
+type Token = CFI.CFTok
+type Name = CFI.CFFun
+type Category = CFI.CFCat
+
+parse :: String -> CF.CF -> Category -> CF.CFParser
+parse = buildParser . errVal (errVal undefined (P.parseCF "")) . P.parseCF
+
+buildParser :: P.CFParser Category Name Token -> CF.CF -> Category -> CF.CFParser
+buildParser parser cf start tokens = (parseResults, parseInformation)
+ where parseInformation = prtSep "\n" trees
+ parseResults = [ (tree2cfTree t, []) | t <- trees ]
+ theInput = input tokens
+ edges = tracePrt "Parsing.CF - nr. edges" (prt.length) $
+ parser pInf [start] theInput
+ chart = tracePrt "Parsing.CF - sz. chart" (prt . map (length.snd) . aAssocs) $
+ grammar2chart $ map addCategory edges
+ forests = tracePrt "Parsing.CF - nr. forests" (prt.length) $
+ chart2forests chart (const False)
+ [ uncurry Edge (inputBounds theInput) start ]
+ trees = tracePrt "Parsing.CF - nr. trees" (prt.length) $
+ concatMap forest2trees forests
+ pInf = P.buildCFPInfo $ cf2grammar cf (nubsort tokens)
+
+
+addCategory (CFRule cat rhs name) = CFRule cat rhs (name, cat)
+
+tree2cfTree (TNode (name, Edge _ _ cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees))
+
+cf2grammar :: CF.CF -> [Token] -> CFGrammar Category Name Token
+cf2grammar cf tokens = [ CFRule cat rhs name |
+ (name, (cat, rhs0)) <- cfRules,
+ rhs <- mapM item2symbol rhs0 ]
+ where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++
+ CF.rulesOfCF cf
+ item2symbol (CF.CFNonterm cat) = [Cat cat]
+ item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens
+
+
diff --git a/src-3.0/GF/Parsing/CFG.hs b/src-3.0/GF/Parsing/CFG.hs
new file mode 100644
index 000000000..f64ce55f1
--- /dev/null
+++ b/src-3.0/GF/Parsing/CFG.hs
@@ -0,0 +1,51 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/11 10:28:16 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- CFG parsing
+-----------------------------------------------------------------------------
+
+module GF.Parsing.CFG
+ (parseCF, module GF.Parsing.CFG.PInfo) where
+
+import GF.Data.Operations (Err(..))
+
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.Parsing.CFG.PInfo
+
+import qualified GF.Parsing.CFG.Incremental as Inc
+import qualified GF.Parsing.CFG.General as Gen
+
+----------------------------------------------------------------------
+-- parsing
+
+parseCF :: (Ord n, Ord c, Ord t) => String -> Err (CFParser c n t)
+
+parseCF "bottomup" = Ok $ Gen.parse bottomup
+parseCF "topdown" = Ok $ Gen.parse topdown
+
+parseCF "gb" = Ok $ Gen.parse bottomup
+parseCF "gt" = Ok $ Gen.parse topdown
+parseCF "ib" = Ok $ Inc.parse (bottomup, noFilter)
+parseCF "it" = Ok $ Inc.parse (topdown, noFilter)
+parseCF "ibFT" = Ok $ Inc.parse (bottomup, topdown)
+parseCF "ibFB" = Ok $ Inc.parse (bottomup, bottomup)
+parseCF "ibFTB" = Ok $ Inc.parse (bottomup, bothFilters)
+parseCF "itF" = Ok $ Inc.parse (topdown, bottomup)
+
+-- error parser:
+parseCF prs = Bad $ "CFG parsing strategy not defined: " ++ prs
+
+bottomup = (True, False)
+topdown = (False, True)
+noFilter = (False, False)
+bothFilters = (True, True)
+
+
diff --git a/src-3.0/GF/Parsing/CFG/General.hs b/src-3.0/GF/Parsing/CFG/General.hs
new file mode 100644
index 000000000..4f5959a85
--- /dev/null
+++ b/src-3.0/GF/Parsing/CFG/General.hs
@@ -0,0 +1,103 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:23:08 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.4 $
+--
+-- CFG parsing with a general chart
+-----------------------------------------------------------------------------
+
+module GF.Parsing.CFG.General
+ (parse, Strategy) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.Parsing.CFG.PInfo
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+import Control.Monad
+
+parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
+parse strategy grammar start = extract .
+ tracePrt "Parsing.CFG.General - size internal of chart"
+ (prt . length . chartList) .
+ process strategy grammar start
+
+-- | parsing strategy: (isBottomup, isTopdown)
+type Strategy = (Bool, Bool)
+
+extract :: (Ord n, Ord c, Ord t) =>
+ IChart n (Symbol c t) -> CFChart c n t
+extract chart = [ CFRule (Edge j k cat) daughters name |
+ Edge j k (Cat cat, found, [], Just name) <- chartList chart,
+ daughters <- path j k (reverse found) ]
+ where path i k [] = [ [] | i==k ]
+ path i k (Tok tok : found)
+ = [ Tok tok : daughters |
+ daughters <- path (i+1) k found ]
+ path i k (Cat cat : found)
+ = [ Cat (Edge i j cat) : daughters |
+ Edge _i j _cat <- chartLookup chart (Passive (Cat cat) i),
+ daughters <- path j k found ]
+
+
+process :: (Ord n, Ord c, Ord t) =>
+ Strategy -- ^ (isBottomup, isTopdown) :: (Bool, Bool)
+ -> CFPInfo c n t -- ^ parser information (= grammar)
+ -> [c] -- ^ list of starting categories
+ -> Input t -- ^ input string
+ -> IChart n (Symbol c t)
+process (isBottomup, isTopdown) grammar start
+ = trace2 "Parsing.CFG.General - strategy" ((if isBottomup then " BU" else "") ++
+ (if isTopdown then " TD" else "")) $
+ buildChart keyof [predict, combine] . axioms
+ where axioms input = initial ++ scan input
+
+ scan input = map (fmap mkEdge) (inputEdges input)
+ mkEdge tok = (Tok tok, [], [], Nothing)
+
+ -- the combine rule
+ combine chart (Edge j k (next, _, [], _))
+ = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ]
+ combine chart edge@(Edge _ j (_, _, next:_, _))
+ = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ]
+
+ -- initial predictions
+ initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ]
+
+ -- predictions
+ predict chart (Edge j k (next, _, [], _)) | isBottomup
+ = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ]
+ -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward
+ predict chart (Edge _ k (_, _, Cat cat:_, _))
+ = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ]
+ predict _ _ = []
+
+ tdRuleLookup | isTopdown = topdownRules grammar
+ | isBottomup = emptyLeftcornerRules grammar
+
+-- internal representation of parse items
+
+type Item n s = Edge (s, [s], [s], Maybe n)
+type IChart n s = ParseChart (Item n s) (IKey s)
+data IKey s = Active s Int
+ | Passive s Int
+ deriving (Eq, Ord, Show)
+
+keyof (Edge _ j (_, _, next:_, _)) = Active next j
+keyof (Edge j _ (cat, _, [], _)) = Passive cat j
+
+forwardTo (Edge i j (cat, found, next:tofind, name)) k
+ = Edge i k (cat, next:found, tofind, name)
+
+loopingEdge k (CFRule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name)
+
+
+
diff --git a/src-3.0/GF/Parsing/CFG/Incremental.hs b/src-3.0/GF/Parsing/CFG/Incremental.hs
new file mode 100644
index 000000000..adab2b73c
--- /dev/null
+++ b/src-3.0/GF/Parsing/CFG/Incremental.hs
@@ -0,0 +1,150 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:23:09 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.4 $
+--
+-- Incremental chart parsing for CFG
+-----------------------------------------------------------------------------
+
+
+module GF.Parsing.CFG.Incremental
+ (parse, Strategy) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Data.Array
+
+import GF.Data.Operations
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.Parsing.CFG.PInfo
+import GF.Data.IncrementalDeduction
+
+
+-- | parsing strategy: (predict:(BU, TD), filter:(BU, TD))
+type Strategy = ((Bool, Bool), (Bool, Bool))
+
+parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
+parse strategy grammar start = extract .
+ tracePrt "Parsing.CFG.Incremental - size of internal chart"
+ (prt . length . flip chartList const) .
+ process strategy grammar start
+
+extract :: (Ord n, Ord c, Ord t) =>
+ IChart c n t -> CFChart c n t
+extract finalChart = [ CFRule (Edge j k cat) daughters name |
+ (k, Item j (CFRule cat [] name) found) <- chartList finalChart (,),
+ daughters <- path j k (reverse found) ]
+ where path i k [] = [ [] | i==k ]
+ path i k (Tok tok : found)
+ = [ Tok tok : daughters |
+ daughters <- path (i+1) k found ]
+ path i k (Cat cat : found)
+ = [ Cat (Edge i j cat) : daughters |
+ Item j _ _ <- chartLookup finalChart i (Passive cat),
+ daughters <- path j k found ]
+
+process :: (Ord n, Ord c, Ord t) =>
+ Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t
+process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input
+ = trace2 "Parsing.CFG.Incremental - strategy" ((if isPredictBU then "BU-predict " else "") ++
+ (if isPredictTD then "TD-predict " else "") ++
+ (if isFilterBU then "BU-filter " else "") ++
+ (if isFilterTD then "TD-filter " else "")) $
+ finalChart
+ where finalChart = buildChart keyof rules axioms $ inputBounds input
+
+ axioms 0 = union $ map (tdInfer 0) start
+ axioms k = union [ buInfer j k (Tok token) |
+ (token, js) <- aAssocs (inputTo input ! k), j <- js ]
+
+ rules k (Item j (CFRule cat [] _) _)
+ = buInfer j k (Cat cat)
+ rules k (Item j rule@(CFRule _ (sym@(Cat next):_) _) found)
+ = tdInfer k next <++>
+ -- hack for empty rules:
+ [ Item j (forward rule) (sym:found) |
+ emptyCategories grammar ?= next ]
+ rules _ _ = []
+
+ buInfer j k next = buPredict j k next <++> buCombine j k next
+ tdInfer k next = tdPredict k next
+
+ -- the combine rule
+ buCombine j k next
+ | j == k = [] -- hack for empty rules, see rules above and tdPredict below
+ | otherwise = [ Item i (forward rule) (next:found) |
+ Item i rule found <- (finalChart ! j) ? Active next ]
+
+ -- kilbury bottom-up prediction
+ buPredict j k next
+ = [ Item j rule [next] | isPredictBU,
+ rule <- map forward $ bottomupRules grammar ? next,
+ buFilter rule k,
+ tdFilter rule j k ]
+
+ -- top-down prediction
+ tdPredict k cat
+ = [ Item k rule [] | isPredictTD || isFilterTD,
+ rule <- topdownRules grammar ? cat,
+ buFilter rule k ] <++>
+ -- hack for empty rules:
+ [ Item k rule [] | isPredictBU,
+ rule <- emptyLeftcornerRules grammar ? cat ]
+
+ -- bottom up filtering: input symbol k can begin the given symbol list (first set)
+ -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!!
+ buFilter (CFRule _ (Cat cat:_) _) k | isFilterBU
+ = k < snd (inputBounds input) &&
+ hasCommonElements (leftcornerTokens grammar ? cat)
+ (aElems (inputFrom input ! k))
+ buFilter _ _ = True
+
+ -- top down filtering: 'cat' is reachable by an active edge ending in node j < k
+ tdFilter (CFRule cat _ _) j k | isFilterTD && j < k
+ = (tdFilters ! j) ?= cat
+ tdFilter _ _ _ = True
+
+ tdFilters = listArray (inputBounds input) $
+ map (listSet . limit leftCats . activeCats) [0..]
+ activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ]
+ leftCats cat = [ left | CFRule _cat (Cat left:_) _ <- topdownRules grammar ? cat ]
+
+
+----------------------------------------------------------------------
+-- type declarations, items & keys
+
+data Item c n t = Item Int (CFRule c n t) [Symbol c t]
+ deriving (Eq, Ord, Show)
+
+data IKey c t = Active (Symbol c t) | Passive c
+ deriving (Eq, Ord, Show)
+
+type IChart c n t = IncrementalChart (Item c n t) (IKey c t)
+
+keyof :: Item c n t -> IKey c t
+keyof (Item _ (CFRule _ (next:_) _) _) = Active next
+keyof (Item _ (CFRule cat [] _) _) = Passive cat
+
+forward :: CFRule c n t -> CFRule c n t
+forward (CFRule cat (_:rest) name) = CFRule cat rest name
+
+----------------------------------------------------------------------
+
+instance (Print n, Print c, Print t) => Print (Item c n t) where
+ prt (Item k rule syms)
+ = "<"++show k++ ": "++ prt rule++" / "++prt syms++">"
+
+instance (Print c, Print t) => Print (IKey c t) where
+ prt (Active sym) = "?" ++ prt sym
+ prt (Passive cat) = "!" ++ prt cat
+
+
diff --git a/src-3.0/GF/Parsing/CFG/PInfo.hs b/src-3.0/GF/Parsing/CFG/PInfo.hs
new file mode 100644
index 000000000..f877b225e
--- /dev/null
+++ b/src-3.0/GF/Parsing/CFG/PInfo.hs
@@ -0,0 +1,98 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/09 09:28:45 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- CFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module GF.Parsing.CFG.PInfo
+ (CFParser, CFPInfo(..), buildCFPInfo) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.Data.SortedList
+import GF.Data.Assoc
+
+----------------------------------------------------------------------
+-- type declarations
+
+-- | the list of categories = possible starting categories
+type CFParser c n t = CFPInfo c n t
+ -> [c]
+ -> Input t
+ -> CFChart c n t
+
+------------------------------------------------------------
+-- parser information
+
+data CFPInfo c n t
+ = CFPInfo { grammarTokens :: SList t,
+ nameRules :: Assoc n (SList (CFRule c n t)),
+ topdownRules :: Assoc c (SList (CFRule c n t)),
+ bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
+ emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
+ emptyCategories :: Set c,
+ cyclicCategories :: SList c,
+ -- ^ ONLY FOR DIRECT CYCLIC RULES!!!
+ leftcornerTokens :: Assoc c (SList t)
+ -- ^ DOES NOT WORK WITH EMPTY RULES!!!
+ }
+
+buildCFPInfo :: (Ord c, Ord n, Ord t) => CFGrammar c n t -> CFPInfo c n t
+
+-- this is not permanent...
+buildCFPInfo grammar = traceCalcFirst grammar $
+ tracePrt "CFG.PInfo - parser info" (prt) $
+ pInfo' (filter (not . isCyclic) grammar)
+
+pInfo' grammar = CFPInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks
+ where grToks = union [ nubsort [ tok | Tok tok <- rhs ] |
+ CFRule _ rhs _ <- grammar ]
+ nmRules = accumAssoc id [ (name, rule) |
+ rule@(CFRule _ _ name) <- grammar ]
+ tdRules = accumAssoc id [ (cat, rule) |
+ rule@(CFRule cat _ _) <- grammar ]
+ buRules = accumAssoc id [ (next, rule) |
+ rule@(CFRule _ (next:_) _) <- grammar ]
+ elcRules = accumAssoc id $ limit lc emptyRules
+ leftToks = accumAssoc id $ limit lc $
+ nubsort [ (cat, token) |
+ CFRule cat (Tok token:_) _ <- grammar ]
+ lc (left, res) = nubsort [ (cat, res) |
+ CFRule cat _ _ <- buRules ? Cat left ]
+ emptyRules = nubsort [ (cat, rule) |
+ rule@(CFRule cat [] _) <- grammar ]
+ emptyCats = listSet $ limitEmpties $ map fst emptyRules
+ limitEmpties es = if es==es' then es else limitEmpties es'
+ where es' = nubsort [ cat | CFRule cat rhs _ <- grammar,
+ all (symbol (\e -> e `elem` es) (const False)) rhs ]
+ cyclicCats = nubsort [ cat | CFRule cat [Cat cat'] _ <- grammar, cat == cat' ]
+
+isCyclic (CFRule cat [Cat cat'] _) = cat==cat'
+isCyclic _ = False
+
+
+----------------------------------------------------------------------
+-- pretty-printing of statistics
+
+instance (Ord c, Ord n, Ord t) => Print (CFPInfo c n t) where
+ prt pI = "[ tokens=" ++ sl grammarTokens ++
+ "; names=" ++ sla nameRules ++
+ "; tdCats=" ++ sla topdownRules ++
+ "; buCats=" ++ sla bottomupRules ++
+ "; elcCats=" ++ sla emptyLeftcornerRules ++
+ "; eCats=" ++ sla emptyCategories ++
+ -- "; cCats=" ++ sl cyclicCategories ++
+ -- "; lctokCats=" ++ sla leftcornerTokens ++
+ " ]"
+ where sla f = show $ length $ aElems $ f pI
+ sl f = show $ length $ f pI
diff --git a/src-3.0/GF/Parsing/FCFG.hs b/src-3.0/GF/Parsing/FCFG.hs
new file mode 100644
index 000000000..30a7801c8
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG.hs
@@ -0,0 +1,100 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- FCFG parsing
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG
+ (parseFCF,buildFCFPInfo,FCFPInfo(..),makeFinalEdge) where
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+
+import GF.Infra.PrintClass
+
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+
+import qualified GF.Parsing.FCFG.Active as Active
+import GF.Parsing.FCFG.PInfo
+
+import GF.GFCC.DataGFCC
+import GF.GFCC.CId
+import GF.GFCC.Macros
+import GF.Data.ErrM
+
+import qualified Data.Map as Map
+
+----------------------------------------------------------------------
+-- parsing
+
+-- main parsing function
+
+parseFCF ::
+ String -> -- ^ parsing strategy
+ FCFPInfo -> -- ^ compiled grammar (fcfg)
+ CId -> -- ^ starting category
+ [String] -> -- ^ input tokens
+ Err [Exp] -- ^ resulting GF terms
+parseFCF strategy pinfo startCat inString =
+ do let inTokens = input inString
+ startCats <- Map.lookup startCat (startupCats pinfo)
+ fcfParser <- {- trace lctree $ -} parseFCF strategy
+ let chart = fcfParser pinfo startCats inTokens
+ (i,j) = inputBounds inTokens
+ finalEdges = [makeFinalEdge cat i j | cat <- startCats]
+ forests = map cnv_forests $ chart2forests chart (const False) finalEdges
+ filteredForests = forests >>= applyProfileToForest
+ trees = nubsort $ filteredForests >>= forest2trees
+ return $ map tree2term trees
+ where
+ parseFCF :: String -> Err (FCFParser)
+ parseFCF "bottomup" = Ok $ Active.parse "b"
+ parseFCF "topdown" = Ok $ Active.parse "t"
+ parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat
+
+
+cnv_forests FMeta = FMeta
+cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
+cnv_forests (FString x) = FString x
+cnv_forests (FInt x) = FInt x
+cnv_forests (FFloat x) = FFloat x
+
+cnv_profile (Unify x) = Unify x
+cnv_profile (Constant x) = Constant (cnv_forests2 x)
+
+cnv_forests2 FMeta = FMeta
+cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
+cnv_forests2 (FString x) = FString x
+cnv_forests2 (FInt x) = FInt x
+cnv_forests2 (FFloat x) = FFloat x
+
+----------------------------------------------------------------------
+-- parse trees to GFCC terms
+
+tree2term :: SyntaxTree CId -> Exp
+tree2term (TNode f ts) = tree (AC f) (map tree2term ts)
+
+tree2term (TString s) = tree (AS s) []
+tree2term (TInt n) = tree (AI n) []
+tree2term (TFloat f) = tree (AF f) []
+tree2term (TMeta) = exp0
+
+----------------------------------------------------------------------
+-- conversion and unification of forests
+
+-- simplest implementation
+applyProfileToForest :: SyntaxForest FName -> [SyntaxForest CId]
+applyProfileToForest (FNode name@(Name fun profile) children)
+ | isCoercionF name = concat chForests
+ | otherwise = [ FNode fun chForests | not (null chForests) ]
+ where chForests = concat [ applyProfileM unifyManyForests profile forests |
+ forests0 <- children,
+ forests <- mapM applyProfileToForest forests0 ]
+applyProfileToForest (FString s) = [FString s]
+applyProfileToForest (FInt n) = [FInt n]
+applyProfileToForest (FFloat f) = [FFloat f]
+applyProfileToForest (FMeta) = [FMeta]
diff --git a/src-3.0/GF/Parsing/FCFG/Active.hs b/src-3.0/GF/Parsing/FCFG/Active.hs
new file mode 100644
index 000000000..df55793f8
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/Active.hs
@@ -0,0 +1,179 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- MCFG parsing, the active algorithm
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.Active (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+import GF.Data.SortedList
+import GF.Data.Utilities
+
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+
+import GF.Infra.PrintClass
+
+import GF.Parsing.FCFG.Range
+import GF.Parsing.FCFG.PInfo
+
+import Control.Monad (guard)
+
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Data.Array
+
+----------------------------------------------------------------------
+-- * parsing
+
+parse :: String -> FCFParser
+parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
+ where chart = process strategy pinfo toks axioms emptyXChart
+ axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
+ | isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
+
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec
+emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
+ where
+ FRule _ rhs _ _ = allRules pinfo ! ruleid
+
+process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
+process strategy pinfo toks [] chart = chart
+process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
+ where
+ univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat c r d -> case recs !! d of
+ [] -> case insertXChart chart item c of
+ Nothing -> chart
+ Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
+ rng <- concatRange rng (found' !! r)
+ return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
+ ++
+ do guard (isTD strategy)
+ ruleid <- topdownRules pinfo ? c
+ return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
+ in process strategy pinfo toks items chart
+ found' -> let items = do rng <- concatRange rng (found' !! r)
+ return (c, Active found rng lbl (ppos+1) node)
+ in process strategy pinfo toks items chart
+ FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
+ rng' <- concatRange rng (makeRange i j)
+ return (cat, Active found rng' lbl (ppos+1) node)
+ in process strategy pinfo toks items chart
+ | otherwise =
+ if inRange (bounds lins) (lbl+1)
+ then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
+ else univRule cat (Final (reverse (rng:found)) node) chart
+ where
+ (FRule fn _ cat lins) = allRules pinfo ! ruleid
+ lin = lins ! lbl
+ univRule cat item@(Final found' node) chart =
+ case insertXChart chart item cat of
+ Nothing -> chart
+ Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
+ let FRule _ _ _ lins = allRules pinfo ! ruleid
+ FSymCat cat r d = lins ! l ! ppos
+ rng <- concatRange rng (found' !! r)
+ return (cat, Active found rng l (ppos+1) (updateChildren node d found'))
+ ++
+ do guard (isBU strategy)
+ ruleid <- leftcornerCats pinfo ? cat
+ let FRule _ _ _ lins = allRules pinfo ! ruleid
+ FSymCat cat r d = lins ! 0 ! 0
+ return (cat, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
+
+ updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
+ updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
+ in process strategy pinfo toks items chart
+
+----------------------------------------------------------------------
+-- * XChart
+
+data Item
+ = Active RangeRec
+ Range
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !FPointPos
+ (SyntaxNode RuleId RangeRec)
+ | Final RangeRec (SyntaxNode RuleId RangeRec)
+ deriving (Eq, Ord)
+
+data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
+
+emptyXChart :: Ord c => XChart c
+emptyXChart = XChart emptyChart emptyChart
+
+insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
+ case chartInsert actives item c of
+ Nothing -> Nothing
+ Just actives -> Just (XChart actives finals)
+
+insertXChart (XChart actives finals) item@(Final _ _) c =
+ case chartInsert finals item c of
+ Nothing -> Nothing
+ Just finals -> Just (XChart actives finals)
+
+lookupXChartAct (XChart actives finals) c = chartLookup actives c
+lookupXChartFinal (XChart actives finals) c = chartLookup finals c
+
+xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec)
+xchart2syntaxchart (XChart actives finals) pinfo =
+ accumAssoc groupSyntaxNodes $
+ [ case node of
+ SNode ruleid rrecs -> let FRule fun rhs cat _ = allRules pinfo ! ruleid
+ in ((cat,found), SNode fun (zip rhs rrecs))
+ SString s -> ((cat,found), SString s)
+ SInt n -> ((cat,found), SInt n)
+ SFloat f -> ((cat,found), SFloat f)
+ | (cat, Final found node) <- chartAssocs finals
+ ]
+
+literals :: FCFPInfo -> Input FToken -> [(FCat,Item)]
+literals pinfo toks =
+ [let (c,node) = lexer t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)]
+ where
+ lexer t =
+ case reads t of
+ [(n,"")] -> (fcatInt, SInt (n::Integer))
+ _ -> case reads t of
+ [(f,"")] -> (fcatFloat, SFloat (f::Double))
+ _ -> (fcatString,SString t)
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- called with all starting categories
+initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
+initialTD pinfo starts toks =
+ do cat <- starts
+ ruleid <- topdownRules pinfo ? cat
+ return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)]
+initialBU pinfo toks =
+ do (tok,rngs) <- aAssocs (inputToken toks)
+ ruleid <- leftcornerTokens pinfo ? tok
+ let FRule _ _ cat _ = allRules pinfo ! ruleid
+ (i,j) <- rngs
+ return (cat,Active [] (makeRange i j) 0 1 (emptyChildren ruleid pinfo))
+ ++
+ do ruleid <- epsilonRules pinfo
+ let FRule _ _ cat _ = allRules pinfo ! ruleid
+ return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
diff --git a/src-3.0/GF/Parsing/FCFG/Incremental.hs b/src-3.0/GF/Parsing/FCFG/Incremental.hs
new file mode 100644
index 000000000..5ee77a061
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/Incremental.hs
@@ -0,0 +1,107 @@
+module GF.Parsing.FCFG.Incremental where
+
+import Data.Array
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.Assoc
+import GF.Data.GeneralDeduction
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+import GF.Parsing.FCFG.PInfo
+import GF.Parsing.FCFG.Range
+import GF.GFCC.CId
+import Debug.Trace
+
+initState :: FCFPInfo -> CId -> State
+initState pinfo start =
+ let items = do
+ starts <- Map.lookup start (startupCats pinfo)
+ c <- starts
+ ruleid <- topdownRules pinfo ? c
+ let (FRule fn args cat lins) = allRules pinfo ! ruleid
+ lbl <- indices lins
+ return (Active 0 lbl 0 ruleid args cat)
+
+ forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ args cat _) <- assocs (allRules pinfo)]
+
+ max_fid = case IntMap.maxViewWithKey forest of
+ Just ((fid,_), _) -> fid+1
+ Nothing -> 0
+
+ in process pinfo items (State emptyChart [] emptyChart Map.empty forest max_fid 0)
+
+nextState :: FCFPInfo -> FToken -> State -> State
+nextState pinfo t state =
+ process pinfo (chartLookup (tokens state) t) state{ chart=emptyChart
+ , charts=chart state : charts state
+ , tokens=emptyChart
+ , passive=Map.empty
+ , currOffset=currOffset state+1
+ }
+
+getCompletions :: State -> FToken -> [FToken]
+getCompletions state w =
+ [t | t <- chartKeys (tokens state), take (length w) t == w]
+
+process pinfo [] state = state
+process pinfo (item@(Active j lbl ppos ruleid args fid0):xitems) state
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat _ r d -> let fid = args !! d
+ in case chartInsert (chart state) item (fid,r) of
+ Nothing -> process pinfo xitems state
+ Just actCat -> let items = do exprs <- IntMap.lookup fid (forest state)
+ (Passive ruleid args) <- Set.toList exprs
+ return (Active k r 0 ruleid args fid)
+ `mplus`
+ do id <- Map.lookup (fid,r,k) (passive state)
+ return (Active j lbl (ppos+1) ruleid (updateAt d id args) fid0)
+ in process pinfo (xitems++items) state{chart=actCat}
+ FSymTok tok -> case chartInsert (tokens state) (Active j lbl (ppos+1) ruleid args fid0) tok of
+ Nothing -> process pinfo xitems state
+ Just actTok -> process pinfo xitems state{tokens=actTok}
+ | otherwise = case Map.lookup (fid0, lbl, j) (passive state) of
+ Nothing -> let fid = nextId state
+ items = do Active j' lbl ppos ruleid args fidc <- chartLookup ((chart state:charts state) !! (k-j)) (fid0,lbl)
+ let FSymCat _ _ d = rhs ruleid lbl ! ppos
+ return (Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc)
+ in process pinfo (xitems++items) state{passive=Map.insert (fid0, lbl, j) fid (passive state)
+ ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
+ ,nextId =nextId state+1
+ }
+ Just id -> process pinfo xitems state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
+ where
+ lin = rhs ruleid lbl
+ k = currOffset state
+
+ rhs ruleid lbl = lins ! lbl
+ where
+ (FRule _ _ cat lins) = allRules pinfo ! ruleid
+
+ updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+
+data Active
+ = Active Int FIndex FPointPos RuleId [FCat] FCat
+ deriving (Eq,Show,Ord)
+data Passive
+ = Passive RuleId [FCat]
+ deriving (Eq,Ord,Show)
+
+
+data State
+ = State
+ { chart :: Chart
+ , charts :: [Chart]
+ , tokens :: ParseChart Active FToken
+ , passive :: Map.Map (FCat, FIndex, Int) FCat
+ , forest :: IntMap.IntMap (Set.Set Passive)
+ , nextId :: FCat
+ , currOffset :: Int
+ }
+ deriving Show
+
+type Chart = ParseChart Active (FCat, FIndex)
diff --git a/src-3.0/GF/Parsing/FCFG/PInfo.hs b/src-3.0/GF/Parsing/FCFG/PInfo.hs
new file mode 100644
index 000000000..8b288f2f1
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/PInfo.hs
@@ -0,0 +1,121 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- FCFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.PInfo where
+
+import GF.Infra.PrintClass
+import GF.Formalism.Utilities
+import GF.Formalism.FCFG
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Parsing.FCFG.Range
+import qualified GF.GFCC.CId as AbsGFCC
+
+import Data.Array
+import Data.Maybe
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Debug.Trace
+
+----------------------------------------------------------------------
+-- type declarations
+
+-- | the list of categories = possible starting categories
+type FCFParser = FCFPInfo
+ -> [FCat]
+ -> Input FToken
+ -> SyntaxChart FName (FCat,RangeRec)
+
+makeFinalEdge cat 0 0 = (cat, [EmptyRange])
+makeFinalEdge cat i j = (cat, [makeRange i j])
+
+------------------------------------------------------------
+-- parser information
+
+type RuleId = Int
+
+data FCFPInfo
+ = FCFPInfo { allRules :: Array RuleId FRule
+ , topdownRules :: Assoc FCat (SList RuleId)
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
+ -- , emptyRules :: [RuleId]
+ , epsilonRules :: [RuleId]
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , leftcornerCats :: Assoc FCat (SList RuleId)
+ , leftcornerTokens :: Assoc FToken (SList RuleId)
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , grammarCats :: SList FCat
+ , grammarToks :: SList FToken
+ , startupCats :: Map.Map AbsGFCC.CId [FCat]
+ }
+
+
+getLeftCornerTok lins
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymTok tok -> Just tok
+ _ -> Nothing
+ | otherwise = Nothing
+ where
+ syms = lins ! 0
+
+getLeftCornerCat lins
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymCat c _ _ -> Just c
+ _ -> Nothing
+ | otherwise = Nothing
+ where
+ syms = lins ! 0
+
+buildFCFPInfo :: FGrammar -> FCFPInfo
+buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $
+ FCFPInfo { allRules = allrules
+ , topdownRules = topdownrules
+ -- , emptyRules = emptyrules
+ , epsilonRules = epsilonrules
+ , leftcornerCats = leftcorncats
+ , leftcornerTokens = leftcorntoks
+ , grammarCats = grammarcats
+ , grammarToks = grammartoks
+ , startupCats = startup
+ }
+
+ where allrules = listArray (0,length grammar-1) grammar
+ topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ cat _) <- assocs allrules]
+ -- emptyrules = [ruleid | (ruleid, FRule _ [] _ _) <- assocs allrules]
+ epsilonrules = [ ruleid | (ruleid, FRule _ _ _ lins) <- assocs allrules,
+ not (inRange (bounds (lins ! 0)) 0) ]
+ leftcorncats = accumAssoc id
+ [ (fromJust (getLeftCornerCat lins), ruleid) |
+ (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
+ leftcorntoks = accumAssoc id
+ [ (fromJust (getLeftCornerTok lins), ruleid) |
+ (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
+ grammarcats = aElems topdownrules
+ grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
+
+fcfPInfoToFGrammar :: FCFPInfo -> FGrammar
+fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo)
+
+----------------------------------------------------------------------
+-- pretty-printing of statistics
+
+instance Print FCFPInfo where
+ prt pI = "[ allRules=" ++ sl (elems . allRules) ++
+ "; tdRules=" ++ sla topdownRules ++
+ -- "; emptyRules=" ++ sl emptyRules ++
+ "; epsilonRules=" ++ sl epsilonRules ++
+ "; lcCats=" ++ sla leftcornerCats ++
+ "; lcTokens=" ++ sla leftcornerTokens ++
+ "; categories=" ++ sl grammarCats ++
+ " ]"
+
+ where sl f = show $ length $ f pI
+ sla f = let (as, bs) = unzip $ aAssocs $ f pI
+ in show (length as) ++ "/" ++ show (length (concat bs))
+
diff --git a/src-3.0/GF/Parsing/FCFG/Range.hs b/src-3.0/GF/Parsing/FCFG/Range.hs
new file mode 100644
index 000000000..24674f58b
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/Range.hs
@@ -0,0 +1,50 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Definitions of ranges, and operations on ranges
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.Range
+ ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
+ ) where
+
+
+-- GF modules
+import GF.Formalism.Utilities
+import GF.Infra.PrintClass
+
+------------------------------------------------------------
+-- ranges as single pairs
+
+type RangeRec = [Range]
+
+data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
+ | EmptyRange
+ deriving (Eq, Ord)
+
+makeRange :: Int -> Int -> Range
+makeRange = Range
+
+concatRange :: Range -> Range -> [Range]
+concatRange EmptyRange rng = return rng
+concatRange rng EmptyRange = return rng
+concatRange (Range i j) (Range j' k) = [Range i k | j==j']
+
+rangeEdge :: a -> Range -> Edge a
+rangeEdge a (Range i j) = Edge i j a
+
+edgeRange :: Edge a -> Range
+edgeRange (Edge i j _) = Range i j
+
+minRange :: Range -> Int
+minRange (Range i j) = i
+
+maxRange :: Range -> Int
+maxRange (Range i j) = j
+
+instance Print Range where
+ prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
+ prt (EmptyRange) = "(?)"
diff --git a/src-3.0/GF/Parsing/GFC.hs b/src-3.0/GF/Parsing/GFC.hs
new file mode 100644
index 000000000..9f1328a50
--- /dev/null
+++ b/src-3.0/GF/Parsing/GFC.hs
@@ -0,0 +1,208 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/13 12:40:19 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.9 $
+--
+-- The main parsing module, parsing GFC grammars
+-- by translating to simpler formats, such as PMCFG and CFG
+----------------------------------------------------------------------
+
+module GF.Parsing.GFC
+ (parse, PInfo(..), buildPInfo) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+import qualified GF.Grammar.PrGrammar as PrGrammar
+
+import GF.Data.ErrM
+
+import qualified GF.Grammar.Grammar as Grammar
+import qualified GF.Grammar.Macros as Macros
+import qualified GF.Canon.AbsGFC as AbsGFC
+import qualified GF.GFCC.DataGFCC as AbsGFCC
+import GF.GFCC.CId
+import qualified GF.Infra.Ident as Ident
+import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok, prCFTok)
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+
+import qualified GF.Formalism.GCFG as G
+import qualified GF.Formalism.SimpleGFC as S
+import qualified GF.Formalism.MCFG as M
+import GF.Formalism.FCFG
+import qualified GF.Formalism.CFG as C
+import qualified GF.Parsing.MCFG as PM
+import qualified GF.Parsing.FCFG as PF
+import qualified GF.Parsing.CFG as PC
+
+----------------------------------------------------------------------
+-- parsing information
+
+data PInfo = PInfo { mcfPInfo :: MCFPInfo
+ , fcfPInfo :: PF.FCFPInfo
+ , cfPInfo :: CFPInfo
+ }
+
+type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
+type CFPInfo = PC.CFPInfo CCat Name Token
+
+buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo
+buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg
+ , fcfPInfo = PF.buildFCFPInfo fcfg
+ , cfPInfo = PC.buildCFPInfo cfg
+ }
+
+instance Print PInfo where
+ prt (PInfo m f c) = prt m ++ "\n" ++ prt c
+
+----------------------------------------------------------------------
+-- main parsing function
+
+parse :: String -- ^ parsing algorithm (mcfg or cfg)
+ -> String -- ^ parsing strategy
+ -> PInfo -- ^ compiled grammars (mcfg and cfg)
+ -> Ident.Ident -- ^ abstract module name
+ -> CFCat -- ^ starting category
+ -> [CFTok] -- ^ input tokens
+ -> Err [Grammar.Term] -- ^ resulting GF terms
+
+
+-- parsing via CFG
+parse "c" strategy pinfo abs startCat inString
+ = do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
+ inputMany (map wordsCFTok inString)
+ let startCats = tracePrt "Parsing.GFC - starting CF categories" prt $
+ filter isStart $ map fst $ aAssocs $ PC.topdownRules cfpi
+ isStart cat = ccat2scat cat == cfCat2Ident startCat
+ cfpi = cfPInfo pinfo
+ cfParser <- PC.parseCF strategy
+ let cfChart = tracePrt "Parsing.GFC - CF chart" (prt . length) $
+ cfParser cfpi startCats inTokens
+ chart = tracePrt "Parsing.GFC - chart" (prt . map (length.snd) . aAssocs) $
+ C.grammar2chart cfChart
+ finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
+ map (uncurry Edge (inputBounds inTokens)) startCats
+ forests = chart2forests chart (const False) finalEdges
+ traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests))
+ traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees)))
+ let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
+ forests >>= applyProfileToForest
+ -- compactFs = tracePrt "#compactForests" (prt . length) $
+ -- tracePrt "compactForests" (prtBefore "\n") $
+ -- compactForests forests
+ trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
+ nubsort $ filteredForests >>= forest2trees
+ -- compactFs >>= forest2trees
+ return $ map (tree2term abs) trees
+
+
+-- parsing via MCFG
+parse "m" strategy pinfo abs startCat inString
+ = do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
+ inputMany (map wordsCFTok inString)
+ let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $
+ filter isStart $ PM.grammarCats mcfpi
+ isStart cat = mcat2scat cat == cfCat2Ident startCat
+ mcfpi = mcfPInfo pinfo
+ mcfParser <- PM.parseMCF strategy
+ let chart = mcfParser mcfpi startCats inTokens
+ finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
+ [ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
+ cat@(MCat _ [lbl]) <- startCats ]
+ forests = chart2forests chart (const False) finalEdges
+ traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests))
+ traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees)))
+ let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
+ forests >>= applyProfileToForest
+ -- compactFs = tracePrt "#compactForests" (prt . length) $
+ -- tracePrt "compactForests" (prtBefore "\n") $
+ -- compactForests forests
+ trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
+ nubsort $ filteredForests >>= forest2trees
+ -- compactFs >>= forest2trees
+ return $ map (tree2term abs) trees
+
+
+-- parsing via FCFG
+parse "f" strategy pinfo abs startCat inString =
+ let Ident.IC x = cfCat2Ident startCat
+ cat' = CId x
+ in case PF.parseFCF strategy (fcfPInfo pinfo) cat' (map prCFTok inString) of
+ Ok es -> Ok (map (exp2term abs) es)
+ Bad msg -> Bad msg
+
+
+-- error parser:
+selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy
+
+cnv_forests FMeta = FMeta
+cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (Ident.IC n) (map cnv_profile p)) (map (map cnv_forests) fss)
+cnv_forests (FString x) = FString x
+cnv_forests (FInt x) = FInt x
+cnv_forests (FFloat x) = FFloat x
+
+cnv_profile (Unify x) = Unify x
+cnv_profile (Constant x) = Constant (cnv_forests2 x)
+
+cnv_forests2 FMeta = FMeta
+cnv_forests2 (FNode (CId n) fss) = FNode (Ident.IC n) (map (map cnv_forests2) fss)
+cnv_forests2 (FString x) = FString x
+cnv_forests2 (FInt x) = FInt x
+cnv_forests2 (FFloat x) = FFloat x
+
+----------------------------------------------------------------------
+-- parse trees to GF terms
+
+tree2term :: Ident.Ident -> SyntaxTree Fun -> Grammar.Term
+tree2term abs (TNode f ts) = Macros.mkApp (Macros.qq (abs,f)) (map (tree2term abs) ts)
+tree2term abs (TString s) = Macros.string2term s
+tree2term abs (TInt n) = Macros.int2term n
+tree2term abs (TFloat f) = Macros.float2term f
+tree2term abs (TMeta) = Macros.mkMeta 0
+
+exp2term :: Ident.Ident -> AbsGFCC.Exp -> Grammar.Term
+exp2term abs (AbsGFCC.DTr _ a es) = ---- TODO: bindings
+ Macros.mkApp (atom2term abs a) (map (exp2term abs) es)
+
+atom2term :: Ident.Ident -> AbsGFCC.Atom -> Grammar.Term
+atom2term abs (AbsGFCC.AC (CId f)) = Macros.qq (abs,Ident.IC f)
+atom2term abs (AbsGFCC.AS s) = Macros.string2term s
+atom2term abs (AbsGFCC.AI n) = Macros.int2term n
+atom2term abs (AbsGFCC.AF f) = Macros.float2term f
+atom2term abs (AbsGFCC.AM i) = Macros.mkMeta (fromInteger i)
+
+----------------------------------------------------------------------
+-- conversion and unification of forests
+
+-- simplest implementation
+applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
+applyProfileToForest (FNode name@(Name fun profile) children)
+ | isCoercion name = concat chForests
+ | otherwise = [ FNode fun chForests | not (null chForests) ]
+ where chForests = concat [ applyProfileM unifyManyForests profile forests |
+ forests0 <- children,
+ forests <- mapM applyProfileToForest forests0 ]
+applyProfileToForest (FString s) = [FString s]
+applyProfileToForest (FInt n) = [FInt n]
+applyProfileToForest (FFloat f) = [FFloat f]
+applyProfileToForest (FMeta) = [FMeta]
+
+{-
+-- more intelligent(?) implementation
+applyProfileToForest (FNode (Name name profile) children)
+ | isCoercion name = concat chForests
+ | otherwise = [ FNode name chForests | not (null chForests) ]
+ where chForests = concat [ mapM (checkProfile forests) profile |
+ forests0 <- children,
+ forests <- mapM applyProfileToForest forests0 ]
+-}
+
+
diff --git a/src-3.0/GF/Parsing/MCFG.hs b/src-3.0/GF/Parsing/MCFG.hs
new file mode 100644
index 000000000..bda3af675
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG.hs
@@ -0,0 +1,68 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/11 10:28:16 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- MCFG parsing
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG
+ (parseMCF, module GF.Parsing.MCFG.PInfo) where
+
+import GF.Data.Operations (Err(..))
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Parsing.MCFG.PInfo
+
+import qualified GF.Parsing.MCFG.Naive as Naive
+import qualified GF.Parsing.MCFG.Active as Active
+import qualified GF.Parsing.MCFG.FastActive as FastActive
+-- import qualified GF.Parsing.MCFG.Active2 as Active2
+import qualified GF.Parsing.MCFG.Incremental as Incremental
+-- import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
+
+----------------------------------------------------------------------
+-- parsing
+
+parseMCF :: (Ord c, Ord n, Ord l, Ord t) => String -> Err (MCFParser c n l t)
+parseMCF prs | prs `elem` strategies = Ok $ parseMCF' prs
+ | otherwise = Bad $ "MCFG parsing strategy not defined: " ++ prs
+
+
+strategies = words "bottomup topdown n an ab at i rn ran rab rat ri ft fb"
+
+
+parseMCF' :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
+
+parseMCF' "bottomup" pinfo starts toks = parseMCF' "fb" pinfo starts toks
+parseMCF' "topdown" pinfo starts toks = parseMCF' "ft" pinfo starts toks
+
+parseMCF' "n" pinfo starts toks = Naive.parse pinfo starts toks
+parseMCF' "an" pinfo starts toks = Active.parse "n" pinfo starts toks
+parseMCF' "ab" pinfo starts toks = Active.parse "b" pinfo starts toks
+parseMCF' "at" pinfo starts toks = Active.parse "t" pinfo starts toks
+parseMCF' "i" pinfo starts toks = Incremental.parse pinfo starts toks
+
+-- parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
+-- parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
+-- parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
+-- parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
+
+parseMCF' "rn" pinfo starts toks = Naive.parseR (rrP pinfo toks) starts
+parseMCF' "ran" pinfo starts toks = Active.parseR "n" (rrP pinfo toks) starts
+parseMCF' "rab" pinfo starts toks = Active.parseR "b" (rrP pinfo toks) starts
+parseMCF' "rat" pinfo starts toks = Active.parseR "t" (rrP pinfo toks) starts
+parseMCF' "ri" pinfo starts toks = Incremental.parseR (rrP pinfo toks) starts ntoks
+ where ntoks = snd (inputBounds toks)
+
+parseMCF' "fb" pinfo starts toks = FastActive.parse "b" (rrP pinfo toks) starts
+parseMCF' "ft" pinfo starts toks = FastActive.parse "t" (rrP pinfo toks) starts
+
+rrP pi = rangeRestrictPInfo pi
diff --git a/src-3.0/GF/Parsing/MCFG/Active.hs b/src-3.0/GF/Parsing/MCFG/Active.hs
new file mode 100644
index 000000000..c6e9c6b06
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/Active.hs
@@ -0,0 +1,318 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- MCFG parsing, the active algorithm
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.Active (parse, parseR) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+
+import Control.Monad (guard)
+
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- * parsing
+
+parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
+parse strategy pinfo starts toks =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = process strategy pinfo starts toks
+
+-- parseR :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
+parseR strategy pinfo starts =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = processR strategy pinfo starts
+
+process :: (Ord n, Ord c, Ord l, Ord t) =>
+ String -> MCFPInfo c n l t -> [c] -> Input t -> AChart c n l
+process strategy pinfo starts toks
+ = tracePrt "MCFG.Active - chart size" prtSizes $
+ buildChart keyof (complete : combine : convert : rules) axioms
+ where rules | isNil strategy = [scan]
+ | isBU strategy = [scan, predictKilbury pinfo toks]
+ | isTD strategy = [scan, predictEarley pinfo toks]
+ axioms | isNil strategy = predict pinfo toks
+ | isBU strategy = {- terminal pinfo toks ++ -} initialScan pinfo toks
+ | isTD strategy = initial pinfo starts toks
+
+--processR :: (Ord n, Ord c, Ord l) =>
+-- String -> MCFPInfo c n l Range -> [c] -> AChart c n l
+processR strategy pinfo starts
+ = tracePrt "MCFG.Active Range - chart size" prtSizes $
+ -- tracePrt "MCFG.Active Range - final chart" prtChart $
+ buildChart keyof (complete : combine : convert : rules) axioms
+ where rules | isNil strategy = [scan]
+ | isBU strategy = [scan, predictKilburyR pinfo]
+ | isTD strategy = [scan, predictEarleyR pinfo]
+ axioms | isNil strategy = predictR pinfo
+ | isBU strategy = {- terminalR pinfo ++ -} initialScanR pinfo
+ | isTD strategy = initialR pinfo starts
+
+isNil s = s=="n"
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: Abstract c n -> [RangeRec l]
+emptyChildren (Abs _ rhs _) = replicate (length rhs) []
+
+makeMaxRange (Range (_, j)) = Range (j, j)
+makeMaxRange EmptyRange = EmptyRange
+
+
+----------------------------------------------------------------------
+-- * inference rules
+
+-- completion
+complete :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
+complete _ (Active rule found rng (Lin l []) (lin:lins) recs) =
+ return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs
+complete _ _ = []
+
+-- scanning
+scan :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
+scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) =
+ do rng'' <- concatRange rng rng'
+ return $ Active rule found rng'' (Lin l syms) lins recs
+scan _ _ = []
+
+-- | Creates an Active Item every time it is possible to combine
+-- an Active Item from the agenda with a Passive Item from the Chart
+combine :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
+combine chart item@(Active _ _ _ (Lin _ (Cat (c,_,_):_)) _ _) =
+ do Passive _c found <- chartLookup chart (Pass c)
+ combine2 chart found item
+combine chart (Passive c found) =
+ do item <- chartLookup chart (Act c)
+ combine2 chart found item
+combine _ _ = []
+
+combine2 chart found' (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
+ do rng' <- projection r found'
+ rng'' <- concatRange rng rng'
+ recs' <- unifyRec recs d found'
+ return $ Active rule found rng'' (Lin l syms) lins recs'
+
+-- | Active Items with nothing to find are converted to Final items,
+-- which in turn are converted to Passive Items
+convert :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
+convert _ (Active rule found rng (Lin lbl []) [] recs) =
+ return $ Final rule (found ++ [(lbl,rng)]) recs
+convert _ (Final (Abs cat _ _) found _) =
+ return $ Passive cat found
+convert _ _ = []
+
+
+----------------------------------------------------------------------
+-- Naive --
+
+predict :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+predict pinfo toks = tracePrt "MCFG.Active (Naive) - predicted rules" (prt . length) $
+ do (Rule abs (Cnc _ _ lins)) <- rulesMatchingInput pinfo toks
+ (lin':lins') <- rangeRestRec toks lins
+ return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- NaiveR --
+
+predictR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+predictR pinfo = tracePrt "MCFG.Active (Naive Range) - predicted rules" (prt . length) $
+ do (Rule abs (Cnc _ _ (lin:lins))) <- allRules pinfo
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- anropas med alla startkategorier
+initial :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> [c] -> Input t -> [Item c n l]
+initial pinfo starts toks =
+ tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
+ do cat <- starts
+ Rule abs (Cnc _ _ lins) <- topdownRules pinfo ? cat
+ lin' : lins' <- rangeRestRec toks lins
+ return $ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs)
+
+predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
+ -> AChart c n l -> Item c n l -> [Item c n l]
+predictEarley pinfo toks _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
+ topdownRules pinfo ? cat >>= predictEarley2 toks rng
+predictEarley _ _ _ _ = []
+
+predictEarley2 :: (Ord c, Ord n, Ord l, Ord t) => Input t -> Range -> MCFRule c n l t -> [Item c n l]
+predictEarley2 toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
+ do lins' <- rangeRestRec toks lins
+ return $ Final abs (makeRangeRec lins') []
+predictEarley2 toks rng (Rule abs (Cnc _ _ lins)) =
+ do lin' : lins' <- rangeRestRec toks lins
+ return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Earley Range --
+
+initialR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l]
+initialR pinfo starts =
+ tracePrt "MCFG.Active (Earley Range) - initial rules" (prt . length) $
+ do cat <- starts
+ Rule abs (Cnc _ _ (lin : lins)) <- topdownRules pinfo ? cat
+ return $ Active abs [] (Range (0, 0)) lin lins (emptyChildren abs)
+
+predictEarleyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range
+ -> AChart c n l -> Item c n l -> [Item c n l]
+predictEarleyR pinfo _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
+ topdownRules pinfo ? cat >>= predictEarleyR2 rng
+predictEarleyR _ _ _ = []
+
+predictEarleyR2 :: (Ord c, Ord n, Ord l) => Range -> MCFRule c n l Range -> [Item c n l]
+predictEarleyR2 _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
+ return $ Final abs (makeRangeRec lins) []
+predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) =
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+-- terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+-- terminal pinfo toks =
+-- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
+-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+-- lins' <- rangeRestRec toks lins
+-- return $ Final abs (makeRangeRec lins') []
+
+initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+initialScan pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial scanned rules + epsilon rules" (prt . length) $
+ do tok <- aElems (inputToken toks)
+ Rule abs (Cnc _ _ lins) <-
+ leftcornerTokens pinfo ? tok ++
+ epsilonRules pinfo
+ lin' : lins' <- rangeRestRec toks lins
+ return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+
+predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
+ -> AChart c n l -> Item c n l -> [Item c n l]
+predictKilbury pinfo toks _ (Passive cat found) =
+ do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat
+ lin' : lins' <- rangeRestRec toks (Lin l syms : lins)
+ rng <- projection r found
+ children <- unifyRec (emptyChildren abs) i found
+ return $ Active abs [] rng lin' lins' children
+predictKilbury _ _ _ _ = []
+
+
+
+----------------------------------------------------------------------
+-- KilburyR --
+
+-- terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+-- terminalR pinfo =
+-- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
+-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+-- return $ Final abs (makeRangeRec lins) []
+
+initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+initialScanR pinfo =
+ tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $
+ do Rule abs (Cnc _ _ (lin : lins)) <-
+ concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
+ epsilonRules pinfo
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range
+ -> AChart c n l -> Item c n l -> [Item c n l]
+predictKilburyR pinfo _ (Passive cat found) =
+ do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat
+ rng <- projection r found
+ children <- unifyRec (emptyChildren abs) i found
+ return $ Active abs [] rng (Lin l syms) lins children
+predictKilburyR _ _ _ = []
+
+
+----------------------------------------------------------------------
+-- * type definitions
+
+type AChart c n l = ParseChart (Item c n l) (AKey c)
+
+data Item c n l = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l Range)
+ (LinRec c l Range)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+ | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data AKey c = Act c
+ | Pass c
+ | Useless
+ | Fin
+ deriving (Eq, Ord, Show)
+
+
+keyof :: Item c n l -> AKey c
+keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
+keyof (Final _ _ _) = Fin
+keyof (Passive cat _) = Pass cat
+keyof _ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+
+prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _) <- chartKeys chart ]) ++
+ ", useless=" ++ show (length (chartLookup chart Useless))
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+prtFinals chart = prtBefore "\n " (chartLookup chart Fin)
+
+instance (Print c, Print n, Print l) => Print (Item c n l) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ " {" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+ prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance Print c => Print (AKey c) where
+ prt (Act c) = "Active " ++ prt c
+ prt (Pass c) = "Passive " ++ prt c
+ prt (Fin) = "Final"
+ prt (Useless) = "Useless"
diff --git a/src-3.0/GF/Parsing/MCFG/Active2.hs b/src-3.0/GF/Parsing/MCFG/Active2.hs
new file mode 100644
index 000000000..7ad8627bc
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/Active2.hs
@@ -0,0 +1,237 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.2 $
+--
+-- MCFG parsing, the active algorithm (alternative version)
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.Active2 (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+
+import Control.Monad (guard)
+
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- * parsing
+
+--parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
+parse strategy pinfo starts toks =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = process strategy pinfo starts toks
+
+process :: (Ord n, Ord c, Ord l, Ord t) =>
+ String -> MCFPInfo c n l t -> [c] -> Input t -> AChart c n l t
+process strategy pinfo starts toks
+ = tracePrt "MCFG.Active - chart size" prtSizes $
+ buildChart keyof (complete : combine : convert : rules) axioms
+ where rules | isNil strategy = [scan toks]
+ | isBU strategy = [scan toks, predictKilbury pinfo toks]
+ | isTD strategy = [scan toks, predictEarley pinfo toks]
+ axioms | isNil strategy = predict pinfo toks
+ | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ | isTD strategy = initial pinfo starts toks
+
+isNil s = s=="n"
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: Abstract c n -> [RangeRec l]
+emptyChildren (Abs _ rhs _) = replicate (length rhs) []
+
+makeMaxRange (Range (_, j)) = Range (j, j)
+makeMaxRange EmptyRange = EmptyRange
+
+
+----------------------------------------------------------------------
+-- * inference rules
+
+-- completion
+complete :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+complete _ (Active rule found rng (Lin l []) (lin:lins) recs) =
+ return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs
+complete _ _ = []
+
+-- scanning
+--scan :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+scan inp _ (Active rule found rng (Lin l (Tok tok:syms)) lins recs) =
+ do rng' <- map makeRange (inputToken inp ? tok)
+ rng'' <- concatRange rng rng'
+ return $ Active rule found rng'' (Lin l syms) lins recs
+scan _ _ _ = []
+
+-- | Creates an Active Item every time it is possible to combine
+-- an Active Item from the agenda with a Passive Item from the Chart
+combine :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+combine chart item@(Active _ _ _ (Lin _ (Cat (c,_,_):_)) _ _) =
+ do Passive _c found <- chartLookup chart (Pass c)
+ combine2 chart found item
+combine chart (Passive c found) =
+ do item <- chartLookup chart (Act c)
+ combine2 chart found item
+combine _ _ = []
+
+combine2 chart found' (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
+ do rng' <- projection r found'
+ rng'' <- concatRange rng rng'
+ recs' <- unifyRec recs d found'
+ return $ Active rule found rng'' (Lin l syms) lins recs'
+
+-- | Active Items with nothing to find are converted to Final items,
+-- which in turn are converted to Passive Items
+convert :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+convert _ (Active rule found rng (Lin lbl []) [] recs) =
+ return $ Final rule (found ++ [(lbl,rng)]) recs
+convert _ (Final (Abs cat _ _) found _) =
+ return $ Passive cat found
+convert _ _ = []
+
+
+----------------------------------------------------------------------
+-- Naive --
+
+predict :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t]
+predict pinfo toks = tracePrt "MCFG.Active (Naive) - predicted rules" (prt . length) $
+ do Rule abs (Cnc _ _ (lin:lins)) <- rulesMatchingInput pinfo toks
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- anropas med alla startkategorier
+initial :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> [c] -> Input t -> [Item c n l t]
+initial pinfo starts toks =
+ tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
+ do cat <- starts
+ Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? cat
+ return $ Active abs [] (Range (0, 0)) lin lins (emptyChildren abs)
+
+predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
+ -> AChart c n l t -> Item c n l t -> [Item c n l t]
+predictEarley pinfo toks _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
+ topdownRules pinfo ? cat >>= predictEarley2 toks rng
+predictEarley _ _ _ _ = []
+
+predictEarley2 :: (Ord c, Ord n, Ord l, Ord t) => Input t -> Range -> MCFRule c n l t -> [Item c n l t]
+predictEarley2 toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
+ do lins' <- rangeRestRec toks lins
+ return $ Final abs (makeRangeRec lins') []
+predictEarley2 toks rng (Rule abs (Cnc _ _ (lin:lins))) =
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t]
+terminal pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+ lins' <- rangeRestRec toks lins
+ return $ Final abs (makeRangeRec lins') []
+
+initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t]
+initialScan pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
+ do tok <- aElems (inputToken toks)
+ Rule abs (Cnc _ _ (lin:lins)) <- leftcornerTokens pinfo ? tok
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
+ -> AChart c n l t -> Item c n l t -> [Item c n l t]
+predictKilbury pinfo toks _ (Passive cat found) =
+ do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat
+ rng <- projection r found
+ children <- unifyRec (emptyChildren abs) i found
+ return $ Active abs [] rng (Lin l syms) lins children
+predictKilbury _ _ _ _ = []
+
+
+----------------------------------------------------------------------
+-- * type definitions
+
+type AChart c n l t = ParseChart (Item c n l t) (AKey c t)
+
+data Item c n l t = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l t)
+ (LinRec c l t)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+ | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data AKey c t = Act c
+ | ActTok t
+ | Pass c
+ | Useless
+ | Fin
+ deriving (Eq, Ord, Show)
+
+
+keyof :: Item c n l t -> AKey c t
+keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
+keyof (Active _ _ _ (Lin _ (Tok tok:_)) _ _) = ActTok tok
+keyof (Final _ _ _) = Fin
+keyof (Passive cat _) = Pass cat
+keyof _ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+
+prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _) <- chartKeys chart ]) ++
+ ", active-tok=" ++ show (sum [length (chartLookup chart k) |
+ k@(ActTok _) <- chartKeys chart ]) ++
+ ", useless=" ++ show (length (chartLookup chart Useless))
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+prtFinals chart = prtBefore "\n " (chartLookup chart Fin)
+
+instance (Print c, Print n, Print l, Print t) => Print (Item c n l t) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ " {" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+ prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance (Print c, Print t) => Print (AKey c t) where
+ prt (Act c) = "Active " ++ prt c
+ prt (ActTok t) = "Active-Tok " ++ prt t
+ prt (Pass c) = "Passive " ++ prt c
+ prt (Fin) = "Final"
+ prt (Useless) = "Useless"
diff --git a/src-3.0/GF/Parsing/MCFG/FastActive.hs b/src-3.0/GF/Parsing/MCFG/FastActive.hs
new file mode 100644
index 000000000..0a8e24b55
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/FastActive.hs
@@ -0,0 +1,176 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Peter Ljunglöf
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- MCFG parsing, the active algorithm, optimized version
+-- structure stolen from Krasimir Angelov's GF.Parsing.FCFG.Active
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.FastActive (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+import GF.Data.Utilities
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Infra.Ident
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+
+import Control.Monad (guard)
+
+import GF.Infra.Print
+
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Data.Array
+
+----------------------------------------------------------------------
+-- * parsing
+
+-- parse :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
+parse strategy pinfo starts =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ Final (Abs cat rhs fun) found rrecs <- listXChartFinal chart ]
+ where chart = process strategy pinfo axioms emptyXChart
+
+ -- axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ axioms | isBU strategy = initialBU pinfo
+ | isTD strategy = initialTD pinfo starts
+
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: Abstract c n -> [RangeRec l]
+emptyChildren (Abs _ rhs _) = replicate (length rhs) []
+
+updateChildren :: Eq l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]]
+updateChildren recs i rec = updateNthM update i recs
+ where update rec' = do guard (null rec' || rec' == rec)
+ return rec
+
+process :: (Ord c, Ord n, Ord l) => String -> MCFPInfo c n l Range -> [Item c n l] -> XChart c n l -> XChart c n l
+process strategy pinfo [] chart = chart
+process strategy pinfo (item:items) chart = process strategy pinfo items $! univRule item chart
+ where
+ univRule item@(Active abs found rng (Lin l syms) lins recs) chart
+ = case syms of
+ Cat(c,r,d) : syms' ->
+ case insertXChart chart item c of
+ Nothing -> chart
+ Just chart ->
+ let items = -- predict topdown
+ [ Active abs [] EmptyRange lin lins (emptyChildren abs) |
+ isTD strategy,
+ Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? c ] ++
+
+ -- combine
+ [ Active abs found rng'' (Lin l syms') lins recs' |
+ Final _ found' _ <- lookupXChartFinal chart c,
+ rng' <- projection r found',
+ rng'' <- concatRange rng rng',
+ recs' <- updateChildren recs d found' ]
+ in process strategy pinfo items chart
+
+ -- scan
+ Tok rng' : syms' ->
+ let items = [ Active abs found rng'' (Lin l syms') lins recs |
+ rng'' <- concatRange rng rng' ]
+ in process strategy pinfo items chart
+
+ -- complete
+ [] -> case lins of
+ (lin':lins') -> univRule (Active abs ((l,rng):found) EmptyRange lin' lins' recs) chart
+ [] -> univRule (Final abs (reverse ((l,rng):found)) recs) chart
+
+ univRule item@(Final abs@(Abs cat _ _) found' recs) chart =
+ case insertXChart chart item cat of
+ Nothing -> chart
+ Just chart ->
+ let items = -- predict bottomup
+ [ Active abs [] rng (Lin l syms') lins children |
+ isBU strategy,
+ Rule abs (Cnc _ _ (Lin l (Cat(c,r,d):syms') : lins)) <- leftcornerCats pinfo ? cat,
+ -- lin' : lins' <- rangeRestRec toks (Lin l syms' : lins),
+ rng <- projection r found',
+ children <- unifyRec (emptyChildren abs) d found' ] ++
+
+ -- combine
+ [ Active abs found rng'' (Lin l syms') lins recs' |
+ Active abs found rng (Lin l (Cat(c,r,d):syms')) lins recs <- lookupXChartAct chart cat,
+ rng' <- projection r found',
+ rng'' <- concatRange rng rng',
+ recs' <- updateChildren recs d found' ]
+ in process strategy pinfo items chart
+
+----------------------------------------------------------------------
+-- * XChart
+
+data XChart c n l = XChart !(AChart c n l) !(AChart c n l)
+type AChart c n l = ParseChart (Item c n l) c
+
+data Item c n l = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l Range)
+ (LinRec c l Range)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+-- | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+emptyXChart :: (Ord c, Ord n, Ord l) => XChart c n l
+emptyXChart = XChart emptyChart emptyChart
+
+insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
+ case chartInsert actives item c of
+ Nothing -> Nothing
+ Just actives -> Just (XChart actives finals)
+
+insertXChart (XChart actives finals) item@(Final _ _ _) c =
+ case chartInsert finals item c of
+ Nothing -> Nothing
+ Just finals -> Just (XChart actives finals)
+
+lookupXChartAct (XChart actives finals) c = chartLookup actives c
+lookupXChartFinal (XChart actives finals) c = chartLookup finals c
+
+listXChartAct (XChart actives finals) = chartList actives
+listXChartFinal (XChart actives finals) = chartList finals
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- called with all starting categories
+initialTD :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l]
+initialTD pinfo starts =
+ [ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) |
+ cat <- starts,
+ Rule abs (Cnc _ _ (lin':lins')) <- topdownRules pinfo ? cat ]
+ -- lin' : lins' <- rangeRestRec toks lins
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+initialBU :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+initialBU pinfo =
+ [ Active abs [] EmptyRange lin' lins' (emptyChildren abs) |
+ -- do tok <- aElems (inputToken toks)
+ Rule abs (Cnc _ _ (lin':lins')) <-
+ concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
+ -- leftcornerTokens pinfo ? tok ++
+ epsilonRules pinfo ]
+ -- lin' : lins' <- rangeRestRec toks lins
diff --git a/src-3.0/GF/Parsing/MCFG/Incremental.hs b/src-3.0/GF/Parsing/MCFG/Incremental.hs
new file mode 100644
index 000000000..bd5b4114d
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/Incremental.hs
@@ -0,0 +1,178 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.4 $
+--
+-- MCFG parsing, the incremental algorithm
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.Incremental (parse, parseR) where
+
+import Data.List
+import Control.Monad (guard)
+
+import GF.Data.Utilities (select)
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- parsing
+
+parse :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t
+parse pinfo starts toks =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = process pinfo toks ntoks
+ ntoks = snd (inputBounds toks)
+
+-- parseR :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t
+parseR pinfo starts ntoks =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = processR pinfo ntoks
+
+process :: (Ord n, Ord c, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> Int -> IChart c n l
+process pinfo toks ntoks
+ = tracePrt "MCFG.Incremental - chart size" prtSizes $
+ buildChart keyof [complete ntoks, scan, combine, convert] (predict pinfo toks ntoks)
+
+processR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> Int -> IChart c n l
+processR pinfo ntoks
+ = tracePrt "MCFG.Incremental Range - chart size" prtSizes $
+ buildChart keyof [complete ntoks, scan, combine, convert] (predictR pinfo ntoks)
+
+complete :: (Ord n, Ord c, Ord l) => Int -> IChart c n l -> Item c n l -> [Item c n l]
+complete ntoks _ (Active rule found rng (Lin l []) lins recs) =
+ do (lin, lins') <- select lins
+ k <- [minRange rng .. ntoks]
+ return $ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs
+complete _ _ _ = []
+
+
+predict :: (Ord n, Ord c, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> Int -> [Item c n l]
+predict pinfo toks n =
+ tracePrt "MCFG.Incremental - predicted rules" (prt . length) $
+ do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- rulesMatchingInput pinfo toks
+ let daughters = replicate (length rhs) []
+ lins' <- rangeRestRec toks lins
+ (lin', lins'') <- select lins'
+ k <- [0..n]
+ return $ Active abs [] (Range (k,k)) lin' lins'' daughters
+
+
+predictR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> Int -> [Item c n l]
+predictR pinfo n =
+ tracePrt "MCFG.Incremental Range - predicted rules" (prt . length) $
+ do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- allRules pinfo
+ let daughters = replicate (length rhs) []
+ (lin, lins') <- select lins
+ k <- [0..n]
+ return $ Active abs [] (Range (k,k)) lin lins' daughters
+
+
+scan :: (Ord n, Ord c, Ord l) => IChart c n l -> Item c n l -> [Item c n l]
+scan _ (Active abs found rng (Lin l (Tok rng':syms)) lins recs) =
+ do rng'' <- concatRange rng rng'
+ return $ Active abs found rng'' (Lin l syms) lins recs
+scan _ _ = []
+
+
+combine :: (Ord n, Ord c, Ord l) => IChart c n l -> Item c n l -> [Item c n l]
+combine chart active@(Active _ _ rng (Lin _ (Cat (c,l,_):_)) _ _) =
+ do passive <- chartLookup chart (Pass c l (maxRange rng))
+ combine2 active passive
+combine chart passive@(Active (Abs c _ _) _ rng (Lin l []) _ _) =
+ do active <- chartLookup chart (Act c l (minRange rng))
+ combine2 active passive
+combine _ _ = []
+
+combine2 (Active abs found rng (Lin l (Cat (c,l',d):syms)) lins recs)
+ (Active _ found' rng' _ _ _)
+ = do rng'' <- concatRange rng rng'
+ recs' <- unifyRec recs d found''
+ return $ Active abs found rng'' (Lin l syms) lins recs'
+ where found'' = found' ++ [(l',rng')]
+
+
+convert _ (Active rule found rng (Lin lbl []) [] recs) =
+ return $ Final rule (found ++ [(lbl,rng)]) recs
+convert _ _ = []
+
+----------------------------------------------------------------------
+-- type definitions
+
+type IChart c n l = ParseChart (Item c n l) (IKey c l)
+
+data Item c n l = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l Range)
+ (LinRec c l Range)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+-- | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data IKey c l = Act c l Int
+ | Pass c l Int
+ | Useless
+ | Fin
+ deriving (Eq, Ord, Show)
+
+keyof :: Item c n l -> IKey c l
+keyof (Active _ _ rng (Lin _ (Cat (next,lbl,_):_)) _ _)
+ = Act next lbl (maxRange rng)
+keyof (Active (Abs cat _ _) found rng (Lin lbl []) _ _)
+ = Pass cat lbl (minRange rng)
+keyof (Final _ _ _) = Fin
+keyof _
+ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _ _ _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _ _ _) <- chartKeys chart ]) ++
+ ", useless=" ++ show (length (chartLookup chart Useless))
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+instance (Print c, Print n, Print l) => Print (Item c n l) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ "{" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+-- prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance (Print c, Print l) => Print (IKey c l) where
+ prt (Act c l i) = "Active " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i
+ prt (Pass c l i) = "Passive " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i
+ prt (Fin) = "Final"
+ prt (Useless) = "Useless"
diff --git a/src-3.0/GF/Parsing/MCFG/Incremental2.hs b/src-3.0/GF/Parsing/MCFG/Incremental2.hs
new file mode 100644
index 000000000..db6c3084e
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/Incremental2.hs
@@ -0,0 +1,157 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.3 $
+--
+-- MCFG parsing, the incremental algorithm (alternative version)
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.Incremental2 (parse) where
+
+import Data.List
+import Data.Array
+import Control.Monad (guard)
+
+import GF.Data.Utilities (select)
+import GF.Data.Assoc
+import GF.Data.IncrementalDeduction
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- parsing
+
+-- parseR :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t
+parse pinfo starts inp =
+ accumAssoc groupSyntaxNodes $
+ [ ((cat, found), SNode fun (zip rhs rrecs)) |
+ k <- uncurry enumFromTo (inputBounds inp),
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart k Fin ]
+ where chart = process pinfo inp
+
+--process :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> (Int, Int) -> IChart c n l
+process pinfo inp
+ = tracePrt "MCFG.Incremental - chart size"
+ (prt . map (prtSizes finalChart . fst) . assocs) $
+ finalChart
+ where finalChart = buildChart keyof rules axioms inBounds
+ axioms k = tracePrt ("MCFG.Incremental - axioms for " ++ show k) (prt . length) $
+ predict k ++ scan k ++ complete1 k
+ rules k item = complete2 k item ++ combine k item ++ convert k item
+ inBounds = inputBounds inp
+
+ -- axioms: predict + scan + complete
+ predict k = do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- rulesMatchingInput pinfo inp
+ let daughters = replicate (length rhs) []
+ (lin, lins') <- select lins
+ return $ Active abs [] k lin lins' daughters
+
+ scan k = do (tok, js) <- aAssocs (inputTo inp ! k)
+ j <- js
+ Active abs found i (Lin l (Tok _tok:syms)) lins recs <-
+ chartLookup finalChart j (ActTok tok)
+ return $ Active abs found i (Lin l syms) lins recs
+
+ complete1 k = do j <- [fst inBounds .. k-1]
+ Active abs found i (Lin l _Nil) lins recs <-
+ chartLookup finalChart j Pass
+ let found' = found ++ [(l, makeRange (i,j))]
+ (lin, lins') <- select lins
+ return $ Active abs found' k lin lins' recs
+
+ -- rules: convert + combine + complete
+ convert k (Active rule found j (Lin lbl []) [] recs) =
+ let found' = found ++ [(lbl, makeRange (j,k))]
+ in return $ Final rule found' recs
+ convert _ _ = []
+
+ combine k (Active (Abs cat _ _) found' j (Lin lbl []) _ _) =
+ do guard (j < k) ---- cannot handle epsilon-rules
+ Active abs found i (Lin l (Cat (_cat,_lbl,nr):syms)) lins recs <-
+ chartLookup finalChart j (Act cat lbl)
+ let found'' = found' ++ [(lbl, makeRange (j,k))]
+ recs' <- unifyRec recs nr found''
+ return $ Active abs found i (Lin l syms) lins recs'
+ combine _ _ = []
+
+ complete2 k (Active abs found i (Lin l []) lins recs) =
+ do let found' = found ++ [(l, makeRange (i,k))]
+ (lin, lins') <- select lins
+ return $ Active abs found' k lin lins' recs
+ complete2 _ _ = []
+
+----------------------------------------------------------------------
+-- type definitions
+
+type IChart c n l t = IncrementalChart (Item c n l t) (IKey c l t)
+
+data Item c n l t = Active (Abstract c n)
+ (RangeRec l)
+ Int
+ (Lin c l t)
+ (LinRec c l t)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+ ---- | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data IKey c l t = Act c l
+ | ActTok t
+ ---- | Useless
+ | Pass
+ | Fin
+ deriving (Eq, Ord, Show)
+
+keyof :: Item c n l t -> IKey c l t
+keyof (Active _ _ _ (Lin _ (Cat (next,lbl,_):_)) _ _) = Act next lbl
+keyof (Active _ _ _ (Lin _ (Tok tok:_)) _ _) = ActTok tok
+keyof (Active _ _ _ (Lin _ []) _ _) = Pass
+keyof (Final _ _ _) = Fin
+-- keyof _ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+prtSizes chart k = "f=" ++ show (length (chartLookup chart k Fin)) ++
+ " p=" ++ show (length (chartLookup chart k Pass)) ++
+ " a=" ++ show (sum [length (chartLookup chart k key) |
+ key@(Act _ _) <- chartKeys chart k ]) ++
+ " t=" ++ show (sum [length (chartLookup chart k key) |
+ key@(ActTok _) <- chartKeys chart k ])
+ -- " u=" ++ show (length (chartLookup chart k Useless))
+
+-- prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+-- prtBefore "\n " (chartLookup chart k) |
+-- k <- chartKeys chart ]
+
+instance (Print c, Print n, Print l, Print t) => Print (Item c n l t) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ "{" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+ -- prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance (Print c, Print l, Print t) => Print (IKey c l t) where
+ prt (Act c l) = "Active " ++ prt c ++ " " ++ prt l
+ prt (ActTok t) = "ActiveTok " ++ prt t
+ -- prt (Pass c l i) = "Passive " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i
+ prt (Fin) = "Final"
+ -- prt (Useless) = "Useless"
diff --git a/src-3.0/GF/Parsing/MCFG/Naive.hs b/src-3.0/GF/Parsing/MCFG/Naive.hs
new file mode 100644
index 000000000..7d1fa0a8a
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/Naive.hs
@@ -0,0 +1,142 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- MCFG parsing, the naive algorithm
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.Naive (parse, parseR) where
+
+import Control.Monad (guard)
+
+-- GF modules
+import GF.Data.GeneralDeduction
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.System.Tracing
+
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- * parsing
+
+-- | Builds a chart from the initial agenda, given by prediction, and the inference rules
+parse :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t
+parse pinfo starts toks
+ = accumAssoc groupSyntaxNodes $
+ [ ((cat, makeRangeRec lins), SNode fun (zip rhs rrecs)) |
+ Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ]
+ where chart = process pinfo toks
+
+-- | Builds a chart from the initial agenda, given by prediction, and the inference rules
+-- parseR :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t
+parseR pinfo starts
+ = accumAssoc groupSyntaxNodes $
+ [ ((cat, makeRangeRec lins), SNode fun (zip rhs rrecs)) |
+ Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ]
+ where chart = processR pinfo
+
+process :: (Ord t, Ord n, Ord c, Ord l) => MCFPInfo c n l t -> Input t -> NChart c n l
+process pinfo toks
+ = tracePrt "MCFG.Naive - chart size" prtSizes $
+ buildChart keyof [convert, combine] (predict pinfo toks)
+
+processR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> NChart c n l
+processR pinfo
+ = tracePrt "MCFG.Naive Range - chart size" prtSizes $
+ buildChart keyof [convert, combine] (predictR pinfo)
+
+
+----------------------------------------------------------------------
+-- * inference rules
+
+-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
+predict :: (Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+predict pinfo toks = tracePrt "MCFG.Naive - predicted rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- rulesMatchingInput pinfo toks
+ lins' <- rangeRestRec toks lins
+ return $ Active (abs, []) lins' []
+
+-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
+predictR :: (Ord l) => MCFPInfo c n l Range -> [Item c n l]
+predictR pinfo = tracePrt "MCFG.Naive Range - predicted rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- allRules pinfo
+ return $ Active (abs, []) lins []
+
+-- | Creates an Active Item every time it is possible to combine
+-- an Active Item from the agenda with a Passive Item from the Chart
+combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
+combine chart item@(Active (Abs _ (c:_) _, _) _ _) =
+ do Passive _c rrec <- chartLookup chart (Pass c)
+ combine2 chart rrec item
+combine chart (Passive c rrec) =
+ do item <- chartLookup chart (Act c)
+ combine2 chart rrec item
+combine _ _ = []
+
+combine2 chart rrec (Active (Abs nt (c:find) f, found) lins rrecs) =
+ do lins' <- substArgRec (length found) rrec lins
+ return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
+
+-- | Active Items with nothing to find are converted to Passive Items
+convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
+convert _ (Active (Abs cat [] fun, _) lins _) = [Passive cat (makeRangeRec lins)]
+convert _ _ = []
+
+
+----------------------------------------------------------------------
+-- * type definitions
+
+type NChart c n l = ParseChart (Item c n l) (NKey c)
+
+data Item c n l = Active (DottedRule c n) (LinRec c l Range) [RangeRec l]
+ | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+type DottedRule c n = (Abstract c n, [c])
+
+data NKey c = Act c
+ | Pass c
+ | Final
+ deriving (Eq, Ord, Show)
+
+keyof :: Item c n l -> NKey c
+keyof (Active (Abs _ (next:_) _, _) _ _) = Act next
+keyof (Passive cat _) = Pass cat
+keyof _ = Final
+
+-- for tracing purposes
+prtSizes chart = "final=" ++ show (length (chartLookup chart Final)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _) <- chartKeys chart ])
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+instance (Print c, Print n, Print l) => Print (Item c n l) where
+ prt (Active (abs, cs) lrec rrecs) = "? " ++ prt abs ++ " . " ++ prtSep " " cs ++ ";\n\t" ++
+ "{" ++ prtSep " " lrec ++ "}" ++
+ ( if null rrecs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrecs) ++ "}" )
+ prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+
+instance Print c => Print (NKey c) where
+ prt (Act c) = "Active " ++ prt c
+ prt (Pass c) = "Passive " ++ prt c
+ prt (Final) = "Final"
+
+
diff --git a/src-3.0/GF/Parsing/MCFG/PInfo.hs b/src-3.0/GF/Parsing/MCFG/PInfo.hs
new file mode 100644
index 000000000..56119dcec
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/PInfo.hs
@@ -0,0 +1,162 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/13 12:40:19 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- MCFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.PInfo where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Parsing.MCFG.Range
+
+----------------------------------------------------------------------
+-- type declarations
+
+-- | the list of categories = possible starting categories
+type MCFParser c n l t = MCFPInfo c n l t
+ -> [c]
+ -> Input t
+ -> SyntaxChart n (c, RangeRec l)
+
+makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
+makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
+
+
+------------------------------------------------------------
+-- parser information
+
+data MCFPInfo c n l t
+ = MCFPInfo { grammarTokens :: SList t
+ , nameRules :: Assoc n (SList (MCFRule c n l t))
+ , topdownRules :: Assoc c (SList (MCFRule c n l t))
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
+ , epsilonRules :: [MCFRule c n l t]
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , leftcornerCats :: Assoc c (SList (MCFRule c n l t))
+ , leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , grammarCats :: SList c
+ -- ^ used when calculating starting categories
+ , rulesByToken :: Assoc t (SList (MCFRule c n l t, SList t))
+ , rulesWithoutTokens :: SList (MCFRule c n l t)
+ -- ^ used by 'rulesMatchingInput'
+ , allRules :: MCFGrammar c n l t
+ -- ^ used by any unoptimized algorithm
+
+ --bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
+ --emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
+ --emptyCategories :: Set c,
+ }
+
+
+rangeRestrictPInfo :: (Ord c, Ord n, Ord l, Ord t) =>
+ MCFPInfo c n l t -> Input t -> MCFPInfo c n l Range
+rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
+ tracePrt "MCFG.PInfo - Restricting the parser information" (prt . grammarTokens)
+ MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
+ , nameRules = rrAssoc (nameRules pinfo)
+ , topdownRules = rrAssoc (topdownRules pinfo)
+ , epsilonRules = rrRules (epsilonRules pinfo)
+ , leftcornerCats = rrAssoc (leftcornerCats pinfo)
+ , leftcornerTokens = lctokens
+ , grammarCats = grammarCats pinfo
+ , rulesByToken = emptyAssoc -- error "MCFG.PInfo.rulesByToken - no range restriction"
+ , rulesWithoutTokens = [] -- error "MCFG.PInfo.rulesByToken - no range restriction"
+ , allRules = allrules -- rrRules (allRules pinfo)
+ }
+
+ where lctokens = accumAssoc id
+ [ (rng, rule) | (tok, rules) <- aAssocs (leftcornerTokens pinfo),
+ inputToken inp ?= tok,
+ rule@(Rule _ (Cnc _ _ (Lin _ (Tok rng:_) : _)))
+ <- concatMap (rangeRestrictRule inp) rules ]
+
+ allrules = rrRules $ rulesMatchingInput pinfo inp
+
+ rrAssoc assoc = filterNull $ fmap rrRules assoc
+ filterNull assoc = assocFilter (not . null) assoc
+ rrRules rules = concatMap (rangeRestrictRule inp) rules
+
+
+buildMCFPInfo :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
+buildMCFPInfo grammar =
+ traceCalcFirst grammar $
+ tracePrt "MCFG.PInfo - parser info" (prt) $
+ MCFPInfo { grammarTokens = grammartokens
+ , nameRules = namerules
+ , topdownRules = topdownrules
+ , epsilonRules = epsilonrules
+ , leftcornerCats = leftcorncats
+ , leftcornerTokens = leftcorntoks
+ , grammarCats = grammarcats
+ , rulesByToken = rulesbytoken
+ , rulesWithoutTokens = ruleswithouttokens
+ , allRules = allrules
+ }
+
+ where allrules = concatMap expandVariants grammar
+ grammartokens = union (map fst ruletokens)
+ namerules = accumAssoc id
+ [ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
+ topdownrules = accumAssoc id
+ [ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
+ epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
+ leftcorncats = accumAssoc id
+ [ (cat, rule) |
+ rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
+ leftcorntoks = accumAssoc id
+ [ (tok, rule) |
+ rule@(Rule _ (Cnc _ _ (Lin _ (Tok tok:_) : _))) <- allrules ]
+ grammarcats = aElems topdownrules
+ ruletokens = [ (toksoflins lins, rule) |
+ rule@(Rule _ (Cnc _ _ lins)) <- allrules ]
+ toksoflins lins = nubsort [ tok | Lin _ syms <- lins, Tok tok <- syms ]
+ rulesbytoken = accumAssoc id
+ [ (tok, (rule, toks)) | (tok:toks, rule) <- ruletokens ]
+ ruleswithouttokens = nubsort [ rule | ([], rule) <- ruletokens ]
+
+
+-- | return only the rules for which all tokens are in the input string
+rulesMatchingInput :: Ord t => MCFPInfo c n l t -> Input t -> [MCFRule c n l t]
+rulesMatchingInput pinfo inp =
+ [ rule | tok <- toks,
+ (rule, ruletoks) <- rulesByToken pinfo ? tok,
+ ruletoks `subset` toks ]
+ ++ rulesWithoutTokens pinfo
+ where toks = aElems (inputToken inp)
+
+
+----------------------------------------------------------------------
+-- pretty-printing of statistics
+
+instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
+ prt pI = "[ tokens=" ++ sl grammarTokens ++
+ "; categories=" ++ sl grammarCats ++
+ "; nameRules=" ++ sla nameRules ++
+ "; tdRules=" ++ sla topdownRules ++
+ "; epsilonRules=" ++ sl epsilonRules ++
+ "; lcCats=" ++ sla leftcornerCats ++
+ "; lcTokens=" ++ sla leftcornerTokens ++
+ "; byToken=" ++ sla rulesByToken ++
+ "; noTokens=" ++ sl rulesWithoutTokens ++
+ "; allRules=" ++ sl allRules ++
+ " ]"
+
+ where sl f = show $ length $ f pI
+ sla f = let (as, bs) = unzip $ aAssocs $ f pI
+ in show (length as) ++ "/" ++ show (length (concat bs))
+
diff --git a/src-3.0/GF/Parsing/MCFG/Range.hs b/src-3.0/GF/Parsing/MCFG/Range.hs
new file mode 100644
index 000000000..91671fa00
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/Range.hs
@@ -0,0 +1,206 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Definitions of ranges, and operations on ranges
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.Range
+ ( Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
+ LinRec, RangeRec,
+ makeRangeRec, rangeRestRec, rangeRestrictRule,
+ projection, unifyRec, substArgRec
+ ) where
+
+
+-- Haskell
+import Data.List
+import Control.Monad
+
+-- GF modules
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+import GF.Infra.Print
+import GF.Data.Assoc ((?))
+import GF.Data.Utilities (updateNthM)
+
+------------------------------------------------------------
+-- ranges as single pairs
+
+data Range = Range (Int, Int)
+ | EmptyRange
+ deriving (Eq, Ord, Show)
+
+makeRange :: (Int, Int) -> Range
+concatRange :: Range -> Range -> [Range]
+rangeEdge :: a -> Range -> Edge a
+edgeRange :: Edge a -> Range
+minRange :: Range -> Int
+maxRange :: Range -> Int
+
+makeRange = Range
+concatRange EmptyRange rng = return rng
+concatRange rng EmptyRange = return rng
+concatRange (Range(i,j)) (Range(j',k)) = [ Range(i,k) | j==j']
+rangeEdge a (Range(i,j)) = Edge i j a
+edgeRange (Edge i j _) = Range (i,j)
+minRange (Range rho) = fst rho
+maxRange (Range rho) = snd rho
+
+instance Print Range where
+ prt (Range (i,j)) = "(" ++ show i ++ "-" ++ show j ++ ")"
+ prt (EmptyRange) = "(?)"
+
+{-- Types --------------------------------------------------------------------
+ Linearization- and Range records implemented as lists
+-----------------------------------------------------------------------------}
+
+type LinRec c l t = [Lin c l t]
+
+type RangeRec l = [(l, Range)]
+
+
+{-- Functions ----------------------------------------------------------------
+ Concatenation : Concatenation of Ranges, Symbols and Linearizations
+ and records of Linearizations
+ Record transformation : Makes a Range record from a fully instantiated
+ Linearization record
+ Record projection : Given a label, returns the corresponding Range
+ Range restriction : Range restriction of Tokens, Symbols,
+ Linearizations and Records given a list of Tokens
+ Record replacment : Substitute a record for another in a list of Range
+ records
+ Argument substitution : Substitution of a Cat c to a Tok Range, where
+ Range is the cover of c
+ Note: The argument is still a Symbol c Range
+ Subsumation : Checks if a Range record subsumes another Range
+ record
+ Record unification : Unification of two Range records
+-----------------------------------------------------------------------------}
+
+
+--- Concatenation ------------------------------------------------------------
+
+
+concSymbols :: [Symbol c Range] -> [[Symbol c Range]]
+concSymbols (Tok rng:Tok rng':toks) = do rng'' <- concatRange rng rng'
+ concSymbols (Tok rng'':toks)
+concSymbols (sym:syms) = do syms' <- concSymbols syms
+ return (sym:syms')
+concSymbols [] = return []
+
+
+concLin :: Lin c l Range -> [Lin c l Range]
+concLin (Lin lbl syms) = do syms' <- concSymbols syms
+ return (Lin lbl syms')
+
+
+concLinRec :: LinRec c l Range -> [LinRec c l Range]
+concLinRec = mapM concLin
+
+
+--- Record transformation ----------------------------------------------------
+
+makeRangeRec :: LinRec c l Range -> RangeRec l
+makeRangeRec lins = map convLin lins
+ where convLin (Lin lbl [Tok rng]) = (lbl, rng)
+ convLin (Lin lbl []) = (lbl, EmptyRange)
+ convLin _ = error "makeRangeRec"
+
+
+--- Record projection --------------------------------------------------------
+
+projection :: Ord l => l -> RangeRec l -> [Range]
+projection l rec = maybe (fail "projection") return $ lookup l rec
+
+
+--- Range restriction --------------------------------------------------------
+
+rangeRestTok :: Ord t => Input t -> t -> [Range]
+rangeRestTok toks tok = do rng <- inputToken toks ? tok
+ return (makeRange rng)
+
+
+rangeRestSym :: Ord t => Input t -> Symbol a t -> [Symbol a Range]
+rangeRestSym toks (Tok tok) = do rng <- rangeRestTok toks tok
+ return (Tok rng)
+rangeRestSym _ (Cat c) = return (Cat c)
+
+
+rangeRestLin :: Ord t => Input t -> Lin c l t -> [Lin c l Range]
+rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms
+ concLin (Lin lbl syms')
+ -- return (Lin lbl syms')
+
+
+rangeRestRec :: Ord t => Input t -> LinRec c l t -> [LinRec c l Range]
+rangeRestRec toks = mapM (rangeRestLin toks)
+
+
+rangeRestrictRule :: Ord t => Input t -> MCFRule c n l t -> [MCFRule c n l Range]
+rangeRestrictRule toks (Rule abs (Cnc l ls lins)) = liftM (Rule abs . Cnc l ls) $
+ rangeRestRec toks lins
+
+--- Argument substitution ----------------------------------------------------
+
+substArgSymbol :: Ord l => Int -> RangeRec l -> Symbol (c, l, Int) Range
+ -> Symbol (c, l, Int) Range
+substArgSymbol i rec tok@(Tok rng) = tok
+substArgSymbol i rec cat@(Cat (c, l, j))
+ | i==j = maybe err Tok $ lookup l rec
+ | otherwise = cat
+ where err = error "substArg: Label not in range-record"
+
+substArgLin :: Ord l => Int -> RangeRec l -> Lin c l Range
+ -> [Lin c l Range]
+substArgLin i rec (Lin lbl syms) =
+ concLin (Lin lbl (map (substArgSymbol i rec) syms))
+
+
+substArgRec :: Ord l => Int -> RangeRec l -> LinRec c l Range
+ -> [LinRec c l Range]
+substArgRec i rec lins = mapM (substArgLin i rec) lins
+
+
+-- Record unification & replacment ---------------------------------------------------------
+
+unifyRec :: Ord l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]]
+unifyRec recs i rec = updateNthM update i recs
+ where update rec' = guard (subsumes rec' rec) >> return rec
+
+-- unifyRec recs i rec = do guard $ subsumes (recs !! i) rec
+-- return $ replaceRec recs i rec
+
+replaceRec :: [RangeRec l] -> Int -> RangeRec l -> [RangeRec l]
+replaceRec recs i rec = before ++ (rec : after)
+ where (before, _ : after) = splitAt i recs
+
+subsumes :: Ord l => RangeRec l -> RangeRec l -> Bool
+subsumes rec rec' = and [r `elem` rec' | r <- rec]
+-- subsumes rec rec' = all (`elem` rec') rec
+
+
+{-
+--- Record unification -------------------------------------------------------
+unifyRangeRecs :: Ord l => [RangeRec l] -> [RangeRec l] -> [[RangeRec l]]
+unifyRangeRecs recs recs' = zipWithM unify recs recs'
+ where unify :: Ord l => RangeRec l -> RangeRec l -> [RangeRec l]
+ unify rec [] = return rec
+ unify [] rec = return rec
+ unify rec1'@(p1@(l1, r1):rec1) rec2'@(p2@(l2, r2):rec2)
+ = case compare l1 l2 of
+ LT -> do rec3 <- unify rec1 rec2'
+ return (p1:rec3)
+ GT -> do rec3 <- unify rec1' rec2
+ return (p2:rec3)
+ EQ -> do guard (r1 == r2)
+ rec3 <- unify rec1 rec2
+ return (p1:rec3)
+-}
diff --git a/src-3.0/GF/Parsing/MCFG/ViaCFG.hs b/src-3.0/GF/Parsing/MCFG/ViaCFG.hs
new file mode 100644
index 000000000..9204ea9f1
--- /dev/null
+++ b/src-3.0/GF/Parsing/MCFG/ViaCFG.hs
@@ -0,0 +1,186 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.4 $
+--
+-- MCFG parsing, through context-free approximation
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.ViaCFG where
+
+
+-- Haskell modules
+import Data.List
+import Control.Monad
+
+-- GF modules
+import ConvertMCFGtoDecoratedCFG
+import qualified DecoratedCFParser as CFP
+import qualified DecoratedGrammar as CFG
+import Examples
+import GF.OldParsing.GeneralChart
+import qualified GF.OldParsing.MCFGrammar as MCFG
+import MCFParser
+import Nondet
+import Parser
+import GF.Parsing.MCFG.Range
+
+
+{-- Datatypes -----------------------------------------------------------------
+Chart
+Item
+Key
+
+
+ Item : Four different Items are used. PreMCFG for MCFG Pre Items, Pre are
+ the Items returned by the pre-Functions and Mark are the
+ corresponding Items for the mark-Functions. For convenience correctly
+ marked Mark Items are converted to Passive Items.
+I use dottedrule for convenience to keep track of wich daughter's RangeRec to look for.
+ AChart: A RedBlackMap with Items and Keys
+ AKey :
+------------------------------------------------------------------------------}
+
+--Ev ta bort några typer av Item och bara nyckla på det som är unikt för den typen...
+data Item n c l = PreMCFG (n, c) (RangeRec l) [RangeRec l]
+ | Pre (n, c) (RangeRec l) [l] [RangeRec l]
+ | Mark (n, c) (RangeRec l) (RangeRec l) [RangeRec l]
+ | Passive (n, c) (RangeRec l) (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+type AChart n c l = ParseChart (Item n c l) (AKey n c l)
+
+data AKey n c l = Pr (n, c) l
+ | Pm (n, c) l
+ | Mk (RangeRec l)
+ | Ps (RangeRec l)
+ | Useless
+ deriving (Eq, Ord, Show)
+
+
+{-- Parsing -------------------------------------------------------------------
+ recognize:
+ parse : The Agenda consists of the Passive Items from context-free
+ approximation (as PreMCFG Items) and the Pre Items inferred by
+ pre-prediction.
+ keyof : Given an Item returns an appropriate Key for the Chart
+------------------------------------------------------------------------------}
+
+recognize strategy mcfg toks = chartMember (parse strategy mcfg toks)
+ (Passive ("f", S)
+ [("s" , MCFG.Range (0, n))]
+ [("p" , MCFG.Range (0, n2)), ("q", MCFG.Range (n2, n))])
+ (Ps [("s" , MCFG.Range (0, n))])
+ where n = length toks
+ n2 = n `div` 2
+
+
+--parse :: (Ord n, Ord NT, Ord String, Eq t) => CFP.Strategy -> MCFG.Grammar n NT String t -> [t]
+-- -> AChart n NT String
+parse strategy mcfg toks
+ = buildChart keyof
+ [preCombine, markPredict, markCombine, convert]
+ (makePreItems (CFP.parse strategy (CFG.pInfo (convertGrammar mcfg)) [(S, "s")] toks) ++
+ (prePredict mcfg))
+
+
+keyof :: Item n c l -> AKey n c l
+keyof (PreMCFG head [(lbl, rng)] _) = Pm head lbl
+keyof (Pre head _ (lbl:lbls) _) = Pr head lbl
+keyof (Mark _ _ _ (rec:recs)) = Mk rec
+keyof (Passive _ rec _) = Ps rec
+keyof _ = Useless
+
+
+{-- Initializing agenda -------------------------------------------------------
+ makePreItems:
+------------------------------------------------------------------------------}
+
+makePreItems :: (Eq c, Ord i) => CFG.Grammar n (Edge (c, l)) i t -> [Item n c l]
+makePreItems cfchart
+ = [ PreMCFG (fun, cat) [(lbl, MCFG.makeRange (i, j))] (symToRec beta) |
+ CFG.Rule (Edge i j (cat,lbl)) beta fun <- cfchart ]
+
+
+prePredict :: (Ord n, Ord c, Ord l) => MCFG.Grammar n c l t -> [Item n c l]
+prePredict mcfg =
+ [ Pre (f, nt) [] (getLables lins) (replicate (nrOfCats (head lins)) []) |
+ MCFG.Rule nt nts lins f <- mcfg ]
+
+
+{-- Inference rules ---------------------------------------------------------
+ prePredict :
+ preCombine :
+ markPredict:
+ markCombine:
+ convert :
+----------------------------------------------------------------------------}
+
+preCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
+ -> Item n c l -> [Item n c l]
+preCombine chart (Pre head rec (l:ls) recs) =
+ [ Pre head (rec ++ [(l, r)]) ls recs'' |
+ PreMCFG head [(l, r)] recs' <- chartLookup chart (Pm head l),
+ recs'' <- solutions (unifyRangeRecs recs recs') ]
+preCombine chart (PreMCFG head [(l, r)] recs) =
+ [ Pre head (rec ++ [(l, r)]) ls recs'' |
+ Pre head rec (l:ls) recs' <- chartLookup chart (Pr head l),
+ recs'' <- solutions (unifyRangeRecs recs recs') ]
+preCombine _ _ = []
+
+
+markPredict :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
+ -> Item n c l -> [Item n c l]
+markPredict _ (Pre (n, c) rec [] recs) = [Mark (n, c) rec [] recs]
+markPredict _ _ = []
+
+
+markCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
+ -> Item n c l -> [Item n c l]
+markCombine chart (Mark (f, c) rec mRec (r:recs)) =
+ [ Mark (f, c) rec (mRec ++ r) recs |
+ Passive _ r _ <- chartLookup chart (Ps r)]
+markCombine chart (Passive _ r _) =
+ [ Mark (f, c) rec (mRec++r) recs |
+ Mark (f, c) rec mRec (r:recs) <- chartLookup chart (Mk r) ]
+markCombine _ _ = []
+
+
+convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
+ -> Item n c l -> [Item n c l]
+convert _ (Mark (f, c) r rec []) = [Passive (f, c) r rec]
+convert _ _ = []
+
+
+{-- Help functions ----------------------------------------------------------------
+ getRHS :
+ getLables:
+ symToRec :
+----------------------------------------------------------------------------------}
+
+-- FULKOD !
+nrOfCats :: Eq c => MCFG.Lin c l t -> Int
+nrOfCats (MCFG.Lin l syms) = length $ nub [(c, i) | Cat (c, l, i) <- syms]
+
+
+--
+getLables :: LinRec c l t -> [l]
+getLables lins = [l | MCFG.Lin l syms <- lins]
+
+
+--
+symToRec :: Ord i => [Symbol (Edge (c, l), i) d] -> [[(l, MCFG.Range)]]
+symToRec beta = map makeLblRng $ groupBy (\(_, d) (_, d') -> (d == d'))
+ $ sortBy sBd [(Edge i j (c, l) , d) | Cat (Edge i j (c, l), d)
+ <- beta]
+ where makeLblRng edges = [(l, (MCFG.makeRange (i, j))) | (Edge i j (_, l), _)
+ <- edges]
+ sBd (_, d) (_, d')
+ | d < d' = LT
+ | d > d' = GT
+ | otherwise = EQ