summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/GF/Parsing/MCFG.hs22
-rw-r--r--src/GF/Parsing/MCFG/Active.hs36
-rw-r--r--src/GF/Parsing/MCFG/FastActive.hs175
-rw-r--r--src/GF/Parsing/MCFG/PInfo.hs11
4 files changed, 214 insertions, 30 deletions
diff --git a/src/GF/Parsing/MCFG.hs b/src/GF/Parsing/MCFG.hs
index 6aec811de..bda3af675 100644
--- a/src/GF/Parsing/MCFG.hs
+++ b/src/GF/Parsing/MCFG.hs
@@ -23,9 +23,10 @@ import GF.Parsing.MCFG.PInfo
import qualified GF.Parsing.MCFG.Naive as Naive
import qualified GF.Parsing.MCFG.Active as Active
-import qualified GF.Parsing.MCFG.Active2 as Active2
+import qualified GF.Parsing.MCFG.FastActive as FastActive
+-- import qualified GF.Parsing.MCFG.Active2 as Active2
import qualified GF.Parsing.MCFG.Incremental as Incremental
-import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
+-- import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
----------------------------------------------------------------------
-- parsing
@@ -35,13 +36,13 @@ parseMCF prs | prs `elem` strategies = Ok $ parseMCF' prs
| otherwise = Bad $ "MCFG parsing strategy not defined: " ++ prs
-strategies = words "bottomup topdown n an ab at i an2 ab2 at2 i2 rn ran rab rat ri"
+strategies = words "bottomup topdown n an ab at i rn ran rab rat ri ft fb"
parseMCF' :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
-parseMCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
-parseMCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
+parseMCF' "bottomup" pinfo starts toks = parseMCF' "fb" pinfo starts toks
+parseMCF' "topdown" pinfo starts toks = parseMCF' "ft" pinfo starts toks
parseMCF' "n" pinfo starts toks = Naive.parse pinfo starts toks
parseMCF' "an" pinfo starts toks = Active.parse "n" pinfo starts toks
@@ -49,10 +50,10 @@ parseMCF' "ab" pinfo starts toks = Active.parse "b" pinfo starts toks
parseMCF' "at" pinfo starts toks = Active.parse "t" pinfo starts toks
parseMCF' "i" pinfo starts toks = Incremental.parse pinfo starts toks
-parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
-parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
-parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
-parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
+-- parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
+-- parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
+-- parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
+-- parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
parseMCF' "rn" pinfo starts toks = Naive.parseR (rrP pinfo toks) starts
parseMCF' "ran" pinfo starts toks = Active.parseR "n" (rrP pinfo toks) starts
@@ -61,4 +62,7 @@ parseMCF' "rat" pinfo starts toks = Active.parseR "t" (rrP pinfo toks) starts
parseMCF' "ri" pinfo starts toks = Incremental.parseR (rrP pinfo toks) starts ntoks
where ntoks = snd (inputBounds toks)
+parseMCF' "fb" pinfo starts toks = FastActive.parse "b" (rrP pinfo toks) starts
+parseMCF' "ft" pinfo starts toks = FastActive.parse "t" (rrP pinfo toks) starts
+
rrP pi = rangeRestrictPInfo pi
diff --git a/src/GF/Parsing/MCFG/Active.hs b/src/GF/Parsing/MCFG/Active.hs
index a422f7e10..5ccd43398 100644
--- a/src/GF/Parsing/MCFG/Active.hs
+++ b/src/GF/Parsing/MCFG/Active.hs
@@ -57,7 +57,7 @@ process strategy pinfo starts toks
| isBU strategy = [scan, predictKilbury pinfo toks]
| isTD strategy = [scan, predictEarley pinfo toks]
axioms | isNil strategy = predict pinfo toks
- | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ | isBU strategy = {- terminal pinfo toks ++ -} initialScan pinfo toks
| isTD strategy = initial pinfo starts toks
--processR :: (Ord n, Ord c, Ord l) =>
@@ -70,7 +70,7 @@ processR strategy pinfo starts
| isBU strategy = [scan, predictKilburyR pinfo]
| isTD strategy = [scan, predictEarleyR pinfo]
axioms | isNil strategy = predictR pinfo
- | isBU strategy = terminalR pinfo ++ initialScanR pinfo
+ | isBU strategy = {- terminalR pinfo ++ -} initialScanR pinfo
| isTD strategy = initialR pinfo starts
isNil s = s=="n"
@@ -200,18 +200,20 @@ predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) =
----------------------------------------------------------------------
-- Kilbury --
-terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
-terminal pinfo toks =
- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
- lins' <- rangeRestRec toks lins
- return $ Final abs (makeRangeRec lins') []
+-- terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+-- terminal pinfo toks =
+-- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
+-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+-- lins' <- rangeRestRec toks lins
+-- return $ Final abs (makeRangeRec lins') []
initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
initialScan pinfo toks =
- tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
+ tracePrt "MCFG.Active (Kilbury) - initial scanned rules + epsilon rules" (prt . length) $
do tok <- aElems (inputToken toks)
- Rule abs (Cnc _ _ lins) <- leftcornerTokens pinfo ? tok
+ Rule abs (Cnc _ _ lins) <-
+ leftcornerTokens pinfo ? tok ++
+ epsilonRules pinfo
lin' : lins' <- rangeRestRec toks lins
return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
@@ -230,16 +232,18 @@ predictKilbury _ _ _ _ = []
----------------------------------------------------------------------
-- KilburyR --
-terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
-terminalR pinfo =
- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
- return $ Final abs (makeRangeRec lins) []
+-- terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+-- terminalR pinfo =
+-- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
+-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+-- return $ Final abs (makeRangeRec lins) []
initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
initialScanR pinfo =
tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $
- do Rule abs (Cnc _ _ (lin : lins)) <- concatMap snd (aAssocs (leftcornerTokens pinfo))
+ do Rule abs (Cnc _ _ (lin : lins)) <-
+ concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
+ epsilonRules pinfo
return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range
diff --git a/src/GF/Parsing/MCFG/FastActive.hs b/src/GF/Parsing/MCFG/FastActive.hs
new file mode 100644
index 000000000..de33172d2
--- /dev/null
+++ b/src/GF/Parsing/MCFG/FastActive.hs
@@ -0,0 +1,175 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Peter Ljunglöf
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- MCFG parsing, the active algorithm, optimized version
+-- structure stolen from Krasimir Angelov's GF.Parsing.FCFG.Active
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFG.FastActive (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+import GF.Data.Utilities
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Infra.Ident
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+
+import Control.Monad (guard)
+
+import GF.Infra.Print
+
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Data.Array
+
+----------------------------------------------------------------------
+-- * parsing
+
+-- parse :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
+parse strategy pinfo starts =
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final (Abs cat rhs fun) found rrecs <- listXChartFinal chart ]
+ where chart = process strategy pinfo axioms emptyXChart
+
+ -- axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ axioms | isBU strategy = initialBU pinfo
+ | isTD strategy = initialTD pinfo starts
+
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: Abstract c n -> [RangeRec l]
+emptyChildren (Abs _ rhs _) = replicate (length rhs) []
+
+updateChildren :: Eq l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]]
+updateChildren recs i rec = updateNthM update i recs
+ where update rec' = do guard (null rec' || rec' == rec)
+ return rec
+
+process :: (Ord c, Ord n, Ord l) => String -> MCFPInfo c n l Range -> [Item c n l] -> XChart c n l -> XChart c n l
+process strategy pinfo [] chart = chart
+process strategy pinfo (item:items) chart = process strategy pinfo items $! univRule item chart
+ where
+ univRule item@(Active abs found rng (Lin l syms) lins recs) chart
+ = case syms of
+ Cat(c,r,d) : syms' ->
+ case insertXChart chart item c of
+ Nothing -> chart
+ Just chart ->
+ let items = -- predict topdown
+ [ Active abs [] EmptyRange lin lins (emptyChildren abs) |
+ isTD strategy,
+ Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? c ] ++
+
+ -- combine
+ [ Active abs found rng'' (Lin l syms') lins recs' |
+ Final _ found' _ <- lookupXChartFinal chart c,
+ rng' <- projection r found',
+ rng'' <- concatRange rng rng',
+ recs' <- updateChildren recs d found' ]
+ in process strategy pinfo items chart
+
+ -- scan
+ Tok rng' : syms' ->
+ let items = [ Active abs found rng'' (Lin l syms') lins recs |
+ rng'' <- concatRange rng rng' ]
+ in process strategy pinfo items chart
+
+ -- complete
+ [] -> case lins of
+ (lin':lins') -> univRule (Active abs ((l,rng):found) EmptyRange lin' lins' recs) chart
+ [] -> univRule (Final abs (reverse ((l,rng):found)) recs) chart
+
+ univRule item@(Final abs@(Abs cat _ _) found' recs) chart =
+ case insertXChart chart item cat of
+ Nothing -> chart
+ Just chart ->
+ let items = -- predict bottomup
+ [ Active abs [] rng (Lin l syms') lins children |
+ isBU strategy,
+ Rule abs (Cnc _ _ (Lin l (Cat(c,r,d):syms') : lins)) <- leftcornerCats pinfo ? cat,
+ -- lin' : lins' <- rangeRestRec toks (Lin l syms' : lins),
+ rng <- projection r found',
+ children <- unifyRec (emptyChildren abs) d found' ] ++
+
+ -- combine
+ [ Active abs found rng'' (Lin l syms') lins recs' |
+ Active abs found rng (Lin l (Cat(c,r,d):syms')) lins recs <- lookupXChartAct chart cat,
+ rng' <- projection r found',
+ rng'' <- concatRange rng rng',
+ recs' <- updateChildren recs d found' ]
+ in process strategy pinfo items chart
+
+----------------------------------------------------------------------
+-- * XChart
+
+data XChart c n l = XChart !(AChart c n l) !(AChart c n l)
+type AChart c n l = ParseChart (Item c n l) c
+
+data Item c n l = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l Range)
+ (LinRec c l Range)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+-- | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+emptyXChart :: (Ord c, Ord n, Ord l) => XChart c n l
+emptyXChart = XChart emptyChart emptyChart
+
+insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
+ case chartInsert actives item c of
+ Nothing -> Nothing
+ Just actives -> Just (XChart actives finals)
+
+insertXChart (XChart actives finals) item@(Final _ _ _) c =
+ case chartInsert finals item c of
+ Nothing -> Nothing
+ Just finals -> Just (XChart actives finals)
+
+lookupXChartAct (XChart actives finals) c = chartLookup actives c
+lookupXChartFinal (XChart actives finals) c = chartLookup finals c
+
+listXChartAct (XChart actives finals) = chartList actives
+listXChartFinal (XChart actives finals) = chartList finals
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- called with all starting categories
+initialTD :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l]
+initialTD pinfo starts =
+ [ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) |
+ cat <- starts,
+ Rule abs (Cnc _ _ (lin':lins')) <- topdownRules pinfo ? cat ]
+ -- lin' : lins' <- rangeRestRec toks lins
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+initialBU :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+initialBU pinfo =
+ [ Active abs [] EmptyRange lin' lins' (emptyChildren abs) |
+ -- do tok <- aElems (inputToken toks)
+ Rule abs (Cnc _ _ (lin':lins')) <-
+ concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
+ -- leftcornerTokens pinfo ? tok ++
+ epsilonRules pinfo ]
+ -- lin' : lins' <- rangeRestRec toks lins
diff --git a/src/GF/Parsing/MCFG/PInfo.hs b/src/GF/Parsing/MCFG/PInfo.hs
index 4fbe3e736..5a61a4edf 100644
--- a/src/GF/Parsing/MCFG/PInfo.hs
+++ b/src/GF/Parsing/MCFG/PInfo.hs
@@ -46,7 +46,8 @@ data MCFPInfo c n l t
, nameRules :: Assoc n (SList (MCFRule c n l t))
, topdownRules :: Assoc c (SList (MCFRule c n l t))
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
- , emptyRules :: [MCFRule c n l t]
+ , epsilonRules :: [MCFRule c n l t]
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc c (SList (MCFRule c n l t))
, leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
@@ -71,7 +72,7 @@ rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
, nameRules = rrAssoc (nameRules pinfo)
, topdownRules = rrAssoc (topdownRules pinfo)
- , emptyRules = rrRules (emptyRules pinfo)
+ , epsilonRules = rrRules (epsilonRules pinfo)
, leftcornerCats = rrAssoc (leftcornerCats pinfo)
, leftcornerTokens = lctokens
, grammarCats = grammarCats pinfo
@@ -100,7 +101,7 @@ buildMCFPInfo grammar =
MCFPInfo { grammarTokens = grammartokens
, nameRules = namerules
, topdownRules = topdownrules
- , emptyRules = emptyrules
+ , epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
@@ -115,7 +116,7 @@ buildMCFPInfo grammar =
[ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
topdownrules = accumAssoc id
[ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
- emptyrules = [ rule | rule@(Rule (Abs _ [] _) _) <- allrules ]
+ epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
leftcorncats = accumAssoc id
[ (cat, rule) |
rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
@@ -149,7 +150,7 @@ instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
"; categories=" ++ sl grammarCats ++
"; nameRules=" ++ sla nameRules ++
"; tdRules=" ++ sla topdownRules ++
- "; emptyRules=" ++ sl emptyRules ++
+ "; epsilonRules=" ++ sl epsilonRules ++
"; lcCats=" ++ sla leftcornerCats ++
"; lcTokens=" ++ sla leftcornerTokens ++
"; byToken=" ++ sla rulesByToken ++