summaryrefslogtreecommitdiff
path: root/src/GF/Parsing
diff options
context:
space:
mode:
authorpeb <unknown>2005-05-09 08:25:56 +0000
committerpeb <unknown>2005-05-09 08:25:56 +0000
commit2b059b811db03a53e8e0f8ec1a655e507851a995 (patch)
tree467ad9a1849bf454b22d5b2a457d09f8247041e6 /src/GF/Parsing
parent01696e4f86fa156d079f2febaf103fbe229ffdb1 (diff)
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing')
-rw-r--r--src/GF/Parsing/CFG/PInfo.hs27
-rw-r--r--src/GF/Parsing/GFC.hs26
-rw-r--r--src/GF/Parsing/MCFG.hs41
-rw-r--r--src/GF/Parsing/MCFG/Active.hs312
-rw-r--r--src/GF/Parsing/MCFG/Active2.hs226
-rw-r--r--src/GF/Parsing/MCFG/Incremental.hs234
-rw-r--r--src/GF/Parsing/MCFG/Incremental2.hs144
-rw-r--r--src/GF/Parsing/MCFG/Naive.hs110
-rw-r--r--src/GF/Parsing/MCFG/PInfo.hs135
-rw-r--r--src/GF/Parsing/MCFG/Range.hs65
10 files changed, 1023 insertions, 297 deletions
diff --git a/src/GF/Parsing/CFG/PInfo.hs b/src/GF/Parsing/CFG/PInfo.hs
index 81d8d3724..f877b225e 100644
--- a/src/GF/Parsing/CFG/PInfo.hs
+++ b/src/GF/Parsing/CFG/PInfo.hs
@@ -4,9 +4,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/04/21 16:23:10 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.4 $
+-- > CVS $Date: 2005/05/09 09:28:45 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
--
-- CFG parsing, parser information
-----------------------------------------------------------------------------
@@ -47,7 +47,7 @@ data CFPInfo c n t
-- ^ DOES NOT WORK WITH EMPTY RULES!!!
}
-buildCFPInfo :: (Ord n, Ord c, Ord t) => CFGrammar c n t -> CFPInfo c n t
+buildCFPInfo :: (Ord c, Ord n, Ord t) => CFGrammar c n t -> CFPInfo c n t
-- this is not permanent...
buildCFPInfo grammar = traceCalcFirst grammar $
@@ -82,16 +82,17 @@ isCyclic _ = False
----------------------------------------------------------------------
+-- pretty-printing of statistics
-instance (Ord n, Ord c, Ord t) => Print (CFPInfo n c t) where
- prt pI = "[ nr. tokens=" ++ sl grammarTokens ++
- "; nr. names=" ++ sla nameRules ++
- "; nr. tdCats=" ++ sla topdownRules ++
- "; nr. buCats=" ++ sla bottomupRules ++
- "; nr. elcCats=" ++ sla emptyLeftcornerRules ++
- "; nr. eCats=" ++ sla emptyCategories ++
- "; nr. cCats=" ++ sl cyclicCategories ++
- "; nr. lctokCats=" ++ sla leftcornerTokens ++
+instance (Ord c, Ord n, Ord t) => Print (CFPInfo c n t) where
+ prt pI = "[ tokens=" ++ sl grammarTokens ++
+ "; names=" ++ sla nameRules ++
+ "; tdCats=" ++ sla topdownRules ++
+ "; buCats=" ++ sla bottomupRules ++
+ "; elcCats=" ++ sla emptyLeftcornerRules ++
+ "; eCats=" ++ sla emptyCategories ++
+ -- "; cCats=" ++ sl cyclicCategories ++
+ -- "; lctokCats=" ++ sla leftcornerTokens ++
" ]"
where sla f = show $ length $ aElems $ f pI
sl f = show $ length $ f pI
diff --git a/src/GF/Parsing/GFC.hs b/src/GF/Parsing/GFC.hs
index 7f54186a7..5476b8e8b 100644
--- a/src/GF/Parsing/GFC.hs
+++ b/src/GF/Parsing/GFC.hs
@@ -4,9 +4,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/04/21 16:23:06 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.6 $
+-- > CVS $Date: 2005/05/09 09:28:45 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.7 $
--
-- The main parsing module, parsing GFC grammars
-- by translating to simpler formats, such as PMCFG and CFG
@@ -45,13 +45,15 @@ import qualified GF.Parsing.CFG as PC
data PInfo = PInfo { mcfPInfo :: MCFPInfo,
cfPInfo :: CFPInfo }
-type MCFPInfo = MGrammar
+type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
type CFPInfo = PC.CFPInfo CCat Name Token
buildPInfo :: MGrammar -> CGrammar -> PInfo
-buildPInfo mcfg cfg = PInfo { mcfPInfo = mcfg,
+buildPInfo mcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg,
cfPInfo = PC.buildCFPInfo cfg }
+instance Print PInfo where
+ prt (PInfo m c) = prt m ++ "\n" ++ prt c
----------------------------------------------------------------------
-- main parsing function
@@ -67,8 +69,9 @@ parse (prs:strategy) pinfo abs startCat inString =
do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
inputMany (map wordsCFTok inString)
forests <- selectParser prs strategy pinfo startCat inTokens
- traceM "Parsing.GFC - nr. forests" (prt (length forests))
- let filteredForests = tracePrt "Parsing.GFC - nr. filtered forests" (prt . length) $
+ traceM "Parsing.GFC - nr. unfiltered forests" (prt (length forests))
+ traceM "Parsing.GFC - nr. unfiltered trees" (prt (length (forests >>= forest2trees)))
+ let filteredForests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
forests >>= applyProfileToForest
-- compactFs = tracePrt "#compactForests" (prt . length) $
-- tracePrt "compactForests" (prtBefore "\n") $
@@ -100,13 +103,12 @@ selectParser prs strategy pinfo startCat inTokens | prs=='c'
-- parsing via MCFG
selectParser prs strategy pinfo startCat inTokens | prs=='m'
= do let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $
- filter isStart $ nubsort [ c | G.Rule (G.Abs c _ _) _ <- mcfpi ]
+ filter isStart $ PM.grammarCats mcfpi
isStart cat = mcat2scat cat == cfCat2Ident startCat
mcfpi = mcfPInfo pinfo
- mcfParser <- PM.parseMCF strategy
- let mcfChart = tracePrt "Parsing.GFC - sz. MCF chart" (prt . length) $
- mcfParser mcfpi startCats inTokens
- chart = tracePrt "Parsing.GFC - sz. chart" (prt . map (length.snd) . aAssocs) $
+ mcfChart <- PM.parseMCF strategy mcfpi startCats inTokens
+ traceM "Parsing.GFC - sz. MCF chart" (prt (length mcfChart))
+ let chart = tracePrt "Parsing.GFC - sz. chart" (prt . length . concat . map snd . aAssocs) $
G.abstract2chart mcfChart
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
diff --git a/src/GF/Parsing/MCFG.hs b/src/GF/Parsing/MCFG.hs
index 11c845365..4cfc6e2ec 100644
--- a/src/GF/Parsing/MCFG.hs
+++ b/src/GF/Parsing/MCFG.hs
@@ -4,9 +4,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/04/21 16:23:07 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.3 $
+-- > CVS $Date: 2005/05/09 09:28:45 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.4 $
--
-- MCFG parsing
-----------------------------------------------------------------------------
@@ -23,20 +23,37 @@ import GF.Parsing.MCFG.PInfo
import qualified GF.Parsing.MCFG.Naive as Naive
import qualified GF.Parsing.MCFG.Active as Active
-import qualified GF.Parsing.MCFG.Range as Range (makeRange)
+import qualified GF.Parsing.MCFG.Active2 as Active2
+import qualified GF.Parsing.MCFG.Incremental as Incremental
+import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
----------------------------------------------------------------------
-- parsing
-parseMCF :: (Ord c, Ord n, Ord l, Ord t) => String -> Err (MCFParser c n l t)
-parseMCF "n" = Ok $ Naive.parse
-parseMCF "an" = Ok $ Active.parse "n"
-parseMCF "ab" = Ok $ Active.parse "b"
-parseMCF "at" = Ok $ Active.parse "t"
+-- parseMCF :: (Ord c, Ord n, Ord l, Ord t) => String -> Err (MCFParser c n l t)
+
+parseMCF "n" pinfo starts toks = Ok $ Naive.parse pinfo starts toks
+parseMCF "an" pinfo starts toks = Ok $ Active.parse "n" pinfo starts toks
+parseMCF "ab" pinfo starts toks = Ok $ Active.parse "b" pinfo starts toks
+parseMCF "at" pinfo starts toks = Ok $ Active.parse "t" pinfo starts toks
+parseMCF "i" pinfo starts toks = Ok $ Incremental.parse pinfo starts toks
+
+parseMCF "an2" pinfo starts toks = Ok $ Active2.parse "n" pinfo starts toks
+parseMCF "ab2" pinfo starts toks = Ok $ Active2.parse "b" pinfo starts toks
+parseMCF "at2" pinfo starts toks = Ok $ Active2.parse "t" pinfo starts toks
+parseMCF "i2" pinfo starts toks = Ok $ Incremental2.parse pinfo starts toks
+
+parseMCF "rn" pinfo starts toks = Ok $ Naive.parseR (rrP pinfo toks) starts
+parseMCF "ran" pinfo starts toks = Ok $ Active.parseR "n" (rrP pinfo toks) starts
+parseMCF "rab" pinfo starts toks = Ok $ Active.parseR "b" (rrP pinfo toks) starts
+parseMCF "rat" pinfo starts toks = Ok $ Active.parseR "t" (rrP pinfo toks) starts
+parseMCF "ri" pinfo starts toks = Ok $ Incremental.parseR (rrP pinfo toks) starts ntoks
+ where ntoks = snd (inputBounds toks)
+
-- default parsers:
-parseMCF "a" = parseMCF "an"
+parseMCF "" pinfo starts toks = parseMCF "n" pinfo starts toks
-- error parser:
-parseMCF prs = Bad $ "Parser not defined: " ++ prs
-
+parseMCF prs pinfo starts toks = Bad $ "Parser not defined: " ++ prs
+rrP pi = rangeRestrictPInfo pi
diff --git a/src/GF/Parsing/MCFG/Active.hs b/src/GF/Parsing/MCFG/Active.hs
index 44661b0c9..cb1440e24 100644
--- a/src/GF/Parsing/MCFG/Active.hs
+++ b/src/GF/Parsing/MCFG/Active.hs
@@ -1,81 +1,76 @@
-module GF.Parsing.MCFG.Active (parse) where
+module GF.Parsing.MCFG.Active (parse, parseR) where
import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Formalism.Utilities
+
import GF.Parsing.MCFG.Range
import GF.Parsing.MCFG.PInfo
+
import GF.System.Tracing
+
import Control.Monad (guard)
+import GF.Infra.Print
+
----------------------------------------------------------------------
-- * parsing
-parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
-parse strategy mcfg starts toks
- = [ Abs (cat, found) (zip rhs rrecs) fun |
- Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
- where chart = process strategy mcfg starts toks
+--parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
+parse strategy pinfo starts toks =
+ trace2 "MCFG.Active - strategy" (if isBU strategy then "BU"
+ else if isTD strategy then "TD" else "None") $
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = process strategy pinfo starts toks
+
+--parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
+parseR strategy pinfo starts =
+ trace2 "MCFG.Active Range - strategy" (if isBU strategy then "BU"
+ else if isTD strategy then "TD" else "None") $
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = processR strategy pinfo starts
process :: (Ord n, Ord c, Ord l, Ord t) =>
- String -> MCFGrammar c n l t -> [c] -> Input t -> AChart c n l
-process strategy mcfg starts toks
- = trace2 "MCFG.Active - strategy" (if isBU strategy then "BU"
- else if isTD strategy then "TD" else "None") $
- tracePrt "MCFG.Active - chart size" prtSizes $
+ String -> MCFPInfo c n l t -> [c] -> Input t -> AChart c n l
+process strategy pinfo starts toks
+ = tracePrt "MCFG.Active - chart size" prtSizes $
+ buildChart keyof (complete : combine : convert : rules) axioms
+ where rules | isNil strategy = [scan]
+ | isBU strategy = [scan, predictKilbury pinfo toks]
+ | isTD strategy = [scan, predictEarley pinfo toks]
+ axioms | isNil strategy = predict pinfo toks
+ | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ | isTD strategy = initial pinfo starts toks
+
+--processR :: (Ord n, Ord c, Ord l) =>
+-- String -> MCFPInfo c n l Range -> [c] -> AChart c n l
+processR strategy pinfo starts
+ = tracePrt "MCFG.Active Range - chart size" prtSizes $
+ -- tracePrt "MCFG.Active Range - final chart" prtChart $
buildChart keyof (complete : combine : convert : rules) axioms
where rules | isNil strategy = [scan]
- | isBU strategy = [predictKilbury mcfg toks]
- | isTD strategy = [predictEarley mcfg toks]
- axioms | isNil strategy = predict mcfg toks
- | isBU strategy = terminal mcfg toks
- | isTD strategy = initial mcfg starts toks
+ | isBU strategy = [scan, predictKilburyR pinfo]
+ | isTD strategy = [scan, predictEarleyR pinfo]
+ axioms | isNil strategy = predictR pinfo
+ | isBU strategy = terminalR pinfo ++ initialScanR pinfo
+ | isTD strategy = initialR pinfo starts
isNil s = s=="n"
isBU s = s=="b"
isTD s = s=="t"
-----------------------------------------------------------------------
--- * type definitions
-
-type AChart c n l = ParseChart (Item c n l) (AKey c)
-
-data Item c n l = Active (Abstract c n)
- (RangeRec l)
- Range
- (Lin c l Range)
- (LinRec c l Range)
- [RangeRec l]
- | Final (Abstract c n) (RangeRec l) [RangeRec l]
- | Passive c (RangeRec l)
- deriving (Eq, Ord, Show)
-
-data AKey c = Act c
- | Pass c
- | Useless
- | Fin
- deriving (Eq, Ord, Show)
-
-
-keyof :: Item c n l -> AKey c
-keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
-keyof (Final _ _ _) = Fin
-keyof (Passive cat _) = Pass cat
-keyof _ = Useless
-
--- to be used in prediction
+-- used in prediction
emptyChildren :: Abstract c n -> [RangeRec l]
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
--- for tracing purposes
-prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
- ", passive=" ++ show (sum [length (chartLookup chart k) |
- k@(Pass _) <- chartKeys chart ]) ++
- ", active=" ++ show (sum [length (chartLookup chart k) |
- k@(Act _) <- chartKeys chart ]) ++
- ", useless=" ++ show (length (chartLookup chart Useless))
+makeMaxRange (Range (_, j)) = Range (j, j)
+makeMaxRange EmptyRange = EmptyRange
----------------------------------------------------------------------
@@ -97,21 +92,20 @@ scan _ _ = []
-- | Creates an Active Item every time it is possible to combine
-- an Active Item from the agenda with a Passive Item from the Chart
combine :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
-combine chart (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
- do Passive _c found' <- chartLookup chart (Pass c)
- rng' <- projection r found'
- rng'' <- concatRange rng rng'
- guard $ subsumes (recs !! d) found'
- return $ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found')
+combine chart item@(Active _ _ _ (Lin _ (Cat (c,_,_):_)) _ _) =
+ do Passive _c found <- chartLookup chart (Pass c)
+ combine2 chart found item
combine chart (Passive c found) =
- do Active rule found' rng' (Lin l ((Cat (_c, r, d)):syms)) lins recs'
- <- chartLookup chart (Act c)
- rng'' <- projection r found
- rng <- concatRange rng' rng''
- guard $ subsumes (recs' !! d) found
- return $ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found)
+ do item <- chartLookup chart (Act c)
+ combine2 chart found item
combine _ _ = []
+combine2 chart found' (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
+ do rng' <- projection r found'
+ rng'' <- concatRange rng rng'
+ recs' <- unifyRec recs d found'
+ return $ Active rule found rng'' (Lin l syms) lins recs'
+
-- | Active Items with nothing to find are converted to Final items,
-- which in turn are converted to Passive Items
convert :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
@@ -121,66 +115,190 @@ convert _ (Final (Abs cat _ _) found _) =
return $ Passive cat found
convert _ _ = []
+
----------------------------------------------------------------------
-- Naive --
--- | Creates an Active Item of every Rule in the Grammar to give the initial Agenda
-predict :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l]
-predict grammar toks =
- do Rule abs (Cnc _ _ lins) <- grammar
- (lin':lins') <- rangeRestRec toks lins
- return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+predict :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+predict pinfo toks = tracePrt "MCFG.Active (Naive) - predicted rules" (prt . length) $
+ do (Rule abs (Cnc _ _ lins)) <- rulesMatchingInput pinfo toks
+ (lin':lins') <- rangeRestRec toks lins
+ return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- NaiveR --
+
+predictR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+predictR pinfo = tracePrt "MCFG.Active (Naive Range) - predicted rules" (prt . length) $
+ do (Rule abs (Cnc _ _ (lin:lins))) <- allRules pinfo
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
----------------------------------------------------------------------
-- Earley --
-- anropas med alla startkategorier
-initial :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> [c] -> Input t -> [Item c n l]
-initial mcfg starts toks =
- do Rule abs@(Abs cat _ _) (Cnc _ _ lins) <- mcfg
- guard $ cat `elem` starts
+initial :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> [c] -> Input t -> [Item c n l]
+initial pinfo starts toks =
+ tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
+ do cat <- starts
+ Rule abs (Cnc _ _ lins) <- topdownRules pinfo ? cat
lin' : lins' <- rangeRestRec toks lins
return $ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs)
--- earley prediction
-predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t
+predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
-> AChart c n l -> Item c n l -> [Item c n l]
-predictEarley mcfg toks _ (Active _ _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
- do rule@(Rule (Abs cat' _ _) _) <- mcfg
- guard $ cat == cat'
- predEar toks rng rule
+predictEarley pinfo toks _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
+ topdownRules pinfo ? cat >>= predictEarley2 toks rng
predictEarley _ _ _ _ = []
-predEar :: (Ord c, Ord n, Ord l, Ord t) =>
- Input t -> Range -> MCFRule c n l t -> [Item c n l]
-predEar toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
+predictEarley2 :: (Ord c, Ord n, Ord l, Ord t) => Input t -> Range -> MCFRule c n l t -> [Item c n l]
+predictEarley2 toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
do lins' <- rangeRestRec toks lins
return $ Final abs (makeRangeRec lins') []
-predEar toks rng (Rule abs (Cnc _ _ lins)) =
+predictEarley2 toks rng (Rule abs (Cnc _ _ lins)) =
do lin' : lins' <- rangeRestRec toks lins
- return $ Active abs [] (makeMaxRange rng) lin' lins' (emptyChildren abs)
+ return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Earley Range --
+
+initialR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l]
+initialR pinfo starts =
+ tracePrt "MCFG.Active (Earley Range) - initial rules" (prt . length) $
+ do cat <- starts
+ Rule abs (Cnc _ _ (lin : lins)) <- topdownRules pinfo ? cat
+ return $ Active abs [] (Range (0, 0)) lin lins (emptyChildren abs)
+
+predictEarleyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range
+ -> AChart c n l -> Item c n l -> [Item c n l]
+predictEarleyR pinfo _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
+ topdownRules pinfo ? cat >>= predictEarleyR2 rng
+predictEarleyR _ _ _ = []
+
+predictEarleyR2 :: (Ord c, Ord n, Ord l) => Range -> MCFRule c n l Range -> [Item c n l]
+predictEarleyR2 _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
+ return $ Final abs (makeRangeRec lins) []
+predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) =
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
-makeMaxRange (Range (_, j)) = Range (j, j)
-makeMaxRange EmptyRange = EmptyRange
----------------------------------------------------------------------
-- Kilbury --
-terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l]
-terminal mcfg toks =
- do Rule abs@(Abs _ [] _) (Cnc _ _ lins) <- mcfg
+terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+terminal pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
lins' <- rangeRestRec toks lins
return $ Final abs (makeRangeRec lins') []
--- kilbury prediction
-predictKilbury :: (Ord c, Ord n, Ord l, Ord t) =>
- MCFGrammar c n l t -> Input t
+initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+initialScan pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
+ do tok <- aElems (inputToken toks)
+ Rule abs (Cnc _ _ lins) <- leftcornerTokens pinfo ? tok
+ lin' : lins' <- rangeRestRec toks lins
+ return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
+
+predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
-> AChart c n l -> Item c n l -> [Item c n l]
-predictKilbury mcfg toks _ (Passive cat found) =
- do Rule abs@(Abs _ rhs _) (Cnc _ _ (Lin l (Cat (cat', r, i):syms) : lins)) <- mcfg
- guard $ cat == cat'
+predictKilbury pinfo toks _ (Passive cat found) =
+ do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat
lin' : lins' <- rangeRestRec toks (Lin l syms : lins)
rng <- projection r found
- let children = replaceRec (emptyChildren abs) i found
+ children <- unifyRec (emptyChildren abs) i found
return $ Active abs [] rng lin' lins' children
predictKilbury _ _ _ _ = []
+
+
+
+----------------------------------------------------------------------
+-- KilburyR --
+
+terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+terminalR pinfo =
+ tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+ return $ Final abs (makeRangeRec lins) []
+
+initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
+initialScanR pinfo =
+ tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $
+ do Rule abs (Cnc _ _ (lin : lins)) <- concatMap snd (aAssocs (leftcornerTokens pinfo))
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range
+ -> AChart c n l -> Item c n l -> [Item c n l]
+predictKilburyR pinfo _ (Passive cat found) =
+ do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat
+ rng <- projection r found
+ children <- unifyRec (emptyChildren abs) i found
+ return $ Active abs [] rng (Lin l syms) lins children
+predictKilburyR _ _ _ = []
+
+
+----------------------------------------------------------------------
+-- * type definitions
+
+type AChart c n l = ParseChart (Item c n l) (AKey c)
+
+data Item c n l = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l Range)
+ (LinRec c l Range)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+ | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data AKey c = Act c
+ | Pass c
+ | Useless
+ | Fin
+ deriving (Eq, Ord, Show)
+
+
+keyof :: Item c n l -> AKey c
+keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
+keyof (Final _ _ _) = Fin
+keyof (Passive cat _) = Pass cat
+keyof _ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+
+prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _) <- chartKeys chart ]) ++
+ ", useless=" ++ show (length (chartLookup chart Useless))
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+prtFinals chart = prtBefore "\n " (chartLookup chart Fin)
+
+instance (Print c, Print n, Print l) => Print (Item c n l) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ " {" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+ prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance Print c => Print (AKey c) where
+ prt (Act c) = "Active " ++ prt c
+ prt (Pass c) = "Passive " ++ prt c
+ prt (Fin) = "Final"
+ prt (Useless) = "Useless"
diff --git a/src/GF/Parsing/MCFG/Active2.hs b/src/GF/Parsing/MCFG/Active2.hs
new file mode 100644
index 000000000..a37c7c15d
--- /dev/null
+++ b/src/GF/Parsing/MCFG/Active2.hs
@@ -0,0 +1,226 @@
+
+module GF.Parsing.MCFG.Active2 (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+
+import Control.Monad (guard)
+
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- * parsing
+
+--parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
+parse strategy pinfo starts toks =
+ trace2 "MCFG.Active 2 - strategy" (if isBU strategy then "BU"
+ else if isTD strategy then "TD" else "None") $
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = process strategy pinfo starts toks
+
+process :: (Ord n, Ord c, Ord l, Ord t) =>
+ String -> MCFPInfo c n l t -> [c] -> Input t -> AChart c n l t
+process strategy pinfo starts toks
+ = tracePrt "MCFG.Active - chart size" prtSizes $
+ buildChart keyof (complete : combine : convert : rules) axioms
+ where rules | isNil strategy = [scan toks]
+ | isBU strategy = [scan toks, predictKilbury pinfo toks]
+ | isTD strategy = [scan toks, predictEarley pinfo toks]
+ axioms | isNil strategy = predict pinfo toks
+ | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ | isTD strategy = initial pinfo starts toks
+
+isNil s = s=="n"
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: Abstract c n -> [RangeRec l]
+emptyChildren (Abs _ rhs _) = replicate (length rhs) []
+
+makeMaxRange (Range (_, j)) = Range (j, j)
+makeMaxRange EmptyRange = EmptyRange
+
+
+----------------------------------------------------------------------
+-- * inference rules
+
+-- completion
+complete :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+complete _ (Active rule found rng (Lin l []) (lin:lins) recs) =
+ return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs
+complete _ _ = []
+
+-- scanning
+--scan :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+scan inp _ (Active rule found rng (Lin l (Tok tok:syms)) lins recs) =
+ do rng' <- map makeRange (inputToken inp ? tok)
+ rng'' <- concatRange rng rng'
+ return $ Active rule found rng'' (Lin l syms) lins recs
+scan _ _ _ = []
+
+-- | Creates an Active Item every time it is possible to combine
+-- an Active Item from the agenda with a Passive Item from the Chart
+combine :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+combine chart item@(Active _ _ _ (Lin _ (Cat (c,_,_):_)) _ _) =
+ do Passive _c found <- chartLookup chart (Pass c)
+ combine2 chart found item
+combine chart (Passive c found) =
+ do item <- chartLookup chart (Act c)
+ combine2 chart found item
+combine _ _ = []
+
+combine2 chart found' (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
+ do rng' <- projection r found'
+ rng'' <- concatRange rng rng'
+ recs' <- unifyRec recs d found'
+ return $ Active rule found rng'' (Lin l syms) lins recs'
+
+-- | Active Items with nothing to find are converted to Final items,
+-- which in turn are converted to Passive Items
+convert :: (Ord c, Ord n, Ord l, Ord t) => AChart c n l t -> Item c n l t -> [Item c n l t]
+convert _ (Active rule found rng (Lin lbl []) [] recs) =
+ return $ Final rule (found ++ [(lbl,rng)]) recs
+convert _ (Final (Abs cat _ _) found _) =
+ return $ Passive cat found
+convert _ _ = []
+
+
+----------------------------------------------------------------------
+-- Naive --
+
+predict :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t]
+predict pinfo toks = tracePrt "MCFG.Active (Naive) - predicted rules" (prt . length) $
+ do Rule abs (Cnc _ _ (lin:lins)) <- rulesMatchingInput pinfo toks
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- anropas med alla startkategorier
+initial :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> [c] -> Input t -> [Item c n l t]
+initial pinfo starts toks =
+ tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
+ do cat <- starts
+ Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? cat
+ return $ Active abs [] (Range (0, 0)) lin lins (emptyChildren abs)
+
+predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
+ -> AChart c n l t -> Item c n l t -> [Item c n l t]
+predictEarley pinfo toks _ item@(Active (Abs _ _ f) _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
+ topdownRules pinfo ? cat >>= predictEarley2 toks rng
+predictEarley _ _ _ _ = []
+
+predictEarley2 :: (Ord c, Ord n, Ord l, Ord t) => Input t -> Range -> MCFRule c n l t -> [Item c n l t]
+predictEarley2 toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
+ do lins' <- rangeRestRec toks lins
+ return $ Final abs (makeRangeRec lins') []
+predictEarley2 toks rng (Rule abs (Cnc _ _ (lin:lins))) =
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t]
+terminal pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
+ lins' <- rangeRestRec toks lins
+ return $ Final abs (makeRangeRec lins') []
+
+initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l t]
+initialScan pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
+ do tok <- aElems (inputToken toks)
+ Rule abs (Cnc _ _ (lin:lins)) <- leftcornerTokens pinfo ? tok
+ return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
+
+predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t
+ -> AChart c n l t -> Item c n l t -> [Item c n l t]
+predictKilbury pinfo toks _ (Passive cat found) =
+ do Rule abs (Cnc _ _ (Lin l (Cat (_,r,i):syms) : lins)) <- leftcornerCats pinfo ? cat
+ rng <- projection r found
+ children <- unifyRec (emptyChildren abs) i found
+ return $ Active abs [] rng (Lin l syms) lins children
+predictKilbury _ _ _ _ = []
+
+
+----------------------------------------------------------------------
+-- * type definitions
+
+type AChart c n l t = ParseChart (Item c n l t) (AKey c t)
+
+data Item c n l t = Active (Abstract c n)
+ (RangeRec l)
+ Range
+ (Lin c l t)
+ (LinRec c l t)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+ | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data AKey c t = Act c
+ | ActTok t
+ | Pass c
+ | Useless
+ | Fin
+ deriving (Eq, Ord, Show)
+
+
+keyof :: Item c n l t -> AKey c t
+keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
+keyof (Active _ _ _ (Lin _ (Tok tok:_)) _ _) = ActTok tok
+keyof (Final _ _ _) = Fin
+keyof (Passive cat _) = Pass cat
+keyof _ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+
+prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _) <- chartKeys chart ]) ++
+ ", active-tok=" ++ show (sum [length (chartLookup chart k) |
+ k@(ActTok _) <- chartKeys chart ]) ++
+ ", useless=" ++ show (length (chartLookup chart Useless))
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+prtFinals chart = prtBefore "\n " (chartLookup chart Fin)
+
+instance (Print c, Print n, Print l, Print t) => Print (Item c n l t) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ " {" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+ prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance (Print c, Print t) => Print (AKey c t) where
+ prt (Act c) = "Active " ++ prt c
+ prt (ActTok t) = "Active-Tok " ++ prt t
+ prt (Pass c) = "Passive " ++ prt c
+ prt (Fin) = "Final"
+ prt (Useless) = "Useless"
diff --git a/src/GF/Parsing/MCFG/Incremental.hs b/src/GF/Parsing/MCFG/Incremental.hs
index 21467078f..eafca578d 100644
--- a/src/GF/Parsing/MCFG/Incremental.hs
+++ b/src/GF/Parsing/MCFG/Incremental.hs
@@ -1,123 +1,163 @@
-{-- Module --------------------------------------------------------------------
- Filename: IncrementalParse.hs
- Author: Håkan Burden
- Time-stamp: <2005-04-18, 15:07>
- Description: An agenda-driven implementation of the incremental algorithm 4.6
- that handles erasing and suppressing MCFG.
- As described in Ljunglöf (2004)
-------------------------------------------------------------------------------}
+module GF.Parsing.MCFG.Incremental (parse, parseR) where
-module GF.Parsing.MCFG.Incremental where
+import Data.List
+import Control.Monad (guard)
+import GF.Data.Utilities (select)
+import GF.Data.GeneralDeduction
--- Haskell
-import Data.List
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
--- GF modules
-import Examples
-import GF.OldParsing.GeneralChart
-import GF.OldParsing.MCFGrammar
-import MCFParser
-import Parser
import GF.Parsing.MCFG.Range
-import Nondet
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- parsing
+
+parse :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t
+parse pinfo starts toks =
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = process pinfo toks ntoks
+ ntoks = snd (inputBounds toks)
+
+-- parseR :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t
+parseR pinfo starts ntoks =
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
+ where chart = processR pinfo ntoks
+
+process :: (Ord n, Ord c, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> Int -> IChart c n l
+process pinfo toks ntoks
+ = tracePrt "MCFG.Incremental - chart size" prtSizes $
+ buildChart keyof [complete ntoks, scan, combine, convert] (predict pinfo toks ntoks)
+
+processR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> Int -> IChart c n l
+processR pinfo ntoks
+ = tracePrt "MCFG.Incremental Range - chart size" prtSizes $
+ buildChart keyof [complete ntoks, scan, combine, convert] (predictR pinfo ntoks)
+
+complete :: (Ord n, Ord c, Ord l) => Int -> IChart c n l -> Item c n l -> [Item c n l]
+complete ntoks _ (Active rule found rng (Lin l []) lins recs) =
+ do (lin, lins') <- select lins
+ k <- [minRange rng .. ntoks]
+ return $ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs
+complete _ _ _ = []
+
+
+predict :: (Ord n, Ord c, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> Int -> [Item c n l]
+predict pinfo toks n =
+ tracePrt "MCFG.Incremental - predicted rules" (prt . length) $
+ do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- rulesMatchingInput pinfo toks
+ let daughters = replicate (length rhs) []
+ lins' <- rangeRestRec toks lins
+ (lin', lins'') <- select lins'
+ k <- [0..n]
+ return $ Active abs [] (Range (k,k)) lin' lins'' daughters
+
+
+predictR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> Int -> [Item c n l]
+predictR pinfo n =
+ tracePrt "MCFG.Incremental Range - predicted rules" (prt . length) $
+ do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- allRules pinfo
+ let daughters = replicate (length rhs) []
+ (lin, lins') <- select lins
+ k <- [0..n]
+ return $ Active abs [] (Range (k,k)) lin lins' daughters
+
+
+scan :: (Ord n, Ord c, Ord l) => IChart c n l -> Item c n l -> [Item c n l]
+scan _ (Active abs found rng (Lin l (Tok rng':syms)) lins recs) =
+ do rng'' <- concatRange rng rng'
+ return $ Active abs found rng'' (Lin l syms) lins recs
+scan _ _ = []
+
+
+combine :: (Ord n, Ord c, Ord l) => IChart c n l -> Item c n l -> [Item c n l]
+combine chart active@(Active _ _ rng (Lin _ (Cat (c,l,_):_)) _ _) =
+ do passive <- chartLookup chart (Pass c l (maxRange rng))
+ combine2 active passive
+combine chart passive@(Active (Abs c _ _) _ rng (Lin l []) _ _) =
+ do active <- chartLookup chart (Act c l (minRange rng))
+ combine2 active passive
+combine _ _ = []
+
+combine2 (Active abs found rng (Lin l (Cat (c,l',d):syms)) lins recs)
+ (Active _ found' rng' _ _ _)
+ = do rng'' <- concatRange rng rng'
+ recs' <- unifyRec recs d found''
+ return $ Active abs found rng'' (Lin l syms) lins recs'
+ where found'' = found' ++ [(l',rng')]
+
+convert _ (Active rule found rng (Lin lbl []) [] recs) =
+ return $ Final rule (found ++ [(lbl,rng)]) recs
+convert _ _ = []
-{-- Datatypes -----------------------------------------------------------------
- IChart: A RedBlackMap with Items and Keys
- Item : One kind of Item since the Passive Items not necessarily need to be
- saturated iow, they can still have rows to recognize.
- IKey :
-------------------------------------------------------------------------------}
+----------------------------------------------------------------------
+-- type definitions
-type IChart n c l = ParseChart (Item n c l) (IKey c l)
+type IChart c n l = ParseChart (Item c n l) (IKey c l)
-data Item n c l = Active (AbstractRule n c)
+data Item c n l = Active (Abstract c n)
(RangeRec l)
Range
(Lin c l Range)
(LinRec c l Range)
[RangeRec l]
--- | Passive (AbstractRule n c)
--- (RangeRec l)
--- [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+-- | Passive c (RangeRec l)
deriving (Eq, Ord, Show)
data IKey c l = Act c l Int
--- | ActE l
| Pass c l Int
--- | Pred l
| Useless
+ | Fin
deriving (Eq, Ord, Show)
-keyof :: Item n c l -> IKey c l
-keyof (Active _ _ (Range (_,j)) (Lin _ ((Cat (next,lbl,_)):_)) _ _)
- = Act next lbl j
-keyof (Active (_, cat, _) found (Range (i,_)) (Lin lbl []) _ _)
- = Pass cat lbl i
+keyof :: Item c n l -> IKey c l
+keyof (Active _ _ rng (Lin _ (Cat (next,lbl,_):_)) _ _)
+ = Act next lbl (maxRange rng)
+keyof (Active (Abs cat _ _) found rng (Lin lbl []) _ _)
+ = Pass cat lbl (minRange rng)
+keyof (Final _ _ _) = Fin
keyof _
= Useless
-{-- Parsing -------------------------------------------------------------------
- recognize:
- parse : Builds a chart from the initial agenda, given by prediction, and
- the inference rules
- keyof : Given an Item returns an appropriate Key for the Chart
-------------------------------------------------------------------------------}
-
-recognize mcfg toks = chartMember (parse mcfg toks) item (keyof item)
- where n = length toks
- n2 = n `div` 2
- item = Active ("f",S,[A])
- [] (Range (0, n)) (Lin "s" []) []
- [[("p", Range (0, n2)), ("q", Range (n2, n))]]
-
-
-parse :: (Ord n, Ord c, Ord l, Eq t) => Grammar n c l t -> [t] -> IChart n c l
-parse mcfg toks = buildChart keyof [complete ntoks, scan, combine] (predict mcfg toks ntoks)
- where ntoks = length toks
-
-complete :: (Ord n, Ord c, Ord l) => Int -> IChart n c l
- -> Item n c l -> [Item n c l]
-complete ntoks _ (Active rule found rng@(Range (_,j)) (Lin l []) lins recs) =
- [ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs |
- (lin, lins') <- select lins,
- k <- [j .. ntoks] ]
-complete _ _ _ = []
-
-
-predict :: (Eq n, Eq c, Eq l, Eq t) => Grammar n c l t -> [t] -> Int -> [Item n c l]
-predict mcfg toks n = [ Active (f, c, rhs) [] (Range (k,k)) lin' lins'' daughters |
- Rule c rhs lins f <- mcfg,
- let daughters = replicate (length rhs) [],
- lins' <- solutions $ rangeRestRec toks lins,
- (lin', lins'') <- select lins',
- k <- [0..n] ]
-
-
-scan :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l]
-scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) =
- [ Active rule found rng'' (Lin l syms) lins recs |
- rng'' <- solutions $ concRanges rng rng' ]
-scan _ _ = []
-
-
-combine :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l]
-combine chart (Active rule found rng@(Range (_,j)) (Lin l ((Cat (c,r,d)):syms)) lins recs) =
- [ Active rule found rng'' (Lin l syms) lins (replaceRec recs d (found' ++ [(l',rng')])) |
- Active _ found' rng' (Lin l' []) _ _ <- chartLookup chart (Pass c r j),
- subsumes (recs !! d) (found' ++ [(l',rng')]),
- rng'' <- solutions $ concRanges rng rng' ]
-combine chart (Active (_,c,_) found rng'@(Range (i,_)) (Lin l []) _ _) =
- [ Active rule found' rng'' (Lin l' syms) lins (replaceRec recs d (found ++ [(l,rng')])) |
- Active rule found' rng (Lin l' ((Cat (c,r,d)):syms)) lins recs
- <- chartLookup chart (Act c l i),
- subsumes (recs !! d) (found ++ [(l,rng')]),
- rng'' <- solutions $ concRanges rng rng' ]
-combine _ _ = []
-
-
-
-
+----------------------------------------------------------------------
+-- for tracing purposes
+prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
+ ", passive=" ++ show (sum [length (chartLookup chart k) |
+ k@(Pass _ _ _) <- chartKeys chart ]) ++
+ ", active=" ++ show (sum [length (chartLookup chart k) |
+ k@(Act _ _ _) <- chartKeys chart ]) ++
+ ", useless=" ++ show (length (chartLookup chart Useless))
+
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+instance (Print c, Print n, Print l) => Print (Item c n l) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ "{" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+-- prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance (Print c, Print l) => Print (IKey c l) where
+ prt (Act c l i) = "Active " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i
+ prt (Pass c l i) = "Passive " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i
+ prt (Fin) = "Final"
+ prt (Useless) = "Useless"
diff --git a/src/GF/Parsing/MCFG/Incremental2.hs b/src/GF/Parsing/MCFG/Incremental2.hs
new file mode 100644
index 000000000..0ae6eb926
--- /dev/null
+++ b/src/GF/Parsing/MCFG/Incremental2.hs
@@ -0,0 +1,144 @@
+
+module GF.Parsing.MCFG.Incremental2 (parse) where
+
+import Data.List
+import Data.Array
+import Control.Monad (guard)
+
+import GF.Data.Utilities (select)
+import GF.Data.Assoc
+import GF.Data.IncrementalDeduction
+
+import GF.Formalism.GCFG
+import GF.Formalism.MCFG
+import GF.Formalism.Utilities
+
+import GF.Parsing.MCFG.Range
+import GF.Parsing.MCFG.PInfo
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+----------------------------------------------------------------------
+-- parsing
+
+-- parseR :: (Ord n, Ord c, Ord l, Ord t) => MCFParser c n l t
+parse pinfo starts inp =
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ k <- uncurry enumFromTo (inputBounds inp),
+ Final (Abs cat rhs fun) found rrecs <- chartLookup chart k Fin ]
+ where chart = process pinfo inp
+
+--process :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> (Int, Int) -> IChart c n l
+process pinfo inp
+ = tracePrt "MCFG.Incremental - chart size"
+ (prt . map (prtSizes finalChart . fst) . assocs) $
+ finalChart
+ where finalChart = buildChart keyof rules axioms inBounds
+ axioms k = tracePrt ("MCFG.Incremental - axioms for " ++ show k) (prt . length) $
+ predict k ++ scan k ++ complete1 k
+ rules k item = complete2 k item ++ combine k item ++ convert k item
+ inBounds = inputBounds inp
+
+ -- axioms: predict + scan + complete
+ predict k = do Rule abs@(Abs _ rhs _) (Cnc _ _ lins) <- rulesMatchingInput pinfo inp
+ let daughters = replicate (length rhs) []
+ (lin, lins') <- select lins
+ return $ Active abs [] k lin lins' daughters
+
+ scan k = do (tok, js) <- aAssocs (inputTo inp ! k)
+ j <- js
+ Active abs found i (Lin l (Tok _tok:syms)) lins recs <-
+ chartLookup finalChart j (ActTok tok)
+ return $ Active abs found i (Lin l syms) lins recs
+
+ complete1 k = do j <- [fst inBounds .. k-1]
+ Active abs found i (Lin l _Nil) lins recs <-
+ chartLookup finalChart j Pass
+ let found' = found ++ [(l, makeRange (i,j))]
+ (lin, lins') <- select lins
+ return $ Active abs found' k lin lins' recs
+
+ -- rules: convert + combine + complete
+ convert k (Active rule found j (Lin lbl []) [] recs) =
+ let found' = found ++ [(lbl, makeRange (j,k))]
+ in return $ Final rule found' recs
+ convert _ _ = []
+
+ combine k (Active (Abs cat _ _) found' j (Lin lbl []) _ _) =
+ do guard (j < k) ---- cannot handle epsilon-rules
+ Active abs found i (Lin l (Cat (_cat,_lbl,nr):syms)) lins recs <-
+ chartLookup finalChart j (Act cat lbl)
+ let found'' = found' ++ [(lbl, makeRange (j,k))]
+ recs' <- unifyRec recs nr found''
+ return $ Active abs found i (Lin l syms) lins recs'
+ combine _ _ = []
+
+ complete2 k (Active abs found i (Lin l []) lins recs) =
+ do let found' = found ++ [(l, makeRange (i,k))]
+ (lin, lins') <- select lins
+ return $ Active abs found' k lin lins' recs
+ complete2 _ _ = []
+
+----------------------------------------------------------------------
+-- type definitions
+
+type IChart c n l t = IncrementalChart (Item c n l t) (IKey c l t)
+
+data Item c n l t = Active (Abstract c n)
+ (RangeRec l)
+ Int
+ (Lin c l t)
+ (LinRec c l t)
+ [RangeRec l]
+ | Final (Abstract c n) (RangeRec l) [RangeRec l]
+ -- | Passive c (RangeRec l)
+ deriving (Eq, Ord, Show)
+
+data IKey c l t = Act c l
+ | ActTok t
+ -- | Useless
+ | Pass
+ | Fin
+ deriving (Eq, Ord, Show)
+
+keyof :: Item c n l t -> IKey c l t
+keyof (Active _ _ _ (Lin _ (Cat (next,lbl,_):_)) _ _) = Act next lbl
+keyof (Active _ _ _ (Lin _ (Tok tok:_)) _ _) = ActTok tok
+keyof (Active _ _ _ (Lin _ []) _ _) = Pass
+keyof (Final _ _ _) = Fin
+-- keyof _ = Useless
+
+
+----------------------------------------------------------------------
+-- for tracing purposes
+prtSizes chart k = "f=" ++ show (length (chartLookup chart k Fin)) ++
+ " p=" ++ show (length (chartLookup chart k Pass)) ++
+ " a=" ++ show (sum [length (chartLookup chart k key) |
+ key@(Act _ _) <- chartKeys chart k ]) ++
+ " t=" ++ show (sum [length (chartLookup chart k key) |
+ key@(ActTok _) <- chartKeys chart k ])
+ -- " u=" ++ show (length (chartLookup chart k Useless))
+
+-- prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+-- prtBefore "\n " (chartLookup chart k) |
+-- k <- chartKeys chart ]
+
+instance (Print c, Print n, Print l, Print t) => Print (Item c n l t) where
+ prt (Active abs found rng lin tofind children) =
+ "? " ++ prt abs ++ ";\n\t" ++
+ "{" ++ prtSep " " found ++ "} " ++ prt rng ++ " . " ++
+ prt lin ++ "{" ++ prtSep " " tofind ++ "}" ++
+ ( if null children then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") children) ++ "}" )
+ -- prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+ prt (Final abs rr rrs) = ": " ++ prt abs ++ ";\n\t{" ++ prtSep " " rr ++ "}" ++
+ ( if null rrs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrs) ++ "}" )
+
+instance (Print c, Print l, Print t) => Print (IKey c l t) where
+ prt (Act c l) = "Active " ++ prt c ++ " " ++ prt l
+ prt (ActTok t) = "ActiveTok " ++ prt t
+ -- prt (Pass c l i) = "Passive " ++ prt c ++ " " ++ prt l ++ " @ " ++ prt i
+ prt (Fin) = "Final"
+ -- prt (Useless) = "Useless"
diff --git a/src/GF/Parsing/MCFG/Naive.hs b/src/GF/Parsing/MCFG/Naive.hs
index 4b994e726..932261d2b 100644
--- a/src/GF/Parsing/MCFG/Naive.hs
+++ b/src/GF/Parsing/MCFG/Naive.hs
@@ -1,6 +1,7 @@
-module GF.Parsing.MCFG.Naive (parse) where
+module GF.Parsing.MCFG.Naive (parse, parseR) where
+import Control.Monad (guard)
-- GF modules
import GF.Data.GeneralDeduction
@@ -13,21 +14,72 @@ import GF.Data.SortedList
import GF.Data.Assoc
import GF.System.Tracing
+import GF.Infra.Print
+
----------------------------------------------------------------------
-- * parsing
--- | Builds a chart from the initial agenda, given by prediction, and
--- the inference rules
+-- | Builds a chart from the initial agenda, given by prediction, and the inference rules
parse :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t
-parse mcfg starts toks
+parse pinfo starts toks
= [ Abs (cat, makeRangeRec lins) (zip rhs rrecs) fun |
Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ]
- where chart = process mcfg toks
+ where chart = process pinfo toks
-process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> Input t -> NChart c n l
-process mcfg toks
+-- | Builds a chart from the initial agenda, given by prediction, and the inference rules
+-- parseR :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t
+parseR pinfo starts
+ = [ Abs (cat, makeRangeRec lins) (zip rhs rrecs) fun |
+ Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ]
+ where chart = processR pinfo
+
+process :: (Ord t, Ord n, Ord c, Ord l) => MCFPInfo c n l t -> Input t -> NChart c n l
+process pinfo toks
= tracePrt "MCFG.Naive - chart size" prtSizes $
- buildChart keyof [convert, combine] (predict toks mcfg)
+ buildChart keyof [convert, combine] (predict pinfo toks)
+
+processR :: (Ord n, Ord c, Ord l) => MCFPInfo c n l Range -> NChart c n l
+processR pinfo
+ = tracePrt "MCFG.Naive Range - chart size" prtSizes $
+ buildChart keyof [convert, combine] (predictR pinfo)
+
+
+----------------------------------------------------------------------
+-- * inference rules
+
+-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
+predict :: (Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
+predict pinfo toks = tracePrt "MCFG.Naive - predicted rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- rulesMatchingInput pinfo toks
+ lins' <- rangeRestRec toks lins
+ return $ Active (abs, []) lins' []
+
+-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
+predictR :: (Ord l) => MCFPInfo c n l Range -> [Item c n l]
+predictR pinfo = tracePrt "MCFG.Naive Range - predicted rules" (prt . length) $
+ do Rule abs (Cnc _ _ lins) <- allRules pinfo
+ return $ Active (abs, []) lins []
+
+-- | Creates an Active Item every time it is possible to combine
+-- an Active Item from the agenda with a Passive Item from the Chart
+combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
+combine chart item@(Active (Abs _ (c:_) _, _) _ _) =
+ do Passive _c rrec <- chartLookup chart (Pass c)
+ combine2 chart rrec item
+combine chart (Passive c rrec) =
+ do item <- chartLookup chart (Act c)
+ combine2 chart rrec item
+combine _ _ = []
+
+combine2 chart rrec (Active (Abs nt (c:find) f, found) lins rrecs) =
+ do lins' <- substArgRec (length found) rrec lins
+ return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
+
+-- | Active Items with nothing to find are converted to Passive Items
+convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
+convert _ (Active (Abs cat [] fun, _) lins _) = [Passive cat (makeRangeRec lins)]
+convert _ _ = []
+
----------------------------------------------------------------------
-- * type definitions
@@ -57,32 +109,20 @@ prtSizes chart = "final=" ++ show (length (chartLookup chart Final)) ++
", active=" ++ show (sum [length (chartLookup chart k) |
k@(Act _) <- chartKeys chart ])
-----------------------------------------------------------------------
--- * inference rules
-
--- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
-predict :: Ord t => Input t -> MCFGrammar c n l t -> [Item c n l]
-predict toks mcfg = [ Active (abs, []) lins' [] |
- Rule abs (Cnc _ _ lins) <- mcfg,
- lins' <- rangeRestRec toks lins ]
-
--- | Creates an Active Item every time it is possible to combine
--- an Active Item from the agenda with a Passive Item from the Chart
-combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
-combine chart (Active (Abs nt (c:find) f, found) lins rrecs) =
- do Passive _ rrec <- chartLookup chart (Pass c)
- lins' <- concLinRec $ substArgRec (length found) rrec lins
- return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
-combine chart (Passive c rrec) =
- do Active (Abs nt (c:find) f, found) lins rrecs <- chartLookup chart (Act c)
- lins' <- concLinRec $ substArgRec (length found) rrec lins
- return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
-combine _ _ = []
-
--- | Active Items with nothing to find are converted to Passive Items
-convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
-convert _ (Active (Abs cat [] _, _) lins _) = [Passive cat rrec]
- where rrec = makeRangeRec lins
-convert _ _ = []
+prtChart chart = concat [ "\n*** KEY: " ++ prt k ++
+ prtBefore "\n " (chartLookup chart k) |
+ k <- chartKeys chart ]
+
+instance (Print c, Print n, Print l) => Print (Item c n l) where
+ prt (Active (abs, cs) lrec rrecs) = "? " ++ prt abs ++ " . " ++ prtSep " " cs ++ ";\n\t" ++
+ "{" ++ prtSep " " lrec ++ "}" ++
+ ( if null rrecs then ";" else ";\n\t" ++
+ "{" ++ prtSep "} {" (map (prtSep " ") rrecs) ++ "}" )
+ prt (Passive c rrec) = "- " ++ prt c ++ "; {" ++ prtSep " " rrec ++ "}"
+
+instance Print c => Print (NKey c) where
+ prt (Act c) = "Active " ++ prt c
+ prt (Pass c) = "Passive " ++ prt c
+ prt (Final) = "Final"
diff --git a/src/GF/Parsing/MCFG/PInfo.hs b/src/GF/Parsing/MCFG/PInfo.hs
index b89ce6d5e..3b2603a20 100644
--- a/src/GF/Parsing/MCFG/PInfo.hs
+++ b/src/GF/Parsing/MCFG/PInfo.hs
@@ -4,9 +4,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/04/21 16:23:14 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.3 $
+-- > CVS $Date: 2005/05/09 09:28:46 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.4 $
--
-- MCFG parsing, parser information
-----------------------------------------------------------------------------
@@ -34,11 +34,130 @@ type MCFParser c n l t = MCFPInfo c n l t
type MCFChart c n l = [Abstract (c, RangeRec l) n]
-type MCFPInfo c n l t = MCFGrammar c n l t
-
-buildMCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
-buildMCFPInfo = id
-
makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
+
+------------------------------------------------------------
+-- parser information
+
+data MCFPInfo c n l t
+ = MCFPInfo { grammarTokens :: SList t
+ , nameRules :: Assoc n (SList (MCFRule c n l t))
+ , topdownRules :: Assoc c (SList (MCFRule c n l t))
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
+ , emptyRules :: [MCFRule c n l t]
+ , leftcornerCats :: Assoc c (SList (MCFRule c n l t))
+ , leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , grammarCats :: SList c
+ -- ^ used when calculating starting categories
+ , rulesByToken :: Assoc t (SList (MCFRule c n l t, SList t))
+ , rulesWithoutTokens :: SList (MCFRule c n l t)
+ -- ^ used by 'rulesMatchingInput'
+ , allRules :: MCFGrammar c n l t
+ -- ^ used by any unoptimized algorithm
+
+ --bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
+ --emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
+ --emptyCategories :: Set c,
+ }
+
+
+rangeRestrictPInfo :: (Ord c, Ord n, Ord l, Ord t) =>
+ MCFPInfo c n l t -> Input t -> MCFPInfo c n l Range
+rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
+ tracePrt "MCFG.PInfo - Restricting the parser information" (prt . grammarTokens)
+ MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
+ , nameRules = rrAssoc (nameRules pinfo)
+ , topdownRules = rrAssoc (topdownRules pinfo)
+ , emptyRules = rrRules (emptyRules pinfo)
+ , leftcornerCats = rrAssoc (leftcornerCats pinfo)
+ , leftcornerTokens = lctokens
+ , grammarCats = grammarCats pinfo
+ , rulesByToken = emptyAssoc -- error "MCFG.PInfo.rulesByToken - no range restriction"
+ , rulesWithoutTokens = [] -- error "MCFG.PInfo.rulesByToken - no range restriction"
+ , allRules = allrules -- rrRules (allRules pinfo)
+ }
+
+ where lctokens = accumAssoc id
+ [ (rng, rule) | (tok, rules) <- aAssocs (leftcornerTokens pinfo),
+ inputToken inp ?= tok,
+ rule@(Rule _ (Cnc _ _ (Lin _ (Tok rng:_) : _)))
+ <- concatMap (rangeRestrictRule inp) rules ]
+
+ allrules = rrRules $ rulesMatchingInput pinfo inp
+
+ rrAssoc assoc = filterNull $ fmap rrRules assoc
+ filterNull assoc = assocFilter (not . null) assoc
+ rrRules rules = concatMap (rangeRestrictRule inp) rules
+
+
+buildMCFPInfo :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
+buildMCFPInfo grammar =
+ traceCalcFirst grammar $
+ tracePrt "MCFG.PInfo - parser info" (prt) $
+ MCFPInfo { grammarTokens = grammartokens
+ , nameRules = namerules
+ , topdownRules = topdownrules
+ , emptyRules = emptyrules
+ , leftcornerCats = leftcorncats
+ , leftcornerTokens = leftcorntoks
+ , grammarCats = grammarcats
+ , rulesByToken = rulesbytoken
+ , rulesWithoutTokens = ruleswithouttokens
+ , allRules = allrules
+ }
+
+ where allrules = concatMap expandVariants grammar
+ grammartokens = union (map fst ruletokens)
+ namerules = accumAssoc id
+ [ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
+ topdownrules = accumAssoc id
+ [ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
+ emptyrules = [ rule | rule@(Rule (Abs _ [] _) _) <- allrules ]
+ leftcorncats = accumAssoc id
+ [ (cat, rule) |
+ rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
+ leftcorntoks = accumAssoc id
+ [ (tok, rule) |
+ rule@(Rule _ (Cnc _ _ (Lin _ (Tok tok:_) : _))) <- allrules ]
+ grammarcats = aElems topdownrules
+ ruletokens = [ (toksoflins lins, rule) |
+ rule@(Rule _ (Cnc _ _ lins)) <- allrules ]
+ toksoflins lins = nubsort [ tok | Lin _ syms <- lins, Tok tok <- syms ]
+ rulesbytoken = accumAssoc id
+ [ (tok, (rule, toks)) | (tok:toks, rule) <- ruletokens ]
+ ruleswithouttokens = nubsort [ rule | ([], rule) <- ruletokens ]
+
+
+-- | return only the rules for which all tokens are in the input string
+rulesMatchingInput :: Ord t => MCFPInfo c n l t -> Input t -> [MCFRule c n l t]
+rulesMatchingInput pinfo inp =
+ [ rule | tok <- toks,
+ (rule, ruletoks) <- rulesByToken pinfo ? tok,
+ ruletoks `subset` toks ]
+ ++ rulesWithoutTokens pinfo
+ where toks = aElems (inputToken inp)
+
+
+----------------------------------------------------------------------
+-- pretty-printing of statistics
+
+instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
+ prt pI = "[ tokens=" ++ sl grammarTokens ++
+ "; categories=" ++ sl grammarCats ++
+ "; nameRules=" ++ sla nameRules ++
+ "; tdRules=" ++ sla topdownRules ++
+ "; emptyRules=" ++ sl emptyRules ++
+ "; lcCats=" ++ sla leftcornerCats ++
+ "; lcTokens=" ++ sla leftcornerTokens ++
+ "; byToken=" ++ sla rulesByToken ++
+ "; noTokens=" ++ sl rulesWithoutTokens ++
+ "; allRules=" ++ sl allRules ++
+ " ]"
+
+ where sl f = show $ length $ f pI
+ sla f = let (as, bs) = unzip $ aAssocs $ f pI
+ in show (length as) ++ "/" ++ show (length (concat bs))
+
diff --git a/src/GF/Parsing/MCFG/Range.hs b/src/GF/Parsing/MCFG/Range.hs
index 994f8fdb7..7e5cc859a 100644
--- a/src/GF/Parsing/MCFG/Range.hs
+++ b/src/GF/Parsing/MCFG/Range.hs
@@ -1,5 +1,10 @@
-module GF.Parsing.MCFG.Range where
+module GF.Parsing.MCFG.Range
+ ( Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
+ LinRec, RangeRec,
+ makeRangeRec, rangeRestRec, rangeRestrictRule,
+ projection, unifyRec, substArgRec
+ ) where
-- Haskell
@@ -12,6 +17,7 @@ import GF.Formalism.MCFG
import GF.Formalism.Utilities
import GF.Infra.Print
import GF.Data.Assoc ((?))
+import GF.Data.Utilities (updateNthM)
------------------------------------------------------------
-- ranges as single pairs
@@ -23,6 +29,7 @@ data Range = Range (Int, Int)
makeRange :: (Int, Int) -> Range
concatRange :: Range -> Range -> [Range]
rangeEdge :: a -> Range -> Edge a
+edgeRange :: Edge a -> Range
minRange :: Range -> Int
maxRange :: Range -> Int
@@ -31,6 +38,7 @@ concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range(i,j)) (Range(j',k)) = [ Range(i,k) | j==j']
rangeEdge a (Range(i,j)) = Edge i j a
+edgeRange (Edge i j _) = Range (i,j)
minRange (Range rho) = fst rho
maxRange (Range rho) = snd rho
@@ -91,6 +99,8 @@ concLinRec = mapM concLin
makeRangeRec :: LinRec c l Range -> RangeRec l
makeRangeRec lins = map convLin lins
where convLin (Lin lbl [Tok rng]) = (lbl, rng)
+ convLin (Lin lbl []) = (lbl, EmptyRange)
+ convLin _ = error "makeRangeRec"
--- Record projection --------------------------------------------------------
@@ -114,51 +124,59 @@ rangeRestSym _ (Cat c) = return (Cat c)
rangeRestLin :: Ord t => Input t -> Lin c l t -> [Lin c l Range]
rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms
- return (Lin lbl syms')
+ concLin (Lin lbl syms')
+ -- return (Lin lbl syms')
rangeRestRec :: Ord t => Input t -> LinRec c l t -> [LinRec c l Range]
-rangeRestRec toks = mapM (rangeRestLin toks)
+rangeRestRec toks = mapM (rangeRestLin toks)
--- Record replacment ---------------------------------------------------------
--- ineffektiv!!
-
-replaceRec :: [RangeRec l] -> Int -> RangeRec l -> [RangeRec l]
-replaceRec recs i rec = (fst tup) ++ [rec] ++ (tail $ snd tup)
- where tup = splitAt i recs
-
+rangeRestrictRule :: Ord t => Input t -> MCFRule c n l t -> [MCFRule c n l Range]
+rangeRestrictRule toks (Rule abs (Cnc l ls lins)) = liftM (Rule abs . Cnc l ls) $
+ rangeRestRec toks lins
--- Argument substitution ----------------------------------------------------
substArgSymbol :: Ord l => Int -> RangeRec l -> Symbol (c, l, Int) Range
-> Symbol (c, l, Int) Range
-substArgSymbol i rec (Tok rng) = (Tok rng)
-substArgSymbol i rec (Cat (c, l, j))
- | i==j = maybe (Cat (c, l, j)) Tok $ lookup l rec
- | otherwise = (Cat (c, l, j))
-
+substArgSymbol i rec tok@(Tok rng) = tok
+substArgSymbol i rec cat@(Cat (c, l, j))
+ | i==j = maybe err Tok $ lookup l rec
+ | otherwise = cat
+ where err = error "substArg: Label not in range-record"
substArgLin :: Ord l => Int -> RangeRec l -> Lin c l Range
- -> Lin c l Range
+ -> [Lin c l Range]
substArgLin i rec (Lin lbl syms) =
- (Lin lbl (map (substArgSymbol i rec) syms))
+ concLin (Lin lbl (map (substArgSymbol i rec) syms))
substArgRec :: Ord l => Int -> RangeRec l -> LinRec c l Range
- -> LinRec c l Range
-substArgRec i rec lins = map (substArgLin i rec) lins
+ -> [LinRec c l Range]
+substArgRec i rec lins = mapM (substArgLin i rec) lins
+
+-- Record unification & replacment ---------------------------------------------------------
---- Subsumation -------------------------------------------------------------
+unifyRec :: Ord l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]]
+unifyRec recs i rec = updateNthM update i recs
+ where update rec' = guard (subsumes rec' rec) >> return rec
+
+-- unifyRec recs i rec = do guard $ subsumes (recs !! i) rec
+-- return $ replaceRec recs i rec
+
+replaceRec :: [RangeRec l] -> Int -> RangeRec l -> [RangeRec l]
+replaceRec recs i rec = before ++ (rec : after)
+ where (before, _ : after) = splitAt i recs
--- "rec' subsumes rec?"
subsumes :: Ord l => RangeRec l -> RangeRec l -> Bool
-subsumes rec rec' = and [elem r rec' | r <- rec]
+subsumes rec rec' = and [r `elem` rec' | r <- rec]
+-- subsumes rec rec' = all (`elem` rec') rec
+{-
--- Record unification -------------------------------------------------------
-
unifyRangeRecs :: Ord l => [RangeRec l] -> [RangeRec l] -> [[RangeRec l]]
unifyRangeRecs recs recs' = zipWithM unify recs recs'
where unify :: Ord l => RangeRec l -> RangeRec l -> [RangeRec l]
@@ -173,3 +191,4 @@ unifyRangeRecs recs recs' = zipWithM unify recs recs'
EQ -> do guard (r1 == r2)
rec3 <- unify rec1 rec2
return (p1:rec3)
+-}