summaryrefslogtreecommitdiff
path: root/src/GF/Parsing/CFG
diff options
context:
space:
mode:
author peb <unknown> 2005-04-11 12:57:45 +0000
committer peb <unknown> 2005-04-11 12:57:45 +0000
commit ac00f77dadd4d447803dd7cab5a36f47365325d0 (patch)
tree 2fd02b19234f8d1fcc20ee67a2367d4d4eebfcd8 /src/GF/Parsing/CFG
parent f6273f7033b85eea9a8d0cc7d31e9697ba95d5b7 (diff)
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing/CFG')
-rw-r--r-- src/GF/Parsing/CFG/General.hs 101
-rw-r--r-- src/GF/Parsing/CFG/Incremental.hs 148
-rw-r--r-- src/GF/Parsing/CFG/PInfo.hs 95
3 files changed, 344 insertions, 0 deletions
diff --git a/src/GF/Parsing/CFG/General.hs b/src/GF/Parsing/CFG/General.hs
new file mode 100644
index 000000000..ea67ec94f
--- /dev/null
+++ b/src/GF/Parsing/CFG/General.hs
@@ -0,0 +1,101 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/11 13:52:51 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- CFG parsing with a general chart
+-----------------------------------------------------------------------------
+
+module GF.NewParsing.CFG.General
+ (parse, Strategy) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.NewParsing.CFG.PInfo
+import GF.NewParsing.GeneralChart
+import GF.Data.Assoc
+import Monad
+
+-- build the chart, hand it to tracePrt with a printer for its entry count, then extract
+--parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
+parse strategy grammar start input = extract $
+    tracePrt "#internal chart" (prt . length . chartList) $ process strategy grammar start input
+
+type Strategy = (Bool, Bool) -- ^ (isBottomup, isTopdown), in the order 'process' destructures them
+
+-- | Turn the finished internal chart into a 'CFChart': one 'CFRule' over edges for every
+-- passive, named item and every way of laying out its daughters between j and k.
+extract :: (Ord n, Ord c, Ord t) => IChart n (Symbol c t) -> CFChart c n t
+extract chart = [ CFRule (Edge j k cat) daughters name |
+                  Edge j k (Cat cat, found, [], Just name) <- chartList chart,
+                  daughters <- path j k (reverse found) ]
+  where
+    -- walk the daughters left to right from position i; succeed only if they end exactly at k
+    path i k []                = [ [] | i == k ]
+    path i k (Tok tok : found) = [ Tok tok : daughters | daughters <- path (i+1) k found ]
+    path i k (Cat cat : found) = [ Cat (Edge i j cat) : daughters |
+                                   Edge _i j _cat <- chartLookup chart (Passive (Cat cat) i),
+                                   daughters <- path j k found ]
+
+
+-- | Build the internal chart for an input: 'buildChart' is run with the rules
+-- @[predict, combine]@ on the initial predictions plus the scanned input edges.
+process :: (Ord n, Ord c, Ord t) =>
+           Strategy       -- ^ (isBottomup, isTopdown) :: (Bool, Bool)
+        -> CFPInfo c n t  -- ^ parser information (= grammar)
+        -> [c]            -- ^ list of starting categories
+        -> Input t        -- ^ input string
+        -> IChart n (Symbol c t)
+process (isBottomup, isTopdown) grammar start
+    = trace2 "CFParserGeneral" ((if isBottomup then " BU" else "") ++
+                                (if isTopdown  then " TD" else "")) $
+      buildChart keyof [predict, combine] . axioms
+  where
+    axioms input = initial ++ scan input
+    -- every input edge becomes a passive token item without a rule name
+    scan input = map (fmap mkEdge) (inputEdges input)
+    mkEdge tok = (Tok tok, [], [], Nothing)
+    -- the combine rule: a passive edge advances the active edges keyed on its symbol at j;
+    -- an active edge is advanced over the passive edges keyed on its next symbol at j
+    combine chart (Edge j k (next, _, [], _))
+        = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ]
+    combine chart edge@(Edge _ j (_, _, next:_, _))
+        = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ]
+    -- initial predictions
+    initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ]
+    -- predictions; the bottom-up case is Kilbury prediction (the dot is moved forward)
+    predict chart (Edge j k (next, _, [], _)) | isBottomup
+        = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ]
+    predict chart (Edge _ k (_, _, Cat cat:_, _))
+        = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ]
+    predict _ _ = []
+    tdRuleLookup | isTopdown  = topdownRules grammar
+                 | isBottomup = emptyLeftcornerRules grammar
+                 | otherwise  = error "CFG.General.process: neither bottom-up nor top-down"
+
+-- internal representation of parse items
+
+type Item n s = Edge (s, [s], [s], Maybe n) -- (symbol, found symbols in reverse, symbols to find, rule name)
+type IChart n s = ParseChart (Item n s) (IKey s)
+data IKey s = Active s Int -- key of an active item: next symbol to find, end position
+ | Passive s Int -- key of a passive item: its symbol, start position
+ deriving (Eq, Ord, Show)
+
+-- active items are keyed on (next symbol, end position), passive ones on (symbol, start position)
+keyof (Edge i j (cat, _, tofind, _)) = case tofind of { next:_ -> Active next j ; [] -> Passive cat i }
+
+-- move the dot over the next symbol to find; the resulting item ends at the given position
+forwardTo (Edge from _ (lhs, seen, sym:rest, label)) to = Edge from to (lhs, sym:seen, rest, label)
+
+loopingEdge pos (CFRule lhs rhs label) = Edge pos pos (Cat lhs, [], rhs, Just label) -- nothing found yet
+
+
+
diff --git a/src/GF/Parsing/CFG/Incremental.hs b/src/GF/Parsing/CFG/Incremental.hs
new file mode 100644
index 000000000..af0f79bf0
--- /dev/null
+++ b/src/GF/Parsing/CFG/Incremental.hs
@@ -0,0 +1,148 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/11 13:52:51 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Incremental chart parsing for CFG
+-----------------------------------------------------------------------------
+
+
+module GF.NewParsing.CFG.Incremental
+ (parse, Strategy) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import Array
+
+import Operations
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.NewParsing.CFG.PInfo
+import GF.NewParsing.IncrementalChart
+
+
+type Strategy = ((Bool, Bool), (Bool, Bool)) -- ^ (predict:(BU, TD), filter:(BU, TD)), as destructured by 'process'
+
+-- build the chart, hand it to tracePrt with a printer for its entry count, then extract
+parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t
+parse strategy grammar start input = extract $
+    tracePrt "#internal chart" (prt . length . flip chartList const) $ process strategy grammar start input
+
+-- | Read the parse chart off the final chart: one 'CFRule' over edges for every item with
+-- an empty remaining rhs and every way of laying out its found daughters between j and k.
+extract :: (Ord n, Ord c, Ord t) => IChart c n t -> CFChart c n t
+extract finalChart = [ CFRule (Edge j k cat) daughters name |
+                       (k, Item j (CFRule cat [] name) found) <- chartList finalChart (,),
+                       daughters <- path j k (reverse found) ]
+  where
+    -- NOTE(review): above, the Int of an Item is an edge START; below it is used as a daughter's END -- confirm
+    path i k []                = [ [] | i == k ]
+    path i k (Tok tok : found) = [ Tok tok : daughters | daughters <- path (i+1) k found ]
+    path i k (Cat cat : found) = [ Cat (Edge i j cat) : daughters |
+                                   Item j _ _ <- chartLookup finalChart i (Passive cat),
+                                   daughters <- path j k found ]
+
+-- | Build the incremental chart.  State 0 is seeded with the top-down inferences for the
+-- start categories, state k > 0 from the (token, positions) pairs in @inputTo input ! k@.
+process :: (Ord n, Ord c, Ord t) =>
+           Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t
+process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input
+    = trace2 "CFParserIncremental" ((if isPredictBU then "BU-predict " else "") ++
+                                    (if isPredictTD then "TD-predict " else "") ++
+                                    (if isFilterBU  then "BU-filter "  else "") ++
+                                    (if isFilterTD  then "TD-filter "  else "")) $
+      finalChart
+  where
+    finalChart = buildChart keyof rules axioms $ inputBounds input
+    axioms 0 = union $ map (tdInfer 0) start
+    axioms k = union [ buInfer j k (Tok token) |
+                       (token, js) <- aAssocs (inputTo input ! k), j <- js ]
+    -- an item with empty remaining rhs triggers bottom-up inference, one wanting a category top-down
+    rules k (Item j (CFRule cat [] _) _)
+        = buInfer j k (Cat cat)
+    rules k (Item j rule@(CFRule _ (sym@(Cat next):_) _) found)
+        = tdInfer k next <++>
+          -- hack for empty rules:
+          [ Item j (forward rule) (sym:found) |
+            emptyCategories grammar ?= next ]
+    rules _ _ = []
+    buInfer j k next = buPredict j k next <++> buCombine j k next
+    tdInfer k next = tdPredict k next
+
+    -- the combine rule
+    buCombine j k next
+        | j == k    = [] -- hack for empty rules, see rules above and tdPredict below
+        | otherwise = [ Item i (forward rule) (next:found) |
+                        Item i rule found <- (finalChart ! j) ? Active next ]
+
+    -- kilbury bottom-up prediction
+    buPredict j k next
+        = [ Item j rule [next] | isPredictBU,
+            rule <- map forward $ bottomupRules grammar ? next,
+            buFilter rule k,
+            tdFilter rule j k ]
+
+    -- top-down prediction
+    tdPredict k cat
+        = [ Item k rule [] | isPredictTD || isFilterTD,
+            rule <- topdownRules grammar ? cat,
+            buFilter rule k ] <++>
+          -- hack for empty rules:
+          [ Item k rule [] | isPredictBU,
+            rule <- emptyLeftcornerRules grammar ? cat ]
+
+    -- bottom up filtering: input symbol k can begin the given symbol list (first set)
+    -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!!
+    buFilter (CFRule _ (Cat cat:_) _) k | isFilterBU
+        = k < snd (inputBounds input) &&
+          hasCommonElements (leftcornerTokens grammar ? cat)
+                            (aElems (inputFrom input ! k))
+    buFilter _ _ = True
+
+    -- top down filtering: 'cat' is reachable by an active edge ending in node j < k
+    tdFilter (CFRule cat _ _) j k | isFilterTD && j < k
+        = (tdFilters ! j) ?= cat
+    tdFilter _ _ _ = True
+    tdFilters = listArray (inputBounds input) $
+                map (listSet . limit leftCats . activeCats) [0..]
+    activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ]
+    leftCats cat = [ left | CFRule _cat (Cat left:_) _ <- topdownRules grammar ? cat ]
+
+
+----------------------------------------------------------------------
+-- type declarations, items & keys
+
+data Item c n t = Item Int (CFRule c n t) [Symbol c t] -- start position, rule (rhs = part still to find), found symbols in reverse
+ deriving (Eq, Ord, Show)
+
+data IKey c t = Active (Symbol c t) | Passive c -- Active: next rhs symbol; Passive: lhs of a rule with empty remaining rhs
+ deriving (Eq, Ord, Show)
+
+type IChart c n t = IncrementalChart (Item c n t) (IKey c t)
+
+keyof :: Item c n t -> IKey c t -- Active on the next rhs symbol, Passive on the lhs once the rhs is used up
+keyof (Item _ (CFRule lhs rhs _) _) =
+    case rhs of { sym:_ -> Active sym ; [] -> Passive lhs }
+
+forward :: CFRule c n t -> CFRule c n t -- drop the first rhs symbol; not defined for an empty rhs
+forward (CFRule lhs (_:remaining) label) = CFRule lhs remaining label
+
+----------------------------------------------------------------------
+
+-- | Rendered as @<start: rule / found symbols>@.
+instance (Print n, Print c, Print t) => Print (Item c n t) where
+    prt (Item k rule syms) = "<"++show k++ ": "++ prt rule++" / "++prt syms++">"
+
+instance (Print c, Print t) => Print (IKey c t) where -- "?" prefixes active keys, "!" passive ones
+    prt key = case key of { Active sym  -> "?" ++ prt sym
+                          ; Passive cat -> "!" ++ prt cat }
+
+
diff --git a/src/GF/Parsing/CFG/PInfo.hs b/src/GF/Parsing/CFG/PInfo.hs
new file mode 100644
index 000000000..eff0767c1
--- /dev/null
+++ b/src/GF/Parsing/CFG/PInfo.hs
@@ -0,0 +1,95 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/11 13:52:52 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- CFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module GF.NewParsing.CFG.PInfo where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.CFG
+import GF.Data.SortedList
+import GF.Data.Assoc
+
+----------------------------------------------------------------------
+-- type declarations
+
+type CFParser c n t = CFPInfo c n t -- ^ parser information (see 'CFPInfo')
+ -> [c] -- ^ possible starting categories
+ -> Input t -- ^ the input tokens
+ -> CFChart c n t -- ^ the resulting chart
+
+------------------------------------------------------------
+-- parser information
+
+data CFPInfo c n t -- the tables are filled in by pInfo'
+ = CFPInfo { grammarTokens :: SList t, -- ^ tokens occurring in some rule's right-hand side
+ nameRules :: Assoc n (SList (CFRule c n t)), -- ^ rules by rule name
+ topdownRules :: Assoc c (SList (CFRule c n t)), -- ^ rules by left-hand side category
+ bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)), -- ^ rules by first right-hand side symbol
+ emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)), -- ^ rules with empty rhs, spread over categories via 'lc' in pInfo'
+ emptyCategories :: Set c, -- ^ categories having a rule whose rhs consists of empty categories only
+ cyclicCategories :: SList c,
+ -- ^ ONLY FOR DIRECT CYCLIC RULES!!!
+ leftcornerTokens :: Assoc c (SList t)
+ -- ^ DOES NOT WORK WITH EMPTY RULES!!!
+ }
+
+--buildCFPInfo :: (Ord n, Ord c, Ord t) => CFGrammar c n t -> CFPInfo c n t
+
+-- this is not permanent...
+-- (directly cyclic rules are filtered out before the tables are computed)
+buildCFPInfo grammar =
+    traceCalcFirst grammar (tracePrt "cf parser info" prt (pInfo' (filter (not . isCyclic) grammar)))
+
+-- Compute all tables of the parser information from a list of rules.  Note that
+-- cyclicCats can only be non-empty if the rule list still contains  cat -> cat  rules.
+pInfo' grammar = CFPInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks
+  where
+    grToks     = union [ nubsort [ tok | Tok tok <- rhs ] |
+                         CFRule _ rhs _ <- grammar ]
+    nmRules    = accumAssoc id [ (name, rule) | rule@(CFRule _ _ name) <- grammar ]
+    tdRules    = accumAssoc id [ (cat, rule) | rule@(CFRule cat _ _) <- grammar ]
+    buRules    = accumAssoc id [ (next, rule) | rule@(CFRule _ (next:_) _) <- grammar ]
+    elcRules   = accumAssoc id $ limit lc emptyRules
+    leftToks   = accumAssoc id $ limit lc $
+                 nubsort [ (cat, token) | CFRule cat (Tok token:_) _ <- grammar ]
+    -- one left-corner step: pair 'res' with the lhs of every rule whose rhs starts with category 'left'
+    lc (left, res) = nubsort [ (cat, res) | CFRule cat _ _ <- buRules ? Cat left ]
+    emptyRules = nubsort [ (cat, rule) | rule@(CFRule cat [] _) <- grammar ]
+    -- categories having a rule whose rhs consists of empty categories only, iterated until stable
+    emptyCats  = listSet $ limitEmpties $ map fst emptyRules
+    limitEmpties es = if es == es' then es else limitEmpties es'
+      where es' = nubsort [ cat | CFRule cat rhs _ <- grammar,
+                            all (symbol (\e -> e `elem` es) (const False)) rhs ]
+    -- only rules of the form  cat -> cat  are detected here
+    cyclicCats = nubsort [ cat | CFRule cat [Cat cat'] _ <- grammar, cat == cat' ]
+
+-- a rule is directly cyclic when it has the form  cat -> cat
+isCyclic rule = case rule of { CFRule lhs [Cat only] _ -> lhs == only ; _ -> False }
+
+
+----------------------------------------------------------------------
+
+-- | Prints only the size of each table, not its contents.
+instance (Ord n, Ord c, Ord t) => Print (CFPInfo c n t) where
+    prt pI = "[ tokens=" ++ sl grammarTokens ++
+             "; names=" ++ sla nameRules ++
+             "; tdCats=" ++ sla topdownRules ++
+             "; buCats=" ++ sla bottomupRules ++
+             "; elcCats=" ++ sla emptyLeftcornerRules ++
+             "; eCats=" ++ sla emptyCategories ++
+             "; cCats=" ++ sl cyclicCategories ++
+             "; lctokCats=" ++ sla leftcornerTokens ++ " ]"
+      where sla f = show $ length $ aElems $ f pI
+            sl  f = show $ length $ f pI