summaryrefslogtreecommitdiff
path: root/src/GF/Parsing/FCFG
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2006-06-01 11:19:47 +0000
committerkr.angelov <kr.angelov@gmail.com>2006-06-01 11:19:47 +0000
commite51eaed4fde9f2bee962ed43f5b9a8592e76a947 (patch)
tree8f1b3bb01373d052ecfa1f883a37ffe2d765977a /src/GF/Parsing/FCFG
parent496f1fc8767f9d8ce1bb69b6e6460c2b7b7dd4b4 (diff)
add the FCFG parser
Diffstat (limited to 'src/GF/Parsing/FCFG')
-rw-r--r--src/GF/Parsing/FCFG/Active.hs188
-rw-r--r--src/GF/Parsing/FCFG/PInfo.hs115
-rw-r--r--src/GF/Parsing/FCFG/Range.hs54
3 files changed, 357 insertions, 0 deletions
diff --git a/src/GF/Parsing/FCFG/Active.hs b/src/GF/Parsing/FCFG/Active.hs
new file mode 100644
index 000000000..662aec6e4
--- /dev/null
+++ b/src/GF/Parsing/FCFG/Active.hs
@@ -0,0 +1,188 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- MCFG parsing, the active algorithm
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.Active (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+import GF.Data.Utilities
+
+import GF.Formalism.GCFG
+import GF.Formalism.FCFG
+import GF.Formalism.MCFG(Lin(..))
+import GF.Formalism.Utilities
+
+import GF.Infra.Ident
+
+import GF.Parsing.FCFG.Range
+import GF.Parsing.FCFG.PInfo
+
+import GF.System.Tracing
+
+import Control.Monad (guard)
+
+import GF.Infra.Print
+
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Data.Array
+
+----------------------------------------------------------------------
+-- * parsing
+
+parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t
+parse strategy pinfo starts toks =
+ [ Abs (cat, found) (zip rhs rrecs) fun |
+ Final ruleid found rrecs <- listXChartFinal chart,
+ let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ]
+ where chart = process strategy pinfo toks axioms emptyXChart
+
+ axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
+ | isTD strategy = initial pinfo starts toks
+
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: Abstract c n -> [RangeRec]
+emptyChildren (Abs _ rhs _) = replicate (length rhs) []
+
+updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]]
+updateChildren recs i rec = updateNthM update i recs
+ where update rec' = do guard (null rec' || rec' == rec)
+ return rec
+
+makeMaxRange (Range _ j) = Range j j
+makeMaxRange EmptyRange = EmptyRange
+
+process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c
+process strategy pinfo toks [] chart = chart
+process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart
+ where
+ univRule item@(Active ruleid found rng lbl ppos recs) chart
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat c r d -> case insertXChart chart item c of
+ Nothing -> chart
+ Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c
+ rng' <- concatRange rng (found' !! r)
+ recs' <- updateChildren recs d found'
+ return (Active ruleid found rng' lbl (ppos+1) recs')
+ ++
+ do guard (isTD strategy)
+ ruleid <- topdownRules pinfo ? c
+ let FRule abs lins = allRules pinfo ! ruleid
+ return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs))
+ in process strategy pinfo toks items chart
+ FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
+ rng' <- concatRange rng (makeRange i j)
+ return (Active ruleid found rng' lbl (ppos+1) recs)
+ in process strategy pinfo toks items chart
+ | otherwise =
+ if inRange (bounds lins) (lbl+1)
+ then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart
+ else univRule (Final ruleid (reverse (rng:found)) recs) chart
+ where
+ (FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid
+ lin = lins ! lbl
+ univRule item@(Final ruleid found' recs) chart =
+ case insertXChart chart item cat of
+ Nothing -> chart
+ Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat
+ let FRule _ lins = allRules pinfo ! ruleid
+ FSymCat cat r d = lins ! l ! ppos
+ rng' <- concatRange rng (found' !! r)
+ recs' <- updateChildren recs d found'
+ return (Active ruleid found rng' l (ppos+1) recs')
+ ++
+ do guard (isBU strategy)
+ ruleid <- leftcornerCats pinfo ? cat
+ let FRule abs lins = allRules pinfo ! ruleid
+ FSymCat cat r d = lins ! 0 ! 0
+ return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs)))
+ in process strategy pinfo toks items chart
+ where
+ (FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid
+
+----------------------------------------------------------------------
+-- * XChart
+
+data Item
+ = Active {-# UNPACK #-} !RuleId
+ RangeRec
+ Range
+ {-# UNPACK #-} !FLabel
+ {-# UNPACK #-} !FPointPos
+ [RangeRec]
+ | Final {-# UNPACK #-} !RuleId RangeRec [RangeRec]
+ deriving (Eq, Ord)
+
+data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
+
+emptyXChart :: Ord c => XChart c
+emptyXChart = XChart emptyChart emptyChart
+
+insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
+ case chartInsert actives item c of
+ Nothing -> Nothing
+ Just actives -> Just (XChart actives finals)
+
+insertXChart (XChart actives finals) item@(Final _ _ _) c =
+ case chartInsert finals item c of
+ Nothing -> Nothing
+ Just finals -> Just (XChart actives finals)
+
+lookupXChartAct (XChart actives finals) c = chartLookup actives c
+lookupXChartFinal (XChart actives finals) c = chartLookup finals c
+
+listXChartAct (XChart actives finals) = chartList actives
+listXChartFinal (XChart actives finals) = chartList finals
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- anropas med alla startkategorier
+initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item]
+initial pinfo starts toks =
+ tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $
+ do cat <- starts
+ ruleid <- topdownRules pinfo ? cat
+ let FRule abs lins = allRules pinfo ! ruleid
+ return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs)
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
+terminal pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
+ do ruleid <- emptyRules pinfo
+ let FRule abs lins = allRules pinfo ! ruleid
+ rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins)
+ return $ Final ruleid rrec []
+ where
+ rangeRestSyms toks rng [] = return rng
+ rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok
+ rng' <- concatRange rng (makeRange i j)
+ rangeRestSyms toks rng' syms
+
+initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item]
+initialScan pinfo toks =
+ tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
+ do tok <- aElems (inputToken toks)
+ ruleid <- leftcornerTokens pinfo ? tok
+ let FRule abs lins = allRules pinfo ! ruleid
+ return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs)
diff --git a/src/GF/Parsing/FCFG/PInfo.hs b/src/GF/Parsing/FCFG/PInfo.hs
new file mode 100644
index 000000000..6fdc79269
--- /dev/null
+++ b/src/GF/Parsing/FCFG/PInfo.hs
@@ -0,0 +1,115 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/13 12:40:19 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- MCFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.PInfo where
+
+import GF.System.Tracing
+import GF.Infra.Print
+
+import GF.Formalism.Utilities
+import GF.Formalism.GCFG
+import GF.Formalism.FCFG
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Parsing.FCFG.Range
+
+import Data.Array
+import Data.Maybe
+
+----------------------------------------------------------------------
+-- type declarations
+
+-- | the list of categories = possible starting categories
+type FCFParser c n t = FCFPInfo c n t
+ -> [c]
+ -> Input t
+ -> FCFChart c n
+
+type FCFChart c n = [Abstract (c, RangeRec) n]
+
+makeFinalEdge :: c -> Int -> Int -> (c, RangeRec)
+makeFinalEdge cat i j = (cat, [makeRange i j])
+
+
+------------------------------------------------------------
+-- parser information
+
+type RuleId = Int
+
+data FCFPInfo c n t
+ = FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
+ , topdownRules :: Assoc c (SList RuleId)
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
+ , emptyRules :: [RuleId]
+ , leftcornerCats :: Assoc c (SList RuleId)
+ , leftcornerTokens :: Assoc t (SList RuleId)
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , grammarCats :: SList c
+ }
+
+
+getLeftCornerTok lins
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymTok tok -> Just tok
+ _ -> Nothing
+ | otherwise = Nothing
+ where
+ syms = lins ! 0
+
+getLeftCornerCat lins
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymCat c _ _ -> Just c
+ _ -> Nothing
+ | otherwise = Nothing
+ where
+ syms = lins ! 0
+
+buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t
+buildFCFPInfo grammar =
+ traceCalcFirst grammar $
+ tracePrt "MCFG.PInfo - parser info" (prt) $
+ FCFPInfo { allRules = allrules
+ , topdownRules = topdownrules
+ , emptyRules = emptyrules
+ , leftcornerCats = leftcorncats
+ , leftcornerTokens = leftcorntoks
+ , grammarCats = grammarcats
+ }
+
+ where allrules = listArray (0,length grammar-1) grammar
+ topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules]
+ emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules]
+ leftcorncats = accumAssoc id
+ [ (fromJust (getLeftCornerCat lins), ruleid) |
+ (ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
+ leftcorntoks = accumAssoc id
+ [ (fromJust (getLeftCornerTok lins), ruleid) |
+ (ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
+ grammarcats = aElems topdownrules
+
+----------------------------------------------------------------------
+-- pretty-printing of statistics
+
+instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
+ prt pI = "[ allRules=" ++ sl (elems . allRules) ++
+ "; tdRules=" ++ sla topdownRules ++
+ "; emptyRules=" ++ sl emptyRules ++
+ "; lcCats=" ++ sla leftcornerCats ++
+ "; lcTokens=" ++ sla leftcornerTokens ++
+ "; categories=" ++ sl grammarCats ++
+ " ]"
+
+ where sl f = show $ length $ f pI
+ sla f = let (as, bs) = unzip $ aAssocs $ f pI
+ in show (length as) ++ "/" ++ show (length (concat bs))
+
diff --git a/src/GF/Parsing/FCFG/Range.hs b/src/GF/Parsing/FCFG/Range.hs
new file mode 100644
index 000000000..31ad088de
--- /dev/null
+++ b/src/GF/Parsing/FCFG/Range.hs
@@ -0,0 +1,54 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/08/08 09:01:25 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.5 $
+--
+-- Definitions of ranges, and operations on ranges
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.Range
+ ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
+ ) where
+
+
+-- GF modules
+import GF.Formalism.Utilities
+import GF.Infra.Print
+
+------------------------------------------------------------
+-- ranges as single pairs
+
+type RangeRec = [Range]
+
+data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
+ | EmptyRange
+ deriving (Eq, Ord)
+
+makeRange :: Int -> Int -> Range
+makeRange = Range
+
+concatRange :: Range -> Range -> [Range]
+concatRange EmptyRange rng = return rng
+concatRange rng EmptyRange = return rng
+concatRange (Range i j) (Range j' k) = [Range i k | j==j']
+
+rangeEdge :: a -> Range -> Edge a
+rangeEdge a (Range i j) = Edge i j a
+
+edgeRange :: Edge a -> Range
+edgeRange (Edge i j _) = Range i j
+
+minRange :: Range -> Int
+minRange (Range i j) = i
+
+maxRange :: Range -> Int
+maxRange (Range i j) = j
+
+instance Print Range where
+ prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
+ prt (EmptyRange) = "(?)"