diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2006-06-01 11:19:47 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2006-06-01 11:19:47 +0000 |
| commit | e51eaed4fde9f2bee962ed43f5b9a8592e76a947 (patch) | |
| tree | 8f1b3bb01373d052ecfa1f883a37ffe2d765977a /src/GF/Parsing/FCFG | |
| parent | 496f1fc8767f9d8ce1bb69b6e6460c2b7b7dd4b4 (diff) | |
add the FCFG parser
Diffstat (limited to 'src/GF/Parsing/FCFG')
| -rw-r--r-- | src/GF/Parsing/FCFG/Active.hs | 188 | ||||
| -rw-r--r-- | src/GF/Parsing/FCFG/PInfo.hs | 115 | ||||
| -rw-r--r-- | src/GF/Parsing/FCFG/Range.hs | 54 |
3 files changed, 357 insertions, 0 deletions
diff --git a/src/GF/Parsing/FCFG/Active.hs b/src/GF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..662aec6e4 --- /dev/null +++ b/src/GF/Parsing/FCFG/Active.hs @@ -0,0 +1,188 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Active (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.Utilities + +import GF.Formalism.GCFG +import GF.Formalism.FCFG +import GF.Formalism.MCFG(Lin(..)) +import GF.Formalism.Utilities + +import GF.Infra.Ident + +import GF.Parsing.FCFG.Range +import GF.Parsing.FCFG.PInfo + +import GF.System.Tracing + +import Control.Monad (guard) + +import GF.Infra.Print + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t +parse strategy pinfo starts toks = + [ Abs (cat, found) (zip rhs rrecs) fun | + Final ruleid found rrecs <- listXChartFinal chart, + let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ] + where chart = process strategy pinfo toks axioms emptyXChart + + axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks + | isTD strategy = initial pinfo starts toks + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: Abstract c n -> [RangeRec] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]] +updateChildren recs i rec = updateNthM update i recs + where update rec' = do guard (null rec' || rec' == rec) + return rec + +makeMaxRange (Range _ j) = Range j j +makeMaxRange EmptyRange = EmptyRange + +process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c +process strategy pinfo toks [] chart = chart +process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart + where + univRule item@(Active ruleid found rng lbl ppos recs) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat c r d -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c + rng' <- concatRange rng (found' !! r) + recs' <- updateChildren recs d found' + return (Active ruleid found rng' lbl (ppos+1) recs') + ++ + do guard (isTD strategy) + ruleid <- topdownRules pinfo ? c + let FRule abs lins = allRules pinfo ! ruleid + return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs)) + in process strategy pinfo toks items chart + FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok + rng' <- concatRange rng (makeRange i j) + return (Active ruleid found rng' lbl (ppos+1) recs) + in process strategy pinfo toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart + else univRule (Final ruleid (reverse (rng:found)) recs) chart + where + (FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid + lin = lins ! lbl + univRule item@(Final ruleid found' recs) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat + let FRule _ lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! l ! ppos + rng' <- concatRange rng (found' !! r) + recs' <- updateChildren recs d found' + return (Active ruleid found rng' l (ppos+1) recs') + ++ + do guard (isBU strategy) + ruleid <- leftcornerCats pinfo ? cat + let FRule abs lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! 0 ! 0 + return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs))) + in process strategy pinfo toks items chart + where + (FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active {-# UNPACK #-} !RuleId + RangeRec + Range + {-# UNPACK #-} !FLabel + {-# UNPACK #-} !FPointPos + [RangeRec] + | Final {-# UNPACK #-} !RuleId RangeRec [RangeRec] + deriving (Eq, Ord) + +data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +listXChartAct (XChart actives finals) = chartList actives +listXChartFinal (XChart actives finals) = chartList finals + + +---------------------------------------------------------------------- +-- Earley -- + +-- anropas med alla startkategorier +initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item] +initial pinfo starts toks = + tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $ + do cat <- starts + ruleid <- topdownRules pinfo ? cat + let FRule abs lins = allRules pinfo ! ruleid + return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Kilbury -- + +terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item] +terminal pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $ + do ruleid <- emptyRules pinfo + let FRule abs lins = allRules pinfo ! ruleid + rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins) + return $ Final ruleid rrec [] + where + rangeRestSyms toks rng [] = return rng + rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok + rng' <- concatRange rng (makeRange i j) + rangeRestSyms toks rng' syms + +initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item] +initialScan pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $ + do tok <- aElems (inputToken toks) + ruleid <- leftcornerTokens pinfo ? tok + let FRule abs lins = allRules pinfo ! ruleid + return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs) diff --git a/src/GF/Parsing/FCFG/PInfo.hs b/src/GF/Parsing/FCFG/PInfo.hs new file mode 100644 index 000000000..6fdc79269 --- /dev/null +++ b/src/GF/Parsing/FCFG/PInfo.hs @@ -0,0 +1,115 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.PInfo where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.FCFG +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Parsing.FCFG.Range + +import Data.Array +import Data.Maybe + +---------------------------------------------------------------------- +-- type declarations + +-- | the list of categories = possible starting categories +type FCFParser c n t = FCFPInfo c n t + -> [c] + -> Input t + -> FCFChart c n + +type FCFChart c n = [Abstract (c, RangeRec) n] + +makeFinalEdge :: c -> Int -> Int -> (c, RangeRec) +makeFinalEdge cat i j = (cat, [makeRange i j]) + + +------------------------------------------------------------ +-- parser information + +type RuleId = Int + +data FCFPInfo c n t + = FCFPInfo { allRules :: Array RuleId (FCFRule c n t) + , topdownRules :: Assoc c (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + , emptyRules :: [RuleId] + , leftcornerCats :: Assoc c (SList RuleId) + , leftcornerTokens :: Assoc t (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: SList c + } + + +getLeftCornerTok lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymTok tok -> Just tok + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +getLeftCornerCat lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat c _ _ -> Just c + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t +buildFCFPInfo grammar = + traceCalcFirst grammar $ + tracePrt "MCFG.PInfo - parser info" (prt) $ + FCFPInfo { allRules = allrules + , topdownRules = topdownrules + , emptyRules = emptyrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + } + + where allrules = listArray (0,length grammar-1) grammar + topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules] + emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules] + leftcorncats = accumAssoc id + [ (fromJust (getLeftCornerCat lins), ruleid) | + (ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ] + leftcorntoks = accumAssoc id + [ (fromJust (getLeftCornerTok lins), ruleid) | + (ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ] + grammarcats = aElems topdownrules + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where + prt pI = "[ allRules=" ++ sl (elems . allRules) ++ + "; tdRules=" ++ sla topdownRules ++ + "; emptyRules=" ++ sl emptyRules ++ + "; lcCats=" ++ sla leftcornerCats ++ + "; lcTokens=" ++ sla leftcornerTokens ++ + "; categories=" ++ sl grammarCats ++ + " ]" + + where sl f = show $ length $ f pI + sla f = let (as, bs) = unzip $ aAssocs $ f pI + in show (length as) ++ "/" ++ show (length (concat bs)) + diff --git a/src/GF/Parsing/FCFG/Range.hs b/src/GF/Parsing/FCFG/Range.hs new file mode 100644 index 000000000..31ad088de --- /dev/null +++ b/src/GF/Parsing/FCFG/Range.hs @@ -0,0 +1,54 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- Definitions of ranges, and operations on ranges +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Range + ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange, + ) where + + +-- GF modules +import GF.Formalism.Utilities +import GF.Infra.Print + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +rangeEdge :: a -> Range -> Edge a +rangeEdge a (Range i j) = Edge i j a + +edgeRange :: Edge a -> Range +edgeRange (Edge i j _) = Range i j + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + +instance Print Range where + prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")" + prt (EmptyRange) = "(?)" |
