diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2006-06-01 11:19:47 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2006-06-01 11:19:47 +0000 |
| commit | e51eaed4fde9f2bee962ed43f5b9a8592e76a947 (patch) | |
| tree | 8f1b3bb01373d052ecfa1f883a37ffe2d765977a /src/GF/Parsing | |
| parent | 496f1fc8767f9d8ce1bb69b6e6460c2b7b7dd4b4 (diff) | |
add the FCFG parser
Diffstat (limited to 'src/GF/Parsing')
| -rw-r--r-- | src/GF/Parsing/FCFG.hs | 38 | ||||
| -rw-r--r-- | src/GF/Parsing/FCFG/Active.hs | 188 | ||||
| -rw-r--r-- | src/GF/Parsing/FCFG/PInfo.hs | 115 | ||||
| -rw-r--r-- | src/GF/Parsing/FCFG/Range.hs | 54 | ||||
| -rw-r--r-- | src/GF/Parsing/GFC.hs | 31 |
5 files changed, 420 insertions, 6 deletions
diff --git a/src/GF/Parsing/FCFG.hs b/src/GF/Parsing/FCFG.hs new file mode 100644 index 000000000..bec6eb777 --- /dev/null +++ b/src/GF/Parsing/FCFG.hs @@ -0,0 +1,38 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/11 10:28:16 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG + (parseFCF, module GF.Parsing.FCFG.PInfo) where + +import GF.Data.Operations (Err(..)) + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Parsing.FCFG.PInfo + +import qualified GF.Parsing.FCFG.Active as Active +import GF.Infra.Print + +---------------------------------------------------------------------- +-- parsing + +parseFCF :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t) +parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs + | otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs + +strategies = words "bottomup topdown" + +parseFCF' :: (Print c, Ord c, Print n, Ord n, Print t, Ord t) => String -> FCFParser c n t +parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks +parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks diff --git a/src/GF/Parsing/FCFG/Active.hs b/src/GF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..662aec6e4 --- /dev/null +++ b/src/GF/Parsing/FCFG/Active.hs @@ -0,0 +1,188 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Active (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.Utilities + +import GF.Formalism.GCFG +import GF.Formalism.FCFG +import GF.Formalism.MCFG(Lin(..)) +import GF.Formalism.Utilities + +import GF.Infra.Ident + +import GF.Parsing.FCFG.Range +import GF.Parsing.FCFG.PInfo + +import GF.System.Tracing + +import Control.Monad (guard) + +import GF.Infra.Print + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +parse :: (Ord c, Print n, Ord n, Ord t) => String -> FCFParser c n t +parse strategy pinfo starts toks = + [ Abs (cat, found) (zip rhs rrecs) fun | + Final ruleid found rrecs <- listXChartFinal chart, + let FRule (Abs cat rhs fun) _ = allRules pinfo ! ruleid ] + where chart = process strategy pinfo toks axioms emptyXChart + + axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks + | isTD strategy = initial pinfo starts toks + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: Abstract c n -> [RangeRec] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +updateChildren :: [RangeRec] -> Int -> RangeRec -> [[RangeRec]] +updateChildren recs i rec = updateNthM update i recs + where update rec' = do guard (null rec' || rec' == rec) + return rec + +makeMaxRange (Range _ j) = Range j j +makeMaxRange EmptyRange = EmptyRange + +process :: (Ord c, Ord n, Ord t) => String -> FCFPInfo c n t -> Input t -> [Item] -> XChart c -> XChart c +process strategy pinfo toks [] chart = chart +process strategy pinfo toks (item:items) chart = process strategy pinfo toks items $! univRule item chart + where + univRule item@(Active ruleid found rng lbl ppos recs) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat c r d -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do Final _ found' _ <- lookupXChartFinal chart c + rng' <- concatRange rng (found' !! r) + recs' <- updateChildren recs d found' + return (Active ruleid found rng' lbl (ppos+1) recs') + ++ + do guard (isTD strategy) + ruleid <- topdownRules pinfo ? c + let FRule abs lins = allRules pinfo ! ruleid + return (Active ruleid [] EmptyRange 0 0 (emptyChildren abs)) + in process strategy pinfo toks items chart + FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok + rng' <- concatRange rng (makeRange i j) + return (Active ruleid found rng' lbl (ppos+1) recs) + in process strategy pinfo toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule (Active ruleid (rng:found) EmptyRange (lbl+1) 0 recs) chart + else univRule (Final ruleid (reverse (rng:found)) recs) chart + where + (FRule (Abs cat _ fn) lins) = allRules pinfo ! ruleid + lin = lins ! lbl + univRule item@(Final ruleid found' recs) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active ruleid found rng l ppos recs) <- lookupXChartAct chart cat + let FRule _ lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! l ! ppos + rng' <- concatRange rng (found' !! r) + recs' <- updateChildren recs d found' + return (Active ruleid found rng' l (ppos+1) recs') + ++ + do guard (isBU strategy) + ruleid <- leftcornerCats pinfo ? cat + let FRule abs lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! 0 ! 0 + return (Active ruleid [] (found' !! r) 0 1 (updateNth (const found') d (emptyChildren abs))) + in process strategy pinfo toks items chart + where + (FRule (Abs cat _ fn) _) = allRules pinfo ! ruleid + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active {-# UNPACK #-} !RuleId + RangeRec + Range + {-# UNPACK #-} !FLabel + {-# UNPACK #-} !FPointPos + [RangeRec] + | Final {-# UNPACK #-} !RuleId RangeRec [RangeRec] + deriving (Eq, Ord) + +data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +listXChartAct (XChart actives finals) = chartList actives +listXChartFinal (XChart actives finals) = chartList finals + + +---------------------------------------------------------------------- +-- Earley -- + +-- anropas med alla startkategorier +initial :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [Item] +initial pinfo starts toks = + tracePrt "MCFG.Active (Earley) - initial rules" (prt . length) $ + do cat <- starts + ruleid <- topdownRules pinfo ? cat + let FRule abs lins = allRules pinfo ! ruleid + return $ Active ruleid [] (Range 0 0) 0 0 (emptyChildren abs) + + +---------------------------------------------------------------------- +-- Kilbury -- + +terminal :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item] +terminal pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $ + do ruleid <- emptyRules pinfo + let FRule abs lins = allRules pinfo ! ruleid + rrec <- mapM (rangeRestSyms toks EmptyRange . elems) (elems lins) + return $ Final ruleid rrec [] + where + rangeRestSyms toks rng [] = return rng + rangeRestSyms toks rng (FSymTok tok:syms) = do (i,j) <- inputToken toks ? tok + rng' <- concatRange rng (makeRange i j) + rangeRestSyms toks rng' syms + +initialScan :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [Item] +initialScan pinfo toks = + tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $ + do tok <- aElems (inputToken toks) + ruleid <- leftcornerTokens pinfo ? tok + let FRule abs lins = allRules pinfo ! ruleid + return $ Active ruleid [] EmptyRange 0 0 (emptyChildren abs) diff --git a/src/GF/Parsing/FCFG/PInfo.hs b/src/GF/Parsing/FCFG/PInfo.hs new file mode 100644 index 000000000..6fdc79269 --- /dev/null +++ b/src/GF/Parsing/FCFG/PInfo.hs @@ -0,0 +1,115 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- MCFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.PInfo where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.FCFG +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Parsing.FCFG.Range + +import Data.Array +import Data.Maybe + +---------------------------------------------------------------------- +-- type declarations + +-- | the list of categories = possible starting categories +type FCFParser c n t = FCFPInfo c n t + -> [c] + -> Input t + -> FCFChart c n + +type FCFChart c n = [Abstract (c, RangeRec) n] + +makeFinalEdge :: c -> Int -> Int -> (c, RangeRec) +makeFinalEdge cat i j = (cat, [makeRange i j]) + + +------------------------------------------------------------ +-- parser information + +type RuleId = Int + +data FCFPInfo c n t + = FCFPInfo { allRules :: Array RuleId (FCFRule c n t) + , topdownRules :: Assoc c (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + , emptyRules :: [RuleId] + , leftcornerCats :: Assoc c (SList RuleId) + , leftcornerTokens :: Assoc t (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: SList c + } + + +getLeftCornerTok lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymTok tok -> Just tok + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +getLeftCornerCat lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat c _ _ -> Just c + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +buildFCFPInfo :: (Ord c, Ord n, Ord t) => FCFGrammar c n t -> FCFPInfo c n t +buildFCFPInfo grammar = + traceCalcFirst grammar $ + tracePrt "MCFG.PInfo - parser info" (prt) $ + FCFPInfo { allRules = allrules + , topdownRules = topdownrules + , emptyRules = emptyrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + } + + where allrules = listArray (0,length grammar-1) grammar + topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule (Abs cat _ _) _) <- assocs allrules] + emptyrules = [ruleid | (ruleid, FRule (Abs _ [] _) _) <- assocs allrules] + leftcorncats = accumAssoc id + [ (fromJust (getLeftCornerCat lins), ruleid) | + (ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ] + leftcorntoks = accumAssoc id + [ (fromJust (getLeftCornerTok lins), ruleid) | + (ruleid, FRule _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ] + grammarcats = aElems topdownrules + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where + prt pI = "[ allRules=" ++ sl (elems . allRules) ++ + "; tdRules=" ++ sla topdownRules ++ + "; emptyRules=" ++ sl emptyRules ++ + "; lcCats=" ++ sla leftcornerCats ++ + "; lcTokens=" ++ sla leftcornerTokens ++ + "; categories=" ++ sl grammarCats ++ + " ]" + + where sl f = show $ length $ f pI + sla f = let (as, bs) = unzip $ aAssocs $ f pI + in show (length as) ++ "/" ++ show (length (concat bs)) + diff --git a/src/GF/Parsing/FCFG/Range.hs b/src/GF/Parsing/FCFG/Range.hs new file mode 100644 index 000000000..31ad088de --- /dev/null +++ b/src/GF/Parsing/FCFG/Range.hs @@ -0,0 +1,54 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.5 $ +-- +-- Definitions of ranges, and operations on ranges +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Range + ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange, + ) where + + +-- GF modules +import GF.Formalism.Utilities +import GF.Infra.Print + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +rangeEdge :: a -> Range -> Edge a +rangeEdge a (Range i j) = Edge i j a + +edgeRange :: Edge a -> Range +edgeRange (Edge i j _) = Range i j + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + +instance Print Range where + prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")" + prt (EmptyRange) = "(?)" diff --git a/src/GF/Parsing/GFC.hs b/src/GF/Parsing/GFC.hs index 8f79bab01..e87b45590 100644 --- a/src/GF/Parsing/GFC.hs +++ b/src/GF/Parsing/GFC.hs @@ -37,23 +37,29 @@ import qualified GF.Formalism.SimpleGFC as S import qualified GF.Formalism.MCFG as M import qualified GF.Formalism.CFG as C import qualified GF.Parsing.MCFG as PM +import qualified GF.Parsing.FCFG as PF import qualified GF.Parsing.CFG as PC ---------------------------------------------------------------------- -- parsing information -data PInfo = PInfo { mcfPInfo :: MCFPInfo, - cfPInfo :: CFPInfo } +data PInfo = PInfo { mcfPInfo :: MCFPInfo + , fcfPInfo :: FCFPInfo + , cfPInfo :: CFPInfo + } type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token +type FCFPInfo = PF.FCFPInfo FCat Name Token type CFPInfo = PC.CFPInfo CCat Name Token -buildPInfo :: MGrammar -> CGrammar -> PInfo -buildPInfo mcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg, - cfPInfo = PC.buildCFPInfo cfg } +buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo +buildPInfo mcfg fcfg cfg = PInfo { mcfPInfo = PM.buildMCFPInfo mcfg + , fcfPInfo = PF.buildFCFPInfo fcfg + , cfPInfo = PC.buildCFPInfo cfg + } instance Print PInfo where - prt (PInfo m c) = prt m ++ "\n" ++ prt c + prt (PInfo m f c) = prt m ++ "\n" ++ prt c ---------------------------------------------------------------------- -- main parsing function @@ -114,6 +120,19 @@ selectParser "m" strategy pinfo startCat inTokens cat@(MCat _ [lbl]) <- startCats ] return $ chart2forests chart (const False) finalEdges +-- parsing via FCFG +selectParser "f" strategy pinfo startCat inTokens + = do let startCats = filter isStart $ PF.grammarCats fcfpi + isStart cat = fcat2scat cat == cfCat2Ident startCat + fcfpi = fcfPInfo pinfo + fcfParser <- PF.parseFCF strategy + let fcfChart = fcfParser fcfpi startCats inTokens + chart = G.abstract2chart fcfChart + (begin,end) = inputBounds inTokens + finalEdges = [ PF.makeFinalEdge cat begin end | + cat@(FCat _ _ [lbl] _) <- startCats ] + return $ chart2forests chart (const False) finalEdges + -- error parser: selectParser prs strategy _ _ _ = Bad $ "Parser '" ++ prs ++ "' not defined with strategy: " ++ strategy |
