diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Parsing/FCFG | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Parsing/FCFG')
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/Active.hs | 179 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/Incremental.hs | 107 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/PInfo.hs | 121 | ||||
| -rw-r--r-- | src-3.0/GF/Parsing/FCFG/Range.hs | 50 |
4 files changed, 457 insertions, 0 deletions
diff --git a/src-3.0/GF/Parsing/FCFG/Active.hs b/src-3.0/GF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..df55793f8 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/Active.hs @@ -0,0 +1,179 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Active (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.Utilities + +import GF.Formalism.FCFG +import GF.Formalism.Utilities + +import GF.Infra.PrintClass + +import GF.Parsing.FCFG.Range +import GF.Parsing.FCFG.PInfo + +import Control.Monad (guard) + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +parse :: String -> FCFParser +parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo + where chart = process strategy pinfo toks axioms emptyXChart + axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks + | isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec +emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) + where + FRule _ rhs _ _ = allRules pinfo ! ruleid + +process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat +process strategy pinfo toks [] chart = chart +process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart + where + univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat c r d -> case recs !! d of + [] -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c + rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) + ++ + do guard (isTD strategy) + ruleid <- topdownRules pinfo ? c + return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) + in process strategy pinfo toks items chart + found' -> let items = do rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) node) + in process strategy pinfo toks items chart + FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok + rng' <- concatRange rng (makeRange i j) + return (cat, Active found rng' lbl (ppos+1) node) + in process strategy pinfo toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart + else univRule cat (Final (reverse (rng:found)) node) chart + where + (FRule fn _ cat lins) = allRules pinfo ! ruleid + lin = lins ! lbl + univRule cat item@(Final found' node) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat + let FRule _ _ _ lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! l ! ppos + rng <- concatRange rng (found' !! r) + return (cat, Active found rng l (ppos+1) (updateChildren node d found')) + ++ + do guard (isBU strategy) + ruleid <- leftcornerCats pinfo ? cat + let FRule _ _ _ lins = allRules pinfo ! ruleid + FSymCat cat r d = lins ! 0 ! 0 + return (cat, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) + + updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec + updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs + in process strategy pinfo toks items chart + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active RangeRec + Range + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !FPointPos + (SyntaxNode RuleId RangeRec) + | Final RangeRec (SyntaxNode RuleId RangeRec) + deriving (Eq, Ord) + +data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec) +xchart2syntaxchart (XChart actives finals) pinfo = + accumAssoc groupSyntaxNodes $ + [ case node of + SNode ruleid rrecs -> let FRule fun rhs cat _ = allRules pinfo ! ruleid + in ((cat,found), SNode fun (zip rhs rrecs)) + SString s -> ((cat,found), SString s) + SInt n -> ((cat,found), SInt n) + SFloat f -> ((cat,found), SFloat f) + | (cat, Final found node) <- chartAssocs finals + ] + +literals :: FCFPInfo -> Input FToken -> [(FCat,Item)] +literals pinfo toks = + [let (c,node) = lexer t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)] + where + lexer t = + case reads t of + [(n,"")] -> (fcatInt, SInt (n::Integer)) + _ -> case reads t of + [(f,"")] -> (fcatFloat, SFloat (f::Double)) + _ -> (fcatString,SString t) + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD pinfo starts toks = + do cat <- starts + ruleid <- topdownRules pinfo ? cat + return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)] +initialBU pinfo toks = + do (tok,rngs) <- aAssocs (inputToken toks) + ruleid <- leftcornerTokens pinfo ? tok + let FRule _ _ cat _ = allRules pinfo ! ruleid + (i,j) <- rngs + return (cat,Active [] (makeRange i j) 0 1 (emptyChildren ruleid pinfo)) + ++ + do ruleid <- epsilonRules pinfo + let FRule _ _ cat _ = allRules pinfo ! ruleid + return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) diff --git a/src-3.0/GF/Parsing/FCFG/Incremental.hs b/src-3.0/GF/Parsing/FCFG/Incremental.hs new file mode 100644 index 000000000..5ee77a061 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/Incremental.hs @@ -0,0 +1,107 @@ +module GF.Parsing.FCFG.Incremental where
+
+import Data.Array
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.Assoc
+import GF.Data.GeneralDeduction
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+import GF.Parsing.FCFG.PInfo
+import GF.Parsing.FCFG.Range
+import GF.GFCC.CId
+import Debug.Trace
+
+initState :: FCFPInfo -> CId -> State
+initState pinfo start =
+ let items = do
+ starts <- Map.lookup start (startupCats pinfo)
+ c <- starts
+ ruleid <- topdownRules pinfo ? c
+ let (FRule fn args cat lins) = allRules pinfo ! ruleid
+ lbl <- indices lins
+ return (Active 0 lbl 0 ruleid args cat)
+
+ forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ args cat _) <- assocs (allRules pinfo)]
+
+ max_fid = case IntMap.maxViewWithKey forest of
+ Just ((fid,_), _) -> fid+1
+ Nothing -> 0
+
+ in process pinfo items (State emptyChart [] emptyChart Map.empty forest max_fid 0)
+
+nextState :: FCFPInfo -> FToken -> State -> State
+nextState pinfo t state =
+ process pinfo (chartLookup (tokens state) t) state{ chart=emptyChart
+ , charts=chart state : charts state
+ , tokens=emptyChart
+ , passive=Map.empty
+ , currOffset=currOffset state+1
+ }
+
+getCompletions :: State -> FToken -> [FToken]
+getCompletions state w =
+ [t | t <- chartKeys (tokens state), take (length w) t == w]
+
+process pinfo [] state = state
+process pinfo (item@(Active j lbl ppos ruleid args fid0):xitems) state
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat _ r d -> let fid = args !! d
+ in case chartInsert (chart state) item (fid,r) of
+ Nothing -> process pinfo xitems state
+ Just actCat -> let items = do exprs <- IntMap.lookup fid (forest state)
+ (Passive ruleid args) <- Set.toList exprs
+ return (Active k r 0 ruleid args fid)
+ `mplus`
+ do id <- Map.lookup (fid,r,k) (passive state)
+ return (Active j lbl (ppos+1) ruleid (updateAt d id args) fid0)
+ in process pinfo (xitems++items) state{chart=actCat}
+ FSymTok tok -> case chartInsert (tokens state) (Active j lbl (ppos+1) ruleid args fid0) tok of
+ Nothing -> process pinfo xitems state
+ Just actTok -> process pinfo xitems state{tokens=actTok}
+ | otherwise = case Map.lookup (fid0, lbl, j) (passive state) of
+ Nothing -> let fid = nextId state
+ items = do Active j' lbl ppos ruleid args fidc <- chartLookup ((chart state:charts state) !! (k-j)) (fid0,lbl)
+ let FSymCat _ _ d = rhs ruleid lbl ! ppos
+ return (Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc)
+ in process pinfo (xitems++items) state{passive=Map.insert (fid0, lbl, j) fid (passive state)
+ ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
+ ,nextId =nextId state+1
+ }
+ Just id -> process pinfo xitems state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
+ where
+ lin = rhs ruleid lbl
+ k = currOffset state
+
+ rhs ruleid lbl = lins ! lbl
+ where
+ (FRule _ _ cat lins) = allRules pinfo ! ruleid
+
+ updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+
+data Active
+ = Active Int FIndex FPointPos RuleId [FCat] FCat
+ deriving (Eq,Show,Ord)
+data Passive
+ = Passive RuleId [FCat]
+ deriving (Eq,Ord,Show)
+
+
+data State
+ = State
+ { chart :: Chart
+ , charts :: [Chart]
+ , tokens :: ParseChart Active FToken
+ , passive :: Map.Map (FCat, FIndex, Int) FCat
+ , forest :: IntMap.IntMap (Set.Set Passive)
+ , nextId :: FCat
+ , currOffset :: Int
+ }
+ deriving Show
+
+type Chart = ParseChart Active (FCat, FIndex)
diff --git a/src-3.0/GF/Parsing/FCFG/PInfo.hs b/src-3.0/GF/Parsing/FCFG/PInfo.hs new file mode 100644 index 000000000..8b288f2f1 --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/PInfo.hs @@ -0,0 +1,121 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.PInfo where + +import GF.Infra.PrintClass +import GF.Formalism.Utilities +import GF.Formalism.FCFG +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Parsing.FCFG.Range +import qualified GF.GFCC.CId as AbsGFCC + +import Data.Array +import Data.Maybe +import qualified Data.Map as Map +import qualified Data.Set as Set +import Debug.Trace + +---------------------------------------------------------------------- +-- type declarations + +-- | the list of categories = possible starting categories +type FCFParser = FCFPInfo + -> [FCat] + -> Input FToken + -> SyntaxChart FName (FCat,RangeRec) + +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +------------------------------------------------------------ +-- parser information + +type RuleId = Int + +data FCFPInfo + = FCFPInfo { allRules :: Array RuleId FRule + , topdownRules :: Assoc FCat (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + -- , emptyRules :: [RuleId] + , epsilonRules :: [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , leftcornerCats :: Assoc FCat (SList RuleId) + , leftcornerTokens :: Assoc FToken (SList RuleId) + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: SList FCat + , grammarToks :: SList FToken + , startupCats :: Map.Map AbsGFCC.CId [FCat] + } + + +getLeftCornerTok lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymTok tok -> Just tok + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +getLeftCornerCat lins + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat c _ _ -> Just c + _ -> Nothing + | otherwise = Nothing + where + syms = lins ! 0 + +buildFCFPInfo :: FGrammar -> FCFPInfo +buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ + FCFPInfo { allRules = allrules + , topdownRules = topdownrules + -- , emptyRules = emptyrules + , epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + , grammarToks = grammartoks + , startupCats = startup + } + + where allrules = listArray (0,length grammar-1) grammar + topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ cat _) <- assocs allrules] + -- emptyrules = [ruleid | (ruleid, FRule _ [] _ _) <- assocs allrules] + epsilonrules = [ ruleid | (ruleid, FRule _ _ _ lins) <- assocs allrules, + not (inRange (bounds (lins ! 0)) 0) ] + leftcorncats = accumAssoc id + [ (fromJust (getLeftCornerCat lins), ruleid) | + (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ] + leftcorntoks = accumAssoc id + [ (fromJust (getLeftCornerTok lins), ruleid) | + (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ] + grammarcats = aElems topdownrules + grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] + +fcfPInfoToFGrammar :: FCFPInfo -> FGrammar +fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo) + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance Print FCFPInfo where + prt pI = "[ allRules=" ++ sl (elems . allRules) ++ + "; tdRules=" ++ sla topdownRules ++ + -- "; emptyRules=" ++ sl emptyRules ++ + "; epsilonRules=" ++ sl epsilonRules ++ + "; lcCats=" ++ sla leftcornerCats ++ + "; lcTokens=" ++ sla leftcornerTokens ++ + "; categories=" ++ sl grammarCats ++ + " ]" + + where sl f = show $ length $ f pI + sla f = let (as, bs) = unzip $ aAssocs $ f pI + in show (length as) ++ "/" ++ show (length (concat bs)) + diff --git a/src-3.0/GF/Parsing/FCFG/Range.hs b/src-3.0/GF/Parsing/FCFG/Range.hs new file mode 100644 index 000000000..24674f58b --- /dev/null +++ b/src-3.0/GF/Parsing/FCFG/Range.hs @@ -0,0 +1,50 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Definitions of ranges, and operations on ranges +----------------------------------------------------------------------------- + +module GF.Parsing.FCFG.Range + ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange, + ) where + + +-- GF modules +import GF.Formalism.Utilities +import GF.Infra.PrintClass + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +rangeEdge :: a -> Range -> Edge a +rangeEdge a (Range i j) = Edge i j a + +edgeRange :: Edge a -> Range +edgeRange (Edge i j _) = Range i j + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + +instance Print Range where + prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")" + prt (EmptyRange) = "(?)" |
