diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
| commit | e9e80fc389365e24d4300d7d5390c7d833a96c50 (patch) | |
| tree | f0b58473adaa670bd8fc52ada419d8cad470ee03 /src/PGF/Parsing/FCFG | |
| parent | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff) | |
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src/PGF/Parsing/FCFG')
| -rw-r--r-- | src/PGF/Parsing/FCFG/Active.hs | 189 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Incremental.hs | 187 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Utilities.hs | 187 |
3 files changed, 563 insertions, 0 deletions
diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..4386bfdd1 --- /dev/null +++ b/src/PGF/Parsing/FCFG/Active.hs @@ -0,0 +1,189 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module PGF.Parsing.FCFG.Active (parse) where + +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.Utilities +import qualified GF.Data.MultiMap as MM + +import PGF.CId +import PGF.Data +import PGF.Parsing.FCFG.Utilities + +import Control.Monad (guard) + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +-- | the list of categories = possible starting categories +parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree] +parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees + where + inTokens = input toks + starts = Map.findWithDefault [] start (startupCats pinfo) + schart = xchart2syntaxchart chart pinfo + (i,j) = inputBounds inTokens + finalEdges = [makeFinalEdge cat i j | cat <- starts] + forests = chart2forests schart (const False) finalEdges + filteredForests = forests >>= applyProfileToForest + + chart = process strategy pinfo inTokens axioms emptyXChart + axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens + | isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec +emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) + where + FRule _ _ rhs _ _ = allRules pinfo ! ruleid + +process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat +process strategy pinfo toks [] chart = chart +process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart + where + univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat r d -> let c = args !! d + in case recs !! d of + [] -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c + rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) + ++ + do guard (isTD strategy) + ruleid <- topdownRules pinfo ? c + return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) + in process strategy pinfo toks items chart + found' -> let items = do rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) node) + in process strategy pinfo toks items chart + FSymTok tok -> let items = do t_rng <- inputToken toks ? tok + rng' <- concatRange rng t_rng + return (cat, Active found rng' lbl (ppos+1) node) + in process strategy pinfo toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart + else univRule cat (Final (reverse (rng:found)) node) chart + where + (FRule _ _ args cat lins) = allRules pinfo ! ruleid + lin = lins ! lbl + univRule cat item@(Final found' node) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat + let FRule _ _ args _ lins = allRules pinfo ! ruleid + FSymCat r d = lins ! l ! ppos + rng <- concatRange rng (found' !! r) + return (args !! d, Active found rng l (ppos+1) (updateChildren node d found')) + ++ + do guard (isBU strategy) + ruleid <- leftcornerCats pinfo ? cat + let FRule _ _ args _ lins = allRules pinfo ! ruleid + FSymCat r d = lins ! 0 ! 0 + return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) + + updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec + updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs + in process strategy pinfo toks items chart + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active RangeRec + Range + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !FPointPos + (SyntaxNode RuleId RangeRec) + | Final RangeRec (SyntaxNode RuleId RangeRec) + deriving (Eq, Ord) + +data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart MM.empty MM.empty + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = + case MM.insert' c item actives of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _) c = + case MM.insert' c item finals of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = actives MM.! c +lookupXChartFinal (XChart actives finals) c = finals MM.! c + +xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) +xchart2syntaxchart (XChart actives finals) pinfo = + accumAssoc groupSyntaxNodes $ + [ case node of + SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid + in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) + SString s -> ((cat,found), SString s) + SInt n -> ((cat,found), SInt n) + SFloat f -> ((cat,found), SFloat f) + | (cat, Final found node) <- MM.toList finals + ] + +literals :: ParserInfo -> Input FToken -> [(FCat,Item)] +literals pinfo toks = + [let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] + where + lexer t = + case reads t of + [(n,"")] -> (fcatInt, SInt (n::Integer)) + _ -> case reads t of + [(f,"")] -> (fcatFloat, SFloat (f::Double)) + _ -> (fcatString,SString t) + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD pinfo starts toks = + do cat <- starts + ruleid <- topdownRules pinfo ? cat + return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] +initialBU pinfo toks = + do (tok,rngs) <- aAssocs (inputToken toks) + ruleid <- leftcornerTokens pinfo ? tok + let FRule _ _ _ cat _ = allRules pinfo ! ruleid + rng <- rngs + return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo)) + ++ + do ruleid <- epsilonRules pinfo + let FRule _ _ _ cat _ = allRules pinfo ! ruleid + return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs new file mode 100644 index 000000000..fff5f0212 --- /dev/null +++ b/src/PGF/Parsing/FCFG/Incremental.hs @@ -0,0 +1,187 @@ +{-# LANGUAGE BangPatterns #-}
+module PGF.Parsing.FCFG.Incremental
+ ( ParseState
+ , initState
+ , nextState
+ , getCompletions
+ , extractExps
+ , parse
+ ) where
+
+import Data.Array
+import Data.Array.Base (unsafeAt)
+import Data.List (isPrefixOf, foldl')
+import Data.Maybe (fromMaybe)
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.Assoc
+import GF.Data.SortedList
+import qualified GF.Data.MultiMap as MM
+import PGF.CId
+import PGF.Data
+import PGF.Parsing.FCFG.Utilities
+import Debug.Trace
+
+parse :: ParserInfo -> CId -> [FToken] -> [Tree]
+parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start
+
+initState :: ParserInfo -> CId -> ParseState
+initState pinfo start =
+ let items = do
+ c <- Map.findWithDefault [] start (startupCats pinfo)
+ ruleid <- topdownRules pinfo ? c
+ let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
+ lbl <- indices lins
+ return (Active 0 lbl 0 ruleid args cat)
+
+ forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
+
+ max_fid = case IntMap.maxViewWithKey forest of
+ Just ((fid,_), _) -> fid+1
+ Nothing -> 0
+
+ in State pinfo
+ (Chart MM.empty [] Map.empty forest max_fid 0)
+ (Set.fromList items)
+
+-- | From the current state and the next token
+-- 'nextState' computes a new state where the token
+-- is consumed and the current position shifted by one.
+nextState :: ParseState -> String -> ParseState
+nextState (State pinfo chart items) t =
+ let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart)
+ chart2 = chart1{ active =MM.empty
+ , actives=active chart1 : actives chart1
+ , passive=Map.empty
+ , offset =offset chart1+1
+ }
+ in State pinfo chart2 items1
+ where
+ add tok item set
+ | tok == t = Set.insert item set
+ | otherwise = set
+
+-- | If the next token is not known but only its prefix (possible empty prefix)
+-- then the 'getCompletions' function can be used to calculate the possible
+-- next words and the consequent states. This is used for word completions in
+-- the GF interpreter.
+getCompletions :: ParseState -> String -> Map.Map String ParseState
+getCompletions (State pinfo chart items) w =
+ let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart)
+ chart2 = chart1{ active =MM.empty
+ , actives=active chart1 : actives chart1
+ , passive=Map.empty
+ , offset =offset chart1+1
+ }
+ in fmap (State pinfo chart2) map'
+ where
+ add tok item map
+ | isPrefixOf w tok = fromMaybe map (MM.insert' tok item map)
+ | otherwise = map
+
+extractExps :: ParseState -> CId -> [Tree]
+extractExps (State pinfo chart items) start = exps
+ where
+ (_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart)
+
+ exps = nubsort $ do
+ c <- Map.findWithDefault [] start (startupCats pinfo)
+ ruleid <- topdownRules pinfo ? c
+ let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
+ lbl <- indices lins
+ fid <- Map.lookup (PK c lbl 0) (passive st)
+ go Set.empty fid
+
+ go rec fid
+ | Set.member fid rec = mzero
+ | otherwise = do set <- IntMap.lookup fid (forest st)
+ Passive ruleid args <- Set.toList set
+ let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
+ if fn == wildCId
+ then go (Set.insert fid rec) (head args)
+ else do args <- mapM (go (Set.insert fid rec)) args
+ return (Fun fn args)
+
+process fn !rules [] acc_chart = acc_chart
+process fn !rules (item:items) acc_chart = univRule item acc_chart
+ where
+ univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart)
+ | inRange (bounds lin) ppos =
+ case unsafeAt lin ppos of
+ FSymCat r d -> let !fid = args !! d
+ in case MM.insert' (AK fid r) item (active chart) of
+ Nothing -> process fn rules items $ acc_chart
+ Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
+ Nothing -> id
+ Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
+ (case IntMap.lookup fid (forest chart) of
+ Nothing -> id
+ Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
+ process fn rules items $
+ (acc,chart{active=actCat})
+ FSymTok tok -> process fn rules items $
+ (fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
+ | otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
+ Nothing -> let fid = nextId chart
+ in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
+ | Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
+ let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
+ process fn rules items $
+ (acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
+ ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
+ ,nextId =nextId chart+1
+ })
+ Just id -> process fn rules items $
+ (acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
+ where
+ !lin = rhs ruleid lbl
+ !k = offset chart
+
+ rhs ruleid lbl = unsafeAt lins lbl
+ where
+ (FRule _ _ _ cat lins) = unsafeAt rules ruleid
+
+ updateAt :: Int -> a -> [a] -> [a]
+ updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+
+data Active
+ = Active {-# UNPACK #-} !Int
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !FPointPos
+ {-# UNPACK #-} !RuleId
+ [FCat]
+ {-# UNPACK #-} !FCat
+ deriving (Eq,Show,Ord)
+data Passive
+ = Passive {-# UNPACK #-} !RuleId
+ [FCat]
+ deriving (Eq,Ord,Show)
+
+data ActiveKey
+ = AK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ deriving (Eq,Ord,Show)
+data PassiveKey
+ = PK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !Int
+ deriving (Eq,Ord,Show)
+
+
+-- | An abstract data type whose values represent
+-- the current state in an incremental parser.
+data ParseState = State ParserInfo Chart (Set.Set Active)
+
+data Chart
+ = Chart
+ { active :: MM.MultiMap ActiveKey Active
+ , actives :: [MM.MultiMap ActiveKey Active]
+ , passive :: Map.Map PassiveKey FCat
+ , forest :: IntMap.IntMap (Set.Set Passive)
+ , nextId :: {-# UNPACK #-} !FCat
+ , offset :: {-# UNPACK #-} !Int
+ }
diff --git a/src/PGF/Parsing/FCFG/Utilities.hs b/src/PGF/Parsing/FCFG/Utilities.hs new file mode 100644 index 000000000..4187d0f24 --- /dev/null +++ b/src/PGF/Parsing/FCFG/Utilities.hs @@ -0,0 +1,187 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module PGF.Parsing.FCFG.Utilities where + +import Control.Monad +import Data.Array +import Data.List (groupBy) + +import PGF.CId +import PGF.Data +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputBounds :: (Int, Int), + inputToken :: Assoc t [Range] + } + +input :: Ord t => [t] -> Input t +input toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] + +inputMany :: Ord t => [[t]] -> Input t +inputMany toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] + + +------------------------------------------------------------ +-- * representations of syntactical analyses + +-- ** charts as finite maps over edges + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [SyntaxNode n [e]] + +data SyntaxNode n e = SMeta + | SNode n [e] + | SString String + | SInt Integer + | SFloat Double + deriving (Eq,Ord) + +groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] +groupSyntaxNodes [] = [] +groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' + where + (ess,xs') = span xs + + span [] = ([],[]) + span xs@(SNode n es:xs') + | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) + | otherwise = ([],xs) +groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs +groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs +groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs + +-- ** syntax forests + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + | FString String + | FInt Integer + | FFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap _ (FString s) = FString s + fmap _ (FInt n) = FInt n + fmap _ (FFloat f) = FFloat f + fmap _ (FMeta) = FMeta + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName _ = Nothing + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] +unifyForests (FString s1) (FString s2) + | s1 == s2 = return $ FString s1 +unifyForests (FInt n1) (FInt n2) + | n1 == n2 = return $ FInt n1 +unifyForests (FFloat f1) (FFloat f2) + | f1 == f2 = return $ FFloat f1 +unifyForests _ _ = fail "forest unification failure" + + +-- ** conversions between representations + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters +chart2forests chart isMeta = concatMap (edge2forests []) + where edge2forests edges edge + | isMeta edge = [FMeta] + | edge `elem` edges = [] + | otherwise = map (item2forest (edge:edges)) $ chart ? edge + item2forest edges (SMeta) = FMeta + item2forest edges (SNode name children) = + FNode name $ children >>= mapM (edge2forests edges) + item2forest edges (SString s) = FString s + item2forest edges (SInt n) = FInt n + item2forest edges (SFloat f) = FFloat f + + +applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] +applyProfileToForest (FNode (fun,profiles) children) + | fun == wildCId = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] + + +forest2trees :: SyntaxForest CId -> [Tree] +forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees +forest2trees (FString s) = [Lit (LStr s)] +forest2trees (FInt n) = [Lit (LInt n)] +forest2trees (FFloat f) = [Lit (LFlt f)] +forest2trees (FMeta) = [Meta 0] |
