diff options
| author | krasimir <krasimir@chalmers.se> | 2008-10-14 08:00:50 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2008-10-14 08:00:50 +0000 |
| commit | 4573d104425a79b8b00ebcccb2e94c62275285ea (patch) | |
| tree | d8a7f902baf5246367c048aeb201dd9e3486d1b0 /src/PGF | |
| parent | 0c66ad597db65fcddc8a425f0bce4beedf2aae33 (diff) | |
the new optimized incremental parser and the common subexpression elimination optimization in PMCFG
Diffstat (limited to 'src/PGF')
| -rw-r--r-- | src/PGF/BuildParser.hs | 68 | ||||
| -rw-r--r-- | src/PGF/Data.hs | 42 | ||||
| -rw-r--r-- | src/PGF/Macros.hs | 6 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG.hs | 3 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Active.hs | 135 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Incremental.hs | 224 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Utilities.hs | 4 | ||||
| -rw-r--r-- | src/PGF/Raw/Convert.hs | 132 |
8 files changed, 348 insertions, 266 deletions
diff --git a/src/PGF/BuildParser.hs b/src/PGF/BuildParser.hs index 9dfab3130..1603a3dab 100644 --- a/src/PGF/BuildParser.hs +++ b/src/PGF/BuildParser.hs @@ -15,50 +15,62 @@ import PGF.CId import PGF.Data import PGF.Parsing.FCFG.Utilities -import Data.Array +import Data.Array.IArray import Data.Maybe +import qualified Data.IntMap as IntMap import qualified Data.Map as Map import qualified Data.Set as Set import Debug.Trace +data ParserInfoEx + = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] + , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] + , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] + , grammarToks :: [String] + } + ------------------------------------------------------------ -- parser information -getLeftCornerTok (FRule _ _ _ _ lins) +getLeftCornerTok pinfo (FFun _ _ lins) | inRange (bounds syms) 0 = case syms ! 0 of - FSymTok tok -> [tok] - _ -> [] + FSymTok (KS tok) -> [tok] + _ -> [] | otherwise = [] where - syms = lins ! 0 + syms = (sequences pinfo) ! (lins ! 0) -getLeftCornerCat (FRule _ _ args _ lins) +getLeftCornerCat pinfo args (FFun _ _ lins) | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat _ d -> [args !! d] + FSymCat d _ -> let cat = args !! d + in case IntMap.lookup cat (productions pinfo) of + Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] + Nothing -> [cat] _ -> [] | otherwise = [] where - syms = lins ! 0 + syms = (sequences pinfo) ! (lins ! 
0) -buildParserInfo :: FGrammar -> ParserInfo -buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ - ParserInfo { allRules = allrules - , topdownRules = topdownrules - -- , emptyRules = emptyrules - , epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarCats = grammarcats - , grammarToks = grammartoks - , startupCats = startup - } +buildParserInfo :: ParserInfo -> ParserInfoEx +buildParserInfo pinfo = + ParserInfoEx { epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarToks = grammartoks + } - where allrules = listArray (0,length grammar-1) grammar - topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules] - epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules, - not (inRange (bounds (lins ! 0)) 0) ] - leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ] - leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ] - grammarcats = aElems topdownrules - grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] + where epsilonrules = [ (ruleid,args,cat) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , let (FFun _ _ lins) = (functions pinfo) ! ruleid + , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] + leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! 
ruleid) ] + leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] + grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin] diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs index 7f240dc4c..76659912c 100644 --- a/src/PGF/Data.hs +++ b/src/PGF/Data.hs @@ -2,11 +2,13 @@ module PGF.Data where import PGF.CId import GF.Text.UTF8 -import GF.Data.Assoc import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap import Data.List import Data.Array +import Data.Array.Unboxed -- internal datatypes for PGF @@ -108,32 +110,28 @@ data Equation = deriving (Eq,Ord,Show) -type FToken = String type FCat = Int type FIndex = Int -data FSymbol - = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int - | FSymTok FToken -type Profile = [Int] type FPointPos = Int -type FGrammar = ([FRule], Map.Map CId [FCat]) -data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) - -type RuleId = Int +data FSymbol + = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex + | FSymTok Tokn + deriving (Eq,Ord,Show) +type Profile = [Int] +data Production + = FApply {-# UNPACK #-} !FunId [FCat] + | FCoerce {-# UNPACK #-} !FCat + deriving (Eq,Ord,Show) +data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) +type FSeq = Array FPointPos FSymbol +type FunId = Int +type SeqId = Int data ParserInfo - = ParserInfo { allRules :: Array RuleId FRule - , topdownRules :: Assoc FCat [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - -- , emptyRules :: [RuleId] - , epsilonRules :: [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , leftcornerCats :: Assoc FCat [RuleId] - , leftcornerTokens :: Assoc FToken [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , grammarCats :: [FCat] - , 
grammarToks :: [FToken] - , startupCats :: Map.Map CId [FCat] + = ParserInfo { functions :: Array FunId FFun + , sequences :: Array SeqId FSeq + , productions :: IntMap.IntMap (Set.Set Production) + , startCats :: Map.Map CId [FCat] } diff --git a/src/PGF/Macros.hs b/src/PGF/Macros.hs index 4c73817dc..9218fcb8f 100644 --- a/src/PGF/Macros.hs +++ b/src/PGF/Macros.hs @@ -49,12 +49,6 @@ lookValCat pgf = valCat . lookType pgf lookParser :: PGF -> CId -> Maybe ParserInfo lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser -lookFCFG :: PGF -> CId -> Maybe FGrammar -lookFCFG pgf lang = fmap toFGrammar $ lookParser pgf lang - where - toFGrammar :: ParserInfo -> FGrammar - toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo) - lookStartCat :: PGF -> String lookStartCat pgf = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) [gflags pgf, aflags (abstract pgf)] diff --git a/src/PGF/Parsing/FCFG.hs b/src/PGF/Parsing/FCFG.hs index 4ca6a956a..fe56f8712 100644 --- a/src/PGF/Parsing/FCFG.hs +++ b/src/PGF/Parsing/FCFG.hs @@ -8,7 +8,7 @@ ----------------------------------------------------------------------------- module PGF.Parsing.FCFG - (buildParserInfo,ParserInfo,parseFCFG) where + (ParserInfo,parseFCFG) where import GF.Data.ErrM import GF.Data.Assoc @@ -17,7 +17,6 @@ import GF.Data.SortedList import PGF.CId import PGF.Data import PGF.Macros -import PGF.BuildParser import PGF.Parsing.FCFG.Utilities import qualified PGF.Parsing.FCFG.Active as Active import qualified PGF.Parsing.FCFG.Incremental as Incremental diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs index 4386bfdd1..0927a719b 100644 --- a/src/PGF/Parsing/FCFG/Active.hs +++ b/src/PGF/Parsing/FCFG/Active.hs @@ -17,17 +17,22 @@ import qualified GF.Data.MultiMap as MM import PGF.CId import PGF.Data import PGF.Parsing.FCFG.Utilities +import PGF.BuildParser import Control.Monad (guard) import qualified Data.List as List import qualified Data.Map as Map 
+import qualified Data.IntMap as IntMap import qualified Data.Set as Set -import Data.Array +import Data.Array.IArray +import Debug.Trace ---------------------------------------------------------------------- -- * parsing +type FToken = String + makeFinalEdge cat 0 0 = (cat, [EmptyRange]) makeFinalEdge cat i j = (cat, [makeRange i j]) @@ -36,77 +41,79 @@ parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree] parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees where inTokens = input toks - starts = Map.findWithDefault [] start (startupCats pinfo) + starts = Map.findWithDefault [] start (startCats pinfo) schart = xchart2syntaxchart chart pinfo (i,j) = inputBounds inTokens finalEdges = [makeFinalEdge cat i j | cat <- starts] forests = chart2forests schart (const False) finalEdges filteredForests = forests >>= applyProfileToForest - chart = process strategy pinfo inTokens axioms emptyXChart - axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens - | isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens + pinfoex = buildParserInfo pinfo + + chart = process strategy pinfo pinfoex inTokens axioms emptyXChart + axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens + | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens isBU s = s=="b" isTD s = s=="t" -- used in prediction -emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec -emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) - where - FRule _ _ rhs _ _ = allRules pinfo ! ruleid +emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec +emptyChildren ruleid args = SNode ruleid (replicate (length args) []) + -process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat -process strategy pinfo toks [] chart = chart -process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! 
univRule c item chart +process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat +process strategy pinfo pinfoex toks [] chart = chart +process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart where - univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart + univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart | inRange (bounds lin) ppos = case lin ! ppos of - FSymCat r d -> let c = args !! d + FSymCat d r -> let c = args !! d in case recs !! d of [] -> case insertXChart chart item c of Nothing -> chart - Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c + Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c rng <- concatRange rng (found' !! r) - return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) + return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) ++ do guard (isTD strategy) - ruleid <- topdownRules pinfo ? c - return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) - in process strategy pinfo toks items chart + (ruleid,args) <- topdownRules pinfo c + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) + in process strategy pinfo pinfoex toks items chart found' -> let items = do rng <- concatRange rng (found' !! r) - return (c, Active found rng lbl (ppos+1) node) - in process strategy pinfo toks items chart - FSymTok tok -> let items = do t_rng <- inputToken toks ? tok + return (Active found rng lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart + FSymTok (KS tok) + -> let items = do t_rng <- inputToken toks ? 
tok rng' <- concatRange rng t_rng - return (cat, Active found rng' lbl (ppos+1) node) - in process strategy pinfo toks items chart + return (Active found rng' lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart | otherwise = if inRange (bounds lins) (lbl+1) - then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart - else univRule cat (Final (reverse (rng:found)) node) chart + then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart + else univRule (Final (reverse (rng:found)) node args cat) chart where - (FRule _ _ args cat lins) = allRules pinfo ! ruleid - lin = lins ! lbl - univRule cat item@(Final found' node) chart = + (FFun _ _ lins) = functions pinfo ! ruleid + lin = sequences pinfo ! (lins ! lbl) + univRule item@(Final found' node args cat) chart = case insertXChart chart item cat of Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat - let FRule _ _ args _ lins = allRules pinfo ! ruleid - FSymCat r d = lins ! l ! ppos + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos rng <- concatRange rng (found' !! r) - return (args !! d, Active found rng l (ppos+1) (updateChildren node d found')) + return (Active found rng l (ppos+1) (updateChildren node d found') args c) ++ do guard (isBU strategy) - ruleid <- leftcornerCats pinfo ? cat - let FRule _ _ args _ lins = allRules pinfo ! ruleid - FSymCat r d = lins ! 0 ! 0 - return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) + (ruleid,args,c) <- leftcornerCats pinfoex ? cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 + return (Active [] (found' !! 
r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) - updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec + updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo toks items chart + in process strategy pinfo pinfoex toks items chart ---------------------------------------------------------------------- -- * XChart @@ -116,21 +123,23 @@ data Item Range {-# UNPACK #-} !FIndex {-# UNPACK #-} !FPointPos - (SyntaxNode RuleId RangeRec) - | Final RangeRec (SyntaxNode RuleId RangeRec) - deriving (Eq, Ord) + (SyntaxNode FunId RangeRec) + [FCat] + FCat + | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat + deriving (Eq, Ord, Show) data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) emptyXChart :: Ord c => XChart c emptyXChart = XChart MM.empty MM.empty -insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = case MM.insert' c item actives of Nothing -> Nothing Just actives -> Just (XChart actives finals) -insertXChart (XChart actives finals) item@(Final _ _) c = +insertXChart (XChart actives finals) item@(Final _ _ _ _) c = case MM.insert' c item finals of Nothing -> Nothing Just finals -> Just (XChart actives finals) @@ -142,17 +151,17 @@ xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) ( xchart2syntaxchart (XChart actives finals) pinfo = accumAssoc groupSyntaxNodes $ [ case node of - SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid + SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! 
ruleid in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) SString s -> ((cat,found), SString s) SInt n -> ((cat,found), SInt n) SFloat f -> ((cat,found), SFloat f) - | (cat, Final found node) <- MM.toList finals + | (Final found node rhs cat) <- MM.elems finals ] -literals :: ParserInfo -> Input FToken -> [(FCat,Item)] -literals pinfo toks = - [let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] +literals :: ParserInfoEx -> Input FToken -> [Item] +literals pinfoex toks = + [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] where lexer t = case reads t of @@ -166,24 +175,30 @@ literals pinfo toks = -- Earley -- -- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] initialTD pinfo starts toks = do cat <- starts - ruleid <- topdownRules pinfo ? cat - return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) + (ruleid,args) <- topdownRules pinfo cat + return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) + +topdownRules pinfo cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply ruleid args) rules = (ruleid,args) : rules + g (FCoerce cat) rules = f cat rules ---------------------------------------------------------------------- -- Kilbury -- -initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] -initialBU pinfo toks = +initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] +initialBU pinfo pinfoex toks = do (tok,rngs) <- aAssocs (inputToken toks) - ruleid <- leftcornerTokens pinfo ? tok - let FRule _ _ _ cat _ = allRules pinfo ! ruleid + (ruleid,args,cat) <- leftcornerTokens pinfoex ? 
tok rng <- rngs - return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo)) + return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) ++ - do ruleid <- epsilonRules pinfo - let FRule _ _ _ cat _ = allRules pinfo ! ruleid - return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) + do (ruleid,args,cat) <- epsilonRules pinfoex + let FFun _ _ _ = functions pinfo ! ruleid + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs index 23b0424cc..4f35ed169 100644 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ b/src/PGF/Parsing/FCFG/Incremental.hs @@ -8,55 +8,54 @@ module PGF.Parsing.FCFG.Incremental , parse
) where
-import Data.Array
+import Data.Array.IArray
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, maybe)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
-import GF.Data.Assoc
import GF.Data.SortedList
-import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
-import PGF.Parsing.FCFG.Utilities
import Debug.Trace
-parse :: ParserInfo -> CId -> [FToken] -> [Tree]
-parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start
+parse :: ParserInfo -> CId -> [String] -> [Tree]
+parse pinfo start toks = maybe [] (\ps -> extractExps ps start) (foldM nextState (initState pinfo start) toks)
initState :: ParserInfo -> CId -> ParseState
initState pinfo start =
let items = do
- c <- Map.findWithDefault [] start (startupCats pinfo)
- ruleid <- topdownRules pinfo ? c
- let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
- lbl <- indices lins
- return (Active 0 lbl 0 ruleid args cat)
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ (lbl,seqid) <- assocs lins
+ return (Active 0 0 funid seqid args (AK cat lbl))
- forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
-
- max_fid = maximum (0:[maximum (cat:args) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)])+1
+ max_fid = maximum (0:[maximum (cat:args) | (cat, set) <- IntMap.toList (productions pinfo)
+ , p <- Set.toList set
+ , let args = case p of {FApply _ args -> args; FCoerce cat -> [cat]}])+1
in State pinfo
- (Chart MM.empty [] Map.empty forest max_fid 0)
+ (Chart emptyAC [] emptyPC (productions pinfo) max_fid 0)
(Set.fromList items)
-- | From the current state and the next token
-- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one.
-nextState :: ParseState -> String -> ParseState
+nextState :: ParseState -> String -> Maybe ParseState
nextState (State pinfo chart items) t =
- let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart)
- chart2 = chart1{ active =MM.empty
+ let (items1,chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
+ chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
- , passive=Map.empty
+ , passive=emptyPC
, offset =offset chart1+1
}
- in State pinfo chart2 items1
+ in if Set.null items1
+ then Nothing
+ else Just (State pinfo chart2 items1)
where
add tok item set
| tok == t = Set.insert item set
@@ -68,107 +67,157 @@ nextState (State pinfo chart items) t = -- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w =
- let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart)
- chart2 = chart1{ active =MM.empty
+ let (map',chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
+ chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
- , passive=Map.empty
+ , passive=emptyPC
, offset =offset chart1+1
}
in fmap (State pinfo chart2) map'
where
add tok item map
- | isPrefixOf w tok = fromMaybe map (MM.insert' tok item map)
+ | isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
| otherwise = map
extractExps :: ParseState -> CId -> [Tree]
extractExps (State pinfo chart items) start = exps
where
- (_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart)
+ (_,st) = process (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
exps = nubsort $ do
- c <- Map.findWithDefault [] start (startupCats pinfo)
- ruleid <- topdownRules pinfo ? c
- let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
lbl <- indices lins
- fid <- Map.lookup (PK c lbl 0) (passive st)
+ Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
go Set.empty fid
- go rec fid
- | Set.member fid rec = mzero
- | otherwise = do set <- IntMap.lookup fid (forest st)
- Passive ruleid args <- Set.toList set
- let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
- if fn == wildCId
- then go (Set.insert fid rec) (head args)
- else do args <- mapM (go (Set.insert fid rec)) args
- return (Fun fn args)
-
-process fn !rules [] acc_chart = acc_chart
-process fn !rules (item:items) acc_chart = univRule item acc_chart
+ go rec fcat
+ | Set.member fcat rec = mzero
+ | otherwise = do (funid,args) <- foldForest (\funid args -> (:) (funid,args)) [] fcat (forest st)
+ let FFun fn _ lins = functions pinfo ! funid
+ args <- mapM (go (Set.insert fcat rec)) args
+ return (Fun fn args)
+
+process fn !seqs !funs [] acc chart = (acc,chart)
+process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
+ | inRange (bounds lin) ppos =
+ case unsafeAt lin ppos of
+ FSymCat d r -> let !fid = args !! d
+ key = AK fid r
+
+ items2 = case lookupPC (mkPK key k) (passive chart) of
+ Nothing -> items
+ Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
+ items3 = foldForest (\funid args -> (:) (Active k 0 funid (rhs funid r) args key)) items2 fid (forest chart)
+ in case lookupAC key (active chart) of
+ Nothing -> process fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
+ Just set | Set.member item set -> process fn seqs funs items acc chart
+ | otherwise -> process fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
+ FSymTok (KS tok) -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
+ in process fn seqs funs items acc' chart
+ | otherwise =
+ case lookupPC (mkPK key0 j) (passive chart) of
+ Nothing -> let fid = nextId chart
+
+ items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
+ Nothing -> items
+ Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
+ in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
+ in process fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
+ ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
+ ,nextId =nextId chart+1
+ }
+ Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
+ in process fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
where
- univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart)
- | inRange (bounds lin) ppos =
- case unsafeAt lin ppos of
- FSymCat r d -> let !fid = args !! d
- in case MM.insert' (AK fid r) item (active chart) of
- Nothing -> process fn rules items $ acc_chart
- Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
- Nothing -> id
- Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
- (case IntMap.lookup fid (forest chart) of
- Nothing -> id
- Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
- process fn rules items $
- (acc,chart{active=actCat})
- FSymTok tok -> process fn rules items $
- (fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
- | otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
- Nothing -> let fid = nextId chart
- in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
- | Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
- let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
- process fn rules items $
- (acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
- ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
- ,nextId =nextId chart+1
- })
- Just id -> process fn rules items $
- (acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
- where
- !lin = rhs ruleid lbl
- !k = offset chart
+ !lin = unsafeAt seqs seqid
+ !k = offset chart
- rhs ruleid lbl = unsafeAt lins lbl
+ mkPK (AK fid lbl) j = PK fid lbl j
+
+ rhs funid lbl = unsafeAt lins lbl
where
- (FRule _ _ _ cat lins) = unsafeAt rules ruleid
+ FFun _ _ lins = unsafeAt funs funid
 updateAt :: Int -> a -> [a] -> [a] -- replace the element at 0-based index nr with x
 updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] -- an index outside the list leaves xs unchanged
+----------------------------------------------------------------
+-- Active Chart
+----------------------------------------------------------------
+
data Active
= Active {-# UNPACK #-} !Int
- {-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
- {-# UNPACK #-} !RuleId
+ {-# UNPACK #-} !FunId
+ {-# UNPACK #-} !SeqId
[FCat]
- {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !ActiveKey
deriving (Eq,Show,Ord)
-data Passive
- = Passive {-# UNPACK #-} !RuleId
- [FCat]
- deriving (Eq,Ord,Show)
-
 data ActiveKey -- key under which active items are stored in an ActiveChart
   = AK {-# UNPACK #-} !FCat -- category component (outer key in lookupAC/insertAC)
        {-# UNPACK #-} !FIndex -- index component (inner key; presumably the constituent label -- TODO confirm)
   deriving (Eq,Ord,Show)
+type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active)) -- two-level map: outer key is the FCat, inner key the FIndex of an ActiveKey
+
+emptyAC :: ActiveChart -- an active chart with no entries
+emptyAC = IntMap.empty
+
+lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active) -- set of items stored under the key, if any
+lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l -- Nothing when either the category or the index is absent
+
+labelsAC :: FCat -> ActiveChart -> [FIndex] -- all inner keys that have an entry for the given category
+labelsAC fcat chart =
+  case IntMap.lookup fcat chart of
+    Nothing -> [] -- category not present in the chart
+    Just map -> IntMap.keys map -- note: the local name map shadows Prelude's map inside this branch
+
+insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart -- store set under the key, keeping the other indices of the same category
+insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart -- the new singleton is the left argument of the left-biased union, so an existing set under (fcat,l) is replaced, not merged
+
+
+----------------------------------------------------------------
+-- Passive Chart
+----------------------------------------------------------------
+
 data PassiveKey -- key of the PassiveChart
   = PK {-# UNPACK #-} !FCat -- category component
        {-# UNPACK #-} !FIndex -- index component (same two fields as an ActiveKey, see mkPK in process)
        {-# UNPACK #-} !Int -- an offset; process passes either the item's start j or the chart offset k here
   deriving (Eq,Ord,Show)
+type PassiveChart = Map.Map PassiveKey FCat -- maps a passive key to the category id recorded for it
+
+emptyPC :: PassiveChart -- a passive chart with no entries
+emptyPC = Map.empty
+
+lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat -- category recorded for the key, or Nothing
+lookupPC key chart = Map.lookup key chart -- thin wrapper over Map.lookup
+
+insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart -- record fcat for the key
+insertPC key fcat chart = Map.insert key fcat chart -- Map.insert overwrites any category previously stored under key
+
+
+----------------------------------------------------------------
+-- Forest
+----------------------------------------------------------------
+
+foldForest :: (FunId -> [FCat] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b -- fold f over the FApply productions reachable from fcat
+foldForest f b fcat forest =
+  case IntMap.lookup fcat forest of
+    Nothing -> b -- no productions for this category: accumulator returned unchanged
+    Just set -> Set.fold foldPassive b set
+  where
+    foldPassive (FCoerce fcat) b = foldForest f b fcat forest -- follow the coercion into the target category; NOTE(review): no visited set, so this relies on coercion chains being acyclic
+    foldPassive (FApply funid args) b = f funid args b -- an application contributes one call of f
+
+
+----------------------------------------------------------------
+-- Parse State
+----------------------------------------------------------------
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
@@ -176,10 +225,11 @@ data ParseState = State ParserInfo Chart (Set.Set Active) data Chart
= Chart -- Chart record; this hunk swaps the MultiMap/Map field types for the ActiveChart/PassiveChart aliases and changes the forest element type from Passive to Production.
- { active :: MM.MultiMap ActiveKey Active
- , actives :: [MM.MultiMap ActiveKey Active]
- , passive :: Map.Map PassiveKey FCat
- , forest :: IntMap.IntMap (Set.Set Passive)
+ { active :: ActiveChart -- NOTE(review): presumably the active items of the position being processed; confirm in the parser code
+ , actives :: [ActiveChart] -- NOTE(review): presumably the active charts of earlier positions; confirm
+ , passive :: PassiveChart
+ , forest :: IntMap.IntMap (Set.Set Production) -- sets of productions keyed by Int; the same shape foldForest takes as its forest argument
, nextId :: {-# UNPACK #-} !FCat -- NOTE(review): looks like the next unused category id; confirm
, offset :: {-# UNPACK #-} !Int -- NOTE(review): presumably the current input offset; confirm
}
+ deriving Show -- Show instance added by this change
diff --git a/src/PGF/Parsing/FCFG/Utilities.hs b/src/PGF/Parsing/FCFG/Utilities.hs index 4187d0f24..6a2c13c0a 100644 --- a/src/PGF/Parsing/FCFG/Utilities.hs +++ b/src/PGF/Parsing/FCFG/Utilities.hs @@ -31,7 +31,7 @@ type RangeRec = [Range] data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int | EmptyRange - deriving (Eq, Ord) + deriving (Eq, Ord, Show) makeRange :: Int -> Int -> Range makeRange = Range @@ -83,7 +83,7 @@ data SyntaxNode n e = SMeta | SString String | SInt Integer | SFloat Double - deriving (Eq,Ord) + deriving (Eq,Ord,Show) groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] groupSyntaxNodes [] = [] diff --git a/src/PGF/Raw/Convert.hs b/src/PGF/Raw/Convert.hs index 0c9338012..2912bced1 100644 --- a/src/PGF/Raw/Convert.hs +++ b/src/PGF/Raw/Convert.hs @@ -3,13 +3,12 @@ module PGF.Raw.Convert (toPGF,fromPGF) where import PGF.CId import PGF.Data import PGF.Raw.Abstract -import PGF.BuildParser (buildParserInfo) -import PGF.Parsing.FCFG.Utilities -import qualified GF.Compile.GenerateFCFG as FCFG import qualified GF.Compile.GeneratePMCFG as PMCFG -import qualified Data.Array as Array -import qualified Data.Map as Map +import Data.Array.IArray +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap pgfMajorVersion, pgfMinorVersion :: Integer (pgfMajorVersion, pgfMinorVersion) = (1,0) @@ -54,11 +53,11 @@ toConcr pgf rexp = lindefs = Map.empty, printnames = Map.empty, paramlincats = Map.empty, - parser = Just (buildParserOnDemand cnc) -- This thunk will be overwritten if there is a parser + parser = Just (PMCFG.convertConcrete (abstract pgf) cnc) + -- This thunk will be overwritten if there is a parser -- compiled in the PGF file. We use lazy evaluation here -- to make sure that buildParserOnDemand is called only -- if it is needed. 
- }) rexp in cnc where @@ -72,41 +71,44 @@ toConcr pgf rexp = add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts } add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) } - buildParserOnDemand cnc = buildParserInfo fcfg - where - fcfg - | Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" = PMCFG.convertConcrete (abstract pgf) cnc - | otherwise = FCFG.convertConcrete (abstract pgf) cnc - toPInfo :: [RExp] -> ParserInfo -toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats) +toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "startcats" cs] = + ParserInfo { functions = functions + , sequences = seqs + , productions = productions + , startCats = cats + } where - rules = map toFRule rs - cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs] - - toFRule :: RExp -> FRule - toFRule (App "rule" - [n, - App "cats" (rt:at), - App "R" ls]) = FRule fun prof args res lins + functions = mkArray (map toFFun fs) + seqs = mkArray (map toFSeq ss) + productions = IntMap.fromList (map toProductionSet ps) + cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs] + + toFFun :: RExp -> FFun + toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins + where + fun = mkCId f + prof = map toProfile ts + lins = mkArray [fromIntegral seqid | AInt seqid <- ls] + + toProfile :: RExp -> Profile + toProfile AMet = [] + toProfile (App "_A" [t]) = [expToInt t] + toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts] + + toFSeq :: RExp -> FSeq + toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss] + + toProductionSet :: RExp -> (FCat,Set.Set Production) + toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs)) where - (fun,prof) = toFName n - args = map expToInt at - res = expToInt rt - lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls] - -toFName :: RExp -> (CId,[Profile]) -toFName (App "_A" [x]) = (wildCId, [[expToInt x]]) -toFName (App f ts) = 
(mkCId f, map toProfile ts) - where - toProfile :: RExp -> Profile - toProfile AMet = [] - toProfile (App "_A" [t]) = [expToInt t] - toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts] + toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at) + toProduction (App "C" [fcat]) = FCoerce (expToInt fcat) toSymbol :: RExp -> FSymbol -toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n) -toSymbol (AStr t) = FSymTok t +toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l) +toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts) +toSymbol (AStr t) = FSymTok (KS t) toType :: RExp -> Type toType e = case e of @@ -142,8 +144,15 @@ toTerm e = case e of App f [] -> F (mkCId f) AInt i -> C (fromInteger i) AMet -> TM "?" - AStr s -> K (KS s) ---- + App "KP" (d:alts) -> K (toKP d alts) + AStr s -> K (KS s) _ -> error $ "term " ++ show e + +toKP d alts = KP (toStr d) (map toAlt alts) + where + toStr (App "S" vs) = [v | AStr v <- vs] + toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y) + ------------------------------ --- from internal to parser -- @@ -192,8 +201,7 @@ fromExp e = case e of ELit (LFlt d) -> AFlt d ELit (LInt i) -> AInt (toInteger i) EMeta _ -> AMet ---- - EEq eqs -> - App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] + EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] fromTerm :: Term -> RExp fromTerm e = case e of @@ -206,8 +214,11 @@ fromTerm e = case e of TM _ -> AMet F f -> App (prCId f) [] V i -> App "A" [AInt (toInteger i)] - K (KS s) -> AStr s ---- - K (KP d vs) -> App "FV" (str d : [str v | Alt v _ <- vs]) ---- + K t -> fromTokn t + +fromTokn :: Tokn -> RExp +fromTokn (KS s) = AStr s +fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs]) where str v = App "S" (map AStr v) @@ -215,39 +226,42 @@ fromTerm e = case e of fromPInfo :: ParserInfo -> RExp fromPInfo p = App "parser" [ - App "rules" [fromFRule rule | rule <- Array.elems (allRules p)], - App "startupcats" 
[App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)] + App "functions" [fromFFun fun | fun <- elems (functions p)], + App "sequences" [fromFSeq seq | seq <- elems (sequences p)], + App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)], + App "startcats" [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)] ] -fromFRule :: FRule -> RExp -fromFRule (FRule fun prof args res lins) = - App "rule" [fromFName (fun,prof), - App "cats" (intToExp res:map intToExp args), - App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins] - ] - -fromFName :: (CId,[Profile]) -> RExp -fromFName (f,ps) | f == wildCId = fromProfile (head ps) - | otherwise = App (prCId f) (map fromProfile ps) +fromFFun :: FFun -> RExp +fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]] where fromProfile :: Profile -> RExp fromProfile [] = AMet fromProfile [x] = daughter x fromProfile args = App "_U" (map daughter args) - + daughter n = App "_A" [intToExp n] fromSymbol :: FSymbol -> RExp -fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l] -fromSymbol (FSymTok t) = AStr t +fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l] +fromSymbol (FSymTok t) = fromTokn t + +fromFSeq :: FSeq -> RExp +fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq] + +fromProductionSet :: (FCat,Set.Set Production) -> RExp +fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs)) + where + fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args) + fromPassive (FCoerce fcat) = App "C" [intToExp fcat] -- ** Utilities mkTermMap :: [RExp] -> Map.Map CId Term mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts] -mkArray :: [a] -> Array.Array Int a -mkArray xs = Array.listArray (0, length xs - 1) xs +mkArray :: IArray a e => [e] -> a Int e +mkArray xs = listArray (0, length xs - 1) xs 
expToInt :: Integral a => RExp -> a expToInt (App "neg" [AInt i]) = fromIntegral (negate i) |
