From 8bc8929c59d2bd6f28d5dab9c7a9ca8a1c23609e Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 16 Jun 2009 11:56:08 +0000 Subject: completely phrase based parser and support for pre {} in PMCFG --- src/PGF/Binary.hs | 8 ++-- src/PGF/BuildParser.hs | 6 +-- src/PGF/Data.hs | 5 ++- src/PGF/PMCFG.hs | 24 +++++------- src/PGF/Parsing/FCFG/Active.hs | 2 +- src/PGF/Parsing/FCFG/Incremental.hs | 75 ++++++++++++++++++++++--------------- 6 files changed, 65 insertions(+), 55 deletions(-) (limited to 'src/PGF') diff --git a/src/PGF/Binary.hs b/src/PGF/Binary.hs index ea99a3ed4..b99296db5 100644 --- a/src/PGF/Binary.hs +++ b/src/PGF/Binary.hs @@ -156,14 +156,14 @@ instance Binary FFun where instance Binary FSymbol where put (FSymCat n l) = putWord8 0 >> put (n,l) put (FSymLit n l) = putWord8 1 >> put (n,l) - put (FSymTok (KS s)) = putWord8 2 >> put s - put (FSymTok (KP d vs)) = putWord8 3 >> put (d,vs) + put (FSymKS ts) = putWord8 2 >> put ts + put (FSymKP d vs) = putWord8 3 >> put (d,vs) get = do tag <- getWord8 case tag of 0 -> liftM2 FSymCat get get 1 -> liftM2 FSymLit get get - 2 -> liftM (FSymTok . KS) get - 3 -> liftM2 (\d vs -> FSymTok (KP d vs)) get get + 2 -> liftM FSymKS get + 3 -> liftM2 (\d vs -> FSymKP d vs) get get _ -> decodingError instance Binary Production where diff --git a/src/PGF/BuildParser.hs b/src/PGF/BuildParser.hs index 1603a3dab..23e0725c6 100644 --- a/src/PGF/BuildParser.hs +++ b/src/PGF/BuildParser.hs @@ -35,8 +35,8 @@ data ParserInfoEx getLeftCornerTok pinfo (FFun _ _ lins) | inRange (bounds syms) 0 = case syms ! 0 of - FSymTok (KS tok) -> [tok] - _ -> [] + FSymKS [tok] -> [tok] + _ -> [] | otherwise = [] where syms = (sequences pinfo) ! (lins ! 0) @@ -73,4 +73,4 @@ buildParserInfo pinfo = | (cat,set) <- IntMap.toList (productions pinfo) , (FApply ruleid args) <- Set.toList set , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] - grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin] + grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs index 142968d8c..6895bd335 100644 --- a/src/PGF/Data.hs +++ b/src/PGF/Data.hs @@ -53,7 +53,10 @@ data Term = | TM String deriving (Eq,Ord,Show) - +data Tokn = + KS String + | KP [String] [Alternative] + deriving (Eq,Ord,Show) -- merge two GFCCs; fails is differens absnames; priority to second arg diff --git a/src/PGF/PMCFG.hs b/src/PGF/PMCFG.hs index 9a0dfa98e..6a83baad3 100644 --- a/src/PGF/PMCFG.hs +++ b/src/PGF/PMCFG.hs @@ -16,24 +16,20 @@ type FPointPos = Int data FSymbol = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex - | FSymTok Tokn + | FSymKS [String] + | FSymKP [String] [Alternative] deriving (Eq,Ord,Show) type Profile = [Int] data Production = FApply {-# UNPACK #-} !FunId [FCat] | FCoerce {-# UNPACK #-} !FCat - | FConst Tree String + | FConst Tree [String] deriving (Eq,Ord,Show) data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) type FSeq = Array FPointPos FSymbol type FunId = Int type SeqId = Int -data Tokn = - KS String - | KP [String] [Alternative] - deriving (Eq,Ord,Show) - data Alternative = Alt [String] [String] deriving (Eq,Ord,Show) @@ -70,8 +66,8 @@ ppProduction (fcat,FApply funid args) = ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args))) ppProduction (fcat,FCoerce arg) = ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg) -ppProduction (fcat,FConst _ s) = - ppFCat fcat <+> text "->" <+> ppStr s +ppProduction (fcat,FConst _ ss) = + ppFCat fcat <+> text "->" <+> ppStrs ss ppFun (funid,FFun fun _ arr) = ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (text (prCId fun)) @@ -84,14 +80,12 @@ ppStartCat (id,fcats) = ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>' ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>' -ppSymbol (FSymTok t) = ppTokn t - -ppTokn (KS t) = ppStr t -ppTokn (KP ts alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppStr ts) : map ppAlt alts))) +ppSymbol (FSymKS ts) = ppStrs ts +ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts))) -ppAlt (Alt ts ps) = hsep (map ppStr ts) <+> char '/' <+> hsep (map ppStr ps) +ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps) -ppStr s = doubleQuotes (text s) +ppStrs ss = doubleQuotes (hsep (map text ss)) ppFCat fcat | fcat == fcatString = text "String" diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs index ad1db7220..07fa1ba4f 100644 --- a/src/PGF/Parsing/FCFG/Active.hs +++ b/src/PGF/Parsing/FCFG/Active.hs @@ -84,7 +84,7 @@ process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo found' -> let items = do rng <- concatRange rng (found' !! r) return (Active found rng lbl (ppos+1) node args cat) in process strategy pinfo pinfoex toks items chart - FSymTok (KS tok) + FSymKS [tok] -> let items = do t_rng <- inputToken toks ? tok rng' <- concatRange rng t_rng return (Active found rng' lbl (ppos+1) node args cat) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs index 2950c2776..0aedd6d30 100644 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ b/src/PGF/Parsing/FCFG/Incremental.hs @@ -13,6 +13,7 @@ import Data.Array.Base (unsafeAt) import Data.List (isPrefixOf, foldl') import Data.Maybe (fromMaybe, maybe) import qualified Data.Map as Map +import qualified GF.Data.TrieMap as TMap import qualified Data.IntMap as IntMap import qualified Data.Set as Set import Control.Monad @@ -37,26 +38,29 @@ initState pinfo (DTyp _ start _) = in State pinfo (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0) - (Set.fromList items) + (TMap.singleton [] (Set.fromList items)) -- | From the current state and the next token -- 'nextState' computes a new state where the token -- is consumed and the current position shifted by one. nextState :: ParseState -> String -> Maybe ParseState nextState (State pinfo chart items) t = - let (items1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart + let (mb_agenda,map_items) = TMap.decompose items + agenda = maybe [] Set.toList mb_agenda + acc = fromMaybe TMap.empty (Map.lookup t map_items) + (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC , offset =offset chart1+1 } - in if Set.null items1 + in if TMap.null acc1 then Nothing - else Just (State pinfo chart2 items1) + else Just (State pinfo chart2 acc1) where - add (KS tok) item set - | tok == t = Set.insert item set - | otherwise = set + add (tok:toks) item acc + | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc + add _ item acc = acc -- | If the next token is not known but only its prefix (possible empty prefix) -- then the 'getCompletions' function can be used to calculate the possible @@ -64,22 +68,27 @@ nextState (State pinfo chart items) t = -- the GF interpreter. getCompletions :: ParseState -> String -> Map.Map String ParseState getCompletions (State pinfo chart items) w = - let (map',chart1) = process Nothing add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart + let (mb_agenda,map_items) = TMap.decompose items + agenda = maybe [] Set.toList mb_agenda + acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items + (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC , offset =offset chart1+1 } - in fmap (State pinfo chart2) map' + in fmap (State pinfo chart2) acc' where - add (KS tok) item map - | isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map - | otherwise = map + add (tok:toks) item acc + | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc + add _ item acc = acc extractExps :: ParseState -> Type -> [Tree] extractExps (State pinfo chart items) (DTyp _ start _) = exps where - (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart + (mb_agenda,acc) = TMap.decompose items + agenda = maybe [] Set.toList mb_agenda + (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart exps = nubsort $ do cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) @@ -138,19 +147,23 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} Just set | Set.member item set -> process mbt fn seqs funs items acc chart | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} - FSymTok tok -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc + FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc + in process mbt fn seqs funs items acc' chart + FSymKP strs vars + -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc + (strs:[strs' | Alt strs' _ <- vars]) in process mbt fn seqs funs items acc' chart FSymLit d r -> let !fid = args !! d - in case [t | FConst _ t <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of - (tok:_) -> let !acc' = fn (KS tok) (Active j (ppos+1) funid seqid args key0) acc - in process mbt fn seqs funs items acc' chart - [] -> case litCatMatch fid mbt of - Just (t,lit) -> let fid' = nextId chart - !acc' = fn (KS t) (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc - in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit t)) (forest chart) - ,nextId=nextId chart+1 - } - Nothing -> process mbt fn seqs funs items acc chart + in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of + (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc + in process mbt fn seqs funs items acc' chart + [] -> case litCatMatch fid mbt of + Just (toks,lit) -> let fid' = nextId chart + !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc + in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart) + ,nextId=nextId chart+1 + } + Nothing -> process mbt fn seqs funs items acc chart | otherwise = case lookupPC (mkPK key0 j) (passive chart) of Nothing -> let fid = nextId chart @@ -181,12 +194,12 @@ updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] litCatMatch fcat (Just t) - | fcat == fcatString = Just (t,Lit (LStr t)) - | fcat == fcatInt = case reads t of {[(n,"")] -> Just (t,Lit (LInt n)); + | fcat == fcatString = Just ([t],Lit (LStr t)) + | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],Lit (LInt n)); _ -> Nothing } - | fcat == fcatFloat = case reads t of {[(d,"")] -> Just (t,Lit (LFlt d)); + | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],Lit (LFlt d)); _ -> Nothing } - | fcat == fcatVar = Just (t,Var (mkCId t)) + | fcat == fcatVar = Just ([t],Var (mkCId t)) litCatMatch _ _ = Nothing @@ -250,7 +263,7 @@ insertPC key fcat chart = Map.insert key fcat chart -- Forest ---------------------------------------------------------------- -foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> String -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b +foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b foldForest f g b fcat forest = case IntMap.lookup fcat forest of Nothing -> b @@ -258,7 +271,7 @@ foldForest f g b fcat forest = where foldProd (FCoerce fcat) b = foldForest f g b fcat forest foldProd (FApply funid args) b = f funid args b - foldProd (FConst const s) b = g const s b + foldProd (FConst const toks) b = g const toks b ---------------------------------------------------------------- @@ -267,7 +280,7 @@ foldForest f g b fcat forest = -- | An abstract data type whose values represent -- the current state in an incremental parser. -data ParseState = State ParserInfo Chart (Set.Set Active) +data ParseState = State ParserInfo Chart (TMap.TrieMap String (Set.Set Active)) data Chart = Chart -- cgit v1.2.3