diff options
| author | krasimir <krasimir@chalmers.se> | 2009-06-16 11:56:08 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-06-16 11:56:08 +0000 |
| commit | 8bc8929c59d2bd6f28d5dab9c7a9ca8a1c23609e (patch) | |
| tree | 84244e9cc3b969e86167b309538dfe08d7374630 /src/PGF | |
| parent | b442cde3bd01fb935c215446097592510cf8e713 (diff) | |
completely phrase based parser and support for pre {} in PMCFG
Diffstat (limited to 'src/PGF')
| -rw-r--r-- | src/PGF/Binary.hs | 8 | ||||
| -rw-r--r-- | src/PGF/BuildParser.hs | 6 | ||||
| -rw-r--r-- | src/PGF/Data.hs | 5 | ||||
| -rw-r--r-- | src/PGF/PMCFG.hs | 24 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Active.hs | 2 | ||||
| -rw-r--r-- | src/PGF/Parsing/FCFG/Incremental.hs | 75 |
6 files changed, 65 insertions, 55 deletions
diff --git a/src/PGF/Binary.hs b/src/PGF/Binary.hs index ea99a3ed4..b99296db5 100644 --- a/src/PGF/Binary.hs +++ b/src/PGF/Binary.hs @@ -156,14 +156,14 @@ instance Binary FFun where instance Binary FSymbol where
put (FSymCat n l) = putWord8 0 >> put (n,l)
put (FSymLit n l) = putWord8 1 >> put (n,l)
- put (FSymTok (KS s)) = putWord8 2 >> put s
- put (FSymTok (KP d vs)) = putWord8 3 >> put (d,vs)
+ put (FSymKS ts) = putWord8 2 >> put ts
+ put (FSymKP d vs) = putWord8 3 >> put (d,vs)
get = do tag <- getWord8
case tag of
0 -> liftM2 FSymCat get get
1 -> liftM2 FSymLit get get
- 2 -> liftM (FSymTok . KS) get
- 3 -> liftM2 (\d vs -> FSymTok (KP d vs)) get get
+ 2 -> liftM FSymKS get
+ 3 -> liftM2 (\d vs -> FSymKP d vs) get get
_ -> decodingError
instance Binary Production where
diff --git a/src/PGF/BuildParser.hs b/src/PGF/BuildParser.hs index 1603a3dab..23e0725c6 100644 --- a/src/PGF/BuildParser.hs +++ b/src/PGF/BuildParser.hs @@ -35,8 +35,8 @@ data ParserInfoEx getLeftCornerTok pinfo (FFun _ _ lins) | inRange (bounds syms) 0 = case syms ! 0 of - FSymTok (KS tok) -> [tok] - _ -> [] + FSymKS [tok] -> [tok] + _ -> [] | otherwise = [] where syms = (sequences pinfo) ! (lins ! 0) @@ -73,4 +73,4 @@ buildParserInfo pinfo = | (cat,set) <- IntMap.toList (productions pinfo) , (FApply ruleid args) <- Set.toList set , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] - grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin] + grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs index 142968d8c..6895bd335 100644 --- a/src/PGF/Data.hs +++ b/src/PGF/Data.hs @@ -53,7 +53,10 @@ data Term = | TM String deriving (Eq,Ord,Show) - +data Tokn = + KS String + | KP [String] [Alternative] + deriving (Eq,Ord,Show) -- merge two GFCCs; fails is differens absnames; priority to second arg diff --git a/src/PGF/PMCFG.hs b/src/PGF/PMCFG.hs index 9a0dfa98e..6a83baad3 100644 --- a/src/PGF/PMCFG.hs +++ b/src/PGF/PMCFG.hs @@ -16,24 +16,20 @@ type FPointPos = Int data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
- | FSymTok Tokn
+ | FSymKS [String]
+ | FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
- | FConst Tree String
+ | FConst Tree [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
-data Tokn =
- KS String
- | KP [String] [Alternative]
- deriving (Eq,Ord,Show)
-
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
@@ -70,8 +66,8 @@ ppProduction (fcat,FApply funid args) = ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
-ppProduction (fcat,FConst _ s) =
- ppFCat fcat <+> text "->" <+> ppStr s
+ppProduction (fcat,FConst _ ss) =
+ ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun _ arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (text (prCId fun))
@@ -84,14 +80,12 @@ ppStartCat (id,fcats) = ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
-ppSymbol (FSymTok t) = ppTokn t
-
-ppTokn (KS t) = ppStr t
-ppTokn (KP ts alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppStr ts) : map ppAlt alts)))
+ppSymbol (FSymKS ts) = ppStrs ts
+ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
-ppAlt (Alt ts ps) = hsep (map ppStr ts) <+> char '/' <+> hsep (map ppStr ps)
+ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
-ppStr s = doubleQuotes (text s)
+ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "String"
diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs index ad1db7220..07fa1ba4f 100644 --- a/src/PGF/Parsing/FCFG/Active.hs +++ b/src/PGF/Parsing/FCFG/Active.hs @@ -84,7 +84,7 @@ process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo found' -> let items = do rng <- concatRange rng (found' !! r) return (Active found rng lbl (ppos+1) node args cat) in process strategy pinfo pinfoex toks items chart - FSymTok (KS tok) + FSymKS [tok] -> let items = do t_rng <- inputToken toks ? tok rng' <- concatRange rng t_rng return (Active found rng' lbl (ppos+1) node args cat) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs index 2950c2776..0aedd6d30 100644 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ b/src/PGF/Parsing/FCFG/Incremental.hs @@ -13,6 +13,7 @@ import Data.Array.Base (unsafeAt) import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe, maybe)
import qualified Data.Map as Map
+import qualified GF.Data.TrieMap as TMap
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
@@ -37,26 +38,29 @@ initState pinfo (DTyp _ start _) = in State pinfo
(Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
- (Set.fromList items)
+ (TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token
-- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one.
nextState :: ParseState -> String -> Maybe ParseState
nextState (State pinfo chart items) t =
- let (items1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = fromMaybe TMap.empty (Map.lookup t map_items)
+ (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
- in if Set.null items1
+ in if TMap.null acc1
then Nothing
- else Just (State pinfo chart2 items1)
+ else Just (State pinfo chart2 acc1)
where
- add (KS tok) item set
- | tok == t = Set.insert item set
- | otherwise = set
+ add (tok:toks) item acc
+ | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
+ add _ item acc = acc
-- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible
@@ -64,22 +68,27 @@ nextState (State pinfo chart items) t = -- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w =
- let (map',chart1) = process Nothing add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
+ (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
- in fmap (State pinfo chart2) map'
+ in fmap (State pinfo chart2) acc'
where
- add (KS tok) item map
- | isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
- | otherwise = map
+ add (tok:toks) item acc
+ | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+ add _ item acc = acc
extractExps :: ParseState -> Type -> [Tree]
extractExps (State pinfo chart items) (DTyp _ start _) = exps
where
- (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
+ (mb_agenda,acc) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
exps = nubsort $ do
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
@@ -138,19 +147,23 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
Just set | Set.member item set -> process mbt fn seqs funs items acc chart
| otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
- FSymTok tok -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
+ FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ FSymKP strs vars
+ -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
+ (strs:[strs' | Alt strs' _ <- vars])
in process mbt fn seqs funs items acc' chart
FSymLit d r -> let !fid = args !! d
- in case [t | FConst _ t <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
- (tok:_) -> let !acc' = fn (KS tok) (Active j (ppos+1) funid seqid args key0) acc
- in process mbt fn seqs funs items acc' chart
- [] -> case litCatMatch fid mbt of
- Just (t,lit) -> let fid' = nextId chart
- !acc' = fn (KS t) (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
- in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit t)) (forest chart)
- ,nextId=nextId chart+1
- }
- Nothing -> process mbt fn seqs funs items acc chart
+ in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
+ (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ [] -> case litCatMatch fid mbt of
+ Just (toks,lit) -> let fid' = nextId chart
+ !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
+ in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
+ ,nextId=nextId chart+1
+ }
+ Nothing -> process mbt fn seqs funs items acc chart
| otherwise =
case lookupPC (mkPK key0 j) (passive chart) of
Nothing -> let fid = nextId chart
@@ -181,12 +194,12 @@ updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
litCatMatch fcat (Just t)
- | fcat == fcatString = Just (t,Lit (LStr t))
- | fcat == fcatInt = case reads t of {[(n,"")] -> Just (t,Lit (LInt n));
+ | fcat == fcatString = Just ([t],Lit (LStr t))
+ | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],Lit (LInt n));
_ -> Nothing }
- | fcat == fcatFloat = case reads t of {[(d,"")] -> Just (t,Lit (LFlt d));
+ | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],Lit (LFlt d));
_ -> Nothing }
- | fcat == fcatVar = Just (t,Var (mkCId t))
+ | fcat == fcatVar = Just ([t],Var (mkCId t))
litCatMatch _ _ = Nothing
@@ -250,7 +263,7 @@ insertPC key fcat chart = Map.insert key fcat chart -- Forest
----------------------------------------------------------------
-foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> String -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
+foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
@@ -258,7 +271,7 @@ foldForest f g b fcat forest = where
foldProd (FCoerce fcat) b = foldForest f g b fcat forest
foldProd (FApply funid args) b = f funid args b
- foldProd (FConst const s) b = g const s b
+ foldProd (FConst const toks) b = g const toks b
----------------------------------------------------------------
@@ -267,7 +280,7 @@ foldForest f g b fcat forest = -- | An abstract data type whose values represent
-- the current state in an incremental parser.
-data ParseState = State ParserInfo Chart (Set.Set Active)
+data ParseState = State ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
data Chart
= Chart
|
