diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell/PGF/Parsing | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/runtime/haskell/PGF/Parsing')
| -rw-r--r-- | src/runtime/haskell/PGF/Parsing/FCFG/Active.hs | 205 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs | 371 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs | 188 |
3 files changed, 764 insertions, 0 deletions
diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..e88926f6e --- /dev/null +++ b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs @@ -0,0 +1,205 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module PGF.Parsing.FCFG.Active (parse) where + +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.Utilities +import qualified GF.Data.MultiMap as MM + +import PGF.CId +import PGF.Data +import PGF.Tree +import PGF.Parsing.FCFG.Utilities +import PGF.BuildParser + +import Control.Monad (guard) + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import qualified Data.Set as Set +import Data.Array.IArray +import Debug.Trace + +---------------------------------------------------------------------- +-- * parsing + +type FToken = String + +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +-- | the list of categories = possible starting categories +parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr] +parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees + where + inTokens = input toks + starts = Map.findWithDefault [] start (startCats pinfo) + schart = xchart2syntaxchart chart pinfo + (i,j) = inputBounds inTokens + finalEdges = [makeFinalEdge cat i j | cat <- starts] + forests = chart2forests schart (const False) finalEdges + filteredForests = forests >>= applyProfileToForest + + pinfoex = buildParserInfo pinfo + + chart = process strategy pinfo pinfoex inTokens axioms emptyXChart + axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens + | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec +emptyChildren ruleid args = SNode ruleid (replicate (length args) []) + + +process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat +process strategy pinfo pinfoex toks [] chart = chart +process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart + where + univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat d r -> let c = args !! d + in case recs !! d of + [] -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c + rng <- concatRange rng (found' !! r) + return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) + ++ + do guard (isTD strategy) + (ruleid,args) <- topdownRules pinfo c + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) + in process strategy pinfo pinfoex toks items chart + found' -> let items = do rng <- concatRange rng (found' !! r) + return (Active found rng lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart + FSymKS [tok] + -> let items = do t_rng <- inputToken toks ? tok + rng' <- concatRange rng t_rng + return (Active found rng' lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart + else univRule (Final (reverse (rng:found)) node args cat) chart + where + (FFun _ _ lins) = functions pinfo ! ruleid + lin = sequences pinfo ! (lins ! lbl) + univRule item@(Final found' node args cat) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos + rng <- concatRange rng (found' !! r) + return (Active found rng l (ppos+1) (updateChildren node d found') args c) + ++ + do guard (isBU strategy) + (ruleid,args,c) <- leftcornerCats pinfoex ? cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 + return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) + + updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec + updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs + in process strategy pinfo pinfoex toks items chart + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active RangeRec + Range + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !FPointPos + (SyntaxNode FunId RangeRec) + [FCat] + FCat + | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat + deriving (Eq, Ord, Show) + +data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart MM.empty MM.empty + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = + case MM.insert' c item actives of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _ _ _) c = + case MM.insert' c item finals of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = actives MM.! c +lookupXChartFinal (XChart actives finals) c = finals MM.! c + +xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) +xchart2syntaxchart (XChart actives finals) pinfo = + accumAssoc groupSyntaxNodes $ + [ case node of + SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid + in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) + SString s -> ((cat,found), SString s) + SInt n -> ((cat,found), SInt n) + SFloat f -> ((cat,found), SFloat f) + | (Final found node rhs cat) <- MM.elems finals + ] + +literals :: ParserInfoEx -> Input FToken -> [Item] +literals pinfoex toks = + [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] + where + lexer t = + case reads t of + [(n,"")] -> (fcatInt, SInt (n::Integer)) + _ -> case reads t of + [(f,"")] -> (fcatFloat, SFloat (f::Double)) + _ -> (fcatString,SString t) + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] +initialTD pinfo starts toks = + do cat <- starts + (ruleid,args) <- topdownRules pinfo cat + return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) + +topdownRules pinfo cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply ruleid args) rules = (ruleid,args) : rules + g (FCoerce cat) rules = f cat rules + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] +initialBU pinfo pinfoex toks = + do (tok,rngs) <- aAssocs (inputToken toks) + (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok + rng <- rngs + return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) + ++ + do (ruleid,args,cat) <- epsilonRules pinfoex + let FFun _ _ _ = functions pinfo ! ruleid + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs new file mode 100644 index 000000000..296a0d33b --- /dev/null +++ b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs @@ -0,0 +1,371 @@ +{-# LANGUAGE BangPatterns #-}
+module PGF.Parsing.FCFG.Incremental
+ ( ParseState
+ , ErrorState
+ , initState
+ , nextState
+ , getCompletions
+ , recoveryStates
+ , extractTrees
+ , parse
+ , parseWithRecovery
+ ) where
+
+import Data.Array.IArray
+import Data.Array.Base (unsafeAt)
+import Data.List (isPrefixOf, foldl')
+import Data.Maybe (fromMaybe, maybe)
+import qualified Data.Map as Map
+import qualified GF.Data.TrieMap as TMap
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.SortedList
+import PGF.CId
+import PGF.Data
+import PGF.Expr(Tree)
+import PGF.Macros
+import PGF.TypeCheck
+import Debug.Trace
+
+parse :: PGF -> Language -> Type -> [String] -> [Tree]
+parse pgf lang typ toks = loop (initState pgf lang typ) toks
+ where
+ loop ps [] = extractTrees ps typ
+ loop ps (t:ts) = case nextState ps t of
+ Left es -> []
+ Right ps -> loop ps ts
+
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
+parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
+ where
+ accept ps [] = extractTrees ps typ
+ accept ps (t:ts) =
+ case nextState ps t of
+ Right ps -> accept ps ts
+ Left es -> skip (recoveryStates open_typs es) ts
+
+ skip ps_map [] = extractTrees (fst ps_map) typ
+ skip ps_map (t:ts) =
+ case Map.lookup t (snd ps_map) of
+ Just ps -> accept ps ts
+ Nothing -> skip ps_map ts
+
+-- | Creates an initial parsing state for a given language and
+-- startup category.
+initState :: PGF -> Language -> Type -> ParseState
+initState pgf lang (DTyp _ start _) =
+ let items = do
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
+ [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ (lbl,seqid) <- assocs lins
+ return (Active 0 0 funid seqid args (AK cat lbl))
+
+ pinfo =
+ case lookParser pgf lang of
+ Just pinfo -> pinfo
+ _ -> error ("Unknown language: " ++ showCId lang)
+
+ in PState pgf
+ pinfo
+ (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
+ (TMap.singleton [] (Set.fromList items))
+
+-- | From the current state and the next token
+-- 'nextState' computes a new state, where the token
+-- is consumed and the current position is shifted by one.
+-- If the new token cannot be accepted then an error state
+-- is returned.
+nextState :: ParseState -> String -> Either ErrorState ParseState
+nextState (PState pgf pinfo chart items) t =
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = fromMaybe TMap.empty (Map.lookup t map_items)
+ (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in if TMap.null acc1
+ then Left (EState pgf pinfo chart2)
+ else Right (PState pgf pinfo chart2 acc1)
+ where
+ add (tok:toks) item acc
+ | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
+ add _ item acc = acc
+
+-- | If the next token is not known but only its prefix (possible empty prefix)
+-- then the 'getCompletions' function can be used to calculate the possible
+-- next words and the consequent states. This is used for word completions in
+-- the GF interpreter.
+getCompletions :: ParseState -> String -> Map.Map String ParseState
+getCompletions (PState pgf pinfo chart items) w =
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
+ (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in fmap (PState pgf pinfo chart2) acc'
+ where
+ add (tok:toks) item acc
+ | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+ add _ item acc = acc
+
+recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
+recoveryStates open_types (EState pgf pinfo chart) =
+ let open_fcats = concatMap type2fcats open_types
+ agenda = foldl (complete open_fcats) [] (actives chart)
+ (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
+ where
+ type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
+
+ complete open_fcats items ac =
+ foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ (:) (Active j' (ppos+1) funid seqid args keyc)))
+ items
+ [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac]
+
+ add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+
+-- | This function extracts the list of all completed parse trees
+-- that spans the whole input consumed so far. The trees are also
+-- limited by the category specified, which is usually
+-- the same as the startup category.
+extractTrees :: ParseState -> Type -> [Tree]
+extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
+ nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
+ where
+ (mb_agenda,acc) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
+
+ exps = do
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
+ [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ lbl <- indices lins
+ Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
+ (fvs,tree) <- go Set.empty 0 (0,fid)
+ guard (Set.null fvs)
+ return tree
+
+ go rec fcat' (d,fcat)
+ | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
+ | Set.member fcat rec = mzero
+ | otherwise = foldForest (\funid args trees ->
+ do let FFun fn _ lins = functions pinfo ! funid
+ args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
+ check_ho_fun fn args
+ `mplus`
+ trees)
+ (\const _ trees ->
+ return (freeVar const,const)
+ `mplus`
+ trees)
+ [] fcat (forest st)
+
+ check_ho_fun fun args
+ | fun == _V = return (head args)
+ | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
+ | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
+
+ mkVar (EFun v) = v
+ mkVar (EMeta _) = wildCId
+
+ freeVar (EFun v) = Set.singleton v
+ freeVar _ = Set.empty
+
+_B = mkCId "_B"
+_V = mkCId "_V"
+
+process mbt fn !seqs !funs [] acc chart = (acc,chart)
+process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
+ | inRange (bounds lin) ppos =
+ case unsafeAt lin ppos of
+ FSymCat d r -> let !fid = args !! d
+ key = AK fid r
+
+ items2 = case lookupPC (mkPK key k) (passive chart) of
+ Nothing -> items
+ Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
+ items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items)
+ (\_ _ items -> items)
+ items2 fid (forest chart)
+ in case lookupAC key (active chart) of
+ Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
+ Just set | Set.member item set -> process mbt fn seqs funs items acc chart
+ | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
+ FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ FSymKP strs vars
+ -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
+ (strs:[strs' | Alt strs' _ <- vars])
+ in process mbt fn seqs funs items acc' chart
+ FSymLit d r -> let !fid = args !! d
+ in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
+ (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ [] -> case litCatMatch fid mbt of
+ Just (toks,lit) -> let fid' = nextId chart
+ !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
+ in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
+ ,nextId=nextId chart+1
+ }
+ Nothing -> process mbt fn seqs funs items acc chart
+ | otherwise =
+ case lookupPC (mkPK key0 j) (passive chart) of
+ Nothing -> let fid = nextId chart
+
+ items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
+ Nothing -> items
+ Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
+ in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
+ in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
+ ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
+ ,nextId =nextId chart+1
+ }
+ Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
+ in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
+ where
+ !lin = unsafeAt seqs seqid
+ !k = offset chart
+
+ mkPK (AK fid lbl) j = PK fid lbl j
+
+ rhs funid lbl = unsafeAt lins lbl
+ where
+ FFun _ _ lins = unsafeAt funs funid
+
+
+updateAt :: Int -> a -> [a] -> [a]
+updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+litCatMatch fcat (Just t)
+ | fcat == fcatString = Just ([t],ELit (LStr t))
+ | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
+ _ -> Nothing }
+ | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
+ _ -> Nothing }
+ | fcat == fcatVar = Just ([t],EFun (mkCId t))
+litCatMatch _ _ = Nothing
+
+
+----------------------------------------------------------------
+-- Active Chart
+----------------------------------------------------------------
+
+data Active
+ = Active {-# UNPACK #-} !Int
+ {-# UNPACK #-} !FPointPos
+ {-# UNPACK #-} !FunId
+ {-# UNPACK #-} !SeqId
+ [FCat]
+ {-# UNPACK #-} !ActiveKey
+ deriving (Eq,Show,Ord)
+data ActiveKey
+ = AK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ deriving (Eq,Ord,Show)
+type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
+
+emptyAC :: ActiveChart
+emptyAC = IntMap.empty
+
+lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
+lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
+
+lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active]
+lookupACByFCat fcat chart =
+ case IntMap.lookup fcat chart of
+ Nothing -> []
+ Just map -> IntMap.elems map
+
+labelsAC :: FCat -> ActiveChart -> [FIndex]
+labelsAC fcat chart =
+ case IntMap.lookup fcat chart of
+ Nothing -> []
+ Just map -> IntMap.keys map
+
+insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
+insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
+
+
+----------------------------------------------------------------
+-- Passive Chart
+----------------------------------------------------------------
+
+data PassiveKey
+ = PK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !Int
+ deriving (Eq,Ord,Show)
+
+type PassiveChart = Map.Map PassiveKey FCat
+
+emptyPC :: PassiveChart
+emptyPC = Map.empty
+
+lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
+lookupPC key chart = Map.lookup key chart
+
+insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
+insertPC key fcat chart = Map.insert key fcat chart
+
+
+----------------------------------------------------------------
+-- Forest
+----------------------------------------------------------------
+
+foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
+foldForest f g b fcat forest =
+ case IntMap.lookup fcat forest of
+ Nothing -> b
+ Just set -> Set.fold foldProd b set
+ where
+ foldProd (FCoerce fcat) b = foldForest f g b fcat forest
+ foldProd (FApply funid args) b = f funid args b
+ foldProd (FConst const toks) b = g const toks b
+
+
+----------------------------------------------------------------
+-- Parse State
+----------------------------------------------------------------
+
+-- | An abstract data type whose values represent
+-- the current state in an incremental parser.
+data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
+
+data Chart
+ = Chart
+ { active :: ActiveChart
+ , actives :: [ActiveChart]
+ , passive :: PassiveChart
+ , forest :: IntMap.IntMap (Set.Set Production)
+ , nextId :: {-# UNPACK #-} !FCat
+ , offset :: {-# UNPACK #-} !Int
+ }
+ deriving Show
+
+----------------------------------------------------------------
+-- Error State
+----------------------------------------------------------------
+
+-- | An abstract data type whose values represent
+-- the state in an incremental parser after an error.
+data ErrorState = EState PGF ParserInfo Chart
diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs new file mode 100644 index 000000000..dc0b2dc4a --- /dev/null +++ b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs @@ -0,0 +1,188 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module PGF.Parsing.FCFG.Utilities where + +import Control.Monad +import Data.Array +import Data.List (groupBy) + +import PGF.CId +import PGF.Data +import PGF.Tree +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord, Show) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputBounds :: (Int, Int), + inputToken :: Assoc t [Range] + } + +input :: Ord t => [t] -> Input t +input toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] + +inputMany :: Ord t => [[t]] -> Input t +inputMany toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] + + +------------------------------------------------------------ +-- * representations of syntactical analyses + +-- ** charts as finite maps over edges + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [SyntaxNode n [e]] + +data SyntaxNode n e = SMeta + | SNode n [e] + | SString String + | SInt Integer + | SFloat Double + deriving (Eq,Ord,Show) + +groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] +groupSyntaxNodes [] = [] +groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' + where + (ess,xs') = span xs + + span [] = ([],[]) + span xs@(SNode n es:xs') + | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) + | otherwise = ([],xs) +groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs +groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs +groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs + +-- ** syntax forests + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + | FString String + | FInt Integer + | FFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap _ (FString s) = FString s + fmap _ (FInt n) = FInt n + fmap _ (FFloat f) = FFloat f + fmap _ (FMeta) = FMeta + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName _ = Nothing + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] +unifyForests (FString s1) (FString s2) + | s1 == s2 = return $ FString s1 +unifyForests (FInt n1) (FInt n2) + | n1 == n2 = return $ FInt n1 +unifyForests (FFloat f1) (FFloat f2) + | f1 == f2 = return $ FFloat f1 +unifyForests _ _ = fail "forest unification failure" + + +-- ** conversions between representations + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters +chart2forests chart isMeta = concatMap (edge2forests []) + where edge2forests edges edge + | isMeta edge = [FMeta] + | edge `elem` edges = [] + | otherwise = map (item2forest (edge:edges)) $ chart ? edge + item2forest edges (SMeta) = FMeta + item2forest edges (SNode name children) = + FNode name $ children >>= mapM (edge2forests edges) + item2forest edges (SString s) = FString s + item2forest edges (SInt n) = FInt n + item2forest edges (SFloat f) = FFloat f + + +applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] +applyProfileToForest (FNode (fun,profiles) children) + | fun == wildCId = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] + + +forest2trees :: SyntaxForest CId -> [Tree] +forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees +forest2trees (FString s) = [Lit (LStr s)] +forest2trees (FInt n) = [Lit (LInt n)] +forest2trees (FFloat f) = [Lit (LFlt f)] +forest2trees (FMeta) = [Meta 0] |
