From c036459214852ca01868f5da81408f49b22a49e9 Mon Sep 17 00:00:00 2001 From: krasimir Date: Mon, 14 Dec 2009 10:54:22 +0000 Subject: remove the old parsing code and the -erasing=on flag --- src/runtime/haskell/PGF.hs | 21 +- src/runtime/haskell/PGF/Binary.hs | 4 +- src/runtime/haskell/PGF/BuildParser.hs | 76 ----- src/runtime/haskell/PGF/PMCFG.hs | 11 +- src/runtime/haskell/PGF/Parse.hs | 371 +++++++++++++++++++++ src/runtime/haskell/PGF/Parsing/FCFG/Active.hs | 205 ------------ .../haskell/PGF/Parsing/FCFG/Incremental.hs | 371 --------------------- src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs | 188 ----------- 8 files changed, 387 insertions(+), 860 deletions(-) delete mode 100644 src/runtime/haskell/PGF/BuildParser.hs create mode 100644 src/runtime/haskell/PGF/Parse.hs delete mode 100644 src/runtime/haskell/PGF/Parsing/FCFG/Active.hs delete mode 100644 src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs delete mode 100644 src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs (limited to 'src/runtime') diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 6c192095d..2b521e8f7 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -74,8 +74,8 @@ module PGF( -- ** Word Completion (Incremental Parsing) complete, - Incremental.ParseState, - Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees, + Parse.ParseState, + Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees, -- ** Generation generateRandom, generateAll, generateAllDepth, @@ -105,8 +105,7 @@ import PGF.Expr (Tree) import PGF.Morphology import PGF.Data hiding (functions) import PGF.Binary -import qualified PGF.Parsing.FCFG.Active as Active -import qualified PGF.Parsing.FCFG.Incremental as Incremental +import qualified PGF.Parse as Parse import qualified GF.Compile.GeneratePMCFG as PMCFG import GF.Infra.Option @@ -249,13 +248,11 @@ linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang parse pgf lang typ s = case Map.lookup lang (concretes pgf) of Just cnc -> case parser cnc of - Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" - then Incremental.parse pgf lang typ (words s) - else Active.parse "t" pinfo typ (words s) + Just pinfo -> Parse.parse pgf lang typ (words s) Nothing -> error ("No parser built for language: " ++ showCId lang) Nothing -> error ("Unknown language: " ++ showCId lang) -parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s) +parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s) canParse pgf cnc = isJust (lookParser pgf cnc) @@ -297,12 +294,12 @@ functionType pgf fun = complete pgf from typ input = let (ws,prefix) = tokensAndPrefix input - state0 = Incremental.initState pgf from typ + state0 = Parse.initState pgf from typ in case loop state0 ws of Nothing -> [] Just state -> - (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else []) - ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)] + (if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else []) + ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] where tokensAndPrefix :: String -> ([String],String) tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "") @@ -311,7 +308,7 @@ complete pgf from typ input = where ws = words s loop ps [] = Just ps - loop ps (t:ts) = case Incremental.nextState ps t of + loop ps (t:ts) = case Parse.nextState ps t of Left es -> Nothing Right ps -> loop ps ts diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index e4ed98424..7d5db73af 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -159,8 +159,8 @@ instance Binary BindType where _ -> decodingError instance Binary FFun where - put (FFun fun prof lins) = put (fun,prof,lins) - get = liftM3 FFun get get get + put (FFun fun lins) = put (fun,lins) + get = liftM2 FFun get get instance Binary FSymbol where put (FSymCat n l) = putWord8 0 >> put (n,l) diff --git a/src/runtime/haskell/PGF/BuildParser.hs b/src/runtime/haskell/PGF/BuildParser.hs deleted file mode 100644 index 23e0725c6..000000000 --- a/src/runtime/haskell/PGF/BuildParser.hs +++ /dev/null @@ -1,76 +0,0 @@ ---------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing, parser information ------------------------------------------------------------------------------ - -module PGF.BuildParser where - -import GF.Data.SortedList -import GF.Data.Assoc -import PGF.CId -import PGF.Data -import PGF.Parsing.FCFG.Utilities - -import Data.Array.IArray -import Data.Maybe -import qualified Data.IntMap as IntMap -import qualified Data.Map as Map -import qualified Data.Set as Set -import Debug.Trace - - -data ParserInfoEx - = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] - , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] - , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] - , grammarToks :: [String] - } - ------------------------------------------------------------- --- parser information - -getLeftCornerTok pinfo (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymKS [tok] -> [tok] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -getLeftCornerCat pinfo args (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat d _ -> let cat = args !! d - in case IntMap.lookup cat (productions pinfo) of - Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] - Nothing -> [cat] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -buildParserInfo :: ParserInfo -> ParserInfoEx -buildParserInfo pinfo = - ParserInfoEx { epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarToks = grammartoks - } - - where epsilonrules = [ (ruleid,args,cat) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , let (FFun _ _ lins) = (functions pinfo) ! ruleid - , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] - leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ] - leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] - grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs index c657e3d17..b9303aeb8 100644 --- a/src/runtime/haskell/PGF/PMCFG.hs +++ b/src/runtime/haskell/PGF/PMCFG.hs @@ -19,13 +19,12 @@ data FSymbol | FSymKS [String] | FSymKP [String] [Alternative] deriving (Eq,Ord,Show) -type Profile = [Int] data Production = FApply {-# UNPACK #-} !FunId [FCat] | FCoerce {-# UNPACK #-} !FCat | FConst Expr [String] deriving (Eq,Ord,Show) -data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) +data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) type FSeq = Array FPointPos FSymbol type FunId = Int type SeqId = Int @@ -39,7 +38,7 @@ data ParserInfo , sequences :: Array SeqId FSeq , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions - , startCats :: Map.Map CId [FCat] + , startCats :: Map.Map CId (FCat,FCat) , totalCats :: {-# UNPACK #-} !FCat } @@ -71,14 +70,14 @@ ppProduction (fcat,FCoerce arg) = ppProduction (fcat,FConst _ ss) = ppFCat fcat <+> text "->" <+> ppStrs ss -ppFun (funid,FFun fun _ arr) = +ppFun (funid,FFun fun arr) = ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun) ppSeq (seqid,seq) = ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq)) -ppStartCat (id,fcats) = - ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats))) +ppStartCat (id,(start,end)) = + ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>' ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>' diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs new file mode 100644 index 000000000..44ff525b4 --- /dev/null +++ b/src/runtime/haskell/PGF/Parse.hs @@ -0,0 +1,371 @@ +{-# LANGUAGE BangPatterns #-} +module PGF.Parse + ( ParseState + , ErrorState + , initState + , nextState + , getCompletions + , recoveryStates + , extractTrees + , parse + , parseWithRecovery + ) where + +import Data.Array.IArray +import Data.Array.Base (unsafeAt) +import Data.List (isPrefixOf, foldl') +import Data.Maybe (fromMaybe, maybe) +import qualified Data.Map as Map +import qualified GF.Data.TrieMap as TMap +import qualified Data.IntMap as IntMap +import qualified Data.Set as Set +import Control.Monad + +import GF.Data.SortedList +import PGF.CId +import PGF.Data +import PGF.Expr(Tree) +import PGF.Macros +import PGF.TypeCheck +import Debug.Trace + +parse :: PGF -> Language -> Type -> [String] -> [Tree] +parse pgf lang typ toks = loop (initState pgf lang typ) toks + where + loop ps [] = extractTrees ps typ + loop ps (t:ts) = case nextState ps t of + Left es -> [] + Right ps -> loop ps ts + +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree] +parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks + where + accept ps [] = extractTrees ps typ + accept ps (t:ts) = + case nextState ps t of + Right ps -> accept ps ts + Left es -> skip (recoveryStates open_typs es) ts + + skip ps_map [] = extractTrees (fst ps_map) typ + skip ps_map (t:ts) = + case Map.lookup t (snd ps_map) of + Just ps -> accept ps ts + Nothing -> skip ps_map ts + +-- | Creates an initial parsing state for a given language and +-- startup category. +initState :: PGF -> Language -> Type -> ParseState +initState pgf lang (DTyp _ start _) = + let items = do + cat <- maybe [] range (Map.lookup start (startCats pinfo)) + (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) + [] cat (productions pinfo) + let FFun fn lins = functions pinfo ! funid + (lbl,seqid) <- assocs lins + return (Active 0 0 funid seqid args (AK cat lbl)) + + pinfo = + case lookParser pgf lang of + Just pinfo -> pinfo + _ -> error ("Unknown language: " ++ showCId lang) + + in PState pgf + pinfo + (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0) + (TMap.singleton [] (Set.fromList items)) + +-- | From the current state and the next token +-- 'nextState' computes a new state, where the token +-- is consumed and the current position is shifted by one. +-- If the new token cannot be accepted then an error state +-- is returned. +nextState :: ParseState -> String -> Either ErrorState ParseState +nextState (PState pgf pinfo chart items) t = + let (mb_agenda,map_items) = TMap.decompose items + agenda = maybe [] Set.toList mb_agenda + acc = fromMaybe TMap.empty (Map.lookup t map_items) + (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart + chart2 = chart1{ active =emptyAC + , actives=active chart1 : actives chart1 + , passive=emptyPC + , offset =offset chart1+1 + } + in if TMap.null acc1 + then Left (EState pgf pinfo chart2) + else Right (PState pgf pinfo chart2 acc1) + where + add (tok:toks) item acc + | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc + add _ item acc = acc + +-- | If the next token is not known but only its prefix (possible empty prefix) +-- then the 'getCompletions' function can be used to calculate the possible +-- next words and the consequent states. This is used for word completions in +-- the GF interpreter. +getCompletions :: ParseState -> String -> Map.Map String ParseState +getCompletions (PState pgf pinfo chart items) w = + let (mb_agenda,map_items) = TMap.decompose items + agenda = maybe [] Set.toList mb_agenda + acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items + (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart + chart2 = chart1{ active =emptyAC + , actives=active chart1 : actives chart1 + , passive=emptyPC + , offset =offset chart1+1 + } + in fmap (PState pgf pinfo chart2) acc' + where + add (tok:toks) item acc + | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc + add _ item acc = acc + +recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState) +recoveryStates open_types (EState pgf pinfo chart) = + let open_fcats = concatMap type2fcats open_types + agenda = foldl (complete open_fcats) [] (actives chart) + (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart + chart2 = chart1{ active =emptyAC + , actives=active chart1 : actives chart1 + , passive=emptyPC + , offset =offset chart1+1 + } + in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc) + where + type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo)) + + complete open_fcats items ac = + foldl (Set.fold (\(Active j' ppos funid seqid args keyc) -> + (:) (Active j' (ppos+1) funid seqid args keyc))) + items + [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac] + + add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc + +-- | This function extracts the list of all completed parse trees +-- that spans the whole input consumed so far. The trees are also +-- limited by the category specified, which is usually +-- the same as the startup category. +extractTrees :: ParseState -> Type -> [Tree] +extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = + nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]] + where + (mb_agenda,acc) = TMap.decompose items + agenda = maybe [] Set.toList mb_agenda + (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart + + exps = do + cat <- maybe [] range (Map.lookup start (startCats pinfo)) + (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) + [] cat (productions pinfo) + let FFun fn lins = functions pinfo ! funid + lbl <- indices lins + Just fid <- [lookupPC (PK cat lbl 0) (passive st)] + (fvs,tree) <- go Set.empty 0 (0,fid) + guard (Set.null fvs) + return tree + + go rec fcat' (d,fcat) + | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments + | Set.member fcat rec = mzero + | otherwise = foldForest (\funid args trees -> + do let FFun fn lins = functions pinfo ! funid + args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args) + check_ho_fun fn args + `mplus` + trees) + (\const _ trees -> + return (freeVar const,const) + `mplus` + trees) + [] fcat (forest st) + + check_ho_fun fun args + | fun == _V = return (head args) + | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args)) + | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args) + + mkVar (EFun v) = v + mkVar (EMeta _) = wildCId + + freeVar (EFun v) = Set.singleton v + freeVar _ = Set.empty + +_B = mkCId "_B" +_V = mkCId "_V" + +process mbt fn !seqs !funs [] acc chart = (acc,chart) +process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart + | inRange (bounds lin) ppos = + case unsafeAt lin ppos of + FSymCat d r -> let !fid = args !! d + key = AK fid r + + items2 = case lookupPC (mkPK key k) (passive chart) of + Nothing -> items + Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items + items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items) + (\_ _ items -> items) + items2 fid (forest chart) + in case lookupAC key (active chart) of + Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} + Just set | Set.member item set -> process mbt fn seqs funs items acc chart + | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} + FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc + in process mbt fn seqs funs items acc' chart + FSymKP strs vars + -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc + (strs:[strs' | Alt strs' _ <- vars]) + in process mbt fn seqs funs items acc' chart + FSymLit d r -> let !fid = args !! d + in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of + (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc + in process mbt fn seqs funs items acc' chart + [] -> case litCatMatch fid mbt of + Just (toks,lit) -> let fid' = nextId chart + !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc + in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart) + ,nextId=nextId chart+1 + } + Nothing -> process mbt fn seqs funs items acc chart + | otherwise = + case lookupPC (mkPK key0 j) (passive chart) of + Nothing -> let fid = nextId chart + + items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of + Nothing -> items + Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) -> + let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos + in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set + in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) + ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart) + ,nextId =nextId chart+1 + } + Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items + in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)} + where + !lin = unsafeAt seqs seqid + !k = offset chart + + mkPK (AK fid lbl) j = PK fid lbl j + + rhs funid lbl = unsafeAt lins lbl + where + FFun _ lins = unsafeAt funs funid + + +updateAt :: Int -> a -> [a] -> [a] +updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] + +litCatMatch fcat (Just t) + | fcat == fcatString = Just ([t],ELit (LStr t)) + | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n)); + _ -> Nothing } + | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d)); + _ -> Nothing } + | fcat == fcatVar = Just ([t],EFun (mkCId t)) +litCatMatch _ _ = Nothing + + +---------------------------------------------------------------- +-- Active Chart +---------------------------------------------------------------- + +data Active + = Active {-# UNPACK #-} !Int + {-# UNPACK #-} !FPointPos + {-# UNPACK #-} !FunId + {-# UNPACK #-} !SeqId + [FCat] + {-# UNPACK #-} !ActiveKey + deriving (Eq,Show,Ord) +data ActiveKey + = AK {-# UNPACK #-} !FCat + {-# UNPACK #-} !FIndex + deriving (Eq,Ord,Show) +type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active)) + +emptyAC :: ActiveChart +emptyAC = IntMap.empty + +lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active) +lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l + +lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active] +lookupACByFCat fcat chart = + case IntMap.lookup fcat chart of + Nothing -> [] + Just map -> IntMap.elems map + +labelsAC :: FCat -> ActiveChart -> [FIndex] +labelsAC fcat chart = + case IntMap.lookup fcat chart of + Nothing -> [] + Just map -> IntMap.keys map + +insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart +insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart + + +---------------------------------------------------------------- +-- Passive Chart +---------------------------------------------------------------- + +data PassiveKey + = PK {-# UNPACK #-} !FCat + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !Int + deriving (Eq,Ord,Show) + +type PassiveChart = Map.Map PassiveKey FCat + +emptyPC :: PassiveChart +emptyPC = Map.empty + +lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat +lookupPC key chart = Map.lookup key chart + +insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart +insertPC key fcat chart = Map.insert key fcat chart + + +---------------------------------------------------------------- +-- Forest +---------------------------------------------------------------- + +foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b +foldForest f g b fcat forest = + case IntMap.lookup fcat forest of + Nothing -> b + Just set -> Set.fold foldProd b set + where + foldProd (FCoerce fcat) b = foldForest f g b fcat forest + foldProd (FApply funid args) b = f funid args b + foldProd (FConst const toks) b = g const toks b + + +---------------------------------------------------------------- +-- Parse State +---------------------------------------------------------------- + +-- | An abstract data type whose values represent +-- the current state in an incremental parser. +data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active)) + +data Chart + = Chart + { active :: ActiveChart + , actives :: [ActiveChart] + , passive :: PassiveChart + , forest :: IntMap.IntMap (Set.Set Production) + , nextId :: {-# UNPACK #-} !FCat + , offset :: {-# UNPACK #-} !Int + } + deriving Show + +---------------------------------------------------------------- +-- Error State +---------------------------------------------------------------- + +-- | An abstract data type whose values represent +-- the state in an incremental parser after an error. +data ErrorState = EState PGF ParserInfo Chart diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs deleted file mode 100644 index e88926f6e..000000000 --- a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs +++ /dev/null @@ -1,205 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- MCFG parsing, the active algorithm ------------------------------------------------------------------------------ - -module PGF.Parsing.FCFG.Active (parse) where - -import GF.Data.Assoc -import GF.Data.SortedList -import GF.Data.Utilities -import qualified GF.Data.MultiMap as MM - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Parsing.FCFG.Utilities -import PGF.BuildParser - -import Control.Monad (guard) - -import qualified Data.List as List -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Data.Array.IArray -import Debug.Trace - ----------------------------------------------------------------------- --- * parsing - -type FToken = String - -makeFinalEdge cat 0 0 = (cat, [EmptyRange]) -makeFinalEdge cat i j = (cat, [makeRange i j]) - --- | the list of categories = possible starting categories -parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr] -parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees - where - inTokens = input toks - starts = Map.findWithDefault [] start (startCats pinfo) - schart = xchart2syntaxchart chart pinfo - (i,j) = inputBounds inTokens - finalEdges = [makeFinalEdge cat i j | cat <- starts] - forests = chart2forests schart (const False) finalEdges - filteredForests = forests >>= applyProfileToForest - - pinfoex = buildParserInfo pinfo - - chart = process strategy pinfo pinfoex inTokens axioms emptyXChart - axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens - | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens - -isBU s = s=="b" -isTD s = s=="t" - --- used in prediction -emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec -emptyChildren ruleid args = SNode ruleid (replicate (length args) []) - - -process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat -process strategy pinfo pinfoex toks [] chart = chart -process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart - where - univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart - | inRange (bounds lin) ppos = - case lin ! ppos of - FSymCat d r -> let c = args !! d - in case recs !! d of - [] -> case insertXChart chart item c of - Nothing -> chart - Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c - rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) - ++ - do guard (isTD strategy) - (ruleid,args) <- topdownRules pinfo c - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) - in process strategy pinfo pinfoex toks items chart - found' -> let items = do rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - FSymKS [tok] - -> let items = do t_rng <- inputToken toks ? tok - rng' <- concatRange rng t_rng - return (Active found rng' lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - | otherwise = - if inRange (bounds lins) (lbl+1) - then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart - else univRule (Final (reverse (rng:found)) node args cat) chart - where - (FFun _ _ lins) = functions pinfo ! ruleid - lin = sequences pinfo ! (lins ! lbl) - univRule item@(Final found' node args cat) chart = - case insertXChart chart item cat of - Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos - rng <- concatRange rng (found' !! r) - return (Active found rng l (ppos+1) (updateChildren node d found') args c) - ++ - do guard (isBU strategy) - (ruleid,args,c) <- leftcornerCats pinfoex ? cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 - return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) - - updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec - updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo pinfoex toks items chart - ----------------------------------------------------------------------- --- * XChart - -data Item - = Active RangeRec - Range - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !FPointPos - (SyntaxNode FunId RangeRec) - [FCat] - FCat - | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat - deriving (Eq, Ord, Show) - -data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) - -emptyXChart :: Ord c => XChart c -emptyXChart = XChart MM.empty MM.empty - -insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = - case MM.insert' c item actives of - Nothing -> Nothing - Just actives -> Just (XChart actives finals) - -insertXChart (XChart actives finals) item@(Final _ _ _ _) c = - case MM.insert' c item finals of - Nothing -> Nothing - Just finals -> Just (XChart actives finals) - -lookupXChartAct (XChart actives finals) c = actives MM.! c -lookupXChartFinal (XChart actives finals) c = finals MM.! c - -xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) -xchart2syntaxchart (XChart actives finals) pinfo = - accumAssoc groupSyntaxNodes $ - [ case node of - SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid - in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) - SString s -> ((cat,found), SString s) - SInt n -> ((cat,found), SInt n) - SFloat f -> ((cat,found), SFloat f) - | (Final found node rhs cat) <- MM.elems finals - ] - -literals :: ParserInfoEx -> Input FToken -> [Item] -literals pinfoex toks = - [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] - where - lexer t = - case reads t of - [(n,"")] -> (fcatInt, SInt (n::Integer)) - _ -> case reads t of - [(f,"")] -> (fcatFloat, SFloat (f::Double)) - _ -> (fcatString,SString t) - - ----------------------------------------------------------------------- --- Earley -- - --- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] -initialTD pinfo starts toks = - do cat <- starts - (ruleid,args) <- topdownRules pinfo cat - return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) - -topdownRules pinfo cat = f cat [] - where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) - - g (FApply ruleid args) rules = (ruleid,args) : rules - g (FCoerce cat) rules = f cat rules - - ----------------------------------------------------------------------- --- Kilbury -- - -initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -initialBU pinfo pinfoex toks = - do (tok,rngs) <- aAssocs (inputToken toks) - (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok - rng <- rngs - return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) - ++ - do (ruleid,args,cat) <- epsilonRules pinfoex - let FFun _ _ _ = functions pinfo ! ruleid - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs deleted file mode 100644 index 296a0d33b..000000000 --- a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs +++ /dev/null @@ -1,371 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -module PGF.Parsing.FCFG.Incremental - ( ParseState - , ErrorState - , initState - , nextState - , getCompletions - , recoveryStates - , extractTrees - , parse - , parseWithRecovery - ) where - -import Data.Array.IArray -import Data.Array.Base (unsafeAt) -import Data.List (isPrefixOf, foldl') -import Data.Maybe (fromMaybe, maybe) -import qualified Data.Map as Map -import qualified GF.Data.TrieMap as TMap -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Control.Monad - -import GF.Data.SortedList -import PGF.CId -import PGF.Data -import PGF.Expr(Tree) -import PGF.Macros -import PGF.TypeCheck -import Debug.Trace - -parse :: PGF -> Language -> Type -> [String] -> [Tree] -parse pgf lang typ toks = loop (initState pgf lang typ) toks - where - loop ps [] = extractTrees ps typ - loop ps (t:ts) = case nextState ps t of - Left es -> [] - Right ps -> loop ps ts - -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree] -parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks - where - accept ps [] = extractTrees ps typ - accept ps (t:ts) = - case nextState ps t of - Right ps -> accept ps ts - Left es -> skip (recoveryStates open_typs es) ts - - skip ps_map [] = extractTrees (fst ps_map) typ - skip ps_map (t:ts) = - case Map.lookup t (snd ps_map) of - Just ps -> accept ps ts - Nothing -> skip ps_map ts - --- | Creates an initial parsing state for a given language and --- startup category. -initState :: PGF -> Language -> Type -> ParseState -initState pgf lang (DTyp _ start _) = - let items = do - cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) - let FFun fn _ lins = functions pinfo ! funid - (lbl,seqid) <- assocs lins - return (Active 0 0 funid seqid args (AK cat lbl)) - - pinfo = - case lookParser pgf lang of - Just pinfo -> pinfo - _ -> error ("Unknown language: " ++ showCId lang) - - in PState pgf - pinfo - (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0) - (TMap.singleton [] (Set.fromList items)) - --- | From the current state and the next token --- 'nextState' computes a new state, where the token --- is consumed and the current position is shifted by one. --- If the new token cannot be accepted then an error state --- is returned. -nextState :: ParseState -> String -> Either ErrorState ParseState -nextState (PState pgf pinfo chart items) t = - let (mb_agenda,map_items) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - acc = fromMaybe TMap.empty (Map.lookup t map_items) - (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in if TMap.null acc1 - then Left (EState pgf pinfo chart2) - else Right (PState pgf pinfo chart2 acc1) - where - add (tok:toks) item acc - | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc - add _ item acc = acc - --- | If the next token is not known but only its prefix (possible empty prefix) --- then the 'getCompletions' function can be used to calculate the possible --- next words and the consequent states. This is used for word completions in --- the GF interpreter. -getCompletions :: ParseState -> String -> Map.Map String ParseState -getCompletions (PState pgf pinfo chart items) w = - let (mb_agenda,map_items) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items - (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in fmap (PState pgf pinfo chart2) acc' - where - add (tok:toks) item acc - | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc - add _ item acc = acc - -recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState) -recoveryStates open_types (EState pgf pinfo chart) = - let open_fcats = concatMap type2fcats open_types - agenda = foldl (complete open_fcats) [] (actives chart) - (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc) - where - type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo)) - - complete open_fcats items ac = - foldl (Set.fold (\(Active j' ppos funid seqid args keyc) -> - (:) (Active j' (ppos+1) funid seqid args keyc))) - items - [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac] - - add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc - --- | This function extracts the list of all completed parse trees --- that spans the whole input consumed so far. The trees are also --- limited by the category specified, which is usually --- the same as the startup category. -extractTrees :: ParseState -> Type -> [Tree] -extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = - nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]] - where - (mb_agenda,acc) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart - - exps = do - cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) - let FFun fn _ lins = functions pinfo ! funid - lbl <- indices lins - Just fid <- [lookupPC (PK cat lbl 0) (passive st)] - (fvs,tree) <- go Set.empty 0 (0,fid) - guard (Set.null fvs) - return tree - - go rec fcat' (d,fcat) - | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments - | Set.member fcat rec = mzero - | otherwise = foldForest (\funid args trees -> - do let FFun fn _ lins = functions pinfo ! funid - args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args) - check_ho_fun fn args - `mplus` - trees) - (\const _ trees -> - return (freeVar const,const) - `mplus` - trees) - [] fcat (forest st) - - check_ho_fun fun args - | fun == _V = return (head args) - | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args)) - | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args) - - mkVar (EFun v) = v - mkVar (EMeta _) = wildCId - - freeVar (EFun v) = Set.singleton v - freeVar _ = Set.empty - -_B = mkCId "_B" -_V = mkCId "_V" - -process mbt fn !seqs !funs [] acc chart = (acc,chart) -process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart - | inRange (bounds lin) ppos = - case unsafeAt lin ppos of - FSymCat d r -> let !fid = args !! d - key = AK fid r - - items2 = case lookupPC (mkPK key k) (passive chart) of - Nothing -> items - Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items - items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items) - (\_ _ items -> items) - items2 fid (forest chart) - in case lookupAC key (active chart) of - Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} - Just set | Set.member item set -> process mbt fn seqs funs items acc chart - | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} - FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc - in process mbt fn seqs funs items acc' chart - FSymKP strs vars - -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc - (strs:[strs' | Alt strs' _ <- vars]) - in process mbt fn seqs funs items acc' chart - FSymLit d r -> let !fid = args !! d - in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of - (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc - in process mbt fn seqs funs items acc' chart - [] -> case litCatMatch fid mbt of - Just (toks,lit) -> let fid' = nextId chart - !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc - in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart) - ,nextId=nextId chart+1 - } - Nothing -> process mbt fn seqs funs items acc chart - | otherwise = - case lookupPC (mkPK key0 j) (passive chart) of - Nothing -> let fid = nextId chart - - items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of - Nothing -> items - Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) -> - let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos - in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set - in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) - ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart) - ,nextId =nextId chart+1 - } - Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items - in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)} - where - !lin = unsafeAt seqs seqid - !k = offset chart - - mkPK (AK fid lbl) j = PK fid lbl j - - rhs funid lbl = unsafeAt lins lbl - where - FFun _ _ lins = unsafeAt funs funid - - -updateAt :: Int -> a -> [a] -> [a] -updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] - -litCatMatch fcat (Just t) - | fcat == fcatString = Just ([t],ELit (LStr t)) - | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n)); - _ -> Nothing } - | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d)); - _ -> Nothing } - | fcat == fcatVar = Just ([t],EFun (mkCId t)) -litCatMatch _ _ = Nothing - - ----------------------------------------------------------------- --- Active Chart ----------------------------------------------------------------- - -data Active - = Active {-# UNPACK #-} !Int - {-# UNPACK #-} !FPointPos - {-# UNPACK #-} !FunId - {-# UNPACK #-} !SeqId - [FCat] - {-# UNPACK #-} !ActiveKey - deriving (Eq,Show,Ord) -data ActiveKey - = AK {-# UNPACK #-} !FCat - {-# UNPACK #-} !FIndex - deriving (Eq,Ord,Show) -type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active)) - -emptyAC :: ActiveChart -emptyAC = IntMap.empty - -lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active) -lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l - -lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active] -lookupACByFCat fcat chart = - case IntMap.lookup fcat chart of - Nothing -> [] - Just map -> IntMap.elems map - -labelsAC :: FCat -> ActiveChart -> [FIndex] -labelsAC fcat chart = - case IntMap.lookup fcat chart of - Nothing -> [] - Just map -> IntMap.keys map - -insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart -insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart - - ----------------------------------------------------------------- --- Passive Chart ----------------------------------------------------------------- - -data PassiveKey - = PK {-# UNPACK #-} !FCat - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !Int - deriving (Eq,Ord,Show) - -type PassiveChart = Map.Map PassiveKey FCat - -emptyPC :: PassiveChart -emptyPC = Map.empty - -lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat -lookupPC key chart = Map.lookup key chart - -insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart -insertPC key fcat chart = Map.insert key fcat chart - - ----------------------------------------------------------------- --- Forest ----------------------------------------------------------------- - -foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b -foldForest f g b fcat forest = - case IntMap.lookup fcat forest of - Nothing -> b - Just set -> Set.fold foldProd b set - where - foldProd (FCoerce fcat) b = foldForest f g b fcat forest - foldProd (FApply funid args) b = f funid args b - foldProd (FConst const toks) b = g const toks b - - ----------------------------------------------------------------- --- Parse State ----------------------------------------------------------------- - --- | An abstract data type whose values represent --- the current state in an incremental parser. -data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active)) - -data Chart - = Chart - { active :: ActiveChart - , actives :: [ActiveChart] - , passive :: PassiveChart - , forest :: IntMap.IntMap (Set.Set Production) - , nextId :: {-# UNPACK #-} !FCat - , offset :: {-# UNPACK #-} !Int - } - deriving Show - ----------------------------------------------------------------- --- Error State ----------------------------------------------------------------- - --- | An abstract data type whose values represent --- the state in an incremental parser after an error. -data ErrorState = EState PGF ParserInfo Chart diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs deleted file mode 100644 index dc0b2dc4a..000000000 --- a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs +++ /dev/null @@ -1,188 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/13 12:40:19 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ --- --- Basic type declarations and functions for grammar formalisms ------------------------------------------------------------------------------ - - -module PGF.Parsing.FCFG.Utilities where - -import Control.Monad -import Data.Array -import Data.List (groupBy) - -import PGF.CId -import PGF.Data -import PGF.Tree -import GF.Data.Assoc -import GF.Data.Utilities (sameLength, foldMerge, splitBy) - - ------------------------------------------------------------- --- ranges as single pairs - -type RangeRec = [Range] - -data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | EmptyRange - deriving (Eq, Ord, Show) - -makeRange :: Int -> Int -> Range -makeRange = Range - -concatRange :: Range -> Range -> [Range] -concatRange EmptyRange rng = return rng -concatRange rng EmptyRange = return rng -concatRange (Range i j) (Range j' k) = [Range i k | j==j'] - -minRange :: Range -> Int -minRange (Range i j) = i - -maxRange :: Range -> Int -maxRange (Range i j) = j - - ------------------------------------------------------------- --- * representaions of input tokens - -data Input t = MkInput { inputBounds :: (Int, Int), - inputToken :: Assoc t [Range] - } - -input :: Ord t => [t] -> Input t -input toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] - -inputMany :: Ord t => [[t]] -> Input t -inputMany toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] - - ------------------------------------------------------------- --- * representations of syntactical analyses - --- ** charts as finite maps over edges - --- | The values of the chart, a list of key-daughters pairs, --- has unique keys. In essence, it is a map from 'n' to daughters. --- The daughters should be a set (not necessarily sorted) of rhs's. -type SyntaxChart n e = Assoc e [SyntaxNode n [e]] - -data SyntaxNode n e = SMeta - | SNode n [e] - | SString String - | SInt Integer - | SFloat Double - deriving (Eq,Ord,Show) - -groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] -groupSyntaxNodes [] = [] -groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' - where - (ess,xs') = span xs - - span [] = ([],[]) - span xs@(SNode n es:xs') - | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) - | otherwise = ([],xs) -groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs -groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs -groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs - --- ** syntax forests - -data SyntaxForest n = FMeta - | FNode n [[SyntaxForest n]] - -- ^ The outer list should be a set (not necessarily sorted) - -- of possible alternatives. Ie. the outer list - -- is a disjunctive node, and the inner lists - -- are (conjunctive) concatenative nodes - | FString String - | FInt Integer - | FFloat Double - deriving (Eq, Ord, Show) - -instance Functor SyntaxForest where - fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests - fmap _ (FString s) = FString s - fmap _ (FInt n) = FInt n - fmap _ (FFloat f) = FFloat f - fmap _ (FMeta) = FMeta - -forestName :: SyntaxForest n -> Maybe n -forestName (FNode n _) = Just n -forestName _ = Nothing - -unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) -unifyManyForests = foldM unifyForests FMeta - --- | two forests can be unified, if either is 'FMeta', or both have the same parent, --- and all children can be unified -unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) -unifyForests FMeta forest = return forest -unifyForests forest FMeta = return forest -unifyForests (FNode name1 children1) (FNode name2 children2) - | name1 == name2 && not (null children) = return $ FNode name1 children - where children = [ forests | forests1 <- children1, forests2 <- children2, - sameLength forests1 forests2, - forests <- zipWithM unifyForests forests1 forests2 ] -unifyForests (FString s1) (FString s2) - | s1 == s2 = return $ FString s1 -unifyForests (FInt n1) (FInt n2) - | n1 == n2 = return $ FInt n1 -unifyForests (FFloat f1) (FFloat f2) - | f1 == f2 = return $ FFloat f1 -unifyForests _ _ = fail "forest unification failure" - - --- ** conversions between representations - -chart2forests :: (Ord n, Ord e) => - SyntaxChart n e -- ^ The complete chart - -> (e -> Bool) -- ^ When is an edge 'FMeta'? - -> [e] -- ^ The starting edges - -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. - -- In essence, the result is a map from 'n' to forest daughters -chart2forests chart isMeta = concatMap (edge2forests []) - where edge2forests edges edge - | isMeta edge = [FMeta] - | edge `elem` edges = [] - | otherwise = map (item2forest (edge:edges)) $ chart ? edge - item2forest edges (SMeta) = FMeta - item2forest edges (SNode name children) = - FNode name $ children >>= mapM (edge2forests edges) - item2forest edges (SString s) = FString s - item2forest edges (SInt n) = FInt n - item2forest edges (SFloat f) = FFloat f - - -applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] -applyProfileToForest (FNode (fun,profiles) children) - | fun == wildCId = concat chForests - | otherwise = [ FNode fun chForests | not (null chForests) ] - where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | - forests0 <- children, - forests <- mapM applyProfileToForest forests0 ] -applyProfileToForest (FString s) = [FString s] -applyProfileToForest (FInt n) = [FInt n] -applyProfileToForest (FFloat f) = [FFloat f] -applyProfileToForest (FMeta) = [FMeta] - - -forest2trees :: SyntaxForest CId -> [Tree] -forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees -forest2trees (FString s) = [Lit (LStr s)] -forest2trees (FInt n) = [Lit (LInt n)] -forest2trees (FFloat f) = [Lit (LFlt f)] -forest2trees (FMeta) = [Meta 0] -- cgit v1.2.3