summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Parsing/FCFG
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Parsing/FCFG
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Parsing/FCFG')
-rw-r--r--src-3.0/GF/Parsing/FCFG/Active.hs179
-rw-r--r--src-3.0/GF/Parsing/FCFG/Incremental.hs107
-rw-r--r--src-3.0/GF/Parsing/FCFG/PInfo.hs121
-rw-r--r--src-3.0/GF/Parsing/FCFG/Range.hs50
4 files changed, 457 insertions, 0 deletions
diff --git a/src-3.0/GF/Parsing/FCFG/Active.hs b/src-3.0/GF/Parsing/FCFG/Active.hs
new file mode 100644
index 000000000..df55793f8
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/Active.hs
@@ -0,0 +1,179 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- MCFG parsing, the active algorithm
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.Active (parse) where
+
+import GF.Data.GeneralDeduction
+import GF.Data.Assoc
+import GF.Data.SortedList
+import GF.Data.Utilities
+
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+
+import GF.Infra.PrintClass
+
+import GF.Parsing.FCFG.Range
+import GF.Parsing.FCFG.PInfo
+
+import Control.Monad (guard)
+
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Data.Array
+
+----------------------------------------------------------------------
+-- * parsing
+
+parse :: String -> FCFParser
+parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
+ where chart = process strategy pinfo toks axioms emptyXChart
+ axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
+ | isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
+
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec
+emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
+ where
+ FRule _ rhs _ _ = allRules pinfo ! ruleid
+
+process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
+process strategy pinfo toks [] chart = chart
+process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
+ where
+ univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat c r d -> case recs !! d of
+ [] -> case insertXChart chart item c of
+ Nothing -> chart
+ Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
+ rng <- concatRange rng (found' !! r)
+ return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
+ ++
+ do guard (isTD strategy)
+ ruleid <- topdownRules pinfo ? c
+ return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
+ in process strategy pinfo toks items chart
+ found' -> let items = do rng <- concatRange rng (found' !! r)
+ return (c, Active found rng lbl (ppos+1) node)
+ in process strategy pinfo toks items chart
+ FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
+ rng' <- concatRange rng (makeRange i j)
+ return (cat, Active found rng' lbl (ppos+1) node)
+ in process strategy pinfo toks items chart
+ | otherwise =
+ if inRange (bounds lins) (lbl+1)
+ then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
+ else univRule cat (Final (reverse (rng:found)) node) chart
+ where
+ (FRule fn _ cat lins) = allRules pinfo ! ruleid
+ lin = lins ! lbl
+ univRule cat item@(Final found' node) chart =
+ case insertXChart chart item cat of
+ Nothing -> chart
+ Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
+ let FRule _ _ _ lins = allRules pinfo ! ruleid
+ FSymCat cat r d = lins ! l ! ppos
+ rng <- concatRange rng (found' !! r)
+ return (cat, Active found rng l (ppos+1) (updateChildren node d found'))
+ ++
+ do guard (isBU strategy)
+ ruleid <- leftcornerCats pinfo ? cat
+ let FRule _ _ _ lins = allRules pinfo ! ruleid
+ FSymCat cat r d = lins ! 0 ! 0
+ return (cat, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
+
+ updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
+ updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
+ in process strategy pinfo toks items chart
+
+----------------------------------------------------------------------
+-- * XChart
+
+data Item
+ = Active RangeRec
+ Range
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !FPointPos
+ (SyntaxNode RuleId RangeRec)
+ | Final RangeRec (SyntaxNode RuleId RangeRec)
+ deriving (Eq, Ord)
+
+data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
+
+emptyXChart :: Ord c => XChart c
+emptyXChart = XChart emptyChart emptyChart
+
+insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
+ case chartInsert actives item c of
+ Nothing -> Nothing
+ Just actives -> Just (XChart actives finals)
+
+insertXChart (XChart actives finals) item@(Final _ _) c =
+ case chartInsert finals item c of
+ Nothing -> Nothing
+ Just finals -> Just (XChart actives finals)
+
+lookupXChartAct (XChart actives finals) c = chartLookup actives c
+lookupXChartFinal (XChart actives finals) c = chartLookup finals c
+
+xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec)
+xchart2syntaxchart (XChart actives finals) pinfo =
+ accumAssoc groupSyntaxNodes $
+ [ case node of
+ SNode ruleid rrecs -> let FRule fun rhs cat _ = allRules pinfo ! ruleid
+ in ((cat,found), SNode fun (zip rhs rrecs))
+ SString s -> ((cat,found), SString s)
+ SInt n -> ((cat,found), SInt n)
+ SFloat f -> ((cat,found), SFloat f)
+ | (cat, Final found node) <- chartAssocs finals
+ ]
+
+literals :: FCFPInfo -> Input FToken -> [(FCat,Item)]
+literals pinfo toks =
+ [let (c,node) = lexer t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)]
+ where
+ lexer t =
+ case reads t of
+ [(n,"")] -> (fcatInt, SInt (n::Integer))
+ _ -> case reads t of
+ [(f,"")] -> (fcatFloat, SFloat (f::Double))
+ _ -> (fcatString,SString t)
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- called with all starting categories
+initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
+initialTD pinfo starts toks =
+ do cat <- starts
+ ruleid <- topdownRules pinfo ? cat
+ return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)]
+initialBU pinfo toks =
+ do (tok,rngs) <- aAssocs (inputToken toks)
+ ruleid <- leftcornerTokens pinfo ? tok
+ let FRule _ _ cat _ = allRules pinfo ! ruleid
+ (i,j) <- rngs
+ return (cat,Active [] (makeRange i j) 0 1 (emptyChildren ruleid pinfo))
+ ++
+ do ruleid <- epsilonRules pinfo
+ let FRule _ _ cat _ = allRules pinfo ! ruleid
+ return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
diff --git a/src-3.0/GF/Parsing/FCFG/Incremental.hs b/src-3.0/GF/Parsing/FCFG/Incremental.hs
new file mode 100644
index 000000000..5ee77a061
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/Incremental.hs
@@ -0,0 +1,107 @@
+module GF.Parsing.FCFG.Incremental where
+
+import Data.Array
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.Assoc
+import GF.Data.GeneralDeduction
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities
+import GF.Parsing.FCFG.PInfo
+import GF.Parsing.FCFG.Range
+import GF.GFCC.CId
+import Debug.Trace
+
+initState :: FCFPInfo -> CId -> State
+initState pinfo start =
+ let items = do
+ starts <- Map.lookup start (startupCats pinfo)
+ c <- starts
+ ruleid <- topdownRules pinfo ? c
+ let (FRule fn args cat lins) = allRules pinfo ! ruleid
+ lbl <- indices lins
+ return (Active 0 lbl 0 ruleid args cat)
+
+ forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ args cat _) <- assocs (allRules pinfo)]
+
+ max_fid = case IntMap.maxViewWithKey forest of
+ Just ((fid,_), _) -> fid+1
+ Nothing -> 0
+
+ in process pinfo items (State emptyChart [] emptyChart Map.empty forest max_fid 0)
+
+nextState :: FCFPInfo -> FToken -> State -> State
+nextState pinfo t state =
+ process pinfo (chartLookup (tokens state) t) state{ chart=emptyChart
+ , charts=chart state : charts state
+ , tokens=emptyChart
+ , passive=Map.empty
+ , currOffset=currOffset state+1
+ }
+
+getCompletions :: State -> FToken -> [FToken]
+getCompletions state w =
+ [t | t <- chartKeys (tokens state), take (length w) t == w]
+
+process pinfo [] state = state
+process pinfo (item@(Active j lbl ppos ruleid args fid0):xitems) state
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat _ r d -> let fid = args !! d
+ in case chartInsert (chart state) item (fid,r) of
+ Nothing -> process pinfo xitems state
+ Just actCat -> let items = do exprs <- IntMap.lookup fid (forest state)
+ (Passive ruleid args) <- Set.toList exprs
+ return (Active k r 0 ruleid args fid)
+ `mplus`
+ do id <- Map.lookup (fid,r,k) (passive state)
+ return (Active j lbl (ppos+1) ruleid (updateAt d id args) fid0)
+ in process pinfo (xitems++items) state{chart=actCat}
+ FSymTok tok -> case chartInsert (tokens state) (Active j lbl (ppos+1) ruleid args fid0) tok of
+ Nothing -> process pinfo xitems state
+ Just actTok -> process pinfo xitems state{tokens=actTok}
+ | otherwise = case Map.lookup (fid0, lbl, j) (passive state) of
+ Nothing -> let fid = nextId state
+ items = do Active j' lbl ppos ruleid args fidc <- chartLookup ((chart state:charts state) !! (k-j)) (fid0,lbl)
+ let FSymCat _ _ d = rhs ruleid lbl ! ppos
+ return (Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc)
+ in process pinfo (xitems++items) state{passive=Map.insert (fid0, lbl, j) fid (passive state)
+ ,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
+ ,nextId =nextId state+1
+ }
+ Just id -> process pinfo xitems state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
+ where
+ lin = rhs ruleid lbl
+ k = currOffset state
+
+ rhs ruleid lbl = lins ! lbl
+ where
+ (FRule _ _ cat lins) = allRules pinfo ! ruleid
+
+ updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+
+data Active
+ = Active Int FIndex FPointPos RuleId [FCat] FCat
+ deriving (Eq,Show,Ord)
+data Passive
+ = Passive RuleId [FCat]
+ deriving (Eq,Ord,Show)
+
+
+data State
+ = State
+ { chart :: Chart
+ , charts :: [Chart]
+ , tokens :: ParseChart Active FToken
+ , passive :: Map.Map (FCat, FIndex, Int) FCat
+ , forest :: IntMap.IntMap (Set.Set Passive)
+ , nextId :: FCat
+ , currOffset :: Int
+ }
+ deriving Show
+
+type Chart = ParseChart Active (FCat, FIndex)
diff --git a/src-3.0/GF/Parsing/FCFG/PInfo.hs b/src-3.0/GF/Parsing/FCFG/PInfo.hs
new file mode 100644
index 000000000..8b288f2f1
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/PInfo.hs
@@ -0,0 +1,121 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- FCFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.PInfo where
+
+import GF.Infra.PrintClass
+import GF.Formalism.Utilities
+import GF.Formalism.FCFG
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Parsing.FCFG.Range
+import qualified GF.GFCC.CId as AbsGFCC
+
+import Data.Array
+import Data.Maybe
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Debug.Trace
+
+----------------------------------------------------------------------
+-- type declarations
+
+-- | the list of categories = possible starting categories
+type FCFParser = FCFPInfo
+ -> [FCat]
+ -> Input FToken
+ -> SyntaxChart FName (FCat,RangeRec)
+
+makeFinalEdge cat 0 0 = (cat, [EmptyRange])
+makeFinalEdge cat i j = (cat, [makeRange i j])
+
+------------------------------------------------------------
+-- parser information
+
+type RuleId = Int
+
+data FCFPInfo
+ = FCFPInfo { allRules :: Array RuleId FRule
+ , topdownRules :: Assoc FCat (SList RuleId)
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
+ -- , emptyRules :: [RuleId]
+ , epsilonRules :: [RuleId]
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , leftcornerCats :: Assoc FCat (SList RuleId)
+ , leftcornerTokens :: Assoc FToken (SList RuleId)
+ -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
+ , grammarCats :: SList FCat
+ , grammarToks :: SList FToken
+ , startupCats :: Map.Map AbsGFCC.CId [FCat]
+ }
+
+
+getLeftCornerTok lins
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymTok tok -> Just tok
+ _ -> Nothing
+ | otherwise = Nothing
+ where
+ syms = lins ! 0
+
+getLeftCornerCat lins
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymCat c _ _ -> Just c
+ _ -> Nothing
+ | otherwise = Nothing
+ where
+ syms = lins ! 0
+
+buildFCFPInfo :: FGrammar -> FCFPInfo
+buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $
+ FCFPInfo { allRules = allrules
+ , topdownRules = topdownrules
+ -- , emptyRules = emptyrules
+ , epsilonRules = epsilonrules
+ , leftcornerCats = leftcorncats
+ , leftcornerTokens = leftcorntoks
+ , grammarCats = grammarcats
+ , grammarToks = grammartoks
+ , startupCats = startup
+ }
+
+ where allrules = listArray (0,length grammar-1) grammar
+ topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ cat _) <- assocs allrules]
+ -- emptyrules = [ruleid | (ruleid, FRule _ [] _ _) <- assocs allrules]
+ epsilonrules = [ ruleid | (ruleid, FRule _ _ _ lins) <- assocs allrules,
+ not (inRange (bounds (lins ! 0)) 0) ]
+ leftcorncats = accumAssoc id
+ [ (fromJust (getLeftCornerCat lins), ruleid) |
+ (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
+ leftcorntoks = accumAssoc id
+ [ (fromJust (getLeftCornerTok lins), ruleid) |
+ (ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
+ grammarcats = aElems topdownrules
+ grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
+
+fcfPInfoToFGrammar :: FCFPInfo -> FGrammar
+fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo)
+
+----------------------------------------------------------------------
+-- pretty-printing of statistics
+
+instance Print FCFPInfo where
+ prt pI = "[ allRules=" ++ sl (elems . allRules) ++
+ "; tdRules=" ++ sla topdownRules ++
+ -- "; emptyRules=" ++ sl emptyRules ++
+ "; epsilonRules=" ++ sl epsilonRules ++
+ "; lcCats=" ++ sla leftcornerCats ++
+ "; lcTokens=" ++ sla leftcornerTokens ++
+ "; categories=" ++ sl grammarCats ++
+ " ]"
+
+ where sl f = show $ length $ f pI
+ sla f = let (as, bs) = unzip $ aAssocs $ f pI
+ in show (length as) ++ "/" ++ show (length (concat bs))
+
diff --git a/src-3.0/GF/Parsing/FCFG/Range.hs b/src-3.0/GF/Parsing/FCFG/Range.hs
new file mode 100644
index 000000000..24674f58b
--- /dev/null
+++ b/src-3.0/GF/Parsing/FCFG/Range.hs
@@ -0,0 +1,50 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Definitions of ranges, and operations on ranges
+-----------------------------------------------------------------------------
+
+module GF.Parsing.FCFG.Range
+ ( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
+ ) where
+
+
+-- GF modules
+import GF.Formalism.Utilities
+import GF.Infra.PrintClass
+
+------------------------------------------------------------
+-- ranges as single pairs
+
+type RangeRec = [Range]
+
+data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
+ | EmptyRange
+ deriving (Eq, Ord)
+
+makeRange :: Int -> Int -> Range
+makeRange = Range
+
+concatRange :: Range -> Range -> [Range]
+concatRange EmptyRange rng = return rng
+concatRange rng EmptyRange = return rng
+concatRange (Range i j) (Range j' k) = [Range i k | j==j']
+
+rangeEdge :: a -> Range -> Edge a
+rangeEdge a (Range i j) = Edge i j a
+
+edgeRange :: Edge a -> Range
+edgeRange (Edge i j _) = Range i j
+
+minRange :: Range -> Int
+minRange (Range i j) = i
+
+maxRange :: Range -> Int
+maxRange (Range i j) = j
+
+instance Print Range where
+ prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
+ prt (EmptyRange) = "(?)"