diff options
| author | peb <unknown> | 2005-04-20 11:49:44 +0000 |
|---|---|---|
| committer | peb <unknown> | 2005-04-20 11:49:44 +0000 |
| commit | 78108f7817fbf3269bb75f278eb9a8540737873e (patch) | |
| tree | 6fc47a586e0d4eb223fc5b1bc3a25b1ef77762c8 /src/GF/Parsing/MCFG | |
| parent | 5621344c73f75f6d5a89ec77c6a4b432f391b16d (diff) | |
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing/MCFG')
| -rw-r--r-- | src/GF/Parsing/MCFG/Active.hs | 314 | ||||
| -rw-r--r-- | src/GF/Parsing/MCFG/Incremental.hs | 123 | ||||
| -rw-r--r-- | src/GF/Parsing/MCFG/Naive.hs | 83 | ||||
| -rw-r--r-- | src/GF/Parsing/MCFG/PInfo.hs | 17 | ||||
| -rw-r--r-- | src/GF/Parsing/MCFG/Range.hs | 24 | ||||
| -rw-r--r-- | src/GF/Parsing/MCFG/ViaCFG.hs | 183 |
6 files changed, 529 insertions, 215 deletions
diff --git a/src/GF/Parsing/MCFG/Active.hs b/src/GF/Parsing/MCFG/Active.hs index 2287b17d4..dd8516379 100644 --- a/src/GF/Parsing/MCFG/Active.hs +++ b/src/GF/Parsing/MCFG/Active.hs @@ -1,174 +1,186 @@ -{-- Module -------------------------------------------------------------------- - Filename: ActiveParse.hs - Author: Håkan Burden - Time-stamp: <2005-04-18, 14:25> - - Description: An agenda-driven implementation of algorithm 4.6, Active parsing - of PMCFG, as described in Ljunglöf (2004) -------------------------------------------------------------------------------} - -module ActiveParse where - - --- GF modules -import Examples -import GeneralChart -import MCFGrammar -import MCFParser -import Nondet -import Parser -import Range - - -{-- Datatypes ----------------------------------------------------------------- - AChart: A RedBlackMap with Items and Keys - Item : - AKey : -------------------------------------------------------------------------------} -data Item n c l = Active (AbstractRule n c) - (RangeRec l) - Range - (Lin c l Range) - (LinRec c l Range) - [RangeRec l] - | Passive (AbstractRule n c) (RangeRec l) [RangeRec l] - deriving (Eq, Ord, Show) -type AChart n c l = ParseChart (Item n c l) (AKey c) +module GF.NewParsing.MCFG.Active (parse) where + +import GF.NewParsing.GeneralChart +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities +import GF.NewParsing.MCFG.Range +import GF.NewParsing.MCFG.PInfo +import GF.System.Tracing +import Monad (guard) + +---------------------------------------------------------------------- +-- * parsing + +parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t +parse strategy mcfg starts toks + = [ Abs (cat, found) (zip rhs rrecs) fun | + Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ] + where chart = process strategy mcfg starts toks + +process :: (Ord n, Ord c, Ord l, Ord t) => + String -> MCFGrammar c n l t -> [c] -> Input t -> AChart c n l +process strategy mcfg starts toks + = trace2 "MCFG.Active - strategy" (if isBU strategy then "BU" + else if isTD strategy then "TD" else "None") $ + tracePrt "MCFG.Active - chart size" prtSizes $ + buildChart keyof (complete : combine : convert : rules) axioms + where rules | isNil strategy = [scan] + | isBU strategy = [predictKilbury mcfg toks] + | isTD strategy = [predictEarley mcfg toks] + axioms | isNil strategy = predict mcfg toks + | isBU strategy = terminal mcfg toks + | isTD strategy = initial mcfg starts toks + +isNil s = s=="n" +isBU s = s=="b" +isTD s = s=="t" + +---------------------------------------------------------------------- +-- * type definitions + +type AChart c n l = ParseChart (Item c n l) (AKey c) + +data Item c n l = Active (Abstract c n) + (RangeRec l) + Range + (Lin c l Range) + (LinRec c l Range) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] + | Passive c (RangeRec l) + deriving (Eq, Ord, Show) data AKey c = Act c | Pass c | Useless + | Fin deriving (Eq, Ord, Show) -keyof :: Item n c l -> AKey c +keyof :: Item c n l -> AKey c keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next -keyof (Passive (_, cat, _) _ _) = Pass cat -keyof _ = Useless - - -{-- Parsing ------------------------------------------------------------------- - recognize: - parse : Builds a chart from the initial agenda, given by prediction, and - the inference rules - keyof : Given an Item returns an appropriate Key for the Chart -------------------------------------------------------------------------------} - -recognize strategy mcfg toks = chartMember - (parse strategy mcfg toks) item (keyof item) - where n = length toks - n2 = n `div` 2 - item = (Passive ("f", S, [A]) - [("s",Range (0,n))] - [[("p",Range (0,n2)),("q",Range (n2,n))]]) - - -parse :: (Ord n, Ord c, Ord l, Eq t) => Strategy -> Grammar n c l t -> [t] - -> ParseChart (Item n c l) (AKey c) -parse (False,False) mcfg toks = buildChart keyof - [complete, scan, combine, convert] - (predict mcfg toks) -parse (True, False) mcfg toks = buildChart keyof - [predictKilbury mcfg toks, complete, combine, convert] - (terminal mcfg toks) -parse (False, True) mcfg toks = buildChart keyof - [predictEarley mcfg toks, complete, scan, combine, convert] - (initial (take 1 mcfg) toks) - -predictKilbury mcfg toks _ (Passive (_, cat, _) found _) = - [ Active (f, a, rhs) [] rng lin' lins' daughters | - Rule a rhs ((Lin l ((Cat (cat', r, i)):syms)):lins) f <- mcfg, - cat == cat', - lin' : lins' <- solutions $ rangeRestRec toks (Lin l syms : lins), - -- lins' <- solutions $ rangeRestRec toks lins, - rng <- solutions $ projection r found, - let daughters = (replaceRec (replicate (length rhs) []) i found) ] -predictKilbury _ _ _ _ = [] - -predictEarley mcfg toks _ item@(Active _ _ _ (Lin _ ((Cat (cat, _, _)):_)) _ _) = - concat [ predEar toks item rule | - rule@(Rule cat' _ _ _) <- mcfg, cat == cat' ] -predictEarley _ _ _ _ = [] - -predEar toks _ (Rule cat [] lins f) = - [ Passive (f, cat, []) (makeRangeRec lins') [] | - lins' <- solutions $ rangeRestRec toks lins ] -predEar toks (Active _ _ (Range (_,j)) _ _ _) (Rule cat rhs lins f) = - [ Active (f, cat, rhs) [] (Range (j, j)) lin' lins' (replicate (length rhs) []) | - (lin':lins') <- solutions $ rangeRestRec toks lins ] -predEar toks (Active _ _ EmptyRange _ _ _) (Rule cat rhs lins f) = - [ Active (f, cat, rhs) [] EmptyRange lin' lins' (replicate (length rhs) []) | - (lin':lins') <- solutions $ rangeRestRec toks lins ] - - -{--Inference rules ------------------------------------------------------------ - predict : Creates an Active Item of every Rule in the Grammar to give the - initial Agenda - complete: - scan : - combine : Creates an Active Item every time it is possible to combine - an Active Item from the agenda with a Passive Item from the Chart - convert : Active Items with nothing to find are converted to Passive Items -------------------------------------------------------------------------------} - -predict :: Eq t => Grammar n c l t -> [t] -> [Item n c l] -predict grammar toks = [ Active (f, cat, rhs) [] EmptyRange lin' lins' - (replicate (length rhs) []) | - Rule cat rhs lins f <- grammar, - (lin':lins') <- solutions $ rangeRestRec toks lins ] - - -complete :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l - -> [Item n c l] -complete _ (Active rule found (Range (i, j)) (Lin l []) (lin:lins) recs) = - [ Active rule (found ++ [(l, Range (i,j))]) EmptyRange lin lins recs ] +keyof (Final _ _ _) = Fin +keyof (Passive cat _) = Pass cat +keyof _ = Useless + +-- to be used in prediction +emptyChildren :: Abstract c n -> [RangeRec l] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +-- for tracing purposes +prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++ + ", passive=" ++ show (sum [length (chartLookup chart k) | + k@(Pass _) <- chartKeys chart ]) ++ + ", active=" ++ show (sum [length (chartLookup chart k) | + k@(Act _) <- chartKeys chart ]) ++ + ", useless=" ++ show (length (chartLookup chart Useless)) + + +---------------------------------------------------------------------- +-- * inference rules + +-- completion +complete :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +complete _ (Active rule found rng (Lin l []) (lin:lins) recs) = + return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs complete _ _ = [] - -scan :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l - -> [Item n c l] -scan _ (Active rule found rng (Lin l ((Tok rng'):syms)) lins recs) = - [ Active rule found rng'' (Lin l syms) lins recs | - rng'' <- solutions $ concRanges rng rng' ] +-- scanning +scan :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) = + do rng'' <- concatRange rng rng' + return $ Active rule found rng'' (Lin l syms) lins recs scan _ _ = [] - -combine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l - -> [Item n c l] -combine chart (Active rule found rng (Lin l ((Cat (c, r, d)):syms)) lins recs) = - [ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found') | - Passive _ found' _ <- chartLookup chart (Pass c), - rng' <- solutions $ projection r found', - rng'' <- solutions $ concRanges rng rng', - subsumes (recs !! d) found' ] -combine chart (Passive (_, c, _) found _) = - [ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found) | - Active rule found' rng' (Lin l ((Cat (c, r, d)):syms)) lins recs' - <- chartLookup chart (Act c), - rng'' <- solutions $ projection r found, - rng <- solutions $ concRanges rng' rng'', - subsumes (recs' !! d) found ] +-- | Creates an Active Item every time it is possible to combine +-- an Active Item from the agenda with a Passive Item from the Chart +combine :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +combine chart (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) = + do Passive _c found' <- chartLookup chart (Pass c) + rng' <- projection r found' + rng'' <- concatRange rng rng' + guard $ subsumes (recs !! d) found' + return $ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found') +combine chart (Passive c found) = + do Active rule found' rng' (Lin l ((Cat (_c, r, d)):syms)) lins recs' + <- chartLookup chart (Act c) + rng'' <- projection r found + rng <- concatRange rng' rng'' + guard $ subsumes (recs' !! d) found + return $ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found) combine _ _ = [] -convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l - -> [Item n c l] -convert _ (Active rule found rng (Lin l []) [] recs) = - [ Passive rule (found ++ [(l, rng)]) recs ] +-- | Active Items with nothing to find are converted to Final items, +-- which in turn are converted to Passive Items +convert :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l] +convert _ (Active rule found rng (Lin lbl []) [] recs) = + return $ Final rule (found ++ [(lbl,rng)]) recs +convert _ (Final (Abs cat _ _) found _) = + return $ Passive cat found convert _ _ = [] +---------------------------------------------------------------------- +-- Naive -- + +-- | Creates an Active Item of every Rule in the Grammar to give the initial Agenda +predict :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l] +predict grammar toks = + do Rule abs (Cnc _ _ lins) <- grammar + (lin':lins') <- rangeRestRec toks lins + return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs) +---------------------------------------------------------------------- -- Earley -- --- anropas med alla startregler -initial :: Eq t => [Rule n c l t] -> [t] -> [Item n c l] -initial starts toks = - [ Active (f, s, rhs) [] (Range (0, 0)) lin' lins' (replicate (length rhs) []) | - Rule s rhs lins f <- starts, - (lin':lins') <- solutions $ rangeRestRec toks lins ] +-- anropas med alla startkategorier +initial :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> [c] -> Input t -> [Item c n l] +initial mcfg starts toks = + do Rule abs@(Abs cat _ _) (Cnc _ _ lins) <- mcfg + guard $ cat `elem` starts + lin' : lins' <- rangeRestRec toks lins + return $ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) + +-- earley prediction +predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t + -> AChart c n l -> Item c n l -> [Item c n l] +predictEarley mcfg toks _ (Active _ _ rng (Lin _ (Cat (cat,_,_):_)) _ _) = + do rule@(Rule (Abs cat' _ _) _) <- mcfg + guard $ cat == cat' + predEar toks rng rule +predictEarley _ _ _ _ = [] + +predEar :: (Ord c, Ord n, Ord l, Ord t) => + Input t -> Range -> MCFRule c n l t -> [Item c n l] +predEar toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) = + do lins' <- rangeRestRec toks lins + return $ Final abs (makeRangeRec lins') [] +predEar toks rng (Rule abs (Cnc _ _ lins)) = + do lin' : lins' <- rangeRestRec toks lins + return $ Active abs [] (makeMaxRange rng) lin' lins' (emptyChildren abs) +makeMaxRange (Range (_, j)) = Range (j, j) +makeMaxRange EmptyRange = EmptyRange + +---------------------------------------------------------------------- -- Kilbury -- + +terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l] terminal mcfg toks = - [ Passive (f, cat, []) (makeRangeRec lins') [] | - Rule cat [] lins f <- mcfg, - lins' <- solutions $ rangeRestRec toks lins ] + do Rule abs@(Abs _ [] _) (Cnc _ _ lins) <- mcfg + lins' <- rangeRestRec toks lins + return $ Final abs (makeRangeRec lins') [] + +-- kilbury prediction +predictKilbury :: (Ord c, Ord n, Ord l, Ord t) => + MCFGrammar c n l t -> Input t + -> AChart c n l -> Item c n l -> [Item c n l] +predictKilbury mcfg toks _ (Passive cat found) = + do Rule abs@(Abs _ rhs _) (Cnc _ _ (Lin l (Cat (cat', r, i):syms) : lins)) <- mcfg + guard $ cat == cat' + lin' : lins' <- rangeRestRec toks (Lin l syms : lins) + rng <- projection r found + let children = replaceRec (emptyChildren abs) i found + return $ Active abs [] rng lin' lins' children +predictKilbury _ _ _ _ = [] diff --git a/src/GF/Parsing/MCFG/Incremental.hs b/src/GF/Parsing/MCFG/Incremental.hs new file mode 100644 index 000000000..897d365c9 --- /dev/null +++ b/src/GF/Parsing/MCFG/Incremental.hs @@ -0,0 +1,123 @@ +{-- Module -------------------------------------------------------------------- + Filename: IncrementalParse.hs + Author: Håkan Burden + Time-stamp: <2005-04-18, 15:07> + + Description: An agenda-driven implementation of the incremental algorithm 4.6 + that handles erasing and suppressing MCFG. + As described in Ljunglöf (2004) +------------------------------------------------------------------------------} + +module IncrementalParse where + + +-- Haskell +import List + +-- GF modules +import Examples +import GeneralChart +import MCFGrammar +import MCFParser +import Parser +import Range +import Nondet + + +{-- Datatypes ----------------------------------------------------------------- + IChart: A RedBlackMap with Items and Keys + Item : One kind of Item since the Passive Items not necessarily need to be + saturated iow, they can still have rows to recognize. + IKey : +------------------------------------------------------------------------------} + +type IChart n c l = ParseChart (Item n c l) (IKey c l) + +data Item n c l = Active (AbstractRule n c) + (RangeRec l) + Range + (Lin c l Range) + (LinRec c l Range) + [RangeRec l] +-- | Passive (AbstractRule n c) +-- (RangeRec l) +-- [RangeRec l] + deriving (Eq, Ord, Show) + +data IKey c l = Act c l Int +-- | ActE l + | Pass c l Int +-- | Pred l + | Useless + deriving (Eq, Ord, Show) + +keyof :: Item n c l -> IKey c l +keyof (Active _ _ (Range (_,j)) (Lin _ ((Cat (next,lbl,_)):_)) _ _) + = Act next lbl j +keyof (Active (_, cat, _) found (Range (i,_)) (Lin lbl []) _ _) + = Pass cat lbl i +keyof _ + = Useless + + +{-- Parsing ------------------------------------------------------------------- + recognize: + parse : Builds a chart from the initial agenda, given by prediction, and + the inference rules + keyof : Given an Item returns an appropriate Key for the Chart +------------------------------------------------------------------------------} + +recognize mcfg toks = chartMember (parse mcfg toks) item (keyof item) + where n = length toks + n2 = n `div` 2 + item = Active ("f",S,[A]) + [] (Range (0, n)) (Lin "s" []) [] + [[("p", Range (0, n2)), ("q", Range (n2, n))]] + + +parse :: (Ord n, Ord c, Ord l, Eq t) => Grammar n c l t -> [t] -> IChart n c l +parse mcfg toks = buildChart keyof [complete ntoks, scan, combine] (predict mcfg toks ntoks) + where ntoks = length toks + +complete :: (Ord n, Ord c, Ord l) => Int -> IChart n c l + -> Item n c l -> [Item n c l] +complete ntoks _ (Active rule found rng@(Range (_,j)) (Lin l []) lins recs) = + [ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs | + (lin, lins') <- select lins, + k <- [j .. ntoks] ] +complete _ _ _ = [] + + +predict :: (Eq n, Eq c, Eq l, Eq t) => Grammar n c l t -> [t] -> Int -> [Item n c l] +predict mcfg toks n = [ Active (f, c, rhs) [] (Range (k,k)) lin' lins'' daughters | + Rule c rhs lins f <- mcfg, + let daughters = replicate (length rhs) [], + lins' <- solutions $ rangeRestRec toks lins, + (lin', lins'') <- select lins', + k <- [0..n] ] + + +scan :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l] +scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) = + [ Active rule found rng'' (Lin l syms) lins recs | + rng'' <- solutions $ concRanges rng rng' ] +scan _ _ = [] + + +combine :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l] +combine chart (Active rule found rng@(Range (_,j)) (Lin l ((Cat (c,r,d)):syms)) lins recs) = + [ Active rule found rng'' (Lin l syms) lins (replaceRec recs d (found' ++ [(l',rng')])) | + Active _ found' rng' (Lin l' []) _ _ <- chartLookup chart (Pass c r j), + subsumes (recs !! d) (found' ++ [(l',rng')]), + rng'' <- solutions $ concRanges rng rng' ] +combine chart (Active (_,c,_) found rng'@(Range (i,_)) (Lin l []) _ _) = + [ Active rule found' rng'' (Lin l' syms) lins (replaceRec recs d (found ++ [(l,rng')])) | + Active rule found' rng (Lin l' ((Cat (c,r,d)):syms)) lins recs + <- chartLookup chart (Act c l i), + subsumes (recs !! d) (found ++ [(l,rng')]), + rng'' <- solutions $ concRanges rng rng' ] +combine _ _ = [] + + + + diff --git a/src/GF/Parsing/MCFG/Naive.hs b/src/GF/Parsing/MCFG/Naive.hs index 1717a16d9..1d315506d 100644 --- a/src/GF/Parsing/MCFG/Naive.hs +++ b/src/GF/Parsing/MCFG/Naive.hs @@ -1,5 +1,5 @@ -module GF.NewParsing.MCFG.Naive where +module GF.NewParsing.MCFG.Naive (parse) where -- GF modules @@ -8,21 +8,34 @@ import GF.Formalism.GCFG import GF.Formalism.MCFG import GF.Formalism.Utilities import GF.NewParsing.MCFG.Range +import GF.NewParsing.MCFG.PInfo import GF.Data.SortedList import GF.Data.Assoc +import GF.System.Tracing -{-- Datatypes and types ------------------------------------------------------- - NChart : A RedBlackMap with Items and Keys - Item : The parse Items are either Active or Passive - NKey : One for Active Items, one for Passive and one for Active Items - to convert to Passive - DottedRule: (function-name, LHS, [Found in RHS], [To find in RHS]) -------------------------------------------------------------------------------} +---------------------------------------------------------------------- +-- * parsing + +-- | Builds a chart from the initial agenda, given by prediction, and +-- the inference rules +parse :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t +parse mcfg starts toks + = [ Abs (cat, makeRangeRec lins) (zip rhs rrecs) fun | + Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ] + where chart = process mcfg toks + +process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> Input t -> NChart c n l +process mcfg toks + = tracePrt "MCFG.Naive - chart size" prtSizes $ + buildChart keyof [convert, combine] (predict toks mcfg) + +---------------------------------------------------------------------- +-- * type definitions type NChart c n l = ParseChart (Item c n l) (NKey c) data Item c n l = Active (DottedRule c n) (LinRec c l Range) [RangeRec l] - | Passive (Abstract c n) (RangeRec l) + | Passive c (RangeRec l) deriving (Eq, Ord, Show) type DottedRule c n = (Abstract c n, [c]) @@ -32,63 +45,43 @@ data NKey c = Act c | Final deriving (Eq, Ord, Show) - -{-- Parsing ------------------------------------------------------------------- - recognize: - parse : Builds a chart from the initial agenda, given by prediction, and - the inference rules - keyof : Given an Item returns an appropriate Key for the Chart -------------------------------------------------------------------------------} - - -parse :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t] - -> SyntaxChart n (c, RangeRec l) -parse mcfg toks = chart3 - where chart3 = assocMap (const groupPairs) chart2 - chart2 = accumAssoc id $ nubsort chart1 - chart1 = [ ((cat, rrec), (fun, zip rhs rrecs)) | - Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart0 Final, - let rrec = makeRangeRec lins ] - chart0 = process mcfg toks - -process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t] -> NChart c n l -process mcfg toks = buildChart keyof [convert, combine] (predict toks mcfg) - - keyof :: Item c n l -> NKey c keyof (Active (Abs _ (next:_) _, _) _ _) = Act next -keyof (Passive (Abs cat _ _) _) = Pass cat +keyof (Passive cat _) = Pass cat keyof _ = Final +-- for tracing purposes +prtSizes chart = "final=" ++ show (length (chartLookup chart Final)) ++ + ", passive=" ++ show (sum [length (chartLookup chart k) | + k@(Pass _) <- chartKeys chart ]) ++ + ", active=" ++ show (sum [length (chartLookup chart k) | + k@(Act _) <- chartKeys chart ]) -{--Inference rules ------------------------------------------------------------ - predict: Creates an Active Item of every Rule in the Grammar to give the - initial Agenda - combine: Creates an Active Item every time it is possible to combine - an Active Item from the agenda with a Passive Item from the Chart - convert: Active Items with nothing to find are converted to Passive Items -------------------------------------------------------------------------------} +---------------------------------------------------------------------- +-- * inference rules -predict :: (Eq t, Eq c) => [t] -> MCFGrammar c n l t -> [Item c n l] +-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda +predict :: Ord t => Input t -> MCFGrammar c n l t -> [Item c n l] predict toks mcfg = [ Active (abs, []) lins' [] | Rule abs (Cnc _ _ lins) <- mcfg, lins' <- rangeRestRec toks lins ] - +-- | Creates an Active Item every time it is possible to combine +-- an Active Item from the agenda with a Passive Item from the Chart combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l] combine chart (Active (Abs nt (c:find) f, found) lins rrecs) = do Passive _ rrec <- chartLookup chart (Pass c) lins' <- concLinRec $ substArgRec (length found) rrec lins return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec]) -combine chart (Passive (Abs c _ _) rrec) = +combine chart (Passive c rrec) = do Active (Abs nt (c:find) f, found) lins rrecs <- chartLookup chart (Act c) lins' <- concLinRec $ substArgRec (length found) rrec lins return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec]) combine _ _ = [] - +-- | Active Items with nothing to find are converted to Passive Items convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l] -convert _ (Active (Abs nt [] f, rhs) lins _) = [Passive (Abs nt rhs f) rrec] +convert _ (Active (Abs cat [] _, _) lins _) = [Passive cat rrec] where rrec = makeRangeRec lins convert _ _ = [] diff --git a/src/GF/Parsing/MCFG/PInfo.hs b/src/GF/Parsing/MCFG/PInfo.hs index 68fbcc031..a51ec7d20 100644 --- a/src/GF/Parsing/MCFG/PInfo.hs +++ b/src/GF/Parsing/MCFG/PInfo.hs @@ -4,15 +4,14 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/19 10:46:08 $ +-- > CVS $Date: 2005/04/20 12:49:45 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- MCFG parsing, parser information ----------------------------------------------------------------------------- -module GF.NewParsing.MCFG.PInfo - (MCFParser, MCFPInfo(..), buildMCFPInfo) where +module GF.NewParsing.MCFG.PInfo where import GF.System.Tracing import GF.Infra.Print @@ -22,6 +21,7 @@ import GF.Formalism.GCFG import GF.Formalism.MCFG import GF.Data.SortedList import GF.Data.Assoc +import GF.NewParsing.MCFG.Range ---------------------------------------------------------------------- -- type declarations @@ -32,10 +32,13 @@ type MCFParser c n l t = MCFPInfo c n l t -> Input t -> MCFChart c n l -type MCFChart c n l = [(n, (c, RangeRec l), [(c, RangeRec l)])] +type MCFChart c n l = [Abstract (c, RangeRec l) n] type MCFPInfo c n l t = MCFGrammar c n l t -buildCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t -buildCFPInfo = id +buildMCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t +buildMCFPInfo = id + +makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l) +makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)]) diff --git a/src/GF/Parsing/MCFG/Range.hs b/src/GF/Parsing/MCFG/Range.hs index 6e849b46c..e60b9916e 100644 --- a/src/GF/Parsing/MCFG/Range.hs +++ b/src/GF/Parsing/MCFG/Range.hs @@ -11,7 +11,7 @@ import GF.Formalism.GCFG import GF.Formalism.MCFG import GF.Formalism.Utilities import GF.Infra.Print - +import GF.Data.Assoc ((?)) ------------------------------------------------------------ -- ranges as single pairs @@ -95,29 +95,29 @@ makeRangeRec lins = map convLin lins --- Record projection -------------------------------------------------------- -projection :: Eq l => l -> RangeRec l -> [Range] +projection :: Ord l => l -> RangeRec l -> [Range] projection l rec = maybe (fail "projection") return $ lookup l rec --- Range restriction -------------------------------------------------------- -rangeRestTok :: Eq t => [t] -> t -> [Range] -rangeRestTok toks tok = do i <- elemIndices tok toks - return (makeRange (i, i+1)) +rangeRestTok :: Ord t => Input t -> t -> [Range] +rangeRestTok toks tok = do rng <- inputToken toks ? tok + return (makeRange rng) -rangeRestSym :: Eq t => [t] -> Symbol a t -> [Symbol a Range] +rangeRestSym :: Ord t => Input t -> Symbol a t -> [Symbol a Range] rangeRestSym toks (Tok tok) = do rng <- rangeRestTok toks tok return (Tok rng) rangeRestSym _ (Cat c) = return (Cat c) -rangeRestLin :: Eq t => [t] -> Lin c l t -> [Lin c l Range] +rangeRestLin :: Ord t => Input t -> Lin c l t -> [Lin c l Range] rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms return (Lin lbl syms') -rangeRestRec :: Eq t => [t] -> LinRec c l t -> [LinRec c l Range] +rangeRestRec :: Ord t => Input t -> LinRec c l t -> [LinRec c l Range] rangeRestRec toks = mapM (rangeRestLin toks) @@ -131,7 +131,7 @@ replaceRec recs i rec = (fst tup) ++ [rec] ++ (tail $ snd tup) --- Argument substitution ---------------------------------------------------- -substArgSymbol :: Eq l => Int -> RangeRec l -> Symbol (c, l, Int) Range +substArgSymbol :: Ord l => Int -> RangeRec l -> Symbol (c, l, Int) Range -> Symbol (c, l, Int) Range substArgSymbol i rec (Tok rng) = (Tok rng) substArgSymbol i rec (Cat (c, l, j)) @@ -139,13 +139,13 @@ substArgSymbol i rec (Cat (c, l, j)) | otherwise = (Cat (c, l, j)) -substArgLin :: Eq l => Int -> RangeRec l -> Lin c l Range +substArgLin :: Ord l => Int -> RangeRec l -> Lin c l Range -> Lin c l Range substArgLin i rec (Lin lbl syms) = (Lin lbl (map (substArgSymbol i rec) syms)) -substArgRec :: Eq l => Int -> RangeRec l -> LinRec c l Range +substArgRec :: Ord l => Int -> RangeRec l -> LinRec c l Range -> LinRec c l Range substArgRec i rec lins = map (substArgLin i rec) lins @@ -153,7 +153,7 @@ substArgRec i rec lins = map (substArgLin i rec) lins --- Subsumation ------------------------------------------------------------- -- "rec' subsumes rec?" -subsumes :: Eq l => RangeRec l -> RangeRec l -> Bool +subsumes :: Ord l => RangeRec l -> RangeRec l -> Bool subsumes rec rec' = and [elem r rec' | r <- rec] diff --git a/src/GF/Parsing/MCFG/ViaCFG.hs b/src/GF/Parsing/MCFG/ViaCFG.hs new file mode 100644 index 000000000..f1b76bb75 --- /dev/null +++ b/src/GF/Parsing/MCFG/ViaCFG.hs @@ -0,0 +1,183 @@ +{-- Module -------------------------------------------------------------------- + Filename: ApproxParse.hs + Author: Håkan Burden + Time-stamp: <2005-04-18, 14:56> + + Description: An agenda-driven implementation of the active algorithm 4.3.4, + parsing through context-free approximation as described in + Ljunglöf (2004) +------------------------------------------------------------------------------} + +module ApproxParse where + + +-- Haskell modules +import List +import Monad + +-- GF modules +import ConvertMCFGtoDecoratedCFG +import qualified DecoratedCFParser as CFP +import qualified DecoratedGrammar as CFG +import Examples +import GeneralChart +import qualified MCFGrammar as MCFG +import MCFParser +import Nondet +import Parser +import Range + + +{-- Datatypes ----------------------------------------------------------------- +Chart +Item +Key + + + Item : Four different Items are used. PreMCFG for MCFG Pre Items, Pre are + the Items returned by the pre-Functions and Mark are the + corresponding Items for the mark-Functions. For convenience correctly + marked Mark Items are converted to Passive Items. +I use dottedrule for convenience to keep track of wich daughter's RangeRec to look for. + AChart: A RedBlackMap with Items and Keys + AKey : +------------------------------------------------------------------------------} + +--Ev ta bort några typer av Item och bara nyckla på det som är unikt för den typen... +data Item n c l = PreMCFG (n, c) (RangeRec l) [RangeRec l] + | Pre (n, c) (RangeRec l) [l] [RangeRec l] + | Mark (n, c) (RangeRec l) (RangeRec l) [RangeRec l] + | Passive (n, c) (RangeRec l) (RangeRec l) + deriving (Eq, Ord, Show) + +type AChart n c l = ParseChart (Item n c l) (AKey n c l) + +data AKey n c l = Pr (n, c) l + | Pm (n, c) l + | Mk (RangeRec l) + | Ps (RangeRec l) + | Useless + deriving (Eq, Ord, Show) + + +{-- Parsing ------------------------------------------------------------------- + recognize: + parse : The Agenda consists of the Passive Items from context-free + approximation (as PreMCFG Items) and the Pre Items inferred by + pre-prediction. + keyof : Given an Item returns an appropriate Key for the Chart +------------------------------------------------------------------------------} + +recognize strategy mcfg toks = chartMember (parse strategy mcfg toks) + (Passive ("f", S) + [("s" , MCFG.Range (0, n))] + [("p" , MCFG.Range (0, n2)), ("q", MCFG.Range (n2, n))]) + (Ps [("s" , MCFG.Range (0, n))]) + where n = length toks + n2 = n `div` 2 + + +--parse :: (Ord n, Ord NT, Ord String, Eq t) => CFP.Strategy -> MCFG.Grammar n NT String t -> [t] +-- -> AChart n NT String +parse strategy mcfg toks + = buildChart keyof + [preCombine, markPredict, markCombine, convert] + (makePreItems (CFP.parse strategy (CFG.pInfo (convertGrammar mcfg)) [(S, "s")] toks) ++ + (prePredict mcfg)) + + +keyof :: Item n c l -> AKey n c l +keyof (PreMCFG head [(lbl, rng)] _) = Pm head lbl +keyof (Pre head _ (lbl:lbls) _) = Pr head lbl +keyof (Mark _ _ _ (rec:recs)) = Mk rec +keyof (Passive _ rec _) = Ps rec +keyof _ = Useless + + +{-- Initializing agenda ------------------------------------------------------- + makePreItems: +------------------------------------------------------------------------------} + +makePreItems :: (Eq c, Ord i) => CFG.Grammar n (Edge (c, l)) i t -> [Item n c l] +makePreItems cfchart + = [ PreMCFG (fun, cat) [(lbl, MCFG.makeRange (i, j))] (symToRec beta) | + CFG.Rule (Edge i j (cat,lbl)) beta fun <- cfchart ] + + +prePredict :: (Ord n, Ord c, Ord l) => MCFG.Grammar n c l t -> [Item n c l] +prePredict mcfg = + [ Pre (f, nt) [] (getLables lins) (replicate (nrOfCats (head lins)) []) | + MCFG.Rule nt nts lins f <- mcfg ] + + +{-- Inference rules --------------------------------------------------------- + prePredict : + preCombine : + markPredict: + markCombine: + convert : +----------------------------------------------------------------------------} + +preCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +preCombine chart (Pre head rec (l:ls) recs) = + [ Pre head (rec ++ [(l, r)]) ls recs'' | + PreMCFG head [(l, r)] recs' <- chartLookup chart (Pm head l), + recs'' <- solutions (unifyRangeRecs recs recs') ] +preCombine chart (PreMCFG head [(l, r)] recs) = + [ Pre head (rec ++ [(l, r)]) ls recs'' | + Pre head rec (l:ls) recs' <- chartLookup chart (Pr head l), + recs'' <- solutions (unifyRangeRecs recs recs') ] +preCombine _ _ = [] + + +markPredict :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +markPredict _ (Pre (n, c) rec [] recs) = [Mark (n, c) rec [] recs] +markPredict _ _ = [] + + +markCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +markCombine chart (Mark (f, c) rec mRec (r:recs)) = + [ Mark (f, c) rec (mRec ++ r) recs | + Passive _ r _ <- chartLookup chart (Ps r)] +markCombine chart (Passive _ r _) = + [ Mark (f, c) rec (mRec++r) recs | + Mark (f, c) rec mRec (r:recs) <- chartLookup chart (Mk r) ] +markCombine _ _ = [] + + +convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l) + -> Item n c l -> [Item n c l] +convert _ (Mark (f, c) r rec []) = [Passive (f, c) r rec] +convert _ _ = [] + + +{-- Help functions ---------------------------------------------------------------- + getRHS : + getLables: + symToRec : +----------------------------------------------------------------------------------} + +-- FULKOD ! +nrOfCats :: Eq c => MCFG.Lin c l t -> Int +nrOfCats (MCFG.Lin l syms) = length $ nub [(c, i) | Cat (c, l, i) <- syms] + + +-- +getLables :: LinRec c l t -> [l] +getLables lins = [l | MCFG.Lin l syms <- lins] + + +-- +symToRec :: Ord i => [Symbol (Edge (c, l), i) d] -> [[(l, MCFG.Range)]] +symToRec beta = map makeLblRng $ groupBy (\(_, d) (_, d') -> (d == d')) + $ sortBy sBd [(Edge i j (c, l) , d) | Cat (Edge i j (c, l), d) + <- beta] + where makeLblRng edges = [(l, (MCFG.makeRange (i, j))) | (Edge i j (_, l), _) + <- edges] + sBd (_, d) (_, d') + | d < d' = LT + | d > d' = GT + | otherwise = EQ
\ No newline at end of file |
