diff options
| author | peb <unknown> | 2005-03-21 13:17:44 +0000 |
|---|---|---|
| committer | peb <unknown> | 2005-03-21 13:17:44 +0000 |
| commit | 96a08c9df49345657c769ac481b6df47cbea3a8d (patch) | |
| tree | 2c9d6dc0603fb1fe70934af8df7b6e1336c83fa4 /src/GF/Parsing | |
| parent | aef9430eb0576964a3fb669c741f1c689724bb5a (diff) | |
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing')
| -rw-r--r-- | src/GF/Parsing/CFParserGeneral.hs | 85 | ||||
| -rw-r--r-- | src/GF/Parsing/CFParserIncremental.hs | 143 | ||||
| -rw-r--r-- | src/GF/Parsing/GeneralChart.hs | 85 | ||||
| -rw-r--r-- | src/GF/Parsing/IncrementalChart.hs | 49 | ||||
| -rw-r--r-- | src/GF/Parsing/MCFParserBasic.hs | 156 | ||||
| -rw-r--r-- | src/GF/Parsing/ParseCF.hs | 82 | ||||
| -rw-r--r-- | src/GF/Parsing/ParseCFG.hs | 43 | ||||
| -rw-r--r-- | src/GF/Parsing/ParseGFC.hs | 177 | ||||
| -rw-r--r-- | src/GF/Parsing/ParseMCFG.hs | 37 | ||||
| -rw-r--r-- | src/GF/Parsing/Parser.hs | 187 |
10 files changed, 1044 insertions, 0 deletions
diff --git a/src/GF/Parsing/CFParserGeneral.hs b/src/GF/Parsing/CFParserGeneral.hs new file mode 100644 index 000000000..cc24820b7 --- /dev/null +++ b/src/GF/Parsing/CFParserGeneral.hs @@ -0,0 +1,85 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFParserGeneral +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:41 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Several implementations of CFG chart parsing +----------------------------------------------------------------------------- + +module GF.Parsing.CFParserGeneral (parse, + Strategy + ) where + +import Tracing + +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import GF.Parsing.GeneralChart +import GF.Data.Assoc + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser n c t +parse strategy grammar start = extract . process strategy grammar start + +type Strategy = (Bool, Bool) -- (isBottomup, isTopdown) + +extract :: [Item n (Symbol c t)] -> [Edge (Rule n c t)] +extract edges = + edges' + where edges' = [ Edge j k (Rule cat (reverse found) name) | + Edge j k (Cat cat, found, [], Just name) <- edges ] + +process :: (Ord n, Ord c, Ord t) => Strategy -> PInfo n c t -> + [c] -> Input t -> [Item n (Symbol c t)] +process (isBottomup, isTopdown) grammar start + = trace ("CFParserGeneral" ++ + (if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = Chart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (Rule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src/GF/Parsing/CFParserIncremental.hs b/src/GF/Parsing/CFParserIncremental.hs new file mode 100644 index 000000000..3b9951721 --- /dev/null +++ b/src/GF/Parsing/CFParserIncremental.hs @@ -0,0 +1,143 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFParserIncremental +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:41 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Incremental chart parsing for context-free grammars +----------------------------------------------------------------------------- + + + +module GF.Parsing.CFParserIncremental (parse, + Strategy) where + +import Tracing +import GF.Printing.PrintParser + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import Operations +-- parser modules: +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import GF.Parsing.IncrementalChart + + +type Strategy = ((Bool, Bool), (Bool, Bool)) -- (predict:(BU, TD), filter:(BU, TD)) + +parse :: (Ord n, Ord c, Ord t, Show t) => + Strategy -> CFParser n c t +parse ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input = + trace2 "CFParserIncremental" + ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + trace2 "input" (show (inputTo input)) $ + finalEdges + where finalEdges = [ Edge j k (Rule cat (reverse found) name) | + (k, state) <- + tracePrt "#passiveChart" + (prt . map (length . (?Passive) . snd)) $ + tracePrt "#activeChart" + (prt . map (length . concatMap snd . aAssocs . snd)) $ + assocs finalChart, + Item j (Rule cat _Nil name) found <- state ? Passive ] + + finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = --tracePrt ("axioms 0") (prtSep "\n") $ + union $ map (tdInfer 0) start + axioms k = --tracePrt ("axioms "++show k) (prtSep "\n") $ + union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (Rule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(Rule _ (Cat next:_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (Cat next:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = --tracePrt ("buInfer "++show(j,k)++" "++prt next) (prtSep "\n") $ + buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ --tracePrt ("buRules "++prt next) (prtSep "\n") $ + bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (Rule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (Rule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | Rule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +-- type declarations, items & keys +data Item n c t = Item Int (Rule n c t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive + deriving (Eq, Ord, Show) + +keyof :: Item n c t -> IKey c t +keyof (Item _ (Rule _ (next:_) _) _) = Active next +keyof (Item _ (Rule _ [] _) _) = Passive + +forward :: Rule n c t -> Rule n c t +forward (Rule cat (_:rest) name) = Rule cat rest name + + +instance (Print n, Print c, Print t) => Print (Item n c t) where + prt (Item k (Rule cat rhs name) syms) + = "<" ++show k++ ": "++prt name++". "++ + prt cat++" -> "++prt rhs++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive) = "!" + + diff --git a/src/GF/Parsing/GeneralChart.hs b/src/GF/Parsing/GeneralChart.hs new file mode 100644 index 000000000..61f933932 --- /dev/null +++ b/src/GF/Parsing/GeneralChart.hs @@ -0,0 +1,85 @@ +---------------------------------------------------------------------- +-- | +-- Module : GeneralChart +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simple implementation of deductive chart parsing +----------------------------------------------------------------------------- + + +module GF.Parsing.GeneralChart (-- * Type definition + Chart, + -- * Main functions + chartLookup, + buildChart, + -- * Probably not needed + emptyChart, + chartMember, + chartInsert, + chartList, + addToChart + ) where + +-- import Trace + +import GF.Data.RedBlackSet + +-- main functions + +chartLookup :: (Ord item, Ord key) => Chart item key -> key -> [item] +buildChart :: (Ord item, Ord key) => (item -> key) -> + [Chart item key -> item -> [item]] -> [item] -> [item] + +buildChart keyof rules axioms = chartList (addItems axioms emptyChart) + where addItems [] = id + addItems (item:items) = addItems items . addItem item + + -- addItem item | trace ("+ "++show item++"\n") False = undefined + addItem item = addToChart item (keyof item) + (\chart -> foldr (consequence item) chart rules) + + consequence item rule chart = addItems (rule chart item) chart + +-- probably not needed + +emptyChart :: (Ord item, Ord key) => Chart item key +chartMember :: (Ord item, Ord key) => Chart item key -> item -> key -> Bool +chartInsert :: (Ord item, Ord key) => Chart item key -> item -> key -> Maybe (Chart item key) +chartList :: (Ord item, Ord key) => Chart item key -> [item] +addToChart :: (Ord item, Ord key) => item -> key -> (Chart item key -> Chart item key) -> Chart item key -> Chart item key + +addToChart item key after chart = maybe chart after (chartInsert chart item key) + + +-------------------------------------------------------------------------------- +-- key charts as red/black trees + +newtype Chart item key = KC (RedBlackMap key item) + deriving Show + +emptyChart = KC rbmEmpty +chartMember (KC tree) item key = rbmElem key item tree +chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree) +chartLookup (KC tree) key = rbmLookup key tree +chartList (KC tree) = concatMap snd (rbmList tree) +--------------------------------------------------------------------------------} + + +{-------------------------------------------------------------------------------- +-- key charts as unsorted association lists -- OBSOLETE! + +newtype Chart item key = SC [(key, item)] + +emptyChart = SC [] +chartMember (SC chart) item key = (key,item) `elem` chart +chartInsert (SC chart) item key = if (key,item) `elem` chart then Nothing else Just (SC ((key,item):chart)) +chartLookup (SC chart) key = [ item | (key',item) <- chart, key == key' ] +chartList (SC chart) = map snd chart +--------------------------------------------------------------------------------} + diff --git a/src/GF/Parsing/IncrementalChart.hs b/src/GF/Parsing/IncrementalChart.hs new file mode 100644 index 000000000..a040ddd60 --- /dev/null +++ b/src/GF/Parsing/IncrementalChart.hs @@ -0,0 +1,49 @@ +---------------------------------------------------------------------- +-- | +-- Module : IncrementalChart +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Implementation of /incremental/ deductive parsing, +-- i.e. parsing one word at the time. +----------------------------------------------------------------------------- + + +module GF.Parsing.IncrementalChart (-- * Type definitions + IncrementalChart, + -- * Functions + buildChart, + chartList + ) where + +import Array +import GF.Data.SortedList +import GF.Data.Assoc + +buildChart :: (Ord item, Ord key) => (item -> key) -> + (Int -> item -> SList item) -> + (Int -> SList item) -> + (Int, Int) -> IncrementalChart item key + +chartList :: (Ord item, Ord key) => (Int -> item -> edge) -> IncrementalChart item key -> [edge] + +type IncrementalChart item key = Array Int (Assoc key (SList item)) + +---------- + +buildChart keyof rules axioms bounds = finalChartArray + where buildState k = limit (rules k) $ axioms k + finalChartList = map buildState [fst bounds .. snd bounds] + finalChartArray = listArray bounds $ map stateAssoc finalChartList + stateAssoc state = accumAssoc id [ (keyof item, item) | item <- state ] + +chartList combine chart = [ combine k item | + (k, state) <- assocs chart, + item <- concatMap snd $ aAssocs state ] + + diff --git a/src/GF/Parsing/MCFParserBasic.hs b/src/GF/Parsing/MCFParserBasic.hs new file mode 100644 index 000000000..03a1d8b9d --- /dev/null +++ b/src/GF/Parsing/MCFParserBasic.hs @@ -0,0 +1,156 @@ +---------------------------------------------------------------------- +-- | +-- Module : MCFParserBasic +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplest possible implementation of MCFG chart parsing +----------------------------------------------------------------------------- + +module GF.Parsing.MCFParserBasic (parse + ) where + +import Tracing + +import Ix +import GF.Parsing.Parser +import GF.Conversion.MCFGrammar +import GF.Parsing.GeneralChart +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Printing.PrintParser + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + MCFParser n c l t +parse grammar start = edges2chart . extract . process grammar + + +extract :: [Item n c l t] -> [(n, MEdge c l, [MEdge c l])] +extract items = tracePrt "#passives" (prt.length) $ + --trace2 "passives" (prtAfter "\n" [ i | i@(PItem _) <- items ]) $ + [ item | PItem item <- items ] + + +process :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + Grammar n c l t -> Input t -> [Item n c l t] +process grammar input = buildChart keyof rules axioms + where axioms = initial + rules = [combine, scan, predict] + + -- axioms + initial = traceItems "axiom" [] $ + [ nextLin name tofind (addNull cat) (map addNull args) | + Rule cat args tofind name <- grammar ] + + addNull a = (a, []) + + -- predict + predict chart i1@(Item name tofind rho (Lin lbl []) (cat, found0) children) + = traceItems "predict" [i1] + [ nextLin name tofind (cat, found) children | + let found = insertRow lbl rho found0 ] + predict _ _ = [] + + -- combine + combine chart active@(Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) + = do passive <- chartLookup chart (Passive cat) + combineItems active passive + combine chart passive@(PItem (_, (cat, _), _)) + = do active <- chartLookup chart (Active cat) + combineItems active passive + combine _ _ = [] + + combineItems i1@(Item name tofind rho0 (Lin lbl (Cat(_,lbl',nr):rest)) found children0) + i2@(PItem (_, found', _)) + = traceItems "combine" [i1,i2] + [ Item name tofind rho (Lin lbl rest) found children | + rho1 <- lookupLbl lbl' found', + let rho = concatRange rho0 rho1, + children <- updateChild nr children0 (snd found') ] + + -- scan + scan chart i1@(Item name tofind rho0 (Lin lbl (Tok tok:rest)) found children) + = traceItems "scan" [i1] + [ Item name tofind rho (Lin lbl rest) found children | + let rho = concatRange rho0 (rangeOfToken tok) ] + scan _ _ = [] + + -- utilities + rangeOfToken tok = makeRange $ inputToken input ? tok + + zeroRange = makeRange $ map (\i -> (i,i)) $ range $ inputBounds input + + nextLin name [] found children = PItem (name, found, children) + nextLin name (lin : tofind) found children + = Item name tofind zeroRange lin found children + +lookupLbl a = map snd . filter (\b -> a == fst b) . snd +updateChild nr children found = updateIndex nr children $ + \child -> if null (snd child) + then [ (fst child, found) ] + else [ child | snd child == found ] + +insertRow lbl rho [] = [(lbl, rho)] +insertRow lbl rho rows'@(row@(lbl', rho') : rows) + = case compare lbl lbl' of + LT -> row : insertRow lbl rho rows + GT -> (lbl, rho) : rows' + EQ -> (lbl, unionRange rho rho') : rows + + +-- internal representation of parse items + +data Item n c l t + = Item n [Lin c l t] -- tofind + Range (Lin c l t) -- current row + (MEdge c l) -- found rows + [MEdge c l] -- found children + | PItem (n, MEdge c l, [MEdge c l]) + deriving (Eq, Ord, Show) + +data IKey c = Passive c | Active c | AnyItem + deriving (Eq, Ord, Show) + +keyof (PItem (_, (cat, _), _)) = Passive cat +keyof (Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) = Active cat +keyof _ = AnyItem + + +-- tracing + +--type TraceItem = Item String String Char String +traceItems :: (Print n, Print l, Print c, Print t) => + String -> [Item n c l t] -> [Item n c l t] -> [Item n c l t] +traceItems rule trigs items + | null items || True = items + | otherwise = trace ("\n" ++ rule ++ ":" ++ + unlines [ "\t" ++ prt i | i <- trigs ] ++ "=>" ++ + unlines [ "\t" ++ prt i | i <- items ]) items + +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Item n c l t) where + prt (Item name tofind rho lin (cat, found) children) + = prt name ++ ". " ++ prt cat ++ prtRhs (map fst children) ++ + " { " ++ prt rho ++ prt lin ++ " ; " ++ + concat [ prt lbl ++ "=" ++ prt ln ++ " " | + Lin lbl ln <- tofind ] ++ "; " ++ + concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl, rho) <- found ] ++ "} " ++ + concat [ "[ " ++ concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl,rho) <- child ] ++ "] " | + child <- map snd children ] + prt (PItem (name, edge, edges)) + = prt name ++ ". " ++ prt edge ++ prtRhs edges + +prtRhs [] = "" +prtRhs rhs = " -> " ++ prtSep " " rhs + diff --git a/src/GF/Parsing/ParseCF.hs b/src/GF/Parsing/ParseCF.hs new file mode 100644 index 000000000..20f45e3f2 --- /dev/null +++ b/src/GF/Parsing/ParseCF.hs @@ -0,0 +1,82 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCF +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Chart parsing of grammars in CF format +----------------------------------------------------------------------------- + +module GF.Parsing.ParseCF (parse, alternatives) where + +import Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import GF.Data.SortedList (nubsort) +import GF.Data.Assoc +import qualified CF +import qualified CFIdent as CFI +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import qualified GF.Parsing.ParseCFG as P + +type Token = CFI.CFTok +type Name = CFI.CFFun +type Category = CFI.CFCat + +alternatives :: [(String, [String])] +alternatives = [ ("gb", ["G","GB","_gen","_genBU"]), + ("gt", ["GT","_genTD"]), + ("ibn", ["","I","B","IB","IBN","_inc","BU","_incBU"]), + ("ibb", ["BB","IBB","BU_BUF","_incBU_BUF"]), + ("ibt", ["BT","IBT","BU_TDF","_incBU_TDF"]), + ("iba", ["BA","IBA","BU_BTF","BU_TBF","_incBU_BTF","_incBU_TBF"]), + ("itn", ["T","IT","ITN","TD","_incTD"]), + ("itb", ["TB","ITB","TD_BUF","_incTD_BUF"]) + ] + +parse :: String -> CF.CF -> Category -> CF.CFParser +parse = buildParser . P.parse + +buildParser :: CFParser Name Category Token -> CF.CF -> Category -> CF.CFParser +buildParser parser cf start tokens = trace "ParseCF" $ + (parseResults, parseInformation) + where parseInformation = prtSep "\n" trees + parseResults = {-take maxTake-} [ (tree2cfTree t, []) | t <- trees ] + theInput = input tokens + edges = tracePrt "#edges" (prt.length) $ + parser pInf [start] theInput + chart = tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + edges2chart theInput $ map (fmap addCategory) edges + forests = tracePrt "#forests" (prt.length) $ + chart2forests chart (const False) $ + uncurry Edge (inputBounds theInput) start + trees = tracePrt "#trees" (prt.length) $ + concatMap forest2trees forests + pInf = pInfo $ cf2grammar cf (nubsort tokens) + + +addCategory (Rule cat rhs name) = Rule cat rhs (name, cat) + +tree2cfTree (TNode (name, cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees)) + +cf2grammar :: CF.CF -> [Token] -> Grammar Name Category Token +cf2grammar cf tokens = [ Rule cat rhs name | + (name, (cat, rhs0)) <- cfRules, + rhs <- mapM item2symbol rhs0 ] + where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++ + CF.rulesOfCF cf + item2symbol (CF.CFNonterm cat) = [Cat cat] + item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens + +-- maxTake :: Int +-- maxTake = 500 +-- maxTake = maxBound + + diff --git a/src/GF/Parsing/ParseCFG.hs b/src/GF/Parsing/ParseCFG.hs new file mode 100644 index 000000000..1005d5656 --- /dev/null +++ b/src/GF/Parsing/ParseCFG.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main parsing module for context-free grammars +----------------------------------------------------------------------------- + + +module GF.Parsing.ParseCFG (parse) where + +import Char (toLower) +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import qualified GF.Parsing.CFParserGeneral as PGen +import qualified GF.Parsing.CFParserIncremental as PInc + + +parse :: (Ord n, Ord c, Ord t, Show t) => + String -> CFParser n c t +parse = decodeParser . map toLower + +decodeParser ['g',s] = PGen.parse (decodeStrategy s) +decodeParser ['i',s,f] = PInc.parse (decodeStrategy s, decodeFilter f) +decodeParser _ = decodeParser "ibn" + +decodeStrategy 'b' = (True, False) +decodeStrategy 't' = (False, True) + +decodeFilter 'a' = (True, True) +decodeFilter 'b' = (True, False) +decodeFilter 't' = (False, True) +decodeFilter 'n' = (False, False) + + + + diff --git a/src/GF/Parsing/ParseGFC.hs b/src/GF/Parsing/ParseGFC.hs new file mode 100644 index 000000000..0d0d5c662 --- /dev/null +++ b/src/GF/Parsing/ParseGFC.hs @@ -0,0 +1,177 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseGFC +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.Parsing.ParseGFC (newParser) where + +import Tracing +import GF.Printing.PrintParser +import qualified PrGrammar + +-- Haskell modules +import Monad +-- import Ratio ((%)) +-- GF modules +import qualified Grammar as GF +import Values +import qualified Macros +import qualified Modules as Mods +import qualified AbsGFC +import qualified Ident +import qualified ShellState as SS +import Operations +import GF.Data.SortedList +-- Conversion and parser modules +import GF.Data.Assoc +import GF.Parsing.Parser +-- import ConvertGrammar +import GF.Conversion.GrammarTypes +import qualified GF.Conversion.MCFGrammar as M +import qualified GF.Conversion.CFGrammar as C +import qualified GF.Parsing.ParseMCFG as PM +import qualified GF.Parsing.ParseCFG as PC +--import MCFRange + +newParser :: String -> SS.StateGrammar -> GF.Cat -> String -> Err [GF.Term] + +-- parsing via MCFG +newParser (m:strategy) gr (_, startCat) inString + | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms + where terms = map (ptree2term abstract) trees + trees = --tracePrt "trees" (prtBefore "\n") $ + tracePrt "#trees" (prt . length) $ + concatMap forest2trees forests + forests = --tracePrt "forests" (prtBefore "\n") $ + tracePrt "#forests" (prt . length) $ + concatMap (chart2forests chart isMeta) finalEdges + isMeta = null . snd + finalEdges = tracePrt "finalEdges" (prtBefore "\n") $ + filter isFinalEdge $ aElems chart +-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) | +-- let (i, j) = inputBounds inTokens, +-- E.Rule cat _ [E.Lin lbl _] _ <- pInf, +-- isStartCat cat ] + isFinalEdge (cat, rows) + = isStartCat cat && + inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ] + chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + PM.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $ + mcfPInfo $ SS.statePInfo gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ] + isStartCat (MCFCat cat _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + +-- parsing via CFG +newParser (c:strategy) gr (_, startCat) inString + | c=='c' || c=='C' = trace2 "Parser" "CFG" $ Ok terms + where terms = -- tracePrt "terms" (unlines . map PrGrammar.prt) $ + map (ptree2term abstract) trees + trees = tracePrt "#trees" (prt . length) $ + --tracePrt "trees" (prtSep "\n") $ + concatMap forest2trees forests + forests = tracePrt "$cfForests" (prt) $ -- . length) $ + tracePrt "forests" (unlines . map prt) $ + concatMap convertFromCFForest cfForests + cfForests= tracePrt "cfForests" (unlines . map prt) $ + concatMap (chart2forests chart (const False)) finalEdges + finalEdges = tracePrt "finalChartEdges" prt $ + map (uncurry Edge (inputBounds inTokens)) starters + chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + C.edges2chart inTokens edges + edges = --tracePrt "finalEdges" + --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $ + tracePrt "#edges" (prt . length) $ + PC.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = cfPInfo $ SS.statePInfo gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ map fst $ aAssocs $ C.topdownRules pInf + isStartCat (CFCat (MCFCat cat _) _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + --ifNull (Ident.identC "ABS") last $ + --[i | (i, Mods.ModMod m) <- Mods.modules (SS.grammar gr), Mods.isModAbs m] + +newParser "" gr start inString = newParser "c" gr start inString + +newParser opt gr (_,cat) _ = + Bad ("new-parser '" ++ opt ++ "' not defined yet") + +ptree2term :: Ident.Ident -> ParseTree Name -> GF.Term +ptree2term a (TNode f ts) = Macros.mkApp (Macros.qq (a,f)) (map (ptree2term a) ts) +ptree2term a (TMeta) = GF.Meta (GF.MetaSymb 0) + +---------------------------------------------------------------------- +-- conversion and unification of forests + +convertFromCFForest :: ParseForest CFName -> [ParseForest Name] +convertFromCFForest (FNode (CFName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] + checkProfile forests = unifyManyForests . map (forests !!) + -- foldM unifyForests FMeta . map (forests !!) + +isCoercion Ident.IW = True +isCoercion _ = False + +unifyManyForests :: Eq n => [ParseForest n] -> [ParseForest n] +unifyManyForests [] = [FMeta] +unifyManyForests [f] = [f] +unifyManyForests (f:g:fs) = do h <- unifyForests f g + unifyManyForests (h:fs) + +unifyForests :: Eq n => ParseForest n -> ParseForest n -> [ParseForest n] +unifyForests FMeta forest = [forest] +unifyForests forest FMeta = [forest] +unifyForests (FNode name1 children1) (FNode name2 children2) + = [ FNode name1 children | name1 == name2, not (null children) ] + where children = [ forests | forests1 <- children1, forests2 <- children2, + forests <- zipWithM unifyForests forests1 forests2 ] + + + +{- +---------------------------------------------------------------------- +-- conversion and unification for parse trees instead of forests + +convertFromCFTree :: ParseTree CFName -> [ParseTree Name] +convertFromCFTree (TNode (CFName name profile) children0) + = [ TNode name children | + children1 <- mapM convertFromCFTree children0, + children <- mapM (checkProfile children1) profile ] + where checkProfile trees = unifyManyTrees . map (trees !!) + +unifyManyTrees :: Eq n => [ParseTree n] -> [ParseTree n] +unifyManyTrees [] = [TMeta] +unifyManyTrees [f] = [f] +unifyManyTrees (f:g:fs) = do h <- unifyTrees f g + unifyManyTrees (h:fs) + +unifyTrees TMeta tree = [tree] +unifyTrees tree TMeta = [tree] +unifyTrees (TNode name1 children1) (TNode name2 children2) + = [ TNode name1 children | name1 == name2, + children <- zipWithM unifyTrees children1 children2 ] + +-} + diff --git a/src/GF/Parsing/ParseMCFG.hs b/src/GF/Parsing/ParseMCFG.hs new file mode 100644 index 000000000..4afc44bb7 --- /dev/null +++ b/src/GF/Parsing/ParseMCFG.hs @@ -0,0 +1,37 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main module for MCFG parsing +----------------------------------------------------------------------------- + + +module GF.Parsing.ParseMCFG (parse) where + +import Char (toLower) +import GF.Parsing.Parser +import GF.Conversion.MCFGrammar +import qualified GF.Parsing.MCFParserBasic as PBas +import GF.Printing.PrintParser +---- import qualified MCFParserBasic2 as PBas2 -- file not found AR + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + String -> MCFParser n c l t +parse str = decodeParser (map toLower str) + +decodeParser "b" = PBas.parse +---- decodeParser "c" = PBas2.parse +decodeParser _ = decodeParser "c" + + + + diff --git a/src/GF/Parsing/Parser.hs b/src/GF/Parsing/Parser.hs new file mode 100644 index 000000000..0c18514f9 --- /dev/null +++ b/src/GF/Parsing/Parser.hs @@ -0,0 +1,187 @@ +---------------------------------------------------------------------- +-- | +-- Module : Parser +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions to be used when parsing +----------------------------------------------------------------------------- + + +module GF.Parsing.Parser ( -- * Symbols + Symbol(..), symbol, mapSymbol, + -- * Edges + Edge(..), + -- * Parser input + Input(..), makeInput, input, inputMany, + -- * charts, parse forests & trees + ParseChart, ParseForest(..), ParseTree(..), + chart2forests, forest2trees + ) where + +-- haskell modules: +import Monad +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parsing modules: +import GF.Printing.PrintParser + +------------------------------------------------------------ +-- symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u + +---------- + +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + + +------------------------------------------------------------ +-- edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- parser input + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- charts, parse forests & trees + +type ParseChart n e = Assoc e [(n, [[e]])] + +data ParseForest n = FNode n [[ParseForest n]] | FMeta + deriving (Eq, Ord, Show) + +data ParseTree n = TNode n [ParseTree n] | TMeta + deriving (Eq, Ord, Show) + +chart2forests :: Ord e => ParseChart n e -> (e -> Bool) -> e -> [ParseForest n] + +--filterCoercions :: (n -> Bool) -> ParseForest n -> [ParseForest n] + +forest2trees :: ParseForest n -> [ParseTree n] + +instance Functor ParseTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap f (TMeta) = TMeta + +instance Functor ParseForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap f (FMeta) = FMeta + +---------- + +chart2forests chart isMeta = edge2forests + where item2forest (name, children) = FNode name $ + do edges <- children + mapM edge2forests edges + edge2forests edge + | isMeta edge = [FMeta] + | otherwise = filter checkForest $ map item2forest $ chart ? edge + checkForest (FNode _ children) = not (null children) + +-- filterCoercions _ (FMeta) = [FMeta] +-- filterCoercions isCoercion (FNode s forests) +-- | isCoercion s = do [forest] <- forests ; filterCoercions isCoercion forest +-- | otherwise = FNode s $ do children <- forests ; mapM (filterCoercions isCoercion) + +forest2trees (FNode s forests) = map (TNode s) $ forests >>= mapM forest2trees +forest2trees (FMeta) = [TMeta] + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow.prt) + prtList = prtSep " " + +simpleShow :: String -> String +simpleShow s = "\"" ++ concatMap mkEsc s ++ "\"" + where + mkEsc :: Char -> String + mkEsc c = case c of + _ | elem c "\\\"" -> '\\' : [c] + '\n' -> "\\n" + '\t' -> "\\t" + _ -> [c] + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (ParseTree s) where + prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}" + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (ParseForest s) where + prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}" + prt (FMeta) = "?" + prtList = prtAfter "\n" + + |
