summaryrefslogtreecommitdiff
path: root/src/GF/Parsing
diff options
context:
space:
mode:
authorpeb <unknown>2005-03-21 13:17:44 +0000
committerpeb <unknown>2005-03-21 13:17:44 +0000
commit96a08c9df49345657c769ac481b6df47cbea3a8d (patch)
tree2c9d6dc0603fb1fe70934af8df7b6e1336c83fa4 /src/GF/Parsing
parentaef9430eb0576964a3fb669c741f1c689724bb5a (diff)
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing')
-rw-r--r--src/GF/Parsing/CFParserGeneral.hs85
-rw-r--r--src/GF/Parsing/CFParserIncremental.hs143
-rw-r--r--src/GF/Parsing/GeneralChart.hs85
-rw-r--r--src/GF/Parsing/IncrementalChart.hs49
-rw-r--r--src/GF/Parsing/MCFParserBasic.hs156
-rw-r--r--src/GF/Parsing/ParseCF.hs82
-rw-r--r--src/GF/Parsing/ParseCFG.hs43
-rw-r--r--src/GF/Parsing/ParseGFC.hs177
-rw-r--r--src/GF/Parsing/ParseMCFG.hs37
-rw-r--r--src/GF/Parsing/Parser.hs187
10 files changed, 1044 insertions, 0 deletions
diff --git a/src/GF/Parsing/CFParserGeneral.hs b/src/GF/Parsing/CFParserGeneral.hs
new file mode 100644
index 000000000..cc24820b7
--- /dev/null
+++ b/src/GF/Parsing/CFParserGeneral.hs
@@ -0,0 +1,85 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CFParserGeneral
+-- Maintainer : Peter Ljunglöf
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:41 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Several implementations of CFG chart parsing
+-----------------------------------------------------------------------------
+
+module GF.Parsing.CFParserGeneral (parse,
+ Strategy
+ ) where
+
+import Tracing
+
+import GF.Parsing.Parser
+import GF.Conversion.CFGrammar
+import GF.Parsing.GeneralChart
+import GF.Data.Assoc
+
+parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser n c t
+parse strategy grammar start = extract . process strategy grammar start
+
+type Strategy = (Bool, Bool) -- (isBottomup, isTopdown)
+
+extract :: [Item n (Symbol c t)] -> [Edge (Rule n c t)]
+extract edges =
+ edges'
+ where edges' = [ Edge j k (Rule cat (reverse found) name) |
+ Edge j k (Cat cat, found, [], Just name) <- edges ]
+
+process :: (Ord n, Ord c, Ord t) => Strategy -> PInfo n c t ->
+ [c] -> Input t -> [Item n (Symbol c t)]
+process (isBottomup, isTopdown) grammar start
+ = trace ("CFParserGeneral" ++
+ (if isBottomup then " BU" else "") ++
+ (if isTopdown then " TD" else "")) $
+ buildChart keyof [predict, combine] . axioms
+ where axioms input = initial ++ scan input
+
+ scan input = map (fmap mkEdge) (inputEdges input)
+ mkEdge tok = (Tok tok, [], [], Nothing)
+
+ -- the combine rule
+ combine chart (Edge j k (next, _, [], _))
+ = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ]
+ combine chart edge@(Edge _ j (_, _, next:_, _))
+ = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ]
+
+ -- initial predictions
+ initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ]
+
+ -- predictions
+ predict chart (Edge j k (next, _, [], _)) | isBottomup
+ = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ]
+ -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward
+ predict chart (Edge _ k (_, _, Cat cat:_, _))
+ = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ]
+ predict _ _ = []
+
+ tdRuleLookup | isTopdown = topdownRules grammar
+ | isBottomup = emptyLeftcornerRules grammar
+
+-- internal representation of parse items
+
+type Item n s = Edge (s, [s], [s], Maybe n)
+type IChart n s = Chart (Item n s) (IKey s)
+data IKey s = Active s Int
+ | Passive s Int
+ deriving (Eq, Ord, Show)
+
+keyof (Edge _ j (_, _, next:_, _)) = Active next j
+keyof (Edge j _ (cat, _, [], _)) = Passive cat j
+
+forwardTo (Edge i j (cat, found, next:tofind, name)) k = Edge i k (cat, next:found, tofind, name)
+
+loopingEdge k (Rule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name)
+
+
+
diff --git a/src/GF/Parsing/CFParserIncremental.hs b/src/GF/Parsing/CFParserIncremental.hs
new file mode 100644
index 000000000..3b9951721
--- /dev/null
+++ b/src/GF/Parsing/CFParserIncremental.hs
@@ -0,0 +1,143 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CFParserIncremental
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:41 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Incremental chart parsing for context-free grammars
+-----------------------------------------------------------------------------
+
+
+
+module GF.Parsing.CFParserIncremental (parse,
+ Strategy) where
+
+import Tracing
+import GF.Printing.PrintParser
+
+-- haskell modules:
+import Array
+-- gf modules:
+import GF.Data.SortedList
+import GF.Data.Assoc
+import Operations
+-- parser modules:
+import GF.Parsing.Parser
+import GF.Conversion.CFGrammar
+import GF.Parsing.IncrementalChart
+
+
+type Strategy = ((Bool, Bool), (Bool, Bool)) -- (predict:(BU, TD), filter:(BU, TD))
+
+parse :: (Ord n, Ord c, Ord t, Show t) =>
+ Strategy -> CFParser n c t
+parse ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input =
+ trace2 "CFParserIncremental"
+ ((if isPredictBU then "BU-predict " else "") ++
+ (if isPredictTD then "TD-predict " else "") ++
+ (if isFilterBU then "BU-filter " else "") ++
+ (if isFilterTD then "TD-filter " else "")) $
+ trace2 "input" (show (inputTo input)) $
+ finalEdges
+ where finalEdges = [ Edge j k (Rule cat (reverse found) name) |
+ (k, state) <-
+ tracePrt "#passiveChart"
+ (prt . map (length . (?Passive) . snd)) $
+ tracePrt "#activeChart"
+ (prt . map (length . concatMap snd . aAssocs . snd)) $
+ assocs finalChart,
+ Item j (Rule cat _Nil name) found <- state ? Passive ]
+
+ finalChart = buildChart keyof rules axioms $ inputBounds input
+
+ axioms 0 = --tracePrt ("axioms 0") (prtSep "\n") $
+ union $ map (tdInfer 0) start
+ axioms k = --tracePrt ("axioms "++show k) (prtSep "\n") $
+ union [ buInfer j k (Tok token) |
+ (token, js) <- aAssocs (inputTo input ! k), j <- js ]
+
+ rules k (Item j (Rule cat [] _) _)
+ = buInfer j k (Cat cat)
+ rules k (Item j rule@(Rule _ (Cat next:_) _) found)
+ = tdInfer k next <++>
+ -- hack for empty rules:
+ [ Item j (forward rule) (Cat next:found) |
+ emptyCategories grammar ?= next ]
+ rules _ _ = []
+
+ buInfer j k next = --tracePrt ("buInfer "++show(j,k)++" "++prt next) (prtSep "\n") $
+ buPredict j k next <++> buCombine j k next
+ tdInfer k next = tdPredict k next
+
+ -- the combine rule
+ buCombine j k next
+ | j == k = [] -- hack for empty rules
+ | otherwise = [ Item i (forward rule) (next:found) |
+ Item i rule found <- (finalChart ! j) ? Active next ]
+
+ -- kilbury bottom-up prediction
+ buPredict j k next
+ = [ Item j rule [next] | isPredictBU,
+ rule <- map forward $ --tracePrt ("buRules "++prt next) (prtSep "\n") $
+ bottomupRules grammar ? next,
+ buFilter rule k,
+ tdFilter rule j k ]
+
+ -- top-down prediction
+ tdPredict k cat
+ = [ Item k rule [] | isPredictTD || isFilterTD,
+ rule <- topdownRules grammar ? cat,
+ buFilter rule k ] <++>
+ -- hack for empty rules:
+ [ Item k rule [] | isPredictBU,
+ rule <- emptyLeftcornerRules grammar ? cat ]
+
+ -- bottom up filtering: input symbol k can begin the given symbol list (first set)
+ -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!!
+ buFilter (Rule _ (Cat cat:_) _) k | isFilterBU
+ = k < snd (inputBounds input) &&
+ hasCommonElements (leftcornerTokens grammar ? cat)
+ (aElems (inputFrom input ! k))
+ buFilter _ _ = True
+
+ -- top down filtering: 'cat' is reachable by an active edge ending in node j < k
+ tdFilter (Rule cat _ _) j k | isFilterTD && j < k
+ = (tdFilters ! j) ?= cat
+ tdFilter _ _ _ = True
+
+ tdFilters = listArray (inputBounds input) $
+ map (listSet . limit leftCats . activeCats) [0..]
+ activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ]
+ leftCats cat = [ left | Rule _cat (Cat left:_) _ <- topdownRules grammar ? cat ]
+
+
+-- type declarations, items & keys
+data Item n c t = Item Int (Rule n c t) [Symbol c t]
+ deriving (Eq, Ord, Show)
+
+data IKey c t = Active (Symbol c t) | Passive
+ deriving (Eq, Ord, Show)
+
+keyof :: Item n c t -> IKey c t
+keyof (Item _ (Rule _ (next:_) _) _) = Active next
+keyof (Item _ (Rule _ [] _) _) = Passive
+
+forward :: Rule n c t -> Rule n c t
+forward (Rule cat (_:rest) name) = Rule cat rest name
+
+
+instance (Print n, Print c, Print t) => Print (Item n c t) where
+ prt (Item k (Rule cat rhs name) syms)
+ = "<" ++show k++ ": "++prt name++". "++
+ prt cat++" -> "++prt rhs++" / "++prt syms++">"
+
+instance (Print c, Print t) => Print (IKey c t) where
+ prt (Active sym) = "?" ++ prt sym
+ prt (Passive) = "!"
+
+
diff --git a/src/GF/Parsing/GeneralChart.hs b/src/GF/Parsing/GeneralChart.hs
new file mode 100644
index 000000000..61f933932
--- /dev/null
+++ b/src/GF/Parsing/GeneralChart.hs
@@ -0,0 +1,85 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GeneralChart
+-- Maintainer : Peter Ljunglöf
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:42 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Simple implementation of deductive chart parsing
+-----------------------------------------------------------------------------
+
+
+module GF.Parsing.GeneralChart (-- * Type definition
+ Chart,
+ -- * Main functions
+ chartLookup,
+ buildChart,
+ -- * Probably not needed
+ emptyChart,
+ chartMember,
+ chartInsert,
+ chartList,
+ addToChart
+ ) where
+
+-- import Trace
+
+import GF.Data.RedBlackSet
+
+-- main functions
+
+chartLookup :: (Ord item, Ord key) => Chart item key -> key -> [item]
+buildChart :: (Ord item, Ord key) => (item -> key) ->
+ [Chart item key -> item -> [item]] -> [item] -> [item]
+
+buildChart keyof rules axioms = chartList (addItems axioms emptyChart)
+ where addItems [] = id
+ addItems (item:items) = addItems items . addItem item
+
+ -- addItem item | trace ("+ "++show item++"\n") False = undefined
+ addItem item = addToChart item (keyof item)
+ (\chart -> foldr (consequence item) chart rules)
+
+ consequence item rule chart = addItems (rule chart item) chart
+
+-- probably not needed
+
+emptyChart :: (Ord item, Ord key) => Chart item key
+chartMember :: (Ord item, Ord key) => Chart item key -> item -> key -> Bool
+chartInsert :: (Ord item, Ord key) => Chart item key -> item -> key -> Maybe (Chart item key)
+chartList :: (Ord item, Ord key) => Chart item key -> [item]
+addToChart :: (Ord item, Ord key) => item -> key -> (Chart item key -> Chart item key) -> Chart item key -> Chart item key
+
+addToChart item key after chart = maybe chart after (chartInsert chart item key)
+
+
+--------------------------------------------------------------------------------
+-- key charts as red/black trees
+
+newtype Chart item key = KC (RedBlackMap key item)
+ deriving Show
+
+emptyChart = KC rbmEmpty
+chartMember (KC tree) item key = rbmElem key item tree
+chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree)
+chartLookup (KC tree) key = rbmLookup key tree
+chartList (KC tree) = concatMap snd (rbmList tree)
+--------------------------------------------------------------------------------}
+
+
+{--------------------------------------------------------------------------------
+-- key charts as unsorted association lists -- OBSOLETE!
+
+newtype Chart item key = SC [(key, item)]
+
+emptyChart = SC []
+chartMember (SC chart) item key = (key,item) `elem` chart
+chartInsert (SC chart) item key = if (key,item) `elem` chart then Nothing else Just (SC ((key,item):chart))
+chartLookup (SC chart) key = [ item | (key',item) <- chart, key == key' ]
+chartList (SC chart) = map snd chart
+--------------------------------------------------------------------------------}
+
diff --git a/src/GF/Parsing/IncrementalChart.hs b/src/GF/Parsing/IncrementalChart.hs
new file mode 100644
index 000000000..a040ddd60
--- /dev/null
+++ b/src/GF/Parsing/IncrementalChart.hs
@@ -0,0 +1,49 @@
+----------------------------------------------------------------------
+-- |
+-- Module : IncrementalChart
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:42 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Implementation of /incremental/ deductive parsing,
+-- i.e. parsing one word at the time.
+-----------------------------------------------------------------------------
+
+
+module GF.Parsing.IncrementalChart (-- * Type definitions
+ IncrementalChart,
+ -- * Functions
+ buildChart,
+ chartList
+ ) where
+
+import Array
+import GF.Data.SortedList
+import GF.Data.Assoc
+
+buildChart :: (Ord item, Ord key) => (item -> key) ->
+ (Int -> item -> SList item) ->
+ (Int -> SList item) ->
+ (Int, Int) -> IncrementalChart item key
+
+chartList :: (Ord item, Ord key) => (Int -> item -> edge) -> IncrementalChart item key -> [edge]
+
+type IncrementalChart item key = Array Int (Assoc key (SList item))
+
+----------
+
+buildChart keyof rules axioms bounds = finalChartArray
+ where buildState k = limit (rules k) $ axioms k
+ finalChartList = map buildState [fst bounds .. snd bounds]
+ finalChartArray = listArray bounds $ map stateAssoc finalChartList
+ stateAssoc state = accumAssoc id [ (keyof item, item) | item <- state ]
+
+chartList combine chart = [ combine k item |
+ (k, state) <- assocs chart,
+ item <- concatMap snd $ aAssocs state ]
+
+
diff --git a/src/GF/Parsing/MCFParserBasic.hs b/src/GF/Parsing/MCFParserBasic.hs
new file mode 100644
index 000000000..03a1d8b9d
--- /dev/null
+++ b/src/GF/Parsing/MCFParserBasic.hs
@@ -0,0 +1,156 @@
+----------------------------------------------------------------------
+-- |
+-- Module : MCFParserBasic
+-- Maintainer : Peter Ljunglöf
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:42 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Simplest possible implementation of MCFG chart parsing
+-----------------------------------------------------------------------------
+
+module GF.Parsing.MCFParserBasic (parse
+ ) where
+
+import Tracing
+
+import Ix
+import GF.Parsing.Parser
+import GF.Conversion.MCFGrammar
+import GF.Parsing.GeneralChart
+import GF.Data.Assoc
+import GF.Data.SortedList
+import GF.Printing.PrintParser
+
+
+parse :: (Ord n, Ord c, Ord l, Ord t,
+ Print n, Print c, Print l, Print t) =>
+ MCFParser n c l t
+parse grammar start = edges2chart . extract . process grammar
+
+
+extract :: [Item n c l t] -> [(n, MEdge c l, [MEdge c l])]
+extract items = tracePrt "#passives" (prt.length) $
+ --trace2 "passives" (prtAfter "\n" [ i | i@(PItem _) <- items ]) $
+ [ item | PItem item <- items ]
+
+
+process :: (Ord n, Ord c, Ord l, Ord t,
+ Print n, Print c, Print l, Print t) =>
+ Grammar n c l t -> Input t -> [Item n c l t]
+process grammar input = buildChart keyof rules axioms
+ where axioms = initial
+ rules = [combine, scan, predict]
+
+ -- axioms
+ initial = traceItems "axiom" [] $
+ [ nextLin name tofind (addNull cat) (map addNull args) |
+ Rule cat args tofind name <- grammar ]
+
+ addNull a = (a, [])
+
+ -- predict
+ predict chart i1@(Item name tofind rho (Lin lbl []) (cat, found0) children)
+ = traceItems "predict" [i1]
+ [ nextLin name tofind (cat, found) children |
+ let found = insertRow lbl rho found0 ]
+ predict _ _ = []
+
+ -- combine
+ combine chart active@(Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _)
+ = do passive <- chartLookup chart (Passive cat)
+ combineItems active passive
+ combine chart passive@(PItem (_, (cat, _), _))
+ = do active <- chartLookup chart (Active cat)
+ combineItems active passive
+ combine _ _ = []
+
+ combineItems i1@(Item name tofind rho0 (Lin lbl (Cat(_,lbl',nr):rest)) found children0)
+ i2@(PItem (_, found', _))
+ = traceItems "combine" [i1,i2]
+ [ Item name tofind rho (Lin lbl rest) found children |
+ rho1 <- lookupLbl lbl' found',
+ let rho = concatRange rho0 rho1,
+ children <- updateChild nr children0 (snd found') ]
+
+ -- scan
+ scan chart i1@(Item name tofind rho0 (Lin lbl (Tok tok:rest)) found children)
+ = traceItems "scan" [i1]
+ [ Item name tofind rho (Lin lbl rest) found children |
+ let rho = concatRange rho0 (rangeOfToken tok) ]
+ scan _ _ = []
+
+ -- utilities
+ rangeOfToken tok = makeRange $ inputToken input ? tok
+
+ zeroRange = makeRange $ map (\i -> (i,i)) $ range $ inputBounds input
+
+ nextLin name [] found children = PItem (name, found, children)
+ nextLin name (lin : tofind) found children
+ = Item name tofind zeroRange lin found children
+
+lookupLbl a = map snd . filter (\b -> a == fst b) . snd
+updateChild nr children found = updateIndex nr children $
+ \child -> if null (snd child)
+ then [ (fst child, found) ]
+ else [ child | snd child == found ]
+
+insertRow lbl rho [] = [(lbl, rho)]
+insertRow lbl rho rows'@(row@(lbl', rho') : rows)
+ = case compare lbl lbl' of
+ LT -> row : insertRow lbl rho rows
+ GT -> (lbl, rho) : rows'
+ EQ -> (lbl, unionRange rho rho') : rows
+
+
+-- internal representation of parse items
+
+data Item n c l t
+ = Item n [Lin c l t] -- tofind
+ Range (Lin c l t) -- current row
+ (MEdge c l) -- found rows
+ [MEdge c l] -- found children
+ | PItem (n, MEdge c l, [MEdge c l])
+ deriving (Eq, Ord, Show)
+
+data IKey c = Passive c | Active c | AnyItem
+ deriving (Eq, Ord, Show)
+
+keyof (PItem (_, (cat, _), _)) = Passive cat
+keyof (Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) = Active cat
+keyof _ = AnyItem
+
+
+-- tracing
+
+--type TraceItem = Item String String Char String
+traceItems :: (Print n, Print l, Print c, Print t) =>
+ String -> [Item n c l t] -> [Item n c l t] -> [Item n c l t]
+traceItems rule trigs items
+ | null items || True = items
+ | otherwise = trace ("\n" ++ rule ++ ":" ++
+ unlines [ "\t" ++ prt i | i <- trigs ] ++ "=>" ++
+ unlines [ "\t" ++ prt i | i <- items ]) items
+
+-- pretty-printing
+
+instance (Print n, Print c, Print l, Print t) => Print (Item n c l t) where
+ prt (Item name tofind rho lin (cat, found) children)
+ = prt name ++ ". " ++ prt cat ++ prtRhs (map fst children) ++
+ " { " ++ prt rho ++ prt lin ++ " ; " ++
+ concat [ prt lbl ++ "=" ++ prt ln ++ " " |
+ Lin lbl ln <- tofind ] ++ "; " ++
+ concat [ prt lbl ++ "=" ++ prt rho ++ " " |
+ (lbl, rho) <- found ] ++ "} " ++
+ concat [ "[ " ++ concat [ prt lbl ++ "=" ++ prt rho ++ " " |
+ (lbl,rho) <- child ] ++ "] " |
+ child <- map snd children ]
+ prt (PItem (name, edge, edges))
+ = prt name ++ ". " ++ prt edge ++ prtRhs edges
+
+prtRhs [] = ""
+prtRhs rhs = " -> " ++ prtSep " " rhs
+
diff --git a/src/GF/Parsing/ParseCF.hs b/src/GF/Parsing/ParseCF.hs
new file mode 100644
index 000000000..20f45e3f2
--- /dev/null
+++ b/src/GF/Parsing/ParseCF.hs
@@ -0,0 +1,82 @@
+----------------------------------------------------------------------
+-- |
+-- Module : ParseCF
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:42 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Chart parsing of grammars in CF format
+-----------------------------------------------------------------------------
+
+module GF.Parsing.ParseCF (parse, alternatives) where
+
+import Tracing
+import GF.Printing.PrintParser
+import GF.Printing.PrintSimplifiedTerm
+
+import GF.Data.SortedList (nubsort)
+import GF.Data.Assoc
+import qualified CF
+import qualified CFIdent as CFI
+import GF.Parsing.Parser
+import GF.Conversion.CFGrammar
+import qualified GF.Parsing.ParseCFG as P
+
+type Token = CFI.CFTok
+type Name = CFI.CFFun
+type Category = CFI.CFCat
+
+alternatives :: [(String, [String])]
+alternatives = [ ("gb", ["G","GB","_gen","_genBU"]),
+ ("gt", ["GT","_genTD"]),
+ ("ibn", ["","I","B","IB","IBN","_inc","BU","_incBU"]),
+ ("ibb", ["BB","IBB","BU_BUF","_incBU_BUF"]),
+ ("ibt", ["BT","IBT","BU_TDF","_incBU_TDF"]),
+ ("iba", ["BA","IBA","BU_BTF","BU_TBF","_incBU_BTF","_incBU_TBF"]),
+ ("itn", ["T","IT","ITN","TD","_incTD"]),
+ ("itb", ["TB","ITB","TD_BUF","_incTD_BUF"])
+ ]
+
+parse :: String -> CF.CF -> Category -> CF.CFParser
+parse = buildParser . P.parse
+
+buildParser :: CFParser Name Category Token -> CF.CF -> Category -> CF.CFParser
+buildParser parser cf start tokens = trace "ParseCF" $
+ (parseResults, parseInformation)
+ where parseInformation = prtSep "\n" trees
+ parseResults = {-take maxTake-} [ (tree2cfTree t, []) | t <- trees ]
+ theInput = input tokens
+ edges = tracePrt "#edges" (prt.length) $
+ parser pInf [start] theInput
+ chart = tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
+ edges2chart theInput $ map (fmap addCategory) edges
+ forests = tracePrt "#forests" (prt.length) $
+ chart2forests chart (const False) $
+ uncurry Edge (inputBounds theInput) start
+ trees = tracePrt "#trees" (prt.length) $
+ concatMap forest2trees forests
+ pInf = pInfo $ cf2grammar cf (nubsort tokens)
+
+
+addCategory (Rule cat rhs name) = Rule cat rhs (name, cat)
+
+tree2cfTree (TNode (name, cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees))
+
+cf2grammar :: CF.CF -> [Token] -> Grammar Name Category Token
+cf2grammar cf tokens = [ Rule cat rhs name |
+ (name, (cat, rhs0)) <- cfRules,
+ rhs <- mapM item2symbol rhs0 ]
+ where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++
+ CF.rulesOfCF cf
+ item2symbol (CF.CFNonterm cat) = [Cat cat]
+ item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens
+
+-- maxTake :: Int
+-- maxTake = 500
+-- maxTake = maxBound
+
+
diff --git a/src/GF/Parsing/ParseCFG.hs b/src/GF/Parsing/ParseCFG.hs
new file mode 100644
index 000000000..1005d5656
--- /dev/null
+++ b/src/GF/Parsing/ParseCFG.hs
@@ -0,0 +1,43 @@
+----------------------------------------------------------------------
+-- |
+-- Module : ParseCFG
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:42 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Main parsing module for context-free grammars
+-----------------------------------------------------------------------------
+
+
+module GF.Parsing.ParseCFG (parse) where
+
+import Char (toLower)
+import GF.Parsing.Parser
+import GF.Conversion.CFGrammar
+import qualified GF.Parsing.CFParserGeneral as PGen
+import qualified GF.Parsing.CFParserIncremental as PInc
+
+
+parse :: (Ord n, Ord c, Ord t, Show t) =>
+ String -> CFParser n c t
+parse = decodeParser . map toLower
+
+decodeParser ['g',s] = PGen.parse (decodeStrategy s)
+decodeParser ['i',s,f] = PInc.parse (decodeStrategy s, decodeFilter f)
+decodeParser _ = decodeParser "ibn"
+
+decodeStrategy 'b' = (True, False)
+decodeStrategy 't' = (False, True)
+
+decodeFilter 'a' = (True, True)
+decodeFilter 'b' = (True, False)
+decodeFilter 't' = (False, True)
+decodeFilter 'n' = (False, False)
+
+
+
+
diff --git a/src/GF/Parsing/ParseGFC.hs b/src/GF/Parsing/ParseGFC.hs
new file mode 100644
index 000000000..0d0d5c662
--- /dev/null
+++ b/src/GF/Parsing/ParseGFC.hs
@@ -0,0 +1,177 @@
+----------------------------------------------------------------------
+-- |
+-- Module : ParseGFC
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:43 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- The main parsing module, parsing GFC grammars
+-- by translating to simpler formats, such as PMCFG and CFG
+----------------------------------------------------------------------
+
+module GF.Parsing.ParseGFC (newParser) where
+
+import Tracing
+import GF.Printing.PrintParser
+import qualified PrGrammar
+
+-- Haskell modules
+import Monad
+-- import Ratio ((%))
+-- GF modules
+import qualified Grammar as GF
+import Values
+import qualified Macros
+import qualified Modules as Mods
+import qualified AbsGFC
+import qualified Ident
+import qualified ShellState as SS
+import Operations
+import GF.Data.SortedList
+-- Conversion and parser modules
+import GF.Data.Assoc
+import GF.Parsing.Parser
+-- import ConvertGrammar
+import GF.Conversion.GrammarTypes
+import qualified GF.Conversion.MCFGrammar as M
+import qualified GF.Conversion.CFGrammar as C
+import qualified GF.Parsing.ParseMCFG as PM
+import qualified GF.Parsing.ParseCFG as PC
+--import MCFRange
+
+newParser :: String -> SS.StateGrammar -> GF.Cat -> String -> Err [GF.Term]
+
+-- parsing via MCFG
+newParser (m:strategy) gr (_, startCat) inString
+ | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms
+ where terms = map (ptree2term abstract) trees
+ trees = --tracePrt "trees" (prtBefore "\n") $
+ tracePrt "#trees" (prt . length) $
+ concatMap forest2trees forests
+ forests = --tracePrt "forests" (prtBefore "\n") $
+ tracePrt "#forests" (prt . length) $
+ concatMap (chart2forests chart isMeta) finalEdges
+ isMeta = null . snd
+ finalEdges = tracePrt "finalEdges" (prtBefore "\n") $
+ filter isFinalEdge $ aElems chart
+-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) |
+-- let (i, j) = inputBounds inTokens,
+-- E.Rule cat _ [E.Lin lbl _] _ <- pInf,
+-- isStartCat cat ]
+ isFinalEdge (cat, rows)
+ = isStartCat cat &&
+ inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ]
+ chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $
+ tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
+ PM.parse strategy pInf starters inTokens
+ inTokens = input $ map AbsGFC.KS $ words inString
+ pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $
+ mcfPInfo $ SS.statePInfo gr
+ starters = tracePrt "startCats" prt $
+ filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ]
+ isStartCat (MCFCat cat _) = cat == startCat
+ abstract = tracePrt "abstract module" PrGrammar.prt $
+ SS.absId gr
+
+-- parsing via CFG
+newParser (c:strategy) gr (_, startCat) inString
+ | c=='c' || c=='C' = trace2 "Parser" "CFG" $ Ok terms
+ where terms = -- tracePrt "terms" (unlines . map PrGrammar.prt) $
+ map (ptree2term abstract) trees
+ trees = tracePrt "#trees" (prt . length) $
+ --tracePrt "trees" (prtSep "\n") $
+ concatMap forest2trees forests
+ forests = tracePrt "$cfForests" (prt) $ -- . length) $
+ tracePrt "forests" (unlines . map prt) $
+ concatMap convertFromCFForest cfForests
+ cfForests= tracePrt "cfForests" (unlines . map prt) $
+ concatMap (chart2forests chart (const False)) finalEdges
+ finalEdges = tracePrt "finalChartEdges" prt $
+ map (uncurry Edge (inputBounds inTokens)) starters
+ chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $
+ tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
+ C.edges2chart inTokens edges
+ edges = --tracePrt "finalEdges"
+ --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $
+ tracePrt "#edges" (prt . length) $
+ PC.parse strategy pInf starters inTokens
+ inTokens = input $ map AbsGFC.KS $ words inString
+ pInf = cfPInfo $ SS.statePInfo gr
+ starters = tracePrt "startCats" prt $
+ filter isStartCat $ map fst $ aAssocs $ C.topdownRules pInf
+ isStartCat (CFCat (MCFCat cat _) _) = cat == startCat
+ abstract = tracePrt "abstract module" PrGrammar.prt $
+ SS.absId gr
+ --ifNull (Ident.identC "ABS") last $
+ --[i | (i, Mods.ModMod m) <- Mods.modules (SS.grammar gr), Mods.isModAbs m]
+
+newParser "" gr start inString = newParser "c" gr start inString
+
+newParser opt gr (_,cat) _ =
+ Bad ("new-parser '" ++ opt ++ "' not defined yet")
+
+ptree2term :: Ident.Ident -> ParseTree Name -> GF.Term
+ptree2term a (TNode f ts) = Macros.mkApp (Macros.qq (a,f)) (map (ptree2term a) ts)
+ptree2term a (TMeta) = GF.Meta (GF.MetaSymb 0)
+
+----------------------------------------------------------------------
+-- conversion and unification of forests
+
+convertFromCFForest :: ParseForest CFName -> [ParseForest Name]
+convertFromCFForest (FNode (CFName name profile) children)
+ | isCoercion name = concat chForests
+ | otherwise = [ FNode name chForests | not (null chForests) ]
+ where chForests = concat [ mapM (checkProfile forests) profile |
+ forests0 <- children,
+ forests <- mapM convertFromCFForest forests0 ]
+ checkProfile forests = unifyManyForests . map (forests !!)
+ -- foldM unifyForests FMeta . map (forests !!)
+
+isCoercion Ident.IW = True
+isCoercion _ = False
+
+unifyManyForests :: Eq n => [ParseForest n] -> [ParseForest n]
+unifyManyForests [] = [FMeta]
+unifyManyForests [f] = [f]
+unifyManyForests (f:g:fs) = do h <- unifyForests f g
+ unifyManyForests (h:fs)
+
+unifyForests :: Eq n => ParseForest n -> ParseForest n -> [ParseForest n]
+unifyForests FMeta forest = [forest]
+unifyForests forest FMeta = [forest]
+unifyForests (FNode name1 children1) (FNode name2 children2)
+ = [ FNode name1 children | name1 == name2, not (null children) ]
+ where children = [ forests | forests1 <- children1, forests2 <- children2,
+ forests <- zipWithM unifyForests forests1 forests2 ]
+
+
+
+{-
+----------------------------------------------------------------------
+-- conversion and unification for parse trees instead of forests
+
+convertFromCFTree :: ParseTree CFName -> [ParseTree Name]
+convertFromCFTree (TNode (CFName name profile) children0)
+ = [ TNode name children |
+ children1 <- mapM convertFromCFTree children0,
+ children <- mapM (checkProfile children1) profile ]
+ where checkProfile trees = unifyManyTrees . map (trees !!)
+
+unifyManyTrees :: Eq n => [ParseTree n] -> [ParseTree n]
+unifyManyTrees [] = [TMeta]
+unifyManyTrees [f] = [f]
+unifyManyTrees (f:g:fs) = do h <- unifyTrees f g
+ unifyManyTrees (h:fs)
+
+unifyTrees TMeta tree = [tree]
+unifyTrees tree TMeta = [tree]
+unifyTrees (TNode name1 children1) (TNode name2 children2)
+ = [ TNode name1 children | name1 == name2,
+ children <- zipWithM unifyTrees children1 children2 ]
+
+-}
+
diff --git a/src/GF/Parsing/ParseMCFG.hs b/src/GF/Parsing/ParseMCFG.hs
new file mode 100644
index 000000000..4afc44bb7
--- /dev/null
+++ b/src/GF/Parsing/ParseMCFG.hs
@@ -0,0 +1,37 @@
+----------------------------------------------------------------------
+-- |
+-- Module : ParseMCFG
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:43 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Main module for MCFG parsing
+-----------------------------------------------------------------------------
+
+
+module GF.Parsing.ParseMCFG (parse) where
+
+import Char (toLower)
+import GF.Parsing.Parser
+import GF.Conversion.MCFGrammar
+import qualified GF.Parsing.MCFParserBasic as PBas
+import GF.Printing.PrintParser
+---- import qualified MCFParserBasic2 as PBas2 -- file not found AR
+
+
+parse :: (Ord n, Ord c, Ord l, Ord t,
+ Print n, Print c, Print l, Print t) =>
+ String -> MCFParser n c l t
+parse str = decodeParser (map toLower str)
+
+decodeParser "b" = PBas.parse
+---- decodeParser "c" = PBas2.parse
+decodeParser _ = decodeParser "c"
+
+
+
+
diff --git a/src/GF/Parsing/Parser.hs b/src/GF/Parsing/Parser.hs
new file mode 100644
index 000000000..0c18514f9
--- /dev/null
+++ b/src/GF/Parsing/Parser.hs
@@ -0,0 +1,187 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Parser
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/03/21 14:17:43 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- Basic type declarations and functions to be used when parsing
+-----------------------------------------------------------------------------
+
+
+module GF.Parsing.Parser ( -- * Symbols
+ Symbol(..), symbol, mapSymbol,
+ -- * Edges
+ Edge(..),
+ -- * Parser input
+ Input(..), makeInput, input, inputMany,
+ -- * charts, parse forests & trees
+ ParseChart, ParseForest(..), ParseTree(..),
+ chart2forests, forest2trees
+ ) where
+
+-- haskell modules:
+import Monad
+import Array
+-- gf modules:
+import GF.Data.SortedList
+import GF.Data.Assoc
+-- parsing modules:
+import GF.Printing.PrintParser
+
+------------------------------------------------------------
+-- symbols
+
+data Symbol c t = Cat c | Tok t
+ deriving (Eq, Ord, Show)
+
+symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
+mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u
+
+----------
+
+symbol fc ft (Cat cat) = fc cat
+symbol fc ft (Tok tok) = ft tok
+
+mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)
+
+
+------------------------------------------------------------
+-- edges
+
+data Edge s = Edge Int Int s
+ deriving (Eq, Ord, Show)
+
+instance Functor Edge where
+ fmap f (Edge i j s) = Edge i j (f s)
+
+
+------------------------------------------------------------
+-- parser input
+
+data Input t = MkInput { inputEdges :: [Edge t],
+ inputBounds :: (Int, Int),
+ inputFrom :: Array Int (Assoc t [Int]),
+ inputTo :: Array Int (Assoc t [Int]),
+ inputToken :: Assoc t [(Int, Int)]
+ }
+
+makeInput :: Ord t => [Edge t] -> Input t
+input :: Ord t => [t] -> Input t
+inputMany :: Ord t => [[t]] -> Input t
+
+----------
+
+makeInput inEdges | null inEdges = input []
+ | otherwise = MkInput inEdges inBounds inFrom inTo inToken
+ where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ]
+ where minmax (a, b) (a', b') = (min a a', max b b')
+ inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $
+ [ (i, [(tok, j)]) | Edge i j tok <- inEdges ]
+ inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds
+ [ (j, [(tok, i)]) | Edge i j tok <- inEdges ]
+ inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
+
+input toks = MkInput inEdges inBounds inFrom inTo inToken
+ where inEdges = zipWith3 Edge [0..] [1..] toks
+ inBounds = (0, length toks)
+ inFrom = listArray inBounds $
+ [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ]
+ inTo = listArray inBounds $
+ [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ]
+ inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
+
+inputMany toks = MkInput inEdges inBounds inFrom inTo inToken
+ where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ]
+ inBounds = (0, length toks)
+ inFrom = listArray inBounds $
+ [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ]
+ ++ [ listAssoc [] ]
+ inTo = listArray inBounds $
+ [ listAssoc [] ] ++
+ [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ]
+ inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
+
+
+------------------------------------------------------------
+-- charts, parse forests & trees
+
+type ParseChart n e = Assoc e [(n, [[e]])]
+
+data ParseForest n = FNode n [[ParseForest n]] | FMeta
+ deriving (Eq, Ord, Show)
+
+data ParseTree n = TNode n [ParseTree n] | TMeta
+ deriving (Eq, Ord, Show)
+
+chart2forests :: Ord e => ParseChart n e -> (e -> Bool) -> e -> [ParseForest n]
+
+--filterCoercions :: (n -> Bool) -> ParseForest n -> [ParseForest n]
+
+forest2trees :: ParseForest n -> [ParseTree n]
+
+instance Functor ParseTree where
+ fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
+ fmap f (TMeta) = TMeta
+
+instance Functor ParseForest where
+ fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
+ fmap f (FMeta) = FMeta
+
+----------
+
+chart2forests chart isMeta = edge2forests
+ where item2forest (name, children) = FNode name $
+ do edges <- children
+ mapM edge2forests edges
+ edge2forests edge
+ | isMeta edge = [FMeta]
+ | otherwise = filter checkForest $ map item2forest $ chart ? edge
+ checkForest (FNode _ children) = not (null children)
+
+-- filterCoercions _ (FMeta) = [FMeta]
+-- filterCoercions isCoercion (FNode s forests)
+-- | isCoercion s = do [forest] <- forests ; filterCoercions isCoercion forest
+-- | otherwise = FNode s $ do children <- forests ; mapM (filterCoercions isCoercion)
+
+forest2trees (FNode s forests) = map (TNode s) $ forests >>= mapM forest2trees
+forest2trees (FMeta) = [TMeta]
+
+
+
+------------------------------------------------------------
+-- pretty-printing
+
+instance (Print c, Print t) => Print (Symbol c t) where
+ prt = symbol prt (simpleShow.prt)
+ prtList = prtSep " "
+
+simpleShow :: String -> String
+simpleShow s = "\"" ++ concatMap mkEsc s ++ "\""
+ where
+ mkEsc :: Char -> String
+ mkEsc c = case c of
+ _ | elem c "\\\"" -> '\\' : [c]
+ '\n' -> "\\n"
+ '\t' -> "\\t"
+ _ -> [c]
+
+instance (Print s) => Print (Edge s) where
+ prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]"
+ prtList = prtSep ""
+
+instance (Print s) => Print (ParseTree s) where
+ prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}"
+ prt (TMeta) = "?"
+ prtList = prtAfter "\n"
+
+instance (Print s) => Print (ParseForest s) where
+ prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}"
+ prt (FMeta) = "?"
+ prtList = prtAfter "\n"
+
+