summaryrefslogtreecommitdiff
path: root/src/GF/Parsing/GFC.hs
diff options
context:
space:
mode:
authorpeb <unknown>2005-04-11 12:57:45 +0000
committerpeb <unknown>2005-04-11 12:57:45 +0000
commitac00f77dadd4d447803dd7cab5a36f47365325d0 (patch)
tree2fd02b19234f8d1fcc20ee67a2367d4d4eebfcd8 /src/GF/Parsing/GFC.hs
parentf6273f7033b85eea9a8d0cc7d31e9697ba95d5b7 (diff)
"Committed_by_peb"
Diffstat (limited to 'src/GF/Parsing/GFC.hs')
-rw-r--r--src/GF/Parsing/GFC.hs187
1 files changed, 187 insertions, 0 deletions
diff --git a/src/GF/Parsing/GFC.hs b/src/GF/Parsing/GFC.hs
new file mode 100644
index 000000000..11fdbbe04
--- /dev/null
+++ b/src/GF/Parsing/GFC.hs
@@ -0,0 +1,187 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/11 13:52:51 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.1 $
+--
+-- The main parsing module, parsing GFC grammars
+-- by translating to simpler formats, such as PMCFG and CFG
+----------------------------------------------------------------------
+
+module GF.NewParsing.GFC
+ (parse, PInfo(..), buildPInfo) where
+
+import GF.System.Tracing
+import GF.Infra.Print
+import qualified PrGrammar
+
+import Monad
+
+import qualified Grammar
+-- import Values
+import qualified Macros
+-- import qualified Modules
+import qualified AbsGFC
+import qualified Ident
+import Operations
+import CFIdent (CFCat, cfCat2Ident, CFTok, prCFTok)
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+import GF.Formalism.Utilities
+import GF.Conversion.Types
+import GF.Formalism.SimpleGFC
+import qualified GF.Formalism.MCFG as M
+import qualified GF.Formalism.CFG as C
+-- import qualified GF.NewParsing.MCFG as PM
+import qualified GF.NewParsing.CFG as PC
+--import qualified GF.Conversion.FromGFC as From
+
+----------------------------------------------------------------------
+-- parsing information
+
+-- | Parsing information built from the compiled grammars.
+data PInfo = PInfo
+    { mcfPInfo :: ()                             -- ^ not implemented yet
+    , cfPInfo  :: PC.CFPInfo CCat CName Token    -- ^ information used by the CFG parser
+    }
+
+-- | Build the parsing information for a pair of compiled grammars.
+-- Only the CFG is used; the MCFG argument is ignored because the
+-- 'mcfPInfo' field is always the unit value.
+buildPInfo :: MGrammar -> CGrammar -> PInfo
+buildPInfo mcfg cfg = PInfo () cfInfo
+    where cfInfo = PC.buildCFPInfo cfg
+
+
+----------------------------------------------------------------------
+-- main parsing function
+
+-- | Parse a list of tokens into GF terms.
+--
+-- A strategy starting with @c@ or @C@ selects the CFG parser; the rest of
+-- the strategy string is passed on to it.  Every other strategy (the empty
+-- one included) is retried with a @c@ put in front, so the CFG parser is
+-- the default.
+parse :: String         -- ^ parsing strategy
+      -> PInfo          -- ^ compiled grammars (mcfg and cfg)
+      -> Ident.Ident    -- ^ abstract module name
+      -> CFCat          -- ^ starting category
+      -> [CFTok]        -- ^ input tokens
+      -> [Grammar.Term] -- ^ resulting GF terms
+
+-- parsing via CFG
+parse (c:strategy) pinfo abs startCat
+    | c `elem` "cC" = map (tree2term abs) . cfgParser . map prCFTok
+    where cfgParser = parseCFG strategy pinfo startCats
+          -- the left-hand categories of the top-down rules whose
+          -- identifier equals that of the requested starting category
+          startCats = tracePrt "startCats" prt
+                          [ cat | (cat, _) <- aAssocs cfRules, isWanted cat ]
+          cfRules   = PC.topdownRules (cfPInfo pinfo)
+          isWanted (CCat (MCat cat _) _) = cat == cfCat2Ident startCat
+
+-- default parser: fall back to the CFG parser with the whole strategy string
+parse strategy pinfo abs startCat = parse ('c' : strategy) pinfo abs startCat
+
+
+----------------------------------------------------------------------
+
+-- | Parse the input with the CFG parser and return the resulting trees.
+--
+-- The stages, read bottom-up in the @where@ clause: run 'PC.parseCF' to get
+-- the CF chart, convert it with 'C.grammar2chart', extract the forests for
+-- the final edges (one edge per start category over the bounds of the
+-- input), convert every forest with 'convertFromCFForest', unfold the
+-- forests into trees and pass the result through 'nubsort'.  Each stage
+-- reports to the tracer.
+parseCFG :: String -> PInfo -> [CCat] -> [Token] -> [SyntaxTree Name]
+parseCFG strategy pInfo startCats inString
+    = trace2 "Parser" "CFG" result
+    where result    = tracePrt "#trees" (prt . length) $
+                      nubsort (concatMap forest2trees gfForests)
+          -- (an optional compactForests pass before forest2trees is
+          -- currently disabled)
+
+          gfForests = tracePrt "#forests" (prt . length) $
+                      concatMap convertFromCFForest cfForests
+          cfForests = tracePrt "#cfForests" (prt . length) $
+                      chart2forests chart (const False) goalEdges
+
+          goalEdges = tracePrt "finalChartEdges" prt $
+                      map goalEdge startCats
+          goalEdge  = uncurry Edge (inputBounds tokens)
+          chart     = tracePrt "#chart" (prt . map (length . snd) . aAssocs) $
+                      C.grammar2chart cfChart
+          cfChart   = tracePrt "#cfChart" (prt . length) $
+                      PC.parseCF strategy (cfPInfo pInfo) startCats tokens
+
+          tokens    = input inString
+
+
+{-
+-- parsing via MCFG
+newParser (m:strategy) gr (_, startCat) inString
+ | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms
+ where terms = map (tree2term abstract) trees
+ trees = --tracePrt "trees" (prtBefore "\n") $
+ tracePrt "#trees" (prt . length) $
+ concatMap forest2trees forests
+ forests = --tracePrt "forests" (prtBefore "\n") $
+ tracePrt "#forests" (prt . length) $
+ concatMap (chart2forests chart isMeta) finalEdges
+ isMeta = null . snd
+ finalEdges = tracePrt "finalEdges" (prtBefore "\n") $
+ filter isFinalEdge $ aElems chart
+-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) |
+-- let (i, j) = inputBounds inTokens,
+-- E.Rule cat _ [E.Lin lbl _] _ <- pInf,
+-- isStartCat cat ]
+ isFinalEdge (cat, rows)
+ = isStartCat cat &&
+ inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ]
+ chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $
+ tracePrt "#chart" (prt . map (length.snd) . aAssocs) $
+ PM.parse strategy pInf starters inTokens
+ inTokens = input $ map AbsGFC.KS $ words inString
+ pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $
+ mcfPInfo $ SS.statePInfo gr
+ starters = tracePrt "startCats" prt $
+ filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ]
+ isStartCat (MCFCat cat _) = cat == startCat
+ abstract = tracePrt "abstract module" PrGrammar.prt $
+ SS.absId gr
+-}
+
+
+----------------------------------------------------------------------
+-- parse trees to GF terms
+
+-- | Turn a parse tree into a GF term.  A node becomes the application of
+-- its function (combined with the given module identifier by 'Macros.qq')
+-- to the converted subtrees; a meta node becomes @Macros.mkMeta 0@.
+tree2term :: Ident.Ident -> SyntaxTree Name -> Grammar.Term
+tree2term abs tree = case tree of
+    TNode fun subtrees -> Macros.mkApp (Macros.qq (abs, fun))
+                                       [ tree2term abs sub | sub <- subtrees ]
+    TMeta              -> Macros.mkMeta 0
+
+
+----------------------------------------------------------------------
+-- conversion and unification of forests
+
+-- | Convert a forest labelled with CF names into forests labelled with
+-- plain names, applying the profile stored in each CF name.
+--
+-- For every alternative list of children, the children are converted
+-- recursively and then, for each entry of the profile, combined by
+-- 'checkProfile'.  A coercion node is removed (its converted children are
+-- returned directly); any other node is kept only when at least one
+-- alternative survived the conversion.
+convertFromCFForest :: SyntaxForest CName -> [SyntaxForest Name]
+
+-- simplest implementation
+convertFromCFForest (FNode (CName fun profile) alternatives)
+    | isCoercion fun = concat converted
+    | null converted = []
+    | otherwise      = [FNode fun converted]
+    where converted = do cfChildren <- alternatives
+                         children   <- mapM convertFromCFForest cfChildren
+                         mapM (checkProfile children) profile
+
+{-
+-- more intelligent(?) implementation
+convertFromCFForest (FNode (CName name profile) children)
+ | isCoercion name = concat chForests
+ | otherwise = [ FNode name chForests | not (null chForests) ]
+ where chForests = concat [ mapM (checkProfile forests) profile |
+ forests0 <- children,
+ forests <- mapM convertFromCFForest forests0 ]
+-}
+
+-- | Pick the forests at the given positions and hand them to
+-- 'unifyManyForests'.  The positions are looked up with '!!', so each one
+-- has to be a valid index into the forest list.
+checkProfile forests positions = unifyManyForests (map (forests !!) positions)
+
+
+----------------------------------------------------------------------
+-- conversion and unification for parse trees instead of forests
+
+-- | Tree counterpart of 'convertFromCFForest': convert the children
+-- recursively, then build one argument per profile entry by passing the
+-- children at the listed positions to 'unifyManyTrees'.
+convertFromCFTree :: SyntaxTree CName -> [SyntaxTree Name]
+convertFromCFTree (TNode (CName fun profile) cfChildren)
+    = do converted <- mapM convertFromCFTree cfChildren
+         args      <- mapM (unifyAt converted) profile
+         return (TNode fun args)
+    where -- positions are looked up with (!!) and must be valid indices
+          unifyAt trees positions = unifyManyTrees (map (trees !!) positions)
+