From 9c2d27b8d19343c4401e0f622e7d541101982670 Mon Sep 17 00:00:00 2001 From: krasimir Date: Thu, 29 May 2008 12:27:26 +0000 Subject: move GF.Parsing.FCFG.PInfo to GF.GFCC.BuildParser and rename FCFPInfo to ParserInfo --- GF.cabal | 4 +- src-3.0/GF/Compile/GrammarToGFCC.hs | 4 +- src-3.0/GF/GFCC/BuildParser.hs | 84 ++++++++++++++++++++++++++++++++ src-3.0/GF/GFCC/DataGFCC.hs | 30 ++++++------ src-3.0/GF/GFCC/GFCCtoJS.hs | 2 +- src-3.0/GF/GFCC/Macros.hs | 11 +++-- src-3.0/GF/GFCC/Raw/ConvertGFCC.hs | 8 +-- src-3.0/GF/Parsing/FCFG.hs | 14 +++--- src-3.0/GF/Parsing/FCFG/Active.hs | 26 ++++++---- src-3.0/GF/Parsing/FCFG/PInfo.hs | 97 ------------------------------------- 10 files changed, 139 insertions(+), 141 deletions(-) create mode 100644 src-3.0/GF/GFCC/BuildParser.hs delete mode 100644 src-3.0/GF/Parsing/FCFG/PInfo.hs diff --git a/GF.cabal b/GF.cabal index b7d68a286..07b4548e9 100644 --- a/GF.cabal +++ b/GF.cabal @@ -34,6 +34,7 @@ library GF.GFCC.Macros GF.GFCC.Generate GF.GFCC.Linearize + GF.GFCC.BuildParser GF.Command.LexGFShell GF.Command.AbsGFShell GF.Command.PrintGFShell @@ -46,7 +47,6 @@ library GF.Data.Assoc GF.Infra.PrintClass GF.Formalism.Utilities - GF.Parsing.FCFG.PInfo GF.Parsing.FCFG.Active GF.GFCC.Raw.ConvertGFCC GF.Data.ErrM @@ -99,7 +99,7 @@ executable gf3 GF.GFCC.Raw.ParGFCCRaw GF.GFCC.Raw.PrintGFCCRaw GF.Formalism.Utilities - GF.Parsing.FCFG.PInfo + GF.GFCC.BuildParser GF.GFCC.DataGFCC GF.Parsing.FCFG.Active GF.GFCC.Raw.ConvertGFCC diff --git a/src-3.0/GF/Compile/GrammarToGFCC.hs b/src-3.0/GF/Compile/GrammarToGFCC.hs index 4877ff556..d29c20e17 100644 --- a/src-3.0/GF/Compile/GrammarToGFCC.hs +++ b/src-3.0/GF/Compile/GrammarToGFCC.hs @@ -8,6 +8,7 @@ import qualified GF.GFCC.DataGFCC as C import qualified GF.GFCC.DataGFCC as D import GF.GFCC.CId import GF.GFCC.PrintGFCC +import GF.GFCC.BuildParser (buildParserInfo) import GF.Grammar.Predef import GF.Grammar.PrGrammar import GF.Grammar.Grammar @@ -19,7 +20,6 @@ import qualified GF.Infra.Modules as M import qualified GF.Infra.Option as O import GF.Compile.GenerateFCFG (convertConcrete) -import GF.Parsing.FCFG.PInfo (buildFCFPInfo) import GF.Infra.Ident import GF.Infra.Option import GF.Data.Operations @@ -54,7 +54,7 @@ mkCanon2gfcc opts cnc gr = addParsers :: D.GFCC -> D.GFCC addParsers gfcc = gfcc { D.concretes = Map.map conv (D.concretes gfcc) } where - conv cnc = cnc { D.parser = Just (buildFCFPInfo (convertConcrete (D.abstract gfcc) cnc)) } + conv cnc = cnc { D.parser = Just (buildParserInfo (convertConcrete (D.abstract gfcc) cnc)) } -- Generate GFCC from GFCM. -- this assumes a grammar translated by canon2canon diff --git a/src-3.0/GF/GFCC/BuildParser.hs b/src-3.0/GF/GFCC/BuildParser.hs new file mode 100644 index 000000000..a32b6c65d --- /dev/null +++ b/src-3.0/GF/GFCC/BuildParser.hs @@ -0,0 +1,84 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.GFCC.BuildParser where + +import GF.Infra.PrintClass +import GF.Formalism.Utilities +import GF.Data.SortedList +import GF.Data.Assoc +import GF.GFCC.CId +import GF.GFCC.DataGFCC + +import Data.Array +import Data.Maybe +import qualified Data.Map as Map +import qualified Data.Set as Set +import Debug.Trace + + +------------------------------------------------------------ +-- parser information + +getLeftCornerTok (FRule _ _ _ _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymTok tok -> [tok] + _ -> [] + | otherwise = [] + where + syms = lins ! 0 + +getLeftCornerCat (FRule _ _ args _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat _ d -> [args !! d] + _ -> [] + | otherwise = [] + where + syms = lins ! 0 + +buildParserInfo :: FGrammar -> ParserInfo +buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ + ParserInfo { allRules = allrules + , topdownRules = topdownrules + -- , emptyRules = emptyrules + , epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + , grammarToks = grammartoks + , startupCats = startup + } + + where allrules = listArray (0,length grammar-1) grammar + topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules] + epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules, + not (inRange (bounds (lins ! 0)) 0) ] + leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ] + leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ] + grammarcats = aElems topdownrules + grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] + + +---------------------------------------------------------------------- +-- pretty-printing of statistics + +instance Print ParserInfo where + prt pI = "[ allRules=" ++ sl (elems . allRules) ++ + "; tdRules=" ++ sla topdownRules ++ + -- "; emptyRules=" ++ sl emptyRules ++ + "; epsilonRules=" ++ sl epsilonRules ++ + "; lcCats=" ++ sla leftcornerCats ++ + "; lcTokens=" ++ sla leftcornerTokens ++ + "; categories=" ++ sl grammarCats ++ + " ]" + + where sl f = show $ length $ f pI + sla f = let (as, bs) = unzip $ aAssocs $ f pI + in show (length as) ++ "/" ++ show (length (concat bs)) + diff --git a/src-3.0/GF/GFCC/DataGFCC.hs b/src-3.0/GF/GFCC/DataGFCC.hs index 95a1c28ec..a1ca5a02d 100644 --- a/src-3.0/GF/GFCC/DataGFCC.hs +++ b/src-3.0/GF/GFCC/DataGFCC.hs @@ -35,7 +35,7 @@ data Concr = Concr { lindefs :: Map.Map CId Term, -- lin default of a cat printnames :: Map.Map CId Term, -- printname of a cat or a fun paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names - parser :: Maybe FCFPInfo -- parser + parser :: Maybe ParserInfo -- parser } data Type = @@ -100,20 +100,20 @@ data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos type RuleId = Int -data FCFPInfo - = FCFPInfo { allRules :: Array RuleId FRule - , topdownRules :: Assoc FCat [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - -- , emptyRules :: [RuleId] - , epsilonRules :: [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , leftcornerCats :: Assoc FCat [RuleId] - , leftcornerTokens :: Assoc FToken [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , grammarCats :: [FCat] - , grammarToks :: [FToken] - , startupCats :: Map.Map CId [FCat] - } +data ParserInfo + = ParserInfo { allRules :: Array RuleId FRule + , topdownRules :: Assoc FCat [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + -- , emptyRules :: [RuleId] + , epsilonRules :: [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , leftcornerCats :: Assoc FCat [RuleId] + , leftcornerTokens :: Assoc FToken [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: [FCat] + , grammarToks :: [FToken] + , startupCats :: Map.Map CId [FCat] + } fcatString, fcatInt, fcatFloat, fcatVar :: Int diff --git a/src-3.0/GF/GFCC/GFCCtoJS.hs b/src-3.0/GF/GFCC/GFCCtoJS.hs index f0b19ba09..d2d12a776 100644 --- a/src-3.0/GF/GFCC/GFCCtoJS.hs +++ b/src-3.0/GF/GFCC/GFCCtoJS.hs @@ -85,7 +85,7 @@ children :: JS.Ident children = JS.Ident "cs" -- Parser -parser2js :: String -> FCFPInfo -> [JS.Expr] +parser2js :: String -> ParserInfo -> [JS.Expr] parser2js start p = [new "Parser" [JS.EStr start, JS.EArray $ map frule2js (Array.elems (allRules p)), JS.EObj $ map cats (Map.assocs (startupCats p))]] diff --git a/src-3.0/GF/GFCC/Macros.hs b/src-3.0/GF/GFCC/Macros.hs index 85a92523a..0750fb2ff 100644 --- a/src-3.0/GF/GFCC/Macros.hs +++ b/src-3.0/GF/GFCC/Macros.hs @@ -2,10 +2,10 @@ module GF.GFCC.Macros where import GF.GFCC.CId import GF.GFCC.DataGFCC -import GF.Parsing.FCFG.PInfo (fcfPInfoToFGrammar) import GF.Infra.PrintClass import Control.Monad -import qualified Data.Map as Map +import qualified Data.Map as Map +import qualified Data.Array as Array import Data.Maybe import Data.List @@ -31,11 +31,14 @@ lookType :: GFCC -> CId -> Type lookType gfcc f = fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc)) -lookParser :: GFCC -> CId -> Maybe FCFPInfo +lookParser :: GFCC -> CId -> Maybe ParserInfo lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc lookFCFG :: GFCC -> CId -> Maybe FGrammar -lookFCFG gfcc lang = fmap fcfPInfoToFGrammar $ lookParser gfcc lang +lookFCFG gfcc lang = fmap toFGrammar $ lookParser gfcc lang + where + toFGrammar :: ParserInfo -> FGrammar + toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo) lookStartCat :: GFCC -> String lookStartCat gfcc = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) diff --git a/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs b/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs index 73b362888..26e7cb153 100644 --- a/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs +++ b/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs @@ -3,10 +3,10 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where import GF.GFCC.CId import GF.GFCC.DataGFCC import GF.GFCC.Raw.AbsGFCCRaw +import GF.GFCC.BuildParser (buildParserInfo) import GF.Infra.PrintClass import GF.Formalism.Utilities -import GF.Parsing.FCFG.PInfo (buildFCFPInfo) import qualified Data.Array as Array import qualified Data.Map as Map @@ -66,8 +66,8 @@ toConcr = foldl add (Concr { add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts } add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) } -toPInfo :: [RExp] -> FCFPInfo -toPInfo [App "rules" rs, App "startupcats" cs] = buildFCFPInfo (rules, cats) +toPInfo :: [RExp] -> ParserInfo +toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats) where rules = map toFRule rs cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs] @@ -204,7 +204,7 @@ fromTerm e = case e of -- ** Parsing info -fromPInfo :: FCFPInfo -> RExp +fromPInfo :: ParserInfo -> RExp fromPInfo p = App "parser" [ App "rules" [fromFRule rule | rule <- Array.elems (allRules p)], App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)] diff --git a/src-3.0/GF/Parsing/FCFG.hs b/src-3.0/GF/Parsing/FCFG.hs index f0d172f18..050c30f81 100644 --- a/src-3.0/GF/Parsing/FCFG.hs +++ b/src-3.0/GF/Parsing/FCFG.hs @@ -8,7 +8,7 @@ ----------------------------------------------------------------------------- module GF.Parsing.FCFG - (parseFCF,buildFCFPInfo,FCFPInfo(..),makeFinalEdge) where + (parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where import GF.Data.SortedList import GF.Data.Assoc @@ -17,11 +17,11 @@ import GF.Infra.PrintClass import GF.Formalism.Utilities -import qualified GF.Parsing.FCFG.Active as Active -import GF.Parsing.FCFG.PInfo +import GF.Parsing.FCFG.Active -import GF.GFCC.DataGFCC import GF.GFCC.CId +import GF.GFCC.DataGFCC +import GF.GFCC.BuildParser import GF.GFCC.Macros import GF.Data.ErrM @@ -34,7 +34,7 @@ import qualified Data.Map as Map parseFCF :: String -> -- ^ parsing strategy - FCFPInfo -> -- ^ compiled grammar (fcfg) + ParserInfo -> -- ^ compiled grammar (fcfg) CId -> -- ^ starting category [String] -> -- ^ input tokens Err [Exp] -- ^ resulting GF terms @@ -51,8 +51,8 @@ parseFCF strategy pinfo startCat inString = return $ map tree2term trees where parseFCF :: String -> Err (FCFParser) - parseFCF "bottomup" = Ok $ Active.parse "b" - parseFCF "topdown" = Ok $ Active.parse "t" + parseFCF "bottomup" = Ok $ parse "b" + parseFCF "topdown" = Ok $ parse "t" parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat ---------------------------------------------------------------------- diff --git a/src-3.0/GF/Parsing/FCFG/Active.hs b/src-3.0/GF/Parsing/FCFG/Active.hs index 3b389f237..a64d53f1c 100644 --- a/src-3.0/GF/Parsing/FCFG/Active.hs +++ b/src-3.0/GF/Parsing/FCFG/Active.hs @@ -7,7 +7,7 @@ -- MCFG parsing, the active algorithm ----------------------------------------------------------------------------- -module GF.Parsing.FCFG.Active (parse) where +module GF.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where import GF.Data.GeneralDeduction import GF.Data.Assoc @@ -20,8 +20,6 @@ import GF.Formalism.Utilities import GF.Infra.PrintClass -import GF.Parsing.FCFG.PInfo - import Control.Monad (guard) import qualified Data.List as List @@ -32,6 +30,16 @@ import Data.Array ---------------------------------------------------------------------- -- * parsing +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +-- | the list of categories = possible starting categories +type FCFParser = ParserInfo + -> [FCat] + -> Input FToken + -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) + + parse :: String -> FCFParser parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo where chart = process strategy pinfo toks axioms emptyXChart @@ -42,12 +50,12 @@ isBU s = s=="b" isTD s = s=="t" -- used in prediction -emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec +emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) where FRule _ _ rhs _ _ = allRules pinfo ! ruleid -process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat +process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat process strategy pinfo toks [] chart = chart process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart where @@ -129,7 +137,7 @@ insertXChart (XChart actives finals) item@(Final _ _) c = lookupXChartAct (XChart actives finals) c = chartLookup actives c lookupXChartFinal (XChart actives finals) c = chartLookup finals c -xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) +xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) xchart2syntaxchart (XChart actives finals) pinfo = accumAssoc groupSyntaxNodes $ [ case node of @@ -141,7 +149,7 @@ xchart2syntaxchart (XChart actives finals) pinfo = | (cat, Final found node) <- chartAssocs finals ] -literals :: FCFPInfo -> Input FToken -> [(FCat,Item)] +literals :: ParserInfo -> Input FToken -> [(FCat,Item)] literals pinfo toks = [let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] where @@ -157,7 +165,7 @@ literals pinfo toks = -- Earley -- -- called with all starting categories -initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] initialTD pinfo starts toks = do cat <- starts ruleid <- topdownRules pinfo ? cat @@ -167,7 +175,7 @@ initialTD pinfo starts toks = ---------------------------------------------------------------------- -- Kilbury -- -initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)] +initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] initialBU pinfo toks = do (tok,rngs) <- aAssocs (inputToken toks) ruleid <- leftcornerTokens pinfo ? tok diff --git a/src-3.0/GF/Parsing/FCFG/PInfo.hs b/src-3.0/GF/Parsing/FCFG/PInfo.hs deleted file mode 100644 index e151a5ac1..000000000 --- a/src-3.0/GF/Parsing/FCFG/PInfo.hs +++ /dev/null @@ -1,97 +0,0 @@ ---------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing, parser information ------------------------------------------------------------------------------ - -module GF.Parsing.FCFG.PInfo where - -import GF.Infra.PrintClass -import GF.Formalism.Utilities -import GF.Data.SortedList -import GF.Data.Assoc -import GF.GFCC.CId -import GF.GFCC.DataGFCC - -import Data.Array -import Data.Maybe -import qualified Data.Map as Map -import qualified Data.Set as Set -import Debug.Trace - ----------------------------------------------------------------------- --- type declarations - --- | the list of categories = possible starting categories -type FCFParser = FCFPInfo - -> [FCat] - -> Input FToken - -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) - -makeFinalEdge cat 0 0 = (cat, [EmptyRange]) -makeFinalEdge cat i j = (cat, [makeRange i j]) - ------------------------------------------------------------- --- parser information - -getLeftCornerTok (FRule _ _ _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymTok tok -> [tok] - _ -> [] - | otherwise = [] - where - syms = lins ! 0 - -getLeftCornerCat (FRule _ _ args _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat _ d -> [args !! d] - _ -> [] - | otherwise = [] - where - syms = lins ! 0 - -buildFCFPInfo :: FGrammar -> FCFPInfo -buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ - FCFPInfo { allRules = allrules - , topdownRules = topdownrules - -- , emptyRules = emptyrules - , epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarCats = grammarcats - , grammarToks = grammartoks - , startupCats = startup - } - - where allrules = listArray (0,length grammar-1) grammar - topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules] - epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules, - not (inRange (bounds (lins ! 0)) 0) ] - leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ] - leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ] - grammarcats = aElems topdownrules - grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] - -fcfPInfoToFGrammar :: FCFPInfo -> FGrammar -fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo) - ----------------------------------------------------------------------- --- pretty-printing of statistics - -instance Print FCFPInfo where - prt pI = "[ allRules=" ++ sl (elems . allRules) ++ - "; tdRules=" ++ sla topdownRules ++ - -- "; emptyRules=" ++ sl emptyRules ++ - "; epsilonRules=" ++ sl epsilonRules ++ - "; lcCats=" ++ sla leftcornerCats ++ - "; lcTokens=" ++ sla leftcornerTokens ++ - "; categories=" ++ sl grammarCats ++ - " ]" - - where sl f = show $ length $ f pI - sla f = let (as, bs) = unzip $ aAssocs $ f pI - in show (length as) ++ "/" ++ show (length (concat bs)) - -- cgit v1.2.3