summaryrefslogtreecommitdiff
path: root/src/GF/GFCC
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2008-01-03 17:10:05 +0000
committerbringert <bringert@cs.chalmers.se>2008-01-03 17:10:05 +0000
commitaf1a3a2473747942dcec647a42e5724fcb21d1b9 (patch)
treee08020dce28ad35a5434328e6483ec695693fc20 /src/GF/GFCC
parent43ddb41d314e7d547fa8f8bb1cd23397dfa30f65 (diff)
Store FCFPInfo (all information needed for FCFG parsing) in GFCC files, and in the internal DataGFCC.GFCC structure. The parsing information format is still in flux.
Diffstat (limited to 'src/GF/GFCC')
-rw-r--r--src/GF/GFCC/API.hs12
-rw-r--r--src/GF/GFCC/DataGFCC.hs6
-rw-r--r--src/GF/GFCC/Macros.hs8
-rw-r--r--src/GF/GFCC/Raw/ConvertGFCC.hs181
4 files changed, 175 insertions, 32 deletions
diff --git a/src/GF/GFCC/API.hs b/src/GF/GFCC/API.hs
index 111857b18..0a3b37cc5 100644
--- a/src/GF/GFCC/API.hs
+++ b/src/GF/GFCC/API.hs
@@ -27,7 +27,6 @@ import GF.Command.PPrTree
import GF.Data.ErrM
import GF.Parsing.FCFG
-import GF.Conversion.SimpleToFCFG (convertGrammar)
--import GF.Data.Operations
--import GF.Infra.UseIO
@@ -44,7 +43,7 @@ import System.Directory (doesFileExist)
-- Interface
---------------------------------------------------
-data MultiGrammar = MultiGrammar {gfcc :: GFCC, parsers :: [(Language,FCFPInfo)]}
+data MultiGrammar = MultiGrammar {gfcc :: GFCC}
type Language = String
type Category = String
type Tree = Exp
@@ -77,10 +76,7 @@ startCat :: MultiGrammar -> Category
file2grammar f = do
gfcc <- file2gfcc f
- return (MultiGrammar gfcc (gfcc2parsers gfcc))
-
-gfcc2parsers gfcc =
- [(lang, buildFCFPInfo fcfg) | (CId lang,fcfg) <- convertGrammar gfcc]
+ return (MultiGrammar gfcc)
file2gfcc f = do
s <- readFileIf f
@@ -90,7 +86,7 @@ file2gfcc f = do
linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (CId lang)
parse mgr lang cat s =
- case lookup lang (parsers mgr) of
+ case lookParser (gfcc mgr) (CId lang) of
Nothing -> error "no parser"
Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of
Ok x -> x
@@ -126,7 +122,7 @@ categories mgr = [c | CId c <- Map.keys (cats (abstract (gfcc mgr)))]
startCat mgr = "S" ----
-emptyMultiGrammar = MultiGrammar emptyGFCC []
+emptyMultiGrammar = MultiGrammar emptyGFCC
------------ for internal use only
diff --git a/src/GF/GFCC/DataGFCC.hs b/src/GF/GFCC/DataGFCC.hs
index 35c8b08c5..89ab28170 100644
--- a/src/GF/GFCC/DataGFCC.hs
+++ b/src/GF/GFCC/DataGFCC.hs
@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
import GF.GFCC.CId
import GF.Infra.CompactPrint
import GF.Text.UTF8
+import GF.Formalism.FCFG
+import GF.Parsing.FCFG.PInfo
import Data.Map
import Data.List
@@ -31,7 +33,8 @@ data Concr = Concr {
lincats :: Map CId Term, -- lin type of a cat
lindefs :: Map CId Term, -- lin default of a cat
printnames :: Map CId Term, -- printname of a cat or a fun
- paramlincats :: Map CId Term -- lin type of cat, with printable param names
+ paramlincats :: Map CId Term, -- lin type of cat, with printable param names
+ parser :: Maybe FCFPInfo -- parser
}
data Type =
@@ -116,7 +119,6 @@ emptyGFCC = GFCC {
concretes = empty
}
-
-- default map and filter are for Map here
lmap = Prelude.map
lfilter = Prelude.filter
diff --git a/src/GF/GFCC/Macros.hs b/src/GF/GFCC/Macros.hs
index 3e88952d4..383b77d34 100644
--- a/src/GF/GFCC/Macros.hs
+++ b/src/GF/GFCC/Macros.hs
@@ -2,6 +2,8 @@ module GF.GFCC.Macros where
import GF.GFCC.CId
import GF.GFCC.DataGFCC
+import GF.Formalism.FCFG (FGrammar)
+import GF.Parsing.FCFG.PInfo (FCFPInfo, fcfPInfoToFGrammar)
----import GF.GFCC.PrintGFCC
import Data.Map
import Data.List
@@ -28,6 +30,12 @@ lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
+lookParser :: GFCC -> CId -> Maybe FCFPInfo
+lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc
+
+lookFCFG :: GFCC -> CId -> Maybe FGrammar
+lookFCFG gfcc lang = fmap fcfPInfoToFGrammar $ lookParser gfcc lang
+
lookGlobalFlag :: GFCC -> CId -> String
lookGlobalFlag gfcc f =
lookMap "?" f (gflags gfcc)
diff --git a/src/GF/GFCC/Raw/ConvertGFCC.hs b/src/GF/GFCC/Raw/ConvertGFCC.hs
index 325d6ea6d..0636cf5e1 100644
--- a/src/GF/GFCC/Raw/ConvertGFCC.hs
+++ b/src/GF/GFCC/Raw/ConvertGFCC.hs
@@ -3,8 +3,15 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where
import GF.GFCC.DataGFCC
import GF.GFCC.Raw.AbsGFCCRaw
+import GF.Data.Assoc
+import GF.Formalism.FCFG
+import GF.Formalism.Utilities (NameProfile(..), Profile(..), SyntaxForest(..))
+import GF.Parsing.FCFG.PInfo (FCFPInfo(..))
+
+import qualified Data.Array as Array
import Data.Map
+
-- convert parsed grammar to internal GFCC
toGFCC :: Grammar -> GFCC
@@ -31,29 +38,88 @@ toGFCC (Grm [
catfuns = fromAscList
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
in Abstr aflags funs cats catfuns,
- concretes = fromAscList (lmap mkCnc ccs)
+ concretes = fromAscList [(lang, toConcr ts) | App lang ts <- ccs]
}
where
- mkCnc (
- App lang [
- App (CId "flags") fls,
- App (CId "lin") ls,
- App (CId "oper") ops,
- App (CId "lincat") lincs,
- App (CId "lindef") linds,
- App (CId "printname") prns,
- App (CId "param") params
- ]) = (lang,
- Concr {
- cflags = fromAscList [(f,v) | App f [AStr v] <- fls],
- lins = fromAscList [(f,toTerm v) | App f [v] <- ls],
- opers = fromAscList [(f,toTerm v) | App f [v] <- ops],
- lincats = fromAscList [(f,toTerm v) | App f [v] <- lincs],
- lindefs = fromAscList [(f,toTerm v) | App f [v] <- linds],
- printnames = fromAscList [(f,toTerm v) | App f [v] <- prns],
- paramlincats = fromAscList [(f,toTerm v) | App f [v] <- params]
- }
- )
+
+toConcr :: [RExp] -> Concr
+toConcr = foldl add (Concr {
+ cflags = empty,
+ lins = empty,
+ opers = empty,
+ lincats = empty,
+ lindefs = empty,
+ printnames = empty,
+ paramlincats = empty,
+ parser = Nothing
+ })
+ where
+ add :: Concr -> RExp -> Concr
+ add cnc (App (CId "flags") ts) = cnc { cflags = fromAscList [(f,v) | App f [AStr v] <- ts] }
+ add cnc (App (CId "lin") ts) = cnc { lins = mkTermMap ts }
+ add cnc (App (CId "oper") ts) = cnc { opers = mkTermMap ts }
+ add cnc (App (CId "lincat") ts) = cnc { lincats = mkTermMap ts }
+ add cnc (App (CId "lindef") ts) = cnc { lindefs = mkTermMap ts }
+ add cnc (App (CId "printname") ts) = cnc { printnames = mkTermMap ts }
+ add cnc (App (CId "param") ts) = cnc { paramlincats = mkTermMap ts }
+ add cnc (App (CId "parser") ts) = cnc { parser = Just (toPInfo ts) }
+
+toPInfo :: [RExp] -> FCFPInfo
+toPInfo = foldl add (FCFPInfo {
+ allRules = error "FCFPInfo.allRules",
+ topdownRules = error "FCFPInfo.topdownRules",
+ epsilonRules = error "FCFPInfo.epsilonRules",
+ leftcornerCats = error "FCFPInfo.leftcornerCats",
+ leftcornerTokens = error "FCFPInfo.leftcornerTokens",
+ grammarCats = error "FCFPInfo.grammarCats",
+ grammarToks = error "FCFPInfo.grammarToks",
+ startupCats = error "FCFPInfo.startupCats"})
+ where
+ add :: FCFPInfo -> RExp -> FCFPInfo
+ add p (App (CId f) ts) =
+ case f of
+ "rules" -> p { allRules = mkArray (lmap toFRule ts) }
+ "topdownrules" -> p { topdownRules = toAssoc expToInt (lmap expToInt) ts }
+ "epsilonrules" -> p { epsilonRules = lmap expToInt ts }
+ "lccats" -> p { leftcornerCats = toAssoc expToInt (lmap expToInt) ts }
+ "lctoks" -> p { leftcornerTokens = toAssoc expToStr (lmap expToInt) ts }
+ "cats" -> p { grammarCats = lmap expToInt ts }
+ "toks" -> p { grammarToks = lmap expToStr ts }
+ "startupcats" -> p { startupCats = fromList [(c, lmap expToInt cs) | App c cs <- ts] }
+ toFRule :: RExp -> FRule
+ toFRule (App (CId "rule")
+ [n,
+ App (CId "cats") (rt:at),
+ App (CId "R") ls]) = FRule name args res lins
+ where
+ name = toFName n
+ args = lmap expToInt at
+ res = expToInt rt
+ lins = mkArray [mkArray [toSymbol s | s <- l] | App (CId "S") l <- ls]
+
+toFName :: RExp -> FName
+toFName (App (CId "_A") [x]) = Name (CId "_") [Unify [expToInt x]]
+toFName (App f ts) = Name f (lmap toProfile ts)
+ where
+ toProfile :: RExp -> Profile (SyntaxForest CId)
+ toProfile AMet = Unify []
+ toProfile (App (CId "_A") [t]) = Unify [expToInt t]
+ toProfile (App (CId "_U") ts) = Unify [expToInt t | App (CId "_A") [t] <- ts]
+ toProfile t = Constant (toSyntaxForest t)
+
+ toSyntaxForest :: RExp -> SyntaxForest CId
+ toSyntaxForest AMet = FMeta
+ toSyntaxForest (App n ts) = FNode n [lmap toSyntaxForest ts]
+ toSyntaxForest (AStr s) = FString s
+ toSyntaxForest (AInt i) = FInt i
+ toSyntaxForest (AFlt f) = FFloat f
+
+toSymbol :: RExp -> FSymbol
+toSymbol (App (CId "proj") [c,n,l]) = FSymCat (expToInt c) (expToInt l) (expToInt n)
+toSymbol (AStr t) = FSymTok t
+
+toAssoc :: Ord a => (RExp -> a) -> ([RExp] -> b) -> [RExp] -> Assoc a b
+toAssoc f g xs = listAssoc [(f k, g v) | App (CId "map") (k:v) <- xs]
toType :: RExp -> Type
toType e = case e of
@@ -120,7 +186,7 @@ fromGFCC gfcc0 = Grm [
app "lindef" [App f [fromTerm v] | (f,v) <- toList (lindefs cnc)],
app "printname" [App f [fromTerm v] | (f,v) <- toList (printnames cnc)],
app "param" [App f [fromTerm v] | (f,v) <- toList (paramlincats cnc)]
- ]
+ ] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
fromType :: Type -> RExp
fromType e = case e of
@@ -163,3 +229,74 @@ fromTerm e = case e of
where
app = App . CId
str v = app "S" (lmap AStr v)
+
+-- ** Parsing info
+
+fromPInfo :: FCFPInfo -> RExp
+fromPInfo p = app "parser" [
+ app "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
+ app "topdownrules" (fromAssoc intToExp (lmap intToExp) (topdownRules p)),
+ app "epsilonrules" (lmap intToExp (epsilonRules p)),
+ app "lccats" (fromAssoc intToExp (lmap intToExp) (leftcornerCats p)),
+ app "lctoks" (fromAssoc AStr (lmap intToExp) (leftcornerTokens p)),
+ app "cats" (lmap intToExp (grammarCats p)),
+ app "toks" (lmap AStr (grammarToks p)),
+ app "startupcats" [App f (lmap intToExp cs) | (f,cs) <- toList (startupCats p)]
+ ]
+
+fromAssoc :: Ord a => (a -> RExp) -> (b -> [RExp]) -> Assoc a b -> [RExp]
+fromAssoc f g xs = [app "map" (f x:g y) | (x,y) <- aAssocs xs]
+
+fromFRule :: FRule -> RExp
+fromFRule (FRule n args res lins) =
+ app "rule" [fromFName n,
+ app "cats" (intToExp res:lmap intToExp args),
+ app "R" [app "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
+ ]
+
+fromFName :: FName -> RExp
+fromFName n = case n of
+ Name (CId "_") [p] -> fromProfile p
+ Name f ps -> App f (lmap fromProfile ps)
+ where
+ fromProfile :: Profile (SyntaxForest CId) -> RExp
+ fromProfile (Unify []) = AMet
+ fromProfile (Unify [x]) = daughter x
+ fromProfile (Unify args) = app "_U" (lmap daughter args)
+ fromProfile (Constant forest) = fromSyntaxForest forest
+
+ daughter n = app "_A" [intToExp n]
+
+ fromSyntaxForest :: SyntaxForest CId -> RExp
+ fromSyntaxForest FMeta = AMet
+ -- FIXME: is there always just one element here?
+ fromSyntaxForest (FNode n [args]) = App n (lmap fromSyntaxForest args)
+ fromSyntaxForest (FString s) = AStr s
+ fromSyntaxForest (FInt i) = AInt i
+ fromSyntaxForest (FFloat f) = AFlt f
+
+fromSymbol :: FSymbol -> RExp
+fromSymbol (FSymCat c l n) = app "proj" [intToExp c, intToExp n, intToExp l]
+fromSymbol (FSymTok t) = AStr t
+
+-- ** Utilities
+
+mkTermMap :: [RExp] -> Map CId Term
+mkTermMap ts = fromAscList [(f,toTerm v) | App f [v] <- ts]
+
+app :: String -> [RExp] -> RExp
+app = App . CId
+
+mkArray :: [a] -> Array.Array Int a
+mkArray xs = Array.listArray (0, length xs - 1) xs
+
+expToInt :: Integral a => RExp -> a
+expToInt (App (CId "neg") [AInt i]) = fromIntegral (negate i)
+expToInt (AInt i) = fromIntegral i
+
+expToStr :: RExp -> String
+expToStr (AStr s) = s
+
+intToExp :: Integral a => a -> RExp
+intToExp x | x < 0 = App (CId "neg") [AInt (fromIntegral (negate x))]
+ | otherwise = AInt (fromIntegral x)