summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Importing.hs34
-rw-r--r--src/compiler/GF/Compile/CFGtoPGF.hs58
-rw-r--r--src/compiler/GF/Compile/GetGrammar.hs38
-rw-r--r--src/compiler/GF/Grammar/CF.hs143
-rw-r--r--src/compiler/GF/Grammar/CFG.hs (renamed from src/compiler/GF/Speech/CFG.hs)18
-rw-r--r--src/compiler/GF/Grammar/EBNF.hs232
-rw-r--r--src/compiler/GF/Grammar/Lexer.x5
-rw-r--r--src/compiler/GF/Grammar/Parser.y77
-rw-r--r--src/compiler/GF/Speech/CFGToFA.hs2
-rw-r--r--src/compiler/GF/Speech/GSL.hs2
-rw-r--r--src/compiler/GF/Speech/JSGF.hs2
-rw-r--r--src/compiler/GF/Speech/PGFToCFG.hs2
-rw-r--r--src/compiler/GF/Speech/PrRegExp.hs2
-rw-r--r--src/compiler/GF/Speech/SISR.hs2
-rw-r--r--src/compiler/GF/Speech/SLF.hs2
-rw-r--r--src/compiler/GF/Speech/SRG.hs2
-rw-r--r--src/compiler/GF/Speech/SRGS_ABNF.hs2
-rw-r--r--src/compiler/GF/Speech/SRGS_XML.hs2
-rw-r--r--src/compiler/GFC.hs26
19 files changed, 237 insertions, 414 deletions
diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs
index 2bdc091f8..78c019bd4 100644
--- a/src/compiler/GF/Command/Importing.hs
+++ b/src/compiler/GF/Command/Importing.hs
@@ -5,23 +5,25 @@ import PGF.Data
import GF.Compile
import GF.Compile.Multi (readMulti)
+import GF.Compile.GetGrammar (getCFRules, getEBNFRules)
import GF.Grammar (identS, SourceGrammar) -- for cc command
-import GF.Grammar.CF
+import GF.Grammar.CFG
import GF.Grammar.EBNF
+import GF.Compile.CFGtoPGF
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Data.ErrM
---import Data.List (nubBy)
import System.FilePath
+import qualified Data.Set as Set
-- import a grammar in an environment where it extends an existing grammar
importGrammar :: PGF -> Options -> [FilePath] -> IO PGF
importGrammar pgf0 _ [] = return pgf0
importGrammar pgf0 opts files =
case takeExtensions (last files) of
- ".cf" -> importCF opts files getCF
- ".ebnf" -> importCF opts files getEBNF
+ ".cf" -> importCF opts files getCFRules id
+ ".ebnf" -> importCF opts files getEBNFRules ebnf2cf
".gfm" -> do
ascss <- mapM readMulti files
let cs = concatMap snd ascss
@@ -52,13 +54,17 @@ importSource src0 opts files = do
return src0
-- for different cf formats
-importCF opts files get = do
- s <- fmap unlines $ mapM readFile files
- gf <- case get (last files) s of
- Ok gf -> return gf
- Bad s -> error s ----
- Ok gr <- appIOE $ compileSourceGrammar opts gf
- epgf <- appIOE $ link opts (identS (justModuleName (last files) ++ "Abs"), (), gr)
- case epgf of
- Ok pgf -> return pgf
- Bad s -> error s ----
+importCF opts files get convert = do
+ res <- appIOE impCF
+ case res of
+ Ok pgf -> return pgf
+ Bad s -> error s
+ where
+ impCF = do
+ rules <- fmap (convert . concat) $ mapM (get opts) files
+ startCat <- case rules of
+ (CFRule cat _ _ : _) -> return cat
+ _ -> fail "empty CFG"
+ let gf = cf2gf (last files) (uniqueFuns (mkCFG startCat Set.empty rules))
+ gr <- compileSourceGrammar opts gf
+ link opts (identS (justModuleName (last files) ++ "Abs"), (), gr)
diff --git a/src/compiler/GF/Compile/CFGtoPGF.hs b/src/compiler/GF/Compile/CFGtoPGF.hs
new file mode 100644
index 000000000..b42c0fbc4
--- /dev/null
+++ b/src/compiler/GF/Compile/CFGtoPGF.hs
@@ -0,0 +1,58 @@
+module GF.Compile.CFGtoPGF (cf2gf) where
+
+import GF.Grammar.Grammar hiding (Cat)
+import GF.Grammar.Macros
+import GF.Grammar.CFG
+import GF.Infra.Ident(Ident,identS)
+import GF.Infra.Option
+import GF.Infra.UseIO
+
+import GF.Data.Operations
+
+import PGF(showCId)
+
+import qualified Data.Set as Set
+import qualified Data.Map as Map
+
+
+--------------------------
+-- the compiler ----------
+--------------------------
+
+cf2gf :: FilePath -> CFG -> SourceGrammar
+cf2gf fpath cf = mGrammar [
+ (aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs),
+ (cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc)
+ ]
+ where
+ name = justModuleName fpath
+ (abs,cnc,cat) = cf2grammar cf
+ aname = identS $ name ++ "Abs"
+ cname = identS name
+
+
+cf2grammar :: CFG -> (BinTree Ident Info, BinTree Ident Info, String)
+cf2grammar cfg = (buildTree abs, buildTree conc, cfgStartCat cfg) where
+ abs = cats ++ funs
+ conc = lincats ++ lins
+ cats = [(identS cat, AbsCat (Just (L NoLoc []))) | cat <- Map.keys (cfgRules cfg)]
+ lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats]
+ (funs,lins) = unzip (map cf2rule (concatMap Set.toList (Map.elems (cfgRules cfg))))
+
+cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
+cf2rule (CFRule cat items (CFObj fun _)) = (def,ldef) where
+ f = identS (showCId fun)
+ def = (f, AbsFun (Just (L NoLoc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True))
+ args0 = zip (map (identS . ("x" ++) . show) [0..]) items
+ args = [((Explicit,v), Cn (identS c)) | (v, NonTerminal c) <- args0]
+ args' = [(Explicit,identS "_", Cn (identS c)) | (_, NonTerminal c) <- args0]
+ ldef = (f, CncFun
+ Nothing
+ (Just (L NoLoc (mkAbs (map fst args)
+ (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))))
+ Nothing
+ Nothing)
+ mkIt (v, NonTerminal _) = P (Vr v) theLinLabel
+ mkIt (_, Terminal a) = K a
+ foldconcat [] = K ""
+ foldconcat tt = foldr1 C tt
diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs
index 6393d51d2..4647cfcb4 100644
--- a/src/compiler/GF/Compile/GetGrammar.hs
+++ b/src/compiler/GF/Compile/GetGrammar.hs
@@ -12,27 +12,25 @@
-- this module builds the internal GF grammar that is sent to the type checker
-----------------------------------------------------------------------------
-module GF.Compile.GetGrammar (getSourceModule) where
+module GF.Compile.GetGrammar (getSourceModule, getCFRules, getEBNFRules) where
import Prelude hiding (catch)
import GF.Data.Operations
---import GF.System.Catch
import GF.Infra.UseIO
import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
import GF.Grammar.Lexer
import GF.Grammar.Parser
import GF.Grammar.Grammar
---import GF.Compile.Coding
+import GF.Grammar.CFG
+import GF.Grammar.EBNF
import GF.Compile.ReadFiles(parseSource,lift)
---import GF.Text.Coding(decodeUnicodeIO)
import qualified Data.ByteString.Char8 as BS
import Data.Char(isAscii)
import Control.Monad (foldM,when,unless)
import System.Cmd (system)
---import System.IO(mkTextEncoding) --,utf8
import System.Directory(removeFile,getCurrentDirectory)
import System.FilePath(makeRelative)
@@ -64,17 +62,25 @@ getSourceModule opts file0 =
--lift $ transcodeModule' (i,mi) -- old lexer
return (i,mi) -- new lexer
-{-
-transcodeModule sm00 =
- do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
- let sm = decodeStringsInModule enc sm00
- return sm
-
-transcodeModule' sm00 =
- do let enc = utf8
- let sm = decodeStringsInModule enc sm00
- return sm
--}
+getCFRules :: Options -> FilePath -> IOE [CFRule]
+getCFRules opts fpath = do
+ raw <- liftIO (BS.readFile fpath)
+ (optCoding,parsed) <- parseSource opts pCFRules raw
+ case parsed of
+ Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory
+ let location = makeRelative cwd fpath++":"++show l++":"++show c
+ raise (location++":\n "++msg)
+ Right rules -> return rules
+
+getEBNFRules :: Options -> FilePath -> IOE [ERule]
+getEBNFRules opts fpath = do
+ raw <- liftIO (BS.readFile fpath)
+ (optCoding,parsed) <- parseSource opts pEBNFRules raw
+ case parsed of
+ Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory
+ let location = makeRelative cwd fpath++":"++show l++":"++show c
+ raise (location++":\n "++msg)
+ Right rules -> return rules
runPreprocessor :: Temporary -> String -> IO Temporary
runPreprocessor tmp0 p =
diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs
deleted file mode 100644
index a48238e42..000000000
--- a/src/compiler/GF/Grammar/CF.hs
+++ /dev/null
@@ -1,143 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : CF
--- Maintainer : AR
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/11/15 17:56:13 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.13 $
---
--- parsing CF grammars and converting them to GF
------------------------------------------------------------------------------
-
-module GF.Grammar.CF (getCF,CFItem,CFCat,CFFun,cf2gf,CFRule) where
-
-import GF.Grammar.Grammar
-import GF.Grammar.Macros
-import GF.Infra.Ident(Ident,identS)
-import GF.Infra.Option
-import GF.Infra.UseIO
-
-import GF.Data.Operations
-import GF.Data.Utilities (nub')
-
-import qualified Data.Set as S
-import Data.Char
-import Data.List
---import System.FilePath
-
-getCF :: ErrorMonad m => FilePath -> String -> m SourceGrammar
-getCF fpath = fmap (cf2gf fpath . uniqueFuns) . pCF
-
----------------------
--- the parser -------
----------------------
-
-pCF :: ErrorMonad m => String -> m CF
-pCF s = do
- rules <- mapM getCFRule $ filter isRule $ lines s
- return $ concat rules
- where
- isRule line = case dropWhile isSpace line of
- '-':'-':_ -> False
- _ -> not $ all isSpace line
-
--- rules have an amazingly easy parser, if we use the format
--- fun. C -> item1 item2 ... where unquoted items are treated as cats
--- Actually would be nice to add profiles to this.
-
-getCFRule :: ErrorMonad m => String -> m [CFRule]
-getCFRule s = getcf (wrds s) where
- getcf ws = case ws of
- fun : cat : a : its | isArrow a ->
- return [L NoLoc (init fun, (cat, map mkIt its))]
- cat : a : its | isArrow a ->
- return [L NoLoc (mkFun cat it, (cat, map mkIt it)) | it <- chunk its]
- _ -> raise (" invalid rule:" +++ s)
- isArrow a = elem a ["->", "::="]
- mkIt w = case w of
- ('"':w@(_:_)) -> Right (init w)
- _ -> Left w
- chunk its = case its of
- [] -> [[]]
- _ -> chunks "|" its
- mkFun cat its = case its of
- [] -> cat ++ "_"
- _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style
- clean = filter isAlphaNum -- to form valid identifiers
- wrds = takeWhile (/= ";") . words -- to permit semicolon in the end
-
-type CF = [CFRule]
-
-type CFRule = L (CFFun, (CFCat, [CFItem]))
-
-type CFItem = Either CFCat String
-
-type CFCat = String
-type CFFun = String
-
-
---------------------------------
--- make function names unique --
---------------------------------
-
-uniqueFuns :: CF -> CF
-uniqueFuns = snd . mapAccumL uniqueFun S.empty
- where
- uniqueFun funs (L l (fun,rule)) = (S.insert fun' funs,L l (fun',rule))
- where
- fun' = head [fun'|suffix<-"":map show ([2..]::[Int]),
- let fun'=fun++suffix,
- not (fun' `S.member` funs)]
-
-
---------------------------
--- the compiler ----------
---------------------------
-
-cf2gf :: FilePath -> CF -> SourceGrammar
-cf2gf fpath cf = mGrammar [
- (aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs),
- (cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc)
- ]
- where
- name = justModuleName fpath
- (abs,cnc,cat) = cf2grammar cf
- aname = identS $ name ++ "Abs"
- cname = identS name
-
-
-cf2grammar :: CF -> (BinTree Ident Info, BinTree Ident Info, String)
-cf2grammar rules = (buildTree abs, buildTree conc, cat) where
- abs = cats ++ funs
- conc = lincats ++ lins
- cat = case rules of
- (L _ (_,(c,_))):_ -> c -- the value category of the first rule
- _ -> error "empty CF"
- cats = [(cat, AbsCat (Just (L NoLoc []))) |
- cat <- nub' (concat (map cf2cat rules))] ----notPredef cat
- lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats]
- (funs,lins) = unzip (map cf2rule rules)
-
-cf2cat :: CFRule -> [Ident]
-cf2cat (L loc (_,(cat, items))) = map identS $ cat : [c | Left c <- items]
-
-cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
-cf2rule (L loc (fun, (cat, items))) = (def,ldef) where
- f = identS fun
- def = (f, AbsFun (Just (L loc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True))
- args0 = zip (map (identS . ("x" ++) . show) [0..]) items
- args = [((Explicit,v), Cn (identS c)) | (v, Left c) <- args0]
- args' = [(Explicit,identS "_", Cn (identS c)) | (_, Left c) <- args0]
- ldef = (f, CncFun
- Nothing
- (Just (L loc (mkAbs (map fst args)
- (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))))
- Nothing
- Nothing)
- mkIt (v, Left _) = P (Vr v) theLinLabel
- mkIt (_, Right a) = K a
- foldconcat [] = K ""
- foldconcat tt = foldr1 C tt
diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Grammar/CFG.hs
index 1a252139e..93bce2aad 100644
--- a/src/compiler/GF/Speech/CFG.hs
+++ b/src/compiler/GF/Grammar/CFG.hs
@@ -4,7 +4,7 @@
--
-- Context-free grammar representation and manipulation.
----------------------------------------------------------------------
-module GF.Speech.CFG where
+module GF.Grammar.CFG where
import GF.Data.Utilities
import PGF
@@ -53,6 +53,7 @@ data CFG = CFG { cfgStartCat :: Cat,
cfgRules :: Map Cat (Set CFRule) }
deriving (Eq, Ord, Show)
+
--
-- * Grammar filtering
--
@@ -222,6 +223,21 @@ mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules
groupProds :: [CFRule] -> Map Cat (Set CFRule)
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
+uniqueFuns :: CFG -> CFG
+uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg
+ ,cfgExternalCats = cfgExternalCats cfg
+ ,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg))))
+ }
+ where
+ uniqueFunSet funs (cat,rules) =
+ let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules)
+ in (funs',(cat,Set.fromList rules'))
+ uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args))
+ where
+ fun' = head [fun'|suffix<-"":map show ([2..]::[Int]),
+ let fun'=mkCId (showCId fun++suffix),
+ not (fun' `Set.member` funs)]
+
-- | Gets all rules in a CFG.
allRules :: CFG -> [CFRule]
allRules = concat . map Set.toList . Map.elems . cfgRules
diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs
index b1854da54..50a5ff90a 100644
--- a/src/compiler/GF/Grammar/EBNF.hs
+++ b/src/compiler/GF/Grammar/EBNF.hs
@@ -12,34 +12,19 @@
-- (Description of the module)
-----------------------------------------------------------------------------
-module GF.Grammar.EBNF (getEBNF) where
+module GF.Grammar.EBNF (EBNF, ERule, ERHS(..), ebnf2cf) where
import GF.Data.Operations
---import GF.Infra.Comments
-import GF.Grammar.CF
---import GF.CF.CFIdent
-import GF.Grammar.Grammar
---import GF.Grammar.PrGrammar
---import qualified GF.Source.AbsGF as A
+import GF.Grammar.CFG
+import PGF (mkCId)
-import Data.Char
import Data.List
---import System.FilePath
-
-
-
--- AR 18/4/2000 - 31/3/2004
-
-getEBNF :: FilePath -> String -> Err SourceGrammar
-getEBNF fpath = fmap (cf2gf fpath . ebnf2cf) . pEBNF
type EBNF = [ERule]
type ERule = (ECat, ERHS)
type ECat = (String,[Int])
type ETok = String
-ebnfID = "EBNF" ---- make this parametric!
-
data ERHS =
ETerm ETok
| ENonTerm ECat
@@ -50,13 +35,14 @@ data ERHS =
| EOpt ERHS
| EEmpty
-type CFRHS = [CFItem]
-type CFJustRule = (CFCat, CFRHS)
+type CFRHS = [CFSymbol]
+type CFJustRule = (Cat, CFRHS)
ebnf2cf :: EBNF -> [CFRule]
ebnf2cf ebnf =
- [L NoLoc (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
- mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i)
+ [CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)]
+ where
+ mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) []
normEBNF :: EBNF -> [CFJustRule]
normEBNF erules = let
@@ -115,13 +101,13 @@ substERules g (cat,itss) = (cat, map sub itss) where
sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
-eitem2cfitem :: EItem -> CFItem
+eitem2cfitem :: EItem -> CFSymbol
eitem2cfitem it = case it of
- EITerm a -> Right a
- EINonTerm cat -> Left (mkCFCatE cat)
- EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star"))
- EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus"))
- EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt"))
+ EITerm a -> Terminal a
+ EINonTerm cat -> NonTerminal (mkCFCatE cat)
+ EIStar (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Star"))
+ EIPlus (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Plus"))
+ EIOpt (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Opt"))
type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
@@ -157,198 +143,10 @@ mkECat ints = ("C", ints)
prECat (c,[]) = c
prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
-mkCFCatE :: ECat -> CFCat
+mkCFCatE :: ECat -> Cat
mkCFCatE = prECat
updECat _ (c,[]) = (c,[])
updECat ii (c,_) = (c,ii)
mkNewECat (c,ii) str = (c ++ str,ii)
-
------- parser for EBNF grammars
-
-pEBNF :: String -> Err EBNF
-pEBNF = parseResultErr (longestOfMany (pJ pERule))
-
-pERule :: Parser Char ERule
-pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
-
-pERHS :: Int -> Parser Char ERHS
-pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
-pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
-pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
-pERHS 3 = pQuotedString *** ETerm
- ||| pECat *** ENonTerm ||| pParenth (pERHS 0)
-
-pUnaryEOp :: Parser Char (ERHS -> ERHS)
-pUnaryEOp =
- lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
-
-pECat = pIdent *** (\c -> (c,[]))
-
-
-
-----------------------------------------------------------------------
--- Module : Parsers
--- some parser combinators a la Wadler and Hutton.
--- (only used in module "EBNF")
------------------------------------------------------------------------------
-
-infixr 2 |||, +||
-infixr 3 ***
-infixr 5 .>.
-infixr 5 ...
-infixr 5 ....
-infixr 5 +..
-infixr 5 ..+
-infixr 6 |>
-infixr 3 <<<
-
-
-type Parser a b = [a] -> [(b,[a])]
-
-parseResults :: Parser a b -> [a] -> [b]
-parseResults p s = [x | (x,r) <- p s, null r]
-
-parseResultErr :: Show a => Parser a b -> [a] -> Err b
-parseResultErr p s = case parseResults p s of
- [x] -> return x
- [] -> case
- maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of
- r -> Bad $ "\nno parse; reached" ++++ take 300 (show r)
- _ -> Bad "ambiguous"
-
-(...) :: Parser a b -> Parser a c -> Parser a (b,c)
-(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t]
-
-(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c
-(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t]
-
-(|||) :: Parser a b -> Parser a b -> Parser a b
-(p ||| q) s = p s ++ q s
-
-(+||) :: Parser a b -> Parser a b -> Parser a b
-p1 +|| p2 = take 1 . (p1 ||| p2)
-
-literal :: (Eq a) => a -> Parser a a
-literal x (c:cs) = [(x,cs) | x == c]
-literal _ _ = []
-
-(***) :: Parser a b -> (b -> c) -> Parser a c
-(p *** f) s = [(f x,r) | (x,r) <- p s]
-
-succeed :: b -> Parser a b
-succeed v s = [(v,s)]
-
-fails :: Parser a b
-fails s = []
-
-(+..) :: Parser a b -> Parser a c -> Parser a c
-p1 +.. p2 = p1 ... p2 *** snd
-
-(..+) :: Parser a b -> Parser a c -> Parser a b
-p1 ..+ p2 = p1 ... p2 *** fst
-
-(<<<) :: Parser a b -> c -> Parser a c -- return
-p <<< v = p *** (\x -> v)
-
-(|>) :: Parser a b -> (b -> Bool) -> Parser a b
-p |> b = p .>. (\x -> if b x then succeed x else fails)
-
-many :: Parser a b -> Parser a [b]
-many p = (p ... many p *** uncurry (:)) +|| succeed []
-
-some :: Parser a b -> Parser a [b]
-some p = (p ... many p) *** uncurry (:)
-
-longestOfMany :: Parser a b -> Parser a [b]
-longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed []
-
-closure :: (b -> Parser a b) -> (b -> Parser a b)
-closure p v = p v .>. closure p ||| succeed v
-
-pJunk :: Parser Char String
-pJunk = longestOfMany (satisfy (\x -> elem x "\n\t "))
-
-pJ :: Parser Char a -> Parser Char a
-pJ p = pJunk +.. p ..+ pJunk
-
-pTList :: String -> Parser Char a -> Parser Char [a]
-pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999
-
-pTJList :: String -> String -> Parser Char a -> Parser Char [a]
-pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:))
-
-pElem :: [String] -> Parser Char String
-pElem l = foldr (+||) fails (map literals l)
-
-(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c)
-p1 .... p2 = p1 ... pJunk +.. p2
-
-item :: Parser a a
-item (c:cs) = [(c,cs)]
-item [] = []
-
-satisfy :: (a -> Bool) -> Parser a a
-satisfy b = item |> b
-
-literals :: (Eq a,Show a) => [a] -> Parser a [a]
-literals l = case l of
- [] -> succeed []
- a:l -> literal a ... literals l *** (\ (x,y) -> x:y)
-
-lits :: (Eq a,Show a) => [a] -> Parser a [a]
-lits ts = literals ts
-
-jL :: String -> Parser Char String
-jL = pJ . lits
-
-pParenth :: Parser Char a -> Parser Char a
-pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')'
-
--- | p,...,p
-pCommaList :: Parser Char a -> Parser Char [a]
-pCommaList p = pTList "," (pJ p)
-
--- | the same or nothing
-pOptCommaList :: Parser Char a -> Parser Char [a]
-pOptCommaList p = pCommaList p ||| succeed []
-
--- | (p,...,p), poss. empty
-pArgList :: Parser Char a -> Parser Char [a]
-pArgList p = pParenth (pCommaList p) ||| succeed []
-
--- | min. 2 args
-pArgList2 :: Parser Char a -> Parser Char [a]
-pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:)
-
-longestOfSome :: Parser a b -> Parser a [b]
-longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y)
-
-pIdent :: Parser Char String
-pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:)
- where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\''
-
-pLetter, pDigit :: Parser Char Char
-pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++
- ['\192' .. '\255'])) -- no such in Char
-pDigit = satisfy isDigit
-
-pLetters :: Parser Char String
-pLetters = longestOfSome pLetter
-
-pAlphanum, pAlphaPlusChar :: Parser Char Char
-pAlphanum = pDigit ||| pLetter
-pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'")
-
-pQuotedString :: Parser Char String
-pQuotedString = literal '"' +.. pEndQuoted where
- pEndQuoted =
- literal '"' *** (const [])
- +|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:))
- +|| item .>. \ c -> pEndQuoted *** (c:)
-
-pIntc :: Parser Char Int
-pIntc = some (satisfy numb) *** read
- where numb x = elem x ['0'..'9']
-
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index c4f7159a2..0293d3915 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -26,7 +26,7 @@ $i = [$l $d _ '] -- identifier character
$u = [.\n] -- universal: any character
@rsyms = -- symbols and non-identifier-like reserved words
- \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
+ \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \=
:-
"--" [.]* ; -- Toss single line comments
@@ -83,6 +83,7 @@ data Token
| T_ccurly
| T_underscore
| T_at
+ | T_cfarrow
| T_PType
| T_Str
| T_Strs
@@ -169,6 +170,8 @@ resWords = Map.fromList
, b "|" T_bar
, b "_" T_underscore
, b "@" T_at
+ , b "::=" T_cfarrow
+ , b ":=" T_cfarrow
, b "PType" T_PType
, b "Str" T_Str
, b "Strs" T_Strs
diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y
index 6f7f5854e..387b69dd3 100644
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -7,6 +7,8 @@ module GF.Grammar.Parser
, pModHeader
, pExp
, pTopDef
+ , pCFRules
+ , pEBNFRules
) where
import GF.Infra.Ident
@@ -14,17 +16,23 @@ import GF.Infra.Option
import GF.Data.Operations
import GF.Grammar.Predef
import GF.Grammar.Grammar
+import GF.Grammar.CFG
+import GF.Grammar.EBNF
import GF.Grammar.Macros
import GF.Grammar.Lexer
import GF.Compile.Update (buildAnyTree)
---import Codec.Binary.UTF8.String(decodeString)
---import Data.Char(toLower)
+import Data.List(intersperse)
+import Data.Char(isAlphaNum)
+import PGF(mkCId)
+
}
%name pModDef ModDef
%name pTopDef TopDef
%partial pModHeader ModHeader
%name pExp Exp
+%name pCFRules ListCFRule
+%name pEBNFRules ListEBNFRule
-- no lexer declaration
%monad { P } { >>= } { return }
@@ -64,6 +72,7 @@ import GF.Compile.Update (buildAnyTree)
'\\\\' { T_lamlam }
'_' { T_underscore}
'|' { T_bar }
+ '::=' { T_cfarrow }
'PType' { T_PType }
'Str' { T_Str }
'Strs' { T_Strs }
@@ -602,6 +611,70 @@ ListDDecl
: {- empty -} { [] }
| DDecl ListDDecl { $1 ++ $2 }
+ListCFRule :: { [CFRule] }
+ListCFRule
+ : CFRule { $1 }
+ | CFRule ListCFRule { $1 ++ $2 }
+
+CFRule :: { [CFRule] }
+CFRule
+ : Ident '.' Ident '::=' ListCFSymbol ';' { [CFRule (showIdent $3) $5 (CFObj (mkCId (showIdent $1)) [])]
+ }
+ | Ident '::=' ListCFRHS ';' { let { cat = showIdent $1;
+ mkFun cat its =
+ case its of {
+ [] -> cat ++ "_";
+ _ -> concat $ intersperse "_" (cat : filter (not . null) (map clean its)) -- CLE style
+ };
+ clean sym =
+ case sym of {
+ Terminal c -> filter isAlphaNum c;
+ NonTerminal t -> t
+ }
+ } in map (\rhs -> CFRule cat rhs (CFObj (mkCId (mkFun cat rhs)) [])) $3
+ }
+
+ListCFRHS :: { [[CFSymbol]] }
+ListCFRHS
+ : ListCFSymbol { [$1] }
+ | ListCFSymbol '|' ListCFRHS { $1 : $3 }
+
+ListCFSymbol :: { [CFSymbol] }
+ListCFSymbol
+ : {- empty -} { [] }
+ | CFSymbol ListCFSymbol { $1 : $2 }
+
+CFSymbol :: { CFSymbol }
+ : String { Terminal $1 }
+ | Ident { NonTerminal (showIdent $1) }
+
+ListEBNFRule :: { [ERule] }
+ListEBNFRule
+ : EBNFRule { [$1] }
+ | EBNFRule ListEBNFRule { $1 : $2 }
+
+EBNFRule :: { ERule }
+ : Ident '::=' ERHS0 ';' { ((showIdent $1,[]),$3) }
+
+ERHS0 :: { ERHS }
+ : ERHS1 { $1 }
+ | ERHS1 '|' ERHS0 { EAlt $1 $3 }
+
+ERHS1 :: { ERHS }
+ : ERHS2 { $1 }
+ | ERHS2 ERHS1 { ESeq $1 $2 }
+
+ERHS2 :: { ERHS }
+ : ERHS3 '*' { EStar $1 }
+ | ERHS3 '+' { EPlus $1 }
+ | ERHS3 '?' { EOpt $1 }
+ | ERHS3 { $1 }
+
+ERHS3 :: { ERHS }
+ : String { ETerm $1 }
+ | Ident { ENonTerm (showIdent $1,[]) }
+ | '(' ERHS0 ')' { $2 }
+
Posn :: { Posn }
Posn
: {- empty -} {% getPosn }
diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs
index 4f5e3621e..330c763e5 100644
--- a/src/compiler/GF/Speech/CFGToFA.hs
+++ b/src/compiler/GF/Speech/CFGToFA.hs
@@ -17,7 +17,7 @@ import qualified Data.Set as Set
--import PGF.CId
import PGF.Data
import GF.Data.Utilities
-import GF.Speech.CFG
+import GF.Grammar.CFG
--import GF.Speech.PGFToCFG
--import GF.Infra.Ident (Ident)
diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs
index 3557ff21f..3eb4c20a7 100644
--- a/src/compiler/GF/Speech/GSL.hs
+++ b/src/compiler/GF/Speech/GSL.hs
@@ -9,7 +9,7 @@
module GF.Speech.GSL (gslPrinter) where
--import GF.Data.Utilities
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Infra.Option
diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs
index 921108e11..6a4935a7f 100644
--- a/src/compiler/GF/Speech/JSGF.hs
+++ b/src/compiler/GF/Speech/JSGF.hs
@@ -14,7 +14,7 @@ module GF.Speech.JSGF (jsgfPrinter) where
--import GF.Data.Utilities
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.RegExp
import GF.Speech.SISR
import GF.Speech.SRG
diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs
index 5c13ca471..d70a74fe7 100644
--- a/src/compiler/GF/Speech/PGFToCFG.hs
+++ b/src/compiler/GF/Speech/PGFToCFG.hs
@@ -10,7 +10,7 @@ import PGF(showCId)
import PGF.Data as PGF
import PGF.Macros
--import GF.Infra.Ident
-import GF.Speech.CFG hiding (Symbol)
+import GF.Grammar.CFG hiding (Symbol)
import Data.Array.IArray as Array
--import Data.List
diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs
index 0fc35d541..2829839f3 100644
--- a/src/compiler/GF/Speech/PrRegExp.hs
+++ b/src/compiler/GF/Speech/PrRegExp.hs
@@ -7,7 +7,7 @@
module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.CFGToFA
import GF.Speech.PGFToCFG
import GF.Speech.RegExp
diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs
index 8417fb203..5f9161547 100644
--- a/src/compiler/GF/Speech/SISR.hs
+++ b/src/compiler/GF/Speech/SISR.hs
@@ -13,7 +13,7 @@ import Data.List
--import GF.Data.Utilities
--import GF.Infra.Ident
import GF.Infra.Option (SISRFormat(..))
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.SRG (SRGNT)
import PGF(showCId)
diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs
index 7785f2382..d93d1b362 100644
--- a/src/compiler/GF/Speech/SLF.hs
+++ b/src/compiler/GF/Speech/SLF.hs
@@ -13,7 +13,7 @@ module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter,
slfSubPrinter,slfSubGraphvizPrinter) where
import GF.Data.Utilities
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.FiniteState
--import GF.Speech.CFG
import GF.Speech.CFGToFA
diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs
index 4e5508de0..d5bedc797 100644
--- a/src/compiler/GF/Speech/SRG.hs
+++ b/src/compiler/GF/Speech/SRG.hs
@@ -21,7 +21,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol
import GF.Data.Utilities
--import GF.Infra.Ident
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.PGFToCFG
--import GF.Data.Relation
--import GF.Speech.FiniteState
diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs
index 5d07762bb..a359b2c38 100644
--- a/src/compiler/GF/Speech/SRGS_ABNF.hs
+++ b/src/compiler/GF/Speech/SRGS_ABNF.hs
@@ -21,7 +21,7 @@ module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
--import GF.Data.Utilities
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.SISR as SISR
import GF.Speech.SRG
import GF.Speech.RegExp
diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs
index fe973c2e6..397bfb739 100644
--- a/src/compiler/GF/Speech/SRGS_XML.hs
+++ b/src/compiler/GF/Speech/SRGS_XML.hs
@@ -9,7 +9,7 @@ module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
--import GF.Data.Utilities
import GF.Data.XML
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.RegExp
import GF.Speech.SISR as SISR
import GF.Speech.SRG
diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs
index acb4e21ab..8d548e449 100644
--- a/src/compiler/GFC.hs
+++ b/src/compiler/GFC.hs
@@ -8,10 +8,11 @@ import PGF.Optimize
import PGF.Binary(putSplitAbs)
import GF.Compile
import GF.Compile.Export
+import GF.Compile.CFGtoPGF
+import GF.Compile.GetGrammar
+import GF.Grammar.CFG
-import GF.Grammar.CF ---- should this be on a deeper level? AR 15/10/2008
import GF.Infra.Ident(identS,showIdent)
-
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Data.ErrM
@@ -21,6 +22,7 @@ import Data.Maybe
import Data.Binary(encode,encodeFile)
import Data.Binary.Put(runPut)
import qualified Data.Map as Map
+import qualified Data.Set as Set
import qualified Data.ByteString as BSS
import qualified Data.ByteString.Lazy as BSL
import System.FilePath
@@ -61,14 +63,18 @@ compileSourceFiles opts fs =
writeOutputs opts pgf
compileCFFiles :: Options -> [FilePath] -> IOE ()
-compileCFFiles opts fs =
- do s <- liftIO $ fmap unlines $ mapM readFile fs
- let cnc = justModuleName (last fs)
- gr <- compileSourceGrammar opts =<< getCF cnc s
- unless (flag optStopAfterPhase opts == Compile) $
- do pgf <- link opts (identS cnc, (), gr)
- writePGF opts pgf
- writeOutputs opts pgf
+compileCFFiles opts fs = do
+ rules <- fmap concat $ mapM (getCFRules opts) fs
+ startCat <- case rules of
+ (CFRule cat _ _ : _) -> return cat
+ _ -> fail "empty CFG"
+ let gf = cf2gf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules))
+ gr <- compileSourceGrammar opts gf
+ let cnc = justModuleName (last fs)
+ unless (flag optStopAfterPhase opts == Compile) $
+ do pgf <- link opts (identS cnc, (), gr)
+ writePGF opts pgf
+ writeOutputs opts pgf
unionPGFFiles :: Options -> [FilePath] -> IOE ()
unionPGFFiles opts fs =