summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Grammar
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler/GF/Grammar')
-rw-r--r--src/compiler/GF/Grammar/CF.hs143
-rw-r--r--src/compiler/GF/Grammar/CFG.hs386
-rw-r--r--src/compiler/GF/Grammar/EBNF.hs232
-rw-r--r--src/compiler/GF/Grammar/Lexer.x5
-rw-r--r--src/compiler/GF/Grammar/Parser.y77
5 files changed, 480 insertions, 363 deletions
diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs
deleted file mode 100644
index a48238e42..000000000
--- a/src/compiler/GF/Grammar/CF.hs
+++ /dev/null
@@ -1,143 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : CF
--- Maintainer : AR
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/11/15 17:56:13 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.13 $
---
--- parsing CF grammars and converting them to GF
------------------------------------------------------------------------------
-
-module GF.Grammar.CF (getCF,CFItem,CFCat,CFFun,cf2gf,CFRule) where
-
-import GF.Grammar.Grammar
-import GF.Grammar.Macros
-import GF.Infra.Ident(Ident,identS)
-import GF.Infra.Option
-import GF.Infra.UseIO
-
-import GF.Data.Operations
-import GF.Data.Utilities (nub')
-
-import qualified Data.Set as S
-import Data.Char
-import Data.List
---import System.FilePath
-
-getCF :: ErrorMonad m => FilePath -> String -> m SourceGrammar
-getCF fpath = fmap (cf2gf fpath . uniqueFuns) . pCF
-
----------------------
--- the parser -------
----------------------
-
-pCF :: ErrorMonad m => String -> m CF
-pCF s = do
- rules <- mapM getCFRule $ filter isRule $ lines s
- return $ concat rules
- where
- isRule line = case dropWhile isSpace line of
- '-':'-':_ -> False
- _ -> not $ all isSpace line
-
--- rules have an amazingly easy parser, if we use the format
--- fun. C -> item1 item2 ... where unquoted items are treated as cats
--- Actually would be nice to add profiles to this.
-
-getCFRule :: ErrorMonad m => String -> m [CFRule]
-getCFRule s = getcf (wrds s) where
- getcf ws = case ws of
- fun : cat : a : its | isArrow a ->
- return [L NoLoc (init fun, (cat, map mkIt its))]
- cat : a : its | isArrow a ->
- return [L NoLoc (mkFun cat it, (cat, map mkIt it)) | it <- chunk its]
- _ -> raise (" invalid rule:" +++ s)
- isArrow a = elem a ["->", "::="]
- mkIt w = case w of
- ('"':w@(_:_)) -> Right (init w)
- _ -> Left w
- chunk its = case its of
- [] -> [[]]
- _ -> chunks "|" its
- mkFun cat its = case its of
- [] -> cat ++ "_"
- _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style
- clean = filter isAlphaNum -- to form valid identifiers
- wrds = takeWhile (/= ";") . words -- to permit semicolon in the end
-
-type CF = [CFRule]
-
-type CFRule = L (CFFun, (CFCat, [CFItem]))
-
-type CFItem = Either CFCat String
-
-type CFCat = String
-type CFFun = String
-
-
---------------------------------
--- make function names unique --
---------------------------------
-
-uniqueFuns :: CF -> CF
-uniqueFuns = snd . mapAccumL uniqueFun S.empty
- where
- uniqueFun funs (L l (fun,rule)) = (S.insert fun' funs,L l (fun',rule))
- where
- fun' = head [fun'|suffix<-"":map show ([2..]::[Int]),
- let fun'=fun++suffix,
- not (fun' `S.member` funs)]
-
-
---------------------------
--- the compiler ----------
---------------------------
-
-cf2gf :: FilePath -> CF -> SourceGrammar
-cf2gf fpath cf = mGrammar [
- (aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs),
- (cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc)
- ]
- where
- name = justModuleName fpath
- (abs,cnc,cat) = cf2grammar cf
- aname = identS $ name ++ "Abs"
- cname = identS name
-
-
-cf2grammar :: CF -> (BinTree Ident Info, BinTree Ident Info, String)
-cf2grammar rules = (buildTree abs, buildTree conc, cat) where
- abs = cats ++ funs
- conc = lincats ++ lins
- cat = case rules of
- (L _ (_,(c,_))):_ -> c -- the value category of the first rule
- _ -> error "empty CF"
- cats = [(cat, AbsCat (Just (L NoLoc []))) |
- cat <- nub' (concat (map cf2cat rules))] ----notPredef cat
- lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats]
- (funs,lins) = unzip (map cf2rule rules)
-
-cf2cat :: CFRule -> [Ident]
-cf2cat (L loc (_,(cat, items))) = map identS $ cat : [c | Left c <- items]
-
-cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
-cf2rule (L loc (fun, (cat, items))) = (def,ldef) where
- f = identS fun
- def = (f, AbsFun (Just (L loc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True))
- args0 = zip (map (identS . ("x" ++) . show) [0..]) items
- args = [((Explicit,v), Cn (identS c)) | (v, Left c) <- args0]
- args' = [(Explicit,identS "_", Cn (identS c)) | (_, Left c) <- args0]
- ldef = (f, CncFun
- Nothing
- (Just (L loc (mkAbs (map fst args)
- (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))))
- Nothing
- Nothing)
- mkIt (v, Left _) = P (Vr v) theLinLabel
- mkIt (_, Right a) = K a
- foldconcat [] = K ""
- foldconcat tt = foldr1 C tt
diff --git a/src/compiler/GF/Grammar/CFG.hs b/src/compiler/GF/Grammar/CFG.hs
new file mode 100644
index 000000000..93bce2aad
--- /dev/null
+++ b/src/compiler/GF/Grammar/CFG.hs
@@ -0,0 +1,386 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.CFG
+--
+-- Context-free grammar representation and manipulation.
+----------------------------------------------------------------------
+module GF.Grammar.CFG where
+
+import GF.Data.Utilities
+import PGF
+--import GF.Infra.Option
+import GF.Data.Relation
+
+--import Control.Monad
+--import Control.Monad.State (State, get, put, evalState)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.List
+--import Data.Maybe (fromMaybe)
+--import Data.Monoid (mconcat)
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+--
+-- * Types
+--
+
+type Cat = String
+
+data Symbol c t = NonTerminal c | Terminal t
+ deriving (Eq, Ord, Show)
+
+type CFSymbol = Symbol Cat Token
+
+data CFRule = CFRule {
+ lhsCat :: Cat,
+ ruleRhs :: [CFSymbol],
+ ruleName :: CFTerm
+ }
+ deriving (Eq, Ord, Show)
+
+data CFTerm
+ = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
+ | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
+ | CFApp CFTerm CFTerm -- ^ Application
+ | CFRes Int -- ^ The result of the n:th (0-based) non-terminal
+ | CFVar Int -- ^ A lambda-bound variable
+ | CFMeta CId -- ^ A metavariable
+ deriving (Eq, Ord, Show)
+
+data CFG = CFG { cfgStartCat :: Cat,
+ cfgExternalCats :: Set Cat,
+ cfgRules :: Map Cat (Set CFRule) }
+ deriving (Eq, Ord, Show)
+
+
+--
+-- * Grammar filtering
+--
+
+-- | Removes all directly and indirectly cyclic productions.
+-- FIXME: this may be too aggressive, only one production
+-- needs to be removed to break a given cycle. But which
+-- one should we pick?
+-- FIXME: Does not (yet) remove productions which are cyclic
+-- because of empty productions.
+removeCycles :: CFG -> CFG
+removeCycles = onRules f
+ where f rs = filter (not . isCycle) rs
+ where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
+ isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
+ isCycle _ = False
+
+-- | Better bottom-up filter that also removes categories which contain no finite
+-- strings.
+bottomUpFilter :: CFG -> CFG
+bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
+ where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
+ okSym g = symbol (`elem` allCats g) (const True)
+
+-- | Removes categories which are not reachable from any external category.
+topDownFilter :: CFG -> CFG
+topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg
+ where
+ rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
+ uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
+ keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg
+
+-- | Merges categories with identical right-hand-sides.
+-- FIXME: handle probabilities
+mergeIdentical :: CFG -> CFG
+mergeIdentical g = onRules (map subst) g
+ where
+ -- maps categories to their replacement
+ m = Map.fromList [(y,concat (intersperse "+" xs))
+ | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
+ -- build data to compare for each category: a set of name,rhs pairs
+ rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
+ subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
+ substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
+
+-- | Keeps only the start category as an external category.
+purgeExternalCats :: CFG -> CFG
+purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) }
+
+--
+-- * Removing left recursion
+--
+
+-- The LC_LR algorithm from
+-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
+removeLeftRecursion :: CFG -> CFG
+removeLeftRecursion gr
+ = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
+ where
+ scheme1 = [CFRule a [x,NonTerminal a_x] n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ not (isLeftRecursive x),
+ let a_x = mkCat (NonTerminal a) x,
+ -- this is an extension of LC_LR to avoid generating
+ -- A-X categories for which there are no productions:
+ a_x `Set.member` newCats,
+ let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
+ (\_ -> CFRes 0) x]
+ scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
+ a <- retainedLeftRecursive,
+ b@(NonTerminal b') <- properLeftCornersOf a,
+ isLeftRecursive b,
+ CFRule _ (x:beta) n <- catRules gr b',
+ let a_x = mkCat (NonTerminal a) x,
+ let a_b = mkCat (NonTerminal a) b,
+ let i = length $ filterCats beta,
+ let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
+ (\_ -> CFApp (CFRes i) n) x]
+ scheme3 = [CFRule a_x beta n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ CFRule _ (x':beta) n <- catRules gr a,
+ x == x',
+ let a_x = mkCat (NonTerminal a) x,
+ let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
+ (\_ -> n) x]
+ scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
+
+ newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
+
+ shiftTerm :: CFTerm -> CFTerm
+ shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
+ shiftTerm (CFRes 0) = CFVar 1
+ shiftTerm (CFRes n) = CFRes (n-1)
+ shiftTerm t = t
+ -- note: the rest don't occur in the original grammar
+
+ cats = allCats gr
+ rules = allRules gr
+
+ directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
+ leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
+ properLeftCorner = transitiveClosure directLeftCorner
+ properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
+ isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
+
+ leftRecursive = reflexiveElements properLeftCorner
+ isLeftRecursive = (`Set.member` leftRecursive)
+
+ retained = cfgStartCat gr `Set.insert`
+ Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
+ NonTerminal a <- ruleRhs r]
+ isRetained = (`Set.member` retained)
+
+ retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
+
+ mkCat :: CFSymbol -> CFSymbol -> Cat
+ mkCat x y = showSymbol x ++ "-" ++ showSymbol y
+ where showSymbol = symbol id show
+
+-- | Get the sets of mutually recursive non-terminals for a grammar.
+mutRecCats :: Bool -- ^ If true, all categories will be in some set.
+ -- If false, only recursive categories will be included.
+ -> CFG -> [Set Cat]
+mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
+ where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
+ refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
+
+--
+-- * Approximate context-free grammars with regular grammars.
+--
+
+makeSimpleRegular :: CFG -> CFG
+makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
+
+-- Use the transformation algorithm from \"Regular Approximation of Context-free
+-- Grammars through Approximation\", Mohri and Nederhof, 2000
+-- to create an over-generating regular grammar for a context-free
+-- grammar
+makeRegular :: CFG -> CFG
+makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
+ where trSet cs | allXLinear cs rs = rs
+ | otherwise = concatMap handleCat (Set.toList cs)
+ where rs = catSetRules g cs
+ handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
+ ++ concatMap (makeRightLinearRules c) (catRules g c)
+ where c' = newCat c
+ makeRightLinearRules b' (CFRule c ss n) =
+ case ys of
+ [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
+ (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
+ ++ makeRightLinearRules (newCat b) (CFRule c zs n)
+ where (xs,ys) = break (`catElem` cs) ss
+ -- don't add rules on the form A -> A
+ newRule c rhs n | rhs == [NonTerminal c] = []
+ | otherwise = [CFRule c rhs n]
+ newCat c = c ++ "$"
+
+--
+-- * CFG Utilities
+--
+
+mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
+mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
+
+groupProds :: [CFRule] -> Map Cat (Set CFRule)
+groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
+
+uniqueFuns :: CFG -> CFG
+uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg
+ ,cfgExternalCats = cfgExternalCats cfg
+ ,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg))))
+ }
+ where
+ uniqueFunSet funs (cat,rules) =
+ let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules)
+ in (funs',(cat,Set.fromList rules'))
+ uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args))
+ where
+ fun' = head [fun'|suffix<-"":map show ([2..]::[Int]),
+ let fun'=mkCId (showCId fun++suffix),
+ not (fun' `Set.member` funs)]
+
+-- | Gets all rules in a CFG.
+allRules :: CFG -> [CFRule]
+allRules = concat . map Set.toList . Map.elems . cfgRules
+
+-- | Gets all rules in a CFG, grouped by their LHS categories.
+allRulesGrouped :: CFG -> [(Cat,[CFRule])]
+allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
+
+-- | Gets all categories which have rules.
+allCats :: CFG -> [Cat]
+allCats = Map.keys . cfgRules
+
+-- | Gets all categories which have rules or occur in a RHS.
+allCats' :: CFG -> [Cat]
+allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union`
+ Set.fromList [c | rs <- Map.elems (cfgRules cfg),
+ r <- Set.toList rs,
+ NonTerminal c <- ruleRhs r])
+
+-- | Gets all rules for the given category.
+catRules :: CFG -> Cat -> [CFRule]
+catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
+
+-- | Gets all rules for categories in the given set.
+catSetRules :: CFG -> Set Cat -> [CFRule]
+catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
+
+mapCFGCats :: (Cat -> Cat) -> CFG -> CFG
+mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg))
+ (Set.map f (cfgExternalCats cfg))
+ [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg]
+
+onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
+onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
+
+onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
+onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
+
+-- | Clean up CFG after rules have been removed.
+cleanCFG :: CFG -> CFG
+cleanCFG = onCFG (Map.filter (not . Set.null))
+
+-- | Combine two CFGs.
+unionCFG :: CFG -> CFG -> CFG
+unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
+
+filterCFG :: (CFRule -> Bool) -> CFG -> CFG
+filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
+
+filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
+filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
+
+countCats :: CFG -> Int
+countCats = Map.size . cfgRules . cleanCFG
+
+countRules :: CFG -> Int
+countRules = length . allRules
+
+prCFG :: CFG -> String
+prCFG = prProductions . map prRule . allRules
+ where
+ prRule r = (lhsCat r, unwords (map prSym (ruleRhs r)))
+ prSym = symbol id (\t -> "\""++ t ++"\"")
+
+prProductions :: [(Cat,String)] -> String
+prProductions prods =
+ unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods]
+ where
+ maxLHSWidth = maximum $ 0:(map (length . fst) prods)
+ rpad n s = s ++ replicate (n - length s) ' '
+
+prCFTerm :: CFTerm -> String
+prCFTerm = pr 0
+ where
+ pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")")
+ pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t)
+ pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")")
+ pr _ (CFRes i) = "$" ++ show i
+ pr _ (CFVar i) = "x" ++ show i
+ pr _ (CFMeta c) = "?" ++ showCId c
+ paren 0 x = x
+ paren 1 x = "(" ++ x ++ ")"
+
+--
+-- * CFRule Utilities
+--
+
+ruleFun :: CFRule -> CId
+ruleFun (CFRule _ _ t) = f t
+ where f (CFObj n _) = n
+ f (CFApp _ x) = f x
+ f (CFAbs _ x) = f x
+ f _ = mkCId ""
+
+-- | Check if any of the categories used on the right-hand side
+-- are in the given list of categories.
+anyUsedBy :: [Cat] -> CFRule -> Bool
+anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
+
+mkCFTerm :: String -> CFTerm
+mkCFTerm n = CFObj (mkCId n) []
+
+ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
+ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Set Cat -> [CFRule] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+
+-- | Checks if a context-free rule is right-linear.
+isRightLinear :: Set Cat -- ^ The categories to consider
+ -> CFRule -- ^ The rule to check for right-linearity
+ -> Bool
+isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
+
+-- | Checks if a context-free rule is left-linear.
+isLeftLinear :: Set Cat -- ^ The categories to consider
+ -> CFRule -- ^ The rule to check for left-linearity
+ -> Bool
+isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
+
+
+--
+-- * Symbol utilities
+--
+
+symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
+symbol fc ft (NonTerminal cat) = fc cat
+symbol fc ft (Terminal tok) = ft tok
+
+mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
+mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
+
+filterCats :: [Symbol c t] -> [c]
+filterCats syms = [ cat | NonTerminal cat <- syms ]
+
+filterToks :: [Symbol c t] -> [t]
+filterToks syms = [ tok | Terminal tok <- syms ]
+
+-- | Checks if a symbol is a non-terminal of one of the given categories.
+catElem :: Ord c => Symbol c t -> Set c -> Bool
+catElem s cs = symbol (`Set.member` cs) (const False) s
+
+noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
+noCatsInSet cs = not . any (`catElem` cs)
diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs
index b1854da54..50a5ff90a 100644
--- a/src/compiler/GF/Grammar/EBNF.hs
+++ b/src/compiler/GF/Grammar/EBNF.hs
@@ -12,34 +12,19 @@
-- (Description of the module)
-----------------------------------------------------------------------------
-module GF.Grammar.EBNF (getEBNF) where
+module GF.Grammar.EBNF (EBNF, ERule, ERHS(..), ebnf2cf) where
import GF.Data.Operations
---import GF.Infra.Comments
-import GF.Grammar.CF
---import GF.CF.CFIdent
-import GF.Grammar.Grammar
---import GF.Grammar.PrGrammar
---import qualified GF.Source.AbsGF as A
+import GF.Grammar.CFG
+import PGF (mkCId)
-import Data.Char
import Data.List
---import System.FilePath
-
-
-
--- AR 18/4/2000 - 31/3/2004
-
-getEBNF :: FilePath -> String -> Err SourceGrammar
-getEBNF fpath = fmap (cf2gf fpath . ebnf2cf) . pEBNF
type EBNF = [ERule]
type ERule = (ECat, ERHS)
type ECat = (String,[Int])
type ETok = String
-ebnfID = "EBNF" ---- make this parametric!
-
data ERHS =
ETerm ETok
| ENonTerm ECat
@@ -50,13 +35,14 @@ data ERHS =
| EOpt ERHS
| EEmpty
-type CFRHS = [CFItem]
-type CFJustRule = (CFCat, CFRHS)
+type CFRHS = [CFSymbol]
+type CFJustRule = (Cat, CFRHS)
ebnf2cf :: EBNF -> [CFRule]
ebnf2cf ebnf =
- [L NoLoc (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
- mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i)
+ [CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)]
+ where
+ mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) []
normEBNF :: EBNF -> [CFJustRule]
normEBNF erules = let
@@ -115,13 +101,13 @@ substERules g (cat,itss) = (cat, map sub itss) where
sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
-eitem2cfitem :: EItem -> CFItem
+eitem2cfitem :: EItem -> CFSymbol
eitem2cfitem it = case it of
- EITerm a -> Right a
- EINonTerm cat -> Left (mkCFCatE cat)
- EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star"))
- EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus"))
- EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt"))
+ EITerm a -> Terminal a
+ EINonTerm cat -> NonTerminal (mkCFCatE cat)
+ EIStar (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Star"))
+ EIPlus (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Plus"))
+ EIOpt (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Opt"))
type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
@@ -157,198 +143,10 @@ mkECat ints = ("C", ints)
prECat (c,[]) = c
prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
-mkCFCatE :: ECat -> CFCat
+mkCFCatE :: ECat -> Cat
mkCFCatE = prECat
updECat _ (c,[]) = (c,[])
updECat ii (c,_) = (c,ii)
mkNewECat (c,ii) str = (c ++ str,ii)
-
------- parser for EBNF grammars
-
-pEBNF :: String -> Err EBNF
-pEBNF = parseResultErr (longestOfMany (pJ pERule))
-
-pERule :: Parser Char ERule
-pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
-
-pERHS :: Int -> Parser Char ERHS
-pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
-pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
-pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
-pERHS 3 = pQuotedString *** ETerm
- ||| pECat *** ENonTerm ||| pParenth (pERHS 0)
-
-pUnaryEOp :: Parser Char (ERHS -> ERHS)
-pUnaryEOp =
- lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
-
-pECat = pIdent *** (\c -> (c,[]))
-
-
-
-----------------------------------------------------------------------
--- Module : Parsers
--- some parser combinators a la Wadler and Hutton.
--- (only used in module "EBNF")
------------------------------------------------------------------------------
-
-infixr 2 |||, +||
-infixr 3 ***
-infixr 5 .>.
-infixr 5 ...
-infixr 5 ....
-infixr 5 +..
-infixr 5 ..+
-infixr 6 |>
-infixr 3 <<<
-
-
-type Parser a b = [a] -> [(b,[a])]
-
-parseResults :: Parser a b -> [a] -> [b]
-parseResults p s = [x | (x,r) <- p s, null r]
-
-parseResultErr :: Show a => Parser a b -> [a] -> Err b
-parseResultErr p s = case parseResults p s of
- [x] -> return x
- [] -> case
- maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of
- r -> Bad $ "\nno parse; reached" ++++ take 300 (show r)
- _ -> Bad "ambiguous"
-
-(...) :: Parser a b -> Parser a c -> Parser a (b,c)
-(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t]
-
-(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c
-(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t]
-
-(|||) :: Parser a b -> Parser a b -> Parser a b
-(p ||| q) s = p s ++ q s
-
-(+||) :: Parser a b -> Parser a b -> Parser a b
-p1 +|| p2 = take 1 . (p1 ||| p2)
-
-literal :: (Eq a) => a -> Parser a a
-literal x (c:cs) = [(x,cs) | x == c]
-literal _ _ = []
-
-(***) :: Parser a b -> (b -> c) -> Parser a c
-(p *** f) s = [(f x,r) | (x,r) <- p s]
-
-succeed :: b -> Parser a b
-succeed v s = [(v,s)]
-
-fails :: Parser a b
-fails s = []
-
-(+..) :: Parser a b -> Parser a c -> Parser a c
-p1 +.. p2 = p1 ... p2 *** snd
-
-(..+) :: Parser a b -> Parser a c -> Parser a b
-p1 ..+ p2 = p1 ... p2 *** fst
-
-(<<<) :: Parser a b -> c -> Parser a c -- return
-p <<< v = p *** (\x -> v)
-
-(|>) :: Parser a b -> (b -> Bool) -> Parser a b
-p |> b = p .>. (\x -> if b x then succeed x else fails)
-
-many :: Parser a b -> Parser a [b]
-many p = (p ... many p *** uncurry (:)) +|| succeed []
-
-some :: Parser a b -> Parser a [b]
-some p = (p ... many p) *** uncurry (:)
-
-longestOfMany :: Parser a b -> Parser a [b]
-longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed []
-
-closure :: (b -> Parser a b) -> (b -> Parser a b)
-closure p v = p v .>. closure p ||| succeed v
-
-pJunk :: Parser Char String
-pJunk = longestOfMany (satisfy (\x -> elem x "\n\t "))
-
-pJ :: Parser Char a -> Parser Char a
-pJ p = pJunk +.. p ..+ pJunk
-
-pTList :: String -> Parser Char a -> Parser Char [a]
-pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999
-
-pTJList :: String -> String -> Parser Char a -> Parser Char [a]
-pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:))
-
-pElem :: [String] -> Parser Char String
-pElem l = foldr (+||) fails (map literals l)
-
-(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c)
-p1 .... p2 = p1 ... pJunk +.. p2
-
-item :: Parser a a
-item (c:cs) = [(c,cs)]
-item [] = []
-
-satisfy :: (a -> Bool) -> Parser a a
-satisfy b = item |> b
-
-literals :: (Eq a,Show a) => [a] -> Parser a [a]
-literals l = case l of
- [] -> succeed []
- a:l -> literal a ... literals l *** (\ (x,y) -> x:y)
-
-lits :: (Eq a,Show a) => [a] -> Parser a [a]
-lits ts = literals ts
-
-jL :: String -> Parser Char String
-jL = pJ . lits
-
-pParenth :: Parser Char a -> Parser Char a
-pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')'
-
--- | p,...,p
-pCommaList :: Parser Char a -> Parser Char [a]
-pCommaList p = pTList "," (pJ p)
-
--- | the same or nothing
-pOptCommaList :: Parser Char a -> Parser Char [a]
-pOptCommaList p = pCommaList p ||| succeed []
-
--- | (p,...,p), poss. empty
-pArgList :: Parser Char a -> Parser Char [a]
-pArgList p = pParenth (pCommaList p) ||| succeed []
-
--- | min. 2 args
-pArgList2 :: Parser Char a -> Parser Char [a]
-pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:)
-
-longestOfSome :: Parser a b -> Parser a [b]
-longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y)
-
-pIdent :: Parser Char String
-pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:)
- where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\''
-
-pLetter, pDigit :: Parser Char Char
-pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++
- ['\192' .. '\255'])) -- no such in Char
-pDigit = satisfy isDigit
-
-pLetters :: Parser Char String
-pLetters = longestOfSome pLetter
-
-pAlphanum, pAlphaPlusChar :: Parser Char Char
-pAlphanum = pDigit ||| pLetter
-pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'")
-
-pQuotedString :: Parser Char String
-pQuotedString = literal '"' +.. pEndQuoted where
- pEndQuoted =
- literal '"' *** (const [])
- +|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:))
- +|| item .>. \ c -> pEndQuoted *** (c:)
-
-pIntc :: Parser Char Int
-pIntc = some (satisfy numb) *** read
- where numb x = elem x ['0'..'9']
-
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index c4f7159a2..0293d3915 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -26,7 +26,7 @@ $i = [$l $d _ '] -- identifier character
$u = [.\n] -- universal: any character
@rsyms = -- symbols and non-identifier-like reserved words
- \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
+ \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \=
:-
"--" [.]* ; -- Toss single line comments
@@ -83,6 +83,7 @@ data Token
| T_ccurly
| T_underscore
| T_at
+ | T_cfarrow
| T_PType
| T_Str
| T_Strs
@@ -169,6 +170,8 @@ resWords = Map.fromList
, b "|" T_bar
, b "_" T_underscore
, b "@" T_at
+ , b "::=" T_cfarrow
+ , b ":=" T_cfarrow
, b "PType" T_PType
, b "Str" T_Str
, b "Strs" T_Strs
diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y
index 6f7f5854e..387b69dd3 100644
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -7,6 +7,8 @@ module GF.Grammar.Parser
, pModHeader
, pExp
, pTopDef
+ , pCFRules
+ , pEBNFRules
) where
import GF.Infra.Ident
@@ -14,17 +16,23 @@ import GF.Infra.Option
import GF.Data.Operations
import GF.Grammar.Predef
import GF.Grammar.Grammar
+import GF.Grammar.CFG
+import GF.Grammar.EBNF
import GF.Grammar.Macros
import GF.Grammar.Lexer
import GF.Compile.Update (buildAnyTree)
---import Codec.Binary.UTF8.String(decodeString)
---import Data.Char(toLower)
+import Data.List(intersperse)
+import Data.Char(isAlphaNum)
+import PGF(mkCId)
+
}
%name pModDef ModDef
%name pTopDef TopDef
%partial pModHeader ModHeader
%name pExp Exp
+%name pCFRules ListCFRule
+%name pEBNFRules ListEBNFRule
-- no lexer declaration
%monad { P } { >>= } { return }
@@ -64,6 +72,7 @@ import GF.Compile.Update (buildAnyTree)
'\\\\' { T_lamlam }
'_' { T_underscore}
'|' { T_bar }
+ '::=' { T_cfarrow }
'PType' { T_PType }
'Str' { T_Str }
'Strs' { T_Strs }
@@ -602,6 +611,70 @@ ListDDecl
: {- empty -} { [] }
| DDecl ListDDecl { $1 ++ $2 }
+ListCFRule :: { [CFRule] }
+ListCFRule
+ : CFRule { $1 }
+ | CFRule ListCFRule { $1 ++ $2 }
+
+CFRule :: { [CFRule] }
+CFRule
+ : Ident '.' Ident '::=' ListCFSymbol ';' { [CFRule (showIdent $3) $5 (CFObj (mkCId (showIdent $1)) [])]
+ }
+ | Ident '::=' ListCFRHS ';' { let { cat = showIdent $1;
+ mkFun cat its =
+ case its of {
+ [] -> cat ++ "_";
+ _ -> concat $ intersperse "_" (cat : filter (not . null) (map clean its)) -- CLE style
+ };
+ clean sym =
+ case sym of {
+ Terminal c -> filter isAlphaNum c;
+ NonTerminal t -> t
+ }
+ } in map (\rhs -> CFRule cat rhs (CFObj (mkCId (mkFun cat rhs)) [])) $3
+ }
+
+ListCFRHS :: { [[CFSymbol]] }
+ListCFRHS
+ : ListCFSymbol { [$1] }
+ | ListCFSymbol '|' ListCFRHS { $1 : $3 }
+
+ListCFSymbol :: { [CFSymbol] }
+ListCFSymbol
+ : {- empty -} { [] }
+ | CFSymbol ListCFSymbol { $1 : $2 }
+
+CFSymbol :: { CFSymbol }
+ : String { Terminal $1 }
+ | Ident { NonTerminal (showIdent $1) }
+
+ListEBNFRule :: { [ERule] }
+ListEBNFRule
+ : EBNFRule { [$1] }
+ | EBNFRule ListEBNFRule { $1 : $2 }
+
+EBNFRule :: { ERule }
+ : Ident '::=' ERHS0 ';' { ((showIdent $1,[]),$3) }
+
+ERHS0 :: { ERHS }
+ : ERHS1 { $1 }
+ | ERHS1 '|' ERHS0 { EAlt $1 $3 }
+
+ERHS1 :: { ERHS }
+ : ERHS2 { $1 }
+ | ERHS2 ERHS1 { ESeq $1 $2 }
+
+ERHS2 :: { ERHS }
+ : ERHS3 '*' { EStar $1 }
+ | ERHS3 '+' { EPlus $1 }
+ | ERHS3 '?' { EOpt $1 }
+ | ERHS3 { $1 }
+
+ERHS3 :: { ERHS }
+ : String { ETerm $1 }
+ | Ident { ENonTerm (showIdent $1,[]) }
+ | '(' ERHS0 ')' { $2 }
+
Posn :: { Posn }
Posn
: {- empty -} {% getPosn }