diff options
Diffstat (limited to 'src/compiler/GF/Grammar')
| -rw-r--r-- | src/compiler/GF/Grammar/CF.hs | 143 | ||||
| -rw-r--r-- | src/compiler/GF/Grammar/CFG.hs | 386 | ||||
| -rw-r--r-- | src/compiler/GF/Grammar/EBNF.hs | 232 | ||||
| -rw-r--r-- | src/compiler/GF/Grammar/Lexer.x | 5 | ||||
| -rw-r--r-- | src/compiler/GF/Grammar/Parser.y | 77 |
5 files changed, 480 insertions, 363 deletions
diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs deleted file mode 100644 index a48238e42..000000000 --- a/src/compiler/GF/Grammar/CF.hs +++ /dev/null @@ -1,143 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : CF --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/15 17:56:13 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.13 $ --- --- parsing CF grammars and converting them to GF ------------------------------------------------------------------------------ - -module GF.Grammar.CF (getCF,CFItem,CFCat,CFFun,cf2gf,CFRule) where - -import GF.Grammar.Grammar -import GF.Grammar.Macros -import GF.Infra.Ident(Ident,identS) -import GF.Infra.Option -import GF.Infra.UseIO - -import GF.Data.Operations -import GF.Data.Utilities (nub') - -import qualified Data.Set as S -import Data.Char -import Data.List ---import System.FilePath - -getCF :: ErrorMonad m => FilePath -> String -> m SourceGrammar -getCF fpath = fmap (cf2gf fpath . uniqueFuns) . pCF - ---------------------- --- the parser ------- ---------------------- - -pCF :: ErrorMonad m => String -> m CF -pCF s = do - rules <- mapM getCFRule $ filter isRule $ lines s - return $ concat rules - where - isRule line = case dropWhile isSpace line of - '-':'-':_ -> False - _ -> not $ all isSpace line - --- rules have an amazingly easy parser, if we use the format --- fun. C -> item1 item2 ... where unquoted items are treated as cats --- Actually would be nice to add profiles to this. - -getCFRule :: ErrorMonad m => String -> m [CFRule] -getCFRule s = getcf (wrds s) where - getcf ws = case ws of - fun : cat : a : its | isArrow a -> - return [L NoLoc (init fun, (cat, map mkIt its))] - cat : a : its | isArrow a -> - return [L NoLoc (mkFun cat it, (cat, map mkIt it)) | it <- chunk its] - _ -> raise (" invalid rule:" +++ s) - isArrow a = elem a ["->", "::="] - mkIt w = case w of - ('"':w@(_:_)) -> Right (init w) - _ -> Left w - chunk its = case its of - [] -> [[]] - _ -> chunks "|" its - mkFun cat its = case its of - [] -> cat ++ "_" - _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style - clean = filter isAlphaNum -- to form valid identifiers - wrds = takeWhile (/= ";") . words -- to permit semicolon in the end - -type CF = [CFRule] - -type CFRule = L (CFFun, (CFCat, [CFItem])) - -type CFItem = Either CFCat String - -type CFCat = String -type CFFun = String - - --------------------------------- --- make function names unique -- --------------------------------- - -uniqueFuns :: CF -> CF -uniqueFuns = snd . mapAccumL uniqueFun S.empty - where - uniqueFun funs (L l (fun,rule)) = (S.insert fun' funs,L l (fun',rule)) - where - fun' = head [fun'|suffix<-"":map show ([2..]::[Int]), - let fun'=fun++suffix, - not (fun' `S.member` funs)] - - --------------------------- --- the compiler ---------- --------------------------- - -cf2gf :: FilePath -> CF -> SourceGrammar -cf2gf fpath cf = mGrammar [ - (aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs), - (cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc) - ] - where - name = justModuleName fpath - (abs,cnc,cat) = cf2grammar cf - aname = identS $ name ++ "Abs" - cname = identS name - - -cf2grammar :: CF -> (BinTree Ident Info, BinTree Ident Info, String) -cf2grammar rules = (buildTree abs, buildTree conc, cat) where - abs = cats ++ funs - conc = lincats ++ lins - cat = case rules of - (L _ (_,(c,_))):_ -> c -- the value category of the first rule - _ -> error "empty CF" - cats = [(cat, AbsCat (Just (L NoLoc []))) | - cat <- nub' (concat (map cf2cat rules))] ----notPredef cat - lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats] - (funs,lins) = unzip (map cf2rule rules) - -cf2cat :: CFRule -> [Ident] -cf2cat (L loc (_,(cat, items))) = map identS $ cat : [c | Left c <- items] - -cf2rule :: CFRule -> ((Ident,Info),(Ident,Info)) -cf2rule (L loc (fun, (cat, items))) = (def,ldef) where - f = identS fun - def = (f, AbsFun (Just (L loc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True)) - args0 = zip (map (identS . ("x" ++) . show) [0..]) items - args = [((Explicit,v), Cn (identS c)) | (v, Left c) <- args0] - args' = [(Explicit,identS "_", Cn (identS c)) | (_, Left c) <- args0] - ldef = (f, CncFun - Nothing - (Just (L loc (mkAbs (map fst args) - (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)])))) - Nothing - Nothing) - mkIt (v, Left _) = P (Vr v) theLinLabel - mkIt (_, Right a) = K a - foldconcat [] = K "" - foldconcat tt = foldr1 C tt diff --git a/src/compiler/GF/Grammar/CFG.hs b/src/compiler/GF/Grammar/CFG.hs new file mode 100644 index 000000000..93bce2aad --- /dev/null +++ b/src/compiler/GF/Grammar/CFG.hs @@ -0,0 +1,386 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.CFG +-- +-- Context-free grammar representation and manipulation. +---------------------------------------------------------------------- +module GF.Grammar.CFG where + +import GF.Data.Utilities +import PGF +--import GF.Infra.Option +import GF.Data.Relation + +--import Control.Monad +--import Control.Monad.State (State, get, put, evalState) +import Data.Map (Map) +import qualified Data.Map as Map +import Data.List +--import Data.Maybe (fromMaybe) +--import Data.Monoid (mconcat) +import Data.Set (Set) +import qualified Data.Set as Set + +-- +-- * Types +-- + +type Cat = String + +data Symbol c t = NonTerminal c | Terminal t + deriving (Eq, Ord, Show) + +type CFSymbol = Symbol Cat Token + +data CFRule = CFRule { + lhsCat :: Cat, + ruleRhs :: [CFSymbol], + ruleName :: CFTerm + } + deriving (Eq, Ord, Show) + +data CFTerm + = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments + | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id. + | CFApp CFTerm CFTerm -- ^ Application + | CFRes Int -- ^ The result of the n:th (0-based) non-terminal + | CFVar Int -- ^ A lambda-bound variable + | CFMeta CId -- ^ A metavariable + deriving (Eq, Ord, Show) + +data CFG = CFG { cfgStartCat :: Cat, + cfgExternalCats :: Set Cat, + cfgRules :: Map Cat (Set CFRule) } + deriving (Eq, Ord, Show) + + +-- +-- * Grammar filtering +-- + +-- | Removes all directly and indirectly cyclic productions. +-- FIXME: this may be too aggressive, only one production +-- needs to be removed to break a given cycle. But which +-- one should we pick? +-- FIXME: Does not (yet) remove productions which are cyclic +-- because of empty productions. +removeCycles :: CFG -> CFG +removeCycles = onRules f + where f rs = filter (not . isCycle) rs + where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs] + isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c + isCycle _ = False + +-- | Better bottom-up filter that also removes categories which contain no finite +-- strings. +bottomUpFilter :: CFG -> CFG +bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty }) + where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr + okSym g = symbol (`elem` allCats g) (const True) + +-- | Removes categories which are not reachable from any external category. +topDownFilter :: CFG -> CFG +topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg + where + rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ] + uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats + keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg + +-- | Merges categories with identical right-hand-sides. +-- FIXME: handle probabilities +mergeIdentical :: CFG -> CFG +mergeIdentical g = onRules (map subst) g + where + -- maps categories to their replacement + m = Map.fromList [(y,concat (intersperse "+" xs)) + | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs] + -- build data to compare for each category: a set of name,rhs pairs + rulesKey = Set.map (\ (CFRule _ r n) -> (n,r)) + subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n + substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m + +-- | Keeps only the start category as an external category. +purgeExternalCats :: CFG -> CFG +purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) } + +-- +-- * Removing left recursion +-- + +-- The LC_LR algorithm from +-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf +removeLeftRecursion :: CFG -> CFG +removeLeftRecursion gr + = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] } + where + scheme1 = [CFRule a [x,NonTerminal a_x] n' | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + not (isLeftRecursive x), + let a_x = mkCat (NonTerminal a) x, + -- this is an extension of LC_LR to avoid generating + -- A-X categories for which there are no productions: + a_x `Set.member` newCats, + let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0)) + (\_ -> CFRes 0) x] + scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' | + a <- retainedLeftRecursive, + b@(NonTerminal b') <- properLeftCornersOf a, + isLeftRecursive b, + CFRule _ (x:beta) n <- catRules gr b', + let a_x = mkCat (NonTerminal a) x, + let a_b = mkCat (NonTerminal a) b, + let i = length $ filterCats beta, + let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n))) + (\_ -> CFApp (CFRes i) n) x] + scheme3 = [CFRule a_x beta n' | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + CFRule _ (x':beta) n <- catRules gr a, + x == x', + let a_x = mkCat (NonTerminal a) x, + let n' = symbol (\_ -> CFAbs 1 (shiftTerm n)) + (\_ -> n) x] + scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats + + newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3)) + + shiftTerm :: CFTerm -> CFTerm + shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts) + shiftTerm (CFRes 0) = CFVar 1 + shiftTerm (CFRes n) = CFRes (n-1) + shiftTerm t = t + -- note: the rest don't occur in the original grammar + + cats = allCats gr + rules = allRules gr + + directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr] + leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner + properLeftCorner = transitiveClosure directLeftCorner + properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal + isProperLeftCornerOf = flip (isRelatedTo properLeftCorner) + + leftRecursive = reflexiveElements properLeftCorner + isLeftRecursive = (`Set.member` leftRecursive) + + retained = cfgStartCat gr `Set.insert` + Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr), + NonTerminal a <- ruleRhs r] + isRetained = (`Set.member` retained) + + retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained + + mkCat :: CFSymbol -> CFSymbol -> Cat + mkCat x y = showSymbol x ++ "-" ++ showSymbol y + where showSymbol = symbol id show + +-- | Get the sets of mutually recursive non-terminals for a grammar. +mutRecCats :: Bool -- ^ If true, all categories will be in some set. + -- If false, only recursive categories will be included. + -> CFG -> [Set Cat] +mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r + where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss] + refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation + +-- +-- * Approximate context-free grammars with regular grammars. +-- + +makeSimpleRegular :: CFG -> CFG +makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles + +-- Use the transformation algorithm from \"Regular Approximation of Context-free +-- Grammars through Approximation\", Mohri and Nederhof, 2000 +-- to create an over-generating regular grammar for a context-free +-- grammar +makeRegular :: CFG -> CFG +makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) } + where trSet cs | allXLinear cs rs = rs + | otherwise = concatMap handleCat (Set.toList cs) + where rs = catSetRules g cs + handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e + ++ concatMap (makeRightLinearRules c) (catRules g c) + where c' = newCat c + makeRightLinearRules b' (CFRule c ss n) = + case ys of + [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left + (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n + ++ makeRightLinearRules (newCat b) (CFRule c zs n) + where (xs,ys) = break (`catElem` cs) ss + -- don't add rules on the form A -> A + newRule c rhs n | rhs == [NonTerminal c] = [] + | otherwise = [CFRule c rhs n] + newCat c = c ++ "$" + +-- +-- * CFG Utilities +-- + +mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG +mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs } + +groupProds :: [CFRule] -> Map Cat (Set CFRule) +groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) + +uniqueFuns :: CFG -> CFG +uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg + ,cfgExternalCats = cfgExternalCats cfg + ,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg)))) + } + where + uniqueFunSet funs (cat,rules) = + let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules) + in (funs',(cat,Set.fromList rules')) + uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args)) + where + fun' = head [fun'|suffix<-"":map show ([2..]::[Int]), + let fun'=mkCId (showCId fun++suffix), + not (fun' `Set.member` funs)] + +-- | Gets all rules in a CFG. +allRules :: CFG -> [CFRule] +allRules = concat . map Set.toList . Map.elems . cfgRules + +-- | Gets all rules in a CFG, grouped by their LHS categories. +allRulesGrouped :: CFG -> [(Cat,[CFRule])] +allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules + +-- | Gets all categories which have rules. +allCats :: CFG -> [Cat] +allCats = Map.keys . cfgRules + +-- | Gets all categories which have rules or occur in a RHS. +allCats' :: CFG -> [Cat] +allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union` + Set.fromList [c | rs <- Map.elems (cfgRules cfg), + r <- Set.toList rs, + NonTerminal c <- ruleRhs r]) + +-- | Gets all rules for the given category. +catRules :: CFG -> Cat -> [CFRule] +catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr) + +-- | Gets all rules for categories in the given set. +catSetRules :: CFG -> Set Cat -> [CFRule] +catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr + +mapCFGCats :: (Cat -> Cat) -> CFG -> CFG +mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg)) + (Set.map f (cfgExternalCats cfg)) + [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg] + +onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG +onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) } + +onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG +onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg } + +-- | Clean up CFG after rules have been removed. +cleanCFG :: CFG -> CFG +cleanCFG = onCFG (Map.filter (not . Set.null)) + +-- | Combine two CFGs. +unionCFG :: CFG -> CFG -> CFG +unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x + +filterCFG :: (CFRule -> Bool) -> CFG -> CFG +filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p)) + +filterCFGCats :: (Cat -> Bool) -> CFG -> CFG +filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c)) + +countCats :: CFG -> Int +countCats = Map.size . cfgRules . cleanCFG + +countRules :: CFG -> Int +countRules = length . allRules + +prCFG :: CFG -> String +prCFG = prProductions . map prRule . allRules + where + prRule r = (lhsCat r, unwords (map prSym (ruleRhs r))) + prSym = symbol id (\t -> "\""++ t ++"\"") + +prProductions :: [(Cat,String)] -> String +prProductions prods = + unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods] + where + maxLHSWidth = maximum $ 0:(map (length . fst) prods) + rpad n s = s ++ replicate (n - length s) ' ' + +prCFTerm :: CFTerm -> String +prCFTerm = pr 0 + where + pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")") + pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t) + pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")") + pr _ (CFRes i) = "$" ++ show i + pr _ (CFVar i) = "x" ++ show i + pr _ (CFMeta c) = "?" ++ showCId c + paren 0 x = x + paren 1 x = "(" ++ x ++ ")" + +-- +-- * CFRule Utilities +-- + +ruleFun :: CFRule -> CId +ruleFun (CFRule _ _ t) = f t + where f (CFObj n _) = n + f (CFApp _ x) = f x + f (CFAbs _ x) = f x + f _ = mkCId "" + +-- | Check if any of the categories used on the right-hand side +-- are in the given list of categories. +anyUsedBy :: [Cat] -> CFRule -> Bool +anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) + +mkCFTerm :: String -> CFTerm +mkCFTerm n = CFObj (mkCId n) [] + +ruleIsNonRecursive :: Set Cat -> CFRule -> Bool +ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs + +-- | Check if all the rules are right-linear, or all the rules are +-- left-linear, with respect to given categories. +allXLinear :: Set Cat -> [CFRule] -> Bool +allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs + +-- | Checks if a context-free rule is right-linear. +isRightLinear :: Set Cat -- ^ The categories to consider + -> CFRule -- ^ The rule to check for right-linearity + -> Bool +isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs + +-- | Checks if a context-free rule is left-linear. +isLeftLinear :: Set Cat -- ^ The categories to consider + -> CFRule -- ^ The rule to check for left-linearity + -> Bool +isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs + + +-- +-- * Symbol utilities +-- + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (NonTerminal cat) = fc cat +symbol fc ft (Terminal tok) = ft tok + +mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t' +mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft) + +filterCats :: [Symbol c t] -> [c] +filterCats syms = [ cat | NonTerminal cat <- syms ] + +filterToks :: [Symbol c t] -> [t] +filterToks syms = [ tok | Terminal tok <- syms ] + +-- | Checks if a symbol is a non-terminal of one of the given categories. +catElem :: Ord c => Symbol c t -> Set c -> Bool +catElem s cs = symbol (`Set.member` cs) (const False) s + +noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool +noCatsInSet cs = not . any (`catElem` cs) diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs index b1854da54..50a5ff90a 100644 --- a/src/compiler/GF/Grammar/EBNF.hs +++ b/src/compiler/GF/Grammar/EBNF.hs @@ -12,34 +12,19 @@ -- (Description of the module) ----------------------------------------------------------------------------- -module GF.Grammar.EBNF (getEBNF) where +module GF.Grammar.EBNF (EBNF, ERule, ERHS(..), ebnf2cf) where import GF.Data.Operations ---import GF.Infra.Comments -import GF.Grammar.CF ---import GF.CF.CFIdent -import GF.Grammar.Grammar ---import GF.Grammar.PrGrammar ---import qualified GF.Source.AbsGF as A +import GF.Grammar.CFG +import PGF (mkCId) -import Data.Char import Data.List ---import System.FilePath - - - --- AR 18/4/2000 - 31/3/2004 - -getEBNF :: FilePath -> String -> Err SourceGrammar -getEBNF fpath = fmap (cf2gf fpath . ebnf2cf) . pEBNF type EBNF = [ERule] type ERule = (ECat, ERHS) type ECat = (String,[Int]) type ETok = String -ebnfID = "EBNF" ---- make this parametric! - data ERHS = ETerm ETok | ENonTerm ECat @@ -50,13 +35,14 @@ data ERHS = | EOpt ERHS | EEmpty -type CFRHS = [CFItem] -type CFJustRule = (CFCat, CFRHS) +type CFRHS = [CFSymbol] +type CFJustRule = (Cat, CFRHS) ebnf2cf :: EBNF -> [CFRule] ebnf2cf ebnf = - [L NoLoc (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where - mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i) + [CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)] + where + mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) [] normEBNF :: EBNF -> [CFJustRule] normEBNF erules = let @@ -115,13 +101,13 @@ substERules g (cat,itss) = (cat, map sub itss) where sub (EIPlus r : ii) = EIPlus (substERules g r) : ii sub (EIOpt r : ii) = EIOpt (substERules g r) : ii -eitem2cfitem :: EItem -> CFItem +eitem2cfitem :: EItem -> CFSymbol eitem2cfitem it = case it of - EITerm a -> Right a - EINonTerm cat -> Left (mkCFCatE cat) - EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star")) - EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus")) - EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt")) + EITerm a -> Terminal a + EINonTerm cat -> NonTerminal (mkCFCatE cat) + EIStar (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Star")) + EIPlus (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Plus")) + EIOpt (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Opt")) type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items @@ -157,198 +143,10 @@ mkECat ints = ("C", ints) prECat (c,[]) = c prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints) -mkCFCatE :: ECat -> CFCat +mkCFCatE :: ECat -> Cat mkCFCatE = prECat updECat _ (c,[]) = (c,[]) updECat ii (c,_) = (c,ii) mkNewECat (c,ii) str = (c ++ str,ii) - ------- parser for EBNF grammars - -pEBNF :: String -> Err EBNF -pEBNF = parseResultErr (longestOfMany (pJ pERule)) - -pERule :: Parser Char ERule -pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";" - -pERHS :: Int -> Parser Char ERHS -pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt -pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty -pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a) -pERHS 3 = pQuotedString *** ETerm - ||| pECat *** ENonTerm ||| pParenth (pERHS 0) - -pUnaryEOp :: Parser Char (ERHS -> ERHS) -pUnaryEOp = - lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id - -pECat = pIdent *** (\c -> (c,[])) - - - ----------------------------------------------------------------------- --- Module : Parsers --- some parser combinators a la Wadler and Hutton. --- (only used in module "EBNF") ------------------------------------------------------------------------------ - -infixr 2 |||, +|| -infixr 3 *** -infixr 5 .>. -infixr 5 ... -infixr 5 .... -infixr 5 +.. -infixr 5 ..+ -infixr 6 |> -infixr 3 <<< - - -type Parser a b = [a] -> [(b,[a])] - -parseResults :: Parser a b -> [a] -> [b] -parseResults p s = [x | (x,r) <- p s, null r] - -parseResultErr :: Show a => Parser a b -> [a] -> Err b -parseResultErr p s = case parseResults p s of - [x] -> return x - [] -> case - maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of - r -> Bad $ "\nno parse; reached" ++++ take 300 (show r) - _ -> Bad "ambiguous" - -(...) :: Parser a b -> Parser a c -> Parser a (b,c) -(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t] - -(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c -(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t] - -(|||) :: Parser a b -> Parser a b -> Parser a b -(p ||| q) s = p s ++ q s - -(+||) :: Parser a b -> Parser a b -> Parser a b -p1 +|| p2 = take 1 . (p1 ||| p2) - -literal :: (Eq a) => a -> Parser a a -literal x (c:cs) = [(x,cs) | x == c] -literal _ _ = [] - -(***) :: Parser a b -> (b -> c) -> Parser a c -(p *** f) s = [(f x,r) | (x,r) <- p s] - -succeed :: b -> Parser a b -succeed v s = [(v,s)] - -fails :: Parser a b -fails s = [] - -(+..) :: Parser a b -> Parser a c -> Parser a c -p1 +.. p2 = p1 ... p2 *** snd - -(..+) :: Parser a b -> Parser a c -> Parser a b -p1 ..+ p2 = p1 ... p2 *** fst - -(<<<) :: Parser a b -> c -> Parser a c -- return -p <<< v = p *** (\x -> v) - -(|>) :: Parser a b -> (b -> Bool) -> Parser a b -p |> b = p .>. (\x -> if b x then succeed x else fails) - -many :: Parser a b -> Parser a [b] -many p = (p ... many p *** uncurry (:)) +|| succeed [] - -some :: Parser a b -> Parser a [b] -some p = (p ... many p) *** uncurry (:) - -longestOfMany :: Parser a b -> Parser a [b] -longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed [] - -closure :: (b -> Parser a b) -> (b -> Parser a b) -closure p v = p v .>. closure p ||| succeed v - -pJunk :: Parser Char String -pJunk = longestOfMany (satisfy (\x -> elem x "\n\t ")) - -pJ :: Parser Char a -> Parser Char a -pJ p = pJunk +.. p ..+ pJunk - -pTList :: String -> Parser Char a -> Parser Char [a] -pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999 - -pTJList :: String -> String -> Parser Char a -> Parser Char [a] -pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:)) - -pElem :: [String] -> Parser Char String -pElem l = foldr (+||) fails (map literals l) - -(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c) -p1 .... p2 = p1 ... pJunk +.. p2 - -item :: Parser a a -item (c:cs) = [(c,cs)] -item [] = [] - -satisfy :: (a -> Bool) -> Parser a a -satisfy b = item |> b - -literals :: (Eq a,Show a) => [a] -> Parser a [a] -literals l = case l of - [] -> succeed [] - a:l -> literal a ... literals l *** (\ (x,y) -> x:y) - -lits :: (Eq a,Show a) => [a] -> Parser a [a] -lits ts = literals ts - -jL :: String -> Parser Char String -jL = pJ . lits - -pParenth :: Parser Char a -> Parser Char a -pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')' - --- | p,...,p -pCommaList :: Parser Char a -> Parser Char [a] -pCommaList p = pTList "," (pJ p) - --- | the same or nothing -pOptCommaList :: Parser Char a -> Parser Char [a] -pOptCommaList p = pCommaList p ||| succeed [] - --- | (p,...,p), poss. empty -pArgList :: Parser Char a -> Parser Char [a] -pArgList p = pParenth (pCommaList p) ||| succeed [] - --- | min. 2 args -pArgList2 :: Parser Char a -> Parser Char [a] -pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:) - -longestOfSome :: Parser a b -> Parser a [b] -longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y) - -pIdent :: Parser Char String -pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:) - where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\'' - -pLetter, pDigit :: Parser Char Char -pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++ - ['\192' .. '\255'])) -- no such in Char -pDigit = satisfy isDigit - -pLetters :: Parser Char String -pLetters = longestOfSome pLetter - -pAlphanum, pAlphaPlusChar :: Parser Char Char -pAlphanum = pDigit ||| pLetter -pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'") - -pQuotedString :: Parser Char String -pQuotedString = literal '"' +.. pEndQuoted where - pEndQuoted = - literal '"' *** (const []) - +|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:)) - +|| item .>. \ c -> pEndQuoted *** (c:) - -pIntc :: Parser Char Int -pIntc = some (satisfy numb) *** read - where numb x = elem x ['0'..'9'] - diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x index c4f7159a2..0293d3915 100644 --- a/src/compiler/GF/Grammar/Lexer.x +++ b/src/compiler/GF/Grammar/Lexer.x @@ -26,7 +26,7 @@ $i = [$l $d _ '] -- identifier character $u = [.\n] -- universal: any character @rsyms = -- symbols and non-identifier-like reserved words - \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ + \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= :- "--" [.]* ; -- Toss single line comments @@ -83,6 +83,7 @@ data Token | T_ccurly | T_underscore | T_at + | T_cfarrow | T_PType | T_Str | T_Strs @@ -169,6 +170,8 @@ resWords = Map.fromList , b "|" T_bar , b "_" T_underscore , b "@" T_at + , b "::=" T_cfarrow + , b ":=" T_cfarrow , b "PType" T_PType , b "Str" T_Str , b "Strs" T_Strs diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index 6f7f5854e..387b69dd3 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -7,6 +7,8 @@ module GF.Grammar.Parser , pModHeader , pExp , pTopDef + , pCFRules + , pEBNFRules ) where import GF.Infra.Ident @@ -14,17 +16,23 @@ import GF.Infra.Option import GF.Data.Operations import GF.Grammar.Predef import GF.Grammar.Grammar +import GF.Grammar.CFG +import GF.Grammar.EBNF import GF.Grammar.Macros import GF.Grammar.Lexer import GF.Compile.Update (buildAnyTree) ---import Codec.Binary.UTF8.String(decodeString) ---import Data.Char(toLower) +import Data.List(intersperse) +import Data.Char(isAlphaNum) +import PGF(mkCId) + } %name pModDef ModDef %name pTopDef TopDef %partial pModHeader ModHeader %name pExp Exp +%name pCFRules ListCFRule +%name pEBNFRules ListEBNFRule -- no lexer declaration %monad { P } { >>= } { return } @@ -64,6 +72,7 @@ import GF.Compile.Update (buildAnyTree) '\\\\' { T_lamlam } '_' { T_underscore} '|' { T_bar } + '::=' { T_cfarrow } 'PType' { T_PType } 'Str' { T_Str } 'Strs' { T_Strs } @@ -602,6 +611,70 @@ ListDDecl : {- empty -} { [] } | DDecl ListDDecl { $1 ++ $2 } +ListCFRule :: { [CFRule] } +ListCFRule + : CFRule { $1 } + | CFRule ListCFRule { $1 ++ $2 } + +CFRule :: { [CFRule] } +CFRule + : Ident '.' Ident '::=' ListCFSymbol ';' { [CFRule (showIdent $3) $5 (CFObj (mkCId (showIdent $1)) [])] + } + | Ident '::=' ListCFRHS ';' { let { cat = showIdent $1; + mkFun cat its = + case its of { + [] -> cat ++ "_"; + _ -> concat $ intersperse "_" (cat : filter (not . null) (map clean its)) -- CLE style + }; + clean sym = + case sym of { + Terminal c -> filter isAlphaNum c; + NonTerminal t -> t + } + } in map (\rhs -> CFRule cat rhs (CFObj (mkCId (mkFun cat rhs)) [])) $3 + } + +ListCFRHS :: { [[CFSymbol]] } +ListCFRHS + : ListCFSymbol { [$1] } + | ListCFSymbol '|' ListCFRHS { $1 : $3 } + +ListCFSymbol :: { [CFSymbol] } +ListCFSymbol + : {- empty -} { [] } + | CFSymbol ListCFSymbol { $1 : $2 } + +CFSymbol :: { CFSymbol } + : String { Terminal $1 } + | Ident { NonTerminal (showIdent $1) } + +ListEBNFRule :: { [ERule] } +ListEBNFRule + : EBNFRule { [$1] } + | EBNFRule ListEBNFRule { $1 : $2 } + +EBNFRule :: { ERule } + : Ident '::=' ERHS0 ';' { ((showIdent $1,[]),$3) } + +ERHS0 :: { ERHS } + : ERHS1 { $1 } + | ERHS1 '|' ERHS0 { EAlt $1 $3 } + +ERHS1 :: { ERHS } + : ERHS2 { $1 } + | ERHS2 ERHS1 { ESeq $1 $2 } + +ERHS2 :: { ERHS } + : ERHS3 '*' { EStar $1 } + | ERHS3 '+' { EPlus $1 } + | ERHS3 '?' { EOpt $1 } + | ERHS3 { $1 } + +ERHS3 :: { ERHS } + : String { ETerm $1 } + | Ident { ENonTerm (showIdent $1,[]) } + | '(' ERHS0 ')' { $2 } + Posn :: { Posn } Posn : {- empty -} {% getPosn } |
