summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2010-09-24 12:39:49 +0000
committeraarne <aarne@chalmers.se>2010-09-24 12:39:49 +0000
commit709b0518fa284b41777cbec015a4647303e7a0b3 (patch)
tree045b344fdc5c2d0d985da62e5dac7a2176255c3e /src
parent617ce3cce67acca54a1ef3127da91bcd3e6a12ab (diff)
restored the .ebnf grammar format
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Importing.hs27
-rw-r--r--src/compiler/GF/Grammar/CF.hs2
-rw-r--r--src/compiler/GF/Grammar/EBNF.hs353
3 files changed, 370 insertions, 12 deletions
diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs
index 194c993ba..80f3833ee 100644
--- a/src/compiler/GF/Command/Importing.hs
+++ b/src/compiler/GF/Command/Importing.hs
@@ -6,6 +6,7 @@ import PGF.Data
import GF.Compile
import GF.Grammar (identC, SourceGrammar) -- for cc command
import GF.Grammar.CF
+import GF.Grammar.EBNF
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Data.ErrM
@@ -19,17 +20,8 @@ importGrammar :: PGF -> Options -> [FilePath] -> IO PGF
importGrammar pgf0 _ [] = return pgf0
importGrammar pgf0 opts files =
case takeExtensions (last files) of
- ".cf" -> do
- s <- fmap unlines $ mapM readFile files
- let cnc = justModuleName (last files)
- gf <- case getCF cnc s of
- Ok g -> return g
- Bad s -> error s ----
- Ok gr <- appIOE $ compileSourceGrammar opts gf
- epgf <- appIOE $ link opts (identC (BS.pack (cnc ++ "Abs"))) gr
- case epgf of
- Ok pgf -> return pgf
- Bad s -> error s ----
+ ".cf" -> importCF opts files getCF
+ ".ebnf" -> importCF opts files getEBNF
s | elem s [".gf",".gfo"] -> do
res <- appIOE $ compileToPGF opts files
case res of
@@ -49,3 +41,16 @@ importSource src0 opts files = do
Bad msg -> do
putStrLn msg
return src0
+
+-- for different cf formats
+importCF opts files get = do
+ s <- fmap unlines $ mapM readFile files
+ let cnc = justModuleName (last files)
+ gf <- case get cnc s of
+ Ok g -> return g
+ Bad s -> error s ----
+ Ok gr <- appIOE $ compileSourceGrammar opts gf
+ epgf <- appIOE $ link opts (identC (BS.pack (cnc ++ "Abs"))) gr
+ case epgf of
+ Ok pgf -> return pgf
+ Bad s -> error s ----
diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs
index 06f67234b..e45008485 100644
--- a/src/compiler/GF/Grammar/CF.hs
+++ b/src/compiler/GF/Grammar/CF.hs
@@ -12,7 +12,7 @@
-- parsing CF grammars and converting them to GF
-----------------------------------------------------------------------------
-module GF.Grammar.CF (getCF) where
+module GF.Grammar.CF (getCF,CFItem,CFCat,CFFun,cf2gf,CFRule) where
import GF.Grammar.Grammar
import GF.Grammar.Macros
diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs
new file mode 100644
index 000000000..11a2b3c4b
--- /dev/null
+++ b/src/compiler/GF/Grammar/EBNF.hs
@@ -0,0 +1,353 @@
+----------------------------------------------------------------------
+-- |
+-- Module : EBNF
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:13 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.5 $
+--
+-- (Description of the module)
+-----------------------------------------------------------------------------
+
+module GF.Grammar.EBNF (getEBNF) where
+
+import GF.Data.Operations
+--import GF.Infra.Comments
+import GF.Grammar.CF
+--import GF.CF.CFIdent
+import GF.Grammar.Grammar
+--import GF.Grammar.PrGrammar
+--import qualified GF.Source.AbsGF as A
+
+import Data.Char
+import Data.List
+
+
+
+-- AR 18/4/2000 - 31/3/2004
+
+getEBNF :: String -> String -> Err SourceGrammar
+getEBNF name = fmap (cf2gf name . ebnf2cf) . pEBNF
+
+type EBNF = [ERule]
+type ERule = (ECat, ERHS)
+type ECat = (String,[Int])
+type ETok = String
+
+ebnfID = "EBNF" ---- make this parametric!
+
+data ERHS =
+ ETerm ETok
+ | ENonTerm ECat
+ | ESeq ERHS ERHS
+ | EAlt ERHS ERHS
+ | EStar ERHS
+ | EPlus ERHS
+ | EOpt ERHS
+ | EEmpty
+
+type CFRHS = [CFItem]
+type CFJustRule = (CFCat, CFRHS)
+
+ebnf2cf :: EBNF -> [CFRule]
+ebnf2cf ebnf =
+ [L (0,0) (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
+ mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i)
+
+normEBNF :: EBNF -> [CFJustRule]
+normEBNF erules = let
+ erules1 = [normERule ([i],r) | (i,r) <- zip [0..] erules]
+ erules2 = erules1 ---refreshECats erules1 --- this seems to be just bad !
+ erules3 = concat (map pickERules erules2)
+ erules4 = nubERules erules3
+ in [(mkCFCatE cat, map eitem2cfitem its) | (cat,itss) <- erules3, its <- itss]
+
+refreshECats :: [NormERule] -> [NormERule]
+refreshECats rules = [recas [i] rule | (i,rule) <- zip [0..] rules] where
+ recas ii (cat,its) = (updECat ii cat, [recss ii 0 s | s <- its])
+ recss ii n [] = []
+ recss ii n (s:ss) = recit (ii ++ [n]) s : recss ii (n+1) ss
+ recit ii it = case it of
+ EINonTerm cat -> EINonTerm (updECat ii cat)
+ EIStar (cat,t) -> EIStar (updECat ii cat, [recss ii 0 s | s <- t])
+ EIPlus (cat,t) -> EIPlus (updECat ii cat, [recss ii 0 s | s <- t])
+ EIOpt (cat,t) -> EIOpt (updECat ii cat, [recss ii 0 s | s <- t])
+ _ -> it
+
+pickERules :: NormERule -> [NormERule]
+pickERules rule@(cat,alts) = rule : concat (map pics (concat alts)) where
+ pics it = case it of
+ EIStar ru@(cat,t) -> mkEStarRules cat ++ pickERules ru
+ EIPlus ru@(cat,t) -> mkEPlusRules cat ++ pickERules ru
+ EIOpt ru@(cat,t) -> mkEOptRules cat ++ pickERules ru
+ _ -> []
+ mkEStarRules cat = [(cat', [[],[EINonTerm cat, EINonTerm cat']])]
+ where cat' = mkNewECat cat "Star"
+ mkEPlusRules cat = [(cat', [[EINonTerm cat],[EINonTerm cat, EINonTerm cat']])]
+ where cat' = mkNewECat cat "Plus"
+ mkEOptRules cat = [(cat', [[],[EINonTerm cat]])]
+ where cat' = mkNewECat cat "Opt"
+
+nubERules :: [NormERule] -> [NormERule]
+nubERules rules = nub optim where
+ optim = map (substERules (map mkSubst replaces)) irreducibles
+ (replaces,irreducibles) = partition reducible rules
+ reducible (cat,[items]) = isNewCat cat && all isOldIt items
+ reducible _ = False
+ isNewCat (_,ints) = ints == []
+ isOldIt (EITerm _) = True
+ isOldIt (EINonTerm cat) = not (isNewCat cat)
+ isOldIt _ = False
+ mkSubst (cat,its) = (cat, head its) -- def of reducible: its must be singleton
+--- the optimization assumes each cat has at most one EBNF rule.
+
+substERules :: [(ECat,[EItem])] -> NormERule -> NormERule
+substERules g (cat,itss) = (cat, map sub itss) where
+ sub [] = []
+ sub (i@(EINonTerm cat') : ii) = case lookup cat g of
+ Just its -> its ++ sub ii
+ _ -> i : sub ii
+ sub (EIStar r : ii) = EIStar (substERules g r) : ii
+ sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
+ sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
+
+eitem2cfitem :: EItem -> CFItem
+eitem2cfitem it = case it of
+ EITerm a -> Right a
+ EINonTerm cat -> Left (mkCFCatE cat)
+ EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star"))
+ EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus"))
+ EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt"))
+
+type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
+
+data EItem =
+ EITerm String
+ | EINonTerm ECat
+ | EIStar NormERule
+ | EIPlus NormERule
+ | EIOpt NormERule
+ deriving Eq
+
+normERule :: ([Int],ERule) -> NormERule
+normERule (ii,(cat,rhs)) =
+ (cat,[map (mkEItem (ii ++ [i])) r' | (i,r') <- zip [0..] (disjNorm rhs)]) where
+ disjNorm r = case r of
+ ESeq r1 r2 -> [x ++ y | x <- disjNorm r1, y <- disjNorm r2]
+ EAlt r1 r2 -> disjNorm r1 ++ disjNorm r2
+ EEmpty -> [[]]
+ _ -> [[r]]
+
+mkEItem :: [Int] -> ERHS -> EItem
+mkEItem ii rhs = case rhs of
+ ETerm a -> EITerm a
+ ENonTerm cat -> EINonTerm cat
+ EStar r -> EIStar (normERule (ii,(mkECat ii, r)))
+ EPlus r -> EIPlus (normERule (ii,(mkECat ii, r)))
+ EOpt r -> EIOpt (normERule (ii,(mkECat ii, r)))
+ _ -> EINonTerm ("?????",[])
+-- _ -> error "should not happen in ebnf" ---
+
+mkECat ints = ("C", ints)
+
+prECat (c,[]) = c
+prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
+
+mkCFCatE :: ECat -> CFCat
+mkCFCatE = prECat
+
+updECat _ (c,[]) = (c,[])
+updECat ii (c,_) = (c,ii)
+
+mkNewECat (c,ii) str = (c ++ str,ii)
+
+------ parser for EBNF grammars
+
+pEBNF :: String -> Err EBNF
+pEBNF = parseResultErr (longestOfMany (pJ pERule))
+
+pERule :: Parser Char ERule
+pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
+
+pERHS :: Int -> Parser Char ERHS
+pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
+pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
+pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
+pERHS 3 = pQuotedString *** ETerm
+ ||| pECat *** ENonTerm ||| pParenth (pERHS 0)
+
+pUnaryEOp :: Parser Char (ERHS -> ERHS)
+pUnaryEOp =
+ lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
+
+pECat = pIdent *** (\c -> (c,[]))
+
+
+
+----------------------------------------------------------------------
+-- Module : Parsers
+-- some parser combinators a la Wadler and Hutton.
+-- (only used in module "EBNF")
+-----------------------------------------------------------------------------
+
+infixr 2 |||, +||
+infixr 3 ***
+infixr 5 .>.
+infixr 5 ...
+infixr 5 ....
+infixr 5 +..
+infixr 5 ..+
+infixr 6 |>
+infixr 3 <<<
+
+
+type Parser a b = [a] -> [(b,[a])]
+
+parseResults :: Parser a b -> [a] -> [b]
+parseResults p s = [x | (x,r) <- p s, null r]
+
+parseResultErr :: Show a => Parser a b -> [a] -> Err b
+parseResultErr p s = case parseResults p s of
+ [x] -> return x
+ [] -> case
+ maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of
+ r -> Bad $ "\nno parse; reached" ++++ take 300 (show r)
+ _ -> Bad "ambiguous"
+
+(...) :: Parser a b -> Parser a c -> Parser a (b,c)
+(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t]
+
+(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c
+(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t]
+
+(|||) :: Parser a b -> Parser a b -> Parser a b
+(p ||| q) s = p s ++ q s
+
+(+||) :: Parser a b -> Parser a b -> Parser a b
+p1 +|| p2 = take 1 . (p1 ||| p2)
+
+literal :: (Eq a) => a -> Parser a a
+literal x (c:cs) = [(x,cs) | x == c]
+literal _ _ = []
+
+(***) :: Parser a b -> (b -> c) -> Parser a c
+(p *** f) s = [(f x,r) | (x,r) <- p s]
+
+succeed :: b -> Parser a b
+succeed v s = [(v,s)]
+
+fails :: Parser a b
+fails s = []
+
+(+..) :: Parser a b -> Parser a c -> Parser a c
+p1 +.. p2 = p1 ... p2 *** snd
+
+(..+) :: Parser a b -> Parser a c -> Parser a b
+p1 ..+ p2 = p1 ... p2 *** fst
+
+(<<<) :: Parser a b -> c -> Parser a c -- return
+p <<< v = p *** (\x -> v)
+
+(|>) :: Parser a b -> (b -> Bool) -> Parser a b
+p |> b = p .>. (\x -> if b x then succeed x else fails)
+
+many :: Parser a b -> Parser a [b]
+many p = (p ... many p *** uncurry (:)) +|| succeed []
+
+some :: Parser a b -> Parser a [b]
+some p = (p ... many p) *** uncurry (:)
+
+longestOfMany :: Parser a b -> Parser a [b]
+longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed []
+
+closure :: (b -> Parser a b) -> (b -> Parser a b)
+closure p v = p v .>. closure p ||| succeed v
+
+pJunk :: Parser Char String
+pJunk = longestOfMany (satisfy (\x -> elem x "\n\t "))
+
+pJ :: Parser Char a -> Parser Char a
+pJ p = pJunk +.. p ..+ pJunk
+
+pTList :: String -> Parser Char a -> Parser Char [a]
+pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999
+
+pTJList :: String -> String -> Parser Char a -> Parser Char [a]
+pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:))
+
+pElem :: [String] -> Parser Char String
+pElem l = foldr (+||) fails (map literals l)
+
+(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c)
+p1 .... p2 = p1 ... pJunk +.. p2
+
+item :: Parser a a
+item (c:cs) = [(c,cs)]
+item [] = []
+
+satisfy :: (a -> Bool) -> Parser a a
+satisfy b = item |> b
+
+literals :: (Eq a,Show a) => [a] -> Parser a [a]
+literals l = case l of
+ [] -> succeed []
+ a:l -> literal a ... literals l *** (\ (x,y) -> x:y)
+
+lits :: (Eq a,Show a) => [a] -> Parser a [a]
+lits ts = literals ts
+
+jL :: String -> Parser Char String
+jL = pJ . lits
+
+pParenth :: Parser Char a -> Parser Char a
+pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')'
+
+-- | p,...,p
+pCommaList :: Parser Char a -> Parser Char [a]
+pCommaList p = pTList "," (pJ p)
+
+-- | the same or nothing
+pOptCommaList :: Parser Char a -> Parser Char [a]
+pOptCommaList p = pCommaList p ||| succeed []
+
+-- | (p,...,p), poss. empty
+pArgList :: Parser Char a -> Parser Char [a]
+pArgList p = pParenth (pCommaList p) ||| succeed []
+
+-- | min. 2 args
+pArgList2 :: Parser Char a -> Parser Char [a]
+pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:)
+
+longestOfSome :: Parser a b -> Parser a [b]
+longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y)
+
+pIdent :: Parser Char String
+pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:)
+ where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\''
+
+pLetter, pDigit :: Parser Char Char
+pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++
+ ['\192' .. '\255'])) -- no such in Char
+pDigit = satisfy isDigit
+
+pLetters :: Parser Char String
+pLetters = longestOfSome pLetter
+
+pAlphanum, pAlphaPlusChar :: Parser Char Char
+pAlphanum = pDigit ||| pLetter
+pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'")
+
+pQuotedString :: Parser Char String
+pQuotedString = literal '"' +.. pEndQuoted where
+ pEndQuoted =
+ literal '"' *** (const [])
+ +|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:))
+ +|| item .>. \ c -> pEndQuoted *** (c:)
+
+pIntc :: Parser Char Int
+pIntc = some (satisfy numb) *** read
+ where numb x = elem x ['0'..'9']
+