GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

author: aarne <aarne@cs.chalmers.se> 2008-05-21 09:26:44 +0000
committer: aarne <aarne@cs.chalmers.se> 2008-05-21 09:26:44 +0000
commit: 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree: 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF
parent: 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
9 files changed, 1497 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CF.hs b/src-3.0/GF/CF/CF.hs
new file mode 100644
index 000000000..9233e905a
--- /dev/null
+++ b/src-3.0/GF/CF/CF.hs
@@ -0,0 +1,213 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : CF
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:07 $ 
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.6 $
+--
+-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001
+-----------------------------------------------------------------------------
+
+module GF.CF.CF (-- * Types
+	   CF(..), CFRule, CFRuleGroup, 
+	   CFItem(..), CFTree(..), CFPredef, CFParser,
+	   RegExp(..), CFWord,
+	   -- * Functions
+	   cfParseResults,
+	   -- ** to construct CF grammars
+	   emptyCF, emptyCFPredef, rules2CF, groupCFRules,
+	   -- ** to construct rules
+	   atomCFRule, atomCFTerm, atomRegExp, altsCFTerm,
+	   -- ** to construct trees
+	   atomCFTree, buildCFTree,
+	   -- ** to decide whether a token matches a terminal item
+	   matchCFTerm, satRegExp,
+	   -- ** to analyse a CF grammar
+	   catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat, 
+	   valCatCF, valItemsCF, valFunCF, 
+	   startCat, predefOfCF, appCFPredef, valCFItem,
+	   cfTokens, wordsOfRegExp, forCFItem, 
+	   isCircularCF, predefRules
+	  ) where
+
+import GF.Data.Operations
+import GF.Data.Str
+import GF.Canon.AbsGFC
+import GF.Canon.GFC
+import GF.CF.CFIdent
+import Data.List (nub,nubBy)
+import Data.Char (isUpper, isLower, toUpper, toLower)
+
+-- CF grammar data types
+
+-- | abstract type CF. 
+-- Invariant: each category has all its rules grouped with it 
+--      also: the list is never empty (the category is just missing then)
+newtype CF  = CF ([CFRuleGroup], CFPredef) 
+type CFRule = (CFFun, (CFCat, [CFItem]))
+type CFRuleGroup = (CFCat,[CFRule])
+
+-- | CFPredef is a hack for variable symbols and literals; normally = @const []@
+data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
+
+newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
+
+-- | recognize literals, variables, etc
+type CFPredef  = CFTok -> [(CFCat, CFFun)]
+
+-- | Wadler style + return information
+type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
+
+cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
+cfParseResults rs = [b | (b,[]) <- fst rs]
+
+-- | terminals are regular expressions on words; to be completed to full regexp
+data RegExp = 
+    RegAlts [CFWord]         -- ^ list of alternative words
+  | RegSpec CFTok            -- ^ special token
+  deriving (Eq, Ord, Show)
+
+type CFWord = String
+
+-- the above types should be kept abstract, and the following functions used
+
+-- to construct CF grammars
+
+emptyCF :: CF
+emptyCF = CF ([], emptyCFPredef)
+
+emptyCFPredef :: CFPredef
+emptyCFPredef = const []
+
+rules2CF :: [CFRule] -> CF
+rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
+
+groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
+groupCFRules = foldr ins [] where
+  ins rule crs = case crs of
+    (c,r) : rs | compatCF c cat -> (c,rule:r) : rs
+    cr    : rs            -> cr         : ins rule rs
+    _                     -> [(cat,[rule])]
+   where
+     cat = valCatCF rule
+
+-- to construct rules
+
+-- | make a rule from a single token without constituents
+atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
+atomCFRule c f s = (f, (c, [atomCFTerm s]))
+
+-- | usual terminal
+atomCFTerm :: CFTok -> CFItem 
+atomCFTerm = CFTerm . atomRegExp
+
+atomRegExp :: CFTok -> RegExp
+atomRegExp t = case t of
+  TS s -> RegAlts [s] 
+  _    -> RegSpec t
+
+-- | terminal consisting of alternatives
+altsCFTerm :: [String] -> CFItem 
+altsCFTerm = CFTerm . RegAlts
+
+
+-- to construct trees
+
+-- | make a tree without constituents
+atomCFTree :: CFCat -> CFFun -> CFTree
+atomCFTree c f = buildCFTree c f []
+
+-- | make a tree with constituents. 
+buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
+buildCFTree c f trees = CFTree (f,(c,trees))
+
+{- ----
+cfMeta0 :: CFTree
+cfMeta0 = atomCFTree uCFCat metaCFFun
+
+-- used in happy
+litCFTree :: String -> CFTree --- Maybe CFTree
+litCFTree s = maybe cfMeta0 id $ do
+  (c,f) <- getCFLiteral s
+  return $ buildCFTree c f []
+-}
+
+-- to decide whether a token matches a terminal item
+
+matchCFTerm :: CFItem -> CFTok -> Bool
+matchCFTerm (CFTerm t) s = satRegExp t s
+matchCFTerm _ _ = False
+
+satRegExp :: RegExp -> CFTok -> Bool
+satRegExp r t = case (r,t) of 
+  (RegAlts tt, TS s) -> elem s tt
+  (RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
+  (RegSpec x,  _)    -> t == x ---
+  _ -> False
+ where
+   caseUpperOrLower s = case s of
+     c:cs | isUpper c -> [s, toLower c : cs]
+     c:cs | isLower c -> [s, toUpper c : cs]
+     _ -> [s]
+
+-- to analyse a CF grammar
+
+catsOfCF :: CF -> [CFCat]
+catsOfCF (CF (rr,_)) = map fst rr 
+
+rulesOfCF :: CF -> [CFRule]
+rulesOfCF (CF (rr,_)) = concatMap snd rr
+
+ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
+ruleGroupsOfCF (CF (rr,_)) = rr
+
+rulesForCFCat :: CF -> CFCat -> [CFRule]
+rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
+
+valCatCF :: CFRule -> CFCat
+valCatCF (_,(c,_)) = c
+
+valItemsCF :: CFRule -> [CFItem]
+valItemsCF (_,(_,i)) = i
+
+valFunCF :: CFRule -> CFFun
+valFunCF  (f,(_,_)) = f
+
+startCat :: CF -> CFCat
+startCat (CF (rr,_)) = fst (head rr) --- hardly useful
+
+predefOfCF :: CF -> CFPredef
+predefOfCF (CF (_,f)) = f
+
+appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
+appCFPredef = ($) . predefOfCF
+
+valCFItem :: CFItem -> Either RegExp CFCat
+valCFItem (CFTerm r)     = Left r
+valCFItem (CFNonterm nt) = Right nt
+
+cfTokens :: CF -> [CFWord]
+cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf, 
+                                                 CFTerm i <- valItemsCF r]
+
+wordsOfRegExp :: RegExp -> [CFWord]
+wordsOfRegExp (RegAlts tt) = tt
+wordsOfRegExp _ = []
+
+forCFItem :: CFTok -> CFRule -> Bool
+forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
+forCFItem _ _ = False
+
+-- | we should make a test of circular chains, too
+isCircularCF :: CFRule -> Bool
+isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
+isCircularCF _ = False
+
+-- | coercion to the older predef cf type
+predefRules :: CFPredef -> CFTok -> [CFRule]
+predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]
+
diff --git a/src-3.0/GF/CF/CFIdent.hs b/src-3.0/GF/CF/CFIdent.hs
new file mode 100644
index 000000000..02ee482c0
--- /dev/null
+++ b/src-3.0/GF/CF/CFIdent.hs
@@ -0,0 +1,253 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : CFIdent
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/14 16:03:40 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.13 $
+--
+-- symbols (categories, functions) for context-free grammars.
+-----------------------------------------------------------------------------
+
+module GF.CF.CFIdent (-- * Tokens and categories
+		CFTok(..), CFCat(..),
+		tS, tC, tL, tI, tF, tV, tM, tInt,
+		prCFTok,
+		-- * Function names and profiles
+		CFFun(..), Profile,
+		wordsCFTok,
+		-- * CF Functions
+		mkCFFun, varCFFun, consCFFun, string2CFFun, stringCFFun, 
+                intCFFun, floatCFFun, dummyCFFun,
+		cfFun2String, cfFun2Ident, cfFun2Profile, metaCFFun,
+		-- * CF Categories
+		mkCIdent, ident2CFCat, labels2CFCat, string2CFCat, 
+                catVarCF, cat2CFCat, cfCatString, cfCatInt,cfCatFloat,
+		moduleOfCFCat, cfCat2Cat, cfCat2Ident, lexCFCat,
+		-- * CF Tokens
+		string2CFTok, str2cftoks,
+		-- * Comparisons
+		compatToks, compatTok, compatCFFun, compatCF,
+                wordsLits
+	       ) where
+
+import GF.Data.Operations
+import GF.Canon.GFC
+import GF.Infra.Ident
+import GF.Grammar.Values (cPredefAbs)
+import GF.Canon.AbsGFC
+import GF.Grammar.Macros (ident2label)
+import GF.Grammar.PrGrammar
+import GF.Data.Str
+import Data.Char (toLower, toUpper, isSpace)
+import Data.List (intersperse)
+
+-- | this type should be abstract
+data CFTok = 
+   TS String     -- ^ normal strings
+ | TC String     -- ^ strings that are ambiguous between upper or lower case
+ | TL String     -- ^ string literals
+ | TI Integer    -- ^ integer literals
+ | TF Double     -- ^ float literals
+ | TV Ident      -- ^ variables
+ | TM Int String -- ^ metavariables; the integer identifies it
+  deriving (Eq, Ord, Show)
+
+-- | this type should be abstract
+newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
+
+tS :: String -> CFTok
+tC :: String -> CFTok
+tL :: String -> CFTok
+tI :: String -> CFTok
+tF :: String -> CFTok
+tV :: String -> CFTok
+tM :: String -> CFTok
+
+tS  = TS
+tC  = TC
+tL  = TL
+tI  = TI . read 
+tF  = TF . read 
+tV  = TV . identC
+tM  = TM 0
+
+tInt :: Integer -> CFTok
+tInt = TI
+
+prCFTok :: CFTok -> String
+prCFTok t = case t of
+  TS s -> s
+  TC s -> s
+  TL s -> s
+  TI i -> show i
+  TF i -> show i
+  TV x -> prt x
+  TM i m -> m --- "?" --- m
+
+-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@
+newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show) 
+-- - - - - - - - - - - - - - - - - - - - -         ^^^ added by peb, 21/5-04 
+
+type Profile  = [([[Int]],[Int])]
+
+wordsCFTok :: CFTok -> [String]
+wordsCFTok t = case t of
+  TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]]
+  _ -> [prCFTok t]
+
+-- the following functions should be used instead of constructors
+
+-- to construct CF functions
+
+mkCFFun :: Atom -> CFFun
+mkCFFun t = CFFun (t,[])
+
+varCFFun :: Ident -> CFFun
+varCFFun = mkCFFun . AV
+
+consCFFun :: CIdent -> CFFun
+consCFFun = mkCFFun . AC
+
+-- | standard way of making cf fun
+string2CFFun :: String -> String -> CFFun
+string2CFFun m c = consCFFun $ mkCIdent m c
+
+stringCFFun :: String -> CFFun 
+stringCFFun = mkCFFun . AS
+
+intCFFun :: Integer -> CFFun 
+intCFFun = mkCFFun . AI
+
+floatCFFun :: Double -> CFFun 
+floatCFFun = mkCFFun . AF
+
+-- | used in lexer-by-need rules
+dummyCFFun :: CFFun
+dummyCFFun = varCFFun $ identC "_"
+
+cfFun2String :: CFFun -> String
+cfFun2String (CFFun (f,_)) = prt f
+
+cfFun2Ident :: CFFun -> Ident
+cfFun2Ident (CFFun (f,_)) = identC $ prt_ f ---
+
+cfFun2Profile :: CFFun -> Profile
+cfFun2Profile (CFFun (_,p)) = p
+
+{- ----
+strPro2cfFun :: String -> Profile -> CFFun
+strPro2cfFun str p = (CFFun (AC (Ident str), p))
+-}
+
+metaCFFun :: CFFun
+metaCFFun = mkCFFun $ AM 0
+
+-- to construct CF categories
+
+-- | belongs elsewhere
+mkCIdent :: String -> String -> CIdent
+mkCIdent m c = CIQ (identC m) (identC c)
+
+ident2CFCat :: CIdent -> Ident -> CFCat
+ident2CFCat mc d = CFCat (mc, L d)
+
+labels2CFCat :: CIdent -> [Label] -> CFCat
+labels2CFCat mc d = CFCat (mc, L (identC (concat (intersperse "." (map prt d))))) ----
+
+-- | standard way of making cf cat: label s
+string2CFCat :: String -> String -> CFCat
+string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
+
+idents2CFCat :: Ident -> Ident -> CFCat
+idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")
+
+catVarCF :: CFCat
+catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
+
+cat2CFCat :: (Ident,Ident) -> CFCat
+cat2CFCat = uncurry idents2CFCat
+
+-- | literals
+cfCatString :: CFCat
+cfCatString = string2CFCat (prt cPredefAbs) "String"
+
+cfCatInt, cfCatFloat :: CFCat
+cfCatInt = string2CFCat (prt cPredefAbs) "Int"
+cfCatFloat = string2CFCat (prt cPredefAbs) "Float"
+
+
+
+{- ----
+uCFCat :: CFCat
+uCFCat = cat2CFCat uCat
+-}
+
+moduleOfCFCat :: CFCat -> Ident
+moduleOfCFCat (CFCat (CIQ m _, _)) = m
+
+-- | the opposite direction
+cfCat2Cat :: CFCat -> (Ident,Ident)
+cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
+
+cfCat2Ident :: CFCat -> Ident
+cfCat2Ident = snd . cfCat2Cat
+
+lexCFCat :: CFCat -> CFCat
+lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*")
+
+-- to construct CF tokens
+
+string2CFTok :: String -> CFTok
+string2CFTok = tS
+
+str2cftoks :: Str -> [CFTok]
+str2cftoks = map tS . wordsLits . sstr
+
+-- decide if two token lists look the same (in parser postprocessing)
+
+compatToks :: [CFTok] -> [CFTok] -> Bool
+compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]
+
+compatTok :: CFTok -> CFTok -> Bool
+compatTok (TM _ _) _ = True --- hack because metas are renamed
+compatTok _ (TM _ _) = True
+compatTok t u = any (`elem` (alts t)) (alts u) where
+  alts u = case u of
+    TC (c:s) -> [toLower c : s, toUpper c : s]
+    TL s -> [s, prQuotedString s]
+    _ -> [prCFTok u]
+
+-- | decide if two CFFuns have the same function head (profiles may differ)
+compatCFFun :: CFFun -> CFFun -> Bool
+compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
+
+-- | decide whether two categories match
+-- the modifiers can be from different modules, but on the same extension
+-- path, so there is no clash, and they can be safely ignored ---
+compatCF :: CFCat -> CFCat -> Bool
+----compatCF = (==)
+compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
+
+-- | Like 'words', but does not split on whitespace inside
+--   double quotes.wordsLits :: String -> [String]
+-- Also treats escaped quotes in quotes (AR 21/12/2005) by breaks
+-- instead of break
+wordsLits [] = []
+wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs)
+		 | isQuote c
+		     = let (l,rs) = breaks (==c) cs
+			   rs' = drop 1 rs
+			   in ([c]++l++[c]):wordsLits rs'
+		 | otherwise = let (w,rs) = break isSpaceQ cs
+				   in (c:w):wordsLits rs
+  where
+    breaks c cs = case break c cs of
+      (l@(_:_),d:rs) | last l == '\\' -> 
+        let (r,ts) = breaks c rs in (l++[d]++r, ts)
+      v -> v
+    isQuote c = elem c "\"'" 
+    isSpaceQ c = isSpace c ---- || isQuote c
diff --git a/src-3.0/GF/CF/CFtoGrammar.hs b/src-3.0/GF/CF/CFtoGrammar.hs
new file mode 100644
index 000000000..5e73aec31
--- /dev/null
+++ b/src-3.0/GF/CF/CFtoGrammar.hs
@@ -0,0 +1,62 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : CFtoGrammar
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:09 $ 
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.7 $
+--
+-- 26\/1\/2000 -- 18\/4 -- 24\/3\/2004
+-----------------------------------------------------------------------------
+
+module GF.CF.CFtoGrammar (cf2grammar) where
+
+import GF.Infra.Ident
+import GF.Grammar.Grammar
+import qualified GF.Source.AbsGF as A
+import qualified GF.Source.GrammarToSource as S
+import GF.Grammar.Macros
+
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.CF.PPrCF
+
+import GF.Data.Operations
+
+import Data.List (nub)
+import Data.Char (isSpace)
+
+cf2grammar :: CF -> [A.TopDef]
+cf2grammar cf = concatMap S.trAnyDef (abs ++ conc) where
+  rules = rulesOfCF cf
+  abs   = cats ++ funs
+  conc  = lintypes ++ lins
+  cats  = [(cat, AbsCat (yes []) (yes [])) | 
+             cat <- nub (concat (map cf2cat rules))] ----notPredef cat
+  lintypes = [(cat, CncCat (yes defLinType) nope nope) | (cat,AbsCat _ _) <- cats]
+  (funs,lins) = unzip (map cf2rule rules)
+
+cf2cat :: CFRule -> [Ident]
+cf2cat (_,(cat, items)) = map cfCat2Ident $ cat : [c | CFNonterm c <- items]
+
+cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
+cf2rule (fun, (cat, items)) = (def,ldef) where
+ f     = cfFun2Ident fun
+ def   = (f, AbsFun (yes (mkProd (args', Cn (cfCat2Ident cat), []))) nope)
+ args0 = zip (map (mkIdent "x") [0..]) items
+ args  = [(v, Cn (cfCat2Ident c)) | (v, CFNonterm c) <- args0]
+ args' = [(zIdent "_", Cn (cfCat2Ident c)) | (_, CFNonterm c) <- args0]
+ ldef  = (f, CncFun 
+               Nothing 
+               (yes (mkAbs (map fst args) 
+                      (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)])))
+               nope)
+ mkIt (v, CFNonterm _) = P (Vr v) theLinLabel
+ mkIt (_, CFTerm (RegAlts [a])) = K a
+ mkIt _ = K "" --- regexp not recognized in input CF ; use EBNF for this
+ foldconcat [] = K ""
+ foldconcat tt = foldr1 C tt
+
diff --git a/src-3.0/GF/CF/CanonToCF.hs b/src-3.0/GF/CF/CanonToCF.hs
new file mode 100644
index 000000000..80ce2e79d
--- /dev/null
+++ b/src-3.0/GF/CF/CanonToCF.hs
@@ -0,0 +1,214 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : CanonToCF
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/14 16:03:41 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.15 $
+--
+-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003 
+-----------------------------------------------------------------------------
+
+module GF.CF.CanonToCF (canon2cf) where
+
+import GF.System.Tracing -- peb 8/6-04
+
+import GF.Data.Operations
+import GF.Infra.Option
+import GF.Infra.Ident
+import GF.Canon.AbsGFC
+import GF.Grammar.LookAbs (allBindCatsOf)
+import GF.Canon.GFC
+import GF.Grammar.Values (isPredefCat,cPredefAbs)
+import GF.Grammar.PrGrammar
+import GF.Canon.CMacros
+import qualified GF.Infra.Modules as M
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.UseGrammar.Morphology
+import GF.Data.Trie2
+import Data.List (nub,partition)
+import Control.Monad
+
+-- | The main function: for a given cnc module 'm', build the CF grammar with all the
+-- rules coming from modules that 'm' extends. The categories are qualified by
+-- the abstract module name 'a' that 'm' is of.
+-- The ign argument tells what rules not to generate a parser for.
+canon2cf :: Options -> (Ident -> Bool) -> CanonGrammar -> Ident -> Err CF
+canon2cf opts ign gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
+  let ms = M.allExtends gr c
+  a <- M.abstractOfConcrete gr c
+  let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
+  let mms  = [(a, tree2list (M.jments m)) | m <- cncs]
+  cnc <- liftM M.jments $ M.lookupModMod gr c
+  rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts ign cnc)) mms
+  let bindcats = map snd $ allBindCatsOf gr 
+  let rules = filter (not . isCircularCF) rules0 ---- temporarily here
+  let grules = groupCFRules rules
+  let predef = mkCFPredef opts bindcats grules
+  return $ CF predef
+
+cnc2cfCond :: Options -> (Ident -> Bool) -> BinTree Ident Info -> 
+              Ident -> [(Ident,Info)] -> Err [CFRule]
+cnc2cfCond opts ign cnc m gr = 
+  liftM concat $ 
+  mapM lin2cf [(m,fun,cat,args,lin) | 
+                 (fun, CncFun cat args lin _) <- gr, notign fun, is fun] 
+ where
+   is f = isInBinTree f cnc
+   notign = not . ign
+
+type IFun = Ident
+type ICat = CIdent
+
+-- | all CF rules corresponding to a linearization rule
+lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
+lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
+  let rhss0 = allLinBranches lin          -- :: [([Label], Term)]
+  rhss1  <- mapM (mkCFItems m) rhss0    -- :: [([Label], [[PreCFItem]])]
+  mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
+
+-- | making sequences of CF items from every branch in a linearization
+mkCFItems :: Ident -> ([Label], Term) -> Err ([Label], [[PreCFItem]])
+mkCFItems m (labs,t) = do
+  items <- term2CFItems m t
+  return (labs, items)
+
+-- | making CF rules from sequences of CF items
+mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> ([Label], [[PreCFItem]]) -> Err [CFRule]
+mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
+ where
+  mkOneRule its = do
+    let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
+        profile  = mkProfile nonterms
+	cfcat    = labels2CFCat (redirectIdent m cat) lab
+        cffun    = CFFun (AC (CIQ m fun), profile)
+        cfits    = map precf2cf its
+    return (cffun,(cfcat,cfits))
+  mkProfile nonterms = map mkOne args 
+    where
+      mkOne (A  c i) = mkOne (AB c 0 i)
+      mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
+        where
+          mkB x = [k | (k,(j, [LV y], False)) <- nonterms, j == i, y == x]
+
+-- | intermediate data structure of CFItems with information for profiles
+data PreCFItem = 
+    PTerm RegExp                         -- ^ like ordinary Terminal 
+  | PNonterm CIdent Integer [Label] Bool -- ^ cat, position, part\/bind, whether arg
+   deriving Eq                    
+
+precf2cf :: PreCFItem -> CFItem
+precf2cf (PTerm r) = CFTerm r
+precf2cf (PNonterm cm _ ls True) = CFNonterm (labels2CFCat cm ls)
+precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
+
+
+-- | the main job in translating linearization rules into sequences of cf items 
+term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
+term2CFItems m t = errIn "forming cf items" $ case t of
+   S c _ -> t2c c
+
+   T _ cc -> do
+     its  <- mapM t2c [t | Cas _ t <- cc]
+     tryMkCFTerm (concat its)
+   V _ cc -> do
+     its  <- mapM t2c [t | t <- cc]
+     tryMkCFTerm (concat its)
+
+   C t1 t2 -> do
+     its1 <- t2c t1
+     its2 <- t2c t2
+     return [x ++ y | x <- its1, y <- its2]
+
+   FV ts -> do
+     its <- mapM t2c ts
+     tryMkCFTerm (concat its)
+
+   P (S c _) _ -> t2c c --- w-around for bug in Compute? AR 31/1/2006
+
+   P arg s -> extrR arg s
+
+   K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]
+
+   E -> return [[]]
+
+   K (KP d vs) -> do
+     let its  =  [PTerm (RegAlts [s]) | s <- d]
+     let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
+     tryMkCFTerm (its : itss)
+
+   _ -> return [] ---- prtBad "no cf for" t ----
+
+  where 
+
+    t2c = term2CFItems m
+
+    -- optimize the number of rules by a factorization
+    tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]  
+    tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
+      case mapM mkOne (counterparts ii) of
+        Ok tt -> return [tt]
+        _ -> return ii
+       where
+         mkOne cfits = case mapM mkOneTerm cfits of
+           Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
+           _ -> mkOneNonTerm cfits
+         mkOneTerm (PTerm (RegAlts t)) = return t
+         mkOneTerm _ = Bad ""
+         mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) = 
+           if all (== n) cc 
+	      then return n
+	      else Bad ""
+         mkOneNonTerm _ = Bad ""
+         counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
+    tryMkCFTerm itss = return itss
+
+    extrR arg lab = case (arg0,labs) of
+      (Arg (A  cat pos),   [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
+      (Arg (AB cat b pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
+      (Arg (A  cat pos),   _)  -> return [[PNonterm (cIQ cat) pos labs True]]
+      (Arg (AB cat b pos), _)  -> return [[PNonterm (cIQ cat) pos labs True]]
+                                     ---- ??
+      _   -> prtBad "cannot extract record field from" arg
+     where 
+       (arg0,labs) = headProj arg [lab]
+
+    headProj r ls = case r of
+      P r0 l0 -> headProj r0 (l0:ls)
+      S r0 _  -> headProj r0 ls
+      _ -> (r,ls)
+    cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c
+
+mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
+mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where
+  (ruls,preds) = if oElem lexerByNeed opts  -- option -cflexer
+                   then predefLexer rules 
+                   else (rules,emptyTrie)
+  preds0 s = 
+    [(cat,         metaCFFun)     | TM _ _ <- [s], cat <- cats] ++
+    [(cat,         varCFFun x)    | TV x   <- [s], cat <- catVarCF : bindcats] ++
+    [(cfCatString, stringCFFun t) | TL t   <- [s]]              ++
+    [(cfCatInt,    intCFFun t)    | TI t   <- [s]] ++
+    [(cfCatFloat,  floatCFFun t)  | TF t   <- [s]]
+  cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its]
+  bindcats = [c | c <- cats, elem (cfCat2Ident c) binds]
+  look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens
+
+--- TODO: integrate with morphology
+--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
+predefLexer groups = (reverse ruls, tcompile preds) where
+  (ruls,preds) = foldr mkOne ([],[]) groups
+  mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
+    (rule,pre) = case partition isLexical rules of
+      ([],_) -> (group,[])
+      (ls,rest) -> ((cat,rest), concatMap mkLexRule ls)
+  isLexical (f,(c,its)) = case its of
+    [CFTerm (RegAlts ws)] -> True
+    _ -> False
+  mkLexRule r = case r of
+    (fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws]
+    _ -> []
diff --git a/src-3.0/GF/CF/ChartParser.hs b/src-3.0/GF/CF/ChartParser.hs
new file mode 100644
index 000000000..740c4d787
--- /dev/null
+++ b/src-3.0/GF/CF/ChartParser.hs
@@ -0,0 +1,206 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : ChartParser
+-- Maintainer  : Peter Ljungl�f
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:12 $ 
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.10 $
+--
+-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
+-- OBSOLETE -- should use new MCFG parsers instead 
+-----------------------------------------------------------------------------
+
+module GF.CF.ChartParser (chartParser) where
+
+-- import Tracing
+-- import PrintParser
+-- import PrintSimplifiedTerm
+
+import GF.Data.Operations
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.CF.PPrCF (prCFItem)
+
+import GF.Data.OrdSet
+import GF.Data.OrdMap2
+
+import Data.List (groupBy)
+
+type Token      = CFTok
+type Name       = CFFun
+type Category   = CFItem
+type Grammar    = ([Production], Terminal)
+type Production = (Name, Category, [Category])
+type Terminal   = Token -> [(Category, Maybe Name)]
+type GParser    = Grammar -> Category -> [Token] -> ([ParseTree],String)
+data ParseTree  = Node Name Category [ParseTree] | Leaf Token
+
+maxTake :: Int
+-- maxTake = 1000
+maxTake = maxBound
+
+--------------------------------------------------
+-- converting between GF parsing and CFG parsing
+
+buildParser :: GParser -> CF -> CFCat -> CFParser
+buildParser gparser cf = parse
+  where 
+    parse = \start input -> 
+            let parse2 = parse' (CFNonterm start) input in 
+	    (take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2) 
+    parse' = gparser (cf2grammar cf)
+
+cf2grammar :: CF -> Grammar
+cf2grammar cf = (productions, terminal)
+  where 
+    productions  = [ (name, CFNonterm cat, rhs) | 
+		     (name, (cat, rhs)) <- cfRules ]
+    terminal tok = [ (CFNonterm cat, Just name) |
+		     (cat, name) <- cfPredef tok ] 
+		   ++
+		   [ (item, Nothing) |
+		     item <- elems rhsItems,
+		     matchCFTerm item tok ]
+    cfRules  = rulesOfCF cf
+    cfPredef = predefOfCF cf
+    rhsItems :: Set Category
+    rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
+
+parse2tree :: ParseTree -> CFTree
+parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees')) 
+  where
+    trees' = [ parse2tree t | t@(Node _ _ _) <- trees ]   -- ignore leafs
+
+maybeNode :: Maybe Name -> Category -> Token -> ParseTree
+maybeNode (Just name) cat tok = Node name cat [Leaf tok]
+maybeNode Nothing     _   tok = Leaf tok
+
+
+--------------------------------------------------
+-- chart parsing (bottom up kilbury-like)
+
+type Chart   = [CState]
+type CState  = Set Edge 
+type Edge    = (Int, Category, [Category])
+type Passive = (Int, Int, Category)
+
+chartParser :: CF -> CFCat -> CFParser
+chartParser = buildParser chartParser0
+
+chartParser0 :: GParser
+chartParser0 (productions, terminal) = cparse
+  where
+    emptyCats :: Set Category
+    emptyCats = empties emptySet
+      where 
+        empties cats | cats==cats' = cats
+		     | otherwise   = empties cats'
+	  where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
+				        all (`elemSet` cats) rhs ]
+
+    grammarMap :: Map Category [(Name, [Category])]
+    grammarMap = makeMapWith (++) 
+		 [ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
+
+    leftCornerMap :: Map Category (Set (Category,[Category]))
+    leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) | 
+					 (_, b, abs) <- productions, 
+					 (a : bs) <- removeNullable abs ]
+
+    removeNullable :: [Category] -> [[Category]]
+    removeNullable [] = []
+    removeNullable cats@(cat:cats')
+	| cat `elemSet` emptyCats = cats : removeNullable cats'
+	| otherwise               = [cats]
+
+    cparse :: Category -> [Token] -> ([ParseTree], String)
+    cparse start input = -- trace "ChartParser" $
+			 case lookup (0, length input, start) $
+			      -- tracePrt "#edgeTrees" (prt . map (length.snd)) $
+			      edgeTrees of
+			   Just trees -> -- tracePrt "#trees" (prt . length . fst) $
+					 (trees, "Chart:" ++++ prChart passiveEdges)
+			   Nothing    -> ([],    "Chart:" ++++ prChart passiveEdges)
+      where 
+        finalChart :: Chart
+        finalChart = map buildState initialChart
+
+        finalChartMap :: [Map Category (Set Edge)]
+	finalChartMap = map stateMap finalChart
+
+        stateMap :: CState -> Map Category (Set Edge)
+	stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
+					      (i, b, a:bs) <- elems state ]
+
+        initialChart :: Chart
+        initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $
+		       emptySet : map initialState (zip [0..] input)
+          where initialState (j, sym) = makeSet [ (j, cat, []) | 
+						  (cat, _) <- terminal sym ]
+
+        buildState :: CState -> CState
+	buildState = limit more
+	  where more (j, a, [])   = ordSet [ (j, b, bs) |
+				             (b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
+				    <++>
+				    lookupWith emptySet (finalChartMap !! j) a
+		more (j, b, a:bs) = ordSet [ (j, b, bs) |
+					     a `elemSet` emptyCats ]
+
+        passiveEdges :: [Passive]
+	passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $
+		       [ (i, j, cat) |
+			 (j, state) <- zip [0..] $
+			   -- tracePrt "#passiveChart" 
+			   -- (prt . map (length.filter (\(_,_,x)->null x).elems)) $
+			   -- tracePrt "#activeChart" (prt . map (length.elems)) $
+			   finalChart,
+			 (i, cat, []) <- elems state ]
+		       ++
+		       [ (i, i, cat) |
+			 i <- [0 .. length input],
+			 cat <- elems emptyCats ]
+
+        edgeTrees :: [ (Passive, [ParseTree]) ]
+	edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
+
+        edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
+	edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
+					  ((i,j,c), trees) <- edgeTrees ]
+
+        treesFor :: Passive -> [ParseTree]
+	treesFor (i, j, cat) = [ Node name cat trees |
+				 (name, rhs) <- lookupWith [] grammarMap cat,
+				 trees <- children rhs i j ]
+			       ++
+			       [ maybeNode name cat tok |
+				 i == j-1,
+				 let tok = input !! i,
+				 Just name <- [lookup cat (terminal tok)] ]
+
+        children :: [Category] -> Int -> Int -> [[ParseTree]]
+	children []     i k = [ [] | i == k ]
+	children (c:cs) i k = [ tree : rest |
+				i <= k,
+				(j, trees) <- lookupWith [] edgeTreesMap (i,c),
+				rest <- children cs j k,
+				tree <- trees ]
+
+
+{-
+instance Print ParseTree where
+    prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}"
+    prt (Leaf token) = prt token
+-}
+
+-- AR 10/12/2002
+
+prChart :: [Passive] -> String
+prChart = unlines . map (unwords . map prOne) . positions where
+  prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
+  positions = groupBy (\ (i,_,_) (j,_,_) -> i == j) 
+
+
diff --git a/src-3.0/GF/CF/EBNF.hs b/src-3.0/GF/CF/EBNF.hs
new file mode 100644
index 000000000..f091d19cb
--- /dev/null
+++ b/src-3.0/GF/CF/EBNF.hs
@@ -0,0 +1,191 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : EBNF
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:13 $ 
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.5 $
+--
+-- (Description of the module)
+-----------------------------------------------------------------------------
+
+module GF.CF.EBNF (pEBNFasGrammar) where
+
+import GF.Data.Operations
+import GF.Data.Parsers
+import GF.Infra.Comments
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.Grammar.Grammar
+import GF.Grammar.PrGrammar
+import GF.CF.CFtoGrammar
+import qualified GF.Source.AbsGF as A
+
+import Data.List (nub, partition)
+
+-- AR 18/4/2000 - 31/3/2004
+
+-- Extended BNF grammar with token type a
+-- put a = String for simple applications
+
+type EBNF = [ERule]
+type ERule = (ECat, ERHS)
+type ECat = (String,[Int])
+type ETok = String
+
+ebnfID = "EBNF" ---- make this parametric!
+
+data ERHS =
+   ETerm ETok
+ | ENonTerm ECat
+ | ESeq  ERHS ERHS
+ | EAlt  ERHS ERHS
+ | EStar ERHS
+ | EPlus ERHS
+ | EOpt  ERHS
+ | EEmpty
+
+type CFRHS = [CFItem]
+type CFJustRule = (CFCat, CFRHS)
+
+ebnf2gf :: EBNF -> [A.TopDef]
+ebnf2gf = cf2grammar . rules2CF . ebnf2cf
+
+ebnf2cf :: EBNF -> [CFRule]
+ebnf2cf ebnf = [(mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
+  mkCFF i (CFCat (_,c), _) = string2CFFun ebnfID ("Mk" ++ prt c ++ "_" ++ show i)
+
+normEBNF :: EBNF -> [CFJustRule]
+normEBNF erules = let
+  erules1 = [normERule ([i],r) | (i,r) <- zip [0..] erules]
+  erules2 = erules1 ---refreshECats erules1 --- this seems to be just bad !
+  erules3 = concat (map pickERules erules2)
+  erules4 = nubERules erules3
+ in [(mkCFCatE cat, map eitem2cfitem its) | (cat,itss) <- erules3, its <- itss]
+
+refreshECats :: [NormERule] -> [NormERule]
+refreshECats rules = [recas [i] rule | (i,rule) <- zip [0..] rules] where
+ recas ii (cat,its) = (updECat ii cat, [recss ii 0 s | s <- its])
+ recss ii n [] = []
+ recss ii n (s:ss) = recit (ii ++ [n]) s : recss ii (n+1) ss
+ recit ii it = case it of
+   EINonTerm cat  -> EINonTerm (updECat ii cat)
+   EIStar (cat,t) -> EIStar (updECat ii cat, [recss ii 0 s | s <- t])
+   EIPlus (cat,t) -> EIPlus (updECat ii cat, [recss ii 0 s | s <- t])
+   EIOpt  (cat,t) -> EIOpt  (updECat ii cat, [recss ii 0 s | s <- t])
+   _ -> it
+  
+pickERules :: NormERule -> [NormERule]
+pickERules rule@(cat,alts) = rule : concat (map pics (concat alts)) where
+ pics it = case it of
+   EIStar ru@(cat,t) -> mkEStarRules cat ++ pickERules ru
+   EIPlus ru@(cat,t) -> mkEPlusRules cat ++ pickERules ru
+   EIOpt  ru@(cat,t) -> mkEOptRules cat ++ pickERules ru
+   _ -> []
+ mkEStarRules cat = [(cat', [[],[EINonTerm cat, EINonTerm cat']])] 
+                                        where cat' = mkNewECat cat "Star"
+ mkEPlusRules cat = [(cat', [[EINonTerm cat],[EINonTerm cat, EINonTerm cat']])] 
+                                        where cat' = mkNewECat cat "Plus"
+ mkEOptRules cat  = [(cat', [[],[EINonTerm cat]])] 
+                                        where cat' = mkNewECat cat "Opt"
+
+nubERules :: [NormERule] -> [NormERule]
+nubERules rules = nub optim where 
+  optim = map (substERules (map mkSubst replaces)) irreducibles
+  (replaces,irreducibles) = partition reducible rules
+  reducible (cat,[items]) = isNewCat cat && all isOldIt items
+  reducible _ = False
+  isNewCat (_,ints) = ints == []
+  isOldIt (EITerm _) = True
+  isOldIt (EINonTerm cat) = not (isNewCat cat)
+  isOldIt _ = False
+  mkSubst (cat,its) = (cat, head its) -- def of reducible: its must be singleton
+--- the optimization assumes each cat has at most one EBNF rule.
+
+substERules :: [(ECat,[EItem])] -> NormERule -> NormERule
+substERules g (cat,itss) = (cat, map sub itss) where
+  sub [] = []
+  sub (i@(EINonTerm cat') : ii) = case lookup cat g of
+                                    Just its -> its ++ sub ii 
+                                    _ -> i : sub ii
+  sub (EIStar r : ii) = EIStar (substERules g r) : ii
+  sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
+  sub (EIOpt r : ii)  = EIOpt  (substERules g r) : ii
+
+eitem2cfitem :: EItem -> CFItem 
+eitem2cfitem it = case it of
+  EITerm a       -> atomCFTerm $ tS a
+  EINonTerm cat  -> CFNonterm (mkCFCatE cat)
+  EIStar (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Star"))
+  EIPlus (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Plus"))
+  EIOpt  (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Opt"))
+
+type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
+
+data EItem =
+   EITerm String
+ | EINonTerm ECat
+ | EIStar NormERule
+ | EIPlus NormERule
+ | EIOpt  NormERule
+  deriving Eq
+
+normERule :: ([Int],ERule) -> NormERule
+normERule (ii,(cat,rhs)) = 
+ (cat,[map (mkEItem (ii ++ [i])) r' | (i,r') <- zip [0..] (disjNorm rhs)]) where
+  disjNorm r = case r of
+    ESeq r1 r2 -> [x ++ y | x <- disjNorm r1, y <- disjNorm r2]
+    EAlt r1 r2 -> disjNorm r1 ++ disjNorm r2
+    EEmpty -> [[]]
+    _ -> [[r]]
+
+mkEItem :: [Int] -> ERHS -> EItem
+mkEItem ii rhs = case rhs of
+  ETerm a -> EITerm a
+  ENonTerm cat -> EINonTerm cat
+  EStar r -> EIStar (normERule (ii,(mkECat ii, r)))
+  EPlus r -> EIPlus (normERule (ii,(mkECat ii, r)))
+  EOpt  r -> EIOpt  (normERule (ii,(mkECat ii, r)))
+  _ -> EINonTerm ("?????",[])
+--  _ -> error "should not happen in ebnf" ---
+
+mkECat ints = ("C", ints)
+
+prECat (c,[]) = c
+prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
+
+mkCFCatE :: ECat -> CFCat
+mkCFCatE = string2CFCat ebnfID . prECat
+
+updECat _ (c,[]) = (c,[])
+updECat ii (c,_) = (c,ii)
+
+mkNewECat (c,ii) str = (c ++ str,ii)
+
+------ parser for EBNF grammars
+
+pEBNFasGrammar :: String -> Err [A.TopDef]
+pEBNFasGrammar = parseResultErr (pEBNF *** ebnf2gf) . remComments
+
+pEBNF :: Parser Char EBNF
+pEBNF = longestOfMany (pJ pERule)
+
+pERule :: Parser Char ERule
+pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
+
+pERHS :: Int -> Parser Char ERHS
+pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
+pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
+pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
+pERHS 3 = pQuotedString *** ETerm
+          ||| pECat *** ENonTerm ||| pParenth (pERHS 0)
+
+pUnaryEOp :: Parser Char (ERHS -> ERHS)
+pUnaryEOp = 
+ lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
+
+pECat = pIdent *** (\c -> (c,[]))
+
diff --git a/src-3.0/GF/CF/PPrCF.hs b/src-3.0/GF/CF/PPrCF.hs
new file mode 100644
index 000000000..1c2203e94
--- /dev/null
+++ b/src-3.0/GF/CF/PPrCF.hs
@@ -0,0 +1,102 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : PPrCF
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/15 17:56:13 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.13 $
+--
+-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003
+--
+-- use the Print class instead!
+-----------------------------------------------------------------------------
+
+module GF.CF.PPrCF (prCF, prCFTree, prCFRule, prCFFun, prCFCat, prCFItem, prRegExp, pCF) where
+
+import GF.Data.Operations
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.Canon.AbsGFC
+import GF.Grammar.PrGrammar
+
+import Data.Char
+import Data.List
+
+prCF :: CF -> String
+prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function
+
+prCFTree :: CFTree -> String
+prCFTree (CFTree (fun, (_,trees))) = prCFFun fun ++ prs trees where 
+ prs [] = ""
+ prs ts = " " ++ unwords (map ps ts)
+ ps t@(CFTree (_,(_,[]))) = prCFTree t
+ ps t = prParenth (prCFTree t)
+{-# NOINLINE prCFTree #-}
+-- Workaround ghc 6.8.2 bug
+
+
+prCFRule :: CFRule -> String
+prCFRule (fun,(cat,its)) = 
+  prCFFun fun ++ "." +++ prCFCat cat +++ "::=" +++ 
+  unwords (map prCFItem its) +++ ";"
+
+prCFFun :: CFFun -> String
+prCFFun = prCFFun' True ---- False -- print profiles for debug
+
+prCFFun' :: Bool -> CFFun -> String
+prCFFun' profs (CFFun (t, p)) = prt_ t ++ pp p where
+    pp p = if (not profs || normal p) then "" else "_" ++ concat (map show p)
+    normal p = and [x==y && null b | ((b,x),y) <- zip p (map (:[]) [0..])]
+
+prCFCat :: CFCat -> String
+prCFCat (CFCat (c,l)) = prt_ c ++ case prt_ l of
+  "s" -> []
+  _   -> "-" ++ prt_ l ----
+
+prCFItem :: CFItem -> String
+prCFItem (CFNonterm c) = prCFCat c
+prCFItem (CFTerm a) = prRegExp a
+
+prRegExp :: RegExp -> String
+prRegExp (RegAlts tt) = case tt of
+  [t] -> prQuotedString t
+  _ -> prParenth (prTList " | " (map prQuotedString tt))
+
+-- rules have an amazingly easy parser, if we use the format
+-- fun. C -> item1 item2 ... where unquoted items are treated as cats
+-- Actually would be nice to add profiles to this.
+
+getCFRule :: String -> String -> Err [CFRule]
+getCFRule mo s = getcf (wrds s) where
+  getcf ws = case ws of
+    fun : cat : a : its | isArrow a -> 
+      Ok [(string2CFFun mo (init fun), 
+            (string2CFCat mo cat, map mkIt its))]
+    cat : a : its | isArrow a -> 
+      Ok [(string2CFFun mo (mkFun cat it), 
+            (string2CFCat mo cat, map mkIt it)) | it <- chunk its]
+    _ -> Bad (" invalid rule:" +++ s)
+  isArrow a = elem a ["->", "::="] 
+  mkIt w = case w of
+    ('"':w@(_:_)) -> atomCFTerm (string2CFTok (init w))
+    _             -> CFNonterm (string2CFCat mo w)
+  chunk its = case its of
+    [] -> [[]]
+    _ -> chunks "|" its
+  mkFun cat its = case its of
+    [] -> cat ++ "_"
+    _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style
+  clean = filter isAlphaNum -- to form valid identifiers
+  wrds = takeWhile (/= ";") . words -- to permit semicolon in the end
+
+pCF :: String -> String -> Err [CFRule]
+pCF mo s = do
+  rules <- mapM (getCFRule mo) $ filter isRule $ lines s
+  return $ concat rules
+ where
+   isRule line = case dropWhile isSpace line of
+     '-':'-':_ -> False
+     _ -> not $ all isSpace line
diff --git a/src-3.0/GF/CF/PrLBNF.hs b/src-3.0/GF/CF/PrLBNF.hs
new file mode 100644
index 000000000..4ba2019bc
--- /dev/null
+++ b/src-3.0/GF/CF/PrLBNF.hs
@@ -0,0 +1,150 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : PrLBNF
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/06/17 14:15:16 $ 
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.11 $
+--
+-- Printing CF grammars generated from GF as LBNF grammar for BNFC.
+-- AR 26/1/2000 -- 9/6/2003 (PPrCF) -- 8/11/2003 -- 27/9/2004.
+-- With primitive error messaging, by rules and rule tails commented out
+-----------------------------------------------------------------------------
+
+module GF.CF.PrLBNF (prLBNF,prBNF) where
+
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.Canon.AbsGFC
+import GF.Infra.Ident
+import GF.Grammar.PrGrammar
+import GF.Compile.ShellState
+import GF.Canon.GFC
+import GF.Canon.Look
+
+import GF.Data.Operations
+import GF.Infra.Modules
+
+import Data.Char
+import Data.List (nub)
+
+prLBNF :: Bool -> StateGrammar -> String
+prLBNF new gr = unlines $ pragmas ++ (map (prCFRule cs) rules')
+  where    
+    cs = map IC ["Int","String"] ++ [catIdPlus c | (_,(c,_)) <- rules]
+    cf = stateCF gr
+    (pragmas,rules) = if new  -- tries to treat precedence levels
+                         then mkLBNF (stateGrammarST gr) $ rulesOfCF cf
+                         else ([],rulesOfCF cf) -- "normal" behaviour
+    rules' = concatMap expand rules
+    expand (f,(c,its)) = [(f,(c,it)) | it <- combinations (map expIt its)]
+    expIt i = case i of
+      CFTerm (RegAlts ss) -> [CFTerm (RegAlts [s]) | s <- ss]
+      _ -> [i]
+
+-- | a hack to hide the LBNF details
+prBNF :: Bool -> StateGrammar -> String
+prBNF b = unlines . (map (unwords . unLBNF . drop 1 . words)) . lines . prLBNF b
+  where
+    unLBNF r = case r of
+      "---":ts -> ts
+      ";":"---":ts -> ts
+      c:ts -> c : unLBNF ts
+      _ -> r
+
+--- | awful low level code without abstraction over label names etc
+mkLBNF :: CanonGrammar -> [CFRule] -> ([String],[CFRule])
+mkLBNF gr rules = (coercions, nub $ concatMap mkRule rules) where
+  coercions = ["coercions" +++ prt_ c +++ show n +++ ";"  | 
+    (_,ModMod m) <- modules gr,
+    (c,CncCat (RecType ls) _ _) <- tree2list $ jments m,
+    Lbg (L (IC "p")) (TInts n) <- ls
+    ]
+  precedences = [(f,(prec,assoc))  | 
+    (_,ModMod m) <- modules gr,
+    (f,CncFun _ _ (R lin) _) <- tree2list $ jments m,
+    (Just prec, Just assoc) <- [(
+       lookup "p" [(lab,p) | Ass (L (IC lab)) (EInt p) <- lin],
+       lookup "a" [(lab,a) | Ass (L (IC lab)) (Par (CIQ _ (IC a)) []) <- lin]
+       )]
+    ]
+  precfuns = map fst precedences
+  mkRule r@(fun@(CFFun (t, p)),(cat,its)) = case t of
+    AC (CIQ _ c) -> case lookup c precedences of
+      Just (prec,assoc) -> [(fun,(mkCat prec cat,mkIts cat prec assoc 0 its))]
+      _ -> return r
+    AD (CIQ _ c) -> case lookup c precedences of
+      Just (prec,assoc) -> [(fun,(mkCat prec cat,mkIts cat prec assoc 0 its))]
+      _ -> return r
+    _ -> return r
+  mkIts cat prec assoc i its = case its of
+    CFTerm (RegAlts ["("]):n@(CFNonterm k):CFTerm (RegAlts [")"]):rest | k==cat -> 
+      mkIts cat prec assoc i $ n:rest  -- remove variants with parentheses 
+    CFNonterm k:rest | k==cat -> 
+      CFNonterm (mkNonterm prec assoc i k) : mkIts cat prec assoc (i+1) rest
+    it:rest -> it:mkIts cat prec assoc i rest
+    [] -> []
+
+  mkCat prec (CFCat ((CIQ m (IC c)),l)) = CFCat ((CIQ m (IC (c ++ show prec ++ "+"))),l)
+  mkNonterm prec assoc i cat = mkCat prec' cat 
+    where 
+      prec' = case (assoc,i) of
+        ("PL",0) -> prec
+        ("PR",0) -> prec + 1
+        ("PR",_) -> prec
+        _        -> prec + 1
+
+catId ((CFCat ((CIQ _ c),l))) = c
+
+catIdPlus ((CFCat ((CIQ _ c@(IC s)),l))) = case reverse s of
+  '+':cs -> IC $ reverse $ dropWhile isDigit cs
+  _ -> c
+
+prCFRule :: [Ident] -> CFRule -> String
+prCFRule cs (fun,(cat,its)) = 
+  prCFFun cat fun ++ "." +++ prCFCat True cat +++ "::=" +++  --- err in cat -> in syntax
+  unwords (map (prCFItem cs) its) +++ ";"
+
+prCFFun :: CFCat -> CFFun -> String
+prCFFun (CFCat (_,l)) (CFFun (t, p)) = case t of
+  AC (CIQ _ x) -> let f = prId True x in (f ++ lab +++ f2 f +++ prP p)
+  AD (CIQ _ x) -> let f = prId True x in (f ++ lab +++ f2 f +++ prP p)
+  _ -> prErr True $ prt t
+ where
+   lab = prLab l
+   f2 f = if null lab then "" else f
+   prP = concatMap show
+   
+prId b i = case i of
+     IC "Int"  -> "Integer"
+     IC "#Var" -> "Ident"
+     IC "Var"  -> "Ident"
+     IC "id_"  -> "_"
+     IC s@(c:_) | last s == '+' -> init s  -- hack to save precedence information
+     IC s@(c:_) | isUpper c -> s ++ if isDigit (last s) then "_" else ""
+     _ -> prErr b $ prt i
+
+prLab i = case i of
+     L (IC "s") -> "" ---
+     L (IC "_") -> "" ---
+     _ -> let x = prt i in "_" ++ x ++ if isDigit (last x) then "_" else ""
+
+-- | just comment out the rest if you cannot interpret the function name in LBNF
+-- two versions, depending on whether in the beginning of a rule or elsewhere;
+-- in the latter case, error just terminates the rule 
+prErr :: Bool -> String -> String
+prErr b s = (if b then "" else " ;") +++ "---" +++ s
+
+prCFCat :: Bool -> CFCat -> String
+prCFCat b (CFCat ((CIQ _ c),l)) = prId b c ++ prLab l ----
+
+-- | if a category does not have a production of its own, we replace it by Ident
+prCFItem cs (CFNonterm c) = if elem (catIdPlus c) cs then prCFCat False c else "Ident"
+prCFItem _ (CFTerm a) = prRegExp a
+
+prRegExp (RegAlts tt) = case tt of
+  [t] -> prQuotedString t
+  _ -> prErr False $ prParenth (prTList " | " (map prQuotedString tt))
diff --git a/src-3.0/GF/CF/Profile.hs b/src-3.0/GF/CF/Profile.hs
new file mode 100644
index 000000000..e573bec78
--- /dev/null
+++ b/src-3.0/GF/CF/Profile.hs
@@ -0,0 +1,106 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : Profile
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:14 $ 
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.8 $
+--
+-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001
+-- revised 8/4/2002 for the new profile structure
+-----------------------------------------------------------------------------
+
+module GF.CF.Profile (postParse) where
+
+import GF.Canon.AbsGFC
+import GF.Canon.GFC
+import qualified GF.Infra.Ident as I
+import GF.Canon.CMacros
+---import MMacros
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.CF.PPrCF -- for error msg
+import GF.Grammar.PrGrammar
+
+import GF.Data.Operations
+
+import Control.Monad
+import Data.List (nub)
+
+-- | the job is done in two passes: 
+-- 
+--   1. tree2term: restore constituent order from Profile 
+-- 
+--   2. term2trm:  restore Bindings from Binds
+postParse :: CFTree -> Err Exp
+postParse tree = do
+  iterm <- errIn ("postprocessing parse tree" +++ prCFTree tree) $ tree2term tree
+  return $ term2trm  iterm
+
+-- | an intermediate data structure
+data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta   deriving (Eq,Show)
+type BindVs = [[I.Ident]]
+
+-- | (1) restore constituent order from Profile 
+tree2term :: CFTree -> Err ITerm
+-- tree2term (CFTree (f,(_,[t]))) | f == dummyCFFun = tree2term t -- not used
+tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
+  AM _ -> return IMeta
+  _ -> do
+    args  <- mapM mkArg pro
+    binds <- mapM mkBinds pro
+    return $ ITerm (fun, binds) args
+ where
+   mkArg (_,arg) = case arg of
+     [x] -> do               -- one occurrence
+       trx <- trees !? x
+       tree2term trx
+     []  -> return IMeta     -- suppression
+     _   -> do               -- reduplication
+       trees' <- mapM (trees !?) arg
+       xs1    <- mapM tree2term trees' 
+       xs2    <- checkArity xs1
+       unif xs2
+
+   checkArity xs = if length (nub [length xx | ITerm _ xx <- xs']) > 1 
+                   then Bad "arity error" 
+                   else return xs'
+           where xs' = [t | t@(ITerm _ _) <- xs]
+   unif xs = case [t | t@(ITerm _ _) <- xs] of 
+    [] -> return $ IMeta
+    (ITerm fp@(f,_) xx : ts) -> do
+      let hs = [h | ITerm (h,_) _ <- ts, h /= f]
+      testErr (null hs)                 -- if fails, hs must be nonempty
+              ("unification expects" +++ prt f +++ "but found" +++ prt (head hs))
+      xx' <- mapM unifArg [0 .. length xx - 1]
+      return $ ITerm fp xx'
+    where
+      unifArg i = unif [zz !! i | ITerm _ zz <- xs]
+
+   mkBinds (xss,_) = mapM mkBind xss
+   mkBind xs = do
+     ts <- mapM (trees !?) xs
+     let vs = [x | CFTree (CFFun (AV x,_),(_,[])) <- ts]
+     testErr (length ts == length vs) "non-variable in bound position"
+     case vs of
+       [x]  -> return x
+       []   -> return $ I.identC "h_" ---- uBoundVar
+       y:ys -> do
+         testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y)
+         return y
+
+-- | (2) restore Bindings from Binds
+term2trm :: ITerm -> Exp 
+term2trm IMeta = EAtom (AM 0) ---- mExp0
+term2trm (ITerm (fun, binds) terms) =
+  let bterms = zip binds terms
+  in mkAppAtom fun [mkAbsR xs (term2trm t) |  (xs,t) <- bterms]
+
+ --- these are deprecated
+ where
+   mkAbsR c e = foldr EAbs e c
+   mkAppAtom a = mkApp (EAtom a)
+   mkApp = foldl EApp
author	aarne <aarne@cs.chalmers.se>	2008-05-21 09:26:44 +0000
committer	aarne <aarne@cs.chalmers.se>	2008-05-21 09:26:44 +0000
commit	055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree	0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF
parent	915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)