diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF')
| -rw-r--r-- | src-3.0/GF/CF/CF.hs | 213 | ||||
| -rw-r--r-- | src-3.0/GF/CF/CFIdent.hs | 253 | ||||
| -rw-r--r-- | src-3.0/GF/CF/CFtoGrammar.hs | 62 | ||||
| -rw-r--r-- | src-3.0/GF/CF/CanonToCF.hs | 214 | ||||
| -rw-r--r-- | src-3.0/GF/CF/ChartParser.hs | 206 | ||||
| -rw-r--r-- | src-3.0/GF/CF/EBNF.hs | 191 | ||||
| -rw-r--r-- | src-3.0/GF/CF/PPrCF.hs | 102 | ||||
| -rw-r--r-- | src-3.0/GF/CF/PrLBNF.hs | 150 | ||||
| -rw-r--r-- | src-3.0/GF/CF/Profile.hs | 106 |
9 files changed, 1497 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CF.hs b/src-3.0/GF/CF/CF.hs new file mode 100644 index 000000000..9233e905a --- /dev/null +++ b/src-3.0/GF/CF/CF.hs @@ -0,0 +1,213 @@ +---------------------------------------------------------------------- +-- | +-- Module : CF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:07 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001 +----------------------------------------------------------------------------- + +module GF.CF.CF (-- * Types + CF(..), CFRule, CFRuleGroup, + CFItem(..), CFTree(..), CFPredef, CFParser, + RegExp(..), CFWord, + -- * Functions + cfParseResults, + -- ** to construct CF grammars + emptyCF, emptyCFPredef, rules2CF, groupCFRules, + -- ** to construct rules + atomCFRule, atomCFTerm, atomRegExp, altsCFTerm, + -- ** to construct trees + atomCFTree, buildCFTree, + -- ** to decide whether a token matches a terminal item + matchCFTerm, satRegExp, + -- ** to analyse a CF grammar + catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat, + valCatCF, valItemsCF, valFunCF, + startCat, predefOfCF, appCFPredef, valCFItem, + cfTokens, wordsOfRegExp, forCFItem, + isCircularCF, predefRules + ) where + +import GF.Data.Operations +import GF.Data.Str +import GF.Canon.AbsGFC +import GF.Canon.GFC +import GF.CF.CFIdent +import Data.List (nub,nubBy) +import Data.Char (isUpper, isLower, toUpper, toLower) + +-- CF grammar data types + +-- | abstract type CF. +-- Invariant: each category has all its rules grouped with it +-- also: the list is never empty (the category is just missing then) +newtype CF = CF ([CFRuleGroup], CFPredef) +type CFRule = (CFFun, (CFCat, [CFItem])) +type CFRuleGroup = (CFCat,[CFRule]) + +-- | CFPredef is a hack for variable symbols and literals; normally = @const []@ +data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show) + +newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show) + +-- | recognize literals, variables, etc +type CFPredef = CFTok -> [(CFCat, CFFun)] + +-- | Wadler style + return information +type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String) + +cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree] +cfParseResults rs = [b | (b,[]) <- fst rs] + +-- | terminals are regular expressions on words; to be completed to full regexp +data RegExp = + RegAlts [CFWord] -- ^ list of alternative words + | RegSpec CFTok -- ^ special token + deriving (Eq, Ord, Show) + +type CFWord = String + +-- the above types should be kept abstract, and the following functions used + +-- to construct CF grammars + +emptyCF :: CF +emptyCF = CF ([], emptyCFPredef) + +emptyCFPredef :: CFPredef +emptyCFPredef = const [] + +rules2CF :: [CFRule] -> CF +rules2CF rs = CF (groupCFRules rs, emptyCFPredef) + +groupCFRules :: [CFRule] -> [(CFCat,[CFRule])] +groupCFRules = foldr ins [] where + ins rule crs = case crs of + (c,r) : rs | compatCF c cat -> (c,rule:r) : rs + cr : rs -> cr : ins rule rs + _ -> [(cat,[rule])] + where + cat = valCatCF rule + +-- to construct rules + +-- | make a rule from a single token without constituents +atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule +atomCFRule c f s = (f, (c, [atomCFTerm s])) + +-- | usual terminal +atomCFTerm :: CFTok -> CFItem +atomCFTerm = CFTerm . atomRegExp + +atomRegExp :: CFTok -> RegExp +atomRegExp t = case t of + TS s -> RegAlts [s] + _ -> RegSpec t + +-- | terminal consisting of alternatives +altsCFTerm :: [String] -> CFItem +altsCFTerm = CFTerm . RegAlts + + +-- to construct trees + +-- | make a tree without constituents +atomCFTree :: CFCat -> CFFun -> CFTree +atomCFTree c f = buildCFTree c f [] + +-- | make a tree with constituents. +buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree +buildCFTree c f trees = CFTree (f,(c,trees)) + +{- ---- +cfMeta0 :: CFTree +cfMeta0 = atomCFTree uCFCat metaCFFun + +-- used in happy +litCFTree :: String -> CFTree --- Maybe CFTree +litCFTree s = maybe cfMeta0 id $ do + (c,f) <- getCFLiteral s + return $ buildCFTree c f [] +-} + +-- to decide whether a token matches a terminal item + +matchCFTerm :: CFItem -> CFTok -> Bool +matchCFTerm (CFTerm t) s = satRegExp t s +matchCFTerm _ _ = False + +satRegExp :: RegExp -> CFTok -> Bool +satRegExp r t = case (r,t) of + (RegAlts tt, TS s) -> elem s tt + (RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s] + (RegSpec x, _) -> t == x --- + _ -> False + where + caseUpperOrLower s = case s of + c:cs | isUpper c -> [s, toLower c : cs] + c:cs | isLower c -> [s, toUpper c : cs] + _ -> [s] + +-- to analyse a CF grammar + +catsOfCF :: CF -> [CFCat] +catsOfCF (CF (rr,_)) = map fst rr + +rulesOfCF :: CF -> [CFRule] +rulesOfCF (CF (rr,_)) = concatMap snd rr + +ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])] +ruleGroupsOfCF (CF (rr,_)) = rr + +rulesForCFCat :: CF -> CFCat -> [CFRule] +rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr + +valCatCF :: CFRule -> CFCat +valCatCF (_,(c,_)) = c + +valItemsCF :: CFRule -> [CFItem] +valItemsCF (_,(_,i)) = i + +valFunCF :: CFRule -> CFFun +valFunCF (f,(_,_)) = f + +startCat :: CF -> CFCat +startCat (CF (rr,_)) = fst (head rr) --- hardly useful + +predefOfCF :: CF -> CFPredef +predefOfCF (CF (_,f)) = f + +appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)] +appCFPredef = ($) . predefOfCF + +valCFItem :: CFItem -> Either RegExp CFCat +valCFItem (CFTerm r) = Left r +valCFItem (CFNonterm nt) = Right nt + +cfTokens :: CF -> [CFWord] +cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf, + CFTerm i <- valItemsCF r] + +wordsOfRegExp :: RegExp -> [CFWord] +wordsOfRegExp (RegAlts tt) = tt +wordsOfRegExp _ = [] + +forCFItem :: CFTok -> CFRule -> Bool +forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a +forCFItem _ _ = False + +-- | we should make a test of circular chains, too +isCircularCF :: CFRule -> Bool +isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c +isCircularCF _ = False + +-- | coercion to the older predef cf type +predefRules :: CFPredef -> CFTok -> [CFRule] +predefRules pre s = [atomCFRule c f s | (c,f) <- pre s] + diff --git a/src-3.0/GF/CF/CFIdent.hs b/src-3.0/GF/CF/CFIdent.hs new file mode 100644 index 000000000..02ee482c0 --- /dev/null +++ b/src-3.0/GF/CF/CFIdent.hs @@ -0,0 +1,253 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFIdent +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/14 16:03:40 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.13 $ +-- +-- symbols (categories, functions) for context-free grammars. +----------------------------------------------------------------------------- + +module GF.CF.CFIdent (-- * Tokens and categories + CFTok(..), CFCat(..), + tS, tC, tL, tI, tF, tV, tM, tInt, + prCFTok, + -- * Function names and profiles + CFFun(..), Profile, + wordsCFTok, + -- * CF Functions + mkCFFun, varCFFun, consCFFun, string2CFFun, stringCFFun, + intCFFun, floatCFFun, dummyCFFun, + cfFun2String, cfFun2Ident, cfFun2Profile, metaCFFun, + -- * CF Categories + mkCIdent, ident2CFCat, labels2CFCat, string2CFCat, + catVarCF, cat2CFCat, cfCatString, cfCatInt,cfCatFloat, + moduleOfCFCat, cfCat2Cat, cfCat2Ident, lexCFCat, + -- * CF Tokens + string2CFTok, str2cftoks, + -- * Comparisons + compatToks, compatTok, compatCFFun, compatCF, + wordsLits + ) where + +import GF.Data.Operations +import GF.Canon.GFC +import GF.Infra.Ident +import GF.Grammar.Values (cPredefAbs) +import GF.Canon.AbsGFC +import GF.Grammar.Macros (ident2label) +import GF.Grammar.PrGrammar +import GF.Data.Str +import Data.Char (toLower, toUpper, isSpace) +import Data.List (intersperse) + +-- | this type should be abstract +data CFTok = + TS String -- ^ normal strings + | TC String -- ^ strings that are ambiguous between upper or lower case + | TL String -- ^ string literals + | TI Integer -- ^ integer literals + | TF Double -- ^ float literals + | TV Ident -- ^ variables + | TM Int String -- ^ metavariables; the integer identifies it + deriving (Eq, Ord, Show) + +-- | this type should be abstract +newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show) + +tS :: String -> CFTok +tC :: String -> CFTok +tL :: String -> CFTok +tI :: String -> CFTok +tF :: String -> CFTok +tV :: String -> CFTok +tM :: String -> CFTok + +tS = TS +tC = TC +tL = TL +tI = TI . read +tF = TF . read +tV = TV . identC +tM = TM 0 + +tInt :: Integer -> CFTok +tInt = TI + +prCFTok :: CFTok -> String +prCFTok t = case t of + TS s -> s + TC s -> s + TL s -> s + TI i -> show i + TF i -> show i + TV x -> prt x + TM i m -> m --- "?" --- m + +-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@ +newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show) +-- - - - - - - - - - - - - - - - - - - - - ^^^ added by peb, 21/5-04 + +type Profile = [([[Int]],[Int])] + +wordsCFTok :: CFTok -> [String] +wordsCFTok t = case t of + TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]] + _ -> [prCFTok t] + +-- the following functions should be used instead of constructors + +-- to construct CF functions + +mkCFFun :: Atom -> CFFun +mkCFFun t = CFFun (t,[]) + +varCFFun :: Ident -> CFFun +varCFFun = mkCFFun . AV + +consCFFun :: CIdent -> CFFun +consCFFun = mkCFFun . AC + +-- | standard way of making cf fun +string2CFFun :: String -> String -> CFFun +string2CFFun m c = consCFFun $ mkCIdent m c + +stringCFFun :: String -> CFFun +stringCFFun = mkCFFun . AS + +intCFFun :: Integer -> CFFun +intCFFun = mkCFFun . AI + +floatCFFun :: Double -> CFFun +floatCFFun = mkCFFun . AF + +-- | used in lexer-by-need rules +dummyCFFun :: CFFun +dummyCFFun = varCFFun $ identC "_" + +cfFun2String :: CFFun -> String +cfFun2String (CFFun (f,_)) = prt f + +cfFun2Ident :: CFFun -> Ident +cfFun2Ident (CFFun (f,_)) = identC $ prt_ f --- + +cfFun2Profile :: CFFun -> Profile +cfFun2Profile (CFFun (_,p)) = p + +{- ---- +strPro2cfFun :: String -> Profile -> CFFun +strPro2cfFun str p = (CFFun (AC (Ident str), p)) +-} + +metaCFFun :: CFFun +metaCFFun = mkCFFun $ AM 0 + +-- to construct CF categories + +-- | belongs elsewhere +mkCIdent :: String -> String -> CIdent +mkCIdent m c = CIQ (identC m) (identC c) + +ident2CFCat :: CIdent -> Ident -> CFCat +ident2CFCat mc d = CFCat (mc, L d) + +labels2CFCat :: CIdent -> [Label] -> CFCat +labels2CFCat mc d = CFCat (mc, L (identC (concat (intersperse "." (map prt d))))) ---- + +-- | standard way of making cf cat: label s +string2CFCat :: String -> String -> CFCat +string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s") + +idents2CFCat :: Ident -> Ident -> CFCat +idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s") + +catVarCF :: CFCat +catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ---- + +cat2CFCat :: (Ident,Ident) -> CFCat +cat2CFCat = uncurry idents2CFCat + +-- | literals +cfCatString :: CFCat +cfCatString = string2CFCat (prt cPredefAbs) "String" + +cfCatInt, cfCatFloat :: CFCat +cfCatInt = string2CFCat (prt cPredefAbs) "Int" +cfCatFloat = string2CFCat (prt cPredefAbs) "Float" + + + +{- ---- +uCFCat :: CFCat +uCFCat = cat2CFCat uCat +-} + +moduleOfCFCat :: CFCat -> Ident +moduleOfCFCat (CFCat (CIQ m _, _)) = m + +-- | the opposite direction +cfCat2Cat :: CFCat -> (Ident,Ident) +cfCat2Cat (CFCat (CIQ m c,_)) = (m,c) + +cfCat2Ident :: CFCat -> Ident +cfCat2Ident = snd . cfCat2Cat + +lexCFCat :: CFCat -> CFCat +lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*") + +-- to construct CF tokens + +string2CFTok :: String -> CFTok +string2CFTok = tS + +str2cftoks :: Str -> [CFTok] +str2cftoks = map tS . wordsLits . sstr + +-- decide if two token lists look the same (in parser postprocessing) + +compatToks :: [CFTok] -> [CFTok] -> Bool +compatToks ts us = and [compatTok t u | (t,u) <- zip ts us] + +compatTok :: CFTok -> CFTok -> Bool +compatTok (TM _ _) _ = True --- hack because metas are renamed +compatTok _ (TM _ _) = True +compatTok t u = any (`elem` (alts t)) (alts u) where + alts u = case u of + TC (c:s) -> [toLower c : s, toUpper c : s] + TL s -> [s, prQuotedString s] + _ -> [prCFTok u] + +-- | decide if two CFFuns have the same function head (profiles may differ) +compatCFFun :: CFFun -> CFFun -> Bool +compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g + +-- | decide whether two categories match +-- the modifiers can be from different modules, but on the same extension +-- path, so there is no clash, and they can be safely ignored --- +compatCF :: CFCat -> CFCat -> Bool +----compatCF = (==) +compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l' + +-- | Like 'words', but does not split on whitespace inside +-- double quotes.wordsLits :: String -> [String] +-- Also treats escaped quotes in quotes (AR 21/12/2005) by breaks +-- instead of break +wordsLits [] = [] +wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs) + | isQuote c + = let (l,rs) = breaks (==c) cs + rs' = drop 1 rs + in ([c]++l++[c]):wordsLits rs' + | otherwise = let (w,rs) = break isSpaceQ cs + in (c:w):wordsLits rs + where + breaks c cs = case break c cs of + (l@(_:_),d:rs) | last l == '\\' -> + let (r,ts) = breaks c rs in (l++[d]++r, ts) + v -> v + isQuote c = elem c "\"'" + isSpaceQ c = isSpace c ---- || isQuote c diff --git a/src-3.0/GF/CF/CFtoGrammar.hs b/src-3.0/GF/CF/CFtoGrammar.hs new file mode 100644 index 000000000..5e73aec31 --- /dev/null +++ b/src-3.0/GF/CF/CFtoGrammar.hs @@ -0,0 +1,62 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFtoGrammar +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:09 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.7 $ +-- +-- 26\/1\/2000 -- 18\/4 -- 24\/3\/2004 +----------------------------------------------------------------------------- + +module GF.CF.CFtoGrammar (cf2grammar) where + +import GF.Infra.Ident +import GF.Grammar.Grammar +import qualified GF.Source.AbsGF as A +import qualified GF.Source.GrammarToSource as S +import GF.Grammar.Macros + +import GF.CF.CF +import GF.CF.CFIdent +import GF.CF.PPrCF + +import GF.Data.Operations + +import Data.List (nub) +import Data.Char (isSpace) + +cf2grammar :: CF -> [A.TopDef] +cf2grammar cf = concatMap S.trAnyDef (abs ++ conc) where + rules = rulesOfCF cf + abs = cats ++ funs + conc = lintypes ++ lins + cats = [(cat, AbsCat (yes []) (yes [])) | + cat <- nub (concat (map cf2cat rules))] ----notPredef cat + lintypes = [(cat, CncCat (yes defLinType) nope nope) | (cat,AbsCat _ _) <- cats] + (funs,lins) = unzip (map cf2rule rules) + +cf2cat :: CFRule -> [Ident] +cf2cat (_,(cat, items)) = map cfCat2Ident $ cat : [c | CFNonterm c <- items] + +cf2rule :: CFRule -> ((Ident,Info),(Ident,Info)) +cf2rule (fun, (cat, items)) = (def,ldef) where + f = cfFun2Ident fun + def = (f, AbsFun (yes (mkProd (args', Cn (cfCat2Ident cat), []))) nope) + args0 = zip (map (mkIdent "x") [0..]) items + args = [(v, Cn (cfCat2Ident c)) | (v, CFNonterm c) <- args0] + args' = [(zIdent "_", Cn (cfCat2Ident c)) | (_, CFNonterm c) <- args0] + ldef = (f, CncFun + Nothing + (yes (mkAbs (map fst args) + (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))) + nope) + mkIt (v, CFNonterm _) = P (Vr v) theLinLabel + mkIt (_, CFTerm (RegAlts [a])) = K a + mkIt _ = K "" --- regexp not recognized in input CF ; use EBNF for this + foldconcat [] = K "" + foldconcat tt = foldr1 C tt + diff --git a/src-3.0/GF/CF/CanonToCF.hs b/src-3.0/GF/CF/CanonToCF.hs new file mode 100644 index 000000000..80ce2e79d --- /dev/null +++ b/src-3.0/GF/CF/CanonToCF.hs @@ -0,0 +1,214 @@ +---------------------------------------------------------------------- +-- | +-- Module : CanonToCF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/14 16:03:41 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.15 $ +-- +-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003 +----------------------------------------------------------------------------- + +module GF.CF.CanonToCF (canon2cf) where + +import GF.System.Tracing -- peb 8/6-04 + +import GF.Data.Operations +import GF.Infra.Option +import GF.Infra.Ident +import GF.Canon.AbsGFC +import GF.Grammar.LookAbs (allBindCatsOf) +import GF.Canon.GFC +import GF.Grammar.Values (isPredefCat,cPredefAbs) +import GF.Grammar.PrGrammar +import GF.Canon.CMacros +import qualified GF.Infra.Modules as M +import GF.CF.CF +import GF.CF.CFIdent +import GF.UseGrammar.Morphology +import GF.Data.Trie2 +import Data.List (nub,partition) +import Control.Monad + +-- | The main function: for a given cnc module 'm', build the CF grammar with all the +-- rules coming from modules that 'm' extends. The categories are qualified by +-- the abstract module name 'a' that 'm' is of. +-- The ign argument tells what rules not to generate a parser for. +canon2cf :: Options -> (Ident -> Bool) -> CanonGrammar -> Ident -> Err CF +canon2cf opts ign gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04 + let ms = M.allExtends gr c + a <- M.abstractOfConcrete gr c + let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms] + let mms = [(a, tree2list (M.jments m)) | m <- cncs] + cnc <- liftM M.jments $ M.lookupModMod gr c + rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts ign cnc)) mms + let bindcats = map snd $ allBindCatsOf gr + let rules = filter (not . isCircularCF) rules0 ---- temporarily here + let grules = groupCFRules rules + let predef = mkCFPredef opts bindcats grules + return $ CF predef + +cnc2cfCond :: Options -> (Ident -> Bool) -> BinTree Ident Info -> + Ident -> [(Ident,Info)] -> Err [CFRule] +cnc2cfCond opts ign cnc m gr = + liftM concat $ + mapM lin2cf [(m,fun,cat,args,lin) | + (fun, CncFun cat args lin _) <- gr, notign fun, is fun] + where + is f = isInBinTree f cnc + notign = not . ign + +type IFun = Ident +type ICat = CIdent + +-- | all CF rules corresponding to a linearization rule +lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule] +lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do + let rhss0 = allLinBranches lin -- :: [([Label], Term)] + rhss1 <- mapM (mkCFItems m) rhss0 -- :: [([Label], [[PreCFItem]])] + mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat + +-- | making sequences of CF items from every branch in a linearization +mkCFItems :: Ident -> ([Label], Term) -> Err ([Label], [[PreCFItem]]) +mkCFItems m (labs,t) = do + items <- term2CFItems m t + return (labs, items) + +-- | making CF rules from sequences of CF items +mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> ([Label], [[PreCFItem]]) -> Err [CFRule] +mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss + where + mkOneRule its = do + let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its] + profile = mkProfile nonterms + cfcat = labels2CFCat (redirectIdent m cat) lab + cffun = CFFun (AC (CIQ m fun), profile) + cfits = map precf2cf its + return (cffun,(cfcat,cfits)) + mkProfile nonterms = map mkOne args + where + mkOne (A c i) = mkOne (AB c 0 i) + mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i]) + where + mkB x = [k | (k,(j, [LV y], False)) <- nonterms, j == i, y == x] + +-- | intermediate data structure of CFItems with information for profiles +data PreCFItem = + PTerm RegExp -- ^ like ordinary Terminal + | PNonterm CIdent Integer [Label] Bool -- ^ cat, position, part\/bind, whether arg + deriving Eq + +precf2cf :: PreCFItem -> CFItem +precf2cf (PTerm r) = CFTerm r +precf2cf (PNonterm cm _ ls True) = CFNonterm (labels2CFCat cm ls) +precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF + + +-- | the main job in translating linearization rules into sequences of cf items +term2CFItems :: Ident -> Term -> Err [[PreCFItem]] +term2CFItems m t = errIn "forming cf items" $ case t of + S c _ -> t2c c + + T _ cc -> do + its <- mapM t2c [t | Cas _ t <- cc] + tryMkCFTerm (concat its) + V _ cc -> do + its <- mapM t2c [t | t <- cc] + tryMkCFTerm (concat its) + + C t1 t2 -> do + its1 <- t2c t1 + its2 <- t2c t2 + return [x ++ y | x <- its1, y <- its2] + + FV ts -> do + its <- mapM t2c ts + tryMkCFTerm (concat its) + + P (S c _) _ -> t2c c --- w-around for bug in Compute? AR 31/1/2006 + + P arg s -> extrR arg s + + K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]] + + E -> return [[]] + + K (KP d vs) -> do + let its = [PTerm (RegAlts [s]) | s <- d] + let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs] + tryMkCFTerm (its : itss) + + _ -> return [] ---- prtBad "no cf for" t ---- + + where + + t2c = term2CFItems m + + -- optimize the number of rules by a factorization + tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]] + tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss = + case mapM mkOne (counterparts ii) of + Ok tt -> return [tt] + _ -> return ii + where + mkOne cfits = case mapM mkOneTerm cfits of + Ok tt -> return $ PTerm (RegAlts (concat (nub tt))) + _ -> mkOneNonTerm cfits + mkOneTerm (PTerm (RegAlts t)) = return t + mkOneTerm _ = Bad "" + mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) = + if all (== n) cc + then return n + else Bad "" + mkOneNonTerm _ = Bad "" + counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]] + tryMkCFTerm itss = return itss + + extrR arg lab = case (arg0,labs) of + (Arg (A cat pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]] + (Arg (AB cat b pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]] + (Arg (A cat pos), _) -> return [[PNonterm (cIQ cat) pos labs True]] + (Arg (AB cat b pos), _) -> return [[PNonterm (cIQ cat) pos labs True]] + ---- ?? + _ -> prtBad "cannot extract record field from" arg + where + (arg0,labs) = headProj arg [lab] + + headProj r ls = case r of + P r0 l0 -> headProj r0 (l0:ls) + S r0 _ -> headProj r0 ls + _ -> (r,ls) + cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c + +mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef) +mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where + (ruls,preds) = if oElem lexerByNeed opts -- option -cflexer + then predefLexer rules + else (rules,emptyTrie) + preds0 s = + [(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++ + [(cat, varCFFun x) | TV x <- [s], cat <- catVarCF : bindcats] ++ + [(cfCatString, stringCFFun t) | TL t <- [s]] ++ + [(cfCatInt, intCFFun t) | TI t <- [s]] ++ + [(cfCatFloat, floatCFFun t) | TF t <- [s]] + cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its] + bindcats = [c | c <- cats, elem (cfCat2Ident c) binds] + look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens + +--- TODO: integrate with morphology +--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)])) +predefLexer groups = (reverse ruls, tcompile preds) where + (ruls,preds) = foldr mkOne ([],[]) groups + mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where + (rule,pre) = case partition isLexical rules of + ([],_) -> (group,[]) + (ls,rest) -> ((cat,rest), concatMap mkLexRule ls) + isLexical (f,(c,its)) = case its of + [CFTerm (RegAlts ws)] -> True + _ -> False + mkLexRule r = case r of + (fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws] + _ -> [] diff --git a/src-3.0/GF/CF/ChartParser.hs b/src-3.0/GF/CF/ChartParser.hs new file mode 100644 index 000000000..740c4d787 --- /dev/null +++ b/src-3.0/GF/CF/ChartParser.hs @@ -0,0 +1,206 @@ +---------------------------------------------------------------------- +-- | +-- Module : ChartParser +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:12 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.10 $ +-- +-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5. +-- OBSOLETE -- should use new MCFG parsers instead +----------------------------------------------------------------------------- + +module GF.CF.ChartParser (chartParser) where + +-- import Tracing +-- import PrintParser +-- import PrintSimplifiedTerm + +import GF.Data.Operations +import GF.CF.CF +import GF.CF.CFIdent +import GF.CF.PPrCF (prCFItem) + +import GF.Data.OrdSet +import GF.Data.OrdMap2 + +import Data.List (groupBy) + +type Token = CFTok +type Name = CFFun +type Category = CFItem +type Grammar = ([Production], Terminal) +type Production = (Name, Category, [Category]) +type Terminal = Token -> [(Category, Maybe Name)] +type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String) +data ParseTree = Node Name Category [ParseTree] | Leaf Token + +maxTake :: Int +-- maxTake = 1000 +maxTake = maxBound + +-------------------------------------------------- +-- converting between GF parsing and CFG parsing + +buildParser :: GParser -> CF -> CFCat -> CFParser +buildParser gparser cf = parse + where + parse = \start input -> + let parse2 = parse' (CFNonterm start) input in + (take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2) + parse' = gparser (cf2grammar cf) + +cf2grammar :: CF -> Grammar +cf2grammar cf = (productions, terminal) + where + productions = [ (name, CFNonterm cat, rhs) | + (name, (cat, rhs)) <- cfRules ] + terminal tok = [ (CFNonterm cat, Just name) | + (cat, name) <- cfPredef tok ] + ++ + [ (item, Nothing) | + item <- elems rhsItems, + matchCFTerm item tok ] + cfRules = rulesOfCF cf + cfPredef = predefOfCF cf + rhsItems :: Set Category + rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ] + +parse2tree :: ParseTree -> CFTree +parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees')) + where + trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs + +maybeNode :: Maybe Name -> Category -> Token -> ParseTree +maybeNode (Just name) cat tok = Node name cat [Leaf tok] +maybeNode Nothing _ tok = Leaf tok + + +-------------------------------------------------- +-- chart parsing (bottom up kilbury-like) + +type Chart = [CState] +type CState = Set Edge +type Edge = (Int, Category, [Category]) +type Passive = (Int, Int, Category) + +chartParser :: CF -> CFCat -> CFParser +chartParser = buildParser chartParser0 + +chartParser0 :: GParser +chartParser0 (productions, terminal) = cparse + where + emptyCats :: Set Category + emptyCats = empties emptySet + where + empties cats | cats==cats' = cats + | otherwise = empties cats' + where cats' = makeSet [ cat | (_, cat, rhs) <- productions, + all (`elemSet` cats) rhs ] + + grammarMap :: Map Category [(Name, [Category])] + grammarMap = makeMapWith (++) + [ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ] + + leftCornerMap :: Map Category (Set (Category,[Category])) + leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) | + (_, b, abs) <- productions, + (a : bs) <- removeNullable abs ] + + removeNullable :: [Category] -> [[Category]] + removeNullable [] = [] + removeNullable cats@(cat:cats') + | cat `elemSet` emptyCats = cats : removeNullable cats' + | otherwise = [cats] + + cparse :: Category -> [Token] -> ([ParseTree], String) + cparse start input = -- trace "ChartParser" $ + case lookup (0, length input, start) $ + -- tracePrt "#edgeTrees" (prt . map (length.snd)) $ + edgeTrees of + Just trees -> -- tracePrt "#trees" (prt . length . fst) $ + (trees, "Chart:" ++++ prChart passiveEdges) + Nothing -> ([], "Chart:" ++++ prChart passiveEdges) + where + finalChart :: Chart + finalChart = map buildState initialChart + + finalChartMap :: [Map Category (Set Edge)] + finalChartMap = map stateMap finalChart + + stateMap :: CState -> Map Category (Set Edge) + stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) | + (i, b, a:bs) <- elems state ] + + initialChart :: Chart + initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $ + emptySet : map initialState (zip [0..] input) + where initialState (j, sym) = makeSet [ (j, cat, []) | + (cat, _) <- terminal sym ] + + buildState :: CState -> CState + buildState = limit more + where more (j, a, []) = ordSet [ (j, b, bs) | + (b, bs) <- elems (lookupWith emptySet leftCornerMap a) ] + <++> + lookupWith emptySet (finalChartMap !! j) a + more (j, b, a:bs) = ordSet [ (j, b, bs) | + a `elemSet` emptyCats ] + + passiveEdges :: [Passive] + passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $ + [ (i, j, cat) | + (j, state) <- zip [0..] $ + -- tracePrt "#passiveChart" + -- (prt . map (length.filter (\(_,_,x)->null x).elems)) $ + -- tracePrt "#activeChart" (prt . map (length.elems)) $ + finalChart, + (i, cat, []) <- elems state ] + ++ + [ (i, i, cat) | + i <- [0 .. length input], + cat <- elems emptyCats ] + + edgeTrees :: [ (Passive, [ParseTree]) ] + edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ] + + edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])] + edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) | + ((i,j,c), trees) <- edgeTrees ] + + treesFor :: Passive -> [ParseTree] + treesFor (i, j, cat) = [ Node name cat trees | + (name, rhs) <- lookupWith [] grammarMap cat, + trees <- children rhs i j ] + ++ + [ maybeNode name cat tok | + i == j-1, + let tok = input !! i, + Just name <- [lookup cat (terminal tok)] ] + + children :: [Category] -> Int -> Int -> [[ParseTree]] + children [] i k = [ [] | i == k ] + children (c:cs) i k = [ tree : rest | + i <= k, + (j, trees) <- lookupWith [] edgeTreesMap (i,c), + rest <- children cs j k, + tree <- trees ] + + +{- +instance Print ParseTree where + prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}" + prt (Leaf token) = prt token +-} + +-- AR 10/12/2002 + +prChart :: [Passive] -> String +prChart = unlines . map (unwords . map prOne) . positions where + prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it + positions = groupBy (\ (i,_,_) (j,_,_) -> i == j) + + diff --git a/src-3.0/GF/CF/EBNF.hs b/src-3.0/GF/CF/EBNF.hs new file mode 100644 index 000000000..f091d19cb --- /dev/null +++ b/src-3.0/GF/CF/EBNF.hs @@ -0,0 +1,191 @@ +---------------------------------------------------------------------- +-- | +-- Module : EBNF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:13 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.5 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.CF.EBNF (pEBNFasGrammar) where + +import GF.Data.Operations +import GF.Data.Parsers +import GF.Infra.Comments +import GF.CF.CF +import GF.CF.CFIdent +import GF.Grammar.Grammar +import GF.Grammar.PrGrammar +import GF.CF.CFtoGrammar +import qualified GF.Source.AbsGF as A + +import Data.List (nub, partition) + +-- AR 18/4/2000 - 31/3/2004 + +-- Extended BNF grammar with token type a +-- put a = String for simple applications + +type EBNF = [ERule] +type ERule = (ECat, ERHS) +type ECat = (String,[Int]) +type ETok = String + +ebnfID = "EBNF" ---- make this parametric! + +data ERHS = + ETerm ETok + | ENonTerm ECat + | ESeq ERHS ERHS + | EAlt ERHS ERHS + | EStar ERHS + | EPlus ERHS + | EOpt ERHS + | EEmpty + +type CFRHS = [CFItem] +type CFJustRule = (CFCat, CFRHS) + +ebnf2gf :: EBNF -> [A.TopDef] +ebnf2gf = cf2grammar . rules2CF . ebnf2cf + +ebnf2cf :: EBNF -> [CFRule] +ebnf2cf ebnf = [(mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where + mkCFF i (CFCat (_,c), _) = string2CFFun ebnfID ("Mk" ++ prt c ++ "_" ++ show i) + +normEBNF :: EBNF -> [CFJustRule] +normEBNF erules = let + erules1 = [normERule ([i],r) | (i,r) <- zip [0..] erules] + erules2 = erules1 ---refreshECats erules1 --- this seems to be just bad ! + erules3 = concat (map pickERules erules2) + erules4 = nubERules erules3 + in [(mkCFCatE cat, map eitem2cfitem its) | (cat,itss) <- erules3, its <- itss] + +refreshECats :: [NormERule] -> [NormERule] +refreshECats rules = [recas [i] rule | (i,rule) <- zip [0..] rules] where + recas ii (cat,its) = (updECat ii cat, [recss ii 0 s | s <- its]) + recss ii n [] = [] + recss ii n (s:ss) = recit (ii ++ [n]) s : recss ii (n+1) ss + recit ii it = case it of + EINonTerm cat -> EINonTerm (updECat ii cat) + EIStar (cat,t) -> EIStar (updECat ii cat, [recss ii 0 s | s <- t]) + EIPlus (cat,t) -> EIPlus (updECat ii cat, [recss ii 0 s | s <- t]) + EIOpt (cat,t) -> EIOpt (updECat ii cat, [recss ii 0 s | s <- t]) + _ -> it + +pickERules :: NormERule -> [NormERule] +pickERules rule@(cat,alts) = rule : concat (map pics (concat alts)) where + pics it = case it of + EIStar ru@(cat,t) -> mkEStarRules cat ++ pickERules ru + EIPlus ru@(cat,t) -> mkEPlusRules cat ++ pickERules ru + EIOpt ru@(cat,t) -> mkEOptRules cat ++ pickERules ru + _ -> [] + mkEStarRules cat = [(cat', [[],[EINonTerm cat, EINonTerm cat']])] + where cat' = mkNewECat cat "Star" + mkEPlusRules cat = [(cat', [[EINonTerm cat],[EINonTerm cat, EINonTerm cat']])] + where cat' = mkNewECat cat "Plus" + mkEOptRules cat = [(cat', [[],[EINonTerm cat]])] + where cat' = mkNewECat cat "Opt" + +nubERules :: [NormERule] -> [NormERule] +nubERules rules = nub optim where + optim = map (substERules (map mkSubst replaces)) irreducibles + (replaces,irreducibles) = partition reducible rules + reducible (cat,[items]) = isNewCat cat && all isOldIt items + reducible _ = False + isNewCat (_,ints) = ints == [] + isOldIt (EITerm _) = True + isOldIt (EINonTerm cat) = not (isNewCat cat) + isOldIt _ = False + mkSubst (cat,its) = (cat, head its) -- def of reducible: its must be singleton +--- the optimization assumes each cat has at most one EBNF rule. + +substERules :: [(ECat,[EItem])] -> NormERule -> NormERule +substERules g (cat,itss) = (cat, map sub itss) where + sub [] = [] + sub (i@(EINonTerm cat') : ii) = case lookup cat g of + Just its -> its ++ sub ii + _ -> i : sub ii + sub (EIStar r : ii) = EIStar (substERules g r) : ii + sub (EIPlus r : ii) = EIPlus (substERules g r) : ii + sub (EIOpt r : ii) = EIOpt (substERules g r) : ii + +eitem2cfitem :: EItem -> CFItem +eitem2cfitem it = case it of + EITerm a -> atomCFTerm $ tS a + EINonTerm cat -> CFNonterm (mkCFCatE cat) + EIStar (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Star")) + EIPlus (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Plus")) + EIOpt (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Opt")) + +type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items + +data EItem = + EITerm String + | EINonTerm ECat + | EIStar NormERule + | EIPlus NormERule + | EIOpt NormERule + deriving Eq + +normERule :: ([Int],ERule) -> NormERule +normERule (ii,(cat,rhs)) = + (cat,[map (mkEItem (ii ++ [i])) r' | (i,r') <- zip [0..] (disjNorm rhs)]) where + disjNorm r = case r of + ESeq r1 r2 -> [x ++ y | x <- disjNorm r1, y <- disjNorm r2] + EAlt r1 r2 -> disjNorm r1 ++ disjNorm r2 + EEmpty -> [[]] + _ -> [[r]] + +mkEItem :: [Int] -> ERHS -> EItem +mkEItem ii rhs = case rhs of + ETerm a -> EITerm a + ENonTerm cat -> EINonTerm cat + EStar r -> EIStar (normERule (ii,(mkECat ii, r))) + EPlus r -> EIPlus (normERule (ii,(mkECat ii, r))) + EOpt r -> EIOpt (normERule (ii,(mkECat ii, r))) + _ -> EINonTerm ("?????",[]) +-- _ -> error "should not happen in ebnf" --- + +mkECat ints = ("C", ints) + +prECat (c,[]) = c +prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints) + +mkCFCatE :: ECat -> CFCat +mkCFCatE = string2CFCat ebnfID . prECat + +updECat _ (c,[]) = (c,[]) +updECat ii (c,_) = (c,ii) + +mkNewECat (c,ii) str = (c ++ str,ii) + +------ parser for EBNF grammars + +pEBNFasGrammar :: String -> Err [A.TopDef] +pEBNFasGrammar = parseResultErr (pEBNF *** ebnf2gf) . remComments + +pEBNF :: Parser Char EBNF +pEBNF = longestOfMany (pJ pERule) + +pERule :: Parser Char ERule +pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";" + +pERHS :: Int -> Parser Char ERHS +pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt +pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty +pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a) +pERHS 3 = pQuotedString *** ETerm + ||| pECat *** ENonTerm ||| pParenth (pERHS 0) + +pUnaryEOp :: Parser Char (ERHS -> ERHS) +pUnaryEOp = + lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id + +pECat = pIdent *** (\c -> (c,[])) + diff --git a/src-3.0/GF/CF/PPrCF.hs b/src-3.0/GF/CF/PPrCF.hs new file mode 100644 index 000000000..1c2203e94 --- /dev/null +++ b/src-3.0/GF/CF/PPrCF.hs @@ -0,0 +1,102 @@ +---------------------------------------------------------------------- +-- | +-- Module : PPrCF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/15 17:56:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.13 $ +-- +-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003 +-- +-- use the Print class instead! +----------------------------------------------------------------------------- + +module GF.CF.PPrCF (prCF, prCFTree, prCFRule, prCFFun, prCFCat, prCFItem, prRegExp, pCF) where + +import GF.Data.Operations +import GF.CF.CF +import GF.CF.CFIdent +import GF.Canon.AbsGFC +import GF.Grammar.PrGrammar + +import Data.Char +import Data.List + +prCF :: CF -> String +prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function + +prCFTree :: CFTree -> String +prCFTree (CFTree (fun, (_,trees))) = prCFFun fun ++ prs trees where + prs [] = "" + prs ts = " " ++ unwords (map ps ts) + ps t@(CFTree (_,(_,[]))) = prCFTree t + ps t = prParenth (prCFTree t) +{-# NOINLINE prCFTree #-} +-- Workaround ghc 6.8.2 bug + + +prCFRule :: CFRule -> String +prCFRule (fun,(cat,its)) = + prCFFun fun ++ "." +++ prCFCat cat +++ "::=" +++ + unwords (map prCFItem its) +++ ";" + +prCFFun :: CFFun -> String +prCFFun = prCFFun' True ---- False -- print profiles for debug + +prCFFun' :: Bool -> CFFun -> String +prCFFun' profs (CFFun (t, p)) = prt_ t ++ pp p where + pp p = if (not profs || normal p) then "" else "_" ++ concat (map show p) + normal p = and [x==y && null b | ((b,x),y) <- zip p (map (:[]) [0..])] + +prCFCat :: CFCat -> String +prCFCat (CFCat (c,l)) = prt_ c ++ case prt_ l of + "s" -> [] + _ -> "-" ++ prt_ l ---- + +prCFItem :: CFItem -> String +prCFItem (CFNonterm c) = prCFCat c +prCFItem (CFTerm a) = prRegExp a + +prRegExp :: RegExp -> String +prRegExp (RegAlts tt) = case tt of + [t] -> prQuotedString t + _ -> prParenth (prTList " | " (map prQuotedString tt)) + +-- rules have an amazingly easy parser, if we use the format +-- fun. C -> item1 item2 ... where unquoted items are treated as cats +-- Actually would be nice to add profiles to this. + +getCFRule :: String -> String -> Err [CFRule] +getCFRule mo s = getcf (wrds s) where + getcf ws = case ws of + fun : cat : a : its | isArrow a -> + Ok [(string2CFFun mo (init fun), + (string2CFCat mo cat, map mkIt its))] + cat : a : its | isArrow a -> + Ok [(string2CFFun mo (mkFun cat it), + (string2CFCat mo cat, map mkIt it)) | it <- chunk its] + _ -> Bad (" invalid rule:" +++ s) + isArrow a = elem a ["->", "::="] + mkIt w = case w of + ('"':w@(_:_)) -> atomCFTerm (string2CFTok (init w)) + _ -> CFNonterm (string2CFCat mo w) + chunk its = case its of + [] -> [[]] + _ -> chunks "|" its + mkFun cat its = case its of + [] -> cat ++ "_" + _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style + clean = filter isAlphaNum -- to form valid identifiers + wrds = takeWhile (/= ";") . words -- to permit semicolon in the end + +pCF :: String -> String -> Err [CFRule] +pCF mo s = do + rules <- mapM (getCFRule mo) $ filter isRule $ lines s + return $ concat rules + where + isRule line = case dropWhile isSpace line of + '-':'-':_ -> False + _ -> not $ all isSpace line diff --git a/src-3.0/GF/CF/PrLBNF.hs b/src-3.0/GF/CF/PrLBNF.hs new file mode 100644 index 000000000..4ba2019bc --- /dev/null +++ b/src-3.0/GF/CF/PrLBNF.hs @@ -0,0 +1,150 @@ +---------------------------------------------------------------------- +-- | +-- Module : PrLBNF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/06/17 14:15:16 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.11 $ +-- +-- Printing CF grammars generated from GF as LBNF grammar for BNFC. +-- AR 26/1/2000 -- 9/6/2003 (PPrCF) -- 8/11/2003 -- 27/9/2004. +-- With primitive error messaging, by rules and rule tails commented out +----------------------------------------------------------------------------- + +module GF.CF.PrLBNF (prLBNF,prBNF) where + +import GF.CF.CF +import GF.CF.CFIdent +import GF.Canon.AbsGFC +import GF.Infra.Ident +import GF.Grammar.PrGrammar +import GF.Compile.ShellState +import GF.Canon.GFC +import GF.Canon.Look + +import GF.Data.Operations +import GF.Infra.Modules + +import Data.Char +import Data.List (nub) + +prLBNF :: Bool -> StateGrammar -> String +prLBNF new gr = unlines $ pragmas ++ (map (prCFRule cs) rules') + where + cs = map IC ["Int","String"] ++ [catIdPlus c | (_,(c,_)) <- rules] + cf = stateCF gr + (pragmas,rules) = if new -- tries to treat precedence levels + then mkLBNF (stateGrammarST gr) $ rulesOfCF cf + else ([],rulesOfCF cf) -- "normal" behaviour + rules' = concatMap expand rules + expand (f,(c,its)) = [(f,(c,it)) | it <- combinations (map expIt its)] + expIt i = case i of + CFTerm (RegAlts ss) -> [CFTerm (RegAlts [s]) | s <- ss] + _ -> [i] + +-- | a hack to hide the LBNF details +prBNF :: Bool -> StateGrammar -> String +prBNF b = unlines . (map (unwords . unLBNF . drop 1 . words)) . lines . prLBNF b + where + unLBNF r = case r of + "---":ts -> ts + ";":"---":ts -> ts + c:ts -> c : unLBNF ts + _ -> r + +--- | awful low level code without abstraction over label names etc +mkLBNF :: CanonGrammar -> [CFRule] -> ([String],[CFRule]) +mkLBNF gr rules = (coercions, nub $ concatMap mkRule rules) where + coercions = ["coercions" +++ prt_ c +++ show n +++ ";" | + (_,ModMod m) <- modules gr, + (c,CncCat (RecType ls) _ _) <- tree2list $ jments m, + Lbg (L (IC "p")) (TInts n) <- ls + ] + precedences = [(f,(prec,assoc)) | + (_,ModMod m) <- modules gr, + (f,CncFun _ _ (R lin) _) <- tree2list $ jments m, + (Just prec, Just assoc) <- [( + lookup "p" [(lab,p) | Ass (L (IC lab)) (EInt p) <- lin], + lookup "a" [(lab,a) | Ass (L (IC lab)) (Par (CIQ _ (IC a)) []) <- lin] + )] + ] + precfuns = map fst precedences + mkRule r@(fun@(CFFun (t, p)),(cat,its)) = case t of + AC (CIQ _ c) -> case lookup c precedences of + Just (prec,assoc) -> [(fun,(mkCat prec cat,mkIts cat prec assoc 0 its))] + _ -> return r + AD (CIQ _ c) -> case lookup c precedences of + Just (prec,assoc) -> [(fun,(mkCat prec cat,mkIts cat prec assoc 0 its))] + _ -> return r + _ -> return r + mkIts cat prec assoc i its = case its of + CFTerm (RegAlts ["("]):n@(CFNonterm k):CFTerm (RegAlts [")"]):rest | k==cat -> + mkIts cat prec assoc i $ n:rest -- remove variants with parentheses + CFNonterm k:rest | k==cat -> + CFNonterm (mkNonterm prec assoc i k) : mkIts cat prec assoc (i+1) rest + it:rest -> it:mkIts cat prec assoc i rest + [] -> [] + + mkCat prec (CFCat ((CIQ m (IC c)),l)) = CFCat ((CIQ m (IC (c ++ show prec ++ "+"))),l) + mkNonterm prec assoc i cat = mkCat prec' cat + where + prec' = case (assoc,i) of + ("PL",0) -> prec + ("PR",0) -> prec + 1 + ("PR",_) -> prec + _ -> prec + 1 + +catId ((CFCat ((CIQ _ c),l))) = c + +catIdPlus ((CFCat ((CIQ _ c@(IC s)),l))) = case reverse s of + '+':cs -> IC $ reverse $ dropWhile isDigit cs + _ -> c + +prCFRule :: [Ident] -> CFRule -> String +prCFRule cs (fun,(cat,its)) = + prCFFun cat fun ++ "." +++ prCFCat True cat +++ "::=" +++ --- err in cat -> in syntax + unwords (map (prCFItem cs) its) +++ ";" + +prCFFun :: CFCat -> CFFun -> String +prCFFun (CFCat (_,l)) (CFFun (t, p)) = case t of + AC (CIQ _ x) -> let f = prId True x in (f ++ lab +++ f2 f +++ prP p) + AD (CIQ _ x) -> let f = prId True x in (f ++ lab +++ f2 f +++ prP p) + _ -> prErr True $ prt t + where + lab = prLab l + f2 f = if null lab then "" else f + prP = concatMap show + +prId b i = case i of + IC "Int" -> "Integer" + IC "#Var" -> "Ident" + IC "Var" -> "Ident" + IC "id_" -> "_" + IC s@(c:_) | last s == '+' -> init s -- hack to save precedence information + IC s@(c:_) | isUpper c -> s ++ if isDigit (last s) then "_" else "" + _ -> prErr b $ prt i + +prLab i = case i of + L (IC "s") -> "" --- + L (IC "_") -> "" --- + _ -> let x = prt i in "_" ++ x ++ if isDigit (last x) then "_" else "" + +-- | just comment out the rest if you cannot interpret the function name in LBNF +-- two versions, depending on whether in the beginning of a rule or elsewhere; +-- in the latter case, error just terminates the rule +prErr :: Bool -> String -> String +prErr b s = (if b then "" else " ;") +++ "---" +++ s + +prCFCat :: Bool -> CFCat -> String +prCFCat b (CFCat ((CIQ _ c),l)) = prId b c ++ prLab l ---- + +-- | if a category does not have a production of its own, we replace it by Ident +prCFItem cs (CFNonterm c) = if elem (catIdPlus c) cs then prCFCat False c else "Ident" +prCFItem _ (CFTerm a) = prRegExp a + +prRegExp (RegAlts tt) = case tt of + [t] -> prQuotedString t + _ -> prErr False $ prParenth (prTList " | " (map prQuotedString tt)) diff --git a/src-3.0/GF/CF/Profile.hs b/src-3.0/GF/CF/Profile.hs new file mode 100644 index 000000000..e573bec78 --- /dev/null +++ b/src-3.0/GF/CF/Profile.hs @@ -0,0 +1,106 @@ +---------------------------------------------------------------------- +-- | +-- Module : Profile +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:14 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001 +-- revised 8/4/2002 for the new profile structure +----------------------------------------------------------------------------- + +module GF.CF.Profile (postParse) where + +import GF.Canon.AbsGFC +import GF.Canon.GFC +import qualified GF.Infra.Ident as I +import GF.Canon.CMacros +---import MMacros +import GF.CF.CF +import GF.CF.CFIdent +import GF.CF.PPrCF -- for error msg +import GF.Grammar.PrGrammar + +import GF.Data.Operations + +import Control.Monad +import Data.List (nub) + +-- | the job is done in two passes: +-- +-- 1. tree2term: restore constituent order from Profile +-- +-- 2. term2trm: restore Bindings from Binds +postParse :: CFTree -> Err Exp +postParse tree = do + iterm <- errIn ("postprocessing parse tree" +++ prCFTree tree) $ tree2term tree + return $ term2trm iterm + +-- | an intermediate data structure +data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta deriving (Eq,Show) +type BindVs = [[I.Ident]] + +-- | (1) restore constituent order from Profile +tree2term :: CFTree -> Err ITerm +-- tree2term (CFTree (f,(_,[t]))) | f == dummyCFFun = tree2term t -- not used +tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of + AM _ -> return IMeta + _ -> do + args <- mapM mkArg pro + binds <- mapM mkBinds pro + return $ ITerm (fun, binds) args + where + mkArg (_,arg) = case arg of + [x] -> do -- one occurrence + trx <- trees !? x + tree2term trx + [] -> return IMeta -- suppression + _ -> do -- reduplication + trees' <- mapM (trees !?) arg + xs1 <- mapM tree2term trees' + xs2 <- checkArity xs1 + unif xs2 + + checkArity xs = if length (nub [length xx | ITerm _ xx <- xs']) > 1 + then Bad "arity error" + else return xs' + where xs' = [t | t@(ITerm _ _) <- xs] + unif xs = case [t | t@(ITerm _ _) <- xs] of + [] -> return $ IMeta + (ITerm fp@(f,_) xx : ts) -> do + let hs = [h | ITerm (h,_) _ <- ts, h /= f] + testErr (null hs) -- if fails, hs must be nonempty + ("unification expects" +++ prt f +++ "but found" +++ prt (head hs)) + xx' <- mapM unifArg [0 .. length xx - 1] + return $ ITerm fp xx' + where + unifArg i = unif [zz !! i | ITerm _ zz <- xs] + + mkBinds (xss,_) = mapM mkBind xss + mkBind xs = do + ts <- mapM (trees !?) xs + let vs = [x | CFTree (CFFun (AV x,_),(_,[])) <- ts] + testErr (length ts == length vs) "non-variable in bound position" + case vs of + [x] -> return x + [] -> return $ I.identC "h_" ---- uBoundVar + y:ys -> do + testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y) + return y + +-- | (2) restore Bindings from Binds +term2trm :: ITerm -> Exp +term2trm IMeta = EAtom (AM 0) ---- mExp0 +term2trm (ITerm (fun, binds) terms) = + let bterms = zip binds terms + in mkAppAtom fun [mkAbsR xs (term2trm t) | (xs,t) <- bterms] + + --- these are deprecated + where + mkAbsR c e = foldr EAbs e c + mkAppAtom a = mkApp (EAtom a) + mkApp = foldl EApp |
