diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF/CanonToCF.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF/CanonToCF.hs')
| -rw-r--r-- | src-3.0/GF/CF/CanonToCF.hs | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CanonToCF.hs b/src-3.0/GF/CF/CanonToCF.hs new file mode 100644 index 000000000..80ce2e79d --- /dev/null +++ b/src-3.0/GF/CF/CanonToCF.hs @@ -0,0 +1,214 @@ +---------------------------------------------------------------------- +-- | +-- Module : CanonToCF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/14 16:03:41 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.15 $ +-- +-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003 +----------------------------------------------------------------------------- + +module GF.CF.CanonToCF (canon2cf) where + +import GF.System.Tracing -- peb 8/6-04 + +import GF.Data.Operations +import GF.Infra.Option +import GF.Infra.Ident +import GF.Canon.AbsGFC +import GF.Grammar.LookAbs (allBindCatsOf) +import GF.Canon.GFC +import GF.Grammar.Values (isPredefCat,cPredefAbs) +import GF.Grammar.PrGrammar +import GF.Canon.CMacros +import qualified GF.Infra.Modules as M +import GF.CF.CF +import GF.CF.CFIdent +import GF.UseGrammar.Morphology +import GF.Data.Trie2 +import Data.List (nub,partition) +import Control.Monad + +-- | The main function: for a given cnc module 'm', build the CF grammar with all the +-- rules coming from modules that 'm' extends. The categories are qualified by +-- the abstract module name 'a' that 'm' is of. +-- The ign argument tells what rules not to generate a parser for. +canon2cf :: Options -> (Ident -> Bool) -> CanonGrammar -> Ident -> Err CF +canon2cf opts ign gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04 + let ms = M.allExtends gr c + a <- M.abstractOfConcrete gr c + let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms] + let mms = [(a, tree2list (M.jments m)) | m <- cncs] + cnc <- liftM M.jments $ M.lookupModMod gr c + rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts ign cnc)) mms + let bindcats = map snd $ allBindCatsOf gr + let rules = filter (not . isCircularCF) rules0 ---- temporarily here + let grules = groupCFRules rules + let predef = mkCFPredef opts bindcats grules + return $ CF predef + +cnc2cfCond :: Options -> (Ident -> Bool) -> BinTree Ident Info -> + Ident -> [(Ident,Info)] -> Err [CFRule] +cnc2cfCond opts ign cnc m gr = + liftM concat $ + mapM lin2cf [(m,fun,cat,args,lin) | + (fun, CncFun cat args lin _) <- gr, notign fun, is fun] + where + is f = isInBinTree f cnc + notign = not . ign + +type IFun = Ident +type ICat = CIdent + +-- | all CF rules corresponding to a linearization rule +lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule] +lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do + let rhss0 = allLinBranches lin -- :: [([Label], Term)] + rhss1 <- mapM (mkCFItems m) rhss0 -- :: [([Label], [[PreCFItem]])] + mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat + +-- | making sequences of CF items from every branch in a linearization +mkCFItems :: Ident -> ([Label], Term) -> Err ([Label], [[PreCFItem]]) +mkCFItems m (labs,t) = do + items <- term2CFItems m t + return (labs, items) + +-- | making CF rules from sequences of CF items +mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> ([Label], [[PreCFItem]]) -> Err [CFRule] +mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss + where + mkOneRule its = do + let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its] + profile = mkProfile nonterms + cfcat = labels2CFCat (redirectIdent m cat) lab + cffun = CFFun (AC (CIQ m fun), profile) + cfits = map precf2cf its + return (cffun,(cfcat,cfits)) + mkProfile nonterms = map mkOne args + where + mkOne (A c i) = mkOne (AB c 0 i) + mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i]) + where + mkB x = [k | (k,(j, [LV y], False)) <- nonterms, j == i, y == x] + +-- | intermediate data structure of CFItems with information for profiles +data PreCFItem = + PTerm RegExp -- ^ like ordinary Terminal + | PNonterm CIdent Integer [Label] Bool -- ^ cat, position, part\/bind, whether arg + deriving Eq + +precf2cf :: PreCFItem -> CFItem +precf2cf (PTerm r) = CFTerm r +precf2cf (PNonterm cm _ ls True) = CFNonterm (labels2CFCat cm ls) +precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF + + +-- | the main job in translating linearization rules into sequences of cf items +term2CFItems :: Ident -> Term -> Err [[PreCFItem]] +term2CFItems m t = errIn "forming cf items" $ case t of + S c _ -> t2c c + + T _ cc -> do + its <- mapM t2c [t | Cas _ t <- cc] + tryMkCFTerm (concat its) + V _ cc -> do + its <- mapM t2c [t | t <- cc] + tryMkCFTerm (concat its) + + C t1 t2 -> do + its1 <- t2c t1 + its2 <- t2c t2 + return [x ++ y | x <- its1, y <- its2] + + FV ts -> do + its <- mapM t2c ts + tryMkCFTerm (concat its) + + P (S c _) _ -> t2c c --- w-around for bug in Compute? AR 31/1/2006 + + P arg s -> extrR arg s + + K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]] + + E -> return [[]] + + K (KP d vs) -> do + let its = [PTerm (RegAlts [s]) | s <- d] + let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs] + tryMkCFTerm (its : itss) + + _ -> return [] ---- prtBad "no cf for" t ---- + + where + + t2c = term2CFItems m + + -- optimize the number of rules by a factorization + tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]] + tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss = + case mapM mkOne (counterparts ii) of + Ok tt -> return [tt] + _ -> return ii + where + mkOne cfits = case mapM mkOneTerm cfits of + Ok tt -> return $ PTerm (RegAlts (concat (nub tt))) + _ -> mkOneNonTerm cfits + mkOneTerm (PTerm (RegAlts t)) = return t + mkOneTerm _ = Bad "" + mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) = + if all (== n) cc + then return n + else Bad "" + mkOneNonTerm _ = Bad "" + counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]] + tryMkCFTerm itss = return itss + + extrR arg lab = case (arg0,labs) of + (Arg (A cat pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]] + (Arg (AB cat b pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]] + (Arg (A cat pos), _) -> return [[PNonterm (cIQ cat) pos labs True]] + (Arg (AB cat b pos), _) -> return [[PNonterm (cIQ cat) pos labs True]] + ---- ?? + _ -> prtBad "cannot extract record field from" arg + where + (arg0,labs) = headProj arg [lab] + + headProj r ls = case r of + P r0 l0 -> headProj r0 (l0:ls) + S r0 _ -> headProj r0 ls + _ -> (r,ls) + cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c + +mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef) +mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where + (ruls,preds) = if oElem lexerByNeed opts -- option -cflexer + then predefLexer rules + else (rules,emptyTrie) + preds0 s = + [(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++ + [(cat, varCFFun x) | TV x <- [s], cat <- catVarCF : bindcats] ++ + [(cfCatString, stringCFFun t) | TL t <- [s]] ++ + [(cfCatInt, intCFFun t) | TI t <- [s]] ++ + [(cfCatFloat, floatCFFun t) | TF t <- [s]] + cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its] + bindcats = [c | c <- cats, elem (cfCat2Ident c) binds] + look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens + +--- TODO: integrate with morphology +--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)])) +predefLexer groups = (reverse ruls, tcompile preds) where + (ruls,preds) = foldr mkOne ([],[]) groups + mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where + (rule,pre) = case partition isLexical rules of + ([],_) -> (group,[]) + (ls,rest) -> ((cat,rest), concatMap mkLexRule ls) + isLexical (f,(c,its)) = case its of + [CFTerm (RegAlts ws)] -> True + _ -> False + mkLexRule r = case r of + (fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws] + _ -> [] |
