summaryrefslogtreecommitdiff
path: root/src-3.0/GF/CF/CanonToCF.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF/CanonToCF.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF/CanonToCF.hs')
-rw-r--r--src-3.0/GF/CF/CanonToCF.hs214
1 files changed, 214 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CanonToCF.hs b/src-3.0/GF/CF/CanonToCF.hs
new file mode 100644
index 000000000..80ce2e79d
--- /dev/null
+++ b/src-3.0/GF/CF/CanonToCF.hs
@@ -0,0 +1,214 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CanonToCF
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/14 16:03:41 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.15 $
+--
+-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003
+-----------------------------------------------------------------------------
+
+module GF.CF.CanonToCF (canon2cf) where
+
+import GF.System.Tracing -- peb 8/6-04
+
+import GF.Data.Operations
+import GF.Infra.Option
+import GF.Infra.Ident
+import GF.Canon.AbsGFC
+import GF.Grammar.LookAbs (allBindCatsOf)
+import GF.Canon.GFC
+import GF.Grammar.Values (isPredefCat,cPredefAbs)
+import GF.Grammar.PrGrammar
+import GF.Canon.CMacros
+import qualified GF.Infra.Modules as M
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.UseGrammar.Morphology
+import GF.Data.Trie2
+import Data.List (nub,partition)
+import Control.Monad
+
+-- | The main function: for a given cnc module 'm', build the CF grammar with all the
+-- rules coming from modules that 'm' extends. The categories are qualified by
+-- the abstract module name 'a' that 'm' is of.
+-- The ign argument tells what rules not to generate a parser for.
+canon2cf :: Options -> (Ident -> Bool) -> CanonGrammar -> Ident -> Err CF
+canon2cf opts ign gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
+ let ms = M.allExtends gr c
+ a <- M.abstractOfConcrete gr c
+ let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
+ let mms = [(a, tree2list (M.jments m)) | m <- cncs]
+ cnc <- liftM M.jments $ M.lookupModMod gr c
+ rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts ign cnc)) mms
+ let bindcats = map snd $ allBindCatsOf gr
+ let rules = filter (not . isCircularCF) rules0 ---- temporarily here
+ let grules = groupCFRules rules
+ let predef = mkCFPredef opts bindcats grules
+ return $ CF predef
+
+cnc2cfCond :: Options -> (Ident -> Bool) -> BinTree Ident Info ->
+ Ident -> [(Ident,Info)] -> Err [CFRule]
+cnc2cfCond opts ign cnc m gr =
+ liftM concat $
+ mapM lin2cf [(m,fun,cat,args,lin) |
+ (fun, CncFun cat args lin _) <- gr, notign fun, is fun]
+ where
+ is f = isInBinTree f cnc
+ notign = not . ign
+
+type IFun = Ident
+type ICat = CIdent
+
+-- | all CF rules corresponding to a linearization rule
+lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
+lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
+ let rhss0 = allLinBranches lin -- :: [([Label], Term)]
+ rhss1 <- mapM (mkCFItems m) rhss0 -- :: [([Label], [[PreCFItem]])]
+ mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
+
+-- | making sequences of CF items from every branch in a linearization
+mkCFItems :: Ident -> ([Label], Term) -> Err ([Label], [[PreCFItem]])
+mkCFItems m (labs,t) = do
+ items <- term2CFItems m t
+ return (labs, items)
+
+-- | making CF rules from sequences of CF items
+mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> ([Label], [[PreCFItem]]) -> Err [CFRule]
+mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
+ where
+ mkOneRule its = do
+ let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
+ profile = mkProfile nonterms
+ cfcat = labels2CFCat (redirectIdent m cat) lab
+ cffun = CFFun (AC (CIQ m fun), profile)
+ cfits = map precf2cf its
+ return (cffun,(cfcat,cfits))
+ mkProfile nonterms = map mkOne args
+ where
+ mkOne (A c i) = mkOne (AB c 0 i)
+ mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
+ where
+ mkB x = [k | (k,(j, [LV y], False)) <- nonterms, j == i, y == x]
+
+-- | intermediate data structure of CFItems with information for profiles
+data PreCFItem =
+ PTerm RegExp -- ^ like ordinary Terminal
+ | PNonterm CIdent Integer [Label] Bool -- ^ cat, position, part\/bind, whether arg
+ deriving Eq
+
+precf2cf :: PreCFItem -> CFItem
+precf2cf (PTerm r) = CFTerm r
+precf2cf (PNonterm cm _ ls True) = CFNonterm (labels2CFCat cm ls)
+precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
+
+
+-- | the main job in translating linearization rules into sequences of cf items
+term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
+term2CFItems m t = errIn "forming cf items" $ case t of
+ S c _ -> t2c c
+
+ T _ cc -> do
+ its <- mapM t2c [t | Cas _ t <- cc]
+ tryMkCFTerm (concat its)
+ V _ cc -> do
+ its <- mapM t2c [t | t <- cc]
+ tryMkCFTerm (concat its)
+
+ C t1 t2 -> do
+ its1 <- t2c t1
+ its2 <- t2c t2
+ return [x ++ y | x <- its1, y <- its2]
+
+ FV ts -> do
+ its <- mapM t2c ts
+ tryMkCFTerm (concat its)
+
+ P (S c _) _ -> t2c c --- w-around for bug in Compute? AR 31/1/2006
+
+ P arg s -> extrR arg s
+
+ K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]
+
+ E -> return [[]]
+
+ K (KP d vs) -> do
+ let its = [PTerm (RegAlts [s]) | s <- d]
+ let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
+ tryMkCFTerm (its : itss)
+
+ _ -> return [] ---- prtBad "no cf for" t ----
+
+ where
+
+ t2c = term2CFItems m
+
+ -- optimize the number of rules by a factorization
+ tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]
+ tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
+ case mapM mkOne (counterparts ii) of
+ Ok tt -> return [tt]
+ _ -> return ii
+ where
+ mkOne cfits = case mapM mkOneTerm cfits of
+ Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
+ _ -> mkOneNonTerm cfits
+ mkOneTerm (PTerm (RegAlts t)) = return t
+ mkOneTerm _ = Bad ""
+ mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) =
+ if all (== n) cc
+ then return n
+ else Bad ""
+ mkOneNonTerm _ = Bad ""
+ counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
+ tryMkCFTerm itss = return itss
+
+ extrR arg lab = case (arg0,labs) of
+ (Arg (A cat pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
+ (Arg (AB cat b pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
+ (Arg (A cat pos), _) -> return [[PNonterm (cIQ cat) pos labs True]]
+ (Arg (AB cat b pos), _) -> return [[PNonterm (cIQ cat) pos labs True]]
+ ---- ??
+ _ -> prtBad "cannot extract record field from" arg
+ where
+ (arg0,labs) = headProj arg [lab]
+
+ headProj r ls = case r of
+ P r0 l0 -> headProj r0 (l0:ls)
+ S r0 _ -> headProj r0 ls
+ _ -> (r,ls)
+ cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c
+
+mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
+mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where
+ (ruls,preds) = if oElem lexerByNeed opts -- option -cflexer
+ then predefLexer rules
+ else (rules,emptyTrie)
+ preds0 s =
+ [(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
+ [(cat, varCFFun x) | TV x <- [s], cat <- catVarCF : bindcats] ++
+ [(cfCatString, stringCFFun t) | TL t <- [s]] ++
+ [(cfCatInt, intCFFun t) | TI t <- [s]] ++
+ [(cfCatFloat, floatCFFun t) | TF t <- [s]]
+ cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its]
+ bindcats = [c | c <- cats, elem (cfCat2Ident c) binds]
+ look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens
+
+--- TODO: integrate with morphology
+--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
+predefLexer groups = (reverse ruls, tcompile preds) where
+ (ruls,preds) = foldr mkOne ([],[]) groups
+ mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
+ (rule,pre) = case partition isLexical rules of
+ ([],_) -> (group,[])
+ (ls,rest) -> ((cat,rest), concatMap mkLexRule ls)
+ isLexical (f,(c,its)) = case its of
+ [CFTerm (RegAlts ws)] -> True
+ _ -> False
+ mkLexRule r = case r of
+ (fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws]
+ _ -> []