summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2010-06-09 11:32:59 +0000
committerkrasimir <krasimir@chalmers.se>2010-06-09 11:32:59 +0000
commitd6f32b3bcd03e7fe806a1b64cd370ba78dc00aa7 (patch)
tree12bc89cc43f10e80e95f7b76c52611caa5aa4b40 /src
parent4e35f7e5ecfebb2503a516c84e4b7d932731a94d (diff)
dead code elimination for PGF. Note: the produced grammars will not work well with metavariables and high-order abstract syntax
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Compile.hs3
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs2
-rw-r--r--src/compiler/GF/Infra/Option.hs6
-rw-r--r--src/compiler/GFC.hs4
-rw-r--r--src/runtime/haskell/PGF/Binary.hs2
-rw-r--r--src/runtime/haskell/PGF/Macros.hs57
-rw-r--r--src/runtime/haskell/PGF/Optimize.hs215
7 files changed, 228 insertions, 61 deletions
diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs
index 1aebeaf31..bf872c138 100644
--- a/src/compiler/GF/Compile.hs
+++ b/src/compiler/GF/Compile.hs
@@ -41,6 +41,7 @@ import PGF.Check
import PGF.CId
import PGF.Data
import PGF.Macros
+import PGF.Optimize
-- | Compiles a number of source files and builds a 'PGF' structure for them.
@@ -60,7 +61,7 @@ link opts cnc gr = do
(True, True) -> ioeIO $ putStrLn "OK"
(False,True) -> return ()
_ -> ioeIO $ putStrLn $ "Corrupted PGF"
- return gc
+ return $ if flag optOptimizePGF opts then optimizePGF gc else gc
Bad s -> fail s
batchCompile :: Options -> [FilePath] -> IOE SourceGrammar
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index 995219efd..d1121e827 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -5,7 +5,7 @@ import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import PGF.CId
-import PGF.Macros(updateProductionIndices)
+import PGF.Optimize(updateProductionIndices)
import PGF.Check(checkLin)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index ee8d76b45..6c00336de 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -161,6 +161,7 @@ data Flags = Flags {
optPreprocessors :: [String],
optEncoding :: String,
optOptimizations :: Set Optimization,
+ optOptimizePGF :: Bool,
optCFGTransforms :: Set CFGTransform,
optLibraryPath :: [FilePath],
optStartCat :: Maybe String,
@@ -260,6 +261,7 @@ defaultFlags = Flags {
optPreprocessors = [],
optEncoding = "latin1",
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
+ optOptimizePGF = False,
optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter,
CFGTopDownFilter, CFGMergeIdentical],
optLibraryPath = [],
@@ -348,6 +350,8 @@ optDescr =
Option [] ["unlexer"] (ReqArg unlexer "UNLEXER") "Use unlexer UNLEXER.",
Option [] ["optimize"] (ReqArg optimize "OPT")
"Select an optimization package. OPT = all | values | parametrize | none",
+ Option [] ["optimize-pgf"] (NoArg (optimize_pgf True))
+ "Enable or disable global grammar optimization. This could significantly reduce the size of the final PGF file",
Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).",
Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).",
Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...",
@@ -406,6 +410,8 @@ optDescr =
optimize x = case lookup x optimizationPackages of
Just p -> set $ \o -> o { optOptimizations = p }
Nothing -> fail $ "Unknown optimization package: " ++ x
+
+ optimize_pgf x = set $ \o -> o { optOptimizePGF = x }
toggleOptimize x b = set $ setOptimization' x b
diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs
index 1f0ac870b..352827f6d 100644
--- a/src/compiler/GFC.hs
+++ b/src/compiler/GFC.hs
@@ -4,6 +4,7 @@ module GFC (mainGFC) where
import PGF
import PGF.CId
import PGF.Data
+import PGF.Optimize
import GF.Compile
import GF.Compile.Export
@@ -55,7 +56,8 @@ compileCFFiles opts fs =
unionPGFFiles :: Options -> [FilePath] -> IOE ()
unionPGFFiles opts fs =
do pgfs <- mapM readPGFVerbose fs
- let pgf = foldl1 unionPGF pgfs
+ let pgf0 = foldl1 unionPGF pgfs
+ pgf = if flag optOptimizePGF opts then optimizePGF pgf0 else pgf0
pgfFile = grammarName opts pgf <.> "pgf"
if pgfFile `elem` fs
then putStrLnE $ "Refusing to overwrite " ++ pgfFile
diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs
index 623cbe7bb..92f551b0e 100644
--- a/src/runtime/haskell/PGF/Binary.hs
+++ b/src/runtime/haskell/PGF/Binary.hs
@@ -2,7 +2,7 @@ module PGF.Binary where
import PGF.CId
import PGF.Data
-import PGF.Macros
+import PGF.Optimize
import Data.Binary
import Data.Binary.Put
import Data.Binary.Get
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index dea535af7..445592a9b 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -10,7 +10,6 @@ import qualified Data.IntSet as IntSet
import qualified Data.Array as Array
import Data.Maybe
import Data.List
-import GF.Data.Utilities(sortNub)
import Text.PrettyPrint
-- operations for manipulating PGF grammars and objects
@@ -148,62 +147,6 @@ cidVar = mkCId "__gfVar"
_B = mkCId "__gfB"
_V = mkCId "__gfV"
-updateProductionIndices :: PGF -> PGF
-updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pgf) }
- where
- updateConcrete cnc =
- let p_prods = (filterProductions IntMap.empty . parseIndex cnc) (productions cnc)
- l_prods = (linIndex cnc . filterProductions IntMap.empty) (productions cnc)
- in cnc{pproductions = p_prods, lproductions = l_prods}
-
- filterProductions prods0 prods
- | prods0 == prods1 = prods0
- | otherwise = filterProductions prods1 prods
- where
- prods1 = IntMap.unionWith Set.union prods0 (IntMap.mapMaybe (filterProdSet prods0) prods)
-
- filterProdSet prods0 set
- | Set.null set1 = Nothing
- | otherwise = Just set1
- where
- set1 = Set.filter (filterRule prods0) set
-
- filterRule prods0 (PApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods0) args
- filterRule prods0 (PCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods0
- filterRule prods0 _ = True
-
- parseIndex cnc = IntMap.mapMaybeWithKey filterProdSet
- where
- filterProdSet fid prods
- | fid `IntSet.member` ho_fids = Just prods
- | otherwise = let prods' = Set.filter (not . is_ho_prod) prods
- in if Set.null prods'
- then Nothing
- else Just prods'
-
- is_ho_prod (PApply _ [fid]) | fid == fcatVar = True
- is_ho_prod _ = False
-
- ho_fids :: IntSet.IntSet
- ho_fids = IntSet.fromList [fid | cat <- ho_cats
- , fid <- maybe [] (\(CncCat s e _) -> [s..e]) (Map.lookup cat (cnccats cnc))]
-
- ho_cats :: [CId]
- ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf))
- , h <- case ty of {DTyp hyps val _ -> hyps}
- , c <- fst (catSkeleton (typeOfHypo h))]
-
- linIndex cnc productions =
- Map.fromListWith (IntMap.unionWith Set.union)
- [(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
- , prod <- Set.toList prods
- , fun <- getFunctions prod]
- where
- getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc Array.! funid in [fun]
- getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
- Nothing -> []
- Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
-
-- Utilities for doing linearization
diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs
new file mode 100644
index 000000000..6151c26a9
--- /dev/null
+++ b/src/runtime/haskell/PGF/Optimize.hs
@@ -0,0 +1,215 @@
+module PGF.Optimize
+ ( optimizePGF
+ , updateProductionIndices
+ ) where
+
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import Data.Maybe
+import Data.List (mapAccumL, nub)
+import Data.Array.IArray
+import Data.Array.MArray
+import Data.Array.ST
+import Data.Array.Unboxed
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntSet as IntSet
+import qualified Data.IntMap as IntMap
+import Control.Monad.ST
+import GF.Data.Utilities(sortNub)
+
+optimizePGF :: PGF -> PGF
+optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .
+ topDownFilter (lookStartCat pgf) .
+ bottomUpFilter ) (concretes pgf)}
+
+updateProductionIndices :: PGF -> PGF
+updateProductionIndices pgf = pgf{concretes = fmap (updateConcrete (abstract pgf)) (concretes pgf)}
+
+topDownFilter :: CId -> Concr -> Concr
+topDownFilter startCat cnc =
+ let ((seqs,funs),prods) = IntMap.mapAccumWithKey (\env res set -> mapAccumLSet (optimize res) env set)
+ (Map.empty,Map.empty)
+ (productions cnc)
+ cats = Map.mapWithKey filterCatLabels (cnccats cnc)
+ in cnc{ sequences = mkSetArray seqs
+ , cncfuns = mkSetArray funs
+ , productions = prods
+ , cnccats = cats
+ }
+ where
+ fid2cat fid =
+ case IntMap.lookup fid fid2catMap of
+ Just cat -> cat
+ Nothing -> case [fid | Just set <- [IntMap.lookup fid (productions cnc)], PCoerce fid <- Set.toList set] of
+ (fid:_) -> fid2cat fid
+ _ -> error "unknown forest id"
+ where
+ fid2catMap = IntMap.fromList [(fid,cat) | (cat,CncCat start end lbls) <- Map.toList (cnccats cnc),
+ fid <- [start..end]]
+
+ starts =
+ case Map.lookup startCat (cnccats cnc) of
+ Just (CncCat _ _ lbls) -> [(startCat,lbl) | lbl <- indices lbls]
+ Nothing -> []
+
+ allRelations =
+ Map.unionsWith Set.union
+ [rel fid prod | (fid,set) <- IntMap.toList (productions cnc),
+ prod <- Set.toList set]
+ where
+ rel fid (PApply funid args) = Map.fromList [((fid2cat fid,lbl),deps args seqid) | (lbl,seqid) <- assocs lin]
+ where
+ CncFun _ lin = cncfuns cnc ! funid
+ rel fid _ = Map.empty
+
+ deps args seqid = Set.fromList [(fid2cat (args !! r),d) | SymCat r d <- elems seq]
+ where
+ seq = sequences cnc ! seqid
+
+ -- here we create a mapping from category to an array of indices.
+ -- An element of the array is equal to -1 if the corresponding index
+ -- is not going to be used in the optimized grammar, or the new index
+ -- if it will be used
+ closure :: Map.Map CId (UArray LIndex LIndex)
+ closure = runST $ do
+ set <- initSet
+ addLitCat cidString set
+ addLitCat cidInt set
+ addLitCat cidFloat set
+ addLitCat cidVar set
+ closureSet set starts
+ doneSet set
+ where
+ initSet :: ST s (Map.Map CId (STUArray s LIndex LIndex))
+ initSet =
+ fmap Map.fromAscList $ sequence
+ [fmap ((,) cat) (newArray (bounds lbls) (-1))
+ | (cat,CncCat _ _ lbls) <- Map.toAscList (cnccats cnc)]
+
+ addLitCat cat set =
+ case Map.lookup cat set of
+ Just indices -> writeArray indices 0 0
+ Nothing -> return ()
+
+ closureSet set [] = return ()
+ closureSet set (x@(cat,index):xs) =
+ case Map.lookup cat set of
+ Just indices -> do v <- readArray indices index
+ writeArray indices index 0
+ if v < 0
+ then case Map.lookup x allRelations of
+ Just ys -> closureSet set (Set.toList ys++xs)
+ Nothing -> closureSet set xs
+ else closureSet set xs
+ Nothing -> error "unknown cat"
+
+ doneSet set =
+ fmap Map.fromAscList $ mapM done (Map.toAscList set)
+ where
+ done (cat,indices) = do
+ (s,e) <- getBounds indices
+ reindex indices s e 0
+ indices <- unsafeFreeze indices
+ return (cat,indices)
+
+ reindex indices i j k
+ | i <= j = do v <- readArray indices i
+ if v < 0
+ then reindex indices (i+1) j k
+ else writeArray indices i k >>
+ reindex indices (i+1) j (k+1)
+ | otherwise = return ()
+
+ optimize res (seqs,funs) (PApply funid args) =
+ let (seqs',lin') = mapAccumL addUnique seqs [amap updateSymbol (sequences cnc ! seqid) |
+ (lbl,seqid) <- assocs lin, indicesOf res ! lbl >= 0]
+ (funs',funid') = addUnique funs (CncFun fun (mkArray lin'))
+ in ((seqs',funs'), PApply funid' args)
+ where
+ CncFun fun lin = cncfuns cnc ! funid
+
+ indicesOf fid =
+ case Map.lookup (fid2cat fid) closure of
+ Just indices -> indices
+ Nothing -> error "unknown category"
+
+ addUnique seqs seq =
+ case Map.lookup seq seqs of
+ Just seqid -> (seqs,seqid)
+ Nothing -> let seqid = Map.size seqs
+ in (Map.insert seq seqid seqs, seqid)
+
+ updateSymbol (SymCat r d) = SymCat r (indicesOf (args !! r) ! d)
+ updateSymbol s = s
+ optimize res env prod = (env,prod)
+
+ filterCatLabels cat (CncCat start end lbls) =
+ case Map.lookup cat closure of
+ Just indices -> let lbls' = mkArray [lbl | (i,lbl) <- assocs lbls, indices ! i >= 0]
+ in CncCat start end lbls'
+ Nothing -> error "unknown category"
+
+ mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
+ mkArray lst = listArray (0,length lst-1) lst
+
+ mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)
+ in (b',Set.fromList lst)
+
+
+bottomUpFilter :: Concr -> Concr
+bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)}
+
+filterProductions prods0 prods
+ | prods0 == prods1 = prods0
+ | otherwise = filterProductions prods1 prods
+ where
+ prods1 = IntMap.unionWith Set.union prods0 (IntMap.mapMaybe (filterProdSet prods0) prods)
+
+ filterProdSet prods0 set
+ | Set.null set1 = Nothing
+ | otherwise = Just set1
+ where
+ set1 = Set.filter (filterRule prods0) set
+
+ filterRule prods0 (PApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods0) args
+ filterRule prods0 (PCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods0
+ filterRule prods0 _ = True
+
+updateConcrete abs cnc =
+ let p_prods = (filterProductions IntMap.empty . parseIndex cnc) (productions cnc)
+ l_prods = (linIndex cnc . filterProductions IntMap.empty) (productions cnc)
+ in cnc{pproductions = p_prods, lproductions = l_prods}
+ where
+ parseIndex cnc = IntMap.mapMaybeWithKey filterProdSet
+ where
+ filterProdSet fid prods
+ | fid `IntSet.member` ho_fids = Just prods
+ | otherwise = let prods' = Set.filter (not . is_ho_prod) prods
+ in if Set.null prods'
+ then Nothing
+ else Just prods'
+
+ is_ho_prod (PApply _ [fid]) | fid == fcatVar = True
+ is_ho_prod _ = False
+
+ ho_fids :: IntSet.IntSet
+ ho_fids = IntSet.fromList [fid | cat <- ho_cats
+ , fid <- maybe [] (\(CncCat s e _) -> [s..e]) (Map.lookup cat (cnccats cnc))]
+
+ ho_cats :: [CId]
+ ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs abs)
+ , h <- case ty of {DTyp hyps val _ -> hyps}
+ , c <- fst (catSkeleton (typeOfHypo h))]
+
+ linIndex cnc productions =
+ Map.fromListWith (IntMap.unionWith Set.union)
+ [(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
+ , prod <- Set.toList prods
+ , fun <- getFunctions prod]
+ where
+ getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun]
+ getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
+ Nothing -> []
+ Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]