summaryrefslogtreecommitdiff
path: root/src/GF/Compile/OptimizeGF.hs
blob: 41b828aa324233afcd2f453b68de8c839b43045d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
----------------------------------------------------------------------
-- |
-- Module      : OptimizeGF
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:33 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- Optimizations on GF source code: sharing, parametrization, value sets.
--
-- optimization: sharing branches in tables. AR 25\/4\/2003.
-- following advice of Josef Svenningsson
-----------------------------------------------------------------------------

module GF.Compile.OptimizeGF (
  optModule,unshareModule,unsubexpModule,unoptModule,subexpModule,shareModule
  ) where 

import GF.Grammar.Grammar
import GF.Grammar.Lookup
import GF.Infra.Ident
import qualified GF.Grammar.Macros as C
import GF.Grammar.PrGrammar (prt)
import qualified GF.Infra.Modules as M
import GF.Data.Operations

import Control.Monad
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS
import Data.List

optModule :: (Ident, SourceModInfo) -> (Ident, SourceModInfo)
optModule = subexpModule . shareModule

shareModule = processModule optim

unoptModule :: SourceGrammar -> (Ident, SourceModInfo) -> (Ident, SourceModInfo)
unoptModule gr = unshareModule gr . unsubexpModule

unshareModule :: SourceGrammar -> (Ident, SourceModInfo) -> (Ident, SourceModInfo)
unshareModule gr = processModule (const (unoptim gr))

processModule :: 
  (Ident -> Term -> Term) -> (Ident, SourceModInfo) -> (Ident, SourceModInfo)
processModule opt (i,m) = case m of
  M.ModMod mo -> 
    (i,M.ModMod (M.replaceJudgements mo (mapTree (shareInfo opt) (M.jments mo))))
  _ -> (i,m)

shareInfo opt (c, CncCat ty (Yes t) m) = (c,CncCat ty (Yes (opt c t)) m)
shareInfo opt (c, CncFun kxs (Yes t) m) = (c,CncFun kxs (Yes (opt c t)) m)
shareInfo opt (c, ResOper ty (Yes t)) = (c,ResOper ty (Yes (opt c t)))
shareInfo _ i = i

-- the function putting together optimizations
optim :: Ident -> Term -> Term
optim c = values . factor c 0

-- we need no counter to create new variable names, since variables are 
-- local to tables (only true in GFC) ---

-- factor parametric branches

factor :: Ident -> Int -> Term -> Term
factor c i t = case t of
  T _ [_] -> t
  T _ []  -> t
  T (TComp ty) cs -> 
    T (TTyped ty) $ factors i [(p, factor c (i+1) v) | (p, v) <- cs]
  _ -> C.composSafeOp (factor c i) t
 where

   factors i psvs =   -- we know psvs has at least 2 elements
     let p   = qqIdent c i
         vs' = map (mkFun p) psvs
     in if   allEqs vs'
        then mkCase p vs' 
        else psvs

   mkFun p (patt, val) = replace (C.patt2term patt) (Vr p) val

   allEqs (v:vs) = all (==v) vs

   mkCase p (v:_) = [(PV p, v)]

--- we hope this will be fresh and don't check... in GFC would be safe

qqIdent c i = identC (BS.pack ("q_" ++ prt c ++ "__" ++ show i))


--  we need to replace subterms

replace :: Term -> Term -> Term -> Term
replace old new trm = case trm of
  
  -- these are the important cases, since they can correspond to patterns  
  QC _ _   | trm == old -> new
  App t ts | trm == old -> new
  App t ts              -> App (repl t) (repl ts)
  R _ | isRec && trm == old -> new
  _ -> C.composSafeOp repl trm
 where
   repl = replace old new
   isRec = case trm of
     R _ -> True
     _ -> False

-- It is very important that this is performed only after case
-- expansion since otherwise the order and number of values can
-- be incorrect. Guaranteed by the TComp flag.

values :: Term -> Term
values t = case t of
  T ty [(ps,t)]    -> T ty [(ps,values t)] -- don't destroy parametrization
  T (TComp ty)  cs -> V ty [values t | (_, t) <- cs]
  T (TTyped ty) cs -> V ty [values t | (_, t) <- cs] 
        ---- why are these left?
        ---- printing with GrammarToSource does not preserve the distinction
  _ -> C.composSafeOp values t


-- to undo the effect of factorization

unoptim :: SourceGrammar -> Term -> Term
unoptim gr = unfactor gr

unfactor :: SourceGrammar -> Term -> Term
unfactor gr t = case t of
  T (TTyped ty) [(PV x,u)] -> V ty [restore x v (unfac u) | v <- vals ty]
  _ -> C.composSafeOp unfac t
 where
   unfac = unfactor gr
   vals  = err error id . allParamValues gr
   restore x u t = case t of
     Vr y | y == x -> u
     _ -> C.composSafeOp (restore x u) t


----------------------------------------------------------------------

{-
This module implements a simple common subexpression elimination
 for gfc grammars, to factor out shared subterms in lin rules.
It works in three phases: 

  (1) collectSubterms collects recursively all subterms of forms table and (P x..y)
      from lin definitions (experience shows that only these forms
      tend to get shared) and counts how many times they occur
  (2) addSubexpConsts takes those subterms t that occur more than once
      and creates definitions of form "oper A''n = t" where n is a
      fresh number; notice that we assume no ids of this form are in
      scope otherwise
  (3) elimSubtermsMod goes through lins and the created opers by replacing largest
      possible subterms by the newly created identifiers

The optimization is invoked in gf by the flag i -subs.

If an application does not support GFC opers, the effect of this
optimization can be undone by the function unSubelimCanon.

The function unSubelimCanon can be used to diagnostisize how much
cse is possible in the grammar. It is used by the flag pg -printer=subs.

-}

subexpModule :: SourceModule -> SourceModule
subexpModule (n,m) = errVal (n,m) $ case m of
  M.ModMod mo -> do
    let ljs = tree2list (M.jments mo)
    (tree,_) <- appSTM (getSubtermsMod n ljs) (Map.empty,0)
    js2 <- liftM buildTree $ addSubexpConsts n tree $ ljs
    return (n,M.ModMod (M.replaceJudgements mo js2))
  _ -> return (n,m)

unsubexpModule :: SourceModule -> SourceModule
unsubexpModule sm@(i,m) = case m of
    M.ModMod mo | hasSub ljs ->
      (i, M.ModMod (M.replaceJudgements mo
                     (rebuild (map unparInfo ljs)))) 
                        where ljs = tree2list (M.jments mo)
    _ -> (i,m)
  where
    -- perform this iff the module has opers
    hasSub ljs = not $ null [c | (c,ResOper _ _) <- ljs]
    unparInfo (c,info) = case info of
      CncFun xs (Yes t) m -> [(c, CncFun xs (Yes (unparTerm t)) m)]
      ResOper (Yes (EInt 8)) _ -> [] -- subexp-generated opers
      ResOper pty (Yes t) -> [(c, ResOper pty (Yes (unparTerm t)))]
      _ -> [(c,info)]
    unparTerm t = case t of
      Q m c | isOperIdent c -> --- name convention of subexp opers
        errVal t $ liftM unparTerm $ lookupResDef gr m c 
      _ -> C.composSafeOp unparTerm t
    gr = M.MGrammar [sm] 
    rebuild = buildTree . concat

-- implementation

type TermList = Map Term (Int,Int) -- number of occs, id
type TermM a = STM (TermList,Int) a

addSubexpConsts :: 
  Ident -> Map Term (Int,Int) -> [(Ident,Info)] -> Err [(Ident,Info)]
addSubexpConsts mo tree lins = do
  let opers = [oper id trm | (trm,(_,id)) <- list]
  mapM mkOne $ opers ++ lins
 where

   mkOne (f,def) = case def of
     CncFun xs (Yes trm) pn -> do
       trm' <- recomp f trm
       return (f,CncFun xs (Yes trm') pn)
     ResOper ty (Yes trm) -> do
       trm' <- recomp f trm
       return (f,ResOper ty (Yes trm'))
     _ -> return (f,def)
   recomp f t = case Map.lookup t tree of
     Just (_,id) | operIdent id /= f -> return $ Q mo (operIdent id)
     _ -> C.composOp (recomp f) t

   list = Map.toList tree

   oper id trm = (operIdent id, ResOper (Yes (EInt 8)) (Yes trm)) 
   --- impossible type encoding generated opers

getSubtermsMod :: Ident -> [(Ident,Info)] -> TermM (Map Term (Int,Int))
getSubtermsMod mo js = do
  mapM (getInfo (collectSubterms mo)) js
  (tree0,_) <- readSTM
  return $ Map.filter (\ (nu,_) -> nu > 1) tree0
 where
   getInfo get fi@(f,i) = case i of
     CncFun xs (Yes trm) pn -> do
       get trm
       return $ fi
     ResOper ty (Yes trm) -> do
       get trm
       return $ fi
     _ -> return fi

collectSubterms :: Ident -> Term -> TermM Term
collectSubterms mo t = case t of
  App f a -> do
    collect f
    collect a
    add t 
  T ty cs -> do
    let (_,ts) = unzip cs
    mapM collect ts
    add t
  V ty ts -> do
    mapM collect ts
    add t
----  K (KP _ _)  -> add t
  _ -> C.composOp (collectSubterms mo) t
 where
   collect = collectSubterms mo
   add t = do
     (ts,i) <- readSTM
     let 
       ((count,id),next) = case Map.lookup t ts of
         Just (nu,id) -> ((nu+1,id), i)
         _ ->            ((1,   i ), i+1)
     writeSTM (Map.insert t (count,id) ts, next)
     return t --- only because of composOp

operIdent :: Int -> Ident
operIdent i = identC (operPrefix `BS.append` (BS.pack (show i))) ---

isOperIdent :: Ident -> Bool
isOperIdent id = BS.isPrefixOf operPrefix (ident2bs id)

operPrefix = BS.pack ("A''")