summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Compile/GeneratePMCFG.hs
diff options
context:
space:
mode:
authoradelon <22380201+adelon@users.noreply.github.com>2026-05-17 20:37:48 +0200
committeradelon <22380201+adelon@users.noreply.github.com>2026-05-17 20:37:48 +0200
commit3cc01b9d311c7a9f86fbf2fa8c2d66921f9ba030 (patch)
tree5b979361c2d6b8ba19ef65345f1f20cf56059fb8 /src/compiler/GF/Compile/GeneratePMCFG.hs
parent1c086bed25811db1cf71990fb2eeca023e62c060 (diff)
Benchmarks, initial sketches
Diffstat (limited to 'src/compiler/GF/Compile/GeneratePMCFG.hs')
-rw-r--r--src/compiler/GF/Compile/GeneratePMCFG.hs282
1 files changed, 246 insertions, 36 deletions
diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs
index 74615dc98..e483911d1 100644
--- a/src/compiler/GF/Compile/GeneratePMCFG.hs
+++ b/src/compiler/GF/Compile/GeneratePMCFG.hs
@@ -34,10 +34,12 @@ import qualified Data.IntSet as IntSet
import GF.Text.Pretty
import Data.Array.IArray
import Data.Array.Unboxed
+import Data.Array.ST
--import Data.Maybe
--import Data.Char (isDigit)
import Control.Applicative(Applicative(..))
import Control.Monad
+import Control.Monad.ST (ST)
import Control.Monad.Identity
--import Control.Exception
--import Debug.Trace(trace)
@@ -98,10 +100,10 @@ addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncFun mty@(Just (cat,cont
(ctxt,res,_) = err bug typeForm (lookupFunType gr am id)
addRule lins (newCat', newArgs') env0 =
- let [newCat] = getFIds newCat'
- !fun = mkArray lins
- newArgs = map getFIds newArgs'
- in addFunction env0 newCat fun newArgs
+ let (env1,newCat) = getSingleFIdCached env0 newCat'
+ !fun = mkArray lins
+ (env2,rect) = getRectangleCached env1 newArgs'
+ in addFunction env2 newCat fun rect
addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ lincat))
mdef@(Just (L loc1 def))
@@ -135,14 +137,16 @@ addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ linc
seqs2 `seq` pmcfg `seq` return (seqs2,GF.Grammar.CncCat mty mdef mref mprn (Just pmcfg))
where
addLindef lins (newCat', newArgs') env0 =
- let [newCat] = getFIds newCat'
- !fun = mkArray lins
- in addFunction env0 newCat fun [[fidVar]]
+ let (env1,newCat) = getSingleFIdCached env0 newCat'
+ !fun = mkArray lins
+ !rect = Rectangle [singletonFId fidVar]
+ in addFunction env1 newCat fun rect
addLinref lins (newCat', [newArg']) env0 =
- let newArg = getFIds newArg'
- !fun = mkArray lins
- in addFunction env0 fidVar fun [newArg]
+ let (env1,newArg) = getFIdAltsCached env0 newArg'
+ !fun = mkArray lins
+ !rect = Rectangle [newArg]
+ in addFunction env1 fidVar fun rect
addPMCFG opts gr cenv opath am cm seqs id info = return (seqs, info)
@@ -302,14 +306,25 @@ protoFCat gr cat lincat =
((_,f),schema) -> PFCat (snd cat) f schema
getFIds :: ProtoFCat -> [FId]
-getFIds (PFCat _ _ schema) =
- reverse (solutions (variants schema) ())
+getFIds = fidAltsToList . getFIdAlts
+
+getFIdAlts :: ProtoFCat -> FIdAlts
+getFIdAlts = fIdAltsFromKey . fIdKey
+
+getSingleFId :: ProtoFCat -> FId
+getSingleFId = expectSingleFId "getSingleFId" . getFIdAlts
+
+fIdKey :: ProtoFCat -> FIdKey
+fIdKey (PFCat _ _ schema) =
+ FIdKey (collect schema)
where
- variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs
- variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs
- variants (CStr _) = return 0
- variants (CPar (m,values)) = do (value,index) <- member values
- return (m*index)
+ collect (CRec rs) = concatMap (\(lbl,Identity t) -> collect t) rs
+ collect (CTbl _ cs) = concatMap (\(trm,Identity t) -> collect t) cs
+ collect (CStr _) = []
+ collect (CPar (m,values)) = [weightedChoices m values]
+
+ weightedChoices m values =
+ listArray (0,length values-1) [m*index | (value,index) <- values]
catFactor :: ProtoFCat -> Int
catFactor (PFCat _ f _) = f
@@ -549,36 +564,231 @@ getVarIndex x = maybe err id $ getArgIndex x
----------------------------------------------------------------------
-- GrammarEnv
-data PMCFGEnv = PMCFGEnv !ProdSet !FunSet
-type ProdSet = Set.Set Production
-type FunSet = Map.Map (UArray LIndex SeqId) FunId
+data PMCFGEnv = PMCFGEnv !ProdGroups !FunSet !FIdCache
+type ProdGroups = Map.Map (FId,FunId) ProdGroup
+type FunSet = Map.Map (UArray LIndex SeqId) FunId
+type FIdCache = Map.Map FIdKey FIdAlts
+
+newtype FIdAlts = FIdAlts (UArray Int FId)
+ deriving (Eq,Ord)
+
+newtype FIdKey = FIdKey [UArray Int FId]
+ deriving (Eq,Ord)
+
+-- Keep exact rectangles to preserve the old finalizer's duplicate and
+-- rectangle-area semantics, but store each argument list compactly.
+newtype Rectangle = Rectangle [FIdAlts]
+ deriving (Eq,Ord)
+
+data ProdGroup = ProdGroup
+ !(Set.Set Rectangle)
+ !(Maybe [IntSet.IntSet])
+ {-# UNPACK #-} !Int
emptyPMCFGEnv =
- PMCFGEnv Set.empty Map.empty
+ PMCFGEnv Map.empty Map.empty Map.empty
-addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> [[FId]] -> PMCFGEnv
-addFunction (PMCFGEnv prodSet funSet) !fid fun args =
+addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> Rectangle -> PMCFGEnv
+addFunction (PMCFGEnv prodGroups funSet fidCache) !fid fun rect =
case Map.lookup fun funSet of
- Just !funid -> PMCFGEnv (Set.insert (Production fid funid args) prodSet)
+ Just !funid -> PMCFGEnv (insertProduction fid funid rect prodGroups)
funSet
+ fidCache
Nothing -> let !funid = Map.size funSet
- in PMCFGEnv (Set.insert (Production fid funid args) prodSet)
+ in PMCFGEnv (insertProduction fid funid rect prodGroups)
(Map.insert fun funid funSet)
+ fidCache
getPMCFG :: PMCFGEnv -> PMCFG
-getPMCFG (PMCFGEnv prodSet funSet) =
- PMCFG (optimize prodSet) (mkSetArray funSet)
+getPMCFG (PMCFGEnv prodGroups funSet _) =
+ PMCFG (Map.foldrWithKey addGroup [] prodGroups) (mkSetArray funSet)
where
- optimize ps = Map.foldrWithKey ff [] (Map.fromListWith (++) [((fid,funid),[args]) | (Production fid funid args) <- Set.toList ps])
+ addGroup :: (FId,FunId) -> ProdGroup -> [Production] -> [Production]
+ addGroup (fid,funid) (ProdGroup rectangles mArgSets count) prods
+ | product (map IntSet.size argSets) == count
+ = Production fid funid (map IntSet.toList argSets) : prods
+ | otherwise = map (Production fid funid . unpackRectangle) (reverse (Set.toList rectangles)) ++ prods
where
- ff :: (FId,FunId) -> [[[FId]]] -> [Production] -> [Production]
- ff (fid,funid) xs prods
- | product (map IntSet.size ys) == count
- = (Production fid funid (map IntSet.toList ys)) : prods
- | otherwise = map (Production fid funid) xs ++ prods
- where
- count = sum (map (product . map length) xs)
- ys = foldl (zipWith (foldr IntSet.insert)) (repeat IntSet.empty) xs
+ argSets = case mArgSets of
+ Just argSets -> argSets
+ Nothing -> rectangleArgSets rectangles
+
+insertProduction :: FId -> FunId -> Rectangle -> ProdGroups -> ProdGroups
+insertProduction !fid !funid rect prodGroups =
+ Map.insert (fid,funid) group' prodGroups
+ where
+ group' =
+ case Map.lookup (fid,funid) prodGroups of
+ Nothing -> singletonProdGroup rect
+ Just group -> insertRectangle rect group
+
+singletonProdGroup :: Rectangle -> ProdGroup
+singletonProdGroup rect =
+ let !rects = Set.singleton rect
+ !argSets = rectangleArgSetsOne rect
+ !count = rectangleArea rect
+ in ProdGroup rects (Just argSets) count
+
+insertRectangle :: Rectangle -> ProdGroup -> ProdGroup
+insertRectangle rect group@(ProdGroup rectangles mArgSets count)
+ | Set.member rect rectangles
+ = group
+ | otherwise
+ = let !rectangles' = Set.insert rect rectangles
+ !mArgSets' = updateArgSets mArgSets rect
+ !count' = count + rectangleArea rect
+ in ProdGroup rectangles' mArgSets' count'
+ where
+ addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids
+
+ updateArgSets Nothing _ = Nothing
+ updateArgSets (Just argSets) (Rectangle args)
+ | length argSets == length args = let !argSets' = zipWithStrict addArgSet argSets args
+ in Just argSets'
+ | otherwise = Nothing
+
+rectangleArgSets :: Set.Set Rectangle -> [IntSet.IntSet]
+rectangleArgSets rectangles =
+ List.foldl' addRectangle (repeat IntSet.empty) (reverse (Set.toList rectangles))
+ where
+ addRectangle argSets (Rectangle args) = zipWith addArgSet argSets args
+ addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids
+
+rectangleArgSetsOne :: Rectangle -> [IntSet.IntSet]
+rectangleArgSetsOne (Rectangle args) =
+ mapStrict (foldFIdAlts (\s fid -> IntSet.insert fid s) IntSet.empty) args
+
+unpackRectangle :: Rectangle -> [[FId]]
+unpackRectangle (Rectangle args) = map fidAltsToList args
+
+rectangleArea :: Rectangle -> Int
+rectangleArea (Rectangle args) = product (map fidAltsSize args)
+
+getFIdAltsCached :: PMCFGEnv -> ProtoFCat -> (PMCFGEnv, FIdAlts)
+getFIdAltsCached env@(PMCFGEnv prodGroups funSet fidCache) pcat
+ | shouldCacheFIdKey key resultSize =
+ case Map.lookup key fidCache of
+ Just alts -> (env,alts)
+ Nothing -> let !alts = fIdAltsFromKeyWithSize key resultSize
+ !fidCache' = Map.insert key alts fidCache
+ in (PMCFGEnv prodGroups funSet fidCache',alts)
+ | otherwise =
+ let !alts = fIdAltsFromKeyWithSize key resultSize
+ in (env,alts)
+ where
+ !key = fIdKey pcat
+ !resultSize = fIdKeyResultSize key
+
+getSingleFIdCached :: PMCFGEnv -> ProtoFCat -> (PMCFGEnv, FId)
+getSingleFIdCached env pcat =
+ case getFIdAltsCached env pcat of
+ (env',alts) -> (env',expectSingleFId "getSingleFIdCached" alts)
+
+getRectangleCached :: PMCFGEnv -> [ProtoFCat] -> (PMCFGEnv, Rectangle)
+getRectangleCached env0 pcats =
+ let !(env,alts) = List.foldl' addAlt (env0,[]) pcats
+ !rect = Rectangle (reverse alts)
+ in (env,rect)
+ where
+ addAlt (env,alts) pcat =
+ let !(env',alt) = getFIdAltsCached env pcat
+ in (env',alt:alts)
+
+shouldCacheFIdKey :: FIdKey -> Int -> Bool
+shouldCacheFIdKey key resultSize =
+ fIdKeyComponents key > 1 &&
+ resultSize >= 8 &&
+ resultSize > fIdKeyComponentSizeSum key
+
+fIdAltsFromKey :: FIdKey -> FIdAlts
+fIdAltsFromKey key = fIdAltsFromKeyWithSize key (fIdKeyResultSize key)
+
+fIdAltsFromKeyWithSize :: FIdKey -> Int -> FIdAlts
+fIdAltsFromKeyWithSize key@(FIdKey comps) resultSize
+ | resultSize == 0 = FIdAlts (listArray (0,-1) [])
+ | resultSize == 1 = singletonFId (fIdKeySingleton key)
+ | otherwise = FIdAlts $ runSTUArray $ do
+ arr <- newArray_ (0,resultSize-1)
+ _ <- fillFIds arr 0 0 comps
+ return arr
+
+fillFIds :: STUArray s Int FId -> Int -> FId -> [UArray Int FId] -> ST s Int
+fillFIds arr !offset !sum [] = do
+ writeArray arr offset sum
+ return (offset+1)
+-- Components are ordered outer-to-inner. This must match the old
+-- reverse (solutions (variants schema) ()) ordering.
+fillFIds arr !offset !sum (choices:choices') =
+ foldUArrayM (\offset' choice -> fillFIds arr offset' (sum+choice) choices') offset choices
+
+foldUArrayM :: Monad m => (a -> FId -> m a) -> a -> UArray Int FId -> m a
+foldUArrayM f z arr = go (fst bnds) z
+ where
+ !bnds@(_,hi) = bounds arr
+ go !i !acc
+ | i > hi = return acc
+ | otherwise = do acc' <- f acc (arr ! i)
+ go (i+1) acc'
+
+fIdKeyResultSize :: FIdKey -> Int
+fIdKeyResultSize (FIdKey comps) = product (map arraySize comps)
+
+fIdKeyComponentSizeSum :: FIdKey -> Int
+fIdKeyComponentSizeSum (FIdKey comps) = sum (map arraySize comps)
+
+fIdKeyComponents :: FIdKey -> Int
+fIdKeyComponents (FIdKey comps) = length comps
+
+fIdKeySingleton :: FIdKey -> FId
+fIdKeySingleton (FIdKey comps) = List.foldl' addChoice 0 comps
+ where
+ addChoice :: FId -> UArray Int FId -> FId
+ addChoice acc choices
+ | arraySize choices == 1 = acc + choices ! fst (bounds choices)
+ | otherwise = bug "fIdKeySingleton: non-singleton key"
+
+singletonFId :: FId -> FIdAlts
+singletonFId fid = FIdAlts (listArray (0,0) [fid])
+
+fidAltsSize :: FIdAlts -> Int
+fidAltsSize (FIdAlts arr) = arraySize arr
+
+fidAltsIndex :: FIdAlts -> Int -> FId
+fidAltsIndex (FIdAlts arr) i = arr ! i
+
+expectSingleFId :: String -> FIdAlts -> FId
+expectSingleFId label alts
+ | fidAltsSize alts == 1 = fidAltsIndex alts 0
+ | otherwise = bug (label++": expected singleton category")
+
+fidAltsToList :: FIdAlts -> [FId]
+fidAltsToList (FIdAlts arr) = elems arr
+
+foldFIdAlts :: (a -> FId -> a) -> a -> FIdAlts -> a
+foldFIdAlts f z (FIdAlts arr) = go (fst bnds) z
+ where
+ !bnds@(_,hi) = bounds arr
+ go !i !acc
+ | i > hi = acc
+ | otherwise = let !acc' = f acc (arr ! i)
+ in go (i+1) acc'
+
+arraySize :: UArray Int FId -> Int
+arraySize arr = let !(lo,hi) = bounds arr
+ in max 0 (hi-lo+1)
+
+mapStrict :: (a -> b) -> [a] -> [b]
+mapStrict f [] = []
+mapStrict f (x:xs) = let !y = f x
+ !ys = mapStrict f xs
+ in y:ys
+
+zipWithStrict :: (a -> b -> c) -> [a] -> [b] -> [c]
+zipWithStrict f [] [] = []
+zipWithStrict f (x:xs) (y:ys) = let !z = f x y
+ !zs = zipWithStrict f xs ys
+ in z:zs
+zipWithStrict f _ _ = bug "zipWithStrict: inconsistent list lengths"
------------------------------------------------------------
-- updating the MCF rule