From 3cc01b9d311c7a9f86fbf2fa8c2d66921f9ba030 Mon Sep 17 00:00:00 2001 From: adelon <22380201+adelon@users.noreply.github.com> Date: Sun, 17 May 2026 20:37:48 +0200 Subject: Benchmarks, initial sketches --- src/compiler/GF/Compile/GeneratePMCFG.hs | 282 +++++++++++++++++++++++++++---- 1 file changed, 246 insertions(+), 36 deletions(-) (limited to 'src/compiler/GF/Compile/GeneratePMCFG.hs') diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 74615dc98..e483911d1 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -34,10 +34,12 @@ import qualified Data.IntSet as IntSet import GF.Text.Pretty import Data.Array.IArray import Data.Array.Unboxed +import Data.Array.ST --import Data.Maybe --import Data.Char (isDigit) import Control.Applicative(Applicative(..)) import Control.Monad +import Control.Monad.ST (ST) import Control.Monad.Identity --import Control.Exception --import Debug.Trace(trace) @@ -98,10 +100,10 @@ addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncFun mty@(Just (cat,cont (ctxt,res,_) = err bug typeForm (lookupFunType gr am id) addRule lins (newCat', newArgs') env0 = - let [newCat] = getFIds newCat' - !fun = mkArray lins - newArgs = map getFIds newArgs' - in addFunction env0 newCat fun newArgs + let (env1,newCat) = getSingleFIdCached env0 newCat' + !fun = mkArray lins + (env2,rect) = getRectangleCached env1 newArgs' + in addFunction env2 newCat fun rect addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ lincat)) mdef@(Just (L loc1 def)) @@ -135,14 +137,16 @@ addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ linc seqs2 `seq` pmcfg `seq` return (seqs2,GF.Grammar.CncCat mty mdef mref mprn (Just pmcfg)) where addLindef lins (newCat', newArgs') env0 = - let [newCat] = getFIds newCat' - !fun = mkArray lins - in addFunction env0 newCat fun [[fidVar]] + let (env1,newCat) = getSingleFIdCached env0 newCat' + !fun = mkArray lins + !rect = Rectangle [singletonFId fidVar] + in addFunction env1 newCat fun rect addLinref lins (newCat', [newArg']) env0 = - let newArg = getFIds newArg' - !fun = mkArray lins - in addFunction env0 fidVar fun [newArg] + let (env1,newArg) = getFIdAltsCached env0 newArg' + !fun = mkArray lins + !rect = Rectangle [newArg] + in addFunction env1 fidVar fun rect addPMCFG opts gr cenv opath am cm seqs id info = return (seqs, info) @@ -302,14 +306,25 @@ protoFCat gr cat lincat = ((_,f),schema) -> PFCat (snd cat) f schema getFIds :: ProtoFCat -> [FId] -getFIds (PFCat _ _ schema) = - reverse (solutions (variants schema) ()) +getFIds = fidAltsToList . getFIdAlts + +getFIdAlts :: ProtoFCat -> FIdAlts +getFIdAlts = fIdAltsFromKey . fIdKey + +getSingleFId :: ProtoFCat -> FId +getSingleFId = expectSingleFId "getSingleFId" . getFIdAlts + +fIdKey :: ProtoFCat -> FIdKey +fIdKey (PFCat _ _ schema) = + FIdKey (collect schema) where - variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs - variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs - variants (CStr _) = return 0 - variants (CPar (m,values)) = do (value,index) <- member values - return (m*index) + collect (CRec rs) = concatMap (\(lbl,Identity t) -> collect t) rs + collect (CTbl _ cs) = concatMap (\(trm,Identity t) -> collect t) cs + collect (CStr _) = [] + collect (CPar (m,values)) = [weightedChoices m values] + + weightedChoices m values = + listArray (0,length values-1) [m*index | (value,index) <- values] catFactor :: ProtoFCat -> Int catFactor (PFCat _ f _) = f @@ -549,36 +564,231 @@ getVarIndex x = maybe err id $ getArgIndex x ---------------------------------------------------------------------- -- GrammarEnv -data PMCFGEnv = PMCFGEnv !ProdSet !FunSet -type ProdSet = Set.Set Production -type FunSet = Map.Map (UArray LIndex SeqId) FunId +data PMCFGEnv = PMCFGEnv !ProdGroups !FunSet !FIdCache +type ProdGroups = Map.Map (FId,FunId) ProdGroup +type FunSet = Map.Map (UArray LIndex SeqId) FunId +type FIdCache = Map.Map FIdKey FIdAlts + +newtype FIdAlts = FIdAlts (UArray Int FId) + deriving (Eq,Ord) + +newtype FIdKey = FIdKey [UArray Int FId] + deriving (Eq,Ord) + +-- Keep exact rectangles to preserve the old finalizer's duplicate and +-- rectangle-area semantics, but store each argument list compactly. +newtype Rectangle = Rectangle [FIdAlts] + deriving (Eq,Ord) + +data ProdGroup = ProdGroup + !(Set.Set Rectangle) + !(Maybe [IntSet.IntSet]) + {-# UNPACK #-} !Int emptyPMCFGEnv = - PMCFGEnv Set.empty Map.empty + PMCFGEnv Map.empty Map.empty Map.empty -addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> [[FId]] -> PMCFGEnv -addFunction (PMCFGEnv prodSet funSet) !fid fun args = +addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> Rectangle -> PMCFGEnv +addFunction (PMCFGEnv prodGroups funSet fidCache) !fid fun rect = case Map.lookup fun funSet of - Just !funid -> PMCFGEnv (Set.insert (Production fid funid args) prodSet) + Just !funid -> PMCFGEnv (insertProduction fid funid rect prodGroups) funSet + fidCache Nothing -> let !funid = Map.size funSet - in PMCFGEnv (Set.insert (Production fid funid args) prodSet) + in PMCFGEnv (insertProduction fid funid rect prodGroups) (Map.insert fun funid funSet) + fidCache getPMCFG :: PMCFGEnv -> PMCFG -getPMCFG (PMCFGEnv prodSet funSet) = - PMCFG (optimize prodSet) (mkSetArray funSet) +getPMCFG (PMCFGEnv prodGroups funSet _) = + PMCFG (Map.foldrWithKey addGroup [] prodGroups) (mkSetArray funSet) where - optimize ps = Map.foldrWithKey ff [] (Map.fromListWith (++) [((fid,funid),[args]) | (Production fid funid args) <- Set.toList ps]) + addGroup :: (FId,FunId) -> ProdGroup -> [Production] -> [Production] + addGroup (fid,funid) (ProdGroup rectangles mArgSets count) prods + | product (map IntSet.size argSets) == count + = Production fid funid (map IntSet.toList argSets) : prods + | otherwise = map (Production fid funid . unpackRectangle) (reverse (Set.toList rectangles)) ++ prods where - ff :: (FId,FunId) -> [[[FId]]] -> [Production] -> [Production] - ff (fid,funid) xs prods - | product (map IntSet.size ys) == count - = (Production fid funid (map IntSet.toList ys)) : prods - | otherwise = map (Production fid funid) xs ++ prods - where - count = sum (map (product . map length) xs) - ys = foldl (zipWith (foldr IntSet.insert)) (repeat IntSet.empty) xs + argSets = case mArgSets of + Just argSets -> argSets + Nothing -> rectangleArgSets rectangles + +insertProduction :: FId -> FunId -> Rectangle -> ProdGroups -> ProdGroups +insertProduction !fid !funid rect prodGroups = + Map.insert (fid,funid) group' prodGroups + where + group' = + case Map.lookup (fid,funid) prodGroups of + Nothing -> singletonProdGroup rect + Just group -> insertRectangle rect group + +singletonProdGroup :: Rectangle -> ProdGroup +singletonProdGroup rect = + let !rects = Set.singleton rect + !argSets = rectangleArgSetsOne rect + !count = rectangleArea rect + in ProdGroup rects (Just argSets) count + +insertRectangle :: Rectangle -> ProdGroup -> ProdGroup +insertRectangle rect group@(ProdGroup rectangles mArgSets count) + | Set.member rect rectangles + = group + | otherwise + = let !rectangles' = Set.insert rect rectangles + !mArgSets' = updateArgSets mArgSets rect + !count' = count + rectangleArea rect + in ProdGroup rectangles' mArgSets' count' + where + addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids + + updateArgSets Nothing _ = Nothing + updateArgSets (Just argSets) (Rectangle args) + | length argSets == length args = let !argSets' = zipWithStrict addArgSet argSets args + in Just argSets' + | otherwise = Nothing + +rectangleArgSets :: Set.Set Rectangle -> [IntSet.IntSet] +rectangleArgSets rectangles = + List.foldl' addRectangle (repeat IntSet.empty) (reverse (Set.toList rectangles)) + where + addRectangle argSets (Rectangle args) = zipWith addArgSet argSets args + addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids + +rectangleArgSetsOne :: Rectangle -> [IntSet.IntSet] +rectangleArgSetsOne (Rectangle args) = + mapStrict (foldFIdAlts (\s fid -> IntSet.insert fid s) IntSet.empty) args + +unpackRectangle :: Rectangle -> [[FId]] +unpackRectangle (Rectangle args) = map fidAltsToList args + +rectangleArea :: Rectangle -> Int +rectangleArea (Rectangle args) = product (map fidAltsSize args) + +getFIdAltsCached :: PMCFGEnv -> ProtoFCat -> (PMCFGEnv, FIdAlts) +getFIdAltsCached env@(PMCFGEnv prodGroups funSet fidCache) pcat + | shouldCacheFIdKey key resultSize = + case Map.lookup key fidCache of + Just alts -> (env,alts) + Nothing -> let !alts = fIdAltsFromKeyWithSize key resultSize + !fidCache' = Map.insert key alts fidCache + in (PMCFGEnv prodGroups funSet fidCache',alts) + | otherwise = + let !alts = fIdAltsFromKeyWithSize key resultSize + in (env,alts) + where + !key = fIdKey pcat + !resultSize = fIdKeyResultSize key + +getSingleFIdCached :: PMCFGEnv -> ProtoFCat -> (PMCFGEnv, FId) +getSingleFIdCached env pcat = + case getFIdAltsCached env pcat of + (env',alts) -> (env',expectSingleFId "getSingleFIdCached" alts) + +getRectangleCached :: PMCFGEnv -> [ProtoFCat] -> (PMCFGEnv, Rectangle) +getRectangleCached env0 pcats = + let !(env,alts) = List.foldl' addAlt (env0,[]) pcats + !rect = Rectangle (reverse alts) + in (env,rect) + where + addAlt (env,alts) pcat = + let !(env',alt) = getFIdAltsCached env pcat + in (env',alt:alts) + +shouldCacheFIdKey :: FIdKey -> Int -> Bool +shouldCacheFIdKey key resultSize = + fIdKeyComponents key > 1 && + resultSize >= 8 && + resultSize > fIdKeyComponentSizeSum key + +fIdAltsFromKey :: FIdKey -> FIdAlts +fIdAltsFromKey key = fIdAltsFromKeyWithSize key (fIdKeyResultSize key) + +fIdAltsFromKeyWithSize :: FIdKey -> Int -> FIdAlts +fIdAltsFromKeyWithSize key@(FIdKey comps) resultSize + | resultSize == 0 = FIdAlts (listArray (0,-1) []) + | resultSize == 1 = singletonFId (fIdKeySingleton key) + | otherwise = FIdAlts $ runSTUArray $ do + arr <- newArray_ (0,resultSize-1) + _ <- fillFIds arr 0 0 comps + return arr + +fillFIds :: STUArray s Int FId -> Int -> FId -> [UArray Int FId] -> ST s Int +fillFIds arr !offset !sum [] = do + writeArray arr offset sum + return (offset+1) +-- Components are ordered outer-to-inner. This must match the old +-- reverse (solutions (variants schema) ()) ordering. +fillFIds arr !offset !sum (choices:choices') = + foldUArrayM (\offset' choice -> fillFIds arr offset' (sum+choice) choices') offset choices + +foldUArrayM :: Monad m => (a -> FId -> m a) -> a -> UArray Int FId -> m a +foldUArrayM f z arr = go (fst bnds) z + where + !bnds@(_,hi) = bounds arr + go !i !acc + | i > hi = return acc + | otherwise = do acc' <- f acc (arr ! i) + go (i+1) acc' + +fIdKeyResultSize :: FIdKey -> Int +fIdKeyResultSize (FIdKey comps) = product (map arraySize comps) + +fIdKeyComponentSizeSum :: FIdKey -> Int +fIdKeyComponentSizeSum (FIdKey comps) = sum (map arraySize comps) + +fIdKeyComponents :: FIdKey -> Int +fIdKeyComponents (FIdKey comps) = length comps + +fIdKeySingleton :: FIdKey -> FId +fIdKeySingleton (FIdKey comps) = List.foldl' addChoice 0 comps + where + addChoice :: FId -> UArray Int FId -> FId + addChoice acc choices + | arraySize choices == 1 = acc + choices ! fst (bounds choices) + | otherwise = bug "fIdKeySingleton: non-singleton key" + +singletonFId :: FId -> FIdAlts +singletonFId fid = FIdAlts (listArray (0,0) [fid]) + +fidAltsSize :: FIdAlts -> Int +fidAltsSize (FIdAlts arr) = arraySize arr + +fidAltsIndex :: FIdAlts -> Int -> FId +fidAltsIndex (FIdAlts arr) i = arr ! i + +expectSingleFId :: String -> FIdAlts -> FId +expectSingleFId label alts + | fidAltsSize alts == 1 = fidAltsIndex alts 0 + | otherwise = bug (label++": expected singleton category") + +fidAltsToList :: FIdAlts -> [FId] +fidAltsToList (FIdAlts arr) = elems arr + +foldFIdAlts :: (a -> FId -> a) -> a -> FIdAlts -> a +foldFIdAlts f z (FIdAlts arr) = go (fst bnds) z + where + !bnds@(_,hi) = bounds arr + go !i !acc + | i > hi = acc + | otherwise = let !acc' = f acc (arr ! i) + in go (i+1) acc' + +arraySize :: UArray Int FId -> Int +arraySize arr = let !(lo,hi) = bounds arr + in max 0 (hi-lo+1) + +mapStrict :: (a -> b) -> [a] -> [b] +mapStrict f [] = [] +mapStrict f (x:xs) = let !y = f x + !ys = mapStrict f xs + in y:ys + +zipWithStrict :: (a -> b -> c) -> [a] -> [b] -> [c] +zipWithStrict f [] [] = [] +zipWithStrict f (x:xs) (y:ys) = let !z = f x y + !zs = zipWithStrict f xs ys + in z:zs +zipWithStrict f _ _ = bug "zipWithStrict: inconsistent list lengths" ------------------------------------------------------------ -- updating the MCF rule -- cgit v1.2.3