diff options
Diffstat (limited to 'src/compiler/GF')
| -rw-r--r-- | src/compiler/GF/Compile/GeneratePMCFG.hs | 130 |
1 files changed, 58 insertions, 72 deletions
diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 5a0359a91..c3f20a19e 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -315,7 +315,11 @@ getFIdAlts :: ProtoFCat -> FIdAlts getFIdAlts = fIdAltsFromFactors . fIdFactors getSingleFId :: ProtoFCat -> FId -getSingleFId = expectSingleFId "getSingleFId" . getFIdAlts +getSingleFId pcat = + let !factors = fIdFactors pcat + in case fIdFactorsResultSize factors of + 1 -> fIdFactorsSingleton factors + _ -> bug "getSingleFId: expected singleton category" #ifdef PMCFG_TEST_HOOKS pmcfgTestGetFIds :: TestSchema -> [FId] @@ -346,13 +350,13 @@ fIdFactors :: ProtoFCat -> FIdFactors fIdFactors (PFCat _ _ schema) = FIdFactors (collect schema) where - collect (CRec rs) = concatMap (\(lbl,Identity t) -> collect t) rs - collect (CTbl _ cs) = concatMap (\(trm,Identity t) -> collect t) cs + collect (CRec rs) = concatMap (\(_,Identity t) -> collect t) rs + collect (CTbl _ cs) = concatMap (\(_,Identity t) -> collect t) cs collect (CStr _) = [] collect (CPar (m,values)) = [weightedChoices m values] weightedChoices m values = - listArray (0,length values-1) [m*index | (value,index) <- values] + listArray (0,length values-1) [m*index | (_,index) <- values] catFactor :: ProtoFCat -> Int catFactor (PFCat _ f _) = f @@ -596,25 +600,28 @@ data PMCFGEnv = PMCFGEnv !ProdGroups !FunSet type ProdGroups = Map.Map (FId,FunId) ProdGroup type FunSet = Map.Map (UArray LIndex SeqId) FunId +-- FIdAlts arrays always use dense 0-based bounds, so their derived Ord matches +-- the old list ordering of alternatives. newtype FIdAlts = FIdAlts (UArray Int FId) deriving (Eq, Ord) +-- Factors are weighted parameter-choice arrays, in schema traversal order, +-- preserving duplicates. newtype FIdFactors = FIdFactors [UArray Int FId] --- Keep exact argument FId products to preserve the old finalizer's duplicate --- and product-area semantics, but store each argument list compactly. +-- Store the exact argument FId alternatives for one Production compactly. newtype ArgFIdProduct = ArgFIdProduct [FIdAlts] deriving (Eq, Ord) -- Accumulator type for Productions with the same result FId and function. --- The set keeps the exact distinct argument products. The optional IntSets --- record the per-argument union of FIds when all products have the same arity. --- The final Int stores areaSum, the sum of product sizes. A group can be --- emitted as one compressed Production exactly when the union area equals --- areaSum. +-- The Set matches the old ProdSet deduplication for argument products. The +-- IntSets cache the per-argument union of FIds. The final Int stores areaSum, +-- the sum of product sizes for distinct products. A group can be emitted as +-- one compressed Production exactly when the union area equals areaSum. All +-- products in a group must have the same argument arity. data ProdGroup = ProdGroup !(Set.Set ArgFIdProduct) - !(Maybe [IntSet.IntSet]) + ![IntSet.IntSet] {-# UNPACK #-} !Int emptyPMCFGEnv = @@ -622,26 +629,31 @@ emptyPMCFGEnv = addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> ArgFIdProduct -> PMCFGEnv addFunction (PMCFGEnv prodGroups funSet) !fid fun argProduct = - case Map.lookup fun funSet of - Just !funid -> PMCFGEnv (insertProduction fid funid argProduct prodGroups) funSet - Nothing -> let !funid = Map.size funSet - in PMCFGEnv (insertProduction fid funid argProduct prodGroups) - (Map.insert fun funid funSet) + let (!funid,!funSet') = + case Map.lookup fun funSet of + Just !funid -> (funid, funSet) + Nothing -> let !funid = Map.size funSet + in (funid, Map.insert fun funid funSet) + update Nothing = Just $! singletonProdGroup argProduct + update (Just group) = Just $! insertArgFIdProduct argProduct group + !prodGroups' = Map.alter update (fid,funid) prodGroups + in PMCFGEnv prodGroups' funSet' getPMCFG :: PMCFGEnv -> PMCFG getPMCFG (PMCFGEnv prodGroups funSet) = PMCFG (Map.foldrWithKey addGroup [] prodGroups) (mkSetArray funSet) where addGroup :: (FId,FunId) -> ProdGroup -> [Production] -> [Production] - addGroup (fid,funid) (ProdGroup products mArgSets areaSum) prods + addGroup (fid,funid) (ProdGroup products argSets areaSum) prods | product (map IntSet.size argSets) == areaSum = Production fid funid (map IntSet.toList argSets) : prods - -- We reverse the list for byte-to-byte equivalence with the previous grouping order. + -- The old implementation enumerated the global Production Set in ascending + -- order, then grouped with Map.fromListWith (++), reversing each group's + -- uncompressed products. Keep that order for stable serialized output. | otherwise = map (Production fid funid . unpackArgFIdProduct) (reverse (Set.toList products)) ++ prods where - argSets = case mArgSets of - Just argSets -> argSets - Nothing -> argFIdProductArgSets products + unpackArgFIdProduct :: ArgFIdProduct -> [[FId]] + unpackArgFIdProduct (ArgFIdProduct args) = map fidAltsToList args #ifdef PMCFG_TEST_HOOKS pmcfgTestBuildPMCFG :: [TestProduction] -> PMCFG @@ -655,59 +667,40 @@ pmcfgTestBuildPMCFG = fIdAltsFromList fids = FIdAlts (listArray (0,length fids-1) fids) #endif -insertProduction :: FId -> FunId -> ArgFIdProduct -> ProdGroups -> ProdGroups -insertProduction !fid !funid argProduct prodGroups = - Map.insert (fid,funid) group' prodGroups - where - group' = - case Map.lookup (fid,funid) prodGroups of - Nothing -> singletonProdGroup argProduct - Just group -> insertArgFIdProduct argProduct group - singletonProdGroup :: ArgFIdProduct -> ProdGroup -singletonProdGroup argProduct = +singletonProdGroup argProduct@(ArgFIdProduct args) = let !products = Set.singleton argProduct - !argSets = argFIdProductArgSetsOne argProduct + !argSets = fmap (insertFIdAlts IntSet.empty) args !areaSum = argFIdProductSize argProduct - in ProdGroup products (Just argSets) areaSum + in ProdGroup products argSets areaSum insertArgFIdProduct :: ArgFIdProduct -> ProdGroup -> ProdGroup -insertArgFIdProduct argProduct group@(ProdGroup products mArgSets areaSum) +insertArgFIdProduct argProduct group@(ProdGroup products argSets areaSum) | Set.member argProduct products = group | otherwise = let !products' = Set.insert argProduct products - !mArgSets' = updateArgSets mArgSets argProduct + !argSets' = updateArgSets argSets argProduct !areaSum' = areaSum + argFIdProductSize argProduct - in ProdGroup products' mArgSets' areaSum' - where - addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids - - updateArgSets Nothing _ = Nothing - updateArgSets (Just argSets) (ArgFIdProduct argFIds) - | length argSets == length argFIds = let !argSets' = zipWith addArgSet argSets argFIds - in Just argSets' - | otherwise = Nothing - -argFIdProductArgSets :: Set.Set ArgFIdProduct -> [IntSet.IntSet] -argFIdProductArgSets products = - List.foldl' addProduct (repeat IntSet.empty) (reverse (Set.toList products)) + in ProdGroup products' argSets' areaSum' where - addProduct argSets (ArgFIdProduct args) = zipWith addArgSet argSets args - addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids + addArgSet argSet fids = insertFIdAlts argSet fids -argFIdProductArgSetsOne :: ArgFIdProduct -> [IntSet.IntSet] -argFIdProductArgSetsOne (ArgFIdProduct args) = - fmap (foldFIdAlts (\s fid -> IntSet.insert fid s) IntSet.empty) args + updateArgSets argSets (ArgFIdProduct argFIds) = go argSets argFIds + where + go [] [] = [] + go (argSet:argSets) (argFIds:argFIds') = + addArgSet argSet argFIds : go argSets argFIds' + go _ _ = bug "insertArgFIdProduct: arity mismatch" -unpackArgFIdProduct :: ArgFIdProduct -> [[FId]] -unpackArgFIdProduct (ArgFIdProduct args) = map fidAltsToList args +insertFIdAlts :: IntSet.IntSet -> FIdAlts -> IntSet.IntSet +insertFIdAlts = foldFIdAlts (\s fid -> IntSet.insert fid s) argFIdProductSize :: ArgFIdProduct -> Int argFIdProductSize (ArgFIdProduct args) = product (map fidAltsSize args) getArgFIdProduct :: [ProtoFCat] -> ArgFIdProduct -getArgFIdProduct pcats = ArgFIdProduct (fmap getFIdAlts pcats) +getArgFIdProduct = ArgFIdProduct . map getFIdAlts fIdAltsFromFactors :: FIdFactors -> FIdAlts fIdAltsFromFactors factors@(FIdFactors comps) @@ -720,14 +713,15 @@ fIdAltsFromFactors factors@(FIdFactors comps) where !resultSize = fIdFactorsResultSize factors -fillFIds :: STUArray s Int FId -> Int -> FId -> [UArray Int FId] -> ST s Int -fillFIds arr !offset !sum [] = do - writeArray arr offset sum - return (offset+1) -- Components are ordered outer-to-inner. This must match the old --- reverse (solutions (variants schema) ()) ordering. -fillFIds arr !offset !sum (choices:choices') = - foldUArrayM (\offset' choice -> fillFIds arr offset' (sum+choice) choices') offset choices +-- reverse (solutions (variants schema) ()) ordering, where the last component +-- varies fastest. +fillFIds :: STUArray s Int FId -> Int -> FId -> [UArray Int FId] -> ST s Int +fillFIds arr !offset !fidAcc [] = do + writeArray arr offset fidAcc + return (offset + 1) +fillFIds arr !offset !fidAcc (factor : factors) = + foldUArrayM (\offset' fidDelta -> fillFIds arr offset' (fidAcc + fidDelta) factors) offset factor foldUArrayM :: Monad m => (a -> FId -> m a) -> a -> UArray Int FId -> m a foldUArrayM f z arr = go (fst bnds) z @@ -755,14 +749,6 @@ singletonFId fid = FIdAlts (listArray (0,0) [fid]) fidAltsSize :: FIdAlts -> Int fidAltsSize (FIdAlts arr) = rangeSize (bounds arr) -fidAltsIndex :: FIdAlts -> Int -> FId -fidAltsIndex (FIdAlts arr) i = arr ! i - -expectSingleFId :: String -> FIdAlts -> FId -expectSingleFId label alts - | fidAltsSize alts == 1 = fidAltsIndex alts 0 - | otherwise = bug (label++": expected singleton category") - fidAltsToList :: FIdAlts -> [FId] fidAltsToList (FIdAlts arr) = elems arr |
