diff options
| author | adelon <22380201+adelon@users.noreply.github.com> | 2026-05-17 20:37:48 +0200 |
|---|---|---|
| committer | adelon <22380201+adelon@users.noreply.github.com> | 2026-05-17 20:37:48 +0200 |
| commit | 3cc01b9d311c7a9f86fbf2fa8c2d66921f9ba030 (patch) | |
| tree | 5b979361c2d6b8ba19ef65345f1f20cf56059fb8 | |
| parent | 1c086bed25811db1cf71990fb2eeca023e62c060 (diff) | |
Benchmarks, initial sketches
| -rw-r--r-- | .gitignore | 7 | ||||
| -rw-r--r-- | Makefile | 94 | ||||
| -rw-r--r-- | bench/compiler/Main.hs | 176 | ||||
| -rw-r--r-- | gf.cabal | 26 | ||||
| -rw-r--r-- | src/compiler/GF/Compile/GeneratePMCFG.hs | 282 | ||||
| -rw-r--r-- | src/compiler/GF/Compile/GeneratePmcfgPre.hs | 640 | ||||
| -rw-r--r-- | src/compiler/GF/Compile/GrammarToPGF.hs | 6 | ||||
| -rw-r--r-- | src/compiler/GF/CompileOne.hs | 5 | ||||
| -rw-r--r-- | stack-ghc9.6.7.yaml | 9 | ||||
| -rw-r--r-- | stack.yaml | 9 |
10 files changed, 1216 insertions, 38 deletions
diff --git a/.gitignore b/.gitignore index 93d660fcc..44475cab1 100644 --- a/.gitignore +++ b/.gitignore @@ -78,4 +78,9 @@ src/www/gf-web-api.html result .vscode .envrc -.pre-commit-config.yaml
\ No newline at end of file +.pre-commit-config.yaml + +# Profiling +profile/ +.stack-work-profile/ +.stack-work-profile-pre-pmcfg/ @@ -1,4 +1,6 @@ .PHONY: all build install doc clean html deb pkg bintar sdist +.PHONY: profile-build profile-rgl-time profile-rgl-memory profile-rgl-clean +.PHONY: bench-build bench-rgl bench-rgl-pre-pmcfg bench-rgl-clean # This gets the numeric part of the version from the cabal file VERSION=$(shell sed -ne "s/^version: *\([0-9.]*\).*/\1/p" gf.cabal) @@ -18,6 +20,51 @@ else endif endif +# Profiling configuration. +# +# The profiling targets intentionally use Stack even when the normal build falls +# back to cabal, because Stack's --work-dir makes it easy to keep profiled and +# unprofiled build products separate. The default RGL target is German, which +# is complex and a good compiler stress test; override +# RGL_PROFILE_MODULES or RGL_PROFILE_GF_OPTIONS for different experiments. +PROFILE_STACK_WORK ?= .stack-work-profile +PROFILE_GHC_OPTIONS ?= -fprof-auto -fprof-cafs +PROFILE_DIR ?= $(CURDIR)/profile + +RGL_DIR ?= ../gf-rgl +RGL_PROFILE_DIR ?= $(PROFILE_DIR)/rgl +RGL_PROFILE_GFO_DIR ?= $(RGL_PROFILE_DIR)/gfo +RGL_PROFILE_PGF_DIR ?= $(RGL_PROFILE_DIR)/pgf +RGL_PROFILE_MODULES ?= $(RGL_DIR)/src/german/LangGer.gf +RGL_PROFILE_IMPORT_DIRS ?= $(RGL_DIR)/src/api +RGL_PROFILE_IMPORT_OPTIONS = $(foreach dir,$(RGL_PROFILE_IMPORT_DIRS),-i $(dir)) +RGL_PROFILE_GF_OPTIONS ?= --make --quiet --src --preproc=mkPresent --gf-lib-path=$(RGL_PROFILE_GFO_DIR) --gfo-dir=$(RGL_PROFILE_GFO_DIR) --output-dir=$(RGL_PROFILE_PGF_DIR) --name=RGLProfile $(RGL_PROFILE_IMPORT_OPTIONS) +RGL_PROFILE_GF = stack --stack-yaml $(CURDIR)/stack.yaml --work-dir $(PROFILE_STACK_WORK) exec --profile gf -- + +RGL_TIME_STEM ?= $(RGL_PROFILE_DIR)/rgl-time +RGL_MEMORY_STEM ?= $(RGL_PROFILE_DIR)/rgl-memory +RGL_MEMORY_RTS ?= -hc +RGL_HEAP_SAMPLE_INTERVAL ?= 0.1 + +BENCH_STACK_WORK ?= $(PROFILE_STACK_WORK) +BENCH_PROFILE_FLAGS ?= --profile +BENCH_GHC_OPTIONS ?= $(PROFILE_GHC_OPTIONS) +BENCH_TARGET ?= gf:bench:gf-compiler-bench +BENCH_TASTY_OPTIONS ?= +BENCH_PRE_PMCFG_FLAG ?= --flag gf:pre-pmcfg +BENCH_PRE_PMCFG_STACK_WORK ?= .stack-work-profile-pre-pmcfg + +RGL_BENCH_DIR ?= $(PROFILE_DIR)/rgl-bench +RGL_BENCH_GFO_DIR ?= $(RGL_BENCH_DIR)/gfo +RGL_BENCH_PGF_DIR ?= $(RGL_BENCH_DIR)/pgf +RGL_BENCH_NAME ?= RGLBench +RGL_BENCH_MODULES ?= +RGL_BENCH_IMPORT_DIRS ?= $(RGL_PROFILE_IMPORT_DIRS) $(RGL_DIR)/src/prelude +RGL_BENCH_PRE_PMCFG_DIR ?= $(PROFILE_DIR)/rgl-bench-pre-pmcfg +RGL_BENCH_PRE_PMCFG_NAME ?= RGLBenchPrePmcfg +BENCH_RTS_OPTIONS ?= +RTS -T -RTS +BENCH_STACK = stack --stack-yaml $(CURDIR)/stack.yaml --work-dir $(BENCH_STACK_WORK) + all: build dist/setup-config: gf.cabal Setup.hs WebSetup.hs @@ -28,6 +75,53 @@ endif build: dist/setup-config ${CMD} ${CMD_PFX}build +profile-build: + stack --work-dir $(PROFILE_STACK_WORK) build --profile --ghc-options "$(PROFILE_GHC_OPTIONS)" + +profile-rgl-time: profile-build + @test -n "$(RGL_PROFILE_MODULES)" || { echo "No RGL modules found. Set RGL_DIR or RGL_PROFILE_MODULES."; exit 1; } + @command -v ghc-prof-flamegraph >/dev/null || { echo "Missing ghc-prof-flamegraph."; exit 1; } + rm -rf $(RGL_PROFILE_GFO_DIR) $(RGL_PROFILE_PGF_DIR) + mkdir -p $(RGL_PROFILE_DIR) $(RGL_PROFILE_GFO_DIR) $(RGL_PROFILE_PGF_DIR) + rm -f $(RGL_TIME_STEM).prof $(RGL_TIME_STEM).svg $(RGL_TIME_STEM).log + $(RGL_PROFILE_GF) $(RGL_PROFILE_GF_OPTIONS) $(RGL_PROFILE_MODULES) +RTS -s -p -po$(RGL_TIME_STEM) -RTS > $(RGL_TIME_STEM).log 2>&1 || { tail -n 80 $(RGL_TIME_STEM).log; exit 1; } + ghc-prof-flamegraph $(RGL_TIME_STEM).prof --output $(RGL_TIME_STEM).svg + +profile-rgl-memory: profile-build + @test -n "$(RGL_PROFILE_MODULES)" || { echo "No RGL modules found. Set RGL_DIR or RGL_PROFILE_MODULES."; exit 1; } + @command -v hp2ps >/dev/null || { echo "Missing hp2ps."; exit 1; } + rm -rf $(RGL_PROFILE_GFO_DIR) $(RGL_PROFILE_PGF_DIR) + mkdir -p $(RGL_PROFILE_DIR) $(RGL_PROFILE_GFO_DIR) $(RGL_PROFILE_PGF_DIR) + rm -f $(RGL_MEMORY_STEM).hp $(RGL_MEMORY_STEM).ps $(RGL_MEMORY_STEM).aux $(RGL_MEMORY_STEM).tmp.ps $(RGL_MEMORY_STEM).log + $(RGL_PROFILE_GF) $(RGL_PROFILE_GF_OPTIONS) $(RGL_PROFILE_MODULES) +RTS -s $(RGL_MEMORY_RTS) -i$(RGL_HEAP_SAMPLE_INTERVAL) -po$(RGL_MEMORY_STEM) -RTS > $(RGL_MEMORY_STEM).log 2>&1 || { tail -n 80 $(RGL_MEMORY_STEM).log; exit 1; } + cd $(dir $(RGL_MEMORY_STEM)) && hp2ps $(notdir $(RGL_MEMORY_STEM)).hp + @if command -v gs >/dev/null 2>&1; then \ + gs -q -dNOPAUSE -dBATCH -sOutputFile=$(RGL_MEMORY_STEM).tmp.ps -sDEVICE=ps2write -c "<</Orientation 1>> setpagedevice" -- $(RGL_MEMORY_STEM).ps && mv $(RGL_MEMORY_STEM).tmp.ps $(RGL_MEMORY_STEM).ps; \ + else \ + echo "Missing gs; leaving $(RGL_MEMORY_STEM).ps without orientation fix."; \ + fi + +profile-rgl-clean: + rm -rf $(RGL_PROFILE_DIR) + +bench-build: + $(BENCH_STACK) build $(BENCH_TARGET) --bench --no-run-benchmarks $(BENCH_PROFILE_FLAGS) --ghc-options "$(BENCH_GHC_OPTIONS)" + +bench-rgl: bench-build + @test -d "$(RGL_DIR)" || { echo "Missing RGL_DIR: $(RGL_DIR)"; exit 1; } + rm -rf $(RGL_BENCH_GFO_DIR) $(RGL_BENCH_PGF_DIR) + mkdir -p $(RGL_BENCH_GFO_DIR) $(RGL_BENCH_PGF_DIR) + GF_BENCH_RGL_DIR="$(RGL_DIR)" GF_BENCH_NAME="$(RGL_BENCH_NAME)" GF_BENCH_GFO_DIR="$(RGL_BENCH_GFO_DIR)" GF_BENCH_PGF_DIR="$(RGL_BENCH_PGF_DIR)" GF_BENCH_MODULES="$(RGL_BENCH_MODULES)" GF_BENCH_IMPORT_DIRS="$(RGL_BENCH_IMPORT_DIRS)" $(BENCH_STACK) bench $(BENCH_TARGET) $(BENCH_PROFILE_FLAGS) --ghc-options "$(BENCH_GHC_OPTIONS)" --benchmark-arguments "$(BENCH_TASTY_OPTIONS) $(BENCH_RTS_OPTIONS)"; \ + status=$$?; \ + if test -f gf-compiler-bench.prof; then mv gf-compiler-bench.prof "$(RGL_BENCH_DIR)/gf-compiler-bench.prof"; fi; \ + exit $$status + +bench-rgl-pre-pmcfg: + $(MAKE) bench-rgl BENCH_STACK_WORK="$(BENCH_PRE_PMCFG_STACK_WORK)" RGL_BENCH_DIR="$(RGL_BENCH_PRE_PMCFG_DIR)" RGL_BENCH_NAME="$(RGL_BENCH_PRE_PMCFG_NAME)" BENCH_PROFILE_FLAGS="$(BENCH_PROFILE_FLAGS) $(BENCH_PRE_PMCFG_FLAG)" + +bench-rgl-clean: + rm -rf $(RGL_BENCH_DIR) $(RGL_BENCH_PRE_PMCFG_DIR) + install: ifeq ($(STACK),1) stack install diff --git a/bench/compiler/Main.hs b/bench/compiler/Main.hs new file mode 100644 index 000000000..a56a89848 --- /dev/null +++ b/bench/compiler/Main.hs @@ -0,0 +1,176 @@ +module Main where + +import qualified GF +import Test.Tasty.Bench + +import Control.Exception (catch) +import Control.Monad (filterM, unless, when) +import Data.Char (isAlphaNum, toLower, toUpper) +import System.Directory + ( createDirectoryIfMissing + , doesDirectoryExist + , doesFileExist + , getCurrentDirectory + , removeFile + ) +import System.Environment (lookupEnv, withArgs) +import System.FilePath ((</>), (<.>), splitSearchPath) +import System.FilePath (takeBaseName) +import System.IO.Error (isDoesNotExistError) + +data BenchConfig = BenchConfig + { benchLabel :: String + , benchOutputName :: String + , benchGfoDir :: FilePath + , benchPgfDir :: FilePath + , benchModules :: [FilePath] + , benchImportDirs :: [FilePath] + } + +main :: IO () +main = do + configs <- readBenchConfigs + mapM_ validateBenchConfig configs + mapM_ prepareOutputDirs configs + defaultMain + [ bgroup "GF compiler" + [ bench (benchLabel config) $ whnfIO (runCompiler config) + | config <- configs + ] + ] + +readBenchConfigs :: IO [BenchConfig] +readBenchConfigs = do + cwd <- getCurrentDirectory + rglDir <- lookupEnvDefault "GF_BENCH_RGL_DIR" (".." </> "gf-rgl") + let benchDir = cwd </> "profile" </> "rgl-bench" + defaultImportDirs = [rglDir </> "src" </> "api"] + outputName <- lookupEnvDefault "GF_BENCH_NAME" "RGLBench" + gfoDir <- lookupEnvDefault "GF_BENCH_GFO_DIR" (benchDir </> "gfo") + pgfDir <- lookupEnvDefault "GF_BENCH_PGF_DIR" (benchDir </> "pgf") + importDirs <- lookupEnvList "GF_BENCH_IMPORT_DIRS" defaultImportDirs + customModules <- lookupEnvList "GF_BENCH_MODULES" [] + return $ + if null customModules + then map (defaultBenchConfig rglDir outputName gfoDir pgfDir importDirs) + defaultRGLBenchmarks + else customBenchConfigs outputName gfoDir pgfDir importDirs customModules + +defaultRGLBenchmarks :: [(String, String, FilePath)] +defaultRGLBenchmarks = + [ ("English", "english", "english" </> "LangEng.gf") + , ("German", "german", "german" </> "LangGer.gf") + , ("French", "french", "french" </> "LangFre.gf") + , ("Russian", "russian", "russian" </> "LangRus.gf") + , ("Finnish", "finnish", "finnish" </> "LangFin.gf") + ] + +defaultBenchConfig + :: FilePath -> String -> FilePath -> FilePath -> [FilePath] + -> (String, String, FilePath) + -> BenchConfig +defaultBenchConfig rglDir outputName gfoDir pgfDir importDirs (language, slug, modulePath) = + BenchConfig + { benchLabel = "RGL " ++ language ++ " compile/link" + , benchOutputName = outputNameFor outputName slug + , benchGfoDir = gfoDir </> slug + , benchPgfDir = pgfDir </> slug + , benchModules = [rglDir </> "src" </> modulePath] + , benchImportDirs = importDirs + } + +customBenchConfigs :: String -> FilePath -> FilePath -> [FilePath] -> [FilePath] -> [BenchConfig] +customBenchConfigs outputName gfoDir pgfDir importDirs modules = + [ BenchConfig + { benchLabel = "RGL " ++ takeBaseName modulePath ++ " compile/link" + , benchOutputName = caseOutputName modulePath + , benchGfoDir = caseDir gfoDir modulePath + , benchPgfDir = caseDir pgfDir modulePath + , benchModules = [modulePath] + , benchImportDirs = importDirs + } + | modulePath <- modules + ] + where + multiple = length modules > 1 + + caseOutputName modulePath + | multiple = outputNameFor outputName (takeBaseName modulePath) + | otherwise = outputName + + caseDir dir modulePath + | multiple = dir </> slugify (takeBaseName modulePath) + | otherwise = dir + +lookupEnvDefault :: String -> String -> IO String +lookupEnvDefault key fallback = do + value <- lookupEnv key + return $ case value of + Just xs | not (null xs) -> xs + _ -> fallback + +lookupEnvList :: String -> [FilePath] -> IO [FilePath] +lookupEnvList key fallback = do + value <- lookupEnv key + return $ case value of + Just xs | not (null xs) -> parsePathList xs + _ -> fallback + +parsePathList :: String -> [FilePath] +parsePathList = + filter (not . null) . concatMap splitSearchPath . words + +validateBenchConfig :: BenchConfig -> IO () +validateBenchConfig config = do + when (null (benchModules config)) $ + fail "GF_BENCH_MODULES did not contain any input modules." + missingModules <- filterM (fmap not . doesFileExist) (benchModules config) + unless (null missingModules) $ + fail $ "Missing GF benchmark module(s): " ++ unwords missingModules + missingImportDirs <- filterM (fmap not . doesDirectoryExist) (benchImportDirs config) + unless (null missingImportDirs) $ + fail $ "Missing GF benchmark import dir(s): " ++ unwords missingImportDirs + +prepareOutputDirs :: BenchConfig -> IO () +prepareOutputDirs config = do + createDirectoryIfMissing True (benchGfoDir config) + createDirectoryIfMissing True (benchPgfDir config) + +runCompiler :: BenchConfig -> IO () +runCompiler config = do + prepareOutputDirs config + removeFileIfExists (benchPgfDir config </> benchOutputName config <.> "pgf") + withArgs (compilerArgs config) GF.main + +compilerArgs :: BenchConfig -> [String] +compilerArgs config = + [ "--make" + , "--quiet" + , "--src" + , "--preproc=mkPresent" + , "--gf-lib-path=" ++ benchGfoDir config + , "--gfo-dir=" ++ benchGfoDir config + , "--output-dir=" ++ benchPgfDir config + , "--name=" ++ benchOutputName config + ] + ++ concatMap (\dir -> ["-i", dir]) (benchImportDirs config) + ++ benchModules config + +removeFileIfExists :: FilePath -> IO () +removeFileIfExists path = + removeFile path `catch` \err -> + unless (isDoesNotExistError err) (ioError err) + +outputNameFor :: String -> String -> String +outputNameFor outputName slug = outputName ++ capitalize (filter isAlphaNum slug) + +capitalize :: String -> String +capitalize [] = [] +capitalize (x:xs) = toUpper x : xs + +slugify :: String -> String +slugify = map clean + where + clean c + | isAlphaNum c = toLower c + | otherwise = '-' @@ -68,6 +68,11 @@ flag c-runtime Description: Include functionality from the C run-time library (which must be installed already) Default: False +flag pre-pmcfg + Description: Use the pre-optimization PMCFG generator + Default: False + Manual: True + library default-language: Haskell2010 build-depends: @@ -291,6 +296,11 @@ library if flag(c-runtime) cpp-options: -DC_RUNTIME + if flag(pre-pmcfg) + cpp-options: -DPRE_PMCFG + other-modules: + GF.Compile.GeneratePmcfgPre + if flag(server) build-depends: cgi >= 3001.3.0.2 && < 3001.6, @@ -394,6 +404,22 @@ executable gf -- if impl(ghc>=7.0) -- ghc-options: -rtsopts +benchmark gf-compiler-bench + type: exitcode-stdio-1.0 + main-is: Main.hs + hs-source-dirs: bench/compiler + build-depends: + base >= 4.9.1 && < 4.22, + directory >= 1.3.0 && < 1.4, + filepath >= 1.4.1 && < 1.5, + gf, + tasty-bench >= 0.5 && < 0.6 + ghc-options: -rtsopts + default-language: Haskell2010 + + if impl(ghc<8.0) + buildable: False + test-suite gf-tests type: exitcode-stdio-1.0 main-is: run.hs diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 74615dc98..e483911d1 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -34,10 +34,12 @@ import qualified Data.IntSet as IntSet import GF.Text.Pretty import Data.Array.IArray import Data.Array.Unboxed +import Data.Array.ST --import Data.Maybe --import Data.Char (isDigit) import Control.Applicative(Applicative(..)) import Control.Monad +import Control.Monad.ST (ST) import Control.Monad.Identity --import Control.Exception --import Debug.Trace(trace) @@ -98,10 +100,10 @@ addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncFun mty@(Just (cat,cont (ctxt,res,_) = err bug typeForm (lookupFunType gr am id) addRule lins (newCat', newArgs') env0 = - let [newCat] = getFIds newCat' - !fun = mkArray lins - newArgs = map getFIds newArgs' - in addFunction env0 newCat fun newArgs + let (env1,newCat) = getSingleFIdCached env0 newCat' + !fun = mkArray lins + (env2,rect) = getRectangleCached env1 newArgs' + in addFunction env2 newCat fun rect addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ lincat)) mdef@(Just (L loc1 def)) @@ -135,14 +137,16 @@ addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ linc seqs2 `seq` pmcfg `seq` return (seqs2,GF.Grammar.CncCat mty mdef mref mprn (Just pmcfg)) where addLindef lins (newCat', newArgs') env0 = - let [newCat] = getFIds newCat' - !fun = mkArray lins - in addFunction env0 newCat fun [[fidVar]] + let (env1,newCat) = getSingleFIdCached env0 newCat' + !fun = mkArray lins + !rect = Rectangle [singletonFId fidVar] + in addFunction env1 newCat fun rect addLinref lins (newCat', [newArg']) env0 = - let newArg = getFIds newArg' - !fun = mkArray lins - in addFunction env0 fidVar fun [newArg] + let (env1,newArg) = getFIdAltsCached env0 newArg' + !fun = mkArray lins + !rect = Rectangle [newArg] + in addFunction env1 fidVar fun rect addPMCFG opts gr cenv opath am cm seqs id info = return (seqs, info) @@ -302,14 +306,25 @@ protoFCat gr cat lincat = ((_,f),schema) -> PFCat (snd cat) f schema getFIds :: ProtoFCat -> [FId] -getFIds (PFCat _ _ schema) = - reverse (solutions (variants schema) ()) +getFIds = fidAltsToList . getFIdAlts + +getFIdAlts :: ProtoFCat -> FIdAlts +getFIdAlts = fIdAltsFromKey . fIdKey + +getSingleFId :: ProtoFCat -> FId +getSingleFId = expectSingleFId "getSingleFId" . getFIdAlts + +fIdKey :: ProtoFCat -> FIdKey +fIdKey (PFCat _ _ schema) = + FIdKey (collect schema) where - variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs - variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs - variants (CStr _) = return 0 - variants (CPar (m,values)) = do (value,index) <- member values - return (m*index) + collect (CRec rs) = concatMap (\(lbl,Identity t) -> collect t) rs + collect (CTbl _ cs) = concatMap (\(trm,Identity t) -> collect t) cs + collect (CStr _) = [] + collect (CPar (m,values)) = [weightedChoices m values] + + weightedChoices m values = + listArray (0,length values-1) [m*index | (value,index) <- values] catFactor :: ProtoFCat -> Int catFactor (PFCat _ f _) = f @@ -549,36 +564,231 @@ getVarIndex x = maybe err id $ getArgIndex x ---------------------------------------------------------------------- -- GrammarEnv -data PMCFGEnv = PMCFGEnv !ProdSet !FunSet -type ProdSet = Set.Set Production -type FunSet = Map.Map (UArray LIndex SeqId) FunId +data PMCFGEnv = PMCFGEnv !ProdGroups !FunSet !FIdCache +type ProdGroups = Map.Map (FId,FunId) ProdGroup +type FunSet = Map.Map (UArray LIndex SeqId) FunId +type FIdCache = Map.Map FIdKey FIdAlts + +newtype FIdAlts = FIdAlts (UArray Int FId) + deriving (Eq,Ord) + +newtype FIdKey = FIdKey [UArray Int FId] + deriving (Eq,Ord) + +-- Keep exact rectangles to preserve the old finalizer's duplicate and +-- rectangle-area semantics, but store each argument list compactly. +newtype Rectangle = Rectangle [FIdAlts] + deriving (Eq,Ord) + +data ProdGroup = ProdGroup + !(Set.Set Rectangle) + !(Maybe [IntSet.IntSet]) + {-# UNPACK #-} !Int emptyPMCFGEnv = - PMCFGEnv Set.empty Map.empty + PMCFGEnv Map.empty Map.empty Map.empty -addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> [[FId]] -> PMCFGEnv -addFunction (PMCFGEnv prodSet funSet) !fid fun args = +addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> Rectangle -> PMCFGEnv +addFunction (PMCFGEnv prodGroups funSet fidCache) !fid fun rect = case Map.lookup fun funSet of - Just !funid -> PMCFGEnv (Set.insert (Production fid funid args) prodSet) + Just !funid -> PMCFGEnv (insertProduction fid funid rect prodGroups) funSet + fidCache Nothing -> let !funid = Map.size funSet - in PMCFGEnv (Set.insert (Production fid funid args) prodSet) + in PMCFGEnv (insertProduction fid funid rect prodGroups) (Map.insert fun funid funSet) + fidCache getPMCFG :: PMCFGEnv -> PMCFG -getPMCFG (PMCFGEnv prodSet funSet) = - PMCFG (optimize prodSet) (mkSetArray funSet) +getPMCFG (PMCFGEnv prodGroups funSet _) = + PMCFG (Map.foldrWithKey addGroup [] prodGroups) (mkSetArray funSet) where - optimize ps = Map.foldrWithKey ff [] (Map.fromListWith (++) [((fid,funid),[args]) | (Production fid funid args) <- Set.toList ps]) + addGroup :: (FId,FunId) -> ProdGroup -> [Production] -> [Production] + addGroup (fid,funid) (ProdGroup rectangles mArgSets count) prods + | product (map IntSet.size argSets) == count + = Production fid funid (map IntSet.toList argSets) : prods + | otherwise = map (Production fid funid . unpackRectangle) (reverse (Set.toList rectangles)) ++ prods where - ff :: (FId,FunId) -> [[[FId]]] -> [Production] -> [Production] - ff (fid,funid) xs prods - | product (map IntSet.size ys) == count - = (Production fid funid (map IntSet.toList ys)) : prods - | otherwise = map (Production fid funid) xs ++ prods - where - count = sum (map (product . map length) xs) - ys = foldl (zipWith (foldr IntSet.insert)) (repeat IntSet.empty) xs + argSets = case mArgSets of + Just argSets -> argSets + Nothing -> rectangleArgSets rectangles + +insertProduction :: FId -> FunId -> Rectangle -> ProdGroups -> ProdGroups +insertProduction !fid !funid rect prodGroups = + Map.insert (fid,funid) group' prodGroups + where + group' = + case Map.lookup (fid,funid) prodGroups of + Nothing -> singletonProdGroup rect + Just group -> insertRectangle rect group + +singletonProdGroup :: Rectangle -> ProdGroup +singletonProdGroup rect = + let !rects = Set.singleton rect + !argSets = rectangleArgSetsOne rect + !count = rectangleArea rect + in ProdGroup rects (Just argSets) count + +insertRectangle :: Rectangle -> ProdGroup -> ProdGroup +insertRectangle rect group@(ProdGroup rectangles mArgSets count) + | Set.member rect rectangles + = group + | otherwise + = let !rectangles' = Set.insert rect rectangles + !mArgSets' = updateArgSets mArgSets rect + !count' = count + rectangleArea rect + in ProdGroup rectangles' mArgSets' count' + where + addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids + + updateArgSets Nothing _ = Nothing + updateArgSets (Just argSets) (Rectangle args) + | length argSets == length args = let !argSets' = zipWithStrict addArgSet argSets args + in Just argSets' + | otherwise = Nothing + +rectangleArgSets :: Set.Set Rectangle -> [IntSet.IntSet] +rectangleArgSets rectangles = + List.foldl' addRectangle (repeat IntSet.empty) (reverse (Set.toList rectangles)) + where + addRectangle argSets (Rectangle args) = zipWith addArgSet argSets args + addArgSet argSet fids = foldFIdAlts (\s fid -> IntSet.insert fid s) argSet fids + +rectangleArgSetsOne :: Rectangle -> [IntSet.IntSet] +rectangleArgSetsOne (Rectangle args) = + mapStrict (foldFIdAlts (\s fid -> IntSet.insert fid s) IntSet.empty) args + +unpackRectangle :: Rectangle -> [[FId]] +unpackRectangle (Rectangle args) = map fidAltsToList args + +rectangleArea :: Rectangle -> Int +rectangleArea (Rectangle args) = product (map fidAltsSize args) + +getFIdAltsCached :: PMCFGEnv -> ProtoFCat -> (PMCFGEnv, FIdAlts) +getFIdAltsCached env@(PMCFGEnv prodGroups funSet fidCache) pcat + | shouldCacheFIdKey key resultSize = + case Map.lookup key fidCache of + Just alts -> (env,alts) + Nothing -> let !alts = fIdAltsFromKeyWithSize key resultSize + !fidCache' = Map.insert key alts fidCache + in (PMCFGEnv prodGroups funSet fidCache',alts) + | otherwise = + let !alts = fIdAltsFromKeyWithSize key resultSize + in (env,alts) + where + !key = fIdKey pcat + !resultSize = fIdKeyResultSize key + +getSingleFIdCached :: PMCFGEnv -> ProtoFCat -> (PMCFGEnv, FId) +getSingleFIdCached env pcat = + case getFIdAltsCached env pcat of + (env',alts) -> (env',expectSingleFId "getSingleFIdCached" alts) + +getRectangleCached :: PMCFGEnv -> [ProtoFCat] -> (PMCFGEnv, Rectangle) +getRectangleCached env0 pcats = + let !(env,alts) = List.foldl' addAlt (env0,[]) pcats + !rect = Rectangle (reverse alts) + in (env,rect) + where + addAlt (env,alts) pcat = + let !(env',alt) = getFIdAltsCached env pcat + in (env',alt:alts) + +shouldCacheFIdKey :: FIdKey -> Int -> Bool +shouldCacheFIdKey key resultSize = + fIdKeyComponents key > 1 && + resultSize >= 8 && + resultSize > fIdKeyComponentSizeSum key + +fIdAltsFromKey :: FIdKey -> FIdAlts +fIdAltsFromKey key = fIdAltsFromKeyWithSize key (fIdKeyResultSize key) + +fIdAltsFromKeyWithSize :: FIdKey -> Int -> FIdAlts +fIdAltsFromKeyWithSize key@(FIdKey comps) resultSize + | resultSize == 0 = FIdAlts (listArray (0,-1) []) + | resultSize == 1 = singletonFId (fIdKeySingleton key) + | otherwise = FIdAlts $ runSTUArray $ do + arr <- newArray_ (0,resultSize-1) + _ <- fillFIds arr 0 0 comps + return arr + +fillFIds :: STUArray s Int FId -> Int -> FId -> [UArray Int FId] -> ST s Int +fillFIds arr !offset !sum [] = do + writeArray arr offset sum + return (offset+1) +-- Components are ordered outer-to-inner. This must match the old +-- reverse (solutions (variants schema) ()) ordering. +fillFIds arr !offset !sum (choices:choices') = + foldUArrayM (\offset' choice -> fillFIds arr offset' (sum+choice) choices') offset choices + +foldUArrayM :: Monad m => (a -> FId -> m a) -> a -> UArray Int FId -> m a +foldUArrayM f z arr = go (fst bnds) z + where + !bnds@(_,hi) = bounds arr + go !i !acc + | i > hi = return acc + | otherwise = do acc' <- f acc (arr ! i) + go (i+1) acc' + +fIdKeyResultSize :: FIdKey -> Int +fIdKeyResultSize (FIdKey comps) = product (map arraySize comps) + +fIdKeyComponentSizeSum :: FIdKey -> Int +fIdKeyComponentSizeSum (FIdKey comps) = sum (map arraySize comps) + +fIdKeyComponents :: FIdKey -> Int +fIdKeyComponents (FIdKey comps) = length comps + +fIdKeySingleton :: FIdKey -> FId +fIdKeySingleton (FIdKey comps) = List.foldl' addChoice 0 comps + where + addChoice :: FId -> UArray Int FId -> FId + addChoice acc choices + | arraySize choices == 1 = acc + choices ! fst (bounds choices) + | otherwise = bug "fIdKeySingleton: non-singleton key" + +singletonFId :: FId -> FIdAlts +singletonFId fid = FIdAlts (listArray (0,0) [fid]) + +fidAltsSize :: FIdAlts -> Int +fidAltsSize (FIdAlts arr) = arraySize arr + +fidAltsIndex :: FIdAlts -> Int -> FId +fidAltsIndex (FIdAlts arr) i = arr ! i + +expectSingleFId :: String -> FIdAlts -> FId +expectSingleFId label alts + | fidAltsSize alts == 1 = fidAltsIndex alts 0 + | otherwise = bug (label++": expected singleton category") + +fidAltsToList :: FIdAlts -> [FId] +fidAltsToList (FIdAlts arr) = elems arr + +foldFIdAlts :: (a -> FId -> a) -> a -> FIdAlts -> a +foldFIdAlts f z (FIdAlts arr) = go (fst bnds) z + where + !bnds@(_,hi) = bounds arr + go !i !acc + | i > hi = acc + | otherwise = let !acc' = f acc (arr ! i) + in go (i+1) acc' + +arraySize :: UArray Int FId -> Int +arraySize arr = let !(lo,hi) = bounds arr + in max 0 (hi-lo+1) + +mapStrict :: (a -> b) -> [a] -> [b] +mapStrict f [] = [] +mapStrict f (x:xs) = let !y = f x + !ys = mapStrict f xs + in y:ys + +zipWithStrict :: (a -> b -> c) -> [a] -> [b] -> [c] +zipWithStrict f [] [] = [] +zipWithStrict f (x:xs) (y:ys) = let !z = f x y + !zs = zipWithStrict f xs ys + in z:zs +zipWithStrict f _ _ = bug "zipWithStrict: inconsistent list lengths" ------------------------------------------------------------ -- updating the MCF rule diff --git a/src/compiler/GF/Compile/GeneratePmcfgPre.hs b/src/compiler/GF/Compile/GeneratePmcfgPre.hs new file mode 100644 index 000000000..749cb0696 --- /dev/null +++ b/src/compiler/GF/Compile/GeneratePmcfgPre.hs @@ -0,0 +1,640 @@ +{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses, PatternGuards #-} +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Convert PGF grammar to PMCFG grammar. +-- +----------------------------------------------------------------------------- + +module GF.Compile.GeneratePmcfgPre + (generatePMCFG, pgfCncCat, addPMCFG, resourceValues + ) where + +--import PGF.CId +import PGF.Internal as PGF(CncCat(..),Symbol(..),fidVar) + +import GF.Infra.Option +import GF.Grammar hiding (Env, mkRecord, mkTable) +import GF.Grammar.Lookup +import GF.Grammar.Predef +import GF.Grammar.Lockfield (isLockLabel) +import GF.Data.BacktrackM +import GF.Data.Operations +import GF.Infra.UseIO (ePutStr,ePutStrLn) -- IOE, +import GF.Data.Utilities (updateNthM) --updateNth +import GF.Compile.Compute.Concrete(normalForm,resourceValues) +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.List as List +--import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet +import GF.Text.Pretty +import Data.Array.IArray +import Data.Array.Unboxed +--import Data.Maybe +--import Data.Char (isDigit) +import Control.Applicative(Applicative(..)) +import Control.Monad +import Control.Monad.Identity +--import Control.Exception +--import Debug.Trace(trace) +import qualified Control.Monad.Fail as Fail + +---------------------------------------------------------------------- +-- main conversion function + +--generatePMCFG :: Options -> SourceGrammar -> Maybe FilePath -> SourceModule -> IOE SourceModule +generatePMCFG opts sgr opath cmo@(cm,cmi) = do + (seqs,js) <- mapAccumWithKeyM (addPMCFG opts gr cenv opath am cm) Map.empty (jments cmi) + when (verbAtLeast opts Verbose) $ ePutStrLn "" + return (cm,cmi{mseqs = Just (mkSetArray seqs), jments = js}) + where + cenv = resourceValues opts gr + gr = prependModule sgr cmo + MTConcrete am = mtype cmi + +mapAccumWithKeyM :: (Monad m, Ord k) => (a -> k -> b -> m (a,c)) -> a + -> Map.Map k b -> m (a,Map.Map k c) +mapAccumWithKeyM f a m = do let xs = Map.toAscList m + (a,ys) <- mapAccumM f a xs + return (a,Map.fromAscList ys) + where + mapAccumM f a [] = return (a,[]) + mapAccumM f a ((k,x):kxs) = do (a,y ) <- f a k x + (a,kys) <- mapAccumM f a kxs + return (a,(k,y):kys) + + +--addPMCFG :: Options -> SourceGrammar -> GlobalEnv -> Maybe FilePath -> Ident -> Ident -> SeqSet -> Ident -> Info -> IOE (SeqSet, Info) +addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncFun mty@(Just (cat,cont,val)) mlin@(Just (L loc term)) mprn Nothing) = do +--when (verbAtLeast opts Verbose) $ ePutStr ("\n+ "++showIdent id++" ...") + let pres = protoFCat gr res val + pargs = [protoFCat gr (snd $ catSkeleton ty) lincat | ((_,_,ty),(_,_,lincat)) <- zip ctxt cont] + + pmcfgEnv0 = emptyPMCFGEnv + b <- convert opts gr cenv (floc opath loc id) term (cont,val) pargs + let (seqs1,b1) = addSequencesB seqs b + pmcfgEnv1 = foldBM addRule + pmcfgEnv0 + (goB b1 CNil []) + (pres,pargs) + pmcfg = getPMCFG pmcfgEnv1 + + stats = let PMCFG prods funs = pmcfg + (s,e) = bounds funs + !prods_cnt = length prods + !funs_cnt = e-s+1 + in (prods_cnt,funs_cnt) + + when (verbAtLeast opts Verbose) $ + ePutStr ("\n+ "++showIdent id++" "++show (product (map catFactor pargs))) + seqs1 `seq` stats `seq` return () + when (verbAtLeast opts Verbose) $ ePutStr (" "++show stats) + return (seqs1,GF.Grammar.CncFun mty mlin mprn (Just pmcfg)) + where + (ctxt,res,_) = err bug typeForm (lookupFunType gr am id) + + addRule lins (newCat', newArgs') env0 = + let [newCat] = getFIds newCat' + !fun = mkArray lins + newArgs = map getFIds newArgs' + in addFunction env0 newCat fun newArgs + +addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncCat mty@(Just (L _ lincat)) + mdef@(Just (L loc1 def)) + mref@(Just (L loc2 ref)) + mprn + Nothing) = do + let pcat = protoFCat gr (am,id) lincat + pvar = protoFCat gr (MN identW,cVar) typeStr + + pmcfgEnv0 = emptyPMCFGEnv + + let lincont = [(Explicit, varStr, typeStr)] + b <- convert opts gr cenv (floc opath loc1 id) def (lincont,lincat) [pvar] + let (seqs1,b1) = addSequencesB seqs b + pmcfgEnv1 = foldBM addLindef + pmcfgEnv0 + (goB b1 CNil []) + (pcat,[pvar]) + + let lincont = [(Explicit, varStr, lincat)] + b <- convert opts gr cenv (floc opath loc2 id) ref (lincont,typeStr) [pcat] + let (seqs2,b2) = addSequencesB seqs1 b + pmcfgEnv2 = foldBM addLinref + pmcfgEnv1 + (goB b2 CNil []) + (pvar,[pcat]) + + let pmcfg = getPMCFG pmcfgEnv2 + + when (verbAtLeast opts Verbose) $ ePutStr ("\n+ "++showIdent id++" "++show (catFactor pcat)) + seqs2 `seq` pmcfg `seq` return (seqs2,GF.Grammar.CncCat mty mdef mref mprn (Just pmcfg)) + where + addLindef lins (newCat', newArgs') env0 = + let [newCat] = getFIds newCat' + !fun = mkArray lins + in addFunction env0 newCat fun [[fidVar]] + + addLinref lins (newCat', [newArg']) env0 = + let newArg = getFIds newArg' + !fun = mkArray lins + in addFunction env0 fidVar fun [newArg] + +addPMCFG opts gr cenv opath am cm seqs id info = return (seqs, info) + +floc opath loc id = maybe (L loc id) (\path->L (External path loc) id) opath + +convert opts gr cenv loc term ty@(_,val) pargs = + case normalForm cenv loc (etaExpand ty term) of + Error s -> fail $ render $ ppL loc ("Predef.error: "++s) + term -> return $ runCnvMonad gr (convertTerm opts CNil val term) (pargs,[]) + where + etaExpand (context,val) = mkAbs pars . flip mkApp args + where pars = [(Explicit,v) | v <- vars] + args = map Vr vars + vars = map (\(bt,x,t) -> x) context + +pgfCncCat :: SourceGrammar -> Type -> Int -> CncCat +pgfCncCat gr lincat index = + let ((_,size),schema) = computeCatRange gr lincat + in PGF.CncCat index (index+size-1) + (mkArray (map (renderStyle style{mode=OneLineMode} . ppPath) + (getStrPaths schema))) + where + getStrPaths :: Schema Identity s c -> [Path] + getStrPaths = collect CNil [] + where + collect path paths (CRec rs) = foldr (\(lbl,Identity t) paths -> collect (CProj lbl path) paths t) paths rs + collect path paths (CTbl _ cs) = foldr (\(trm,Identity t) paths -> collect (CSel trm path) paths t) paths cs + collect path paths (CStr _) = reversePath path : paths + collect path paths (CPar _) = paths + +---------------------------------------------------------------------- +-- CnvMonad monad +-- +-- The branching monad provides backtracking together with +-- recording of the choices made. We have two cases +-- when we have alternative choices: +-- +-- * when we have parameter type, then +-- we have to try all possible values +-- * when we have variants we have to try all alternatives +-- +-- The conversion monad keeps track of the choices and they are +-- returned as 'Branch' data type. + +data Branch a + = Case Int Path [(Term,Branch a)] + | Variant [Branch a] + | Return a + +newtype CnvMonad a = CM {unCM :: SourceGrammar + -> forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b) + -> ([ProtoFCat],[Symbol]) + -> Branch b} + +instance Fail.MonadFail CnvMonad where + fail = bug + +instance Applicative CnvMonad where + pure a = CM (\gr c s -> c a s) + (<*>) = ap + +instance Monad CnvMonad where + return = pure + CM m >>= k = CM (\gr c s -> m gr (\a s -> unCM (k a) gr c s) s) + +instance MonadState ([ProtoFCat],[Symbol]) CnvMonad where + get = CM (\gr c s -> c s s) + put s = CM (\gr c _ -> c () s) + +instance Functor CnvMonad where + fmap f (CM m) = CM (\gr c s -> m gr (c . f) s) + +runCnvMonad :: SourceGrammar -> CnvMonad a -> ([ProtoFCat],[Symbol]) -> Branch a +runCnvMonad gr (CM m) s = m gr (\v s -> Return v) s + +-- | backtracking for all variants +variants :: [a] -> CnvMonad a +variants xs = CM (\gr c s -> Variant [c x s | x <- xs]) + +-- | backtracking for all parameter values that a variable could take +choices :: Int -> Path -> CnvMonad Term +choices nr path = do (args,_) <- get + let PFCat _ _ schema = args !! nr + descend schema path CNil + where + descend (CRec rs) (CProj lbl path) rpath = case lookup lbl rs of + Just (Identity t) -> descend t path (CProj lbl rpath) + descend (CRec rs) CNil rpath = do rs <- mapM (\(lbl,Identity t) -> fmap (assign lbl) (descend t CNil (CProj lbl rpath))) rs + return (R rs) + descend (CTbl pt cs) (CSel trm path) rpath = case lookup trm cs of + Just (Identity t) -> descend t path (CSel trm rpath) + descend (CTbl pt cs) CNil rpath = do cs <- mapM (\(trm,Identity t) -> descend t CNil (CSel trm rpath)) cs + return (V pt cs) + descend (CPar (m,vs)) CNil rpath = case vs of + [(value,index)] -> return value + values -> let path = reversePath rpath + in CM (\gr c s -> Case nr path [(value, updateEnv path value gr c s) + | (value,index) <- values]) + descend schema path rpath = bug $ "descend "++show (schema,path,rpath) + + updateEnv path value gr c (args,seq) = + case updateNthM (restrictProtoFCat path value) nr args of + Just args -> c value (args,seq) + Nothing -> bug "conflict in updateEnv" + +-- | the argument should be a parameter type and then +-- the function returns all possible values. +getAllParamValues :: Type -> CnvMonad [Term] +getAllParamValues ty = CM (\gr c -> c (err bug id (allParamValues gr ty))) + +mkRecord :: [(Label,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c) +mkRecord xs = CM (\gr c -> foldl (\c (lbl,CM m) bs s -> c ((lbl,m gr (\v s -> Return v) s) : bs) s) (c . CRec) xs []) + +mkTable :: Type -> [(Term ,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c) +mkTable pt xs = CM (\gr c -> foldl (\c (trm,CM m) bs s -> c ((trm,m gr (\v s -> Return v) s) : bs) s) (c . CTbl pt) xs []) + +---------------------------------------------------------------------- +-- Term Schema +-- +-- The term schema is a term-like structure, with records, tables, +-- strings and parameters values, but in addition we could add +-- annotations of arbitrary types + +-- | Term schema +data Schema b s c + = CRec [(Label,b (Schema b s c))] + | CTbl Type [(Term, b (Schema b s c))] + | CStr s + | CPar c +--deriving Show -- doesn't work + +instance Show s => Show (Schema b s c) where + showsPrec _ sch = + case sch of + CRec r -> showString "CRec " . shows (map fst r) + CTbl t _ -> showString "CTbl " . showsPrec 10 t . showString " _" + CStr s -> showString "CStr " . showsPrec 10 s + CPar c -> showString "CPar{}" + +-- | Path into a term or term schema +data Path + = CProj Label Path + | CSel Term Path + | CNil + deriving (Eq,Show) + +-- | The ProtoFCat represents a linearization type as term schema. +-- The annotations are as follows: the strings are annotated with +-- their index in the PMCFG tuple, the parameters are annotated +-- with their value both as term and as index. +data ProtoFCat = PFCat Ident Int (Schema Identity Int (Int,[(Term,Int)])) +type Env = (ProtoFCat, [ProtoFCat]) + +protoFCat :: SourceGrammar -> Cat -> Type -> ProtoFCat +protoFCat gr cat lincat = + case computeCatRange gr lincat of + ((_,f),schema) -> PFCat (snd cat) f schema + +getFIds :: ProtoFCat -> [FId] +getFIds (PFCat _ _ schema) = + reverse (solutions (variants schema) ()) + where + variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs + variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs + variants (CStr _) = return 0 + variants (CPar (m,values)) = do (value,index) <- member values + return (m*index) + +catFactor :: ProtoFCat -> Int +catFactor (PFCat _ f _) = f + +computeCatRange gr lincat = compute (0,1) lincat + where + compute st (RecType rs) = let (st',rs') = List.mapAccumL (\st (lbl,t) -> case lbl of + LVar _ -> let (st',t') = compute st t + in (st ,(lbl,Identity t')) + _ -> let (st',t') = compute st t + in (st',(lbl,Identity t'))) st rs + in (st',CRec rs') + compute st (Table pt vt) = let vs = err bug id (allParamValues gr pt) + (st',cs') = List.mapAccumL (\st v -> let (st',vt') = compute st vt + in (st',(v,Identity vt'))) st vs + in (st',CTbl pt cs') + compute st (Sort s) + | s == cStr = let (index,m) = st + in ((index+1,m),CStr index) + compute st t = let vs = err bug id (allParamValues gr t) + (index,m) = st + in ((index,m*length vs),CPar (m,zip vs [0..])) + +ppPath (CProj lbl path) = lbl <+> ppPath path +ppPath (CSel trm path) = ppU 5 trm <+> ppPath path +ppPath CNil = empty + +reversePath path = rev CNil path + where + rev path0 CNil = path0 + rev path0 (CProj lbl path) = rev (CProj lbl path0) path + rev path0 (CSel trm path) = rev (CSel trm path0) path + + +---------------------------------------------------------------------- +-- term conversion + +type Value a = Schema Branch a Term + +convertTerm :: Options -> Path -> Type -> Term -> CnvMonad (Value [Symbol]) +convertTerm opts sel ctype (Vr x) = convertArg opts ctype (getVarIndex x) (reversePath sel) +convertTerm opts sel ctype (Abs _ _ t) = convertTerm opts sel ctype t -- there are only top-level abstractions and we ignore them !!! +convertTerm opts sel ctype (R record) = convertRec opts sel ctype record +convertTerm opts sel ctype (P term l) = convertTerm opts (CProj l sel) ctype term +convertTerm opts sel ctype (V pt ts) = convertTbl opts sel ctype pt ts +convertTerm opts sel ctype (S term p) = do v <- evalTerm CNil p + convertTerm opts (CSel v sel) ctype term +convertTerm opts sel ctype (FV vars) = do term <- variants vars + convertTerm opts sel ctype term +convertTerm opts sel ctype (C t1 t2) = do v1 <- convertTerm opts sel ctype t1 + v2 <- convertTerm opts sel ctype t2 + return (CStr (concat [s | CStr s <- [v1,v2]])) +convertTerm opts sel ctype (K t) = return (CStr [SymKS t]) +convertTerm opts sel ctype Empty = return (CStr []) +convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil ctype s + alts <- forM alts $ \(u,alt) -> do + CStr u <- convertTerm opts CNil ctype u + Strs ps <- unPatt alt + ps <- mapM (convertTerm opts CNil ctype) ps + return (u,map unSym ps) + return (CStr [SymKP s alts]) + where + unSym (CStr []) = "" + unSym (CStr [SymKS t]) = t + unSym _ = ppbug $ hang ("invalid prefix in pre expression:") 4 (Alts s alts) + + unPatt (EPatt p) = fmap Strs (getPatts p) + unPatt u = return u + + getPatts p = case p of + PAlt a b -> liftM2 (++) (getPatts a) (getPatts b) + PString s -> return [K s] + PSeq a b -> do + as <- getPatts a + bs <- getPatts b + return [K (s ++ t) | K s <- as, K t <- bs] + _ -> fail (render ("not valid pattern in pre expression" <+> ppPatt Unqualified 0 p)) + +convertTerm opts sel ctype (Q (m,f)) + | m == cPredef && + f == cBIND = return (CStr [SymBIND]) + | m == cPredef && + f == cSOFT_BIND = return (CStr [SymSOFT_BIND]) + | m == cPredef && + f == cSOFT_SPACE = return (CStr [SymSOFT_SPACE]) + | m == cPredef && + f == cCAPIT = return (CStr [SymCAPIT]) + | m == cPredef && + f == cALL_CAPIT = return (CStr [SymALL_CAPIT]) + | m == cPredef && + f == cNonExist = return (CStr [SymNE]) +{- +convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2)) + | l `elem` map fst rs2 = convertTerm opts sel ctype t2 + | otherwise = convertTerm opts sel ctype t1 + +convertTerm opts sel@(CProj l _) ctype (ExtR t1@(R rs1) t2) + | l `elem` map fst rs1 = convertTerm opts sel ctype t1 + | otherwise = convertTerm opts sel ctype t2 +-} +convertTerm opts CNil ctype t = do v <- evalTerm CNil t + return (CPar v) +convertTerm _ sel _ t = ppbug ("convertTerm" <+> sep [parens (show sel),ppU 10 t]) + +convertArg :: Options -> Term -> Int -> Path -> CnvMonad (Value [Symbol]) +convertArg opts (RecType rs) nr path = + mkRecord (map (\(lbl,ctype) -> (lbl,convertArg opts ctype nr (CProj lbl path))) rs) +convertArg opts (Table pt vt) nr path = do + vs <- getAllParamValues pt + mkTable pt (map (\v -> (v,convertArg opts vt nr (CSel v path))) vs) +convertArg opts (Sort _) nr path = do + (args,_) <- get + let PFCat cat _ schema = args !! nr + l = index (reversePath path) schema + sym | CProj (LVar i) CNil <- path = SymVar nr i + | isLiteralCat opts cat = SymLit nr l + | otherwise = SymCat nr l + return (CStr [sym]) + where + index (CProj lbl path) (CRec rs) = case lookup lbl rs of + Just (Identity t) -> index path t + index (CSel trm path) (CTbl _ rs) = case lookup trm rs of + Just (Identity t) -> index path t + index CNil (CStr idx) = idx +convertArg opts ty nr path = do + value <- choices nr (reversePath path) + return (CPar value) + +convertRec opts CNil (RecType rs) record = + mkRecord [(lbl,convertTerm opts CNil ctype (proj lbl))|(lbl,ctype)<-rs] + where proj lbl = if isLockLabel lbl then R [] else projectRec lbl record +convertRec opts (CProj lbl path) ctype record = + convertTerm opts path ctype (projectRec lbl record) +convertRec opts _ ctype _ = bug ("convertRec: "++show ctype) + +convertTbl opts CNil (Table _ vt) pt ts = do + vs <- getAllParamValues pt + mkTable pt (zipWith (\v t -> (v,convertTerm opts CNil vt t)) vs ts) +convertTbl opts (CSel v sub_sel) ctype pt ts = do + vs <- getAllParamValues pt + case lookup v (zip vs ts) of + Just t -> convertTerm opts sub_sel ctype t + Nothing -> ppbug ( "convertTbl:" <+> ("missing value" <+> v $$ + "among" <+> vcat vs)) +convertTbl opts _ ctype _ _ = bug ("convertTbl: "++show ctype) + + +goB :: Branch (Value SeqId) -> Path -> [SeqId] -> BacktrackM Env [SeqId] +goB (Case nr path bs) rpath ss = do (value,b) <- member bs + restrictArg nr path value + goB b rpath ss +goB (Variant bs) rpath ss = do b <- member bs + goB b rpath ss +goB (Return v) rpath ss = goV v rpath ss + +goV :: Value SeqId -> Path -> [SeqId] -> BacktrackM Env [SeqId] +goV (CRec xs) rpath ss = foldM (\ss (lbl,b) -> goB b (CProj lbl rpath) ss) ss (reverse xs) +goV (CTbl _ xs) rpath ss = foldM (\ss (trm,b) -> goB b (CSel trm rpath) ss) ss (reverse xs) +goV (CStr seqid) rpath ss = return (seqid : ss) +goV (CPar t) rpath ss = restrictHead (reversePath rpath) t >> return ss + + +---------------------------------------------------------------------- +-- SeqSet + +type SeqSet = Map.Map Sequence SeqId + +addSequencesB :: SeqSet -> Branch (Value [Symbol]) -> (SeqSet, Branch (Value SeqId)) +addSequencesB seqs (Case nr path bs) = let !(seqs1,bs1) = mapAccumL' (\seqs (trm,b) -> let !(seqs',b') = addSequencesB seqs b + in (seqs',(trm,b'))) seqs bs + in (seqs1,Case nr path bs1) +addSequencesB seqs (Variant bs) = let !(seqs1,bs1) = mapAccumL' addSequencesB seqs bs + in (seqs1,Variant bs1) +addSequencesB seqs (Return v) = let !(seqs1,v1) = addSequencesV seqs v + in (seqs1,Return v1) + +addSequencesV :: SeqSet -> Value [Symbol] -> (SeqSet, Value SeqId) +addSequencesV seqs (CRec vs) = let !(seqs1,vs1) = mapAccumL' (\seqs (lbl,b) -> let !(seqs',b') = addSequencesB seqs b + in (seqs',(lbl,b'))) seqs vs + in (seqs1,CRec vs1) +addSequencesV seqs (CTbl pt vs)=let !(seqs1,vs1) = mapAccumL' (\seqs (trm,b) -> let !(seqs',b') = addSequencesB seqs b + in (seqs',(trm,b'))) seqs vs + in (seqs1,CTbl pt vs1) +addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs lin + in (seqs1,CStr seqid) +addSequencesV seqs (CPar i) = (seqs,CPar i) + +-- a strict version of Data.List.mapAccumL +mapAccumL' f s [] = (s,[]) +mapAccumL' f s (x:xs) = (s'',y:ys) + where !(s', y ) = f s x + !(s'',ys) = mapAccumL' f s' xs + +addSequence :: SeqSet -> [Symbol] -> (SeqSet,SeqId) +addSequence seqs lst = + case Map.lookup seq seqs of + Just id -> (seqs,id) + Nothing -> let !last_seq = Map.size seqs + in (Map.insert seq last_seq seqs, last_seq) + where + seq = mkArray lst + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: Path -> Term -> CnvMonad Term +evalTerm CNil (QC f) = return (QC f) +evalTerm CNil (App x y) = do x <- evalTerm CNil x + y <- evalTerm CNil y + return (App x y) +evalTerm path (Vr x) = choices (getVarIndex x) path +evalTerm path (R rs) = + case path of + CProj lbl path -> evalTerm path (projectRec lbl rs) + CNil -> R `fmap` mapM (\(lbl,(_,t)) -> assign lbl `fmap` evalTerm path t) rs +evalTerm path (P term lbl) = evalTerm (CProj lbl path) term +evalTerm path (V pt ts) = + case path of + CNil -> V pt `fmap` mapM (evalTerm path) ts + CSel trm path -> + do vs <- getAllParamValues pt + case lookup trm (zip vs ts) of + Just t -> evalTerm path t + Nothing -> ppbug $ "evalTerm: missing value:"<+>trm + $$ "among:" <+>fsep (map (ppU 10) vs) +evalTerm path (S term sel) = do v <- evalTerm CNil sel + evalTerm (CSel v path) term +evalTerm path (FV terms) = variants terms >>= evalTerm path +evalTerm path (EInt n) = return (EInt n) +evalTerm path t = ppbug ("evalTerm" <+> parens t) +--evalTerm path t = ppbug (text "evalTerm" <+> sep [parens (text (show path)),parens (text (show t))]) + +getVarIndex x = maybe err id $ getArgIndex x + where err = bug ("getVarIndex "++show x) + +---------------------------------------------------------------------- +-- GrammarEnv + +data PMCFGEnv = PMCFGEnv !ProdSet !FunSet +type ProdSet = Set.Set Production +type FunSet = Map.Map (UArray LIndex SeqId) FunId + +emptyPMCFGEnv = + PMCFGEnv Set.empty Map.empty + +addFunction :: PMCFGEnv -> FId -> UArray LIndex SeqId -> [[FId]] -> PMCFGEnv +addFunction (PMCFGEnv prodSet funSet) !fid fun args = + case Map.lookup fun funSet of + Just !funid -> PMCFGEnv (Set.insert (Production fid funid args) prodSet) + funSet + Nothing -> let !funid = Map.size funSet + in PMCFGEnv (Set.insert (Production fid funid args) prodSet) + (Map.insert fun funid funSet) + +getPMCFG :: PMCFGEnv -> PMCFG +getPMCFG (PMCFGEnv prodSet funSet) = + PMCFG (optimize prodSet) (mkSetArray funSet) + where + optimize ps = Map.foldrWithKey ff [] (Map.fromListWith (++) [((fid,funid),[args]) | (Production fid funid args) <- Set.toList ps]) + where + ff :: (FId,FunId) -> [[[FId]]] -> [Production] -> [Production] + ff (fid,funid) xs prods + | product (map IntSet.size ys) == count + = (Production fid funid (map IntSet.toList ys)) : prods + | otherwise = map (Production fid funid) xs ++ prods + where + count = sum (map (product . map length) xs) + ys = foldl (zipWith (foldr IntSet.insert)) (repeat IntSet.empty) xs + +------------------------------------------------------------ +-- updating the MCF rule + +restrictArg :: LIndex -> Path -> Term -> BacktrackM Env () +restrictArg nr path index = do + (head, args) <- get + args <- updateNthM (restrictProtoFCat path index) nr args + put (head, args) + +restrictHead :: Path -> Term -> BacktrackM Env () +restrictHead path term = do + (head, args) <- get + head <- restrictProtoFCat path term head + put (head, args) + +restrictProtoFCat :: (Functor m, MonadPlus m) => Path -> Term -> ProtoFCat -> m ProtoFCat +restrictProtoFCat path v (PFCat cat f schema) = do + schema <- addConstraint path v schema + return (PFCat cat f schema) + where + addConstraint (CProj lbl path) v (CRec rs) = fmap CRec $ update lbl (addConstraint path v) rs + addConstraint (CSel trm path) v (CTbl pt cs) = fmap (CTbl pt) $ update trm (addConstraint path v) cs + addConstraint CNil v (CPar (m,vs)) = case lookup v vs of + Just index -> return (CPar (m,[(v,index)])) + Nothing -> mzero + addConstraint CNil v (CStr _) = bug "restrictProtoFCat: string path" + + update k0 f [] = return [] + update k0 f (x@(k,Identity v):xs) + | k0 == k = do v <- f v + return ((k,Identity v):xs) + | otherwise = do xs <- update k0 f xs + return (x:xs) + +mkArray lst = listArray (0,length lst-1) lst +mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + +bug msg = ppbug msg +ppbug msg = error completeMsg + where + originalMsg = render $ hang "Internal error in GeneratePMCFG:" 4 msg + completeMsg = + case render msg of -- the error message for pattern matching a runtime string + "descend (CStr 0,CNil,CProj (LIdent (Id {rawId2utf8 = \"s\"})) CNil)" + -> unlines [originalMsg -- add more helpful output + ,"" + ,"1) Check that you are not trying to pattern match a /runtime string/." + ," These are illegal:" + ," lin Test foo = case foo.s of {" + ," \"str\" => … } ; <- explicit matching argument of a lin" + ," lin Test foo = opThatMatches foo <- calling an oper that pattern matches" + ,"" + ,"2) Not about pattern matching? Submit a bug report and we update the error message." + ," https://github.com/GrammaticalFramework/gf-core/issues" + ] + _ -> originalMsg -- any other message: just print it as is + +ppU = ppTerm Unqualified diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index 8c4d4558c..31f339cd8 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -1,8 +1,12 @@ -{-# LANGUAGE BangPatterns, FlexibleContexts #-} +{-# LANGUAGE BangPatterns, CPP, FlexibleContexts #-} module GF.Compile.GrammarToPGF (mkCanon2pgf) where --import GF.Compile.Export +#ifdef PRE_PMCFG +import GF.Compile.GeneratePmcfgPre +#else import GF.Compile.GeneratePMCFG +#endif import GF.Compile.GenerateBC import PGF(CId,mkCId,utf8CId) diff --git a/src/compiler/GF/CompileOne.hs b/src/compiler/GF/CompileOne.hs index 48761671a..9671f9e92 100644 --- a/src/compiler/GF/CompileOne.hs +++ b/src/compiler/GF/CompileOne.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} module GF.CompileOne(-- ** Compiling a single module OneOutput,CompiledModule, compileOne,reuseGFO,useTheSource @@ -10,7 +11,11 @@ import GF.Compile.Rename(renameModule) import GF.Compile.CheckGrammar(checkModule) import GF.Compile.Optimize(optimizeModule) import GF.Compile.SubExOpt(subexpModule,unsubexpModule) +#ifdef PRE_PMCFG +import GF.Compile.GeneratePmcfgPre(generatePMCFG) +#else import GF.Compile.GeneratePMCFG(generatePMCFG) +#endif import GF.Compile.Update(extendModule,rebuildModule) import GF.Compile.Tags(writeTags,gf2gftags) diff --git a/stack-ghc9.6.7.yaml b/stack-ghc9.6.7.yaml index 2e3f1b11d..ad0d21fcd 100644 --- a/stack-ghc9.6.7.yaml +++ b/stack-ghc9.6.7.yaml @@ -29,3 +29,12 @@ extra-deps: - syb-0.7.2.4@sha256:936d5a92084ad9d88c5a9dd2e622deab57ce48ce85be93e6273b3f8eb64c12ca,3872 - th-compat-0.1.6@sha256:e83d97946f84fe492762ceb3b4753b4770c78b0b70e594078700baa91a5106c2,2885 - utf8-string-1.0.2@sha256:79416292186feeaf1f60e49ac5a1ffae9bf1b120e040a74bf0e81ca7f1d31d3f,1538 +- tasty-bench-0.5@sha256:4f142713f5a7fe760a014299492a2790b1903f55b3331ab7f93709308f25e2ab,2037 +- tasty-1.5.3@sha256:6b5dda3f16db1274a0b3e6c4073ac57172a1e96b1dca05666c5cbd1183639412,2923 +- ansi-terminal-1.1.5@sha256:408d9232e5304efead718f96138d6d7eb2da608c6121c5c0dc6a71a833b14320,3327 +- optparse-applicative-0.19.0.0@sha256:0b47b3ff7eb4a855b5f7d401690365fc62a32057374927ce0d478b620996222d,5813 +- tagged-0.8.10@sha256:e9b97c98e9827981d62f37c5febf9e6bbb67acec92b8bd41fd9f7ace5eb31d32,2201 +- ansi-terminal-types-1.1.3@sha256:1d6061eceaf35a9ed269b81177dd4c8c60403a499526f7f68fdffa4ec7842e7f,1523 +- colour-2.3.7@sha256:613dc8d5f6e51babe57398a742ab8d5c2964fa98b773f3bf6e6a72b7e87f1c6e,2543 +- prettyprinter-1.7.2@sha256:e3a802ea939d465b2e37b9fa09d8e5bb88b5775d6d62dd2ec6ca32b63416656d,6618 +- prettyprinter-ansi-terminal-1.1.4@sha256:5184ba6002a650de3307547347eaba19bdc9b82eb68e943f406818caa215ce4f,3134 diff --git a/stack.yaml b/stack.yaml index 8c6a0e871..e66ee5d69 100644 --- a/stack.yaml +++ b/stack.yaml @@ -32,3 +32,12 @@ extra-deps: - syb-0.7.2.4@sha256:936d5a92084ad9d88c5a9dd2e622deab57ce48ce85be93e6273b3f8eb64c12ca,3872 - th-compat-0.1.6@sha256:e83d97946f84fe492762ceb3b4753b4770c78b0b70e594078700baa91a5106c2,2885 - utf8-string-1.0.2@sha256:79416292186feeaf1f60e49ac5a1ffae9bf1b120e040a74bf0e81ca7f1d31d3f,1538 +- tasty-bench-0.5@sha256:4f142713f5a7fe760a014299492a2790b1903f55b3331ab7f93709308f25e2ab,2037 +- tasty-1.5.3@sha256:6b5dda3f16db1274a0b3e6c4073ac57172a1e96b1dca05666c5cbd1183639412,2923 +- ansi-terminal-1.1.5@sha256:408d9232e5304efead718f96138d6d7eb2da608c6121c5c0dc6a71a833b14320,3327 +- optparse-applicative-0.19.0.0@sha256:0b47b3ff7eb4a855b5f7d401690365fc62a32057374927ce0d478b620996222d,5813 +- tagged-0.8.10@sha256:e9b97c98e9827981d62f37c5febf9e6bbb67acec92b8bd41fd9f7ace5eb31d32,2201 +- ansi-terminal-types-1.1.3@sha256:1d6061eceaf35a9ed269b81177dd4c8c60403a499526f7f68fdffa4ec7842e7f,1523 +- colour-2.3.7@sha256:613dc8d5f6e51babe57398a742ab8d5c2964fa98b773f3bf6e6a72b7e87f1c6e,2543 +- prettyprinter-1.7.2@sha256:e3a802ea939d465b2e37b9fa09d8e5bb88b5775d6d62dd2ec6ca32b63416656d,6618 +- prettyprinter-ansi-terminal-1.1.4@sha256:5184ba6002a650de3307547347eaba19bdc9b82eb68e943f406818caa215ce4f,3134 |
