diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/compiler | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/compiler')
111 files changed, 20655 insertions, 0 deletions
diff --git a/src/compiler/GF.hs b/src/compiler/GF.hs new file mode 100644 index 000000000..32a95ca1f --- /dev/null +++ b/src/compiler/GF.hs @@ -0,0 +1,45 @@ +{-# OPTIONS -cpp #-} +module Main where + +import GFC +import GFI +import GF.Data.ErrM +import GF.Infra.Option +import GF.Infra.UseIO +import Paths_gf + +import Data.Version +import System.Directory +import System.Environment (getArgs) +import System.Exit +import System.IO +#ifdef mingw32_HOST_OS +import System.Win32.Console +import System.Win32.NLS +#endif + +main :: IO () +main = do +#ifdef mingw32_HOST_OS + codepage <- getACP + setConsoleCP codepage + setConsoleOutputCP codepage +#endif + args <- getArgs + case parseOptions args of + Ok (opts,files) -> do curr_dir <- getCurrentDirectory + lib_dir <- getLibraryDirectory opts + mainOpts (fixRelativeLibPaths curr_dir lib_dir opts) files + Bad err -> do hPutStrLn stderr err + hPutStrLn stderr "You may want to try --help." + exitFailure + +mainOpts :: Options -> [FilePath] -> IO () +mainOpts opts files = + case flag optMode opts of + ModeVersion -> putStrLn $ "Grammatical Framework (GF) version " ++ showVersion version + ModeHelp -> putStrLn helpMessage + ModeInteractive -> mainGFI opts files + ModeRun -> mainRunGFI opts files + ModeCompiler -> dieIOE (mainGFC opts files) + diff --git a/src/compiler/GF/Command/Abstract.hs b/src/compiler/GF/Command/Abstract.hs new file mode 100644 index 000000000..1f7c4014e --- /dev/null +++ b/src/compiler/GF/Command/Abstract.hs @@ -0,0 +1,79 @@ +module GF.Command.Abstract where + +import PGF.CId +import PGF.Data + +type Ident = String + +type CommandLine = [Pipe] + +type Pipe = [Command] + +data Command + = Command Ident [Option] Argument + deriving (Eq,Ord,Show) + +data Option + = OOpt Ident + | OFlag Ident Value + deriving (Eq,Ord,Show) + +data Value + = VId Ident + | VInt Int + | VStr String + deriving (Eq,Ord,Show) + +data Argument + = AExpr Expr + | ANoArg + | AMacro Ident + deriving (Eq,Ord,Show) + +valCIdOpts :: String -> CId -> [Option] -> CId +valCIdOpts flag def opts = + case [v | OFlag f (VId v) <- opts, f == flag] of + (v:_) -> mkCId v + _ -> def + +valIntOpts :: String -> Int -> [Option] -> Int +valIntOpts flag def opts = + case [v | OFlag f (VInt v) <- opts, f == flag] of + (v:_) -> v + _ -> def + +valStrOpts :: String -> String -> [Option] -> String +valStrOpts flag def opts = + case [v | OFlag f v <- opts, f == flag] of + (VStr v:_) -> v + (VId v:_) -> v + (VInt v:_) -> show v + _ -> def + +isOpt :: String -> [Option] -> Bool +isOpt o opts = elem o [x | OOpt x <- opts] + +isFlag :: String -> [Option] -> Bool +isFlag o opts = elem o [x | OFlag x _ <- opts] + +optsAndFlags :: [Option] -> ([Option],[Option]) +optsAndFlags = foldr add ([],[]) where + add o (os,fs) = case o of + OOpt _ -> (o:os,fs) + OFlag _ _ -> (os,o:fs) + +prOpt :: Option -> String +prOpt o = case o of + OOpt i -> i + OFlag f x -> f ++ "=" ++ show x + +mkOpt :: String -> Option +mkOpt = OOpt + +-- abbreviation convention from gf commands +getCommandOp s = case break (=='_') s of + (a:_,_:b:_) -> [a,b] -- axx_byy --> ab + _ -> case s of + [a,b] -> s -- ab --> ab + a:_ -> [a] -- axx --> a + diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs new file mode 100644 index 000000000..d8e2a3023 --- /dev/null +++ b/src/compiler/GF/Command/Commands.hs @@ -0,0 +1,931 @@ +{-# LANGUAGE PatternGuards #-} + +module GF.Command.Commands ( + allCommands, + lookCommand, + exec, + isOpt, + options, + flags, + needsTypeCheck, + CommandInfo, + CommandOutput + ) where + +import PGF +import PGF.CId +import PGF.ShowLinearize +import PGF.VisualizeTree +import PGF.Macros +import PGF.Data ---- +import PGF.Morphology +import GF.Compile.Export +import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..)) +import GF.Infra.UseIO +import GF.Data.ErrM ---- +import GF.Command.Abstract +import GF.Command.Messages +import GF.Text.Lexing +import GF.Text.Transliterations +import GF.Quiz + +import GF.Command.TreeOperations ---- temporary place for typecheck and compute + +import GF.Data.Operations +import GF.Text.Coding + +import Data.List +import Data.Maybe +import qualified Data.Map as Map +import System.Cmd +import Text.PrettyPrint +import Data.List (sort) +import Debug.Trace + +type CommandOutput = ([Expr],String) ---- errors, etc + +data CommandInfo = CommandInfo { + exec :: [Option] -> [Expr] -> IO CommandOutput, + synopsis :: String, + syntax :: String, + explanation :: String, + longname :: String, + options :: [(String,String)], + flags :: [(String,String)], + examples :: [String], + needsTypeCheck :: Bool + } + +emptyCommandInfo :: CommandInfo +emptyCommandInfo = CommandInfo { + exec = \_ ts -> return (ts,[]), ---- + synopsis = "", + syntax = "", + explanation = "", + longname = "", + options = [], + flags = [], + examples = [], + needsTypeCheck = True + } + +lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo +lookCommand = Map.lookup + +commandHelpAll :: Encoding -> PGFEnv -> [Option] -> String +commandHelpAll cod pgf opts = unlines + [commandHelp (isOpt "full" opts) (co,info) + | (co,info) <- Map.assocs (allCommands cod pgf)] + +commandHelp :: Bool -> (String,CommandInfo) -> String +commandHelp full (co,info) = unlines $ [ + co ++ ", " ++ longname info, + synopsis info] ++ if full then [ + "", + "syntax:" ++++ " " ++ syntax info, + "", + explanation info, + "options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info], + "flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info], + "examples:" ++++ unlines [" " ++ s | s <- examples info] + ] else [] + +-- for printing with txt2tags formatting + +commandHelpTags :: Bool -> (String,CommandInfo) -> String +commandHelpTags full (co,info) = unlines $ [ + "#VSPACE","","#NOINDENT", + lit co ++ " = " ++ lit (longname info) ++ ": " ++ + "//" ++ synopsis info ++ ".//"] ++ if full then [ + "","#TINY","", + explanation info, + "- Syntax: ``" ++ syntax info ++ "``", + "- Options:\n" ++++ + unlines [" | ``-" ++ o ++ "`` | " ++ e | (o,e) <- options info], + "- Flags:\n" ++++ + unlines [" | ``-" ++ o ++ "`` | " ++ e | (o,e) <- flags info], + "- Examples:\n```" ++++ + unlines [" " ++ s | s <- examples info], + "```", + "", "#NORMAL", "" + ] else [] + where + lit s = "``" ++ s ++ "``" + +type PGFEnv = (PGF, Map.Map Language Morpho) + +-- this list must no more be kept sorted by the command name +allCommands :: Encoding -> PGFEnv -> Map.Map String CommandInfo +allCommands cod env@(pgf, mos) = Map.fromList [ + ("!", emptyCommandInfo { + synopsis = "system command: escape to system shell", + syntax = "! SYSTEMCOMMAND", + examples = [ + "! ls *.gf -- list all GF files in the working directory" + ], + needsTypeCheck = False + }), + ("?", emptyCommandInfo { + synopsis = "system pipe: send value from previous command to a system command", + syntax = "? SYSTEMCOMMAND", + examples = [ + "gt | l | ? wc -- generate, linearize, word-count" + ], + needsTypeCheck = False + }), + + ("aw", emptyCommandInfo { + longname = "align_words", + synopsis = "show word alignments between languages graphically", + explanation = unlines [ + "Prints a set of strings in the .dot format (the graphviz format).", + "The graph can be saved in a file by the wf command as usual.", + "If the -view flag is defined, the graph is saved in a temporary file", + "which is processed by graphviz and displayed by the program indicated", + "by the flag. The target format is postscript, unless overridden by the", + "flag -format." + ], + exec = \opts es -> do + let grph = if null es then [] else graphvizAlignment pgf (head es) + if isFlag "view" opts || isFlag "format" opts then do + let file s = "_grph." ++ s + let view = optViewGraph opts ++ " " + let format = optViewFormat opts + writeFile (file "dot") (enc grph) + system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ + " ; " ++ view ++ file format + return void + else return $ fromString grph, + examples = [ + "gr | aw -- generate a tree and show word alignment as graph script", + "gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac" + ], + options = [ + ], + flags = [ + ("format","format of the visualization file (default \"png\")"), + ("view","program to open the resulting file (default \"open\")") + ] + }), + + ("cc", emptyCommandInfo { + longname = "compute_concrete", + syntax = "cc (-all | -table | -unqual)? TERM", + synopsis = "computes concrete syntax term using a source grammar", + explanation = unlines [ + "Compute TERM by concrete syntax definitions. Uses the topmost", + "module (the last one imported) to resolve constant names.", + "N.B.1 You need the flag -retain when importing the grammar, if you want", + "the definitions to be retained after compilation.", + "N.B.2 The resulting term is not a tree in the sense of abstract syntax", + "and hence not a valid input to a Tree-expecting command.", + "This command must be a line of its own, and thus cannot be a part", + "of a pipe." + ], + options = [ + ("all","pick all strings (forms and variants) from records and tables"), + ("table","show all strings labelled by parameters"), + ("unqual","hide qualifying module names") + ], + needsTypeCheck = False + }), + ("dc", emptyCommandInfo { + longname = "define_command", + syntax = "dc IDENT COMMANDLINE", + synopsis = "define a command macro", + explanation = unlines [ + "Defines IDENT as macro for COMMANDLINE, until IDENT gets redefined.", + "A call of the command has the form %IDENT. The command may take an", + "argument, which in COMMANDLINE is marked as ?0. Both strings and", + "trees can be arguments. Currently at most one argument is possible.", + "This command must be a line of its own, and thus cannot be a part", + "of a pipe." + ], + needsTypeCheck = False + }), + ("dt", emptyCommandInfo { + longname = "define_tree", + syntax = "dt IDENT (TREE | STRING | \"<\" COMMANDLINE)", + synopsis = "define a tree or string macro", + explanation = unlines [ + "Defines IDENT as macro for TREE or STRING, until IDENT gets redefined.", + "The defining value can also come from a command, preceded by \"<\".", + "If the command gives many values, the first one is selected.", + "A use of the macro has the form %IDENT. Currently this use cannot be", + "a subtree of another tree. This command must be a line of its own", + "and thus cannot be a part of a pipe." + ], + examples = [ + ("dt ex \"hello world\" -- define ex as string"), + ("dt ex UseN man_N -- define ex as string"), + ("dt ex < p -cat=NP \"the man in the car\" -- define ex as parse result"), + ("l -lang=LangSwe %ex | ps -to_utf8 -- linearize the tree ex") + ], + needsTypeCheck = False + }), + ("e", emptyCommandInfo { + longname = "empty", + synopsis = "empty the environment" + }), + ("gr", emptyCommandInfo { + longname = "generate_random", + synopsis = "generate random trees in the current abstract syntax", + syntax = "gr [-cat=CAT] [-number=INT]", + examples = [ + "gr -- one tree in the startcat of the current grammar", + "gr -cat=NP -number=16 -- 16 trees in the category NP", + "gr -lang=LangHin,LangTha -cat=Cl -- Cl, both in LangHin and LangTha" + ], + explanation = unlines [ + "Generates a list of random trees, by default one tree." +---- "If a tree argument is given, the command completes the Tree with values to", +---- "the metavariables in the tree." + ], + flags = [ + ("cat","generation category"), + ("lang","uses only functions that have linearizations in all these languages"), + ("number","number of trees generated") + ], + exec = \opts _ -> do + let pgfr = optRestricted opts + ts <- generateRandom pgfr (optType opts) + returnFromExprs $ take (optNum opts) ts + }), + ("gt", emptyCommandInfo { + longname = "generate_trees", + synopsis = "generates a list of trees, by default exhaustive", + explanation = unlines [ + "Generates all trees of a given category, with increasing depth.", + "By default, the depth is 4, but this can be changed by a flag." + ---- "If a Tree argument is given, the command completes the Tree with values", + ---- "to the metavariables in the tree." + ], + flags = [ + ("cat","the generation category"), + ("depth","the maximum generation depth"), + ("lang","excludes functions that have no linearization in this language"), + ("number","the number of trees generated") + ], + exec = \opts _ -> do + let pgfr = optRestricted opts + let dp = return $ valIntOpts "depth" 4 opts + let ts = generateAllDepth pgfr (optType opts) dp + returnFromExprs $ take (optNumInf opts) ts + }), + ("h", emptyCommandInfo { + longname = "help", + syntax = "h (-full)? COMMAND?", + synopsis = "get description of a command, or a the full list of commands", + explanation = unlines [ + "Displays information concerning the COMMAND.", + "Without argument, shows the synopsis of all commands." + ], + options = [ + ("changes","give a summary of changes from GF 2.9"), + ("coding","give advice on character encoding"), + ("full","give full information of the commands"), + ("license","show copyright and license information") + ], + exec = \opts ts -> + let + msg = case ts of + _ | isOpt "changes" opts -> changesMsg + _ | isOpt "coding" opts -> codingMsg + _ | isOpt "license" opts -> licenseMsg + [t] -> let co = getCommandOp (showExpr [] t) in + case lookCommand co (allCommands cod env) of ---- new map ??!! + Just info -> commandHelp True (co,info) + _ -> "command not found" + _ -> commandHelpAll cod env opts + in return (fromString msg), + needsTypeCheck = False + }), + ("i", emptyCommandInfo { + longname = "import", + synopsis = "import a grammar from source code or compiled .pgf file", + explanation = unlines [ + "Reads a grammar from File and compiles it into a GF runtime grammar.", + "If a grammar with the same concrete name is already in the state", + "it is overwritten - but only if compilation succeeds.", + "The grammar parser depends on the file name suffix:", + " .gf normal GF source", + " .gfo compiled GF source", + " .pgf precompiled grammar in Portable Grammar Format" + ], + options = [ + -- ["prob", "retain", "gfo", "src", "no-cpu", "cpu", "quiet", "verbose"] + ("retain","retain operations (used for cc command)"), + ("src", "force compilation from source"), + ("v", "be verbose - show intermediate status information") + ], + needsTypeCheck = False + }), + ("l", emptyCommandInfo { + longname = "linearize", + synopsis = "convert an abstract syntax expression to string", + explanation = unlines [ + "Shows the linearization of a Tree by the grammars in scope.", + "The -lang flag can be used to restrict this to fewer languages.", + "A sequence of string operations (see command ps) can be given", + "as options, and works then like a pipe to the ps command, except", + "that it only affect the strings, not e.g. the table labels.", + "These can be given separately to each language with the unlexer flag", + "whose results are prepended to the other lexer flags. The value of the", + "unlexer flag is a space-separated list of comma-separated string operation", + "sequences; see example." + ], + examples = [ + "l -langs=LangSwe,LangNor no_Utt -- linearize tree to LangSwe and LangNor", + "gr -lang=LangHin -cat=Cl | l -table -to_devanagari -to_utf8 -- hindi table", + "l -unlexer=\"LangSwe=to_utf8 LangHin=to_devanagari,to_utf8\" -- different lexers" + ], + exec = \opts -> return . fromStrings . map (optLin opts), + options = [ + ("all","show all forms and variants"), + ("bracket","show tree structure with brackets and paths to nodes"), + ("multi","linearize to all languages (default)"), + ("record","show source-code-like record"), + ("table","show all forms labelled by parameters"), + ("term", "show PGF term"), + ("treebank","show the tree and tag linearizations with language names") + ] ++ stringOpOptions, + flags = [ + ("lang","the languages of linearization (comma-separated, no spaces)"), + ("unlexer","set unlexers separately to each language (space-separated)") + ] + }), + ("ma", emptyCommandInfo { + longname = "morpho_analyse", + synopsis = "print the morphological analyses of all words in the string", + explanation = unlines [ + "Prints all the analyses of space-separated words in the input string,", + "using the morphological analyser of the actual grammar (see command pf)" + ], + exec = \opts -> + return . fromString . unlines . + map prMorphoAnalysis . concatMap (morphos opts) . + concatMap words . toStrings + }), + + ("mq", emptyCommandInfo { + longname = "morpho_quiz", + synopsis = "start a morphology quiz", + exec = \opts _ -> do + let lang = optLang opts + let typ = optType opts + morphologyQuiz cod pgf lang typ + return void, + flags = [ + ("lang","language of the quiz"), + ("cat","category of the quiz"), + ("number","maximum number of questions") + ] + }), + + ("p", emptyCommandInfo { + longname = "parse", + synopsis = "parse a string to abstract syntax expression", + explanation = unlines [ + "Shows all trees returned by parsing a string in the grammars in scope.", + "The -lang flag can be used to restrict this to fewer languages.", + "The default start category can be overridden by the -cat flag.", + "See also the ps command for lexing and character encoding.", + "", + "The -openclass flag is experimental and allows some robustness in ", + "the parser. For example if -openclass=\"A,N,V\" is given, the parser", + "will accept unknown adjectives, nouns and verbs with the resource grammar." + ], + exec = \opts -> returnFromExprs . concatMap (par opts) . toStrings, + flags = [ + ("cat","target category of parsing"), + ("lang","the languages of parsing (comma-separated, no spaces)"), + ("openclass","list of open-class categories for robust parsing") + ] + }), + ("pg", emptyCommandInfo { ----- + longname = "print_grammar", + synopsis = "print the actual grammar with the given printer", + explanation = unlines [ + "Prints the actual grammar, with all involved languages.", + "In some printers, this can be restricted to a subset of languages", + "with the -lang=X,Y flag (comma-separated, no spaces).", + "The -printer=P flag sets the format in which the grammar is printed.", + "N.B.1 Since grammars are compiled when imported, this command", + "generally shows a grammar that looks rather different from the source.", + "N.B.2 This command is slightly obsolete: to produce different formats", + "the batch compiler gfc is recommended, and has many more options." + ], + exec = \opts _ -> prGrammar opts, + flags = [ + --"cat", + ("lang", "select languages for the some options (default all languages)"), + ("printer","select the printing format (see gfc --help)") + ], + options = [ + ("cats", "show just the names of abstract syntax categories"), + ("fullform", "print the fullform lexicon"), + ("missing","show just the names of functions that have no linearization") + ] + }), + ("ph", emptyCommandInfo { + longname = "print_history", + synopsis = "print command history", + explanation = unlines [ + "Prints the commands issued during the GF session.", + "The result is readable by the eh command.", + "The result can be used as a script when starting GF." + ], + examples = [ + "ph | wf -file=foo.gfs -- save the history into a file" + ] + }), + ("ps", emptyCommandInfo { + longname = "put_string", + syntax = "ps OPT? STRING", + synopsis = "return a string, possibly processed with a function", + explanation = unlines [ + "Returns a string obtained from its argument string by applying", + "string processing functions in the order given in the command line", + "option list. Thus 'ps -f -g s' returns g (f s). Typical string processors", + "are lexers and unlexers, but also character encoding conversions are possible.", + "The unlexers preserve the division of their input to lines.", + "To see transliteration tables, use command ut." + ], + examples = [ + "l (EAdd 3 4) | ps -code -- linearize code-like output", + "ps -lexer=code | p -cat=Exp -- parse code-like input", + "gr -cat=QCl | l | ps -bind -- linearization output from LangFin", + "ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal", + "rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8", + "rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration" + ], + exec = \opts -> + let (os,fs) = optsAndFlags opts in + return . fromString . stringOps (envFlag fs) (map prOpt os) . toString, + options = stringOpOptions, + flags = [ + ("env","apply in this environment only") + ] + }), + ("pt", emptyCommandInfo { + longname = "put_tree", + syntax = "ps OPT? TREE", + synopsis = "return a tree, possibly processed with a function", + explanation = unlines [ + "Returns a tree obtained from its argument tree by applying", + "tree processing functions in the order given in the command line", + "option list. Thus 'pt -f -g s' returns g (f s). Typical tree processors", + "are type checking and semantic computation." + ], + examples = [ + "pt -compute (plus one two) -- compute value" + ], + exec = \opts -> + returnFromExprs . takeOptNum opts . treeOps opts, + options = treeOpOptions pgf, + flags = [("number","take at most this many trees")] ++ treeOpFlags pgf + }), + ("q", emptyCommandInfo { + longname = "quit", + synopsis = "exit GF interpreter" + }), + ("rf", emptyCommandInfo { + longname = "read_file", + synopsis = "read string or tree input from a file", + explanation = unlines [ + "Reads input from file. The filename must be in double quotes.", + "The input is interpreted as a string by default, and can hence be", + "piped e.g. to the parse command. The option -tree interprets the", + "input as a tree, which can be given e.g. to the linearize command.", + "The option -lines will result in a list of strings or trees, one by line." + ], + options = [ + ("lines","return the list of lines, instead of the singleton of all contents"), + ("tree","convert strings into trees") + ], + exec = \opts _ -> do + let file = valStrOpts "file" "_gftmp" opts + let exprs [] = ([],empty) + exprs ((n,s):ls) | null s + = exprs ls + exprs ((n,s):ls) = case readExpr s of + Just e -> let (es,err) = exprs ls + in case inferExpr pgf e of + Right (e,t) -> (e:es,err) + Left tcerr -> (es,text "on line" <+> int n <> colon $$ nest 2 (ppTcError tcerr) $$ err) + Nothing -> let (es,err) = exprs ls + in (es,text "on line" <+> int n <> colon <+> text "parse error" $$ err) + returnFromLines ls = case exprs ls of + (es, err) | null es -> return ([], render (err $$ text "no trees found")) + | otherwise -> return (es, render err) + + s <- readFile file + case opts of + _ | isOpt "lines" opts && isOpt "tree" opts -> + returnFromLines (zip [1..] (lines s)) + _ | isOpt "tree" opts -> + returnFromLines [(1,s)] + _ | isOpt "lines" opts -> return (fromStrings $ lines s) + _ -> return (fromString s), + flags = [("file","the input file name")] + }), + ("tq", emptyCommandInfo { + longname = "translation_quiz", + synopsis = "start a translation quiz", + exec = \opts _ -> do + let from = valCIdOpts "from" (optLang opts) opts + let to = valCIdOpts "to" (optLang opts) opts + let typ = optType opts + translationQuiz cod pgf from to typ + return void, + flags = [ + ("from","translate from this language"), + ("to","translate to this language"), + ("cat","translate in this category"), + ("number","the maximum number of questions") + ] + }), + ("se", emptyCommandInfo { + longname = "set_encoding", + synopsis = "set the encoding used in current terminal", + syntax = "se ID", + examples = [ + "se cp1251 -- set encoding to cp1521", + "se utf8 -- set encoding to utf8 (default)" + ], + needsTypeCheck = False + }), + ("sp", emptyCommandInfo { + longname = "system_pipe", + synopsis = "send argument to a system command", + syntax = "sp -command=\"SYSTEMCOMMAND\", alt. ? SYSTEMCOMMAND", + exec = \opts arg -> do + let tmpi = "_tmpi" --- + let tmpo = "_tmpo" + writeFile tmpi $ enc $ toString arg + let syst = optComm opts ++ " " ++ tmpi + system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo + s <- readFile tmpo + return $ fromString s, + flags = [ + ("command","the system command applied to the argument") + ], + examples = [ + "sp -command=\"wc\" \"foo\"", + "gt | l | sp -command=\"grep \\\"who\\\"\" | sp -command=\"wc\"" + ] + }), + ("ut", emptyCommandInfo { + longname = "unicode_table", + synopsis = "show a transliteration table for a unicode character set", + exec = \opts _ -> do + let t = concatMap prOpt (take 1 opts) + let out = maybe "no such transliteration" characterTable $ transliteration t + return $ fromString out, + options = transliterationPrintNames + }), + + ("vd", emptyCommandInfo { + longname = "visualize_dependency", + synopsis = "show word dependency tree graphically", + explanation = unlines [ + "Prints a dependency tree in the .dot format (the graphviz format, default)", + "or the MaltParser/CoNLL format (flag -output=malt for training, malt_input)", + "for unanalysed input.", + "By default, the last argument is the head of every abstract syntax", + "function; moreover, the head depends on the head of the function above.", + "The graph can be saved in a file by the wf command as usual.", + "If the -view flag is defined, the graph is saved in a temporary file", + "which is processed by graphviz and displayed by the program indicated", + "by the flag. The target format is png, unless overridden by the", + "flag -format." + ], + exec = \opts es -> do + let debug = isOpt "v" opts + let file = valStrOpts "file" "" opts + let outp = valStrOpts "output" "dot" opts + mlab <- case file of + "" -> return Nothing + _ -> readFile file >>= return . Just . getDepLabels . lines + let lang = optLang opts + let grphs = unlines $ map (graphvizDependencyTree outp debug mlab Nothing pgf lang) es + if isFlag "view" opts || isFlag "format" opts then do + let file s = "_grphd." ++ s + let view = optViewGraph opts ++ " " + let format = optViewFormat opts + writeFile (file "dot") (enc grphs) + system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ + " ; " ++ view ++ file format + return void + else return $ fromString grphs, + examples = [ + "gr | vd -- generate a tree and show dependency tree in .dot", + "gr | vd -view=open -- generate a tree and display dependency tree on a Mac", + "gr -number=1000 | vd -file=dep.labels -output=malt -- generate training treebank", + "gr -number=100 | vd -file=dep.labels -output=malt_input -- generate test sentences" + ], + options = [ + ("v","show extra information") + ], + flags = [ + ("file","configuration file for labels per fun, format 'fun l1 ... label ... l2'"), + ("format","format of the visualization file (default \"png\")"), + ("output","output format of graph source (default \"dot\")"), + ("view","program to open the resulting file (default \"open\")") + ] + }), + + + ("vp", emptyCommandInfo { + longname = "visualize_parse", + synopsis = "show parse tree graphically", + explanation = unlines [ + "Prints a parse tree the .dot format (the graphviz format).", + "The graph can be saved in a file by the wf command as usual.", + "If the -view flag is defined, the graph is saved in a temporary file", + "which is processed by graphviz and displayed by the program indicated", + "by the flag. The target format is png, unless overridden by the", + "flag -format." + ], + exec = \opts es -> do + let lang = optLang opts + let grph = if null es then [] else graphvizParseTree pgf lang (head es) + if isFlag "view" opts || isFlag "format" opts then do + let file s = "_grph." ++ s + let view = optViewGraph opts ++ " " + let format = optViewFormat opts + writeFile (file "dot") (enc grph) + system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ + " ; " ++ view ++ file format + return void + else return $ fromString grph, + examples = [ + "p \"John walks\" | vp -- generate a tree and show parse tree as .dot script", + "gr | vp -view=\"open\" -- generate a tree and display parse tree on a Mac" + ], + options = [ + ], + flags = [ + ("format","format of the visualization file (default \"png\")"), + ("view","program to open the resulting file (default \"open\")") + ] + }), + + ("vt", emptyCommandInfo { + longname = "visualize_tree", + synopsis = "show a set of trees graphically", + explanation = unlines [ + "Prints a set of trees in the .dot format (the graphviz format).", + "The graph can be saved in a file by the wf command as usual.", + "If the -view flag is defined, the graph is saved in a temporary file", + "which is processed by graphviz and displayed by the program indicated", + "by the flag. The target format is postscript, unless overridden by the", + "flag -format.", + "With option -mk, use for showing library style function names of form 'mkC'." + ], + exec = \opts es -> + if isOpt "mk" opts + then return $ fromString $ unlines $ map (tree2mk pgf) es + else do + let funs = not (isOpt "nofun" opts) + let cats = not (isOpt "nocat" opts) + let grph = unlines (map (graphvizAbstractTree pgf (funs,cats)) es) -- True=digraph + if isFlag "view" opts || isFlag "format" opts then do + let file s = "_grph." ++ s + let view = optViewGraph opts ++ " " + let format = optViewFormat opts + writeFile (file "dot") (enc grph) + system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ + " ; " ++ view ++ file format + return void + else return $ fromString grph, + examples = [ + "p \"hello\" | vt -- parse a string and show trees as graph script", + "p \"hello\" | vt -view=\"open\" -- parse a string and display trees on a Mac" + ], + options = [ + ("mk", "show the tree with function names converted to 'mkC' with value cats C"), + ("nofun","don't show functions but only categories"), + ("nocat","don't show categories but only functions") + ], + flags = [ + ("format","format of the visualization file (default \"png\")"), + ("view","program to open the resulting file (default \"open\")") + ] + }), + ("wf", emptyCommandInfo { + longname = "write_file", + synopsis = "send string or tree to a file", + exec = \opts arg -> do + let file = valStrOpts "file" "_gftmp" opts + if isOpt "append" opts + then appendFile file (enc (toString arg)) + else writeFile file (enc (toString arg)) + return void, + options = [ + ("append","append to file, instead of overwriting it") + ], + flags = [("file","the output filename")] + }), + ("ai", emptyCommandInfo { + longname = "abstract_info", + syntax = "ai IDENTIFIER or ai EXPR", + synopsis = "Provides an information about a function, an expression or a category from the abstract syntax", + explanation = unlines [ + "The command has one argument which is either function, expression or", + "a category defined in the abstract syntax of the current grammar. ", + "If the argument is a function then ?its type is printed out.", + "If it is a category then the category definition is printed.", + "If a whole expression is given it prints the expression with refined", + "metavariables and the type of the expression." + ], + exec = \opts arg -> do + case arg of + [EFun id] -> case Map.lookup id (funs (abstract pgf)) of + Just (ty,_,eqs) -> return $ fromString $ + render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$ + if null eqs + then empty + else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts + in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]) + Nothing -> case Map.lookup id (cats (abstract pgf)) of + Just hyps -> do return $ fromString $ + render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)) $$ + if null (functionsToCat pgf id) + then empty + else space $$ + text "fun" <+> vcat [ppCId fid <+> colon <+> ppType 0 [] ty + | (fid,ty) <- functionsToCat pgf id]) + Nothing -> do putStrLn ("unknown category of function identifier "++show id) + return void + [e] -> case inferExpr pgf e of + Left tcErr -> error $ render (ppTcError tcErr) + Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e) + putStrLn ("Type: "++showType [] ty) + return void + _ -> do putStrLn "a single identifier or expression is expected from the command" + return void, + needsTypeCheck = False + }) + ] + where + enc = encodeUnicode cod + par opts s = case optOpenTypes opts of + [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang] + open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang] + + void = ([],[]) + + optLin opts t = unlines $ + case opts of + _ | isOpt "treebank" opts -> (showCId (abstractName pgf) ++ ": " ++ showExpr [] t) : + [showCId lang ++ ": " ++ linear opts lang t | lang <- optLangs opts] + _ -> [linear opts lang t | lang <- optLangs opts] + + linear :: [Option] -> CId -> Expr -> String + linear opts lang = let unl = unlex opts lang in case opts of + _ | isOpt "all" opts -> allLinearize unl pgf lang + _ | isOpt "table" opts -> tableLinearize unl pgf lang + _ | isOpt "term" opts -> termLinearize pgf lang + _ | isOpt "record" opts -> recordLinearize pgf lang + _ | isOpt "bracket" opts -> markLinearize pgf lang + _ -> unl . linearize pgf lang + + unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- + + getUnlex opts lang = case words (valStrOpts "unlexer" "" opts) of + lexs -> case lookup lang + [(mkCId la,tail le) | lex <- lexs, let (la,le) = span (/='=') lex, not (null le)] of + Just le -> chunks ',' le + _ -> [] + +-- Proposed logic of coding in unlexing: +-- - If lang has no coding flag, or -to_utf8 is not in opts, just opts are used. +-- - If lang has flag coding=utf8, -to_utf8 is ignored. +-- - If lang has coding=other, and -to_utf8 is in opts, from_other is applied first. +-- THIS DOES NOT WORK UNFORTUNATELY - can't use the grammar flag properly + unlexx opts lang = {- trace (unwords optsC) $ -} stringOps Nothing optsC where ---- + optsC = case lookFlag pgf lang "coding" of + Just "utf8" -> filter (/="to_utf8") $ map prOpt opts + Just other | isOpt "to_utf8" opts -> + let cod = ("from_" ++ other) + in cod : filter (/=cod) (map prOpt opts) + _ -> map prOpt opts + + optRestricted opts = + restrictPGF (\f -> and [hasLin pgf la f | la <- optLangs opts]) pgf + + optLangs opts = case valStrOpts "lang" "" opts of + "" -> languages pgf + lang -> map mkCId (chunks ',' lang) + optLang opts = head $ optLangs opts ++ [wildCId] + + optOpenTypes opts = case valStrOpts "openclass" "" opts of + "" -> [] + cats -> mapMaybe readType (chunks ',' cats) + + optType opts = + let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts + in case readType str of + Just ty -> case checkType pgf ty of + Left tcErr -> error $ render (ppTcError tcErr) + Right ty -> ty + Nothing -> error ("Can't parse '"++str++"' as type") + optComm opts = valStrOpts "command" "" opts + optViewFormat opts = valStrOpts "format" "png" opts + optViewGraph opts = valStrOpts "view" "open" opts + optNum opts = valIntOpts "number" 1 opts + optNumInf opts = valIntOpts "number" 1000000000 opts ---- 10^9 + takeOptNum opts = take (optNumInf opts) + + fromExprs es = (es,unlines (map (showExpr []) es)) + fromStrings ss = (map (ELit . LStr) ss, unlines ss) + fromString s = ([ELit (LStr s)], s) + toStrings = map showAsString + toString = unwords . toStrings + + returnFromExprs es = return $ case es of + [] -> ([], "no trees found") + _ -> fromExprs es + + prGrammar opts + | isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf + | isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts + | isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) | + la <- optLangs opts, let cs = missingLins pgf la] + | otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts) + return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf + + morphos opts s = + [morpho [] (\mo -> lookupMorpho mo s) la | la <- optLangs opts] + + morpho z f la = maybe z f $ Map.lookup la mos + + -- ps -f -g s returns g (f s) + stringOps menv opts s = foldr (menvop . app) s (reverse opts) where + app f = maybe id id (stringOp f) + menvop op = maybe op (\ (b,e) -> opInEnv b e op) menv + + envFlag fs = case valStrOpts "env" "global" fs of + "quotes" -> Just ("\"","\"") + _ -> Nothing + + treeOps opts s = foldr app s (reverse opts) where + app (OOpt op) | Just (Left f) <- treeOp pgf op = f + app (OFlag op (VId x)) | Just (Right f) <- treeOp pgf op = f (mkCId x) + app _ = id + + showAsString t = case t of + ELit (LStr s) -> s + _ -> "\n" ++ showExpr [] t --- newline needed in other cases than the first + +stringOpOptions = sort $ [ + ("bind","bind tokens separated by Prelude.BIND, i.e. &+"), + ("chars","lexer that makes every non-space character a token"), + ("from_cp1251","decode from cp1251 (Cyrillic used in Bulgarian resource)"), + ("from_utf8","decode from utf8 (default)"), + ("lextext","text-like lexer"), + ("lexcode","code-like lexer"), + ("lexmixed","mixture of text and code (code between $...$)"), + ("to_cp1251","encode to cp1251 (Cyrillic used in Bulgarian resource)"), + ("to_html","wrap in a html file with linebreaks"), + ("to_utf8","encode to utf8 (default)"), + ("unlextext","text-like unlexer"), + ("unlexcode","code-like unlexer"), + ("unlexmixed","mixture of text and code (code between $...$)"), + ("unchars","unlexer that puts no spaces between tokens"), + ("unwords","unlexer that puts a single space between tokens (default)"), + ("words","lexer that assumes tokens separated by spaces (default)") + ] ++ + concat [ + [("from_" ++ p, "from unicode to GF " ++ n ++ " transliteration"), + ("to_" ++ p, "from GF " ++ n ++ " transliteration to unicode")] | + (p,n) <- transliterationPrintNames] + +treeOpOptions pgf = [(op,expl) | (op,(expl,Left _)) <- allTreeOps pgf] +treeOpFlags pgf = [(op,expl) | (op,(expl,Right _)) <- allTreeOps pgf] + +translationQuiz :: Encoding -> PGF -> Language -> Language -> Type -> IO () +translationQuiz cod pgf ig og typ = do + tts <- translationList pgf ig og typ infinity + mkQuiz cod "Welcome to GF Translation Quiz." tts + +morphologyQuiz :: Encoding -> PGF -> Language -> Type -> IO () +morphologyQuiz cod pgf ig typ = do + tts <- morphologyList pgf ig typ infinity + mkQuiz cod "Welcome to GF Morphology Quiz." tts + +-- | the maximal number of precompiled quiz problems +infinity :: Int +infinity = 256 + +lookFlag :: PGF -> String -> String -> Maybe String +lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag) + +prFullFormLexicon :: Morpho -> String +prFullFormLexicon mo = + unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- fullFormLexicon mo] + +prMorphoAnalysis :: [(Lemma,Analysis)] -> String +prMorphoAnalysis lps = unlines [showCId l ++ " " ++ p | (l,p) <- lps] + diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs new file mode 100644 index 000000000..06deab6c6 --- /dev/null +++ b/src/compiler/GF/Command/Importing.hs @@ -0,0 +1,50 @@ +module GF.Command.Importing (importGrammar, importSource) where + +import PGF +import PGF.Data + +import GF.Compile +import GF.Grammar.Grammar (SourceGrammar) -- for cc command +import GF.Grammar.CF +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Data.ErrM + +import Data.List (nubBy) +import System.FilePath + +-- import a grammar in an environment where it extends an existing grammar +importGrammar :: PGF -> Options -> [FilePath] -> IO PGF +importGrammar pgf0 _ [] = return pgf0 +importGrammar pgf0 opts files = + case takeExtensions (last files) of + ".cf" -> do + s <- fmap unlines $ mapM readFile files + let cnc = justModuleName (last files) + gf <- case getCF cnc s of + Ok g -> return g + Bad s -> error s ---- + Ok gr <- appIOE $ compileSourceGrammar opts gf + epgf <- appIOE $ link opts (cnc ++ "Abs") gr + case epgf of + Ok pgf -> return pgf + Bad s -> error s ---- + s | elem s [".gf",".gfo"] -> do + res <- appIOE $ compileToPGF opts files + case res of + Ok pgf2 -> do return $ unionPGF pgf0 pgf2 + Bad msg -> do putStrLn ('\n':'\n':msg) + return pgf0 + ".pgf" -> do + pgf2 <- mapM readPGF files >>= return . foldl1 unionPGF + return $ unionPGF pgf0 pgf2 + ext -> die $ "Unknown filename extension: " ++ show ext + +importSource :: SourceGrammar -> Options -> [FilePath] -> IO SourceGrammar +importSource src0 opts files = do + src <- appIOE $ batchCompile opts files + case src of + Ok gr -> return gr + Bad msg -> do + putStrLn msg + return src0 diff --git a/src/compiler/GF/Command/Interpreter.hs b/src/compiler/GF/Command/Interpreter.hs new file mode 100644 index 000000000..ff84da8a3 --- /dev/null +++ b/src/compiler/GF/Command/Interpreter.hs @@ -0,0 +1,132 @@ +module GF.Command.Interpreter ( + CommandEnv (..), + mkCommandEnv, + emptyCommandEnv, + interpretCommandLine, + interpretPipe, + getCommandOp + ) where + +import GF.Command.Commands +import GF.Command.Abstract +import GF.Command.Parse +import PGF +import PGF.Data +import PGF.Morphology +import GF.System.Signal +import GF.Infra.UseIO +import GF.Infra.Option + +import Text.PrettyPrint +import Control.Monad.Error +import qualified Data.Map as Map + +data CommandEnv = CommandEnv { + multigrammar :: PGF, + morphos :: Map.Map Language Morpho, + commands :: Map.Map String CommandInfo, + commandmacros :: Map.Map String CommandLine, + expmacros :: Map.Map String Expr + } + +mkCommandEnv :: Encoding -> PGF -> CommandEnv +mkCommandEnv enc pgf = + let mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] in + CommandEnv pgf mos (allCommands enc (pgf, mos)) Map.empty Map.empty + +emptyCommandEnv :: CommandEnv +emptyCommandEnv = mkCommandEnv UTF_8 emptyPGF + +interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO () +interpretCommandLine enc env line = + case readCommandLine line of + Just [] -> return () + Just pipes -> mapM_ (interpretPipe enc env) pipes + Nothing -> putStrLnFlush "command not parsed" + +interpretPipe enc env cs = do + v@(_,s) <- intercs ([],"") cs + putStrLnFlush $ enc s + return v + where + intercs treess [] = return treess + intercs (trees,_) (c:cs) = do + treess2 <- interc trees c + intercs treess2 cs + interc es comm@(Command co opts arg) = case co of + '%':f -> case Map.lookup f (commandmacros env) of + Just css -> + case getCommandTrees env False arg es of + Right es -> do mapM_ (interpretPipe enc env) (appLine es css) + return ([],[]) + Left msg -> do putStrLn ('\n':msg) + return ([],[]) + Nothing -> do + putStrLn $ "command macro " ++ co ++ " not interpreted" + return ([],[]) + _ -> interpret enc env es comm + appLine es = map (map (appCommand es)) + +-- macro definition applications: replace ?i by (exps !! i) +appCommand :: [Expr] -> Command -> Command +appCommand xs c@(Command i os arg) = case arg of + AExpr e -> Command i os (AExpr (app e)) + _ -> c + where + app e = case e of + EAbs b x e -> EAbs b x (app e) + EApp e1 e2 -> EApp (app e1) (app e2) + ELit l -> ELit l + EMeta i -> xs !! i + EFun x -> EFun x + +-- return the trees to be sent in pipe, and the output possibly printed +interpret :: (String -> String) -> CommandEnv -> [Expr] -> Command -> IO CommandOutput +interpret enc env trees comm = + case getCommand env trees comm of + Left msg -> do putStrLn ('\n':msg) + return ([],[]) + Right (info,opts,trees) -> do tss@(_,s) <- exec info opts trees + if isOpt "tr" opts + then putStrLn (enc s) + else return () + return tss + +-- analyse command parse tree to a uniform datastructure, normalizing comm name +--- the env is needed for macro lookup +getCommand :: CommandEnv -> [Expr] -> Command -> Either String (CommandInfo,[Option],[Expr]) +getCommand env es co@(Command c opts arg) = do + info <- getCommandInfo env c + checkOpts info opts + es <- getCommandTrees env (needsTypeCheck info) arg es + return (info,opts,es) + +getCommandInfo :: CommandEnv -> String -> Either String CommandInfo +getCommandInfo env cmd = + case lookCommand (getCommandOp cmd) (commands env) of + Just info -> return info + Nothing -> fail $ "command " ++ cmd ++ " not interpreted" + +checkOpts :: CommandInfo -> [Option] -> Either String () +checkOpts info opts = + case + [o | OOpt o <- opts, notElem o ("tr" : map fst (options info))] ++ + [o | OFlag o _ <- opts, notElem o (map fst (flags info))] + of + [] -> return () + [o] -> fail $ "option not interpreted: " ++ o + os -> fail $ "options not interpreted: " ++ unwords os + +getCommandTrees :: CommandEnv -> Bool -> Argument -> [Expr] -> Either String [Expr] +getCommandTrees env needsTypeCheck a es = + case a of + AMacro m -> case Map.lookup m (expmacros env) of + Just e -> return [e] + _ -> return [] + AExpr e -> if needsTypeCheck + then case inferExpr (multigrammar env) e of + Left tcErr -> fail $ render (ppTcError tcErr) + Right (e,ty) -> return [e] -- ignore piped + else return [e] + ANoArg -> return es -- use piped + diff --git a/src/compiler/GF/Command/Messages.hs b/src/compiler/GF/Command/Messages.hs new file mode 100644 index 000000000..8dda92d49 --- /dev/null +++ b/src/compiler/GF/Command/Messages.hs @@ -0,0 +1,54 @@ +module GF.Command.Messages where + +licenseMsg = unlines [ + "Copyright (c)", + "Krasimir Angelov, Bj\246rn Bringert, H\229kan Burden, Hans-Joachim Daniels,", + "Markus Forsberg, Thomas Hallgren, Harald Hammarstr\246m, Kristofer Johannisson,", + "Janna Khegai, Peter Ljungl\246f, Petri M\228enp\228\228, and", + "Aarne Ranta, 1998-2008, under GNU General Public License (GPL)", + "see LICENSE in GF distribution, or http://www.gnu.org/licenses/gpl.html." + ] + +codingMsg = unlines [ + "The GF shell uses Unicode internally, but assumes user input to be UTF8", + "and converts terminal and file output to UTF8. If your terminal is not UTF8", + "see 'help set_encoding." + ] + +changesMsg = unlines [ + "While GF 3.0 is backward compatible with source grammars, the shell commands", + "have changed from version 2.9. Below the most importand changes. Bug reports", + "and feature requests should be sent to http://trac.haskell.org/gf/.", + "", + "af use wf -append", + "at not supported", + "eh not yet supported", + "es no longer supported; use javascript generation", + "g not yet supported", + "l now by default multilingual", + "ml not yet supported", + "p now by default multilingual", + "pi not yet supported", + "pl not yet supported", + "pm subsumed to pg", + "po not yet supported", + "pt not yet supported", + "r not yet supported", + "rf changed syntax", + "rl not supported", + "s no longer needed", + "sa not supported", + "sf not supported", + "si not supported", + "so not yet supported", + "t use pipe with l and p", + "tb use l -treebank", + "tl not yet supported", + "tq changed syntax", + "ts not supported", + "tt use ps", + "ut not supported", + "vg not yet supported", + "wf changed syntax", + "wt not supported" + ] diff --git a/src/compiler/GF/Command/Parse.hs b/src/compiler/GF/Command/Parse.hs new file mode 100644 index 000000000..44366c472 --- /dev/null +++ b/src/compiler/GF/Command/Parse.hs @@ -0,0 +1,64 @@ +module GF.Command.Parse(readCommandLine, pCommand) where + +import PGF.CId +import PGF.Expr +import GF.Command.Abstract + +import Data.Char +import Control.Monad +import qualified Text.ParserCombinators.ReadP as RP + +readCommandLine :: String -> Maybe CommandLine +readCommandLine s = case [x | (x,cs) <- RP.readP_to_S pCommandLine s, all isSpace cs] of + [x] -> Just x + _ -> Nothing + +pCommandLine = + (RP.skipSpaces >> RP.char '-' >> RP.char '-' >> RP.skipMany (RP.satisfy (const True)) >> return []) -- comment + RP.<++ + (RP.sepBy (RP.skipSpaces >> pPipe) (RP.skipSpaces >> RP.char ';')) + +pPipe = RP.sepBy1 (RP.skipSpaces >> pCommand) (RP.skipSpaces >> RP.char '|') + +pCommand = (do + cmd <- pIdent RP.<++ (RP.char '%' >> pIdent >>= return . ('%':)) + RP.skipSpaces + opts <- RP.sepBy pOption RP.skipSpaces + arg <- pArgument + return (Command cmd opts arg) + ) + RP.<++ (do + RP.char '?' + c <- pSystemCommand + return (Command "sp" [OFlag "command" (VStr c)] ANoArg) + ) + +pOption = do + RP.char '-' + flg <- pIdent + RP.option (OOpt flg) (fmap (OFlag flg) (RP.char '=' >> pValue)) + +pValue = do + fmap (VInt . read) (RP.munch1 isDigit) + RP.<++ + fmap VStr pStr + RP.<++ + fmap VId pFilename + +pFilename = liftM2 (:) (RP.satisfy isFileFirst) (RP.munch (not . isSpace)) where + isFileFirst c = not (isSpace c) && not (isDigit c) + +pArgument = + RP.option ANoArg + (fmap AExpr pExpr + RP.<++ + (RP.munch isSpace >> RP.char '%' >> fmap AMacro pIdent)) + +pSystemCommand = + RP.munch isSpace >> ( + (RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"'))) + RP.<++ + RP.many RP.get + ) + where + pEsc = RP.char '\\' >> RP.get diff --git a/src/compiler/GF/Command/TreeOperations.hs b/src/compiler/GF/Command/TreeOperations.hs new file mode 100644 index 000000000..941f03782 --- /dev/null +++ b/src/compiler/GF/Command/TreeOperations.hs @@ -0,0 +1,32 @@ +module GF.Command.TreeOperations ( + treeOp, + allTreeOps + ) where + +import PGF +import PGF.Data +import Data.List + +type TreeOp = [Expr] -> [Expr] + +treeOp :: PGF -> String -> Maybe (Either TreeOp (CId -> TreeOp)) +treeOp pgf f = fmap snd $ lookup f $ allTreeOps pgf + +allTreeOps :: PGF -> [(String,(String,Either TreeOp (CId -> TreeOp)))] +allTreeOps pgf = [ + ("compute",("compute by using semantic definitions (def)", + Left $ map (compute pgf))), + ("transfer",("syntactic transfer by applying function and computing", + Right $ \f -> map (compute pgf . EApp (EFun f)))), + ("paraphrase",("paraphrase by using semantic definitions (def)", + Left $ nub . concatMap (paraphrase pgf))), + ("smallest",("sort trees from smallest to largest, in number of nodes", + Left $ smallest)) + ] + +smallest :: [Expr] -> [Expr] +smallest = sortBy (\t u -> compare (size t) (size u)) where + size t = case t of + EAbs _ _ e -> size e + 1 + EApp e1 e2 -> size e1 + size e2 + 1 + _ -> 1 diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs new file mode 100644 index 000000000..e0c60178e --- /dev/null +++ b/src/compiler/GF/Compile.hs @@ -0,0 +1,252 @@ +module GF.Compile (batchCompile, link, compileToPGF, compileSourceGrammar) where + +-- the main compiler passes +import GF.Compile.GetGrammar +import GF.Compile.Rename +import GF.Compile.CheckGrammar +import GF.Compile.Optimize +import GF.Compile.SubExOpt +import GF.Compile.OptimizeGFCC +import GF.Compile.GrammarToGFCC +import GF.Compile.ReadFiles +import GF.Compile.Update +import GF.Compile.Refresh + +import GF.Compile.Coding +import GF.Text.UTF8 ---- + +import GF.Grammar.Grammar +import GF.Grammar.Lookup +import GF.Grammar.Printer +import GF.Grammar.Binary + +import GF.Infra.Ident +import GF.Infra.Option +import GF.Infra.Modules +import GF.Infra.UseIO +import GF.Infra.CheckM + +import GF.Data.Operations + +import Control.Monad +import System.IO +import System.Directory +import System.FilePath +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.List(nub) +import Data.Maybe (isNothing) +import Data.Binary +import Text.PrettyPrint + +import PGF.Check +import PGF.CId +import PGF.Data +import PGF.Macros + + +-- | Compiles a number of source files and builds a 'PGF' structure for them. +compileToPGF :: Options -> [FilePath] -> IOE PGF +compileToPGF opts fs = + do gr <- batchCompile opts fs + let name = justModuleName (last fs) + link opts name gr + +link :: Options -> String -> SourceGrammar -> IOE PGF +link opts cnc gr = do + let isv = (verbAtLeast opts Normal) + gc1 <- putPointE Normal opts "linking ... " $ + let (abs,gc0) = mkCanon2gfcc opts cnc gr + in case checkPGF gc0 of + Ok (gc,b) -> do + case (isv,b) of + (True, True) -> ioeIO $ putStrLn "OK" + (False,True) -> return () + _ -> ioeIO $ putStrLn $ "Corrupted PGF" + return gc + Bad s -> fail s + ioeIO $ buildParser opts $ optimize opts gc1 + +optimize :: Options -> PGF -> PGF +optimize opts = cse . suf + where os = flag optOptimizations opts + cse = if OptCSE `Set.member` os then cseOptimize else id + suf = if OptStem `Set.member` os then suffixOptimize else id + +buildParser :: Options -> PGF -> IO PGF +buildParser opts = + case flag optBuildParser opts of + BuildParser -> addParsers opts + DontBuildParser -> return + BuildParserOnDemand -> return . mapConcretes (\cnc -> cnc { cflags = Map.insert (mkCId "parser") "ondemand" (cflags cnc) }) + +batchCompile :: Options -> [FilePath] -> IOE SourceGrammar +batchCompile opts files = do + (_,gr,_) <- foldM (compileModule opts) emptyCompileEnv files + return gr + +-- to compile a set of modules, e.g. an old GF or a .cf file +compileSourceGrammar :: Options -> SourceGrammar -> IOE SourceGrammar +compileSourceGrammar opts gr@(MGrammar ms) = do + (_,gr',_) <- foldM compOne (0,emptySourceGrammar,Map.empty) ms + return gr' + where + compOne env mo = do + (k,mo') <- compileSourceModule opts env mo + extendCompileEnvInt env k Nothing mo' --- file for the same of modif time... + +-- to output an intermediate stage +intermOut :: Options -> Dump -> Doc -> IOE () +intermOut opts d doc + | dump opts d = ioeIO (hPutStrLn stderr (encodeUTF8 (render (text "\n\n--#" <+> text (show d) $$ doc)))) + | otherwise = return () + +-- | the environment +type CompileEnv = (Int,SourceGrammar,ModEnv) + +-- | compile with one module as starting point +-- command-line options override options (marked by --#) in the file +-- As for path: if it is read from file, the file path is prepended to each name. +-- If from command line, it is used as it is. + +compileModule :: Options -- ^ Options from program command line and shell command. + -> CompileEnv -> FilePath -> IOE CompileEnv +compileModule opts1 env file = do + file <- getRealFile file + opts0 <- getOptionsFromFile file + curr_dir <- return $ dropFileName file + lib_dir <- ioeIO $ getLibraryDirectory (addOptions opts0 opts1) + let opts = addOptions (fixRelativeLibPaths curr_dir lib_dir opts0) opts1 + ps0 <- ioeIO $ extendPathEnv opts + let ps = nub (curr_dir : ps0) + ioeIO $ putIfVerb opts $ "module search path:" +++ show ps ---- + let (_,sgr,rfs) = env + files <- getAllFiles opts ps rfs file + ioeIO $ putIfVerb opts $ "files to read:" +++ show files ---- + let names = map justModuleName files + ioeIO $ putIfVerb opts $ "modules to include:" +++ show names ---- + foldM (compileOne opts) (0,sgr,rfs) files + where + getRealFile file = do + exists <- ioeIO $ doesFileExist file + if exists + then return file + else if isRelative file + then do lib_dir <- ioeIO $ getLibraryDirectory opts1 + let file1 = lib_dir </> file + exists <- ioeIO $ doesFileExist file1 + if exists + then return file1 + else ioeErr $ Bad (render (text "None of this files exist:" $$ nest 2 (text file $$ text file1))) + else ioeErr $ Bad (render (text "File" <+> text file <+> text "does not exist.")) + +compileOne :: Options -> CompileEnv -> FullPath -> IOE CompileEnv +compileOne opts env@(_,srcgr,_) file = do + + let putpOpt v m act + | verbAtLeast opts Verbose = putPointE Normal opts v act + | verbAtLeast opts Normal = ioeIO (putStrFlush m) >> act + | otherwise = putPointE Verbose opts v act + + let gf = takeExtensions file + let path = dropFileName file + let name = dropExtension file + + case gf of + + -- for compiled gf, read the file and update environment + -- also undo common subexp optimization, to enable normal computations + ".gfo" -> do + sm00 <- putPointE Normal opts ("+ reading" +++ file) $ ioeIO (decodeFile file) + let sm0 = addOptionsToModule opts sm00 + + intermOut opts DumpSource (ppModule Qualified sm0) + + let sm1 = unsubexpModule sm0 + sm <- {- putPointE Normal opts "creating indirections" $ -} ioeErr $ extendModule srcgr sm1 + + extendCompileEnv env file sm + + -- for gf source, do full compilation and generate code + _ -> do + + let gfo = gf2gfo opts file + b1 <- ioeIO $ doesFileExist file + if not b1 + then compileOne opts env $ gfo + else do + + sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $ + getSourceModule opts file + let sm0 = decodeStringsInModule sm00 + + intermOut opts DumpSource (ppModule Qualified sm0) + + (k',sm) <- compileSourceModule opts env sm0 + putPointE Verbose opts " generating code... " $ generateModuleCode opts gfo sm + extendCompileEnvInt env k' (Just gfo) sm + where + isConcr (_,m) = isModCnc m && mstatus m /= MSIncomplete + +compileSourceModule :: Options -> CompileEnv -> SourceModule -> IOE (Int,SourceModule) +compileSourceModule opts env@(k,gr,_) mo@(i,mi) = do + + let puts = putPointE Quiet opts + putpp = putPointE Verbose opts + + mo1 <- ioeErr $ rebuildModule gr mo + intermOut opts DumpRebuild (ppModule Qualified mo1) + + mo1b <- ioeErr $ extendModule gr mo1 + intermOut opts DumpExtend (ppModule Qualified mo1b) + + case mo1b of + (_,n) | not (isCompleteModule n) -> do + return (k,mo1b) -- refresh would fail, since not renamed + _ -> do + let mos = modules gr + + (mo2,warnings) <- putpp " renaming " $ ioeErr $ runCheck (renameModule mos mo1b) + if null warnings then return () else puts warnings $ return () + intermOut opts DumpRename (ppModule Qualified mo2) + + (mo3,warnings) <- putpp " type checking" $ ioeErr $ runCheck (checkModule mos mo2) + if null warnings then return () else puts warnings $ return () + intermOut opts DumpTypeCheck (ppModule Qualified mo3) + + (k',mo3r:_) <- putpp " refreshing " $ ioeErr $ refreshModule (k,mos) mo3 + intermOut opts DumpRefresh (ppModule Qualified mo3r) + + mo4 <- putpp " optimizing " $ ioeErr $ optimizeModule opts mos mo3r + intermOut opts DumpOptimize (ppModule Qualified mo4) + + return (k',mo4) + +generateModuleCode :: Options -> FilePath -> SourceModule -> IOE SourceModule +generateModuleCode opts file minfo = do + let minfo1 = subexpModule minfo + minfo2 = case minfo1 of + (m,mi) -> (m,mi{jments=Map.filter (\x -> case x of {AnyInd _ _ -> False; _ -> True}) (jments mi) + , positions=Map.empty}) + putPointE Normal opts (" wrote file" +++ file) $ ioeIO $ encodeFile file minfo2 + return minfo1 + +-- auxiliaries + +reverseModules (MGrammar ms) = MGrammar $ reverse ms + +emptyCompileEnv :: CompileEnv +emptyCompileEnv = (0,emptyMGrammar,Map.empty) + +extendCompileEnvInt (_,MGrammar ss,menv) k mfile sm = do + let (mod,imps) = importsOfModule sm + menv2 <- case mfile of + Just file -> do + t <- ioeIO $ getModificationTime file + return $ Map.insert mod (t,imps) menv + _ -> return menv + return (k,MGrammar (sm:ss),menv2) --- reverse later + +extendCompileEnv e@(k,_,_) file sm = extendCompileEnvInt e k (Just file) sm + + diff --git a/src/compiler/GF/Compile/Abstract/Compute.hs b/src/compiler/GF/Compile/Abstract/Compute.hs new file mode 100644 index 000000000..d5c9a163c --- /dev/null +++ b/src/compiler/GF/Compile/Abstract/Compute.hs @@ -0,0 +1,138 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Compile.Abstract.Compute +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/02 20:50:19 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.8 $ +-- +-- computation in abstract syntax w.r.t. explicit definitions. +-- +-- old GF computation; to be updated +----------------------------------------------------------------------------- + +module GF.Compile.Abstract.Compute (LookDef, + compute, + computeAbsTerm, + computeAbsTermIn, + beta + ) where + +import GF.Data.Operations + +import GF.Grammar +import GF.Grammar.Lookup + +import Debug.Trace +import Data.List(intersperse) +import Control.Monad (liftM, liftM2) +import Text.PrettyPrint + +-- for debugging +tracd m t = t +-- tracd = trace + +compute :: SourceGrammar -> Exp -> Err Exp +compute = computeAbsTerm + +computeAbsTerm :: SourceGrammar -> Exp -> Err Exp +computeAbsTerm gr = computeAbsTermIn (lookupAbsDef gr) [] + +-- | a hack to make compute work on source grammar as well +type LookDef = Ident -> Ident -> Err (Maybe Int,Maybe [Equation]) + +computeAbsTermIn :: LookDef -> [Ident] -> Exp -> Err Exp +computeAbsTermIn lookd xs e = errIn (render (text "computing" <+> ppTerm Unqualified 0 e)) $ compt xs e where + compt vv t = case t of +-- Prod x a b -> liftM2 (Prod x) (compt vv a) (compt (x:vv) b) +-- Abs x b -> liftM (Abs x) (compt (x:vv) b) + _ -> do + let t' = beta vv t + (yy,f,aa) <- termForm t' + let vv' = map snd yy ++ vv + aa' <- mapM (compt vv') aa + case look f of + Just eqs -> tracd (text "\nmatching" <+> ppTerm Unqualified 0 f) $ + case findMatch eqs aa' of + Ok (d,g) -> do + --- let (xs,ts) = unzip g + --- ts' <- alphaFreshAll vv' ts + let g' = g --- zip xs ts' + d' <- compt vv' $ substTerm vv' g' d + tracd (text "by Egs:" <+> ppTerm Unqualified 0 d') $ return $ mkAbs yy $ d' + _ -> tracd (text "no match" <+> ppTerm Unqualified 0 t') $ + do + let v = mkApp f aa' + return $ mkAbs yy $ v + _ -> do + let t2 = mkAbs yy $ mkApp f aa' + tracd (text "not defined" <+> ppTerm Unqualified 0 t2) $ return t2 + + look t = case t of + (Q m f) -> case lookd m f of + Ok (_,md) -> md + _ -> Nothing + _ -> Nothing + +beta :: [Ident] -> Exp -> Exp +beta vv c = case c of + Let (x,(_,a)) b -> beta vv $ substTerm vv [(x,beta vv a)] (beta (x:vv) b) + App f a -> + let (a',f') = (beta vv a, beta vv f) in + case f' of + Abs _ x b -> beta vv $ substTerm vv [(x,a')] (beta (x:vv) b) + _ -> (if a'==a && f'==f then id else beta vv) $ App f' a' + Prod b x a t -> Prod b x (beta vv a) (beta (x:vv) t) + Abs b x t -> Abs b x (beta (x:vv) t) + _ -> c + +-- special version of pattern matching, to deal with comp under lambda + +findMatch :: [([Patt],Term)] -> [Term] -> Err (Term, Substitution) +findMatch cases terms = case cases of + [] -> Bad $ render (text "no applicable case for" <+> hcat (punctuate comma (map (ppTerm Unqualified 0) terms))) + (patts,_):_ | length patts /= length terms -> + Bad (render (text "wrong number of args for patterns :" <+> + hsep (map (ppPatt Unqualified 0) patts) <+> text "cannot take" <+> hsep (map (ppTerm Unqualified 0) terms))) + (patts,val):cc -> case mapM tryMatch (zip patts terms) of + Ok substs -> return (tracd (text "value" <+> ppTerm Unqualified 0 val) val, concat substs) + _ -> findMatch cc terms + +tryMatch :: (Patt, Term) -> Err [(Ident, Term)] +tryMatch (p,t) = do + t' <- termForm t + trym p t' + where + + trym p t' = err (\s -> tracd s (Bad s)) (\t -> tracd (prtm p t) (return t)) $ ---- + case (p,t') of + (PW, _) | notMeta t -> return [] -- optimization with wildcard + (PV x, _) | notMeta t -> return [(x,t)] + (PString s, ([],K i,[])) | s==i -> return [] + (PInt s, ([],EInt i,[])) | s==i -> return [] + (PFloat s,([],EFloat i,[])) | s==i -> return [] --- rounding? + (PP q p pp, ([], QC r f, tt)) | + p `eqStrIdent` f && length pp == length tt -> do + matches <- mapM tryMatch (zip pp tt) + return (concat matches) + (PP q p pp, ([], Q r f, tt)) | + p `eqStrIdent` f && length pp == length tt -> do + matches <- mapM tryMatch (zip pp tt) + return (concat matches) + (PT _ p',_) -> trym p' t' + (PAs x p',_) -> do + subst <- trym p' t' + return $ (x,t) : subst + _ -> Bad (render (text "no match in pattern" <+> ppPatt Unqualified 0 p <+> text "for" <+> ppTerm Unqualified 0 t)) + + notMeta e = case e of + Meta _ -> False + App f a -> notMeta f && notMeta a + Abs _ _ b -> notMeta b + _ -> True + + prtm p g = + ppPatt Unqualified 0 p <+> colon $$ hsep (punctuate semi [ppIdent x <+> char '=' <+> ppTerm Unqualified 0 y | (x,y) <- g]) diff --git a/src/compiler/GF/Compile/Abstract/TC.hs b/src/compiler/GF/Compile/Abstract/TC.hs new file mode 100644 index 000000000..163301838 --- /dev/null +++ b/src/compiler/GF/Compile/Abstract/TC.hs @@ -0,0 +1,294 @@ +---------------------------------------------------------------------- +-- | +-- Module : TC +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/02 20:50:19 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.11 $ +-- +-- Thierry Coquand's type checking algorithm that creates a trace +----------------------------------------------------------------------------- + +module GF.Compile.Abstract.TC (AExp(..), + Theory, + checkExp, + inferExp, + checkBranch, + eqVal, + whnf + ) where + +import GF.Data.Operations +import GF.Grammar +import GF.Grammar.Predef + +import Control.Monad +import Data.List (sortBy) +import Data.Maybe +import Text.PrettyPrint + +data AExp = + AVr Ident Val + | ACn QIdent Val + | AType + | AInt Integer + | AFloat Double + | AStr String + | AMeta MetaId Val + | AApp AExp AExp Val + | AAbs Ident Val AExp + | AProd Ident AExp AExp + | AEqs [([Exp],AExp)] --- not used + | ARecType [ALabelling] + | AR [AAssign] + | AP AExp Label Val + | AData Val + deriving (Eq,Show) + +type ALabelling = (Label, AExp) +type AAssign = (Label, (Val, AExp)) + +type Theory = QIdent -> Err Val + +lookupConst :: Theory -> QIdent -> Err Val +lookupConst th f = th f + +lookupVar :: Env -> Ident -> Err Val +lookupVar g x = maybe (Bad (render (text "unknown variable" <+> ppIdent x))) return $ lookup x ((IW,uVal):g) +-- wild card IW: no error produced, ?0 instead. + +type TCEnv = (Int,Env,Env) + +emptyTCEnv :: TCEnv +emptyTCEnv = (0,[],[]) + +whnf :: Val -> Err Val +whnf v = ---- errIn ("whnf" +++ prt v) $ ---- debug + case v of + VApp u w -> do + u' <- whnf u + w' <- whnf w + app u' w' + VClos env e -> eval env e + _ -> return v + +app :: Val -> Val -> Err Val +app u v = case u of + VClos env (Abs _ x e) -> eval ((x,v):env) e + _ -> return $ VApp u v + +eval :: Env -> Exp -> Err Val +eval env e = ---- errIn ("eval" +++ prt e +++ "in" +++ prEnv env) $ + case e of + Vr x -> lookupVar env x + Q m c -> return $ VCn (m,c) + QC m c -> return $ VCn (m,c) ---- == Q ? + Sort c -> return $ VType --- the only sort is Type + App f a -> join $ liftM2 app (eval env f) (eval env a) + RecType xs -> do xs <- mapM (\(l,e) -> eval env e >>= \e -> return (l,e)) xs + return (VRecType xs) + _ -> return $ VClos env e + +eqVal :: Int -> Val -> Val -> Err [(Val,Val)] +eqVal k u1 u2 = ---- errIn (prt u1 +++ "<>" +++ prBracket (show k) +++ prt u2) $ + do + w1 <- whnf u1 + w2 <- whnf u2 + let v = VGen k + case (w1,w2) of + (VApp f1 a1, VApp f2 a2) -> liftM2 (++) (eqVal k f1 f2) (eqVal k a1 a2) + (VClos env1 (Abs _ x1 e1), VClos env2 (Abs _ x2 e2)) -> + eqVal (k+1) (VClos ((x1,v x1):env1) e1) (VClos ((x2,v x1):env2) e2) + (VClos env1 (Prod _ x1 a1 e1), VClos env2 (Prod _ x2 a2 e2)) -> + liftM2 (++) + (eqVal k (VClos env1 a1) (VClos env2 a2)) + (eqVal (k+1) (VClos ((x1,v x1):env1) e1) (VClos ((x2,v x1):env2) e2)) + (VGen i _, VGen j _) -> return [(w1,w2) | i /= j] + (VCn (_, i), VCn (_,j)) -> return [(w1,w2) | i /= j] + --- thus ignore qualifications; valid because inheritance cannot + --- be qualified. Simplifies annotation. AR 17/3/2005 + _ -> return [(w1,w2) | w1 /= w2] +-- invariant: constraints are in whnf + +checkType :: Theory -> TCEnv -> Exp -> Err (AExp,[(Val,Val)]) +checkType th tenv e = checkExp th tenv e vType + +checkExp :: Theory -> TCEnv -> Exp -> Val -> Err (AExp, [(Val,Val)]) +checkExp th tenv@(k,rho,gamma) e ty = do + typ <- whnf ty + let v = VGen k + case e of + Meta m -> return $ (AMeta m typ,[]) + + Abs _ x t -> case typ of + VClos env (Prod _ y a b) -> do + a' <- whnf $ VClos env a --- + (t',cs) <- checkExp th + (k+1,(x,v x):rho, (x,a'):gamma) t (VClos ((y,v x):env) b) + return (AAbs x a' t', cs) + _ -> Bad (render (text "function type expected for" <+> ppTerm Unqualified 0 e <+> text "instead of" <+> ppValue Unqualified 0 typ)) + + Prod _ x a b -> do + testErr (typ == vType) "expected Type" + (a',csa) <- checkType th tenv a + (b',csb) <- checkType th (k+1, (x,v x):rho, (x,VClos rho a):gamma) b + return (AProd x a' b', csa ++ csb) + + R xs -> + case typ of + VRecType ys -> do case [l | (l,_) <- ys, isNothing (lookup l xs)] of + [] -> return () + ls -> fail (render (text "no value given for label:" <+> fsep (punctuate comma (map ppLabel ls)))) + r <- mapM (checkAssign th tenv ys) xs + let (xs,css) = unzip r + return (AR xs, concat css) + _ -> Bad (render (text "record type expected for" <+> ppTerm Unqualified 0 e <+> text "instead of" <+> ppValue Unqualified 0 typ)) + + P r l -> do (r',cs) <- checkExp th tenv r (VRecType [(l,typ)]) + return (AP r' l typ,cs) + + _ -> checkInferExp th tenv e typ + +checkInferExp :: Theory -> TCEnv -> Exp -> Val -> Err (AExp, [(Val,Val)]) +checkInferExp th tenv@(k,_,_) e typ = do + (e',w,cs1) <- inferExp th tenv e + cs2 <- eqVal k w typ + return (e',cs1 ++ cs2) + +inferExp :: Theory -> TCEnv -> Exp -> Err (AExp, Val, [(Val,Val)]) +inferExp th tenv@(k,rho,gamma) e = case e of + Vr x -> mkAnnot (AVr x) $ noConstr $ lookupVar gamma x + Q m c | m == cPredefAbs && isPredefCat c + -> return (ACn (m,c) vType, vType, []) + | otherwise -> mkAnnot (ACn (m,c)) $ noConstr $ lookupConst th (m,c) + QC m c -> mkAnnot (ACn (m,c)) $ noConstr $ lookupConst th (m,c) ---- + EInt i -> return (AInt i, valAbsInt, []) + EFloat i -> return (AFloat i, valAbsFloat, []) + K i -> return (AStr i, valAbsString, []) + Sort _ -> return (AType, vType, []) + RecType xs -> do r <- mapM (checkLabelling th tenv) xs + let (xs,css) = unzip r + return (ARecType xs, vType, concat css) + App f t -> do + (f',w,csf) <- inferExp th tenv f + typ <- whnf w + case typ of + VClos env (Prod _ x a b) -> do + (a',csa) <- checkExp th tenv t (VClos env a) + b' <- whnf $ VClos ((x,VClos rho t):env) b + return $ (AApp f' a' b', b', csf ++ csa) + _ -> Bad (render (text "Prod expected for function" <+> ppTerm Unqualified 0 f <+> text "instead of" <+> ppValue Unqualified 0 typ)) + _ -> Bad (render (text "cannot infer type of expression" <+> ppTerm Unqualified 0 e)) + +checkLabelling :: Theory -> TCEnv -> Labelling -> Err (ALabelling, [(Val,Val)]) +checkLabelling th tenv (lbl,typ) = do + (atyp,cs) <- checkType th tenv typ + return ((lbl,atyp),cs) + +checkAssign :: Theory -> TCEnv -> [(Label,Val)] -> Assign -> Err (AAssign, [(Val,Val)]) +checkAssign th tenv@(k,rho,gamma) typs (lbl,(Just typ,exp)) = do + (atyp,cs1) <- checkType th tenv typ + val <- eval rho typ + cs2 <- case lookup lbl typs of + Nothing -> return [] + Just val0 -> eqVal k val val0 + (aexp,cs3) <- checkExp th tenv exp val + return ((lbl,(val,aexp)),cs1++cs2++cs3) +checkAssign th tenv@(k,rho,gamma) typs (lbl,(Nothing,exp)) = do + case lookup lbl typs of + Nothing -> do (aexp,val,cs) <- inferExp th tenv exp + return ((lbl,(val,aexp)),cs) + Just val -> do (aexp,cs) <- checkExp th tenv exp val + return ((lbl,(val,aexp)),cs) + +checkBranch :: Theory -> TCEnv -> Equation -> Val -> Err (([Exp],AExp),[(Val,Val)]) +checkBranch th tenv b@(ps,t) ty = errIn ("branch" +++ show b) $ + chB tenv' ps' ty + where + + (ps',_,rho2,k') = ps2ts k ps + tenv' = (k, rho2++rho, gamma) ---- k' ? + (k,rho,gamma) = tenv + + chB tenv@(k,rho,gamma) ps ty = case ps of + p:ps2 -> do + typ <- whnf ty + case typ of + VClos env (Prod _ y a b) -> do + a' <- whnf $ VClos env a + (p', sigma, binds, cs1) <- checkP tenv p y a' + let tenv' = (length binds, sigma ++ rho, binds ++ gamma) + ((ps',exp),cs2) <- chB tenv' ps2 (VClos ((y,p'):env) b) + return ((p:ps',exp), cs1 ++ cs2) -- don't change the patt + _ -> Bad (render (text "Product expected for definiens" <+> ppTerm Unqualified 0 t <+> text "instead of" <+> ppValue Unqualified 0 typ)) + [] -> do + (e,cs) <- checkExp th tenv t ty + return (([],e),cs) + checkP env@(k,rho,gamma) t x a = do + (delta,cs) <- checkPatt th env t a + let sigma = [(x, VGen i x) | ((x,_),i) <- zip delta [k..]] + return (VClos sigma t, sigma, delta, cs) + + ps2ts k = foldr p2t ([],0,[],k) + p2t p (ps,i,g,k) = case p of + PW -> (Meta i : ps, i+1,g,k) + PV x -> (Vr x : ps, i, upd x k g,k+1) + PString s -> (K s : ps, i, g, k) + PInt n -> (EInt n : ps, i, g, k) + PFloat n -> (EFloat n : ps, i, g, k) + PP m c xs -> (mkApp (Q m c) xss : ps, j, g',k') + where (xss,j,g',k') = foldr p2t ([],i,g,k) xs + _ -> error $ render (text "undefined p2t case" <+> ppPatt Unqualified 0 p <+> text "in checkBranch") + + upd x k g = (x, VGen k x) : g --- hack to recognize pattern variables + + +checkPatt :: Theory -> TCEnv -> Exp -> Val -> Err (Binds,[(Val,Val)]) +checkPatt th tenv exp val = do + (aexp,_,cs) <- checkExpP tenv exp val + let binds = extrBinds aexp + return (binds,cs) + where + extrBinds aexp = case aexp of + AVr i v -> [(i,v)] + AApp f a _ -> extrBinds f ++ extrBinds a + _ -> [] -- no other cases are possible + +--- ad hoc, to find types of variables + checkExpP tenv@(k,rho,gamma) exp val = case exp of + Meta m -> return $ (AMeta m val, val, []) + Vr x -> return $ (AVr x val, val, []) + EInt i -> return (AInt i, valAbsInt, []) + EFloat i -> return (AFloat i, valAbsFloat, []) + K s -> return (AStr s, valAbsString, []) + + Q m c -> do + typ <- lookupConst th (m,c) + return $ (ACn (m,c) typ, typ, []) + QC m c -> do + typ <- lookupConst th (m,c) + return $ (ACn (m,c) typ, typ, []) ---- + App f t -> do + (f',w,csf) <- checkExpP tenv f val + typ <- whnf w + case typ of + VClos env (Prod _ x a b) -> do + (a',_,csa) <- checkExpP tenv t (VClos env a) + b' <- whnf $ VClos ((x,VClos rho t):env) b + return $ (AApp f' a' b', b', csf ++ csa) + _ -> Bad (render (text "Prod expected for function" <+> ppTerm Unqualified 0 f <+> text "instead of" <+> ppValue Unqualified 0 typ)) + _ -> Bad (render (text "cannot typecheck pattern" <+> ppTerm Unqualified 0 exp)) + +-- auxiliaries + +noConstr :: Err Val -> Err (Val,[(Val,Val)]) +noConstr er = er >>= (\v -> return (v,[])) + +mkAnnot :: (Val -> AExp) -> Err (Val,[(Val,Val)]) -> Err (AExp,Val,[(Val,Val)]) +mkAnnot a ti = do + (v,cs) <- ti + return (a v, v, cs) + diff --git a/src/compiler/GF/Compile/Abstract/TypeCheck.hs b/src/compiler/GF/Compile/Abstract/TypeCheck.hs new file mode 100644 index 000000000..2632c54dd --- /dev/null +++ b/src/compiler/GF/Compile/Abstract/TypeCheck.hs @@ -0,0 +1,83 @@ +---------------------------------------------------------------------- +-- | +-- Module : TypeCheck +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/15 16:22:02 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.16 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Compile.Abstract.TypeCheck (-- * top-level type checking functions; TC should not be called directly. + checkContext, + checkTyp, + checkDef, + checkConstrs, + ) where + +import GF.Data.Operations + +import GF.Infra.CheckM +import GF.Grammar +import GF.Grammar.Lookup +import GF.Grammar.Unify +import GF.Compile.Refresh +import GF.Compile.Abstract.Compute +import GF.Compile.Abstract.TC + +import Text.PrettyPrint +import Control.Monad (foldM, liftM, liftM2) + +-- | invariant way of creating TCEnv from context +initTCEnv gamma = + (length gamma,[(x,VGen i x) | ((x,_),i) <- zip gamma [0..]], gamma) + +-- interface to TC type checker + +type2val :: Type -> Val +type2val = VClos [] + +cont2exp :: Context -> Exp +cont2exp c = mkProd c eType [] -- to check a context + +cont2val :: Context -> Val +cont2val = type2val . cont2exp + +-- some top-level batch-mode checkers for the compiler + +justTypeCheck :: SourceGrammar -> Exp -> Val -> Err Constraints +justTypeCheck gr e v = do + (_,constrs0) <- checkExp (grammar2theory gr) (initTCEnv []) e v + (constrs1,_) <- unifyVal constrs0 + return $ filter notJustMeta constrs1 + +notJustMeta (c,k) = case (c,k) of + (VClos g1 (Meta m1), VClos g2 (Meta m2)) -> False + _ -> True + +grammar2theory :: SourceGrammar -> Theory +grammar2theory gr (m,f) = case lookupFunType gr m f of + Ok t -> return $ type2val t + Bad s -> case lookupCatContext gr m f of + Ok cont -> return $ cont2val cont + _ -> Bad s + +checkContext :: SourceGrammar -> Context -> [Message] +checkContext st = checkTyp st . cont2exp + +checkTyp :: SourceGrammar -> Type -> [Message] +checkTyp gr typ = err (\x -> [text x]) ppConstrs $ justTypeCheck gr typ vType + +checkDef :: SourceGrammar -> Fun -> Type -> [Equation] -> [Message] +checkDef gr (m,fun) typ eqs = err (\x -> [text x]) ppConstrs $ do + bcs <- mapM (\b -> checkBranch (grammar2theory gr) (initTCEnv []) b (type2val typ)) eqs + let (bs,css) = unzip bcs + (constrs,_) <- unifyVal (concat css) + return $ filter notJustMeta constrs + +checkConstrs :: SourceGrammar -> Cat -> [Ident] -> [String] +checkConstrs gr cat _ = [] ---- check constructors! diff --git a/src/compiler/GF/Compile/CheckGrammar.hs b/src/compiler/GF/Compile/CheckGrammar.hs new file mode 100644 index 000000000..f4765eb26 --- /dev/null +++ b/src/compiler/GF/Compile/CheckGrammar.hs @@ -0,0 +1,284 @@ +---------------------------------------------------------------------- +-- | +-- Module : CheckGrammar +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:33 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.31 $ +-- +-- AR 4\/12\/1999 -- 1\/4\/2000 -- 8\/9\/2001 -- 15\/5\/2002 -- 27\/11\/2002 -- 18\/6\/2003 +-- +-- type checking also does the following modifications: +-- +-- - types of operations and local constants are inferred and put in place +-- +-- - both these types and linearization types are computed +-- +-- - tables are type-annotated +----------------------------------------------------------------------------- + +module GF.Compile.CheckGrammar(checkModule) where + +import GF.Infra.Ident +import GF.Infra.Modules + +import GF.Compile.Abstract.TypeCheck +import GF.Compile.Concrete.TypeCheck + +import GF.Grammar +import GF.Grammar.Lexer +import GF.Grammar.Lookup +import GF.Grammar.Predef +import GF.Grammar.PatternMatch + +import GF.Data.Operations +import GF.Infra.CheckM + +import Data.List +import qualified Data.Set as Set +import Control.Monad +import Text.PrettyPrint + +-- | checking is performed in the dependency order of modules +checkModule :: [SourceModule] -> SourceModule -> Check SourceModule +checkModule ms m@(name,mo) = checkIn (text "checking module" <+> ppIdent name) $ do + checkRestrictedInheritance ms m + m <- case mtype mo of + MTConcrete a -> do let gr = MGrammar (m:ms) + abs <- checkErr $ lookupModule gr a + checkCompleteGrammar gr (a,abs) m + _ -> return m + infos <- checkErr $ topoSortJments m + foldM updateCheckInfo m infos + where + updateCheckInfo (name,mo) (i,info) = do + info <- checkInfo ms (name,mo) i info + return (name,updateModule mo i info) + +-- check if restricted inheritance modules are still coherent +-- i.e. that the defs of remaining names don't depend on omitted names +checkRestrictedInheritance :: [SourceModule] -> SourceModule -> Check () +checkRestrictedInheritance mos (name,mo) = do + let irs = [ii | ii@(_,mi) <- extend mo, mi /= MIAll] -- names with restr. inh. + let mrs = [((i,m),mi) | (i,m) <- mos, Just mi <- [lookup i irs]] + -- the restr. modules themself, with restr. infos + mapM_ checkRem mrs + where + checkRem ((i,m),mi) = do + let (incl,excl) = partition (isInherited mi) (map fst (tree2list (jments m))) + let incld c = Set.member c (Set.fromList incl) + let illegal c = Set.member c (Set.fromList excl) + let illegals = [(f,is) | + (f,cs) <- allDeps, incld f, let is = filter illegal cs, not (null is)] + case illegals of + [] -> return () + cs -> checkError (text "In inherited module" <+> ppIdent i <> text ", dependence of excluded constants:" $$ + nest 2 (vcat [ppIdent f <+> text "on" <+> fsep (map ppIdent is) | (f,is) <- cs])) + allDeps = concatMap (allDependencies (const True) . jments . snd) mos + +checkCompleteGrammar :: SourceGrammar -> SourceModule -> SourceModule -> Check SourceModule +checkCompleteGrammar gr (am,abs) (cm,cnc) = do + let jsa = jments abs + let jsc = jments cnc + + -- check that all concrete constants are in abstract; build types for all lin + jsc <- foldM checkCnc emptyBinTree (tree2list jsc) + + -- check that all abstract constants are in concrete; build default lin and lincats + jsc <- foldM checkAbs jsc (tree2list jsa) + + return (cm,replaceJudgements cnc jsc) + where + checkAbs js i@(c,info) = + case info of + AbsFun (Just ty) _ _ -> do let mb_def = do + let (cxt,(_,i),_) = typeForm ty + info <- lookupIdent i js + info <- case info of + (AnyInd _ m) -> do (m,info) <- lookupOrigInfo gr m i + return info + _ -> return info + case info of + CncCat (Just (RecType [])) _ _ -> return (foldr (\_ -> Abs Explicit identW) (R []) cxt) + _ -> Bad "no def lin" + + case lookupIdent c js of + Ok (AnyInd _ _) -> return js + Ok (CncFun ty (Just def) pn) -> + return $ updateTree (c,CncFun ty (Just def) pn) js + Ok (CncFun ty Nothing pn) -> + case mb_def of + Ok def -> return $ updateTree (c,CncFun ty (Just def) pn) js + Bad _ -> do checkWarn $ text "no linearization of" <+> ppIdent c + return js + _ -> do + case mb_def of + Ok def -> do (cont,val) <- linTypeOfType gr cm ty + let linty = (snd (valCat ty),cont,val) + return $ updateTree (c,CncFun (Just linty) (Just def) Nothing) js + Bad _ -> do checkWarn $ text "no linearization of" <+> ppIdent c + return js + AbsCat (Just _) _ -> case lookupIdent c js of + Ok (AnyInd _ _) -> return js + Ok (CncCat (Just _) _ _) -> return js + Ok (CncCat _ mt mp) -> do + checkWarn $ + text "no linearization type for" <+> ppIdent c <> text ", inserting default {s : Str}" + return $ updateTree (c,CncCat (Just defLinType) mt mp) js + _ -> do + checkWarn $ + text "no linearization type for" <+> ppIdent c <> text ", inserting default {s : Str}" + return $ updateTree (c,CncCat (Just defLinType) Nothing Nothing) js + _ -> return js + + checkCnc js i@(c,info) = + case info of + CncFun _ d pn -> case lookupOrigInfo gr am c of + Ok (_,AbsFun (Just ty) _ _) -> + do (cont,val) <- linTypeOfType gr cm ty + let linty = (snd (valCat ty),cont,val) + return $ updateTree (c,CncFun (Just linty) d pn) js + _ -> do checkWarn $ text "function" <+> ppIdent c <+> text "is not in abstract" + return js + CncCat _ _ _ -> case lookupOrigInfo gr am c of + Ok _ -> return $ updateTree i js + _ -> do checkWarn $ text "category" <+> ppIdent c <+> text "is not in abstract" + return js + _ -> return $ updateTree i js + + +-- | General Principle: only Just-values are checked. +-- A May-value has always been checked in its origin module. +checkInfo :: [SourceModule] -> SourceModule -> Ident -> Info -> Check Info +checkInfo ms (m,mo) c info = do + checkReservedId c + case info of + AbsCat (Just cont) _ -> mkCheck "category" $ + checkContext gr cont + + AbsFun (Just typ0) ma md -> do + typ <- compAbsTyp [] typ0 -- to calculate let definitions + mkCheck "type of function" $ + checkTyp gr typ + case md of + Just eqs -> mkCheck "definition of function" $ + checkDef gr (m,c) typ eqs + Nothing -> return info + return (AbsFun (Just typ) ma md) + + CncFun linty@(Just (cat,cont,val)) (Just trm) mpr -> chIn "linearization of" $ do + (trm',_) <- checkLType gr [] trm (mkFunType (map (\(_,_,ty) -> ty) cont) val) -- erases arg vars + mpr <- checkPrintname gr mpr + return (CncFun linty (Just trm') mpr) + + CncCat (Just typ) mdef mpr -> chIn "linearization type of" $ do + (typ,_) <- checkLType gr [] typ typeType + typ <- computeLType gr [] typ + mdef <- case mdef of + Just def -> do + (def,_) <- checkLType gr [] def (mkFunType [typeStr] typ) + return $ Just def + _ -> return mdef + mpr <- checkPrintname gr mpr + return (CncCat (Just typ) mdef mpr) + + ResOper pty pde -> chIn "operation" $ do + (pty', pde') <- case (pty,pde) of + (Just ty, Just de) -> do + ty' <- checkLType gr [] ty typeType >>= computeLType gr [] . fst + (de',_) <- checkLType gr [] de ty' + return (Just ty', Just de') + (_ , Just de) -> do + (de',ty') <- inferLType gr [] de + return (Just ty', Just de') + (_ , Nothing) -> do + checkError (text "No definition given to the operation") + return (ResOper pty' pde') + + ResOverload os tysts -> chIn "overloading" $ do + tysts' <- mapM (uncurry $ flip (checkLType gr [])) tysts -- return explicit ones + tysts0 <- checkErr $ lookupOverload gr m c -- check against inherited ones too + tysts1 <- mapM (uncurry $ flip (checkLType gr [])) + [(mkFunType args val,tr) | (args,(val,tr)) <- tysts0] + --- this can only be a partial guarantee, since matching + --- with value type is only possible if expected type is given + checkUniq $ + sort [let (xs,t) = typeFormCnc x in t : map (\(b,x,t) -> t) xs | (_,x) <- tysts1] + return (ResOverload os [(y,x) | (x,y) <- tysts']) + + ResParam (Just pcs) _ -> chIn "parameter type" $ do + ts <- checkErr $ liftM concat $ mapM mkPar pcs + return (ResParam (Just pcs) (Just ts)) + + _ -> return info + where + gr = MGrammar ((m,mo) : ms) + chIn cat = checkIn (text "Happened in" <+> text cat <+> ppIdent c <+> ppPosition mo c <> colon) + + mkPar (f,co) = do + vs <- liftM combinations $ mapM (\(_,_,ty) -> allParamValues gr ty) co + return $ map (mkApp (QC m f)) vs + + checkUniq xss = case xss of + x:y:xs + | x == y -> checkError $ text "ambiguous for type" <+> + ppType (mkFunType (tail x) (head x)) + | otherwise -> checkUniq $ y:xs + _ -> return () + + mkCheck cat ss = case ss of + [] -> return info + _ -> checkError (vcat ss $$ text "in" <+> text cat <+> ppIdent c <+> ppPosition mo c) + + compAbsTyp g t = case t of + Vr x -> maybe (checkError (text "no value given to variable" <+> ppIdent x)) return $ lookup x g + Let (x,(_,a)) b -> do + a' <- compAbsTyp g a + compAbsTyp ((x, a'):g) b + Prod b x a t -> do + a' <- compAbsTyp g a + t' <- compAbsTyp ((x,Vr x):g) t + return $ Prod b x a' t' + Abs _ _ _ -> return t + _ -> composOp (compAbsTyp g) t + + +checkPrintname :: SourceGrammar -> Maybe Term -> Check (Maybe Term) +checkPrintname gr (Just t) = do (t,_) <- checkLType gr [] t typeStr + return (Just t) +checkPrintname gr Nothing = return Nothing + +-- | for grammars obtained otherwise than by parsing ---- update!! +checkReservedId :: Ident -> Check () +checkReservedId x + | isReservedWord (ident2bs x) = checkWarn (text "reserved word used as identifier:" <+> ppIdent x) + | otherwise = return () + +-- auxiliaries + +-- | linearization types and defaults +linTypeOfType :: SourceGrammar -> Ident -> Type -> Check (Context,Type) +linTypeOfType cnc m typ = do + let (cont,cat) = typeSkeleton typ + val <- lookLin cat + args <- mapM mkLinArg (zip [0..] cont) + return (args, val) + where + mkLinArg (i,(n,mc@(m,cat))) = do + val <- lookLin mc + let vars = mkRecType varLabel $ replicate n typeStr + symb = argIdent n cat i + rec <- if n==0 then return val else + checkErr $ errIn (render (text "extending" $$ + nest 2 (ppTerm Unqualified 0 vars) $$ + text "with" $$ + nest 2 (ppTerm Unqualified 0 val))) $ + plusRecType vars val + return (Explicit,symb,rec) + lookLin (_,c) = checks [ --- rather: update with defLinType ? + checkErr (lookupLincat cnc m c) >>= computeLType cnc [] + ,return defLinType + ] diff --git a/src/compiler/GF/Compile/Coding.hs b/src/compiler/GF/Compile/Coding.hs new file mode 100644 index 000000000..49538bd35 --- /dev/null +++ b/src/compiler/GF/Compile/Coding.hs @@ -0,0 +1,55 @@ +module GF.Compile.Coding where + +import GF.Grammar.Grammar +import GF.Grammar.Macros +import GF.Text.Coding +import GF.Infra.Modules +import GF.Infra.Option +import GF.Data.Operations + +import Data.Char + +encodeStringsInModule :: SourceModule -> SourceModule +encodeStringsInModule = codeSourceModule (encodeUnicode UTF_8) + +decodeStringsInModule :: SourceModule -> SourceModule +decodeStringsInModule mo = codeSourceModule (decodeUnicode (flag optEncoding (flagsModule mo))) mo + +codeSourceModule :: (String -> String) -> SourceModule -> SourceModule +codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo))) + where + codj (c,info) = case info of + ResOper pty pt -> ResOper (fmap (codeTerm co) pty) (fmap (codeTerm co) pt) + ResOverload es tyts -> ResOverload es [(codeTerm co ty,codeTerm co t) | (ty,t) <- tyts] + CncCat pty pt mpr -> CncCat pty (fmap (codeTerm co) pt) (fmap (codeTerm co) mpr) + CncFun mty pt mpr -> CncFun mty (fmap (codeTerm co) pt) (fmap (codeTerm co) mpr) + _ -> info + +codeTerm :: (String -> String) -> Term -> Term +codeTerm co t = case t of + K s -> K (co s) + T ty cs -> T ty [(codp p,codeTerm co v) | (p,v) <- cs] + EPatt p -> EPatt (codp p) + _ -> composSafeOp (codeTerm co) t + where + codp p = case p of --- really: composOpPatt + PR rs -> PR [(l,codp p) | (l,p) <- rs] + PString s -> PString (co s) + PChars s -> PChars (co s) + PT x p -> PT x (codp p) + PAs x p -> PAs x (codp p) + PNeg p -> PNeg (codp p) + PRep p -> PRep (codp p) + PSeq p q -> PSeq (codp p) (codp q) + PAlt p q -> PAlt (codp p) (codp q) + _ -> p + +-- | Run an encoding function on all string literals within the given string. +codeStringLiterals :: (String -> String) -> String -> String +codeStringLiterals _ [] = [] +codeStringLiterals co ('"':cs) = '"' : inStringLiteral cs + where inStringLiteral [] = error "codeStringLiterals: unterminated string literal" + inStringLiteral ('"':ds) = '"' : codeStringLiterals co ds + inStringLiteral ('\\':d:ds) = '\\' : co [d] ++ inStringLiteral ds + inStringLiteral (d:ds) = co [d] ++ inStringLiteral ds +codeStringLiterals co (c:cs) = c : codeStringLiterals co cs diff --git a/src/compiler/GF/Compile/Concrete/AppPredefined.hs b/src/compiler/GF/Compile/Concrete/AppPredefined.hs new file mode 100644 index 000000000..c05127191 --- /dev/null +++ b/src/compiler/GF/Compile/Concrete/AppPredefined.hs @@ -0,0 +1,158 @@ +---------------------------------------------------------------------- +-- | +-- Module : AppPredefined +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/06 14:21:34 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.13 $ +-- +-- Predefined function type signatures and definitions. +----------------------------------------------------------------------------- + +module GF.Compile.Concrete.AppPredefined (isInPredefined, typPredefined, appPredefined + ) where + +import GF.Infra.Ident +import GF.Data.Operations +import GF.Grammar.Predef +import GF.Grammar.Grammar +import GF.Grammar.Macros +import GF.Grammar.Printer +import qualified Data.ByteString.Char8 as BS +import Text.PrettyPrint + +-- predefined function type signatures and definitions. AR 12/3/2003. + +isInPredefined :: Ident -> Bool +isInPredefined = err (const True) (const False) . typPredefined + +typPredefined :: Ident -> Err Type +typPredefined f + | f == cInt = return typePType + | f == cFloat = return typePType + | f == cErrorType = return typeType + | f == cInts = return $ mkFunType [typeInt] typePType + | f == cPBool = return typePType + | f == cError = return $ mkFunType [typeStr] typeError -- non-can. of empty set + | f == cPFalse = return $ typePBool + | f == cPTrue = return $ typePBool + | f == cDp = return $ mkFunType [typeInt,typeTok] typeTok + | f == cDrop = return $ mkFunType [typeInt,typeTok] typeTok + | f == cEqInt = return $ mkFunType [typeInt,typeInt] typePBool + | f == cLessInt = return $ mkFunType [typeInt,typeInt] typePBool + | f == cEqStr = return $ mkFunType [typeTok,typeTok] typePBool + | f == cLength = return $ mkFunType [typeTok] typeInt + | f == cOccur = return $ mkFunType [typeTok,typeTok] typePBool + | f == cOccurs = return $ mkFunType [typeTok,typeTok] typePBool + | f == cPlus = return $ mkFunType [typeInt,typeInt] (typeInt) +---- "read" -> (P : Type) -> Tok -> P + | f == cShow = return $ mkProd -- (P : PType) -> P -> Tok + [(Explicit,varP,typePType),(Explicit,identW,Vr varP)] typeStr [] + | f == cToStr = return $ mkProd -- (L : Type) -> L -> Str + [(Explicit,varL,typeType),(Explicit,identW,Vr varL)] typeStr [] + | f == cMapStr = return $ mkProd -- (L : Type) -> (Str -> Str) -> L -> L + [(Explicit,varL,typeType),(Explicit,identW,mkFunType [typeStr] typeStr),(Explicit,identW,Vr varL)] (Vr varL) [] + | f == cTake = return $ mkFunType [typeInt,typeTok] typeTok + | f == cTk = return $ mkFunType [typeInt,typeTok] typeTok + | otherwise = Bad (render (text "unknown in Predef:" <+> ppIdent f)) + +varL :: Ident +varL = identC (BS.pack "L") + +varP :: Ident +varP = identC (BS.pack "P") + +appPredefined :: Term -> Err (Term,Bool) +appPredefined t = case t of + App f x0 -> do + (x,_) <- appPredefined x0 + case f of + -- one-place functions + Q mod f | mod == cPredef -> + case x of + (K s) | f == cLength -> retb $ EInt $ toInteger $ length s + _ -> retb t + + -- two-place functions + App (Q mod f) z0 | mod == cPredef -> do + (z,_) <- appPredefined z0 + case (norm z, norm x) of + (EInt i, K s) | f == cDrop -> retb $ K (drop (fi i) s) + (EInt i, K s) | f == cTake -> retb $ K (take (fi i) s) + (EInt i, K s) | f == cTk -> retb $ K (take (max 0 (length s - fi i)) s) + (EInt i, K s) | f == cDp -> retb $ K (drop (max 0 (length s - fi i)) s) + (K s, K t) | f == cEqStr -> retb $ if s == t then predefTrue else predefFalse + (K s, K t) | f == cOccur -> retb $ if substring s t then predefTrue else predefFalse + (K s, K t) | f == cOccurs -> retb $ if any (flip elem t) s then predefTrue else predefFalse + (EInt i, EInt j) | f == cEqInt -> retb $ if i==j then predefTrue else predefFalse + (EInt i, EInt j) | f == cLessInt -> retb $ if i<j then predefTrue else predefFalse + (EInt i, EInt j) | f == cPlus -> retb $ EInt $ i+j + (_, t) | f == cShow -> retb $ foldr C Empty $ map K $ words $ render (ppTerm Unqualified 0 t) + (_, K s) | f == cRead -> retb $ Cn (identC (BS.pack s)) --- because of K, only works for atomic tags + (_, t) | f == cToStr -> trm2str t >>= retb + _ -> retb t ---- prtBad "cannot compute predefined" t + + -- three-place functions + App (App (Q mod f) z0) y0 | mod == cPredef -> do + (y,_) <- appPredefined y0 + (z,_) <- appPredefined z0 + case (z, y, x) of + (ty,op,t) | f == cMapStr -> retf $ mapStr ty op t + _ -> retb t ---- prtBad "cannot compute predefined" t + + _ -> retb t ---- prtBad "cannot compute predefined" t + _ -> retb t + ---- should really check the absence of arg variables + where + retb t = return (retc t,True) -- no further computing needed + retf t = return (retc t,False) -- must be computed further + retc t = case t of + K [] -> t + K s -> foldr1 C (map K (words s)) + _ -> t + norm t = case t of + Empty -> K [] + C u v -> case (norm u,norm v) of + (K x,K y) -> K (x +++ y) + _ -> t + _ -> t + fi = fromInteger + +-- read makes variables into constants + +predefTrue = QC cPredef cPTrue +predefFalse = QC cPredef cPFalse + +substring :: String -> String -> Bool +substring s t = case (s,t) of + (c:cs, d:ds) -> (c == d && substring cs ds) || substring s ds + ([],_) -> True + _ -> False + +trm2str :: Term -> Err Term +trm2str t = case t of + R ((_,(_,s)):_) -> trm2str s + T _ ((_,s):_) -> trm2str s + V _ (s:_) -> trm2str s + C _ _ -> return $ t + K _ -> return $ t + S c _ -> trm2str c + Empty -> return $ t + _ -> Bad (render (text "cannot get Str from term" <+> ppTerm Unqualified 0 t)) + +-- simultaneous recursion on type and term: type arg is essential! +-- But simplify the task by assuming records are type-annotated +-- (this has been done in type checking) +mapStr :: Type -> Term -> Term -> Term +mapStr ty f t = case (ty,t) of + _ | elem ty [typeStr,typeTok] -> App f t + (_, R ts) -> R [(l,mapField v) | (l,v) <- ts] + (Table a b,T ti cs) -> T ti [(p,mapStr b f v) | (p,v) <- cs] + _ -> t + where + mapField (mty,te) = case mty of + Just ty -> (mty,mapStr ty f te) + _ -> (mty,te) diff --git a/src/compiler/GF/Compile/Concrete/Compute.hs b/src/compiler/GF/Compile/Concrete/Compute.hs new file mode 100644 index 000000000..9c016116b --- /dev/null +++ b/src/compiler/GF/Compile/Concrete/Compute.hs @@ -0,0 +1,456 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Compile.Concrete.Compute +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/01 15:39:12 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.19 $ +-- +-- Computation of source terms. Used in compilation and in @cc@ command. +----------------------------------------------------------------------------- + +module GF.Compile.Concrete.Compute (computeConcrete, computeTerm,computeConcreteRec) where + +import GF.Data.Operations +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Option +import GF.Infra.Modules +import GF.Data.Str +import GF.Grammar.Printer +import GF.Grammar.Predef +import GF.Grammar.Macros +import GF.Grammar.Lookup +import GF.Compile.Refresh +import GF.Grammar.PatternMatch +import GF.Grammar.Lockfield (isLockLabel,unlockRecord) ---- + +import GF.Compile.Concrete.AppPredefined + +import Data.List (nub,intersperse) +import Control.Monad (liftM2, liftM) +import Text.PrettyPrint + +-- | computation of concrete syntax terms into normal form +-- used mainly for partial evaluation +computeConcrete :: SourceGrammar -> Term -> Err Term +computeConcrete g t = {- refreshTerm t >>= -} computeTerm g [] t +computeConcreteRec g t = {- refreshTerm t >>= -} computeTermOpt True g [] t + +computeTerm :: SourceGrammar -> Substitution -> Term -> Err Term +computeTerm = computeTermOpt False + +-- rec=True is used if it cannot be assumed that looked-up constants +-- have already been computed (mainly with -optimize=noexpand in .gfr) + +computeTermOpt :: Bool -> SourceGrammar -> Substitution -> Term -> Err Term +computeTermOpt rec gr = comput True where + + comput full g t = ---- errIn ("subterm" +++ prt t) $ --- for debugging + case t of + + Q p c | p == cPredef -> return t + | otherwise -> look p c + + Vr x -> do + t' <- maybe (Bad (render (text "no value given to variable" <+> ppIdent x))) return $ lookup x g + case t' of + _ | t == t' -> return t + _ -> comp g t' + + -- Abs x@(IA _) b -> do + Abs _ _ _ | full -> do + let (xs,b1) = termFormCnc t + b' <- comp ([(x,Vr x) | (_,x) <- xs] ++ g) b1 + return $ mkAbs xs b' + -- b' <- comp (ext x (Vr x) g) b + -- return $ Abs x b' + Abs _ _ _ -> return t -- hnf + + Let (x,(_,a)) b -> do + a' <- comp g a + comp (ext x a' g) b + + Prod b x a t -> do + a' <- comp g a + t' <- comp (ext x (Vr x) g) t + return $ Prod b x a' t' + + -- beta-convert + App f a -> case appForm t of + (h,as) | length as > 1 -> do + h' <- hnf g h + as' <- mapM (comp g) as + case h' of + _ | not (null [() | FV _ <- as']) -> compApp g (mkApp h' as') + c@(QC _ _) -> do + return $ mkApp c as' + Q mod f | mod == cPredef -> do + (t',b) <- appPredefined (mkApp h' as') + if b then return t' else comp g t' + + Abs _ _ _ -> do + let (xs,b) = termFormCnc h' + let g' = (zip (map snd xs) as') ++ g + let as2 = drop (length xs) as' + let xs2 = drop (length as') xs + b' <- comp g' (mkAbs xs2 b) + if null as2 then return b' else comp g (mkApp b' as2) + + _ -> compApp g (mkApp h' as') + _ -> compApp g t + + P t l | isLockLabel l -> return $ R [] + ---- a workaround 18/2/2005: take this away and find the reason + ---- why earlier compilation destroys the lock field + + + P t l -> do + t' <- comp g t + case t' of + FV rs -> mapM (\c -> comp g (P c l)) rs >>= returnC . variants + R r -> maybe (Bad (render (text "no value for label" <+> ppLabel l))) (comp g . snd) $ + lookup l $ reverse r + + ExtR a (R b) -> + case comp g (P (R b) l) of + Ok v -> return v + _ -> comp g (P a l) + +--- { - --- this is incorrect, since b can contain the proper value + ExtR (R a) b -> -- NOT POSSIBLE both a and b records! + case comp g (P (R a) l) of + Ok v -> return v + _ -> comp g (P b l) +--- - } --- + + S (T i cs) e -> prawitz g i (flip P l) cs e + S (V i cs) e -> prawitzV g i (flip P l) cs e + + _ -> returnC $ P t' l + + S t v -> do + t' <- compTable g t + v' <- comp g v + t1 <- case t' of +---- V (RecType fs) _ -> uncurrySelect g fs t' v' +---- T (TComp (RecType fs)) _ -> uncurrySelect g fs t' v' + _ -> return $ S t' v' + compSelect g t1 + + -- normalize away empty tokens + K "" -> return Empty + + -- glue if you can + Glue x0 y0 -> do + x <- comp g x0 + y <- comp g y0 + case (x,y) of + (FV ks,_) -> do + kys <- mapM (comp g . flip Glue y) ks + return $ variants kys + (_,FV ks) -> do + xks <- mapM (comp g . Glue x) ks + return $ variants xks + + (S (T i cs) e, s) -> prawitz g i (flip Glue s) cs e + (s, S (T i cs) e) -> prawitz g i (Glue s) cs e + (S (V i cs) e, s) -> prawitzV g i (flip Glue s) cs e + (s, S (V i cs) e) -> prawitzV g i (Glue s) cs e + (_,Empty) -> return x + (Empty,_) -> return y + (K a, K b) -> return $ K (a ++ b) + (_, Alts (d,vs)) -> do +---- (K a, Alts (d,vs)) -> do + let glx = Glue x + comp g $ Alts (glx d, [(glx v,c) | (v,c) <- vs]) + (Alts _, ka) -> checks [do + y' <- strsFromTerm ka +---- (Alts _, K a) -> checks [do + x' <- strsFromTerm x -- this may fail when compiling opers + return $ variants [ + foldr1 C (map K (str2strings (glueStr v u))) | v <- x', u <- y'] +---- foldr1 C (map K (str2strings (glueStr v (str a)))) | v <- x'] + ,return $ Glue x y + ] + (C u v,_) -> comp g $ C u (Glue v y) + + _ -> do + mapM_ checkNoArgVars [x,y] + r <- composOp (comp g) t + returnC r + + Alts (d,aa) -> do + d' <- comp g d + aa' <- mapM (compInAlts g) aa + returnC (Alts (d',aa')) + + -- remove empty + C a b -> do + a' <- comp g a + b' <- comp g b + case (a',b') of + (Alts _, K a) -> checks [do + as <- strsFromTerm a' -- this may fail when compiling opers + return $ variants [ + foldr1 C (map K (str2strings (plusStr v (str a)))) | v <- as] + , + return $ C a' b' + ] + (Empty,_) -> returnC b' + (_,Empty) -> returnC a' + _ -> returnC $ C a' b' + + -- reduce free variation as much as you can + FV ts -> mapM (comp g) ts >>= returnC . variants + + -- merge record extensions if you can + ExtR r s -> do + r' <- comp g r + s' <- comp g s + case (r',s') of + (R rs, R ss) -> plusRecord r' s' + (RecType rs, RecType ss) -> plusRecType r' s' + _ -> return $ ExtR r' s' + + ELin c r -> do + r' <- comp g r + unlockRecord c r' + + T _ _ -> compTable g t + V _ _ -> compTable g t + + -- otherwise go ahead + _ -> composOp (comp g) t >>= returnC + + where + + compApp g (App f a) = do + f' <- hnf g f + a' <- comp g a + case (f',a') of + (Abs _ x b, FV as) -> + mapM (\c -> comp (ext x c g) b) as >>= return . variants + (_, FV as) -> mapM (\c -> comp g (App f' c)) as >>= return . variants + (FV fs, _) -> mapM (\c -> comp g (App c a')) fs >>= return . variants + (Abs _ x b,_) -> comp (ext x a' g) b + + (QC _ _,_) -> returnC $ App f' a' + + (S (T i cs) e,_) -> prawitz g i (flip App a') cs e + (S (V i cs) e,_) -> prawitzV g i (flip App a') cs e + + _ -> do + (t',b) <- appPredefined (App f' a') + if b then return t' else comp g t' + + hnf = comput False + comp = comput True + + look p c + | rec = lookupResDef gr p c >>= comp [] + | otherwise = lookupResDef gr p c + + ext x a g = (x,a):g + + returnC = return --- . computed + + variants ts = case nub ts of + [t] -> t + ts -> FV ts + + isCan v = case v of + Con _ -> True + QC _ _ -> True + App f a -> isCan f && isCan a + R rs -> all (isCan . snd . snd) rs + _ -> False + + compPatternMacro p = case p of + PM m c -> case look m c of + Ok (EPatt p') -> compPatternMacro p' + _ -> Bad (render (text "pattern expected as value of" $$ nest 2 (ppPatt Unqualified 0 p))) + PAs x p -> do + p' <- compPatternMacro p + return $ PAs x p' + PAlt p q -> do + p' <- compPatternMacro p + q' <- compPatternMacro q + return $ PAlt p' q' + PSeq p q -> do + p' <- compPatternMacro p + q' <- compPatternMacro q + return $ PSeq p' q' + PRep p -> do + p' <- compPatternMacro p + return $ PRep p' + PNeg p -> do + p' <- compPatternMacro p + return $ PNeg p' + PR rs -> do + rs' <- mapPairsM compPatternMacro rs + return $ PR rs' + + _ -> return p + + compSelect g (S t' v') = case v' of + FV vs -> mapM (\c -> comp g (S t' c)) vs >>= returnC . variants + _ -> case t' of + FV ccs -> mapM (\c -> comp g (S c v')) ccs >>= returnC . variants + + T _ [(PW,c)] -> comp g c --- an optimization + T _ [(PT _ PW,c)] -> comp g c + + T _ [(PV z,c)] -> comp (ext z v' g) c --- another optimization + T _ [(PT _ (PV z),c)] -> comp (ext z v' g) c + + -- course-of-values table: look up by index, no pattern matching needed + + V ptyp ts -> do + vs <- allParamValues gr ptyp + case lookupR v' (zip vs [0 .. length vs - 1]) of + Just i -> comp g $ ts !! i + _ -> return $ S t' v' -- if v' is not canonical + T _ cc -> do + case matchPattern cc v' of + Ok (c,g') -> comp (g' ++ g) c + _ | isCan v' -> Bad (render (text "missing case" <+> ppTerm Unqualified 0 v' <+> text "in" <+> ppTerm Unqualified 0 t)) + _ -> return $ S t' v' -- if v' is not canonical + + S (T i cs) e -> prawitz g i (flip S v') cs e + S (V i cs) e -> prawitzV g i (flip S v') cs e + _ -> returnC $ S t' v' + + --- needed to match records with and without type information + ---- todo: eliminate linear search in a list of records! + lookupR v vs = case v of + R rs -> lookup ([(x,y) | (x,(_,y)) <- rs]) + [([(x,y) | (x,(_,y)) <- rs],v) | (R rs,v) <- vs] + _ -> lookup v vs + + -- case-expand tables + -- if already expanded, don't expand again + compTable g t = case t of + T i@(TComp ty) cs -> do + -- if there are no variables, don't even go inside + cs' <- if (null g) then return cs else mapPairsM (comp g) cs +---- return $ V ty (map snd cs') + return $ T i cs' + V ty cs -> do + ty' <- comp g ty + -- if there are no variables, don't even go inside + cs' <- if (null g) then return cs else mapM (comp g) cs + return $ V ty' cs' + + T i cs -> do + pty0 <- getTableType i + ptyp <- comp g pty0 + case allParamValues gr ptyp of + Ok vs0 -> do + let vs = vs0 ---- [Val v ptyp i | (v,i) <- zip vs0 [0..]] + ps0 <- mapM (compPatternMacro . fst) cs + cs' <- mapM (compBranchOpt g) (zip ps0 (map snd cs)) + sts <- mapM (matchPattern cs') vs + ts <- mapM (\ (c,g') -> comp (g' ++ g) c) sts + ps <- mapM term2patt vs + let ps' = ps --- PT ptyp (head ps) : tail ps +---- return $ V ptyp ts -- to save space, just course of values + return $ T (TComp ptyp) (zip ps' ts) + _ -> do + ps0 <- mapM (compPatternMacro . fst) cs + cs' <- mapM (compBranch g) (zip ps0 (map snd cs)) + +---- cs' <- mapM (compBranch g) cs + return $ T i cs' -- happens with variable types + _ -> comp g t + + compBranch g (p,v) = do + let g' = contP p ++ g + v' <- comp g' v + return (p,v') + + compBranchOpt g c@(p,v) = case contP p of + [] -> return c + _ -> err (const (return c)) return $ compBranch g c + + contP p = case p of + PV x -> [(x,Vr x)] + PC _ ps -> concatMap contP ps + PP _ _ ps -> concatMap contP ps + PT _ p -> contP p + PR rs -> concatMap (contP . snd) rs + + PAs x p -> (x,Vr x) : contP p + + PSeq p q -> concatMap contP [p,q] + PAlt p q -> concatMap contP [p,q] + PRep p -> contP p + PNeg p -> contP p + + _ -> [] + + prawitz g i f cs e = do + cs' <- mapM (compBranch g) [(p, f v) | (p,v) <- cs] + return $ S (T i cs') e + prawitzV g i f cs e = do + cs' <- mapM (comp g) [(f v) | v <- cs] + return $ S (V i cs') e + + compInAlts g (v,c) = do + v' <- comp g v + c' <- comp g c + c2 <- case c' of + EPatt p -> liftM Strs $ getPatts p + _ -> return c' + return (v',c2) + where + getPatts p = case p of + PAlt a b -> liftM2 (++) (getPatts a) (getPatts b) + PString s -> return [K s] + PSeq a b -> do + as <- getPatts a + bs <- getPatts b + return [K (s ++ t) | K s <- as, K t <- bs] + _ -> fail (render (text "not valid pattern in pre expression" <+> ppPatt Unqualified 0 p)) + +{- ---- + uncurrySelect g fs t v = do + ts <- mapM (allParamValues gr . snd) fs + vs <- mapM (comp g) [P v r | r <- map fst fs] + return $ reorderSelect t fs ts vs + + reorderSelect t fs pss vs = case (t,fs,pss,vs) of + (V _ ts, f:fs1, ps:pss1, v:vs1) -> + S (V (snd f) + [reorderSelect (V (RecType fs1) t) fs1 pss1 vs1 | + t <- segments (length ts `div` length ps) ts]) v + (T (TComp _) cs, f:fs1, ps:pss1, v:vs1) -> + S (T (TComp (snd f)) + [(p,reorderSelect (T (TComp (RecType fs1)) c) fs1 pss1 vs1) | + (ep,c) <- zip ps (segments (length cs `div` length ps) cs), + let Ok p = term2patt ep]) v + _ -> t + + segments i xs = + let (x0,xs1) = splitAt i xs in x0 : takeWhile (not . null) (segments i xs1) +-} + + +-- | argument variables cannot be glued +checkNoArgVars :: Term -> Err Term +checkNoArgVars t = case t of + Vr (IA _ _) -> Bad $ glueErrorMsg $ ppTerm Unqualified 0 t + Vr (IAV _ _ _) -> Bad $ glueErrorMsg $ ppTerm Unqualified 0 t + _ -> composOp checkNoArgVars t + +glueErrorMsg s = + render (text "Cannot glue (+) term with run-time variable" <+> s <> char '.' $$ + text "Use Prelude.bind instead.") + +getArgType t = case t of + V ty _ -> return ty + T (TComp ty) _ -> return ty + _ -> Bad (render (text "cannot get argument type of table" $$ nest 2 (ppTerm Unqualified 0 t))) diff --git a/src/compiler/GF/Compile/Concrete/TypeCheck.hs b/src/compiler/GF/Compile/Concrete/TypeCheck.hs new file mode 100644 index 000000000..670f36625 --- /dev/null +++ b/src/compiler/GF/Compile/Concrete/TypeCheck.hs @@ -0,0 +1,690 @@ +{-# LANGUAGE PatternGuards #-} +module GF.Compile.Concrete.TypeCheck( checkLType, inferLType, computeLType, ppType ) where + +import GF.Infra.CheckM +import GF.Infra.Modules +import GF.Data.Operations + +import GF.Grammar +import GF.Grammar.Lookup +import GF.Grammar.Predef +import GF.Grammar.PatternMatch +import GF.Grammar.Lockfield (isLockLabel, lockRecType, unlockRecord) +import GF.Compile.Concrete.AppPredefined + +import Data.List +import Control.Monad +import Text.PrettyPrint + +computeLType :: SourceGrammar -> Context -> Type -> Check Type +computeLType gr g0 t = comp (reverse [(b,x, Vr x) | (b,x,_) <- g0] ++ g0) t + where + comp g ty = case ty of + _ | Just _ <- isTypeInts ty -> return ty ---- shouldn't be needed + | isPredefConstant ty -> return ty ---- shouldn't be needed + + Q m ident -> checkIn (text "module" <+> ppIdent m) $ do + ty' <- checkErr (lookupResDef gr m ident) + if ty' == ty then return ty else comp g ty' --- is this necessary to test? + + Vr ident -> checkLookup ident g -- never needed to compute! + + App f a -> do + f' <- comp g f + a' <- comp g a + case f' of + Abs b x t -> comp ((b,x,a'):g) t + _ -> return $ App f' a' + + Prod bt x a b -> do + a' <- comp g a + b' <- comp ((bt,x,Vr x) : g) b + return $ Prod bt x a' b' + + Abs bt x b -> do + b' <- comp ((bt,x,Vr x):g) b + return $ Abs bt x b' + + ExtR r s -> do + r' <- comp g r + s' <- comp g s + case (r',s') of + (RecType rs, RecType ss) -> checkErr (plusRecType r' s') >>= comp g + _ -> return $ ExtR r' s' + + RecType fs -> do + let fs' = sortRec fs + liftM RecType $ mapPairsM (comp g) fs' + + ELincat c t -> do + t' <- comp g t + checkErr $ lockRecType c t' ---- locking to be removed AR 20/6/2009 + + _ | ty == typeTok -> return typeStr + _ | isPredefConstant ty -> return ty + + _ -> composOp (comp g) ty + +-- the underlying algorithms + +inferLType :: SourceGrammar -> Context -> Term -> Check (Term, Type) +inferLType gr g trm = case trm of + + Q m ident | isPredef m -> termWith trm $ checkErr (typPredefined ident) + + Q m ident -> checks [ + termWith trm $ checkErr (lookupResType gr m ident) >>= computeLType gr g + , + checkErr (lookupResDef gr m ident) >>= inferLType gr g + , + checkError (text "cannot infer type of constant" <+> ppTerm Unqualified 0 trm) + ] + + QC m ident | isPredef m -> termWith trm $ checkErr (typPredefined ident) + + QC m ident -> checks [ + termWith trm $ checkErr (lookupResType gr m ident) >>= computeLType gr g + , + checkErr (lookupResDef gr m ident) >>= inferLType gr g + , + checkError (text "cannot infer type of canonical constant" <+> ppTerm Unqualified 0 trm) + ] + + Vr ident -> termWith trm $ checkLookup ident g + + Typed e t -> do + t' <- computeLType gr g t + checkLType gr g e t' + return (e,t') + + App f a -> do + over <- getOverload gr g Nothing trm + case over of + Just trty -> return trty + _ -> do + (f',fty) <- inferLType gr g f + fty' <- computeLType gr g fty + case fty' of + Prod bt z arg val -> do + a' <- justCheck g a arg + ty <- if isWildIdent z + then return val + else substituteLType [(bt,z,a')] val + return (App f' a',ty) + _ -> checkError (text "A function type is expected for" <+> ppTerm Unqualified 0 f <+> text "instead of type" <+> ppType fty) + + S f x -> do + (f', fty) <- inferLType gr g f + case fty of + Table arg val -> do + x'<- justCheck g x arg + return (S f' x', val) + _ -> checkError (text "table lintype expected for the table in" $$ nest 2 (ppTerm Unqualified 0 trm)) + + P t i -> do + (t',ty) <- inferLType gr g t --- ?? + ty' <- computeLType gr g ty + let tr2 = P t' i + termWith tr2 $ case ty' of + RecType ts -> case lookup i ts of + Nothing -> checkError (text "unknown label" <+> ppLabel i <+> text "in" $$ nest 2 (ppTerm Unqualified 0 ty')) + Just x -> return x + _ -> checkError (text "record type expected for:" <+> ppTerm Unqualified 0 t $$ + text " instead of the inferred:" <+> ppTerm Unqualified 0 ty') + + R r -> do + let (ls,fs) = unzip r + fsts <- mapM inferM fs + let ts = [ty | (Just ty,_) <- fsts] + checkCond (text "cannot infer type of record" $$ nest 2 (ppTerm Unqualified 0 trm)) (length ts == length fsts) + return $ (R (zip ls fsts), RecType (zip ls ts)) + + T (TTyped arg) pts -> do + (_,val) <- checks $ map (inferCase (Just arg)) pts + checkLType gr g trm (Table arg val) + T (TComp arg) pts -> do + (_,val) <- checks $ map (inferCase (Just arg)) pts + checkLType gr g trm (Table arg val) + T ti pts -> do -- tries to guess: good in oper type inference + let pts' = [pt | pt@(p,_) <- pts, isConstPatt p] + case pts' of + [] -> checkError (text "cannot infer table type of" <+> ppTerm Unqualified 0 trm) +---- PInt k : _ -> return $ Ints $ max [i | PInt i <- pts'] + _ -> do + (arg,val) <- checks $ map (inferCase Nothing) pts' + checkLType gr g trm (Table arg val) + V arg pts -> do + (_,val) <- checks $ map (inferLType gr g) pts + return (trm, Table arg val) + + K s -> do + if elem ' ' s + then do + let ss = foldr C Empty (map K (words s)) + ----- removed irritating warning AR 24/5/2008 + ----- checkWarn ("token \"" ++ s ++ + ----- "\" converted to token list" ++ prt ss) + return (ss, typeStr) + else return (trm, typeStr) + + EInt i -> return (trm, typeInt) + + EFloat i -> return (trm, typeFloat) + + Empty -> return (trm, typeStr) + + C s1 s2 -> + check2 (flip (justCheck g) typeStr) C s1 s2 typeStr + + Glue s1 s2 -> + check2 (flip (justCheck g) typeStr) Glue s1 s2 typeStr ---- typeTok + +---- hack from Rename.identRenameTerm, to live with files with naming conflicts 18/6/2007 + Strs (Cn c : ts) | c == cConflict -> do + checkWarn (text "unresolved constant, could be any of" <+> hcat (map (ppTerm Unqualified 0) ts)) + inferLType gr g (head ts) + + Strs ts -> do + ts' <- mapM (\t -> justCheck g t typeStr) ts + return (Strs ts', typeStrs) + + Alts (t,aa) -> do + t' <- justCheck g t typeStr + aa' <- flip mapM aa (\ (c,v) -> do + c' <- justCheck g c typeStr + v' <- checks $ map (justCheck g v) [typeStrs, EPattType typeStr] + return (c',v')) + return (Alts (t',aa'), typeStr) + + RecType r -> do + let (ls,ts) = unzip r + ts' <- mapM (flip (justCheck g) typeType) ts + return (RecType (zip ls ts'), typeType) + + ExtR r s -> do + (r',rT) <- inferLType gr g r + rT' <- computeLType gr g rT + (s',sT) <- inferLType gr g s + sT' <- computeLType gr g sT + + let trm' = ExtR r' s' + ---- trm' <- checkErr $ plusRecord r' s' + case (rT', sT') of + (RecType rs, RecType ss) -> do + rt <- checkErr $ plusRecType rT' sT' + checkLType gr g trm' rt ---- return (trm', rt) + _ | rT' == typeType && sT' == typeType -> return (trm', typeType) + _ -> checkError (text "records or record types expected in" <+> ppTerm Unqualified 0 trm) + + Sort _ -> + termWith trm $ return typeType + + Prod bt x a b -> do + a' <- justCheck g a typeType + b' <- justCheck ((bt,x,a'):g) b typeType + return (Prod bt x a' b', typeType) + + Table p t -> do + p' <- justCheck g p typeType --- check p partype! + t' <- justCheck g t typeType + return $ (Table p' t', typeType) + + FV vs -> do + (_,ty) <- checks $ map (inferLType gr g) vs +--- checkIfComplexVariantType trm ty + checkLType gr g trm ty + + EPattType ty -> do + ty' <- justCheck g ty typeType + return (EPattType ty',typeType) + EPatt p -> do + ty <- inferPatt p + return (trm, EPattType ty) + + ELin c trm -> do + (trm',ty) <- inferLType gr g trm + ty' <- checkErr $ lockRecType c ty ---- lookup c; remove lock AR 20/6/2009 + return $ (ELin c trm', ty') + + _ -> checkError (text "cannot infer lintype of" <+> ppTerm Unqualified 0 trm) + + where + isPredef m = elem m [cPredef,cPredefAbs] + + justCheck g ty te = checkLType gr g ty te >>= return . fst + + -- for record fields, which may be typed + inferM (mty, t) = do + (t', ty') <- case mty of + Just ty -> checkLType gr g ty t + _ -> inferLType gr g t + return (Just ty',t') + + inferCase mty (patt,term) = do + arg <- maybe (inferPatt patt) return mty + cont <- pattContext gr g arg patt + (_,val) <- inferLType gr (reverse cont ++ g) term + return (arg,val) + isConstPatt p = case p of + PC _ ps -> True --- all isConstPatt ps + PP _ _ ps -> True --- all isConstPatt ps + PR ps -> all (isConstPatt . snd) ps + PT _ p -> isConstPatt p + PString _ -> True + PInt _ -> True + PFloat _ -> True + PChar -> True + PChars _ -> True + PSeq p q -> isConstPatt p && isConstPatt q + PAlt p q -> isConstPatt p && isConstPatt q + PRep p -> isConstPatt p + PNeg p -> isConstPatt p + PAs _ p -> isConstPatt p + _ -> False + + inferPatt p = case p of + PP q c ps | q /= cPredef -> checkErr $ liftM valTypeCnc (lookupResType gr q c) + PAs _ p -> inferPatt p + PNeg p -> inferPatt p + PAlt p q -> checks [inferPatt p, inferPatt q] + PSeq _ _ -> return $ typeStr + PRep _ -> return $ typeStr + PChar -> return $ typeStr + PChars _ -> return $ typeStr + _ -> inferLType gr g (patt2term p) >>= return . snd + + +-- type inference: Nothing, type checking: Just t +-- the latter permits matching with value type +getOverload :: SourceGrammar -> Context -> Maybe Type -> Term -> Check (Maybe (Term,Type)) +getOverload gr g mt ot = case appForm ot of + (f@(Q m c), ts) -> case lookupOverload gr m c of + Ok typs -> do + ttys <- mapM (inferLType gr g) ts + v <- matchOverload f typs ttys + return $ Just v + _ -> return Nothing + _ -> return Nothing + where + matchOverload f typs ttys = do + let (tts,tys) = unzip ttys + let vfs = lookupOverloadInstance tys typs + let matches = [vf | vf@((v,_),_) <- vfs, matchVal mt v] + + case ([vf | (vf,True) <- matches],[vf | (vf,False) <- matches]) of + ([(val,fun)],_) -> return (mkApp fun tts, val) + ([],[(val,fun)]) -> do + checkWarn (text "ignoring lock fields in resolving" <+> ppTerm Unqualified 0 ot) + return (mkApp fun tts, val) + ([],[]) -> do + let showTypes ty = hsep (map ppType ty) + checkError $ text "no overload instance of" <+> ppTerm Unqualified 0 f $$ + text "for" $$ + nest 2 (showTypes tys) $$ + text "among" $$ + nest 2 (vcat [showTypes ty | (ty,_) <- typs]) $$ + maybe empty (\x -> text "with value type" <+> ppType x) mt + + (vfs1,vfs2) -> case (noProds vfs1,noProds vfs2) of + ([(val,fun)],_) -> do + return (mkApp fun tts, val) + ([],[(val,fun)]) -> do + checkWarn (text "ignoring lock fields in resolving" <+> ppTerm Unqualified 0 ot) + return (mkApp fun tts, val) + +----- unsafely exclude irritating warning AR 24/5/2008 +----- checkWarn $ "overloading of" +++ prt f +++ +----- "resolved by excluding partial applications:" ++++ +----- unlines [prtType env ty | (ty,_) <- vfs', not (noProd ty)] + + + _ -> checkError $ text "ambiguous overloading of" <+> ppTerm Unqualified 0 f <+> + text "for" <+> hsep (map ppType tys) $$ + text "with alternatives" $$ + nest 2 (vcat [ppType ty | (ty,_) <- if null vfs1 then vfs2 else vfs2]) + + matchVal mt v = elem mt [Nothing,Just v,Just (unlocked v)] + + unlocked v = case v of + RecType fs -> RecType $ filter (not . isLockLabel . fst) fs + _ -> v + ---- TODO: accept subtypes + ---- TODO: use a trie + lookupOverloadInstance tys typs = + [((mkFunType rest val, t),isExact) | + let lt = length tys, + (ty,(val,t)) <- typs, length ty >= lt, + let (pre,rest) = splitAt lt ty, + let isExact = pre == tys, + isExact || map unlocked pre == map unlocked tys + ] + + noProds vfs = [(v,f) | (v,f) <- vfs, noProd v] + + noProd ty = case ty of + Prod _ _ _ _ -> False + _ -> True + +checkLType :: SourceGrammar -> Context -> Term -> Type -> Check (Term, Type) +checkLType gr g trm typ0 = do + + typ <- computeLType gr g typ0 + + case trm of + + Abs bt x c -> do + case typ of + Prod bt' z a b -> do + (c',b') <- if isWildIdent z + then checkLType gr ((bt,x,a):g) c b + else do b' <- checkIn (text "abs") $ substituteLType [(bt',z,Vr x)] b + checkLType gr ((bt,x,a):g) c b' + return $ (Abs bt x c', Prod bt' x a b') + _ -> checkError $ text "function type expected instead of" <+> ppType typ + + App f a -> do + over <- getOverload gr g (Just typ) trm + case over of + Just trty -> return trty + _ -> do + (trm',ty') <- inferLType gr g trm + termWith trm' $ checkEqLType gr g typ ty' trm' + + Q _ _ -> do + over <- getOverload gr g (Just typ) trm + case over of + Just trty -> return trty + _ -> do + (trm',ty') <- inferLType gr g trm + termWith trm' $ checkEqLType gr g typ ty' trm' + + T _ [] -> + checkError (text "found empty table in type" <+> ppTerm Unqualified 0 typ) + T _ cs -> case typ of + Table arg val -> do + case allParamValues gr arg of + Ok vs -> do + let ps0 = map fst cs + ps <- checkErr $ testOvershadow ps0 vs + if null ps + then return () + else checkWarn (text "patterns never reached:" $$ + nest 2 (vcat (map (ppPatt Unqualified 0) ps))) + _ -> return () -- happens with variable types + cs' <- mapM (checkCase arg val) cs + return (T (TTyped arg) cs', typ) + _ -> checkError $ text "table type expected for table instead of" $$ nest 2 (ppType typ) + + R r -> case typ of --- why needed? because inference may be too difficult + RecType rr -> do + let (ls,_) = unzip rr -- labels of expected type + fsts <- mapM (checkM r) rr -- check that they are found in the record + return $ (R fsts, typ) -- normalize record + + _ -> checkError (text "record type expected in type checking instead of" $$ nest 2 (ppTerm Unqualified 0 typ)) + + ExtR r s -> case typ of + _ | typ == typeType -> do + trm' <- computeLType gr g trm + case trm' of + RecType _ -> termWith trm $ return typeType + ExtR (Vr _) (RecType _) -> termWith trm $ return typeType + -- ext t = t ** ... + _ -> checkError (text "invalid record type extension" <+> nest 2 (ppTerm Unqualified 0 trm)) + RecType rr -> do + (r',ty,s') <- checks [ + do (r',ty) <- inferLType gr g r + return (r',ty,s) + , + do (s',ty) <- inferLType gr g s + return (s',ty,r) + ] + case ty of + RecType rr1 -> do + let (rr0,rr2) = recParts rr rr1 + r2 <- justCheck g r' rr0 + s2 <- justCheck g s' rr2 + return $ (ExtR r2 s2, typ) + _ -> checkError (text "record type expected in extension of" <+> ppTerm Unqualified 0 r $$ + text "but found" <+> ppTerm Unqualified 0 ty) + + ExtR ty ex -> do + r' <- justCheck g r ty + s' <- justCheck g s ex + return $ (ExtR r' s', typ) --- is this all? + + _ -> checkError (text "record extension not meaningful for" <+> ppTerm Unqualified 0 typ) + + FV vs -> do + ttys <- mapM (flip (checkLType gr g) typ) vs +--- checkIfComplexVariantType trm typ + return (FV (map fst ttys), typ) --- typ' ? + + S tab arg -> checks [ do + (tab',ty) <- inferLType gr g tab + ty' <- computeLType gr g ty + case ty' of + Table p t -> do + (arg',val) <- checkLType gr g arg p + checkEqLType gr g typ t trm + return (S tab' arg', t) + _ -> checkError (text "table type expected for applied table instead of" <+> ppType ty') + , do + (arg',ty) <- inferLType gr g arg + ty' <- computeLType gr g ty + (tab',_) <- checkLType gr g tab (Table ty' typ) + return (S tab' arg', typ) + ] + Let (x,(mty,def)) body -> case mty of + Just ty -> do + (def',ty') <- checkLType gr g def ty + body' <- justCheck ((Explicit,x,ty'):g) body typ + return (Let (x,(Just ty',def')) body', typ) + _ -> do + (def',ty) <- inferLType gr g def -- tries to infer type of local constant + checkLType gr g (Let (x,(Just ty,def')) body) typ + + ELin c tr -> do + tr1 <- checkErr $ unlockRecord c tr + checkLType gr g tr1 typ + + _ -> do + (trm',ty') <- inferLType gr g trm + termWith trm' $ checkEqLType gr g typ ty' trm' + where + justCheck g ty te = checkLType gr g ty te >>= return . fst + + recParts rr t = (RecType rr1,RecType rr2) where + (rr1,rr2) = partition (flip elem (map fst t) . fst) rr + + checkM rms (l,ty) = case lookup l rms of + Just (Just ty0,t) -> do + checkEqLType gr g ty ty0 t + (t',ty') <- checkLType gr g t ty + return (l,(Just ty',t')) + Just (_,t) -> do + (t',ty') <- checkLType gr g t ty + return (l,(Just ty',t')) + _ -> checkError $ + if isLockLabel l + then let cat = drop 5 (showIdent (label2ident l)) + in ppTerm Unqualified 0 (R rms) <+> text "is not in the lincat of" <+> text cat <> + text "; try wrapping it with lin" <+> text cat + else text "cannot find value for label" <+> ppLabel l <+> text "in" <+> ppTerm Unqualified 0 (R rms) + + checkCase arg val (p,t) = do + cont <- pattContext gr g arg p + t' <- justCheck (reverse cont ++ g) t val + return (p,t') + +pattContext :: SourceGrammar -> Context -> Type -> Patt -> Check Context +pattContext env g typ p = case p of + PV x -> return [(Explicit,x,typ)] + PP q c ps | q /= cPredef -> do ---- why this /=? AR 6/1/2006 + t <- checkErr $ lookupResType env q c + let (cont,v) = typeFormCnc t + checkCond (text "wrong number of arguments for constructor in" <+> ppPatt Unqualified 0 p) + (length cont == length ps) + checkEqLType env g typ v (patt2term p) + mapM (\((_,_,ty),p) -> pattContext env g ty p) (zip cont ps) >>= return . concat + PR r -> do + typ' <- computeLType env g typ + case typ' of + RecType t -> do + let pts = [(ty,tr) | (l,tr) <- r, Just ty <- [lookup l t]] + ----- checkWarn $ prt p ++++ show pts ----- debug + mapM (uncurry (pattContext env g)) pts >>= return . concat + _ -> checkError (text "record type expected for pattern instead of" <+> ppTerm Unqualified 0 typ') + PT t p' -> do + checkEqLType env g typ t (patt2term p') + pattContext env g typ p' + + PAs x p -> do + g' <- pattContext env g typ p + return ((Explicit,x,typ):g') + + PAlt p' q -> do + g1 <- pattContext env g typ p' + g2 <- pattContext env g typ q + let pts = nub ([x | pt@(_,x,_) <- g1, notElem pt g2] ++ [x | pt@(_,x,_) <- g2, notElem pt g1]) + checkCond + (text "incompatible bindings of" <+> + fsep (map ppIdent pts) <+> + text "in pattern alterantives" <+> ppPatt Unqualified 0 p) (null pts) + return g1 -- must be g1 == g2 + PSeq p q -> do + g1 <- pattContext env g typ p + g2 <- pattContext env g typ q + return $ g1 ++ g2 + PRep p' -> noBind typeStr p' + PNeg p' -> noBind typ p' + + _ -> return [] ---- check types! + where + noBind typ p' = do + co <- pattContext env g typ p' + if not (null co) + then checkWarn (text "no variable bound inside pattern" <+> ppPatt Unqualified 0 p) + >> return [] + else return [] + +checkEqLType :: SourceGrammar -> Context -> Type -> Type -> Term -> Check Type +checkEqLType gr g t u trm = do + (b,t',u',s) <- checkIfEqLType gr g t u trm + case b of + True -> return t' + False -> checkError $ text s <+> text "type of" <+> ppTerm Unqualified 0 trm $$ + text "expected:" <+> ppType t $$ + text "inferred:" <+> ppType u + +checkIfEqLType :: SourceGrammar -> Context -> Type -> Type -> Term -> Check (Bool,Type,Type,String) +checkIfEqLType gr g t u trm = do + t' <- computeLType gr g t + u' <- computeLType gr g u + case t' == u' || alpha [] t' u' of + True -> return (True,t',u',[]) + -- forgive missing lock fields by only generating a warning. + --- better: use a flag to forgive? (AR 31/1/2006) + _ -> case missingLock [] t' u' of + Ok lo -> do + checkWarn $ text "missing lock field" <+> fsep (map ppLabel lo) + return (True,t',u',[]) + Bad s -> return (False,t',u',s) + + where + + -- t is a subtype of u + --- quick hack version of TC.eqVal + alpha g t u = case (t,u) of + + -- error (the empty type!) is subtype of any other type + (_,u) | u == typeError -> True + + -- contravariance + (Prod _ x a b, Prod _ y c d) -> alpha g c a && alpha ((x,y):g) b d + + -- record subtyping + (RecType rs, RecType ts) -> all (\ (l,a) -> + any (\ (k,b) -> alpha g a b && l == k) ts) rs + (ExtR r s, ExtR r' s') -> alpha g r r' && alpha g s s' + (ExtR r s, t) -> alpha g r t || alpha g s t + + -- the following say that Ints n is a subset of Int and of Ints m >= n + (t,u) | Just m <- isTypeInts t, Just n <- isTypeInts t -> m >= n + | Just _ <- isTypeInts t, u == typeInt -> True ---- check size! + | t == typeInt, Just _ <- isTypeInts u -> True ---- why this ???? AR 11/12/2005 + + ---- this should be made in Rename + (Q m a, Q n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + || m == n --- for Predef + (QC m a, QC n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + (QC m a, Q n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + (Q m a, QC n b) | a == b -> elem m (allExtendsPlus gr n) + || elem n (allExtendsPlus gr m) + + (Table a b, Table c d) -> alpha g a c && alpha g b d + (Vr x, Vr y) -> x == y || elem (x,y) g || elem (y,x) g + _ -> t == u + --- the following should be one-way coercions only. AR 4/1/2001 + || elem t sTypes && elem u sTypes + || (t == typeType && u == typePType) + || (u == typeType && t == typePType) + + missingLock g t u = case (t,u) of + (RecType rs, RecType ts) -> + let + ls = [l | (l,a) <- rs, + not (any (\ (k,b) -> alpha g a b && l == k) ts)] + (locks,others) = partition isLockLabel ls + in case others of + _:_ -> Bad $ render (text "missing record fields:" <+> fsep (punctuate comma (map ppLabel others))) + _ -> return locks + -- contravariance + (Prod _ x a b, Prod _ y c d) -> do + ls1 <- missingLock g c a + ls2 <- missingLock g b d + return $ ls1 ++ ls2 + + _ -> Bad "" + + sTypes = [typeStr, typeTok, typeString] + +-- auxiliaries + +-- | light-weight substitution for dep. types +substituteLType :: Context -> Type -> Check Type +substituteLType g t = case t of + Vr x -> return $ maybe t id $ lookup x [(x,t) | (_,x,t) <- g] + _ -> composOp (substituteLType g) t + +termWith :: Term -> Check Type -> Check (Term, Type) +termWith t ct = do + ty <- ct + return (t,ty) + +-- | compositional check\/infer of binary operations +check2 :: (Term -> Check Term) -> (Term -> Term -> Term) -> + Term -> Term -> Type -> Check (Term,Type) +check2 chk con a b t = do + a' <- chk a + b' <- chk b + return (con a' b', t) + +-- printing a type with a lock field lock_C as C +ppType :: Type -> Doc +ppType ty = + case ty of + RecType fs -> case filter isLockLabel $ map fst fs of + [lock] -> text (drop 5 (showIdent (label2ident lock))) + _ -> ppTerm Unqualified 0 ty + Prod _ x a b -> ppType a <+> text "->" <+> ppType b + _ -> ppTerm Unqualified 0 ty + +checkLookup :: Ident -> Context -> Check Type +checkLookup x g = + case [ty | (b,y,ty) <- g, x == y] of + [] -> checkError (text "unknown variable" <+> ppIdent x) + (ty:_) -> return ty diff --git a/src/compiler/GF/Compile/Export.hs b/src/compiler/GF/Compile/Export.hs new file mode 100644 index 000000000..d03eb947e --- /dev/null +++ b/src/compiler/GF/Compile/Export.hs @@ -0,0 +1,64 @@ +module GF.Compile.Export where + +import PGF.CId +import PGF.Data (PGF(..)) +import GF.Compile.GFCCtoHaskell +import GF.Compile.GFCCtoProlog +import GF.Compile.GFCCtoJS +import GF.Compile.PGFPretty +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.PGFToCFG +import GF.Speech.SRGS_ABNF +import GF.Speech.SRGS_XML +import GF.Speech.JSGF +import GF.Speech.GSL +import GF.Speech.SRG +import GF.Speech.VoiceXML +import GF.Speech.SLF +import GF.Speech.PrRegExp + +import Data.Maybe +import System.FilePath + +-- top-level access to code generation + +exportPGF :: Options + -> OutputFormat + -> PGF + -> [(FilePath,String)] -- ^ List of recommended file names and contents. +exportPGF opts fmt pgf = + case fmt of + FmtPGFPretty -> multi "txt" prPGFPretty + FmtPMCFGPretty -> single "pmcfg" prPMCFGPretty + FmtJavaScript -> multi "js" pgf2js + FmtHaskell -> multi "hs" (grammar2haskell opts name) + FmtProlog -> multi "pl" grammar2prolog + FmtProlog_Abs -> multi "pl" grammar2prolog_abs + FmtBNF -> single "bnf" bnfPrinter + FmtEBNF -> single "ebnf" (ebnfPrinter opts) + FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts) + FmtSRGS_XML_NonRec -> single "grxml" (srgsXmlNonRecursivePrinter opts) + FmtSRGS_ABNF -> single "gram" (srgsAbnfPrinter opts) + FmtSRGS_ABNF_NonRec -> single "gram" (srgsAbnfNonRecursivePrinter opts) + FmtJSGF -> single "jsgf" (jsgfPrinter opts) + FmtGSL -> single "gsl" (gslPrinter opts) + FmtVoiceXML -> single "vxml" grammar2vxml + FmtSLF -> single "slf" slfPrinter + FmtRegExp -> single "rexp" regexpPrinter + FmtFA -> single "dot" slfGraphvizPrinter + where + name = fromMaybe (showCId (absname pgf)) (flag optName opts) + + multi :: String -> (PGF -> String) -> [(FilePath,String)] + multi ext pr = [(name <.> ext, pr pgf)] + + single :: String -> (PGF -> CId -> String) -> [(FilePath,String)] + single ext pr = [(showCId cnc <.> ext, pr pgf cnc) | cnc <- cncnames pgf] + +-- | Get the name of the concrete syntax to generate output from. +-- FIXME: there should be an option to change this. +outputConcr :: PGF -> CId +outputConcr pgf = case cncnames pgf of + [] -> error "No concrete syntax." + cnc:_ -> cnc diff --git a/src/compiler/GF/Compile/GFCCtoHaskell.hs b/src/compiler/GF/Compile/GFCCtoHaskell.hs new file mode 100644 index 000000000..d44d6705c --- /dev/null +++ b/src/compiler/GF/Compile/GFCCtoHaskell.hs @@ -0,0 +1,230 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCtoHaskell +-- Maintainer : Aarne Ranta +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/06/17 12:39:07 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- to write a GF abstract grammar into a Haskell module with translations from +-- data objects into GF trees. Example: GSyntax for Agda. +-- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004 +----------------------------------------------------------------------------- + +module GF.Compile.GFCCtoHaskell (grammar2haskell) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations +import GF.Infra.Option +import GF.Text.UTF8 + +import Data.List --(isPrefixOf, find, intersperse) +import qualified Data.Map as Map + +type Prefix = String -> String + +-- | the main function +grammar2haskell :: Options + -> String -- ^ Module name. + -> PGF + -> String +grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $ + pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr'] + where gr' = hSkeleton gr + gadt = haskellOption opts HaskellGADT + lexical cat = haskellOption opts HaskellLexical && isLexicalCat opts cat + gId | haskellOption opts HaskellNoPrefix = id + | otherwise = ("G"++) + pragmas | gadt = ["{-# OPTIONS_GHC -fglasgow-exts #-}"] + | otherwise = [] + types | gadt = datatypesGADT gId lexical gr' + | otherwise = datatypes gId lexical gr' + +haskPreamble name = + [ + "module " ++ name ++ " where", + "", + "import PGF", + "----------------------------------------------------", + "-- automatic translation from GF to Haskell", + "----------------------------------------------------", + "", + "class Gf a where", + " gf :: a -> Tree", + " fg :: Tree -> a", + "", + predefInst "GString" "String" "unStr" "mkStr", + "", + predefInst "GInt" "Integer" "unInt" "mkInt", + "", + predefInst "GFloat" "Double" "unDouble" "mkDouble", + "", + "----------------------------------------------------", + "-- below this line machine-generated", + "----------------------------------------------------", + "" + ] + +predefInst gtyp typ destr consr = + "newtype" +++ gtyp +++ "=" +++ gtyp +++ typ +++ " deriving Show" +++++ + "instance Gf" +++ gtyp +++ "where" ++++ + " gf (" ++ gtyp +++ "x) =" +++ consr +++ "x" ++++ + " fg t =" ++++ + " case "++destr++" t of" ++++ + " Just x -> " +++ gtyp +++ "x" ++++ + " Nothing -> error (\"no" +++ gtyp +++ "\" ++ show t)" + +type OIdent = String + +type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] + +datatypes :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +datatypes gId lexical = (foldr (+++++) "") . (filter (/="")) . (map (hDatatype gId lexical)) . snd + +gfinstances :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +gfinstances gId lexical (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance gId lexical m)) g + + +hDatatype :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> String +hDatatype _ _ ("Cn",_) = "" --- +hDatatype _ _ (cat,[]) = "" +hDatatype gId _ (cat,rules) | isListCat (cat,rules) = + "newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]" + +++ "deriving Show" +hDatatype gId lexical (cat,rules) = + "data" +++ gId cat +++ "=" ++ + (if length rules == 1 then "" else "\n ") +++ + foldr1 (\x y -> x ++ "\n |" +++ y) constructors ++++ + " deriving Show" + where + constructors = [gId f +++ foldr (+++) "" (map (gId) xx) | (f,xx) <- nonLexicalRules (lexical cat) rules] + ++ if lexical cat then [lexicalConstructor cat +++ "String"] else [] + +nonLexicalRules :: Bool -> [(OIdent, [OIdent])] -> [(OIdent, [OIdent])] +nonLexicalRules False rules = rules +nonLexicalRules True rules = [r | r@(f,t) <- rules, not (null t)] + +lexicalConstructor :: OIdent -> String +lexicalConstructor cat = "Lex" ++ cat + +-- GADT version of data types +datatypesGADT :: Prefix -> (OIdent -> Bool) -> (String,HSkeleton) -> String +datatypesGADT gId lexical (_,skel) = + unlines (concatMap (hCatTypeGADT gId) skel) + +++++ + "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT gId lexical) skel) + +hCatTypeGADT :: Prefix -> (OIdent, [(OIdent, [OIdent])]) -> [String] +hCatTypeGADT gId (cat,rules) + = ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_", + "data"+++gId cat++"_"] + +hDatatypeGADT :: Prefix -> (OIdent -> Bool) -> (OIdent, [(OIdent, [OIdent])]) -> [String] +hDatatypeGADT gId lexical (cat, rules) + | isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t] + | otherwise = + [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t + | (f,args) <- nonLexicalRules (lexical cat) rules ] + ++ if lexical cat then [lexicalConstructor cat +++ ":: String ->"+++ t] else [] + where t = "Tree" +++ gId cat ++ "_" + +gfInstance :: Prefix -> (OIdent -> Bool) -> String -> (OIdent, [(OIdent, [OIdent])]) -> String +gfInstance gId lexical m crs = hInstance gId lexical m crs ++++ fInstance gId lexical m crs + +----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004 +hInstance _ _ m (cat,[]) = "" +hInstance gId lexical m (cat,rules) + | isListCat (cat,rules) = + "instance Gf" +++ gId cat +++ "where" ++++ + " gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])" + +++ "=" +++ mkRHS ("Base"++ec) baseVars ++++ + " gf (" ++ gId cat +++ "(x:xs)) = " + ++ mkRHS ("Cons"++ec) ["x",prParenth (gId cat+++"xs")] +-- no show for GADTs +-- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)" + | otherwise = + "instance Gf" +++ gId cat +++ "where\n" ++ + unlines ([mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] + ++ if lexical cat then [" gf (" ++ lexicalConstructor cat +++ "x) = mkApp (mkCId x) []"] else []) + where + ec = elemCat cat + baseVars = mkVars (baseSize (cat,rules)) + mkInst f xx = let xx' = mkVars (length xx) in " gf " ++ + (if length xx == 0 then gId f else prParenth (gId f +++ foldr1 (+++) xx')) +++ + "=" +++ mkRHS f xx' + mkVars n = ["x" ++ show i | i <- [1..n]] + mkRHS f vars = "mkApp (mkCId \"" ++ f ++ "\")" +++ + "[" ++ prTList ", " ["gf" +++ x | x <- vars] ++ "]" + + +----fInstance m ("Cn",_) = "" --- +fInstance _ _ m (cat,[]) = "" +fInstance gId lexical m (cat,rules) = + " fg t =" ++++ + " case unApp t of" ++++ + unlines [mkInst f xx | (f,xx) <- nonLexicalRules (lexical cat) rules] ++++ + (if lexical cat then " (i,[]) -> " ++ lexicalConstructor cat +++ "(prCId i)" else "") ++++ + " _ -> error (\"no" +++ cat ++ " \" ++ show t)" + where + mkInst f xx = + " Just (i," ++ + "[" ++ prTList "," xx' ++ "])" +++ + "| i == mkCId \"" ++ f ++ "\" ->" +++ mkRHS f xx' + where xx' = ["x" ++ show i | (_,i) <- zip xx [1..]] + mkRHS f vars + | isListCat (cat,rules) = + if "Base" `isPrefixOf` f then + gId cat +++ "[" ++ prTList ", " [ "fg" +++ x | x <- vars ] ++ "]" + else + let (i,t) = (init vars,last vars) + in "let" +++ gId cat +++ "xs = fg " ++ t +++ "in" +++ + gId cat +++ prParenth (prTList ":" (["fg"+++v | v <- i] ++ ["xs"])) + | otherwise = + gId f +++ + prTList " " [prParenth ("fg" +++ x) | x <- vars] + + +--type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] +hSkeleton :: PGF -> (String,HSkeleton) +hSkeleton gr = + (showCId (absname gr), + [(showCId c, [(showCId f, map showCId cs) | (f, (cs,_)) <- fs]) | + fs@((_, (_,c)):_) <- fns] + ) + where + fns = groupBy valtypg (sortBy valtyps (map jty (Map.assocs (funs (abstract gr))))) + valtyps (_, (_,x)) (_, (_,y)) = compare x y + valtypg (_, (_,x)) (_, (_,y)) = x == y + jty (f,(ty,_,_)) = (f,catSkeleton ty) + +updateSkeleton :: OIdent -> HSkeleton -> (OIdent, [OIdent]) -> HSkeleton +updateSkeleton cat skel rule = + case skel of + (cat0,rules):rr | cat0 == cat -> (cat0, rule:rules) : rr + (cat0,rules):rr -> (cat0, rules) : updateSkeleton cat rr rule + +isListCat :: (OIdent, [(OIdent, [OIdent])]) -> Bool +isListCat (cat,rules) = "List" `isPrefixOf` cat && length rules == 2 + && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs + where c = elemCat cat + fs = map fst rules + +-- | Gets the element category of a list category. +elemCat :: OIdent -> OIdent +elemCat = drop 4 + +isBaseFun :: OIdent -> Bool +isBaseFun f = "Base" `isPrefixOf` f + +isConsFun :: OIdent -> Bool +isConsFun f = "Cons" `isPrefixOf` f + +baseSize :: (OIdent, [(OIdent, [OIdent])]) -> Int +baseSize (_,rules) = length bs + where Just (_,bs) = find (("Base" `isPrefixOf`) . fst) rules diff --git a/src/compiler/GF/Compile/GFCCtoJS.hs b/src/compiler/GF/Compile/GFCCtoJS.hs new file mode 100644 index 000000000..312701e3b --- /dev/null +++ b/src/compiler/GF/Compile/GFCCtoJS.hs @@ -0,0 +1,138 @@ +module GF.Compile.GFCCtoJS (pgf2js) where + +import PGF.CId +import PGF.Data hiding (mkStr) +import qualified PGF.Macros as M +import qualified GF.JavaScript.AbsJS as JS +import qualified GF.JavaScript.PrintJS as JS + +import GF.Text.UTF8 +import GF.Data.ErrM +import GF.Infra.Option + +import Control.Monad (mplus) +import Data.Array.Unboxed (UArray) +import qualified Data.Array.IArray as Array +import Data.Maybe (fromMaybe) +import Data.Map (Map) +import qualified Data.Set as Set +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap + +pgf2js :: PGF -> String +pgf2js pgf = + encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] + where + n = showCId $ absname pgf + as = abstract pgf + cs = Map.assocs (concretes pgf) + start = showCId $ M.lookStartCat pgf + grammar = new "GFGrammar" [js_abstract, js_concrete] + js_abstract = abstract2js start as + js_concrete = JS.EObj $ map (concrete2js start n) cs + +abstract2js :: String -> Abstr -> JS.Expr +abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))] + +absdef2js :: (CId,(Type,Int,[Equation])) -> JS.Property +absdef2js (f,(typ,_,_)) = + let (args,cat) = M.catSkeleton typ in + JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)]) + +concrete2js :: String -> String -> (CId,Concr) -> JS.Property +concrete2js start n (c, cnc) = + JS.Prop l (new "GFConcrete" ([flags,(JS.EObj $ ((map (cncdef2js n (showCId c)) ds) ++ litslins))] ++ + maybe [] (parser2js start) (parser cnc))) + where + flags = mapToJSObj JS.EStr $ cflags cnc + l = JS.IdentPropName (JS.Ident (showCId c)) + ds = concatMap Map.assocs [lins cnc, opers cnc, lindefs cnc] + litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), + JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), + JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])] + + +cncdef2js :: String -> String -> (CId,Term) -> JS.Property +cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)]) + +term2js :: String -> String -> Term -> JS.Expr +term2js n l t = f t + where + f t = + case t of + R xs -> new "Arr" (map f xs) + P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y] + S xs -> mkSeq (map f xs) + K t -> tokn2js t + V i -> JS.EIndex (JS.EVar children) (JS.EInt i) + C i -> new "Int" [JS.EInt i] + F f -> JS.ECall (JS.EMember (JS.EIndex (JS.EMember (JS.EVar $ JS.Ident n) (JS.Ident "concretes")) (JS.EStr l)) (JS.Ident "rule")) [JS.EStr (showCId f), JS.EVar children] + FV xs -> new "Variants" (map f xs) + W str x -> new "Suffix" [JS.EStr str, f x] + TM _ -> new "Meta" [] + +tokn2js :: Tokn -> JS.Expr +tokn2js (KS s) = mkStr s +tokn2js (KP ss vs) = mkSeq (map mkStr ss) -- FIXME + +mkStr :: String -> JS.Expr +mkStr s = new "Str" [JS.EStr s] + +mkSeq :: [JS.Expr] -> JS.Expr +mkSeq [x] = x +mkSeq xs = new "Seq" xs + +argIdent :: Integer -> JS.Ident +argIdent n = JS.Ident ("x" ++ show n) + +children :: JS.Ident +children = JS.Ident "cs" + +-- Parser +parser2js :: String -> ParserInfo -> [JS.Expr] +parser2js start p = [new "Parser" [JS.EStr start, + JS.EArray $ [frule2js p cat prod | (cat,set) <- IntMap.toList (productions p), prod <- Set.toList set], + JS.EObj $ map cats (Map.assocs (startCats p))]] + where + cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EArray (map JS.EInt is)) + +frule2js :: ParserInfo -> FCat -> Production -> JS.Expr +frule2js p res (FApply funid args) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js p lins] + where + FFun f ps lins = functions p Array.! funid +frule2js p res (FCoerce arg) = new "Rule" [JS.EInt res, daughter 0, JS.EArray [JS.EInt arg], JS.EArray [JS.EArray [sym2js (FSymCat 0 i)] | i <- [0..catLinArity arg-1]]] + where + catLinArity :: FCat -> Int + catLinArity c = maximum (1:[Array.rangeSize (Array.bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + + topdownRules cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions p)) + + g (FApply funid args) rules = (functions p Array.! funid,args) : rules + g (FCoerce cat) rules = f cat rules + + +name2js :: (CId,[Profile]) -> JS.Expr +name2js (f,ps) = new "FunApp" $ [JS.EStr $ showCId f, JS.EArray (map fromProfile ps)] + where + fromProfile :: Profile -> JS.Expr + fromProfile [] = new "MetaVar" [] + fromProfile [x] = daughter x + fromProfile args = new "Unify" [JS.EArray (map daughter args)] + +daughter i = new "Arg" [JS.EInt i] + +lins2js :: ParserInfo -> UArray FIndex SeqId -> JS.Expr +lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Array.! seqid)] | seqid <- Array.elems ls] + +sym2js :: FSymbol -> JS.Expr +sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymLit n l) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymKS [t]) = new "Terminal" [JS.EStr t] + +new :: String -> [JS.Expr] -> JS.Expr +new f xs = JS.ENew (JS.Ident f) xs + +mapToJSObj :: (a -> JS.Expr) -> Map CId a -> JS.Expr +mapToJSObj f m = JS.EObj [ JS.Prop (JS.IdentPropName (JS.Ident (showCId k))) (f v) | (k,v) <- Map.toList m ] diff --git a/src/compiler/GF/Compile/GFCCtoProlog.hs b/src/compiler/GF/Compile/GFCCtoProlog.hs new file mode 100644 index 000000000..702d4afe5 --- /dev/null +++ b/src/compiler/GF/Compile/GFCCtoProlog.hs @@ -0,0 +1,279 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCtoProlog +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- to write a GF grammar into a Prolog module +----------------------------------------------------------------------------- + +module GF.Compile.GFCCtoProlog (grammar2prolog, grammar2prolog_abs) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations +import GF.Text.UTF8 + +import qualified Data.Map as Map +import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord) +import Data.List (isPrefixOf,mapAccumL) + +grammar2prolog, grammar2prolog_abs :: PGF -> String +-- Most prologs have problems with UTF8 encodings, so we skip that: +grammar2prolog = {- encodeUTF8 . -} foldr (++++) [] . pgf2clauses +grammar2prolog_abs = {- encodeUTF8 . -} foldr (++++) [] . pgf2clauses_abs + + +pgf2clauses :: PGF -> [String] +pgf2clauses (PGF absname cncnames gflags abstract concretes) = + [":- " ++ plFact "module" [plp absname, "[]"]] ++ + clauseHeader "%% concrete(?Module)" + [plFact "concrete" [plp cncname] | cncname <- cncnames] ++ + clauseHeader "%% flag(?Flag, ?Value): global flags" + (map (plpFact2 "flag") (Map.assocs gflags)) ++ + plAbstract (absname, abstract) ++ + concatMap plConcrete (Map.assocs concretes) + +pgf2clauses_abs :: PGF -> [String] +pgf2clauses_abs (PGF absname _cncnames gflags abstract _concretes) = + [":- " ++ plFact "module" [plp absname, "[]"]] ++ + clauseHeader "%% flag(?Flag, ?Value): global flags" + (map (plpFact2 "flag") (Map.assocs gflags)) ++ + plAbstract (absname, abstract) + +clauseHeader :: String -> [String] -> [String] +clauseHeader hdr [] = [] +clauseHeader hdr clauses = "":hdr:clauses + + +---------------------------------------------------------------------- +-- abstract syntax + +plAbstract :: (CId, Abstr) -> [String] +plAbstract (name, Abstr aflags funs cats _catfuns) = + ["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", + "%% abstract module: " ++ plp name] ++ + clauseHeader "%% absflag(?Flag, ?Value): flags for abstract syntax" + (map (plpFact2 "absflag") (Map.assocs aflags)) ++ + clauseHeader "%% cat(?Type, ?[X:Type,...])" + (map plCat (Map.assocs cats)) ++ + clauseHeader "%% fun(?Fun, ?Type, ?[X:Type,...])" + (map plFun (Map.assocs funs)) ++ + clauseHeader "%% def(?Fun, ?Expr)" + (concatMap plFundef (Map.assocs funs)) + +plCat :: (CId, [Hypo]) -> String +plCat (cat, hypos) = plFact "cat" (plTypeWithHypos typ) + where ((_,subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos + args = reverse [EFun x | (_,x) <- subst] + typ = DTyp hypos' cat args + +plFun :: (CId, (Type, Int, [Equation])) -> String +plFun (fun, (typ,_,_)) = plFact "fun" (plp fun : plTypeWithHypos typ') + where typ' = snd $ alphaConvert emptyEnv typ + +plTypeWithHypos :: Type -> [String] +plTypeWithHypos (DTyp hypos cat args) = [plTerm (plp cat) (map plp args), plList (map (\(_,x,ty) -> plOper ":" (plp x) (plp ty)) hypos)] + +plFundef :: (CId, (Type,Int,[Equation])) -> [String] +plFundef (fun, (_,_,[])) = [] +plFundef (fun, (_,_,eqs)) = [plFact "def" [plp fun, plp fundef']] + where fundef' = snd $ alphaConvert emptyEnv eqs + + +---------------------------------------------------------------------- +-- concrete syntax + +plConcrete :: (CId, Concr) -> [String] +plConcrete (cncname, Concr cflags lins opers lincats lindefs + _printnames _paramlincats _parser) = + ["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", + "%% concrete module: " ++ plp cncname] ++ + clauseHeader "%% cncflag(?Flag, ?Value): flags for concrete syntax" + (map (mod . plpFact2 "cncflag") (Map.assocs cflags)) ++ + clauseHeader "%% lincat(?Cat, ?Linearization type)" + (map (mod . plpFact2 "lincat") (Map.assocs lincats)) ++ + clauseHeader "%% lindef(?Cat, ?Linearization default)" + (map (mod . plpFact2 "lindef") (Map.assocs lindefs)) ++ + clauseHeader "%% lin(?Fun, ?Linearization)" + (map (mod . plpFact2 "lin") (Map.assocs lins)) ++ + clauseHeader "%% oper(?Oper, ?Linearization)" + (map (mod . plpFact2 "oper") (Map.assocs opers)) + where mod clause = plp cncname ++ ": " ++ clause + + +---------------------------------------------------------------------- +-- prolog-printing pgf datatypes + +instance PLPrint Type where + plp (DTyp hypos cat args) | null hypos = result + | otherwise = plOper " -> " (plList (map (\(_,x,ty) -> plOper ":" (plp x) (plp ty)) hypos)) result + where result = plTerm (plp cat) (map plp args) + +instance PLPrint Expr where + plp (EFun x) = plp x + plp (EAbs _ x e)= plOper "^" (plp x) (plp e) + plp (EApp e e') = plOper " * " (plp e) (plp e') + plp (ELit lit) = plp lit + plp (EMeta n) = "Meta_" ++ show n + +instance PLPrint Patt where + plp (PVar x) = plp x + plp (PApp f ps) = plOper " * " (plp f) (plp ps) + plp (PLit lit) = plp lit + +instance PLPrint Equation where + plp (Equ patterns result) = plOper ":" (plp patterns) (plp result) + +instance PLPrint Term where + plp (S terms) = plTerm "s" [plp terms] + plp (C n) = plTerm "c" [show n] + plp (K tokn) = plTerm "k" [plp tokn] + plp (FV trms) = plTerm "fv" [plp trms] + plp (P t1 t2) = plTerm "p" [plp t1, plp t2] + plp (W s trm) = plTerm "w" [plp s, plp trm] + plp (R terms) = plTerm "r" [plp terms] + plp (F oper) = plTerm "f" [plp oper] + plp (V n) = plTerm "v" [show n] + plp (TM str) = plTerm "tm" [plp str] + +{-- more prolog-like syntax for PGF terms, but also more difficult to handle: +instance PLPrint Term where + plp (S terms) = plp terms + plp (C n) = show n + plp (K token) = plp token + plp (FV terms) = prCurlyList (map plp terms) + plp (P t1 t2) = plOper "/" (plp t1) (plp t2) + plp (W s trm) = plOper "+" (plp s) (plp trm) + plp (R terms) = plTerm "r" (map plp terms) + plp (F oper) = plTerm "f" [plp oper] + plp (V n) = plTerm "arg" [show n] + plp (TM str) = plTerm "meta" [plp str] +--} + +instance PLPrint CId where + plp cid | isLogicalVariable str || + cid == wildCId = plVar str + | otherwise = plAtom str + where str = showCId cid + +instance PLPrint Literal where + plp (LStr s) = plp s + plp (LInt n) = plp (show n) + plp (LFlt f) = plp (show f) + +instance PLPrint Tokn where + plp (KS tokn) = plp tokn + plp (KP strs alts) = plTerm "kp" [plp strs, plList [plOper "/" (plp ss1) (plp ss2) | + Alt ss1 ss2 <- alts]] + +---------------------------------------------------------------------- +-- basic prolog-printing + +class PLPrint a where + plp :: a -> String + plps :: [a] -> String + plps = plList . map plp + +instance PLPrint Char where + plp c = plAtom [c] + plps s = plAtom s + +instance PLPrint a => PLPrint [a] where + plp = plps + +plpFact2 :: (PLPrint a, PLPrint b) => String -> (a, b) -> String +plpFact2 fun (arg1, arg2) = plFact fun [plp arg1, plp arg2] + +plFact :: String -> [String] -> String +plFact fun args = plTerm fun args ++ "." + +plTerm :: String -> [String] -> String +plTerm fun args = plAtom fun ++ prParenth (prTList ", " args) + +plList :: [String] -> String +plList = prBracket . prTList "," + +plOper :: String -> String -> String -> String +plOper op a b = prParenth (a ++ op ++ b) + +plVar :: String -> String +plVar = varPrefix . concatMap changeNonAlphaNum + where varPrefix var@(c:_) | isAsciiUpper c || c=='_' = var + | otherwise = "_" ++ var + changeNonAlphaNum c | isAlphaNumUnderscore c = [c] + | otherwise = "_" ++ show (ord c) ++ "_" + +plAtom :: String -> String +plAtom "" = "''" +plAtom atom@(c:cs) | isAsciiLower c && all isAlphaNumUnderscore cs + || c == '\'' && cs /= "" && last cs == '\'' = atom + | otherwise = "'" ++ concatMap changeQuote atom ++ "'" + where changeQuote '\'' = "\\'" + changeQuote c = [c] + +isAlphaNumUnderscore :: Char -> Bool +isAlphaNumUnderscore c = isAlphaNum c || c == '_' + + +---------------------------------------------------------------------- +-- prolog variables + +createLogicalVariable :: Int -> CId +createLogicalVariable n = mkCId (logicalVariablePrefix ++ show n) + +isLogicalVariable :: String -> Bool +isLogicalVariable = isPrefixOf logicalVariablePrefix + +logicalVariablePrefix :: String +logicalVariablePrefix = "X" + +---------------------------------------------------------------------- +-- alpha convert variables to (unique) logical variables +-- * this is needed if we want to translate variables to Prolog variables +-- * used for abstract syntax, not concrete +-- * not (yet?) used for variables bound in pattern equations + +type ConvertEnv = (Int, [(CId,CId)]) + +emptyEnv :: ConvertEnv +emptyEnv = (0, []) + +class AlphaConvert a where + alphaConvert :: ConvertEnv -> a -> (ConvertEnv, a) + +instance AlphaConvert a => AlphaConvert [a] where + alphaConvert env [] = (env, []) + alphaConvert env (a:as) = (env'', a':as') + where (env', a') = alphaConvert env a + (env'', as') = alphaConvert env' as + +instance AlphaConvert Type where + alphaConvert env@(_,subst) (DTyp hypos cat args) + = ((ctr,subst), DTyp hypos' cat args') + where (env', hypos') = mapAccumL alphaConvertHypo env hypos + ((ctr,_), args') = alphaConvert env' args + +alphaConvertHypo env (b,x,typ) = ((ctr+1,(x,x'):subst), (b,x',typ')) + where ((ctr,subst), typ') = alphaConvert env typ + x' = createLogicalVariable ctr + +instance AlphaConvert Expr where + alphaConvert (ctr,subst) (EAbs b x e) = ((ctr',subst), EAbs b x' e') + where ((ctr',_), e') = alphaConvert (ctr+1,(x,x'):subst) e + x' = createLogicalVariable ctr + alphaConvert env (EApp e1 e2) = (env'', EApp e1' e2') + where (env', e1') = alphaConvert env e1 + (env'', e2') = alphaConvert env' e2 + alphaConvert env expr@(EFun i) = (env, maybe expr EFun (lookup i (snd env))) + alphaConvert env expr = (env, expr) + +-- pattern variables are not alpha converted +-- (but they probably should be...) +instance AlphaConvert Equation where + alphaConvert env@(_,subst) (Equ patterns result) + = ((ctr,subst), Equ patterns result') + where ((ctr,_), result') = alphaConvert env result diff --git a/src/compiler/GF/Compile/GenerateFCFG.hs b/src/compiler/GF/Compile/GenerateFCFG.hs new file mode 100644 index 000000000..52e95f686 --- /dev/null +++ b/src/compiler/GF/Compile/GenerateFCFG.hs @@ -0,0 +1,568 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Compile.GenerateFCFG + (convertConcrete) where + +import PGF.CId +import PGF.Data +import PGF.Macros --hiding (prt) +import PGF.Parsing.FCFG.Utilities + +import GF.Data.BacktrackM +import GF.Data.SortedList +import GF.Data.Utilities (updateNthM, sortNub) + +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import qualified Data.Set as Set +import qualified Data.List as List +import qualified Data.ByteString.Char8 as BS +import Data.Array.IArray +import Data.Maybe +import Control.Monad + +---------------------------------------------------------------------- +-- main conversion function + +convertConcrete :: Abstr -> Concr -> ParserInfo +convertConcrete abs cnc = fixHoasFuns $ convert abs_defs' conc' cats' + where abs_defs = Map.assocs (funs abs) + conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cats = lincats cnc + (abs_defs',conc',cats') = expandHOAS abs_defs conc cats + +expandHOAS :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ([(CId,(Type,Int,[Equation]))],TermMap,TermMap) +expandHOAS funs lins lincats = (funs' ++ hoFuns ++ varFuns, + Map.unions [lins, hoLins, varLins], + Map.unions [lincats, hoLincats, varLincat]) + where + -- replace higher-order fun argument types with new categories + funs' = [(f,(fixType ty,a,e)) | (f,(ty,a,e)) <- funs] + where + fixType :: Type -> Type + fixType ty = let (ats,rt) = typeSkeleton ty in cftype (map catName ats) rt + + hoTypes :: [(Int,CId)] + hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- funs, (n,c) <- fst (typeSkeleton ty), n > 0] + hoCats = sortNub (map snd hoTypes) + -- for each Cat with N bindings, we add a new category _NCat + -- each new category contains a single function __NCat : Cat -> _Var -> ... -> _Var -> _NCat + hoFuns = [(funName ty,(cftype (c : replicate n varCat) (catName ty),0,[])) | ty@(n,c) <- hoTypes] + -- lincats for the new categories + hoLincats = Map.fromList [(catName ty, modifyRec (++ replicate n (S [])) (lincatOf c)) | ty@(n,c) <- hoTypes] + -- linearizations of the new functions, lin __NCat v_0 ... v_n-1 x = { s1 = x.s1; ...; sk = x.sk; $0 = v_0.s ... + hoLins = Map.fromList [ (funName ty, mkLin c n) | ty@(n,c) <- hoTypes] + where mkLin c n = modifyRec (\fs -> [P (V 0) (C j) | j <- [0..length fs-1]] ++ [P (V i) (C 0) | i <- [1..n]]) (lincatOf c) + -- for each Cat, we a add a fun _Var_Cat : _Var -> Cat + varFuns = [(varFunName cat, (cftype [varCat] cat,0,[])) | cat <- hoCats] + -- linearizations of the _Var_Cat functions + varLins = Map.fromList [(varFunName cat, R [P (V 0) (C 0)]) | cat <- hoCats] + -- lincat for the _Var category + varLincat = Map.singleton varCat (R [S []]) + + lincatOf c = fromMaybe (error $ "No lincat for " ++ showCId c) $ Map.lookup c lincats + + modifyRec :: ([Term] -> [Term]) -> Term -> Term + modifyRec f (R xs) = R (f xs) + modifyRec _ t = error $ "Not a record: " ++ show t + + varCat = mkCId "_Var" + + catName :: (Int,CId) -> CId + catName (0,c) = c + catName (n,c) = mkCId ("_" ++ show n ++ showCId c) + + funName :: (Int,CId) -> CId + funName (n,c) = mkCId ("__" ++ show n ++ showCId c) + + varFunName :: CId -> CId + varFunName c = mkCId ("_Var_" ++ showCId c) + +-- replaces __NCat with _B and _Var_Cat with _. +-- the temporary names are just there to avoid name collisions. +fixHoasFuns :: ParserInfo -> ParserInfo +fixHoasFuns pinfo = pinfo{functions=mkArray [FFun (fixName n) prof lins | FFun n prof lins <- elems (functions pinfo)]} + where fixName (CId n) | BS.pack "__" `BS.isPrefixOf` n = (mkCId "_B") + | BS.pack "_Var_" `BS.isPrefixOf` n = wildCId + fixName n = n + +convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo +convert abs_defs cnc_defs cat_defs = getParserInfo (loop grammarEnv) + where + srules = [ + (XRule id args res (map findLinType args) (findLinType res) term) | + (id, (ty,_,_)) <- abs_defs, let (args,res) = catSkeleton ty, + term <- maybeToList (Map.lookup id cnc_defs)] + + findLinType id = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + + (xrulesMap,grammarEnv) = List.foldl' helper (Map.empty,emptyFFunsEnv) srules + where + helper (xrulesMap,grammarEnv) rule@(XRule id abs_args abs_res cnc_args cnc_res term) = + let xrulesMap' = Map.insertWith (++) abs_res [rule] xrulesMap + grammarEnv' = List.foldl' (\env selector -> convertRule cnc_defs selector rule env) + grammarEnv + (mkSingletonSelectors cnc_defs cnc_res) + in xrulesMap' `seq` grammarEnv' `seq` (xrulesMap',grammarEnv') + + loop grammarEnv = + let (todo, grammarEnv') = takeToDoRules xrulesMap grammarEnv + in case todo of + [] -> grammarEnv' + _ -> loop $! List.foldl' (\env (srules,selector) -> + List.foldl' (\env srule -> convertRule cnc_defs selector srule env) env srules) grammarEnv' todo + +convertRule :: TermMap -> TermSelector -> XRule -> GrammarEnv -> GrammarEnv +convertRule cnc_defs selector (XRule fun args cat ctypes ctype term) grammarEnv = + foldBM addRule + grammarEnv + (convertTerm cnc_defs selector term [([],[])]) + (protoFCat cat, map (\scat -> (protoFCat scat,[])) args, ctype, ctypes) + where + addRule linRec (newCat', newArgs', _, _) env0 = + let (env1, newCat) = genFCatHead env0 newCat' + (env2, newArgs,idxArgs) = foldr (\((xcat@(PFCat cat rcs tcs),xpaths),ctype,idx) (env,args,all_args) -> + let xargs = xcat:[PFCat cat [path] tcs | path <- reverse xpaths] + (env1, xargs1) = List.mapAccumL (genFCatArg cnc_defs ctype) env xargs + in case xcat of + PFCat _ [] _ -> (env , args, all_args) + _ -> (env1,xargs1++args,(idx,zip xargs1 xargs):all_args)) + (env1,[],[]) (zip3 newArgs' ctypes [0..]) + + (env3,newLinRec) = List.mapAccumL (translateLin idxArgs linRec) env2 (case newCat' of {PFCat _ rcs _ -> rcs}) + + (_,newProfile) = List.mapAccumL accumProf 0 newArgs' + where + accumProf nr (PFCat _ [] _,_ ) = (nr, [] ) + accumProf nr (_ ,xpaths) = (nr+cnt+1, [nr..nr+cnt]) + where cnt = length xpaths + + (env4,funid) = addFFun env3 (FFun fun newProfile (mkArray newLinRec)) + + in addProduction env4 newCat (FApply funid newArgs) + +translateLin idxArgs [] grammarEnv lbl' = error "translateLin" +translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl' + | lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms) + | otherwise = translateLin idxArgs lins grammarEnv lbl' + where + instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) + (\t -> case t of + KS s -> FSymKS [s] + KP strs vars -> FSymKP strs vars) + instCat lbl nr xnr nr' ((idx,xargs):idxArgs) + | nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr + in FSymCat (nr'+xnr) (index lbl rcs 0) + | otherwise = instCat lbl nr xnr (nr'+length xargs) idxArgs + + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! (idx+1) + + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BacktrackM Env a + +type FPath = [FIndex] +type Env = (ProtoFCat, [(ProtoFCat,[FPath])], Term, [Term]) +type LinRec = [(FPath, [Either (FPath, FIndex, Int) Tokn])] + +type TermMap = Map.Map CId Term + +convertTerm :: TermMap -> TermSelector -> Term -> LinRec -> CnvMonad LinRec +convertTerm cnc_defs selector (V nr) ((lbl_path,lin) : lins) = convertArg selector nr [] lbl_path lin lins +convertTerm cnc_defs selector (C nr) ((lbl_path,lin) : lins) = convertCon selector nr lbl_path lin lins +convertTerm cnc_defs selector (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 record lbl_path lin lins + +convertTerm cnc_defs selector (P term sel) lins = do nr <- evalTerm cnc_defs [] sel + convertTerm cnc_defs (TuplePrj nr selector) term lins +convertTerm cnc_defs selector (FV vars) lins = do term <- member vars + convertTerm cnc_defs selector term lins +convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path + foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) +convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = + do projectHead lbl_path + return ((lbl_path,Right (KS str) : lin) : lins) +convertTerm cnc_defs selector (K (KP strs vars))((lbl_path,lin) : lins) = + do projectHead lbl_path + toks <- member (strs:[strs' | Alt strs' _ <- vars]) + return ((lbl_path, map (Right . KS) toks ++ lin) : lins) +convertTerm cnc_defs selector (F id) lins = case Map.lookup id cnc_defs of + Just term -> convertTerm cnc_defs selector term lins + Nothing -> mzero +convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do + ss <- case t of + R ss -> return ss + F f -> case Map.lookup f cnc_defs of + Just (R ss) -> return ss + _ -> mzero + convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins +convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")") + + +convertArg (TupleSel record) nr path lbl_path lin lins = + foldM (\lins (lbl, selector) -> convertArg selector nr (lbl:path) (lbl:lbl_path) lin lins) lins record +convertArg (TuplePrj lbl selector) nr path lbl_path lin lins = + convertArg selector nr (lbl:path) lbl_path lin lins +convertArg (ConSel indices) nr path lbl_path lin lins = do + index <- member indices + restrictHead lbl_path index + restrictArg nr path index + return lins +convertArg StrSel nr path lbl_path lin lins = do + projectHead lbl_path + xnr <- projectArg nr path + return ((lbl_path, Left (path, nr, xnr) : lin) : lins) + +convertCon (ConSel indices) index lbl_path lin lins = do + guard (index `elem` indices) + restrictHead lbl_path index + return lins +convertCon x _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x + +convertRec cnc_defs selector index [] lbl_path lin lins = return lins +convertRec cnc_defs selector@(TupleSel fields) index (val:record) lbl_path lin lins = select fields + where + select [] = convertRec cnc_defs selector (index+1) record lbl_path lin lins + select ((index',sub_sel) : fields) + | index == index' = do lins <- convertTerm cnc_defs sub_sel val ((index:lbl_path,lin) : lins) + convertRec cnc_defs selector (index+1) record lbl_path lin lins + | otherwise = select fields +convertRec cnc_defs (TuplePrj index' sub_sel) index record lbl_path lin lins = do + convertTerm cnc_defs sub_sel (record !! (index'-index)) ((lbl_path,lin) : lins) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = do term <- readArgCType nr + unifyPType nr (reverse path) (selectTerm path term) +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of + Just term -> evalTerm cnc_defs path term + Nothing -> mzero +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + +unifyPType :: FIndex -> FPath -> Term -> CnvMonad FIndex +unifyPType nr path (C max_index) = + do (_, args, _, _) <- get + let (PFCat _ _ tcs,_) = args !! nr + case lookup path tcs of + Just index -> return index + Nothing -> do index <- member [0..max_index] + restrictArg nr path index + return index +unifyPType nr path t = error $ "unifyPType " ++ show t ---- AR 2/10/2007 + +selectTerm :: FPath -> Term -> Term +selectTerm [] term = term +selectTerm (index:path) (R record) = selectTerm path (record !! index) + + +---------------------------------------------------------------------- +-- GrammarEnv + + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int FCatSet FSeqSet FFunSet (IntMap.IntMap (Set.Set Production)) +type FCatSet = Map.Map CId (Map.Map [FPath] (Map.Map [(FPath,FIndex)] (Either FCat FCat))) +type FSeqSet = Map.Map FSeq SeqId +type FFunSet = Map.Map FFun FunId + +data ProtoFCat = PFCat CId [FPath] [(FPath,FIndex)] + +protoFCat :: CId -> ProtoFCat +protoFCat cat = PFCat cat [] [] + +emptyFFunsEnv = GrammarEnv 0 initFCatSet Map.empty Map.empty IntMap.empty + where + initFCatSet = (ins fcatString (mkCId "String") [[0]] [] $ + ins fcatInt (mkCId "Int") [[0]] [] $ + ins fcatFloat (mkCId "Float") [[0]] [] $ + ins fcatVar (mkCId "_Var") [[0]] [] $ + Map.empty) + + ins fcat cat rcs tcs catSet = + Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet + where + right_fcat = Right fcat + tmap_s = Map.singleton tcs right_fcat + rmap_s = Map.singleton rcs tmap_s + +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) + +addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet prodSet) (_,lst) = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet prodSet,last_seq) + where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) prodSet,last_funid) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions0= prodSet + , productions = prodSet + , startCats = Map.map getFCatList catSet + , totalCats = last_id+1 + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + getFCatList rcs = Map.fold (\tcs lst -> Map.fold (\x lst -> either id id x : lst) lst tcs) [] rcs + + +genFCatHead :: GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) +genFCatHead env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet >>= Map.lookup rcs >>= Map.lookup tcs of + Just (Left fcat) -> (GrammarEnv last_id (ins fcat) seqSet funSet prodSet, fcat) + Just (Right fcat) -> (env, fcat) + Nothing -> let fcat = last_id+1 + in (GrammarEnv fcat (ins fcat) seqSet funSet prodSet, fcat) + where + ins fcat = Map.insertWith (\_ -> Map.insertWith (\_ -> Map.insert tcs right_fcat) rcs tmap_s) cat rmap_s catSet + where + right_fcat = Right fcat + tmap_s = Map.singleton tcs right_fcat + rmap_s = Map.singleton rcs tmap_s + +genFCatArg :: TermMap -> Term -> GrammarEnv -> ProtoFCat -> (GrammarEnv, FCat) +genFCatArg cnc_defs ctype env@(GrammarEnv last_id catSet seqSet funSet prodSet) (PFCat cat rcs tcs) = + case Map.lookup cat catSet >>= Map.lookup rcs of + Just tmap -> case Map.lookup tcs tmap of + Just (Left fcat) -> (env, fcat) + Just (Right fcat) -> (env, fcat) + Nothing -> ins tmap + Nothing -> ins Map.empty + where + ins tmap = + let fcat = last_id+1 + (either_fcat,last_id1,tmap1,prodSet1) + = foldBM (\tcs st (either_fcat,last_id,tmap,prodSet) -> + let (last_id1,tmap1,fcat_arg) = addArg tcs last_id tmap + p = FCoerce fcat_arg + prodSet1 = IntMap.insertWith Set.union fcat (Set.singleton p) prodSet + in if st + then (Right fcat, last_id1,tmap1,prodSet1) + else (either_fcat,last_id, tmap ,prodSet )) + (Left fcat,fcat,Map.insert tcs either_fcat tmap,prodSet) + (gen_tcs ctype [] []) + False + rmap1 = Map.singleton rcs tmap1 + in (GrammarEnv last_id1 (Map.insertWith (\_ -> Map.insert rcs tmap1) cat rmap1 catSet) seqSet funSet prodSet1, fcat) + where + addArg tcs last_id tmap = + case Map.lookup tcs tmap of + Just (Left fcat) -> (last_id, tmap, fcat) + Just (Right fcat) -> (last_id, tmap, fcat) + Nothing -> let fcat = last_id+1 + in (fcat, Map.insert tcs (Left fcat) tmap, fcat) + + gen_tcs :: Term -> FPath -> [(FPath,FIndex)] -> BacktrackM Bool [(FPath,FIndex)] + gen_tcs (R record) path acc = foldM (\acc (label,ctype) -> gen_tcs ctype (label:path) acc) acc (zip [0..] record) + gen_tcs (S _) path acc = return acc + gen_tcs (C max_index) path acc = + case List.lookup path tcs of + Just index -> return $! addConstraint path index acc + Nothing -> do put True + index <- member [0..max_index] + return $! addConstraint path index acc + where + addConstraint path0 index0 (c@(path,index) : cs) + | path0 > path = c:addConstraint path0 index0 cs + addConstraint path0 index0 cs = (path0,index0) : cs + gen_tcs (F id) path acc = case Map.lookup id cnc_defs of + Just term -> gen_tcs term path acc + Nothing -> error ("unknown identifier: "++showCId id) + + + +------------------------------------------------------------ +-- TODO queue organization + +type XRulesMap = Map.Map CId [XRule] +data XRule = XRule CId {- function -} + [CId] {- argument types -} + CId {- result type -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} + +takeToDoRules :: XRulesMap -> GrammarEnv -> ([([XRule], TermSelector)], GrammarEnv) +takeToDoRules xrulesMap (GrammarEnv last_id catSet seqSet funSet prodSet) = + (todo,GrammarEnv last_id catSet' seqSet funSet prodSet) + where + (todo,catSet') = + Map.mapAccumWithKey (\todo cat rmap -> + let (todo1,rmap1) = Map.mapAccumWithKey (\todo rcs tmap -> + let (tcss,tmap') = Map.mapAccumWithKey (\tcss tcs either_xcat -> + case either_xcat of + Left xcat -> (tcs:tcss,Right xcat) + Right xcat -> ( tcss,either_xcat)) [] tmap + in case tcss of + [] -> ( todo,tmap ) + _ -> ((srules,mkSelector rcs tcss) : todo,tmap')) todo rmap + mb_srules = Map.lookup cat xrulesMap + Just srules = mb_srules + + in case mb_srules of + Just srules -> (todo1,rmap1) + Nothing -> (todo ,rmap1)) [] catSet + + +------------------------------------------------------------ +-- The TermSelector + +data TermSelector + = TupleSel [(FIndex, TermSelector)] + | TuplePrj FIndex TermSelector + | ConSel [FIndex] + | StrSel + deriving Show + +mkSingletonSelectors :: TermMap + -> Term -- ^ Type representation term + -> [TermSelector] -- ^ list of selectors containing just one string field +mkSingletonSelectors cnc_defs term = sels0 + where + (sels0,tcss0) = loop [] ([],[]) term + + loop path st (R record) = List.foldl' (\st (index,term) -> loop (index:path) st term) st (zip [0..] record) + loop path (sels,tcss) (C i) = ( sels,map ((,) path) [0..i] : tcss) + loop path (sels,tcss) (S _) = (mkSelector [path] tcss0 : sels, tcss) + loop path (sels,tcss) (F id) = case Map.lookup id cnc_defs of + Just term -> loop path (sels,tcss) term + Nothing -> error ("unknown identifier: "++showCId id) + +mkSelector :: [FPath] -> [[(FPath,FIndex)]] -> TermSelector +mkSelector rcs tcss = + List.foldl' addRestriction (case xs of + (path:xs) -> List.foldl' addProjection (path2selector StrSel path) xs) ys + where + xs = [ reverse path | path <- rcs] + ys = [(reverse path,term) | tcs <- tcss, (path,term) <- tcs] + + addRestriction :: TermSelector -> (FPath,FIndex) -> TermSelector + addRestriction (ConSel indices) ([] ,n_index) = ConSel (add indices) + where + add [] = [n_index] + add (index':indices) + | n_index == index' = index': indices + | otherwise = index':add indices + addRestriction (TupleSel fields) (index : path,n_index) = TupleSel (add fields) + where + add [] = [(index,path2selector (ConSel [n_index]) path)] + add (field@(index',sub_sel):fields) + | index == index' = (index',addRestriction sub_sel (path,n_index)):fields + | otherwise = field : add fields + + addProjection :: TermSelector -> FPath -> TermSelector + addProjection StrSel [] = StrSel + addProjection (TupleSel fields) (index : path) = TupleSel (add fields) + where + add [] = [(index,path2selector StrSel path)] + add (field@(index',sub_sel):fields) + | index == index' = (index',addProjection sub_sel path):fields + | otherwise = field : add fields + + path2selector base [] = base + path2selector base (index : path) = TupleSel [(index,path2selector base path)] + +------------------------------------------------------------ +-- updating the MCF rule + +readArgCType :: FIndex -> CnvMonad Term +readArgCType nr = do (_, _, _, ctypes) <- get + return (ctypes !! nr) + +restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () +restrictArg nr path index = do + (head, args, ctype, ctypes) <- get + args' <- updateNthM (\(xcat,xs) -> do xcat <- restrictProtoFCat path index xcat + return (xcat,xs) ) nr args + put (head, args', ctype, ctypes) + +projectArg :: FIndex -> FPath -> CnvMonad Int +projectArg nr path = do + (head, args, ctype, ctypes) <- get + (xnr,args') <- updateArgs nr args + put (head, args', ctype, ctypes) + return xnr + where + updateArgs :: FIndex -> [(ProtoFCat,[FPath])] -> CnvMonad (Int,[(ProtoFCat,[FPath])]) + updateArgs 0 ((a@(PFCat _ rcs _),xpaths) : as) + | path `elem` rcs = return (length xpaths+1,(a,path:xpaths):as) + | otherwise = do a <- projectProtoFCat path a + return (0,(a,xpaths):as) + updateArgs n (a : as) = do + (xnr,as) <- updateArgs (n-1) as + return (xnr,a:as) + +readHeadCType :: CnvMonad Term +readHeadCType = do (_, _, ctype, _) <- get + return ctype + +restrictHead :: FPath -> FIndex -> CnvMonad () +restrictHead path term + = do (head, args, ctype, ctypes) <- get + head' <- restrictProtoFCat path term head + put (head', args, ctype, ctypes) + +projectHead :: FPath -> CnvMonad () +projectHead path + = do (head, args, ctype, ctypes) <- get + head' <- projectProtoFCat path head + put (head', args, ctype, ctypes) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat +restrictProtoFCat path0 index0 (PFCat cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat cat rcs tcs) + where + addConstraint (c@(path,index) : cs) + | path0 > path = liftM (c:) (addConstraint cs) + | path0 == path = guard (index0 == index) >> + return (c : cs) + addConstraint cs = return ((path0,index0) : cs) + +projectProtoFCat :: FPath -> ProtoFCat -> CnvMonad ProtoFCat +projectProtoFCat path0 (PFCat cat rcs tcs) = do + return (PFCat cat (addConstraint rcs) tcs) + where + addConstraint (path : rcs) + | path0 > path = path : addConstraint rcs + | path0 == path = path : rcs + addConstraint rcs = path0 : rcs + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs new file mode 100644 index 000000000..458cf3f5c --- /dev/null +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -0,0 +1,510 @@ +{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses #-} +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Convert PGF grammar to PMCFG grammar. +-- +----------------------------------------------------------------------------- + +module GF.Compile.GeneratePMCFG + (convertConcrete) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Infra.Option +import GF.Data.BacktrackM +import GF.Data.Utilities (updateNthM, updateNth, sortNub) + +import System.IO +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.List as List +import qualified Data.IntMap as IntMap +import qualified Data.ByteString.Char8 as BS +import Data.Array.IArray +import Data.Maybe +import Control.Monad +import Control.Exception + +---------------------------------------------------------------------- +-- main conversion function + + +convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo +convertConcrete opts abs lang cnc = do + let env0 = emptyGrammarEnv cnc_defs cat_defs + when (flag optProf opts) $ do + profileGrammar lang cnc_defs env0 pfrules + let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0 + env2 = List.foldl' (convertRule cnc_defs) env1 pfrules + return $ getParserInfo env2 + where + abs_defs = Map.assocs (funs abs) + cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cat_defs = Map.insert cidVar (S []) (lincats cnc) + lin_defs = lindefs cnc + + pfrules = [ + (PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | + (id, (ty,_,_)) <- abs_defs, let (args,res) = typeSkeleton ty, + term <- maybeToList (Map.lookup id cnc_defs)] + + findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + +profileGrammar lang cnc_defs (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do + hPutStrLn stderr "" + hPutStrLn stderr ("Language: " ++ show lang) + hPutStrLn stderr "" + hPutStrLn stderr "Categories Count" + hPutStrLn stderr "--------------------------------" + case IntMap.lookup 0 catSet of + Just cats -> mapM_ profileCat (Map.toList cats) + Nothing -> return () + hPutStrLn stderr "--------------------------------" + hPutStrLn stderr "" + hPutStrLn stderr "Rules Count" + hPutStrLn stderr "--------------------------------" + mapM_ profileRule pfrules + hPutStrLn stderr "--------------------------------" + where + profileCat (cid,(fcat1,fcat2,_)) = do + hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1)) + + profileRule (PFRule fun args res ctypes ctype term) = do + let pargs = zipWith (protoFCat cnc_defs) args ctypes + hPutStrLn stderr (lformat 23 fun ++ rformat 9 (product [length xs | PFCat _ _ _ tcs <- pargs, (_,xs) <- tcs])) + + lformat :: Show a => Int -> a -> String + lformat n x = s ++ replicate (n-length s) ' ' + where + s = show x + + rformat :: Show a => Int -> a -> String + rformat n x = replicate (n-length s) ' ' ++ s + where + s = show x + +brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) +brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of + (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1 + where + optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps]) + where + ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv + ff funid xs env + | product (map Set.size ys) == count = + case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of + (env,args) -> addProduction env cat (FApply funid args) + | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs + where + count = length xs + ys = foldr (zipWith Set.insert) (repeat Set.empty) xs + +convertRule :: TermMap -> GrammarEnv -> ProtoFRule -> GrammarEnv +convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) = + let pres = protoFCat cnc_defs res ctype + pargs = zipWith (protoFCat cnc_defs) args ctypes + + b = runBranchM (convertTerm cnc_defs [] ctype term) (pargs,[]) + (grammarEnv1,b1) = addSequences' grammarEnv b + grammarEnv2 = brk (\grammarEnv -> foldBM addRule + grammarEnv + (go' b1 [] []) + (pres,pargs) ) grammarEnv1 + in grammarEnv2 + where + addRule lins (newCat', newArgs') env0 = + let [newCat] = getFCats env0 newCat' + (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' + + (env2,funid) = addFFun env1 (FFun fun [[n] | n <- [0..length newArgs-1]] (mkArray lins)) + + in addProduction env2 newCat (FApply funid newArgs) + +---------------------------------------------------------------------- +-- Branch monad + +newtype BranchM a = BM (forall b . (a -> ([ProtoFCat],[FSymbol]) -> Branch b) -> ([ProtoFCat],[FSymbol]) -> Branch b) + +instance Monad BranchM where + return a = BM (\c s -> c a s) + BM m >>= k = BM (\c s -> m (\a s -> unBM (k a) c s) s) + where unBM (BM m) = m + +instance MonadState ([ProtoFCat],[FSymbol]) BranchM where + get = BM (\c s -> c s s) + put s = BM (\c _ -> c () s) + +instance Functor BranchM where + fmap f (BM m) = BM (\c s -> m (c . f) s) + +runBranchM :: BranchM (Value a) -> ([ProtoFCat],[FSymbol]) -> Branch a +runBranchM (BM m) s = m (\v s -> Return v) s + +variants :: [a] -> BranchM a +variants xs = BM (\c s -> Variant (go xs c s)) + where + go [] c s = [] + go (x:xs) c s = c x s : go xs c s + +choices :: Int -> FPath -> BranchM FIndex +choices nr path = BM (\c s -> let (args,_) = s + PFCat _ _ _ tcs = args !! nr + in case fromMaybe (error "evalTerm: wrong path") (lookup path tcs) of + [index] -> c index s + indices -> Case nr path (go indices c s)) + where + go [] c s = [] + go (i:is) c s = (c i (updateEnv i s)) : go is c s + + updateEnv index (args,seq) = (updateNth (restrictArg path index) nr args,seq) + + restrictArg path index (PFCat n cat rcs tcs) = PFCat n cat rcs (addConstraint path index tcs) + + addConstraint path0 index0 [] = error "restrictProtoFCat: unknown path" + addConstraint path0 index0 (c@(path,indices) : tcs) + | path0 == path = ((path,[index0]) : tcs) + | otherwise = c : addConstraint path0 index0 tcs + +mkRecord :: [BranchM (Value a)] -> BranchM (Value a) +mkRecord xs = BM (\c -> go xs (c . Rec)) + where + go [] c s = c [] s + go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s + +-- cutBranch :: BranchM (Value a) -> BranchM (Branch a) +-- cutBranch (BM m) = BM (\c e -> c (m (\v e -> Return v) e) e) + + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BranchM a + +type FPath = [FIndex] +data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])] +type Env = (ProtoFCat, [ProtoFCat]) +data ProtoFRule = PFRule CId {- function -} + [(Int,CId)] {- argument types: context size and category -} + (Int,CId) {- result type : context size (always 0) and category -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} +type TermMap = Map.Map CId Term + + +protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat +protoFCat cnc_defs (n,cat) ctype = + let (rcs,tcs) = loop [] [] [] ctype' + in PFCat n cat rcs tcs + where + ctype' -- extend the high-order linearization type + | n > 0 = case ctype of + R xs -> R (xs ++ replicate n (S [])) + _ -> error $ "Not a record: " ++ show ctype + | otherwise = ctype + + loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) + loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) + loop path rcs tcs (S _) = (path:rcs, tcs) + loop path rcs tcs (F id) = case Map.lookup id cnc_defs of + Just term -> loop path rcs tcs term + Nothing -> error ("unknown identifier: "++show id) + +data Branch a + = Case Int FPath [Branch a] + | Variant [Branch a] + | Return (Value a) + +data Value a + = Rec [Branch a] + | Str a + | Con FIndex + + +go' :: Branch SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] +go' (Case nr path_ bs) path ss = do (index,b) <- member (zip [0..] bs) + restrictArg nr path_ index + go' b path ss +go' (Variant bs) path ss = do b <- member bs + go' b path ss +go' (Return v) path ss = go v path ss + +go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] +go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs) +go (Str seqid) path ss = return (seqid : ss) +go (Con i) path ss = restrictHead path i >> return ss + +addSequences' :: GrammarEnv -> Branch [FSymbol] -> (GrammarEnv, Branch SeqId) +addSequences' env (Case nr path bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs + in (env1,Case nr path bs1) +addSequences' env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs + in (env1,Variant bs1) +addSequences' env (Return v) = let (env1,v1) = addSequences env v + in (env1,Return v1) + +addSequences :: GrammarEnv -> Value [FSymbol] -> (GrammarEnv, Value SeqId) +addSequences env (Rec vs) = let (env1,vs1) = List.mapAccumL addSequences' env vs + in (env1,Rec vs1) +addSequences env (Str lin) = let (env1,seqid) = addFSeq env (optimizeLin lin) + in (env1,Str seqid) +addSequences env (Con i) = (env,Con i) + + +optimizeLin [] = [] +optimizeLin lin@(FSymKS _ : _) = + let (ts,lin') = getRest lin + in FSymKS ts : optimizeLin lin' + where + getRest (FSymKS ts : lin) = let (ts1,lin') = getRest lin + in (ts++ts1,lin') + getRest lin = ([],lin) +optimizeLin (sym : lin) = sym : optimizeLin lin + + +convertTerm :: TermMap -> FPath -> Term -> Term -> CnvMonad (Value [FSymbol]) +convertTerm cnc_defs sel ctype (V nr) = convertArg ctype nr (reverse sel) +convertTerm cnc_defs sel ctype (C nr) = convertCon ctype nr (reverse sel) +convertTerm cnc_defs sel ctype (R record) = convertRec cnc_defs sel ctype record +convertTerm cnc_defs sel ctype (P term p) = do nr <- evalTerm cnc_defs [] p + convertTerm cnc_defs (nr:sel) ctype term +convertTerm cnc_defs sel ctype (FV vars) = do term <- variants vars + convertTerm cnc_defs sel ctype term +convertTerm cnc_defs sel ctype (S ts) = do vs <- mapM (convertTerm cnc_defs sel ctype) ts + return (Str (concat [s | Str s <- vs])) +convertTerm cnc_defs sel ctype (K (KS t)) = return (Str [FSymKS [t]]) +convertTerm cnc_defs sel ctype (K (KP s v))=return (Str [FSymKP s v]) +convertTerm cnc_defs sel ctype (F id) = case Map.lookup id cnc_defs of + Just term -> convertTerm cnc_defs sel ctype term + Nothing -> error ("unknown id " ++ showCId id) +convertTerm cnc_defs sel ctype (W s t) = do + ss <- case t of + R ss -> return ss + F f -> case Map.lookup f cnc_defs of + Just (R ss) -> return ss + _ -> error ("unknown id " ++ showCId f) + convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] +convertTerm cnc_defs sel ctype x = error ("convertTerm ("++show x++")") + +convertArg :: Term -> Int -> FPath -> CnvMonad (Value [FSymbol]) +convertArg (R ctypes) nr path = do + mkRecord (zipWith (\lbl ctype -> convertArg ctype nr (lbl:path)) [0..] ctypes) +convertArg (C max) nr path = do + index <- choices nr path + return (Con index) +convertArg (S _) nr path = do + (args,_) <- get + let PFCat _ cat rcs tcs = args !! nr + l = index path rcs 0 + sym | isLiteralCat cat = FSymLit nr l + | otherwise = FSymCat nr l + return (Str [sym]) + where + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! (idx+1) + +convertCon (C max) index [] = return (Con index) +convertCon x _ _ = fail $ "SimpleToFCFG.convertCon: " ++ show x + +convertRec cnc_defs [] (R ctypes) record = do + mkRecord (zipWith (convertTerm cnc_defs []) ctypes record) +convertRec cnc_defs (index:sub_sel) ctype record = + convertTerm cnc_defs sub_sel ctype (record !! index) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = choices nr (reverse path) +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = variants terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of + Just term -> evalTerm cnc_defs path term + Nothing -> error ("unknown id " ++ showCId id) +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + + +---------------------------------------------------------------------- +-- GrammarEnv + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) +type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int])) +type SeqSet = Map.Map FSeq SeqId +type FunSet = Map.Map FFun FunId +type CoerceSet= Map.Map [FCat] FCat + +emptyGrammarEnv cnc_defs lincats = + let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats + in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty + where + computeCatRange index cat ctype + | cat == cidString = (index, (fcatString,fcatString,[])) + | cat == cidInt = (index, (fcatInt, fcatInt, [])) + | cat == cidFloat = (index, (fcatFloat, fcatFloat, [])) + | cat == cidVar = (index, (fcatVar, fcatVar, [])) + | otherwise = (index+size,(index,index+size-1,poly)) + where + (size,poly) = getMultipliers 1 [] ctype + + getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (S _) = (m,ms) + getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) + getMultipliers m ms (F id) = case Map.lookup id cnc_defs of + Just term -> getMultipliers m ms term + Nothing -> error ("unknown identifier: "++showCId id) + +expandHOAS abs_defs cnc_defs lincats lindefs env = + foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats + where + hoTypes :: [(Int,CId)] + hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs + , (n,c) <- fst (typeSkeleton ty), n > 0] + + hoCats :: [CId] + hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs + , h <- case ty of {DTyp hyps val _ -> hyps} + , let ty = typeOfHypo h + , c <- fst (catSkeleton ty)] + + -- add a range of PMCFG categories for each GF high-order category + add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = + case IntMap.lookup 0 catSet >>= Map.lookup cat of + Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet + !last_id' = last_id+(end-start)+1 + in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) + Nothing -> env + + -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat + add_hoFun env (n,cat) = + let linRec = reverse $ + [[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ + [[FSymLit i 0] | i <- [1..n]] + (env1,lins) = List.mapAccumL addFSeq env linRec + newLinRec = mkArray lins + + (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec) + + env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar))) + env2 + (zip (getFCats env2 arg) (getFCats env2 res)) + in env3 + where + (arg,res) = case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ showCId cat + Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype) + + -- add one PMCFG function for each high-order category: _V : Var -> Cat + add_varFun env cat = + convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) + where + lindef = + case Map.lookup cat lindefs of + Nothing -> error $ "No lindef for " ++ showCId cat + Just def -> def + + arg = + case Map.lookup cidVar lincats of + Nothing -> error $ "No lincat for " ++ showCId cat + Just ctype -> ctype + + res = + case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ showCId cat + Just ctype -> ctype + + _B = mkCId "_B" + _V = mkCId "_V" + +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) + +addFSeq :: GrammarEnv -> [FSymbol] -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) lst = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq) + where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid) + +addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat) +addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats = + case sub_fcats of + [fcat] -> (env,fcat) + _ -> case Map.lookup sub_fcats crcSet of + Just fcat -> (env,fcat) + Nothing -> let !fcat = last_id+1 + in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions0= productions0 + , productions = filterProductions productions0 + , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) + , totalCats = last_id+1 + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + productions0 = IntMap.union prodSet coercions + coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] + +getFCats :: GrammarEnv -> ProtoFCat -> [FCat] +getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) + where + variants _ [] fcat = return fcat + variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices + variants ms tcs ((m*index) + fcat) + + +------------------------------------------------------------ +-- updating the MCF rule + +restrictArg :: FIndex -> FPath -> FIndex -> BacktrackM Env () +restrictArg nr path index = do + (head, args) <- get + args' <- updateNthM (restrictProtoFCat path index) nr args + put (head, args') + +restrictHead :: FPath -> FIndex -> BacktrackM Env () +restrictHead path term + = do (head, args) <- get + head' <- restrictProtoFCat path term head + put (head', args) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> BacktrackM Env ProtoFCat +restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat n cat rcs tcs) + where + addConstraint [] = error "restrictProtoFCat: unknown path" + addConstraint (c@(path,indices) : tcs) + | path0 == path = guard (index0 `elem` indices) >> + return ((path,[index0]) : tcs) + | otherwise = liftM (c:) (addConstraint tcs) + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GeneratePMCFGOld.hs b/src/compiler/GF/Compile/GeneratePMCFGOld.hs new file mode 100644 index 000000000..244ed68fe --- /dev/null +++ b/src/compiler/GF/Compile/GeneratePMCFGOld.hs @@ -0,0 +1,374 @@ +{-# LANGUAGE BangPatterns, CPP #-} +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Converting SimpleGFC grammars to fast nonerasing MCFG grammar. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + +module GF.Compile.GeneratePMCFG + (convertConcrete) where + +import PGF.CId +import PGF.Data +import PGF.Macros --hiding (prt) + +import GF.Data.BacktrackM +import GF.Data.SortedList +import GF.Data.Utilities (updateNthM, sortNub) + +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.List as List +import qualified Data.IntMap as IntMap +import qualified Data.ByteString.Char8 as BS +import Data.Array.IArray +import Data.Maybe +import Control.Monad +import Debug.Trace + +---------------------------------------------------------------------- +-- main conversion function + +convertConcrete :: Abstr -> Concr -> ParserInfo +convertConcrete abs cnc = convert abs_defs conc cats + where abs_defs = Map.assocs (funs abs) + conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cats = lincats cnc + +convert :: [(CId,(Type,Expr))] -> TermMap -> TermMap -> ParserInfo +convert abs_defs cnc_defs cat_defs = + let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs) + in getParserInfo (List.foldl' (convertRule cnc_defs) env xrules) + where + xrules = [ + (XRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | + (id, (ty,_)) <- abs_defs, let (args,res) = typeSkeleton ty, + term <- maybeToList (Map.lookup id cnc_defs)] + + findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) + +brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) +brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of + (GrammarEnv last_id catSet seqSet funSet crcSet topdown1) -> IntMap.foldWithKey optimize (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) topdown1 + where + optimize cat ps env = IntMap.foldWithKey ff env (IntMap.fromListWith (++) [(funid,[args]) | FApply funid args <- Set.toList ps]) + where + ff :: FunId -> [[FCat]] -> GrammarEnv -> GrammarEnv + ff funid xs env + | product (map Set.size ys) == count = + case List.mapAccumL (\env c -> addFCoercion env (Set.toList c)) env ys of + (env,args) -> addProduction env cat (FApply funid args) + | otherwise = List.foldl (\env args -> addProduction env cat (FApply funid args)) env xs + where + count = length xs + ys = foldr (zipWith Set.insert) (repeat Set.empty) xs + +convertRule :: TermMap -> GrammarEnv -> XRule -> GrammarEnv +convertRule cnc_defs grammarEnv (XRule fun args res ctypes ctype term) = + brk (\grammarEnv -> foldBM addRule + grammarEnv + (convertTerm cnc_defs [] ctype term [([],[])]) + (protoFCat cnc_defs res ctype, zipWith (protoFCat cnc_defs) args ctypes)) grammarEnv + where + addRule linRec (newCat', newArgs') env0 = + let [newCat] = getFCats env0 newCat' + (env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs' + + (env2,lins) = List.mapAccumL addFSeq env1 linRec + newLinRec = mkArray lins + + (env3,funid) = addFFun env2 (FFun fun [[n] | n <- [0..length newArgs-1]] newLinRec) + + in addProduction env3 newCat (FApply funid newArgs) + +---------------------------------------------------------------------- +-- term conversion + +type CnvMonad a = BacktrackM Env a + +type FPath = [FIndex] +data ProtoFCat = PFCat Int CId [FPath] [(FPath,[FIndex])] +type Env = (ProtoFCat, [ProtoFCat]) +type LinRec = [(FPath, [FSymbol])] +data XRule = XRule CId {- function -} + [(Int,CId)] {- argument types: context size and category -} + (Int,CId) {- result type : context size (always 0) and category -} + [Term] {- argument lin-types representation -} + Term {- result lin-type representation -} + Term {- body -} + +protoFCat :: TermMap -> (Int,CId) -> Term -> ProtoFCat +protoFCat cnc_defs (n,cat) ctype = + let (rcs,tcs) = loop [] [] [] ctype' + in PFCat n cat rcs tcs + where + ctype' -- extend the high-order linearization type + | n > 0 = case ctype of + R xs -> R (xs ++ replicate n (S [])) + _ -> error $ "Not a record: " ++ show ctype + | otherwise = ctype + + loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) + loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) + loop path rcs tcs (S _) = (path:rcs, tcs) + loop path rcs tcs (F id) = case Map.lookup id cnc_defs of + Just term -> loop path rcs tcs term + Nothing -> error ("unknown identifier: "++show id) + +type TermMap = Map.Map CId Term + +convertTerm :: TermMap -> FPath -> Term -> Term -> LinRec -> CnvMonad LinRec +convertTerm cnc_defs sel ctype (V nr) ((lbl_path,lin) : lins) = convertArg ctype nr (reverse sel) lbl_path lin lins +convertTerm cnc_defs sel ctype (C nr) ((lbl_path,lin) : lins) = convertCon ctype nr (reverse sel) lbl_path lin lins +convertTerm cnc_defs sel ctype (R record) ((lbl_path,lin) : lins) = convertRec cnc_defs sel ctype record lbl_path lin lins +convertTerm cnc_defs sel ctype (P term p) lins = do nr <- evalTerm cnc_defs [] p + convertTerm cnc_defs (nr:sel) ctype term lins +convertTerm cnc_defs sel ctype (FV vars) lins = do term <- member vars + convertTerm cnc_defs sel ctype term lins +convertTerm cnc_defs sel ctype (S ts) lins = foldM (\lins t -> convertTerm cnc_defs sel ctype t lins) lins (reverse ts) +--convertTerm cnc_defs sel ctype (K t) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok t : lin) : lins) +convertTerm cnc_defs sel ctype (K (KS t)) ((lbl_path,lin) : lins) = return ((lbl_path,FSymTok (KS t) : lin) : lins) +convertTerm cnc_defs sel ctype (K (KP strs vars))((lbl_path,lin) : lins) = + do toks <- member (strs:[strs' | Alt strs' _ <- vars]) + return ((lbl_path, map (FSymTok . KS) toks ++ lin) : lins) +convertTerm cnc_defs sel ctype (F id) lins = case Map.lookup id cnc_defs of + Just term -> convertTerm cnc_defs sel ctype term lins + Nothing -> mzero +convertTerm cnc_defs sel ctype (W s t) ((lbl_path,lin) : lins) = do + ss <- case t of + R ss -> return ss + F f -> case Map.lookup f cnc_defs of + Just (R ss) -> return ss + _ -> mzero + convertRec cnc_defs sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins +convertTerm cnc_defs sel ctype x lins = error ("convertTerm ("++show x++")") + + +convertArg (R record) nr path lbl_path lin lins = + foldM (\lins (lbl, ctype) -> convertArg ctype nr (lbl:path) (lbl:lbl_path) lin lins) lins (zip [0..] record) +convertArg (C max) nr path lbl_path lin lins = do + index <- member [0..max] + restrictHead lbl_path index + restrictArg nr path index + return lins +convertArg (S _) nr path lbl_path lin lins = do + (_, args) <- get + let PFCat _ cat rcs tcs = args !! nr + l = index path rcs 0 + sym | isLiteralCat cat = FSymLit nr l + | otherwise = FSymCat nr l + return ((lbl_path, sym : lin) : lins) + where + index lbl' (lbl:lbls) idx + | lbl' == lbl = idx + | otherwise = index lbl' lbls $! (idx+1) + + +convertCon (C max) index [] lbl_path lin lins = do + guard (index <= max) + restrictHead lbl_path index + return lins +convertCon x _ _ _ _ _ = error $ "SimpleToFCFG,convertCon: " ++ show x + +convertRec cnc_defs [] (R ctypes) record lbl_path lin lins = + foldM (\lins (index,ctype,val) -> convertTerm cnc_defs [] ctype val ((index:lbl_path,lin) : lins)) + lins + (zip3 [0..] ctypes record) +convertRec cnc_defs (index:sub_sel) ctype record lbl_path lin lins = do + convertTerm cnc_defs sub_sel ctype (record !! index) ((lbl_path,lin) : lins) + + +------------------------------------------------------------ +-- eval a term to ground terms + +evalTerm :: TermMap -> FPath -> Term -> CnvMonad FIndex +evalTerm cnc_defs path (V nr) = do (_, args) <- get + let PFCat _ _ _ tcs = args !! nr + rpath = reverse path + index <- member (fromMaybe (error "evalTerm: wrong path") (lookup rpath tcs)) + restrictArg nr rpath index + return index +evalTerm cnc_defs path (C nr) = return nr +evalTerm cnc_defs path (R record) = case path of + (index:path) -> evalTerm cnc_defs path (record !! index) +evalTerm cnc_defs path (P term sel) = do index <- evalTerm cnc_defs [] sel + evalTerm cnc_defs (index:path) term +evalTerm cnc_defs path (FV terms) = member terms >>= evalTerm cnc_defs path +evalTerm cnc_defs path (F id) = case Map.lookup id cnc_defs of + Just term -> evalTerm cnc_defs path term + Nothing -> mzero +evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") + + +---------------------------------------------------------------------- +-- GrammarEnv + +data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) +type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int])) +type SeqSet = Map.Map FSeq SeqId +type FunSet = Map.Map FFun FunId +type CoerceSet= Map.Map [FCat] FCat + +emptyGrammarEnv cnc_defs lincats = + let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats + in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty + where + computeCatRange index cat ctype + | cat == cidString = (index, (fcatString,fcatString,[])) + | cat == cidInt = (index, (fcatInt, fcatInt, [])) + | cat == cidFloat = (index, (fcatFloat, fcatFloat, [])) + | otherwise = (index+size,(index,index+size-1,poly)) + where + (size,poly) = getMultipliers 1 [] ctype + + getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (S _) = (m,ms) + getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) + getMultipliers m ms (F id) = case Map.lookup id cnc_defs of + Just term -> getMultipliers m ms term + Nothing -> error ("unknown identifier: "++prCId id) + + +expandHOAS abs_defs cnc_defs lincats env = + foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats + where + hoTypes :: [(Int,CId)] + hoTypes = sortNub [(n,c) | (_,(ty,_)) <- abs_defs + , (n,c) <- fst (typeSkeleton ty), n > 0] + + hoCats :: [CId] + hoCats = sortNub [c | (_,(ty,_)) <- abs_defs + , Hyp _ ty <- case ty of {DTyp hyps val _ -> hyps} + , c <- fst (catSkeleton ty)] + + -- add a range of PMCFG categories for each GF high-order category + add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = + case IntMap.lookup 0 catSet >>= Map.lookup cat of + Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet + !last_id' = last_id+(end-start)+1 + in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) + Nothing -> env + + -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat + add_hoFun env (n,cat) = + let linRec = reverse $ + [(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ + [([],[FSymLit i 0]) | i <- [1..n]] + (env1,lins) = List.mapAccumL addFSeq env linRec + newLinRec = mkArray lins + + (env2,funid) = addFFun env1 (FFun _B [[i] | i <- [0..n]] newLinRec) + + env3 = foldl (\env (arg,res) -> addProduction env res (FApply funid (arg : replicate n fcatVar))) + env2 + (zip (getFCats env2 arg) (getFCats env2 res)) + in env3 + where + (arg,res) = case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ prCId cat + Just ctype -> (protoFCat cnc_defs (0,cat) ctype, protoFCat cnc_defs (n,cat) ctype) + + -- add one PMCFG function for each high-order category: _V : Var -> Cat + add_varFun env cat = + let (env1,seqid) = addFSeq env ([],[FSymLit 0 0]) + lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid + (env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins)) + env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar])) + env2 + (getFCats env2 res) + in env3 + where + res = case Map.lookup cat lincats of + Nothing -> error $ "No lincat for " ++ prCId cat + Just ctype -> protoFCat cnc_defs (0,cat) ctype + + _B = mkCId "_B" + _V = mkCId "_V" + + +addProduction :: GrammarEnv -> FCat -> Production -> GrammarEnv +addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p = + GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet) + +addFSeq :: GrammarEnv -> (FPath,[FSymbol]) -> (GrammarEnv,SeqId) +addFSeq env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (_,lst) = + case Map.lookup seq seqSet of + Just id -> (env,id) + Nothing -> let !last_seq = Map.size seqSet + in (GrammarEnv last_id catSet (Map.insert seq last_seq seqSet) funSet crcSet prodSet,last_seq) + where + seq = mkArray lst + +addFFun :: GrammarEnv -> FFun -> (GrammarEnv,FunId) +addFFun env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) fun = + case Map.lookup fun funSet of + Just id -> (env,id) + Nothing -> let !last_funid = Map.size funSet + in (GrammarEnv last_id catSet seqSet (Map.insert fun last_funid funSet) crcSet prodSet,last_funid) + +addFCoercion :: GrammarEnv -> [FCat] -> (GrammarEnv,FCat) +addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fcats = + case sub_fcats of + [fcat] -> (env,fcat) + _ -> case Map.lookup sub_fcats crcSet of + Just fcat -> (env,fcat) + Nothing -> let !fcat = last_id+1 + in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) + +getParserInfo :: GrammarEnv -> ParserInfo +getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = + ParserInfo { functions = mkArray funSet + , sequences = mkArray seqSet + , productions = IntMap.union prodSet coercions + , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) + , totalCats = last_id+1 + } + where + mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + + coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] + +getFCats :: GrammarEnv -> ProtoFCat -> [FCat] +getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = + case IntMap.lookup n catSet >>= Map.lookup cat of + Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) + where + variants _ [] fcat = return fcat + variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices + variants ms tcs ((m*index) + fcat) + +------------------------------------------------------------ +-- updating the MCF rule + +restrictArg :: FIndex -> FPath -> FIndex -> CnvMonad () +restrictArg nr path index = do + (head, args) <- get + args' <- updateNthM (restrictProtoFCat path index) nr args + put (head, args') + +restrictHead :: FPath -> FIndex -> CnvMonad () +restrictHead path term + = do (head, args) <- get + head' <- restrictProtoFCat path term head + put (head', args) + +restrictProtoFCat :: FPath -> FIndex -> ProtoFCat -> CnvMonad ProtoFCat +restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do + tcs <- addConstraint tcs + return (PFCat n cat rcs tcs) + where + addConstraint [] = error "restrictProtoFCat: unknown path" + addConstraint (c@(path,indices) : tcs) + | path0 == path = guard (index0 `elem` indices) >> + return ((path,[index0]) : tcs) + | otherwise = liftM (c:) (addConstraint tcs) + +mkArray lst = listArray (0,length lst-1) lst diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs new file mode 100644 index 000000000..c85f9588f --- /dev/null +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -0,0 +1,52 @@ +---------------------------------------------------------------------- +-- | +-- Module : GetGrammar +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/15 17:56:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.16 $ +-- +-- this module builds the internal GF grammar that is sent to the type checker +----------------------------------------------------------------------------- + +module GF.Compile.GetGrammar (getSourceModule, addOptionsToModule) where + +import GF.Data.Operations + +import GF.Infra.UseIO +import GF.Infra.Modules +import GF.Infra.Option +import GF.Grammar.Lexer +import GF.Grammar.Parser +import GF.Grammar.Grammar + +import GF.Compile.ReadFiles + +import Data.Char (toUpper) +import Data.List (nub) +import qualified Data.ByteString.Char8 as BS +import Control.Monad (foldM) +import System.Cmd (system) + +getSourceModule :: Options -> FilePath -> IOE SourceModule +getSourceModule opts file0 = ioe $ + catch (do file <- foldM runPreprocessor file0 (flag optPreprocessors opts) + content <- BS.readFile file + case runP pModDef content of + Left (Pn l c,msg) -> return (Bad (file++":"++show l++":"++show c++": "++msg)) + Right mo -> return (Ok (addOptionsToModule opts mo))) + (\e -> return (Bad (show e))) + +addOptionsToModule :: Options -> SourceModule -> SourceModule +addOptionsToModule opts = mapSourceModule (\m -> m { flags = flags m `addOptions` opts }) + +-- FIXME: should use System.IO.openTempFile +runPreprocessor :: FilePath -> String -> IO FilePath +runPreprocessor file0 p = do + let tmp = "_gf_preproc.tmp" + cmd = p +++ file0 ++ ">" ++ tmp + system cmd + return tmp diff --git a/src/compiler/GF/Compile/GrammarToGFCC.hs b/src/compiler/GF/Compile/GrammarToGFCC.hs new file mode 100644 index 000000000..fb92ef74c --- /dev/null +++ b/src/compiler/GF/Compile/GrammarToGFCC.hs @@ -0,0 +1,587 @@ +{-# LANGUAGE PatternGuards #-} +module GF.Compile.GrammarToGFCC (mkCanon2gfcc,addParsers) where + +import GF.Compile.Export +import qualified GF.Compile.GenerateFCFG as FCFG +import qualified GF.Compile.GeneratePMCFG as PMCFG + +import PGF.CId +import qualified PGF.Macros as CM +import qualified PGF.Data as C +import qualified PGF.Data as D +import GF.Grammar.Predef +import GF.Grammar.Printer +import GF.Grammar.Grammar +import qualified GF.Grammar.Lookup as Look +import qualified GF.Grammar as A +import qualified GF.Grammar.Macros as GM +import qualified GF.Compile.Concrete.Compute as Compute ---- +import qualified GF.Infra.Modules as M +import qualified GF.Infra.Option as O + +import GF.Infra.Ident +import GF.Infra.Option +import GF.Data.Operations + +import Data.List +import Data.Char (isDigit,isSpace) +import qualified Data.Map as Map +import qualified Data.ByteString.Char8 as BS +import Text.PrettyPrint +import Debug.Trace ---- + +-- when developing, swap commenting +--traceD s t = trace s t +traceD s t = t + + +-- the main function: generate PGF from GF. +mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF) +mkCanon2gfcc opts cnc gr = + (showIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon opts abs) gr) + where + abs = err (const c) id $ M.abstractOfConcrete gr c where c = identC (BS.pack cnc) + pars = mkParamLincat gr + +-- Adds parsers for all concretes +addParsers :: Options -> D.PGF -> IO D.PGF +addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] + return pgf { D.concretes = Map.fromList cncs } + where + conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts) + then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc + else return $ FCFG.convertConcrete (D.abstract pgf) cnc + return (lang,cnc { D.parser = Just pinfo }) + where + erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"}) + +-- Generate PGF from GFCM. +-- this assumes a grammar translated by canon2canon + +canon2gfcc :: Options -> (Ident -> Ident -> C.Term) -> SourceGrammar -> D.PGF +canon2gfcc opts pars cgr@(M.MGrammar ((a,abm):cms)) = + (if dump opts DumpCanon then trace (render (vcat (map (ppModule Qualified) (M.modules cgr)))) else id) $ + D.PGF an cns gflags abs cncs + where + -- abstract + an = (i2i a) + cns = map (i2i . fst) cms + abs = D.Abstr aflags funs cats catfuns + gflags = Map.empty + aflags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags abm)] + + mkDef (Just eqs) = [C.Equ ps' (mkExp scope' e) | (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps] + mkDef Nothing = [] + + mkArrity (Just a) = a + mkArrity Nothing = 0 + + -- concretes + lfuns = [(f', (mkType [] ty, mkArrity ma, mkDef pty)) | + (f,AbsFun (Just ty) ma pty) <- tree2list (M.jments abm), let f' = i2i f] + funs = Map.fromAscList lfuns + lcats = [(i2i c, snd (mkContext [] cont)) | + (c,AbsCat (Just cont) _) <- tree2list (M.jments abm)] + cats = Map.fromAscList lcats + catfuns = Map.fromList + [(cat,[f | (f, (C.DTyp _ c _,_,_)) <- lfuns, c==cat]) | (cat,_) <- lcats] + + cncs = Map.fromList [mkConcr lang (i2i lang) mo | (lang,mo) <- cms] + mkConcr lang0 lang mo = + (lang,D.Concr flags lins opers lincats lindefs printnames params fcfg) + where + js = tree2list (M.jments mo) + flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags mo)] + opers = Map.fromAscList [] -- opers will be created as optimization + utf = id -- trace (show lang0 +++ show flags) $ + -- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8 + -- then id else id + ---- then (trace "decode" D.convertStringsInTerm decodeUTF8) else id + umkTerm = utf . mkTerm + lins = Map.fromAscList + [(f', umkTerm tr) | (f,CncFun _ (Just tr) _) <- js, + let f' = i2i f, exists f'] -- eliminating lins without fun + -- needed even here because of restricted inheritance + lincats = Map.fromAscList + [(i2i c, mkCType ty) | (c,CncCat (Just ty) _ _) <- js] + lindefs = Map.fromAscList + [(i2i c, umkTerm tr) | (c,CncCat _ (Just tr) _) <- js] + printnames = Map.union + (Map.fromAscList [(i2i f, umkTerm tr) | (f,CncFun _ _ (Just tr)) <- js]) + (Map.fromAscList [(i2i f, umkTerm tr) | (f,CncCat _ _ (Just tr)) <- js]) + params = Map.fromAscList + [(i2i c, pars lang0 c) | (c,CncCat (Just ty) _ _) <- js] + fcfg = Nothing + + exists f = Map.member f funs + +i2i :: Ident -> CId +i2i = CId . ident2bs + +b2b :: A.BindType -> C.BindType +b2b A.Explicit = C.Explicit +b2b A.Implicit = C.Implicit + +mkType :: [Ident] -> A.Type -> C.Type +mkType scope t = + case GM.typeForm t of + (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps + in C.DTyp hyps' (i2i cat) (map (mkExp scope') args) + +mkExp :: [Ident] -> A.Term -> C.Expr +mkExp scope t = case GM.termForm t of + Ok (xs,c,args) -> mkAbs xs (mkApp (map snd (reverse xs)++scope) c (map (mkExp scope) args)) + where + mkAbs xs t = foldr (\(b,v) -> C.EAbs (b2b b) (i2i v)) t xs + mkApp scope c args = case c of + Q _ c -> foldl C.EApp (C.EFun (i2i c)) args + QC _ c -> foldl C.EApp (C.EFun (i2i c)) args + Vr x -> case lookup x (zip scope [0..]) of + Just i -> foldl C.EApp (C.EVar i) args + Nothing -> foldl C.EApp (C.EMeta 0) args + EInt i -> C.ELit (C.LInt i) + EFloat f -> C.ELit (C.LFlt f) + K s -> C.ELit (C.LStr s) + Meta i -> C.EMeta i + _ -> C.EMeta 0 + +mkPatt scope p = + case p of + A.PP _ c ps -> let (scope',ps') = mapAccumL mkPatt scope ps + in (scope',C.PApp (i2i c) ps') + A.PV x -> (x:scope,C.PVar (i2i x)) + A.PW -> ( scope,C.PWild) + A.PInt i -> ( scope,C.PLit (C.LInt i)) + A.PFloat f -> ( scope,C.PLit (C.LFlt f)) + A.PString s -> ( scope,C.PLit (C.LStr s)) + + +mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo]) +mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty + in if x == identW + then ( scope,(b2b bt,i2i x,ty')) + else (x:scope,(b2b bt,i2i x,ty'))) scope hyps + +mkTerm :: Term -> C.Term +mkTerm tr = case tr of + Vr (IA _ i) -> C.V i + Vr (IAV _ _ i) -> C.V i + Vr (IC s) | isDigit (BS.last s) -> + C.V ((read . BS.unpack . snd . BS.spanEnd isDigit) s) + ---- from gf parser of gfc + EInt i -> C.C $ fromInteger i + R rs -> C.R [mkTerm t | (_, (_,t)) <- rs] + P t l -> C.P (mkTerm t) (C.C (mkLab l)) + T _ cs -> C.R [mkTerm t | (_,t) <- cs] ------ + V _ cs -> C.R [mkTerm t | t <- cs] + S t p -> C.P (mkTerm t) (mkTerm p) + C s t -> C.S $ concatMap flats [mkTerm x | x <- [s,t]] + FV ts -> C.FV [mkTerm t | t <- ts] + K s -> C.K (C.KS s) +----- K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants + Empty -> C.S [] + App _ _ -> prtTrace tr $ C.C 66661 ---- for debugging + Abs _ _ t -> mkTerm t ---- only on toplevel + Alts (td,tvs) -> + C.K (C.KP (strings td) [C.Alt (strings u) (strings v) | (u,v) <- tvs]) + _ -> prtTrace tr $ C.S [C.K (C.KS (render (A.ppTerm Unqualified 0 tr <+> int 66662)))] ---- for debugging + where + mkLab (LIdent l) = case BS.unpack l of + '_':ds -> (read ds) :: Int + _ -> prtTrace tr $ 66663 + strings t = case t of + K s -> [s] + C u v -> strings u ++ strings v + Strs ss -> concatMap strings ss + _ -> prtTrace tr $ ["66660"] + flats t = case t of + C.S ts -> concatMap flats ts + _ -> [t] + +-- encoding PGF-internal lincats as terms +mkCType :: Type -> C.Term +mkCType t = case t of + EInt i -> C.C $ fromInteger i + RecType rs -> C.R [mkCType t | (_, t) <- rs] + Table pt vt -> case pt of + EInt i -> C.R $ replicate (1 + fromInteger i) $ mkCType vt + RecType rs -> mkCType $ foldr Table vt (map snd rs) + _ | Just i <- GM.isTypeInts pt -> C.R $ replicate (fromInteger i) $ mkCType vt + + Sort s | s == cStr -> C.S [] --- Str only + _ | Just i <- GM.isTypeInts t -> C.C $ fromInteger i + _ -> error $ "mkCType " ++ show t + +-- encoding showable lincats (as in source gf) as terms +mkParamLincat :: SourceGrammar -> Ident -> Ident -> C.Term +mkParamLincat sgr lang cat = errVal (C.R [C.S []]) $ do + typ <- Look.lookupLincat sgr lang cat + mkPType typ + where + mkPType typ = case typ of + RecType lts -> do + ts <- mapM (mkPType . snd) lts + return $ C.R [ C.P (kks $ showIdent (label2ident l)) t | ((l,_),t) <- zip lts ts] + Table (RecType lts) v -> do + ps <- mapM (mkPType . snd) lts + v' <- mkPType v + return $ foldr (\p v -> C.S [p,v]) v' ps + Table p v -> do + p' <- mkPType p + v' <- mkPType v + return $ C.S [p',v'] + Sort s | s == cStr -> return $ C.S [] + _ -> return $ + C.FV $ map (kks . filter showable . render . ppTerm Unqualified 0) $ + errVal [] $ Look.allParamValues sgr typ + showable c = not (isSpace c) ---- || (c == ' ') -- to eliminate \n in records + kks = C.K . C.KS + +-- return just one module per language + +reorder :: Ident -> SourceGrammar -> SourceGrammar +reorder abs cg = M.MGrammar $ + (abs, M.ModInfo M.MTAbstract M.MSComplete aflags [] Nothing [] [] adefs poss): + [(c, M.ModInfo (M.MTConcrete abs) M.MSComplete fs [] Nothing [] [] (sorted2tree js) poss) + | (c,(fs,js)) <- cncs] + where + poss = emptyBinTree -- positions no longer needed + mos = M.modules cg + adefs = sorted2tree $ sortIds $ + predefADefs ++ Look.allOrigInfos cg abs + predefADefs = + [(c, AbsCat (Just []) Nothing) | c <- [cFloat,cInt,cString]] + aflags = + concatOptions [M.flags mo | (_,mo) <- M.modules cg, M.isModAbs mo] + + cncs = sortIds [(lang, concr lang) | lang <- M.allConcretes cg abs] + concr la = (flags, + sortIds (predefCDefs ++ jments)) where + jments = Look.allOrigInfos cg la + flags = concatOptions + [M.flags mo | + (i,mo) <- mos, M.isModCnc mo, + Just r <- [lookup i (M.allExtendSpecs cg la)]] + + predefCDefs = + [(c, CncCat (Just GM.defLinType) Nothing Nothing) | c <- [cInt,cFloat,cString]] + + sortIds = sortBy (\ (f,_) (g,_) -> compare f g) + + +-- one grammar per language - needed for symtab generation +repartition :: Ident -> SourceGrammar -> [SourceGrammar] +repartition abs cg = + [M.partOfGrammar cg (lang,mo) | + let mos = M.modules cg, + lang <- case M.allConcretes cg abs of + [] -> [abs] -- to make pgf nonempty even when there are no concretes + cncs -> cncs, + let mo = errVal + (error (render (text "no module found for" <+> A.ppIdent lang))) $ M.lookupModule cg lang + ] + +-- translate tables and records to arrays, parameters and labels to indices + +canon2canon :: Options -> Ident -> SourceGrammar -> SourceGrammar +canon2canon opts abs cg0 = + (recollect . map cl2cl . repartition abs . purgeGrammar abs) cg0 + where + recollect = M.MGrammar . nubBy (\ (i,_) (j,_) -> i==j) . concatMap M.modules + cl2cl = M.MGrammar . js2js . map (c2c p2p) . M.modules + + js2js ms = map (c2c (j2j (M.MGrammar ms))) ms + + c2c f2 (c,mo) = (c, M.replaceJudgements mo $ mapTree f2 (M.jments mo)) + + j2j cg (f,j) = + let debug = if verbAtLeast opts Verbose then trace ("+ " ++ showIdent f) else id in + case j of + CncFun x (Just tr) z -> CncFun x (Just (debug (t2t (unfactor cg0 tr)))) z + CncCat (Just ty) (Just x) y -> CncCat (Just (ty2ty ty)) (Just (t2t (unfactor cg0 x))) y + _ -> j + where + cg1 = cg + t2t = term2term f cg1 pv + ty2ty = type2type cg1 pv + pv@(labels,untyps,typs) = trs $ paramValues cg1 + + unfactor :: SourceGrammar -> Term -> Term + unfactor gr t = case t of + T (TTyped ty) [(PV x,u)] -> V ty [restore x v (unfac u) | v <- vals ty] + _ -> GM.composSafeOp unfac t + where + unfac = unfactor gr + vals = err error id . Look.allParamValues gr + restore x u t = case t of + Vr y | y == x -> u + _ -> GM.composSafeOp (restore x u) t + + -- flatten record arguments of param constructors + p2p (f,j) = case j of + ResParam (Just ps) (Just vs) -> + ResParam (Just [(c,concatMap unRec cont) | (c,cont) <- ps]) (Just (map unrec vs)) + _ -> j + unRec (bt,x,ty) = case ty of + RecType fs -> [ity | (_,typ) <- fs, ity <- unRec (Explicit,identW,typ)] + _ -> [(bt,x,ty)] + unrec t = case t of + App f (R fs) -> GM.mkApp (unrec f) [unrec u | (_,(_,u)) <- fs] + _ -> GM.composSafeOp unrec t + + +---- + trs v = traceD (render (tr v)) v + + tr (labels,untyps,typs) = + (text "LABELS:" <+> + vcat [A.ppIdent c <> char '.' <> hsep (map A.ppLabel l) <+> char '=' <+> text (show i) | ((c,l),i) <- Map.toList labels]) $$ + (text "UNTYPS:" <+> + vcat [A.ppTerm Unqualified 0 t <+> char '=' <+> text (show i) | (t,i) <- Map.toList untyps]) $$ + (text "TYPS: " <+> + vcat [A.ppTerm Unqualified 0 t <+> char '=' <+> text (show (Map.assocs i)) | (t,i) <- Map.toList typs]) +---- + +purgeGrammar :: Ident -> SourceGrammar -> SourceGrammar +purgeGrammar abstr gr = + (M.MGrammar . list . filter complete . purge . M.modules) gr + where + list ms = traceD (render (text "MODULES" <+> hsep (punctuate comma (map (ppIdent . fst) ms)))) ms + purge = nubBy (\x y -> fst x == fst y) . filter (flip elem needed . fst) + needed = nub $ concatMap (requiredCanModules isSingle gr) acncs + acncs = abstr : M.allConcretes gr abstr + isSingle = True + complete (i,m) = M.isCompleteModule m --- not . isIncompleteCanon + +type ParamEnv = + (Map.Map (Ident,[Label]) (Type,Integer), -- numbered labels + Map.Map Term Integer, -- untyped terms to values + Map.Map Type (Map.Map Term Integer)) -- types to their terms to values + +--- gathers those param types that are actually used in lincats and lin terms +paramValues :: SourceGrammar -> ParamEnv +paramValues cgr = (labels,untyps,typs) where + partyps = nub $ + --- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt + [ty | + (_,(_,CncCat (Just ty0) _ _)) <- jments, + ty <- typsFrom ty0 + ] ++ [ + Q m ty | + (m,(ty,ResParam _ _)) <- jments + ] ++ [ty | + (_,(_,CncFun _ (Just tr) _)) <- jments, + ty <- err (const []) snd $ appSTM (typsFromTrm tr) [] + ] + params = [(ty, errVal (traceD ("UNKNOWN PARAM TYPE" +++ show ty) []) $ + Look.allParamValues cgr ty) | ty <- partyps] + typsFrom ty = (if isParam ty then (ty:) else id) $ case ty of + Table p t -> typsFrom p ++ typsFrom t + RecType ls -> concat [typsFrom t | (_, t) <- ls] + _ -> [] + + isParam ty = case ty of + Q _ _ -> True + QC _ _ -> True + RecType rs -> all isParam (map snd rs) + _ -> False + + typsFromTrm :: Term -> STM [Type] Term + typsFromTrm tr = case tr of + R fs -> mapM_ (typsFromField . snd) fs >> return tr + where + typsFromField (mty, t) = case mty of + Just x -> updateSTM (x:) >> typsFromTrm t + _ -> typsFromTrm t + V ty ts -> updateSTM (ty:) >> mapM_ typsFromTrm ts >> return tr + T (TTyped ty) cs -> + updateSTM (ty:) >> mapM_ typsFromTrm [t | (_, t) <- cs] >> return tr + T (TComp ty) cs -> + updateSTM (ty:) >> mapM_ typsFromTrm [t | (_, t) <- cs] >> return tr + _ -> GM.composOp typsFromTrm tr + + mods = traceD (render (hsep (map (ppIdent . fst) ms))) ms where ms = M.modules cgr + + jments = + [(m,j) | (m,mo) <- mods, j <- tree2list $ M.jments mo] + typs = + Map.fromList [(ci,Map.fromList (zip vs [0..])) | (ci,vs) <- params] + untyps = + Map.fromList $ concatMap Map.toList [typ | (_,typ) <- Map.toList typs] + lincats = + [(cat,[f | let RecType fs = GM.defLinType, f <- fs]) | cat <- [cInt,cFloat, cString]] ++ + reverse ---- TODO: really those lincats that are reached + ---- reverse is enough to expel overshadowed ones... + [(cat,ls) | (_,(cat,CncCat (Just ty) _ _)) <- jments, + RecType ls <- [unlockTy ty]] + labels = Map.fromList $ concat + [((cat,[lab]),(typ,i)): + [((cat,[LVar v]),(typ,toInteger (mx + v))) | v <- [0,1]] ++ ---- 1 or 2 vars + [((cat,[lab,lab2]),(ty,j)) | + rs <- getRec typ, ((lab2, ty),j) <- zip rs [0..]] + | + (cat,ls) <- lincats, ((lab, typ),i) <- zip ls [0..], let mx = length ls] + -- go to tables recursively + ---- TODO: even go to deeper records + where + getRec typ = case typ of + RecType rs -> [rs] ---- [unlockTyp rs] -- (sort (unlockTyp ls)) + Table _ t -> getRec t + _ -> [] + +type2type :: SourceGrammar -> ParamEnv -> Type -> Type +type2type cgr env@(labels,untyps,typs) ty = case ty of + RecType rs -> + RecType [(mkLab i, t2t t) | (i,(l, t)) <- zip [0..] (unlockTyp rs)] + Table pt vt -> Table (t2t pt) (t2t vt) + QC _ _ -> look ty + _ -> ty + where + t2t = type2type cgr env + look ty = EInt $ (+ (-1)) $ toInteger $ case Map.lookup ty typs of + Just vs -> length $ Map.assocs vs + _ -> trace ("unknown partype " ++ show ty) 66669 + +term2term :: Ident -> SourceGrammar -> ParamEnv -> Term -> Term +term2term fun cgr env@(labels,untyps,typs) tr = case tr of + App _ _ -> mkValCase (unrec tr) + QC _ _ -> mkValCase tr + R rs -> R [(mkLab i, (Nothing, t2t t)) | + (i,(l,(_,t))) <- zip [0..] (GM.sortRec (unlock rs))] + P t l -> r2r tr + + T (TWild _) _ -> error $ (render (text "wild" <+> ppTerm Qualified 0 tr)) + T (TComp ty) cs -> t2t $ V ty $ map snd cs ---- should be elim'ed in tc + T (TTyped ty) cs -> t2t $ V ty $ map snd cs ---- should be elim'ed in tc + V ty ts -> mkCurry $ V ty [t2t t | t <- ts] + S t p -> mkCurrySel (t2t t) (t2t p) + + _ -> GM.composSafeOp t2t tr + where + t2t = term2term fun cgr env + + unrec t = case t of + App f (R fs) -> GM.mkApp (unrec f) [unrec u | (_,(_,u)) <- fs] + _ -> GM.composSafeOp unrec t + + mkValCase tr = case appSTM (doVar tr) [] of + Ok (tr', st@(_:_)) -> t2t $ comp $ foldr mkCase tr' st + _ -> valNum $ comp tr + + --- this is mainly needed for parameter record projections + ---- was: + comp t = errVal t $ Compute.computeConcreteRec cgr t + + doVar :: Term -> STM [((Type,[Term]),(Term,Term))] Term + doVar tr = case getLab tr of + Ok (cat, lab) -> do + k <- readSTM >>= return . length + let tr' = Vr $ identC $ (BS.pack (show k)) ----- + + let tyvs = case Map.lookup (cat,lab) labels of + Just (ty,_) -> case Map.lookup ty typs of + Just vs -> (ty,[t | + (t,_) <- sortBy (\x y -> compare (snd x) (snd y)) + (Map.assocs vs)]) + _ -> error $ render (text "doVar1" <+> A.ppTerm Unqualified 0 ty) + _ -> error $ render (text "doVar2" <+> A.ppTerm Unqualified 0 tr <+> text (show (cat,lab))) ---- debug + updateSTM ((tyvs, (tr', tr)):) + return tr' + _ -> GM.composOp doVar tr + + r2r tr@(P (S (V ty ts) v) l) = t2t $ S (V ty [comp (P t l) | t <- ts]) v + + r2r tr@(P p _) = case getLab tr of + Ok (cat,labs) -> P (t2t p) . mkLab $ + maybe (prtTrace tr $ 66664) snd $ + Map.lookup (cat,labs) labels + _ -> K (render (A.ppTerm Unqualified 0 tr <+> prtTrace tr (int 66665))) + + -- this goes recursively into tables (ignored) and records (accumulated) + getLab tr = case tr of + Vr (IA cat _) -> return (identC cat,[]) + Vr (IAV cat _ _) -> return (identC cat,[]) + Vr (IC s) -> return (identC cat,[]) where + cat = BS.takeWhile (/='_') s ---- also to match IAVs; no _ in a cat tolerated + ---- init (reverse (dropWhile (/='_') (reverse s))) ---- from gf parser +---- Vr _ -> error $ "getLab " ++ show tr + P p lab2 -> do + (cat,labs) <- getLab p + return (cat,labs++[lab2]) + S p _ -> getLab p + _ -> Bad "getLab" + + + mkCase ((ty,vs),(x,p)) tr = + S (V ty [mkBranch x v tr | v <- vs]) p + mkBranch x t tr = case tr of + _ | tr == x -> t + _ -> GM.composSafeOp (mkBranch x t) tr + + valNum tr = maybe (valNumFV $ tryFV tr) EInt $ Map.lookup tr untyps + where + tryFV tr = case GM.appForm tr of + (c@(QC _ _), ts) -> [GM.mkApp c ts' | ts' <- combinations (map tryFV ts)] + (FV ts,_) -> ts + _ -> [tr] + valNumFV ts = case ts of + [tr] -> let msg = render (text "DEBUG" <+> ppIdent fun <> text ": error in valNum" <+> ppTerm Qualified 0 tr) in + trace msg $ error (showIdent fun) + _ -> FV $ map valNum ts + + mkCurry trm = case trm of + V (RecType [(_,ty)]) ts -> V ty ts + V (RecType ((_,ty):ltys)) ts -> + V ty [mkCurry (V (RecType ltys) cs) | + cs <- chop (product (map (lengthtyp . snd) ltys)) ts] + _ -> trm + lengthtyp ty = case Map.lookup ty typs of + Just m -> length (Map.assocs m) + _ -> error $ "length of type " ++ show ty + chop i xs = case splitAt i xs of + (xs1,[]) -> [xs1] + (xs1,xs2) -> xs1:chop i xs2 + + + mkCurrySel t p = S t p -- done properly in CheckGFCC + + +mkLab k = LIdent (BS.pack ("_" ++ show k)) + +-- remove lock fields; in fact, any empty records and record types +unlock = filter notlock where + notlock (l,(_, t)) = case t of --- need not look at l + R [] -> False + RecType [] -> False + _ -> True + +unlockTyp = filter notlock + +notlock (l, t) = case t of --- need not look at l + RecType [] -> False + _ -> True + +unlockTy ty = case ty of + RecType ls -> RecType $ GM.sortRec [(l, unlockTy t) | (l,t) <- ls, notlock (l,t)] + _ -> GM.composSafeOp unlockTy ty + + +prtTrace tr n = + trace (render (text "-- INTERNAL COMPILER ERROR" <+> A.ppTerm Unqualified 0 tr $$ text (show n))) n +prTrace tr n = trace (render (text "-- OBSERVE" <+> A.ppTerm Unqualified 0 tr <+> text (show n) <+> text (show tr))) n + + +-- | this function finds out what modules are really needed in the canonical gr. +-- its argument is typically a concrete module name +requiredCanModules :: (Ord i, Show i) => Bool -> M.MGrammar i a -> i -> [i] +requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where + exts = M.allExtends gr c + ops = if isSingle + then map fst (M.modules gr) + else iterFix (concatMap more) $ exts + more i = errVal [] $ do + m <- M.lookupModule gr i + return $ M.extends m ++ [o | o <- map M.openedModule (M.opens m)] + notReuse i = errVal True $ do + m <- M.lookupModule gr i + return $ M.isModRes m -- to exclude reused Cnc and Abs from required diff --git a/src/compiler/GF/Compile/ModDeps.hs b/src/compiler/GF/Compile/ModDeps.hs new file mode 100644 index 000000000..1e689aabc --- /dev/null +++ b/src/compiler/GF/Compile/ModDeps.hs @@ -0,0 +1,145 @@ +---------------------------------------------------------------------- +-- | +-- Module : ModDeps +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:34 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.14 $ +-- +-- Check correctness of module dependencies. Incomplete. +-- +-- AR 13\/5\/2003 +----------------------------------------------------------------------------- + +module GF.Compile.ModDeps (mkSourceGrammar, + moduleDeps, + openInterfaces, + requiredCanModules + ) where + +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Option +import GF.Grammar.Printer +import GF.Compile.Update +import GF.Grammar.Lookup +import GF.Infra.Modules + +import GF.Data.Operations + +import Control.Monad +import Data.List + +-- | to check uniqueness of module names and import names, the +-- appropriateness of import and extend types, +-- to build a dependency graph of modules, and to sort them topologically +mkSourceGrammar :: [SourceModule] -> Err SourceGrammar +mkSourceGrammar ms = do + let ns = map fst ms + checkUniqueErr ns + mapM (checkUniqueImportNames ns . snd) ms + deps <- moduleDeps ms + deplist <- either + return + (\ms -> Bad $ "circular modules" +++ unwords (map show ms)) $ + topoTest deps + return $ MGrammar [(m, maybe undefined id $ lookup m ms) | IdentM m _ <- deplist] + +checkUniqueErr :: (Show i, Eq i) => [i] -> Err () +checkUniqueErr ms = do + let msg = checkUnique ms + if null msg then return () else Bad $ unlines msg + +-- | check that import names don't clash with module names +checkUniqueImportNames :: [Ident] -> SourceModInfo -> Err () +checkUniqueImportNames ns mo = test [n | OQualif n v <- opens mo, n /= v] + where + test ms = testErr (all (`notElem` ns) ms) + ("import names clashing with module names among" +++ unwords (map prt ms)) + +type Dependencies = [(IdentM Ident,[IdentM Ident])] + +-- | to decide what modules immediately depend on what, and check if the +-- dependencies are appropriate +moduleDeps :: [SourceModule] -> Err Dependencies +moduleDeps ms = mapM deps ms where + deps (c,m) = errIn ("checking dependencies of module" +++ prt c) $ case mtype m of + MTConcrete a -> do + aty <- lookupModuleType gr a + testErr (aty == MTAbstract) "the of-module is not an abstract syntax" + chDep (IdentM c (MTConcrete a)) + (extends m) (MTConcrete a) (opens m) MTResource + t -> chDep (IdentM c t) (extends m) t (opens m) t + + chDep it es ety os oty = do + ests <- mapM (lookupModuleType gr) es + testErr (all (compatMType ety) ests) "inappropriate extension module type" +---- osts <- mapM (lookupModuleType gr . openedModule) os +---- testErr (all (compatOType oty) osts) "inappropriate open module type" + let ab = case it of + IdentM _ (MTConcrete a) -> [IdentM a MTAbstract] + _ -> [] ---- + return (it, ab ++ + [IdentM e ety | e <- es] ++ + [IdentM (openedModule o) oty | o <- os]) + + -- check for superficial compatibility, not submodule relation etc: what can be extended + compatMType mt0 mt = case (mt0,mt) of + (MTResource, MTConcrete _) -> True + (MTInstance _, MTConcrete _) -> True + (MTInterface, MTAbstract) -> True + (MTConcrete _, MTConcrete _) -> True + (MTInstance _, MTInstance _) -> True + (MTInstance _, MTResource) -> True + (MTResource, MTInstance _) -> True + ---- some more? + _ -> mt0 == mt + -- in the same way; this defines what can be opened + compatOType mt0 mt = case mt0 of + MTAbstract -> mt == MTAbstract + _ -> case mt of + MTResource -> True + MTInterface -> True + MTInstance _ -> True + _ -> False + + gr = MGrammar ms --- hack + +openInterfaces :: Dependencies -> Ident -> Err [Ident] +openInterfaces ds m = do + let deps = [(i,ds) | (IdentM i _,ds) <- ds] + let more (c,_) = [(i,mt) | Just is <- [lookup c deps], IdentM i mt <- is] + let mods = iterFix (concatMap more) (more (m,undefined)) + return $ [i | (i,MTInterface) <- mods] + +-- | this function finds out what modules are really needed in the canonical gr. +-- its argument is typically a concrete module name +requiredCanModules :: (Ord i, Show i) => Bool -> MGrammar i a -> i -> [i] +requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where + exts = allExtends gr c + ops = if isSingle + then map fst (modules gr) + else iterFix (concatMap more) $ exts + more i = errVal [] $ do + m <- lookupModule gr i + return $ extends m ++ [o | o <- map openedModule (opens m)] + notReuse i = errVal True $ do + m <- lookupModule gr i + return $ isModRes m -- to exclude reused Cnc and Abs from required + + +{- +-- to test +exampleDeps = [ + (ir "Nat",[ii "Gen", ir "Adj"]), + (ir "Adj",[ii "Num", ii "Gen", ir "Nou"]), + (ir "Nou",[ii "Cas"]) + ] + +ii s = IdentM (IC s) MTInterface +ir s = IdentM (IC s) MTResource +-} + diff --git a/src/compiler/GF/Compile/Optimize.hs b/src/compiler/GF/Compile/Optimize.hs new file mode 100644 index 000000000..2c556b36f --- /dev/null +++ b/src/compiler/GF/Compile/Optimize.hs @@ -0,0 +1,228 @@ +{-# LANGUAGE PatternGuards #-} +---------------------------------------------------------------------- +-- | +-- Module : Optimize +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/16 13:56:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.18 $ +-- +-- Top-level partial evaluation for GF source modules. +----------------------------------------------------------------------------- + +module GF.Compile.Optimize (optimizeModule) where + +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Grammar.Printer +import GF.Grammar.Macros +import GF.Grammar.Lookup +import GF.Grammar.Predef +import GF.Compile.Refresh +import GF.Compile.Concrete.Compute +import GF.Compile.CheckGrammar +import GF.Compile.Update + +import GF.Data.Operations +import GF.Infra.CheckM +import GF.Infra.Option + +import Control.Monad +import Data.List +import qualified Data.Set as Set +import Text.PrettyPrint +import Debug.Trace +import qualified Data.ByteString.Char8 as BS + + +-- | partial evaluation of concrete syntax. AR 6\/2001 -- 16\/5\/2003 -- 5\/2\/2005. + +optimizeModule :: Options -> [SourceModule] -> SourceModule -> Err SourceModule +optimizeModule opts ms m@(name,mi) + | mstatus mi == MSComplete = do + ids <- topoSortJments m + mi <- foldM updateEvalInfo mi ids + return (name,mi) + | otherwise = return m + where + oopts = opts `addOptions` flagsModule m + + updateEvalInfo mi (i,info) = do + info' <- evalInfo oopts ms (name,mi) i info + return (updateModule mi i info') + +evalInfo :: Options -> [SourceModule] -> SourceModule -> Ident -> Info -> Err Info +evalInfo opts ms m c info = do + + (if verbAtLeast opts Verbose then trace (" " ++ showIdent c) else id) return () + + errIn ("optimizing " ++ showIdent c) $ case info of + + CncCat ptyp pde ppr -> do + pde' <- case (ptyp,pde) of + (Just typ, Just de) -> do + de <- partEval opts gr ([(Explicit, varStr, typeStr)], typ) de + return (Just (factor param c 0 de)) + (Just typ, Nothing) -> do + de <- mkLinDefault gr typ + de <- partEval opts gr ([(Explicit, varStr, typeStr)], typ) de + return (Just (factor param c 0 de)) + _ -> return pde -- indirection + + ppr' <- liftM Just $ evalPrintname gr c ppr (Just $ K $ showIdent c) + + return (CncCat ptyp pde' ppr') + + CncFun (mt@(Just (_,cont,val))) pde ppr -> --trace (prt c) $ + eIn (text "linearization in type" <+> ppTerm Unqualified 0 (mkProd cont val []) $$ text "of function") $ do + pde' <- case pde of + Just de -> do de <- partEval opts gr (cont,val) de + return (Just (factor param c 0 de)) + Nothing -> return pde + ppr' <- liftM Just $ evalPrintname gr c ppr pde' + return $ CncFun mt pde' ppr' -- only cat in type actually needed + + ResOper pty pde + | OptExpand `Set.member` optim -> do + pde' <- case pde of + Just de -> do de <- computeConcrete gr de + return (Just (factor param c 0 de)) + Nothing -> return Nothing + return $ ResOper pty pde' + + _ -> return info + where + gr = MGrammar (m : ms) + optim = flag optOptimizations opts + param = OptParametrize `Set.member` optim + eIn cat = errIn (render (text "Error optimizing" <+> cat <+> ppIdent c <+> colon)) + +-- | the main function for compiling linearizations +partEval :: Options -> SourceGrammar -> (Context,Type) -> Term -> Err Term +partEval opts gr (context, val) trm = errIn (render (text "partial evaluation" <+> ppTerm Qualified 0 trm)) $ do + let vars = map (\(bt,x,t) -> x) context + args = map Vr vars + subst = [(v, Vr v) | v <- vars] + trm1 = mkApp trm args + trm2 <- computeTerm gr subst trm1 + trm3 <- if rightType trm2 + then computeTerm gr subst trm2 + else recordExpand val trm2 >>= computeTerm gr subst + return $ mkAbs [(Explicit,v) | v <- vars] trm3 + where + -- don't eta expand records of right length (correct by type checking) + rightType (R rs) = case val of + RecType ts -> length rs == length ts + _ -> False + rightType _ = False + + + + +-- here we must be careful not to reduce +-- variants {{s = "Auto" ; g = N} ; {s = "Wagen" ; g = M}} +-- {s = variants {"Auto" ; "Wagen"} ; g = variants {N ; M}} ; + +recordExpand :: Type -> Term -> Err Term +recordExpand typ trm = case typ of + RecType tys -> case trm of + FV rs -> return $ FV [R [assign lab (P r lab) | (lab,_) <- tys] | r <- rs] + _ -> return $ R [assign lab (P trm lab) | (lab,_) <- tys] + _ -> return trm + + +-- | auxiliaries for compiling the resource + +mkLinDefault :: SourceGrammar -> Type -> Err Term +mkLinDefault gr typ = liftM (Abs Explicit varStr) $ mkDefField typ + where + mkDefField typ = case typ of + Table p t -> do + t' <- mkDefField t + let T _ cs = mkWildCases t' + return $ T (TWild p) cs + Sort s | s == cStr -> return $ Vr varStr + QC q p -> do vs <- lookupParamValues gr q p + case vs of + v:_ -> return v + _ -> Bad (render (text "no parameter values given to type" <+> ppIdent p)) + RecType r -> do + let (ls,ts) = unzip r + ts <- mapM mkDefField ts + return $ R (zipWith assign ls ts) + _ | Just _ <- isTypeInts typ -> return $ EInt 0 -- exists in all as first val + _ -> Bad (render (text "linearization type field cannot be" <+> ppTerm Unqualified 0 typ)) + +-- | Form the printname: if given, compute. If not, use the computed +-- lin for functions, cat name for cats (dispatch made in evalCncDef above). +--- We cannot use linearization at this stage, since we do not know the +--- defaults we would need for question marks - and we're not yet in canon. +evalPrintname :: SourceGrammar -> Ident -> Maybe Term -> Maybe Term -> Err Term +evalPrintname gr c ppr lin = + case ppr of + Just pr -> comp pr + Nothing -> case lin of + Just t -> return $ K $ clean $ render (ppTerm Unqualified 0 (oneBranch t)) + Nothing -> return $ K $ showIdent c ---- + where + comp = computeConcrete gr + + oneBranch t = case t of + Abs _ _ b -> oneBranch b + R (r:_) -> oneBranch $ snd $ snd r + T _ (c:_) -> oneBranch $ snd c + V _ (c:_) -> oneBranch c + FV (t:_) -> oneBranch t + C x y -> C (oneBranch x) (oneBranch y) + S x _ -> oneBranch x + P x _ -> oneBranch x + Alts (d,_) -> oneBranch d + _ -> t + + --- very unclean cleaner + clean s = case s of + '+':'+':' ':cs -> clean cs + '"':cs -> clean cs + c:cs -> c: clean cs + _ -> s + + +-- do even more: factor parametric branches + +factor :: Bool -> Ident -> Int -> Term -> Term +factor param c i t = + case t of + T (TComp ty) cs -> factors ty [(p, factor param c (i+1) v) | (p, v) <- cs] + _ -> composSafeOp (factor param c i) t + where + factors ty pvs0 + | not param = V ty (map snd pvs0) + factors ty [] = V ty [] + factors ty pvs0@[(p,v)] = V ty [v] + factors ty pvs0@(pv:pvs) = + let t = mkFun pv + ts = map mkFun pvs + in if all (==t) ts + then T (TTyped ty) (mkCases t) + else V ty (map snd pvs0) + + --- we hope this will be fresh and don't check... in GFC would be safe + qvar = identC (BS.pack ("q_" ++ showIdent c ++ "__" ++ show i)) + + mkFun (patt, val) = replace (patt2term patt) (Vr qvar) val + mkCases t = [(PV qvar, t)] + +-- we need to replace subterms +replace :: Term -> Term -> Term -> Term +replace old new trm = + case trm of + -- these are the important cases, since they can correspond to patterns + QC _ _ | trm == old -> new + App _ _ | trm == old -> new + R _ | trm == old -> new + App x y -> App (replace old new x) (replace old new y) + _ -> composSafeOp (replace old new) trm diff --git a/src/compiler/GF/Compile/OptimizeGFCC.hs b/src/compiler/GF/Compile/OptimizeGFCC.hs new file mode 100644 index 000000000..2a218e1bb --- /dev/null +++ b/src/compiler/GF/Compile/OptimizeGFCC.hs @@ -0,0 +1,121 @@ +module GF.Compile.OptimizeGFCC where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations + +import Data.List +import qualified Data.Map as Map + + +-- back-end optimization: +-- suffix analysis followed by common subexpression elimination + +optPGF :: PGF -> PGF +optPGF = cseOptimize . suffixOptimize + +suffixOptimize :: PGF -> PGF +suffixOptimize = mapConcretes opt + where + opt cnc = cnc { + lins = Map.map optTerm (lins cnc), + lindefs = Map.map optTerm (lindefs cnc), + printnames = Map.map optTerm (printnames cnc) + } + +cseOptimize :: PGF -> PGF +cseOptimize = mapConcretes subex + +-- analyse word form lists into prefix + suffixes +-- suffix sets can later be shared by subex elim + +optTerm :: Term -> Term +optTerm tr = case tr of + R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts] + R ts -> R $ map optTerm ts + P t v -> P (optTerm t) v + _ -> tr + where + optToks ss = prf : suffs where + prf = pref (head ss) (tail ss) + suffs = map (drop (length prf)) ss + pref cand ss = case ss of + s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss + _ -> cand + isK t = case t of + K (KS _) -> True + _ -> False + mkSuff ("":ws) = R (map (K . KS) ws) + mkSuff (p:ws) = W p (R (map (K . KS) ws)) + + +-- common subexpression elimination + +---subex :: [(CId,Term)] -> [(CId,Term)] +subex :: Concr -> Concr +subex cnc = err error id $ do + (tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0) + return $ addSubexpConsts tree cnc + +type TermList = Map.Map Term (Int,Int) -- number of occs, id +type TermM a = STM (TermList,Int) a + +addSubexpConsts :: TermList -> Concr -> Concr +addSubexpConsts tree cnc = cnc { + opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops], + lins = rec lins, + lindefs = rec lindefs, + printnames = rec printnames + } + where + ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree] + mkOne (f,trm) = (f, recomp f trm) + recomp f t = case Map.lookup t tree of + Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself + _ -> case t of + R ts -> R $ map (recomp f) ts + S ts -> S $ map (recomp f) ts + W s t -> W s (recomp f t) + P t p -> P (recomp f t) (recomp f p) + _ -> t + fid n = mkCId $ "_" ++ show n + rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)] + + +getSubtermsMod :: Concr -> TermM TermList +getSubtermsMod cnc = do + mapM getSubterms (Map.assocs (lins cnc)) + mapM getSubterms (Map.assocs (lindefs cnc)) + mapM getSubterms (Map.assocs (printnames cnc)) + (tree0,_) <- readSTM + return $ Map.filter (\ (nu,_) -> nu > 1) tree0 + where + getSubterms (f,trm) = collectSubterms trm >> return () + +collectSubterms :: Term -> TermM () +collectSubterms t = case t of + R ts -> do + mapM collectSubterms ts + add t + S ts -> do + mapM collectSubterms ts + add t + W s u -> do + collectSubterms u + add t + P p u -> do + collectSubterms p + collectSubterms u + add t + _ -> return () + where + add t = do + (ts,i) <- readSTM + let + ((count,id),next) = case Map.lookup t ts of + Just (nu,id) -> ((nu+1,id), i) + _ -> ((1, i ), i+1) + writeSTM (Map.insert t (count,id) ts, next) + diff --git a/src/compiler/GF/Compile/PGFPretty.hs b/src/compiler/GF/Compile/PGFPretty.hs new file mode 100644 index 000000000..679714db5 --- /dev/null +++ b/src/compiler/GF/Compile/PGFPretty.hs @@ -0,0 +1,93 @@ +-- | Print a part of a PGF grammar on the human-readable format used in +-- the paper "PGF: A Portable Run-Time Format for Type-Theoretical Grammars". +module GF.Compile.PGFPretty (prPGFPretty, prPMCFGPretty) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.PMCFG + +import GF.Data.Operations + +import Data.Map (Map) +import qualified Data.Map as Map +import Text.PrettyPrint.HughesPJ + + +prPGFPretty :: PGF -> String +prPGFPretty pgf = render $ prAbs (abstract pgf) $$ prAll (prCnc (abstract pgf)) (concretes pgf) + +prPMCFGPretty :: PGF -> CId -> String +prPMCFGPretty pgf lang = render $ + case lookParser pgf lang of + Nothing -> empty + Just pinfo -> text "language" <+> ppCId lang $$ ppPMCFG pinfo + + +prAbs :: Abstr -> Doc +prAbs a = prAll prCat (cats a) $$ prAll prFun (funs a) + +prCat :: CId -> [Hypo] -> Doc +prCat c h | isLiteralCat c = empty + | otherwise = text "cat" <+> ppCId c + +prFun :: CId -> (Type,Int,[Equation]) -> Doc +prFun f (t,_,_) = text "fun" <+> ppCId f <+> text ":" <+> prType t + +prType :: Type -> Doc +prType t = parens (hsep (punctuate (text ",") (map ppCId cs))) <+> text "->" <+> ppCId c + where (cs,c) = catSkeleton t + + +-- FIXME: show concrete name +-- FIXME: inline opers first +prCnc :: Abstr -> CId -> Concr -> Doc +prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c)) + where + prLinCat :: CId -> Term -> Doc + prLinCat c t | isLiteralCat c = empty + | otherwise = text "lincat" <+> ppCId c <+> text "=" <+> pr 0 t + where + pr p (R ts) = prec p 1 (hsep (punctuate (text " *") (map (pr 1) ts))) + pr _ (S []) = text "Str" + pr _ (C n) = text "Int_" <> text (show (n+1)) + + prLin :: CId -> Term -> Doc + prLin f t = text "lin" <+> ppCId f <+> text "=" <+> pr 0 t + where + pr :: Int -> Term -> Doc + pr p (R ts) = text "<" <+> hsep (punctuate (text ",") (map (pr 0) ts)) <+> text ">" + pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2) + pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts))) + pr p (K (KS t)) = doubleQuotes (text t) + pr p (V i) = text ("argv_" ++ show (i+1)) + pr p (C i) = text (show (i+1)) + pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts))) + pr _ t = error $ "PGFPretty.prLin " ++ show t + +linCat :: Concr -> CId -> Term +linCat cnc c = Map.findWithDefault (error $ "lincat: " ++ showCId c) c (lincats cnc) + +prec :: Int -> Int -> Doc -> Doc +prec p m | p >= m = parens + | otherwise = id + +expand :: Concr -> Concr +expand cnc = cnc { lins = Map.map (f "") (lins cnc) } + where + -- FIXME: handle KP + f :: String -> Term -> Term + f w (R ts) = R (map (f w) ts) + f w (P t1 t2) = P (f w t1) (f w t2) + f w (S []) = S (if null w then [] else [K (KS w)]) + f w (S (t:ts)) = S (f w t : map (f "") ts) + f w (FV ts) = FV (map (f w) ts) + f w (W s t) = f (w++s) t + f w (K (KS t)) = K (KS (w++t)) + f w (F o) = f w (Map.findWithDefault (error $ "Bad oper: " ++ showCId o) o (opers cnc)) + f w t = t + +-- Utilities + +prAll :: (a -> b -> Doc) -> Map a b -> Doc +prAll p m = vcat [ p k v | (k,v) <- Map.toList m]
\ No newline at end of file diff --git a/src/compiler/GF/Compile/ReadFiles.hs b/src/compiler/GF/Compile/ReadFiles.hs new file mode 100644 index 000000000..b96d3127b --- /dev/null +++ b/src/compiler/GF/Compile/ReadFiles.hs @@ -0,0 +1,220 @@ +---------------------------------------------------------------------- +-- | +-- Module : ReadFiles +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:34 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.26 $ +-- +-- Decide what files to read as function of dependencies and time stamps. +-- +-- make analysis for GF grammar modules. AR 11\/6\/2003--24\/2\/2004 +-- +-- to find all files that have to be read, put them in dependency order, and +-- decide which files need recompilation. Name @file.gf@ is returned for them, +-- and @file.gfo@ otherwise. +----------------------------------------------------------------------------- + +module GF.Compile.ReadFiles + ( getAllFiles,ModName,ModEnv,importsOfModule, + gfoFile,gfFile,isGFO,gf2gfo, + getOptionsFromFile) where + +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Data.Operations +import GF.Grammar.Lexer +import GF.Grammar.Parser +import GF.Grammar.Grammar +import GF.Grammar.Binary + +import Control.Monad +import Data.Char +import Data.List +import Data.Maybe(isJust) +import qualified Data.ByteString.Char8 as BS +import qualified Data.Map as Map +import System.Time +import System.Directory +import System.FilePath +import Text.PrettyPrint + +type ModName = String +type ModEnv = Map.Map ModName (ClockTime,[ModName]) + + +-- | Returns a list of all files to be compiled in topological order i.e. +-- the low level (leaf) modules are first. +getAllFiles :: Options -> [InitPath] -> ModEnv -> FileName -> IOE [FullPath] +getAllFiles opts ps env file = do + -- read module headers from all files recursively + ds <- liftM reverse $ get [] [] (justModuleName file) + ioeIO $ putIfVerb opts $ "all modules:" +++ show [name | (name,_,_,_,_) <- ds] + return $ paths ds + where + -- construct list of paths to read + paths ds = concatMap mkFile ds + where + mkFile (f,st,gfTime,gfoTime,p) = + case st of + CSComp -> [p </> gfFile f] + CSRead | isJust gfTime -> [gf2gfo opts (p </> gfFile f)] + | otherwise -> [p </> gfoFile f] + CSEnv -> [] + + -- | traverses the dependency graph and returns a topologicaly sorted + -- list of ModuleInfo. An error is raised if there is circular dependency + get :: [ModName] -- ^ keeps the current path in the dependency graph to avoid cycles + -> [ModuleInfo] -- ^ a list of already traversed modules + -> ModName -- ^ the current module + -> IOE [ModuleInfo] -- ^ the final + get trc ds name + | name `elem` trc = ioeErr $ Bad $ "circular modules" +++ unwords trc + | (not . null) [n | (n,_,_,_,_) <- ds, name == n] --- file already read + = return ds + | otherwise = do + (name,st0,t0,imps,p) <- findModule name + ds <- foldM (get (name:trc)) ds imps + let (st,t) | (not . null) [f | (f,_,t1,_,_) <- ds, elem f imps && liftM2 (>=) t0 t1 /= Just True] + = (CSComp,Nothing) + | otherwise = (st0,t0) + return ((name,st,t,imps,p):ds) + + -- searches for module in the search path and if it is found + -- returns 'ModuleInfo'. It fails if there is no such module + findModule :: ModName -> IOE ModuleInfo + findModule name = do + (file,gfTime,gfoTime) <- do + mb_gfFile <- ioeIO $ getFilePath ps (gfFile name) + case mb_gfFile of + Just gfFile -> do gfTime <- ioeIO $ getModificationTime gfFile + mb_gfoTime <- ioeIO $ catch (liftM Just $ getModificationTime (gf2gfo opts gfFile)) + (\_->return Nothing) + return (gfFile, Just gfTime, mb_gfoTime) + Nothing -> do mb_gfoFile <- ioeIO $ getFilePath (maybe id (:) (flag optGFODir opts) ps) (gfoFile name) + case mb_gfoFile of + Just gfoFile -> do gfoTime <- ioeIO $ getModificationTime gfoFile + return (gfoFile, Nothing, Just gfoTime) + Nothing -> ioeErr $ Bad (render (text "File" <+> text (gfFile name) <+> text "does not exist." $$ + text "searched in:" <+> vcat (map text ps))) + + + let mb_envmod = Map.lookup name env + (st,t) = selectFormat opts (fmap fst mb_envmod) gfTime gfoTime + + (mname,imps) <- case st of + CSEnv -> return (name, maybe [] snd mb_envmod) + CSRead -> ioeIO $ fmap importsOfModule (decodeModHeader ((if isGFO file then id else gf2gfo opts) file)) + CSComp -> do s <- ioeIO $ BS.readFile file + case runP pModHeader s of + Left (Pn l c,msg) -> ioeBad (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) + Right mo -> return (importsOfModule mo) + ioeErr $ testErr (mname == name) + ("module name" +++ mname +++ "differs from file name" +++ name) + return (name,st,t,imps,dropFileName file) + +isGFO :: FilePath -> Bool +isGFO = (== ".gfo") . takeExtensions + +gfoFile :: FilePath -> FilePath +gfoFile f = addExtension f "gfo" + +gfFile :: FilePath -> FilePath +gfFile f = addExtension f "gf" + +gf2gfo :: Options -> FilePath -> FilePath +gf2gfo opts file = maybe (gfoFile (dropExtension file)) + (\dir -> dir </> gfoFile (dropExtension (takeFileName file))) + (flag optGFODir opts) + +-- From the given Options and the time stamps computes +-- whether the module have to be computed, read from .gfo or +-- the environment version have to be used +selectFormat :: Options -> Maybe ClockTime -> Maybe ClockTime -> Maybe ClockTime -> (CompStatus,Maybe ClockTime) +selectFormat opts mtenv mtgf mtgfo = + case (mtenv,mtgfo,mtgf) of + (_,_,Just tgf) | fromSrc -> (CSComp,Nothing) + (Just tenv,_,_) | fromComp -> (CSEnv, Just tenv) + (_,Just tgfo,_) | fromComp -> (CSRead,Just tgfo) + (Just tenv,_,Just tgf) | tenv > tgf -> (CSEnv, Just tenv) + (_,Just tgfo,Just tgf) | tgfo > tgf -> (CSRead,Just tgfo) + (Just tenv,_,Nothing) -> (CSEnv,Just tenv) -- source does not exist + (_,Just tgfo,Nothing) -> (CSRead,Just tgfo) -- source does not exist + _ -> (CSComp,Nothing) + where + fromComp = flag optRecomp opts == NeverRecomp + fromSrc = flag optRecomp opts == AlwaysRecomp + + +-- internal module dep information + + +data CompStatus = + CSComp -- compile: read gf + | CSRead -- read gfo + | CSEnv -- gfo is in env + deriving Eq + +type ModuleInfo = (ModName,CompStatus,Maybe ClockTime,[ModName],InitPath) + +importsOfModule :: SourceModule -> (ModName,[ModName]) +importsOfModule (m,mi) = (modName m,depModInfo mi []) + where + depModInfo mi = + depModType (mtype mi) . + depExtends (extend mi) . + depWith (mwith mi) . + depExDeps (mexdeps mi). + depOpens (opens mi) + + depModType (MTAbstract) xs = xs + depModType (MTResource) xs = xs + depModType (MTInterface) xs = xs + depModType (MTConcrete m2) xs = modName m2:xs + depModType (MTInstance m2) xs = modName m2:xs + + depExtends es xs = foldr depInclude xs es + + depWith (Just (m,_,is)) xs = modName m : depInsts is xs + depWith Nothing xs = xs + + depExDeps eds xs = map modName eds ++ xs + + depOpens os xs = foldr depOpen xs os + + depInsts is xs = foldr depInst xs is + + depInclude (m,_) xs = modName m:xs + + depOpen (OSimple n ) xs = modName n:xs + depOpen (OQualif _ n) xs = modName n:xs + + depInst (m,n) xs = modName m:modName n:xs + + modName = showIdent + +-- | options can be passed to the compiler by comments in @--#@, in the main file +getOptionsFromFile :: FilePath -> IOE Options +getOptionsFromFile file = do + s <- ioe $ catch (fmap Ok $ BS.readFile file) + (\_ -> return (Bad $ "File " ++ file ++ " does not exist")) + let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s + fs = map (BS.unpack . BS.unwords . BS.words . BS.drop 3) ls + ioeErr $ parseModuleOptions fs + +getFilePath :: [FilePath] -> String -> IO (Maybe FilePath) +getFilePath paths file = get paths + where + get [] = return Nothing + get (p:ps) = do + let pfile = p </> file + exist <- doesFileExist pfile + if not exist + then get ps + else do pfile <- canonicalizePath pfile + return (Just pfile) diff --git a/src/compiler/GF/Compile/Refresh.hs b/src/compiler/GF/Compile/Refresh.hs new file mode 100644 index 000000000..04800fcce --- /dev/null +++ b/src/compiler/GF/Compile/Refresh.hs @@ -0,0 +1,133 @@ +---------------------------------------------------------------------- +-- | +-- Module : Refresh +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:27 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Compile.Refresh (refreshTerm, refreshTermN, + refreshModule + ) where + +import GF.Data.Operations +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Grammar.Macros +import Control.Monad + +refreshTerm :: Term -> Err Term +refreshTerm = refreshTermN 0 + +refreshTermN :: Int -> Term -> Err Term +refreshTermN i e = liftM snd $ refreshTermKN i e + +refreshTermKN :: Int -> Term -> Err (Int,Term) +refreshTermKN i e = liftM (\ (t,(_,i)) -> (i,t)) $ + appSTM (refresh e) (initIdStateN i) + +refresh :: Term -> STM IdState Term +refresh e = case e of + + Vr x -> liftM Vr (lookVar x) + Abs b x t -> liftM2 (Abs b) (refVarPlus x) (refresh t) + + Prod b x a t -> do + a' <- refresh a + x' <- refVar x + t' <- refresh t + return $ Prod b x' a' t' + + Let (x,(mt,a)) b -> do + a' <- refresh a + mt' <- case mt of + Just t -> refresh t >>= (return . Just) + _ -> return mt + x' <- refVar x + b' <- refresh b + return (Let (x',(mt',a')) b') + + R r -> liftM R $ refreshRecord r + + ExtR r s -> liftM2 ExtR (refresh r) (refresh s) + + T i cc -> liftM2 T (refreshTInfo i) (mapM refreshCase cc) + + _ -> composOp refresh e + +refreshCase :: (Patt,Term) -> STM IdState (Patt,Term) +refreshCase (p,t) = liftM2 (,) (refreshPatt p) (refresh t) + +refreshPatt p = case p of + PV x -> liftM PV (refVar x) + PC c ps -> liftM (PC c) (mapM refreshPatt ps) + PP q c ps -> liftM (PP q c) (mapM refreshPatt ps) + PR r -> liftM PR (mapPairsM refreshPatt r) + PT t p' -> liftM2 PT (refresh t) (refreshPatt p') + + PAs x p' -> liftM2 PAs (refVar x) (refreshPatt p') + + PSeq p' q' -> liftM2 PSeq (refreshPatt p') (refreshPatt q') + PAlt p' q' -> liftM2 PAlt (refreshPatt p') (refreshPatt q') + PRep p' -> liftM PRep (refreshPatt p') + PNeg p' -> liftM PNeg (refreshPatt p') + + _ -> return p + +refreshRecord r = case r of + [] -> return r + (x,(mt,a)):b -> do + a' <- refresh a + mt' <- case mt of + Just t -> refresh t >>= (return . Just) + _ -> return mt + b' <- refreshRecord b + return $ (x,(mt',a')) : b' + +refreshTInfo i = case i of + TTyped t -> liftM TTyped $ refresh t + TComp t -> liftM TComp $ refresh t + TWild t -> liftM TWild $ refresh t + _ -> return i + +-- for abstract syntax + +refreshEquation :: Equation -> Err ([Patt],Term) +refreshEquation pst = err Bad (return . fst) (appSTM (refr pst) initIdState) where + refr (ps,t) = liftM2 (,) (mapM refreshPatt ps) (refresh t) + +-- for concrete and resource in grammar, before optimizing + +refreshGrammar :: SourceGrammar -> Err SourceGrammar +refreshGrammar = liftM (MGrammar . snd) . foldM refreshModule (0,[]) . modules + +refreshModule :: (Int,[SourceModule]) -> SourceModule -> Err (Int,[SourceModule]) +refreshModule (k,ms) mi@(i,mo) + | isModCnc mo || isModRes mo = do + (k',js') <- foldM refreshRes (k,[]) $ tree2list $ jments mo + return (k', (i, replaceJudgements mo (buildTree js')) : ms) + | otherwise = return (k, mi:ms) + where + refreshRes (k,cs) ci@(c,info) = case info of + ResOper ptyp (Just trm) -> do ---- refresh ptyp + (k',trm') <- refreshTermKN k trm + return $ (k', (c, ResOper ptyp (Just trm')):cs) + ResOverload os tyts -> do + (k',tyts') <- liftM (\ (t,(_,i)) -> (i,t)) $ + appSTM (mapPairsM refresh tyts) (initIdStateN k) + return $ (k', (c, ResOverload os tyts'):cs) + CncCat mt (Just trm) pn -> do ---- refresh mt, pn + (k',trm') <- refreshTermKN k trm + return $ (k', (c, CncCat mt (Just trm') pn):cs) + CncFun mt (Just trm) pn -> do ---- refresh pn + (k',trm') <- refreshTermKN k trm + return $ (k', (c, CncFun mt (Just trm') pn):cs) + _ -> return (k, ci:cs) + diff --git a/src/compiler/GF/Compile/Rename.hs b/src/compiler/GF/Compile/Rename.hs new file mode 100644 index 000000000..30616b4cb --- /dev/null +++ b/src/compiler/GF/Compile/Rename.hs @@ -0,0 +1,313 @@ +---------------------------------------------------------------------- +-- | +-- Module : Rename +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/30 18:39:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.19 $ +-- +-- AR 14\/5\/2003 +-- The top-level function 'renameGrammar' does several things: +-- +-- - extends each module symbol table by indirections to extended module +-- +-- - changes unqualified and as-qualified imports to absolutely qualified +-- +-- - goes through the definitions and resolves names +-- +-- Dependency analysis between modules has been performed before this pass. +-- Hence we can proceed by @fold@ing "from left to right". +----------------------------------------------------------------------------- + +module GF.Compile.Rename ( + renameSourceTerm, + renameModule + ) where + +import GF.Grammar.Grammar +import GF.Grammar.Values +import GF.Grammar.Predef +import GF.Infra.Modules +import GF.Infra.Ident +import GF.Infra.CheckM +import GF.Grammar.Macros +import GF.Grammar.Printer +import GF.Grammar.Lookup +import GF.Grammar.Printer +import GF.Data.Operations + +import Control.Monad +import Data.List (nub) +import Text.PrettyPrint + +-- | this gives top-level access to renaming term input in the cc command +renameSourceTerm :: SourceGrammar -> Ident -> Term -> Check Term +renameSourceTerm g m t = do + mo <- checkErr $ lookupModule g m + status <- buildStatus g m mo + renameTerm status [] t + +renameModule :: [SourceModule] -> SourceModule -> Check SourceModule +renameModule ms (name,mo) = checkIn (text "renaming module" <+> ppIdent name) $ do + let js1 = jments mo + status <- buildStatus (MGrammar ms) name mo + js2 <- checkMap (renameInfo mo status) js1 + return (name, mo {opens = map forceQualif (opens mo), jments = js2}) + +type Status = (StatusTree, [(OpenSpec Ident, StatusTree)]) + +type StatusTree = BinTree Ident StatusInfo + +type StatusInfo = Ident -> Term + +renameIdentTerm :: Status -> Term -> Check Term +renameIdentTerm env@(act,imps) t = + checkIn (text "atomic term" <+> ppTerm Qualified 0 t $$ text "given" <+> hsep (punctuate comma (map (ppIdent . fst) qualifs))) $ + case t of + Vr c -> ident predefAbs c + Cn c -> ident (\_ s -> checkError s) c + Q m' c | m' == cPredef {- && isInPredefined c -} -> return t + Q m' c -> do + m <- checkErr (lookupErr m' qualifs) + f <- lookupTree showIdent c m + return $ f c + QC m' c | m' == cPredef {- && isInPredefined c -} -> return t + QC m' c -> do + m <- checkErr (lookupErr m' qualifs) + f <- lookupTree showIdent c m + return $ f c + _ -> return t + where + opens = [st | (OSimple _,st) <- imps] + qualifs = [(m, st) | (OQualif m _, st) <- imps] ++ + [(m, st) | (OSimple m, st) <- imps] -- qualif is always possible + + -- this facility is mainly for BWC with GF1: you need not import PredefAbs + predefAbs c s + | isPredefCat c = return $ Q cPredefAbs c + | otherwise = checkError s + + ident alt c = case lookupTree showIdent c act of + Ok f -> return $ f c + _ -> case lookupTreeManyAll showIdent opens c of + [f] -> return $ f c + [] -> alt c (text "constant not found:" <+> ppIdent c) + fs -> case nub [f c | f <- fs] of + [tr] -> return tr + ts@(t:_) -> do checkWarn (text "conflict" <+> hsep (punctuate comma (map (ppTerm Qualified 0) ts))) + return t + -- a warning will be generated in CheckGrammar, and the head returned + -- in next V: + -- Bad $ "conflicting imports:" +++ unwords (map prt ts) + +info2status :: Maybe Ident -> (Ident,Info) -> StatusInfo +info2status mq (c,i) = case i of + AbsFun _ _ Nothing -> maybe Con QC mq + ResValue _ -> maybe Con QC mq + ResParam _ _ -> maybe Con QC mq + AnyInd True m -> maybe Con (const (QC m)) mq + AnyInd False m -> maybe Cn (const (Q m)) mq + _ -> maybe Cn Q mq + +tree2status :: OpenSpec Ident -> BinTree Ident Info -> BinTree Ident StatusInfo +tree2status o = case o of + OSimple i -> mapTree (info2status (Just i)) + OQualif i j -> mapTree (info2status (Just j)) + +buildStatus :: SourceGrammar -> Ident -> SourceModInfo -> Check Status +buildStatus gr c mo = let mo' = self2status c mo in do + let gr1 = MGrammar ((c,mo) : modules gr) + ops = [OSimple e | e <- allExtends gr1 c] ++ opens mo + mods <- checkErr $ mapM (lookupModule gr1 . openedModule) ops + let sts = map modInfo2status $ zip ops mods + return $ if isModCnc mo + then (emptyBinTree, reverse sts) -- the module itself does not define any names + else (mo',reverse sts) -- so the empty ident is not needed + +modInfo2status :: (OpenSpec Ident,SourceModInfo) -> (OpenSpec Ident, StatusTree) +modInfo2status (o,mo) = (o,tree2status o (jments mo)) + +self2status :: Ident -> SourceModInfo -> StatusTree +self2status c m = mapTree (info2status (Just c)) (jments m) + +forceQualif o = case o of + OSimple i -> OQualif i i + OQualif _ i -> OQualif i i + +renameInfo :: SourceModInfo -> Status -> Ident -> Info -> Check Info +renameInfo mo status i info = checkIn + (text "renaming definition of" <+> ppIdent i <+> ppPosition mo i) $ + case info of + AbsCat pco pfs -> liftM2 AbsCat (renPerh (renameContext status) pco) + (renPerh (mapM rent) pfs) + AbsFun pty pa ptr -> liftM3 AbsFun (ren pty) (return pa) (renPerh (mapM (renameEquation status [])) ptr) + ResOper pty ptr -> liftM2 ResOper (ren pty) (ren ptr) + ResOverload os tysts -> + liftM (ResOverload os) (mapM (pairM rent) tysts) + + ResParam (Just pp) m -> do + pp' <- mapM (renameParam status) pp + return (ResParam (Just pp') m) + ResValue t -> do + t <- rent t + return (ResValue t) + CncCat pty ptr ppr -> liftM3 CncCat (ren pty) (ren ptr) (ren ppr) + CncFun mt ptr ppr -> liftM2 (CncFun mt) (ren ptr) (ren ppr) + _ -> return info + where + ren = renPerh rent + rent = renameTerm status [] + +renPerh ren (Just t) = liftM Just $ ren t +renPerh ren Nothing = return Nothing + +renameTerm :: Status -> [Ident] -> Term -> Check Term +renameTerm env vars = ren vars where + ren vs trm = case trm of + Abs b x t -> liftM (Abs b x) (ren (x:vs) t) + Prod bt x a b -> liftM2 (Prod bt x) (ren vs a) (ren (x:vs) b) + Typed a b -> liftM2 Typed (ren vs a) (ren vs b) + Vr x + | elem x vs -> return trm + | otherwise -> renid trm + Cn _ -> renid trm + Con _ -> renid trm + Q _ _ -> renid trm + QC _ _ -> renid trm + T i cs -> do + i' <- case i of + TTyped ty -> liftM TTyped $ ren vs ty -- the only annotation in source + _ -> return i + liftM (T i') $ mapM (renCase vs) cs + + Let (x,(m,a)) b -> do + m' <- case m of + Just ty -> liftM Just $ ren vs ty + _ -> return m + a' <- ren vs a + b' <- ren (x:vs) b + return $ Let (x,(m',a')) b' + + P t@(Vr r) l -- Here we have $r.l$ and this is ambiguous it could be either + -- record projection from variable or constant $r$ or qualified expression with module $r$ + | elem r vs -> return trm -- try var proj first .. + | otherwise -> checks [ renid (Q r (label2ident l)) -- .. and qualified expression second. + , renid t >>= \t -> return (P t l) -- try as a constant at the end + , checkError (text "unknown qualified constant" <+> ppTerm Unqualified 0 trm) + ] + + EPatt p -> do + (p',_) <- renpatt p + return $ EPatt p' + + _ -> composOp (ren vs) trm + + renid = renameIdentTerm env + renCase vs (p,t) = do + (p',vs') <- renpatt p + t' <- ren (vs' ++ vs) t + return (p',t') + renpatt = renamePattern env + +-- | vars not needed in env, since patterns always overshadow old vars +renamePattern :: Status -> Patt -> Check (Patt,[Ident]) +renamePattern env patt = case patt of + + PMacro c -> do + c' <- renid $ Vr c + case c' of + Q p d -> renp $ PM p d + _ -> checkError (text "unresolved pattern" <+> ppPatt Unqualified 0 patt) + + PC c ps -> do + c' <- renid $ Cn c + case c' of + QC m c -> do psvss <- mapM renp ps + let (ps,vs) = unzip psvss + return (PP m c ps, concat vs) + Q _ _ -> checkError (text "data constructor expected but" <+> ppTerm Qualified 0 c' <+> text "is found instead") + _ -> checkError (text "unresolved data constructor" <+> ppTerm Qualified 0 c') + + PP p c ps -> do + (QC p' c') <- renid (QC p c) + psvss <- mapM renp ps + let (ps',vs) = unzip psvss + return (PP p' c' ps', concat vs) + + PM p c -> do + x <- renid (Q p c) + (p',c') <- case x of + (Q p' c') -> return (p',c') + _ -> checkError (text "not a pattern macro" <+> ppPatt Qualified 0 patt) + return (PM p' c', []) + + PV x -> checks [ renid (Vr x) >>= \t' -> case t' of + QC m c -> return (PP m c [],[]) + _ -> checkError (text "not a constructor") + , return (patt, [x]) + ] + + PR r -> do + let (ls,ps) = unzip r + psvss <- mapM renp ps + let (ps',vs') = unzip psvss + return (PR (zip ls ps'), concat vs') + + PAlt p q -> do + (p',vs) <- renp p + (q',ws) <- renp q + return (PAlt p' q', vs ++ ws) + + PSeq p q -> do + (p',vs) <- renp p + (q',ws) <- renp q + return (PSeq p' q', vs ++ ws) + + PRep p -> do + (p',vs) <- renp p + return (PRep p', vs) + + PNeg p -> do + (p',vs) <- renp p + return (PNeg p', vs) + + PAs x p -> do + (p',vs) <- renp p + return (PAs x p', x:vs) + + _ -> return (patt,[]) + + where + renp = renamePattern env + renid = renameIdentTerm env + +renameParam :: Status -> (Ident, Context) -> Check (Ident, Context) +renameParam env (c,co) = do + co' <- renameContext env co + return (c,co') + +renameContext :: Status -> Context -> Check Context +renameContext b = renc [] where + renc vs cont = case cont of + (bt,x,t) : xts + | isWildIdent x -> do + t' <- ren vs t + xts' <- renc vs xts + return $ (bt,x,t') : xts' + | otherwise -> do + t' <- ren vs t + let vs' = x:vs + xts' <- renc vs' xts + return $ (bt,x,t') : xts' + _ -> return cont + ren = renameTerm b + +-- | vars not needed in env, since patterns always overshadow old vars +renameEquation :: Status -> [Ident] -> Equation -> Check Equation +renameEquation b vs (ps,t) = do + (ps',vs') <- liftM unzip $ mapM (renamePattern b) ps + t' <- renameTerm b (concat vs' ++ vs) t + return (ps',t') diff --git a/src/compiler/GF/Compile/SubExOpt.hs b/src/compiler/GF/Compile/SubExOpt.hs new file mode 100644 index 000000000..c7dbb5d3d --- /dev/null +++ b/src/compiler/GF/Compile/SubExOpt.hs @@ -0,0 +1,142 @@ +---------------------------------------------------------------------- +-- | +-- Module : SubExOpt +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- This module implements a simple common subexpression elimination +-- for .gfo grammars, to factor out shared subterms in lin rules. +-- It works in three phases: +-- +-- (1) collectSubterms collects recursively all subterms of forms table and (P x..y) +-- from lin definitions (experience shows that only these forms +-- tend to get shared) and counts how many times they occur +-- (2) addSubexpConsts takes those subterms t that occur more than once +-- and creates definitions of form "oper A''n = t" where n is a +-- fresh number; notice that we assume no ids of this form are in +-- scope otherwise +-- (3) elimSubtermsMod goes through lins and the created opers by replacing largest +-- possible subterms by the newly created identifiers +-- +----------------------------------------------------------------------------- + +module GF.Compile.SubExOpt (subexpModule,unsubexpModule) where + +import GF.Grammar.Grammar +import GF.Grammar.Lookup +import GF.Infra.Ident +import qualified GF.Grammar.Macros as C +import qualified GF.Infra.Modules as M +import GF.Data.Operations + +import Control.Monad +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.ByteString.Char8 as BS +import Data.List + +subexpModule :: SourceModule -> SourceModule +subexpModule (n,mo) = errVal (n,mo) $ do + let ljs = tree2list (M.jments mo) + (tree,_) <- appSTM (getSubtermsMod n ljs) (Map.empty,0) + js2 <- liftM buildTree $ addSubexpConsts n tree $ ljs + return (n,M.replaceJudgements mo js2) + +unsubexpModule :: SourceModule -> SourceModule +unsubexpModule sm@(i,mo) + | hasSub ljs = (i,M.replaceJudgements mo (rebuild (map unparInfo ljs))) + | otherwise = sm + where + ljs = tree2list (M.jments mo) + + -- perform this iff the module has opers + hasSub ljs = not $ null [c | (c,ResOper _ _) <- ljs] + unparInfo (c,info) = case info of + CncFun xs (Just t) m -> [(c, CncFun xs (Just (unparTerm t)) m)] + ResOper (Just (EInt 8)) _ -> [] -- subexp-generated opers + ResOper pty (Just t) -> [(c, ResOper pty (Just (unparTerm t)))] + _ -> [(c,info)] + unparTerm t = case t of + Q m c | isOperIdent c -> --- name convention of subexp opers + errVal t $ liftM unparTerm $ lookupResDef gr m c + _ -> C.composSafeOp unparTerm t + gr = M.MGrammar [sm] + rebuild = buildTree . concat + +-- implementation + +type TermList = Map Term (Int,Int) -- number of occs, id +type TermM a = STM (TermList,Int) a + +addSubexpConsts :: + Ident -> Map Term (Int,Int) -> [(Ident,Info)] -> Err [(Ident,Info)] +addSubexpConsts mo tree lins = do + let opers = [oper id trm | (trm,(_,id)) <- list] + mapM mkOne $ opers ++ lins + where + mkOne (f,def) = case def of + CncFun xs (Just trm) pn -> do + trm' <- recomp f trm + return (f,CncFun xs (Just trm') pn) + ResOper ty (Just trm) -> do + trm' <- recomp f trm + return (f,ResOper ty (Just trm')) + _ -> return (f,def) + recomp f t = case Map.lookup t tree of + Just (_,id) | operIdent id /= f -> return $ Q mo (operIdent id) + _ -> C.composOp (recomp f) t + + list = Map.toList tree + + oper id trm = (operIdent id, ResOper (Just (EInt 8)) (Just trm)) + --- impossible type encoding generated opers + +getSubtermsMod :: Ident -> [(Ident,Info)] -> TermM (Map Term (Int,Int)) +getSubtermsMod mo js = do + mapM (getInfo (collectSubterms mo)) js + (tree0,_) <- readSTM + return $ Map.filter (\ (nu,_) -> nu > 1) tree0 + where + getInfo get fi@(f,i) = case i of + CncFun xs (Just trm) pn -> do + get trm + return $ fi + ResOper ty (Just trm) -> do + get trm + return $ fi + _ -> return fi + +collectSubterms :: Ident -> Term -> TermM Term +collectSubterms mo t = case t of + App f a -> do + collect f + collect a + add t + T ty cs -> do + let (_,ts) = unzip cs + mapM collect ts + add t + V ty ts -> do + mapM collect ts + add t +---- K (KP _ _) -> add t + _ -> C.composOp (collectSubterms mo) t + where + collect = collectSubterms mo + add t = do + (ts,i) <- readSTM + let + ((count,id),next) = case Map.lookup t ts of + Just (nu,id) -> ((nu+1,id), i) + _ -> ((1, i ), i+1) + writeSTM (Map.insert t (count,id) ts, next) + return t --- only because of composOp + +operIdent :: Int -> Ident +operIdent i = identC (operPrefix `BS.append` (BS.pack (show i))) --- + +isOperIdent :: Ident -> Bool +isOperIdent id = BS.isPrefixOf operPrefix (ident2bs id) + +operPrefix = BS.pack ("A''") diff --git a/src/compiler/GF/Compile/Update.hs b/src/compiler/GF/Compile/Update.hs new file mode 100644 index 000000000..1e39a2e03 --- /dev/null +++ b/src/compiler/GF/Compile/Update.hs @@ -0,0 +1,226 @@ +---------------------------------------------------------------------- +-- | +-- Module : Update +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/30 18:39:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.8 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Compile.Update (buildAnyTree, extendModule, rebuildModule) where + +import GF.Infra.Ident +import GF.Grammar.Grammar +import GF.Grammar.Printer +import GF.Grammar.Lookup +import GF.Infra.Modules +import GF.Infra.Option + +import GF.Data.Operations + +import Data.List +import qualified Data.Map as Map +import Control.Monad +import Text.PrettyPrint + +-- | combine a list of definitions into a balanced binary search tree +buildAnyTree :: Ident -> [(Ident,Info)] -> Err (BinTree Ident Info) +buildAnyTree m = go Map.empty + where + go map [] = return map + go map ((c,j):is) = do + case Map.lookup c map of + Just i -> case unifyAnyInfo m i j of + Ok k -> go (Map.insert c k map) is + Bad _ -> fail $ render (text "cannot unify the informations" $$ + nest 4 (ppJudgement Qualified (c,i)) $$ + text "and" $+$ + nest 4 (ppJudgement Qualified (c,j)) $$ + text "in module" <+> ppIdent m) + Nothing -> go (Map.insert c j map) is + +extendModule :: SourceGrammar -> SourceModule -> Err SourceModule +extendModule gr (name,m) + ---- Just to allow inheritance in incomplete concrete (which are not + ---- compiled anyway), extensions are not built for them. + ---- Should be replaced by real control. AR 4/2/2005 + | mstatus m == MSIncomplete && isModCnc m = return (name,m) + | otherwise = do m' <- foldM extOne m (extend m) + return (name,m') + where + extOne mo (n,cond) = do + m0 <- lookupModule gr n + + -- test that the module types match, and find out if the old is complete + testErr (sameMType (mtype m) (mtype mo)) + ("illegal extension type to module" +++ showIdent name) + + let isCompl = isCompleteModule m0 + + -- build extension in a way depending on whether the old module is complete + js1 <- extendMod gr isCompl (n, isInherited cond) name (jments m0) (jments mo) + + -- if incomplete, throw away extension information + return $ + if isCompl + then mo {jments = js1} + else mo {extend = filter ((/=n) . fst) (extend mo) + ,mexdeps= nub (n : mexdeps mo) + ,jments = js1 + } + +-- | rebuilding instance + interface, and "with" modules, prior to renaming. +-- AR 24/10/2003 +rebuildModule :: SourceGrammar -> SourceModule -> Err SourceModule +rebuildModule gr mo@(i,mi@(ModInfo mt stat fs_ me mw ops_ med_ js_ ps_)) = do +---- deps <- moduleDeps ms +---- is <- openInterfaces deps i + let is = [] ---- the method above is buggy: try "i -src" for two grs. AR 8/3/2005 + mi' <- case mw of + + -- add the information given in interface into an instance module + Nothing -> do + testErr (null is || mstatus mi == MSIncomplete) + ("module" +++ showIdent i +++ + "has open interfaces and must therefore be declared incomplete") + case mt of + MTInstance i0 -> do + m1 <- lookupModule gr i0 + testErr (isModRes m1) ("interface expected instead of" +++ showIdent i0) + js' <- extendMod gr False (i0,const True) i (jments m1) (jments mi) + --- to avoid double inclusions, in instance I of I0 = J0 ** ... + case extends mi of + [] -> return $ replaceJudgements mi js' + j0s -> do + m0s <- mapM (lookupModule gr) j0s + let notInM0 c _ = all (not . isInBinTree c . jments) m0s + let js2 = filterBinTree notInM0 js' + return $ (replaceJudgements mi js2) + {positions = Map.union (positions m1) (positions mi)} + _ -> return mi + + -- add the instance opens to an incomplete module "with" instances + Just (ext,incl,ops) -> do + let (infs,insts) = unzip ops + let stat' = ifNull MSComplete (const MSIncomplete) + [i | i <- is, notElem i infs] + testErr (stat' == MSComplete || stat == MSIncomplete) + ("module" +++ showIdent i +++ "remains incomplete") + ModInfo mt0 _ fs me' _ ops0 _ js ps0 <- lookupModule gr ext + let ops1 = nub $ + ops_ ++ -- N.B. js has been name-resolved already + [OQualif i j | (i,j) <- ops] ++ + [o | o <- ops0, notElem (openedModule o) infs] ++ + [OQualif i i | i <- insts] ++ + [OSimple i | i <- insts] + + --- check if me is incomplete + let fs1 = fs `addOptions` fs_ -- new flags have priority + let js0 = [ci | ci@(c,_) <- tree2list js, isInherited incl c] + let js1 = buildTree (tree2list js_ ++ js0) + let ps1 = Map.union ps_ ps0 + let med1= nub (ext : infs ++ insts ++ med_) + return $ ModInfo mt0 stat' fs1 me Nothing ops1 med1 js1 ps1 + + return (i,mi') + +-- | When extending a complete module: new information is inserted, +-- and the process is interrupted if unification fails. +-- If the extended module is incomplete, its judgements are just copied. +extendMod :: SourceGrammar -> + Bool -> (Ident,Ident -> Bool) -> Ident -> + BinTree Ident Info -> BinTree Ident Info -> + Err (BinTree Ident Info) +extendMod gr isCompl (name,cond) base old new = foldM try new $ Map.toList old + where + try new (c,i) + | not (cond c) = return new + | otherwise = case Map.lookup c new of + Just j -> case unifyAnyInfo name i j of + Ok k -> return $ updateTree (c,k) new + Bad _ -> do (base,j) <- case j of + AnyInd _ m -> lookupOrigInfo gr m c + _ -> return (base,j) + (name,i) <- case i of + AnyInd _ m -> lookupOrigInfo gr m c + _ -> return (name,i) + fail $ render (text "cannot unify the information" $$ + nest 4 (ppJudgement Qualified (c,i)) $$ + text "in module" <+> ppIdent name <+> text "with" $$ + nest 4 (ppJudgement Qualified (c,j)) $$ + text "in module" <+> ppIdent base) + Nothing-> if isCompl + then return $ updateTree (c,indirInfo name i) new + else return $ updateTree (c,i) new + + indirInfo :: Ident -> Info -> Info + indirInfo n info = AnyInd b n' where + (b,n') = case info of + ResValue _ -> (True,n) + ResParam _ _ -> (True,n) + AbsFun _ _ Nothing -> (True,n) + AnyInd b k -> (b,k) + _ -> (False,n) ---- canonical in Abs + +unifyAnyInfo :: Ident -> Info -> Info -> Err Info +unifyAnyInfo m i j = case (i,j) of + (AbsCat mc1 mf1, AbsCat mc2 mf2) -> + liftM2 AbsCat (unifMaybe mc1 mc2) (unifConstrs mf1 mf2) -- adding constrs + (AbsFun mt1 ma1 md1, AbsFun mt2 ma2 md2) -> + liftM3 AbsFun (unifMaybe mt1 mt2) (unifAbsArrity ma1 ma2) (unifAbsDefs md1 md2) -- adding defs + + (ResParam mt1 mv1, ResParam mt2 mv2) -> + liftM2 ResParam (unifMaybe mt1 mt2) (unifMaybe mv1 mv2) + (ResValue t1, ResValue t2) + | t1==t2 -> return (ResValue t1) + | otherwise -> fail "" + (_, ResOverload ms t) | elem m ms -> + return $ ResOverload ms t + (ResOper mt1 m1, ResOper mt2 m2) -> + liftM2 ResOper (unifMaybe mt1 mt2) (unifMaybe m1 m2) + + (CncCat mc1 mf1 mp1, CncCat mc2 mf2 mp2) -> + liftM3 CncCat (unifMaybe mc1 mc2) (unifMaybe mf1 mf2) (unifMaybe mp1 mp2) + (CncFun m mt1 md1, CncFun _ mt2 md2) -> + liftM2 (CncFun m) (unifMaybe mt1 mt2) (unifMaybe md1 md2) ---- adding defs + + (AnyInd b1 m1, AnyInd b2 m2) -> do + testErr (b1 == b2) $ "indirection status" + testErr (m1 == m2) $ "different sources of indirection" + return i + + _ -> fail "informations" + +-- | this is what happens when matching two values in the same module +unifMaybe :: Eq a => Maybe a -> Maybe a -> Err (Maybe a) +unifMaybe Nothing Nothing = return Nothing +unifMaybe (Just p1) Nothing = return (Just p1) +unifMaybe Nothing (Just p2) = return (Just p2) +unifMaybe (Just p1) (Just p2) + | p1==p2 = return (Just p1) + | otherwise = fail "" + +unifAbsArrity :: Maybe Int -> Maybe Int -> Err (Maybe Int) +unifAbsArrity Nothing Nothing = return Nothing +unifAbsArrity (Just a ) Nothing = return (Just a ) +unifAbsArrity Nothing (Just a ) = return (Just a ) +unifAbsArrity (Just a1) (Just a2) + | a1==a2 = return (Just a1) + | otherwise = fail "" + +unifAbsDefs :: Maybe [Equation] -> Maybe [Equation] -> Err (Maybe [Equation]) +unifAbsDefs Nothing Nothing = return Nothing +unifAbsDefs (Just _ ) Nothing = fail "" +unifAbsDefs Nothing (Just _ ) = fail "" +unifAbsDefs (Just xs) (Just ys) = return (Just (xs ++ ys)) + +unifConstrs :: Maybe [Term] -> Maybe [Term] -> Err (Maybe [Term]) +unifConstrs p1 p2 = case (p1,p2) of + (Nothing, _) -> return p2 + (_, Nothing) -> return p1 + (Just bs, Just ds) -> return $ Just $ bs ++ ds diff --git a/src/compiler/GF/Data/Assoc.hs b/src/compiler/GF/Data/Assoc.hs new file mode 100644 index 000000000..f775319ea --- /dev/null +++ b/src/compiler/GF/Data/Assoc.hs @@ -0,0 +1,143 @@ +---------------------------------------------------------------------- +-- | +-- Module : Assoc +-- Maintainer : Peter Ljunglöf +-- Stability : Stable +-- Portability : Haskell 98 +-- +-- > CVS $Date: 2005/05/09 09:28:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.4 $ +-- +-- Association lists, or finite maps, +-- including sets as maps with result type @()@. +-- function names stolen from module @Array@. +-- /O(log n)/ key lookup +----------------------------------------------------------------------------- + +module GF.Data.Assoc ( Assoc, + Set, + emptyAssoc, + emptySet, + listAssoc, + listSet, + accumAssoc, + aAssocs, + aElems, + assocMap, + assocFilter, + lookupAssoc, + lookupWith, + (?), + (?=) + ) where + +import GF.Data.SortedList + +infixl 9 ?, ?= + +-- | a set is a finite map with empty values +type Set a = Assoc a () + +emptyAssoc :: Ord a => Assoc a b +emptySet :: Ord a => Set a + +-- | creating a finite map from a sorted key-value list +listAssoc :: Ord a => SList (a, b) -> Assoc a b + +-- | creating a set from a sorted list +listSet :: Ord a => SList a -> Set a + +-- | building a finite map from a list of keys and 'b's, +-- and a function that combines a sorted list of 'b's into a value +accumAssoc :: (Ord a, Ord c) => (SList c -> b) -> [(a, c)] -> Assoc a b + +-- | all key-value pairs from an association list +aAssocs :: Ord a => Assoc a b -> SList (a, b) + +-- | all keys from an association list +aElems :: Ord a => Assoc a b -> SList a + +-- fmap :: Ord a => (b -> b') -> Assoc a b -> Assoc a b' + +-- | mapping values to other values. +-- the mapping function can take the key as information +assocMap :: Ord a => (a -> b -> b') -> Assoc a b -> Assoc a b' + +assocFilter :: Ord a => (b -> Bool) -> Assoc a b -> Assoc a b +assocFilter pred = listAssoc . filter (pred . snd) . aAssocs + +-- | monadic lookup function, +-- returning failure if the key does not exist +lookupAssoc :: (Ord a, Monad m) => Assoc a b -> a -> m b + +-- | if the key does not exist, +-- the first argument is returned +lookupWith :: Ord a => b -> Assoc a b -> a -> b + +-- | if the values are monadic, we can return the value type +(?) :: (Ord a, Monad m) => Assoc a (m b) -> a -> m b + +-- | checking wheter the map contains a given key +(?=) :: Ord a => Assoc a b -> a -> Bool + + +------------------------------------------------------------ + +data Assoc a b = ANil | ANode (Assoc a b) a b (Assoc a b) + deriving (Eq, Ord, Show) + +emptyAssoc = ANil +emptySet = emptyAssoc + +listAssoc as = assoc + where (assoc, []) = sl2bst (length as) as + sl2bst 0 xs = (ANil, xs) + sl2bst 1 (x:xs) = (ANode ANil (fst x) (snd x) ANil, xs) + sl2bst n xs = (ANode left (fst x) (snd x) right, zs) + where llen = (n-1) `div` 2 + rlen = n - 1 - llen + (left, x:ys) = sl2bst llen xs + (right, zs) = sl2bst rlen ys + +listSet as = listAssoc (zip as (repeat ())) + +accumAssoc join = listAssoc . map (mapSnd join) . groupPairs . nubsort + where mapSnd f (a, b) = (a, f b) + +aAssocs as = prs as [] + where prs ANil = id + prs (ANode left a b right) = prs left . ((a,b) :) . prs right + +aElems = map fst . aAssocs + + +instance Ord a => Functor (Assoc a) where + fmap f = assocMap (const f) + +assocMap f ANil = ANil +assocMap f (ANode left a b right) = ANode (assocMap f left) a (f a b) (assocMap f right) + + +lookupAssoc ANil _ = fail "key not found" +lookupAssoc (ANode left a b right) a' = case compare a a' of + GT -> lookupAssoc left a' + LT -> lookupAssoc right a' + EQ -> return b + +lookupWith z ANil _ = z +lookupWith z (ANode left a b right) a' = case compare a a' of + GT -> lookupWith z left a' + LT -> lookupWith z right a' + EQ -> b + +(?) = lookupWith (fail "key not found") + +(?=) = \assoc -> maybe False (const True) . lookupAssoc assoc + + + + + + + diff --git a/src/compiler/GF/Data/BacktrackM.hs b/src/compiler/GF/Data/BacktrackM.hs new file mode 100644 index 000000000..36317ebb6 --- /dev/null +++ b/src/compiler/GF/Data/BacktrackM.hs @@ -0,0 +1,86 @@ +---------------------------------------------------------------------- +-- | +-- Module : BacktrackM +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:00 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.4 $ +-- +-- Backtracking state monad, with r\/o environment +----------------------------------------------------------------------------- + +{-# OPTIONS_GHC -fglasgow-exts #-} +module GF.Data.BacktrackM ( + -- * the backtracking state monad + BacktrackM, + -- * monad specific utilities + member, + cut, + -- * running the monad + foldBM, runBM, + foldSolutions, solutions, + foldFinalStates, finalStates, + + -- * reexport the 'MonadState' class + module Control.Monad.State.Class, + ) where + +import Data.List +import Control.Monad +import Control.Monad.State.Class + +---------------------------------------------------------------------- +-- Combining endomorphisms and continuations +-- a la Ralf Hinze + +-- BacktrackM = state monad transformer over the backtracking monad + +newtype BacktrackM s a = BM (forall b . (a -> s -> b -> b) -> s -> b -> b) + +-- * running the monad + +runBM :: BacktrackM s a -> s -> [(s,a)] +runBM (BM m) s = m (\x s xs -> (s,x) : xs) s [] + +foldBM :: (a -> s -> b -> b) -> b -> BacktrackM s a -> s -> b +foldBM f b (BM m) s = m f s b + +foldSolutions :: (a -> b -> b) -> b -> BacktrackM s a -> s -> b +foldSolutions f b (BM m) s = m (\x s b -> f x b) s b + +solutions :: BacktrackM s a -> s -> [a] +solutions = foldSolutions (:) [] + +foldFinalStates :: (s -> b -> b) -> b -> BacktrackM s () -> s -> b +foldFinalStates f b (BM m) s = m (\x s b -> f s b) s b + +finalStates :: BacktrackM s () -> s -> [s] +finalStates bm = map fst . runBM bm + +instance Monad (BacktrackM s) where + return a = BM (\c s b -> c a s b) + BM m >>= k = BM (\c s b -> m (\a s b -> unBM (k a) c s b) s b) + where unBM (BM m) = m + fail _ = mzero + +instance Functor (BacktrackM s) where + fmap f (BM m) = BM (\c s b -> m (\a s b -> c (f a) s b) s b) + +instance MonadPlus (BacktrackM s) where + mzero = BM (\c s b -> b) + (BM f) `mplus` (BM g) = BM (\c s b -> g c s $! f c s b) + +instance MonadState s (BacktrackM s) where + get = BM (\c s b -> c s s b) + put s = BM (\c _ b -> c () s b) + +-- * specific functions on the backtracking monad + +member :: [a] -> BacktrackM s a +member xs = BM (\c s b -> foldl' (\b x -> c x s b) b xs) + +cut :: BacktrackM s a -> BacktrackM s [(s,a)] +cut f = BM (\c s b -> c (runBM f s) s b) diff --git a/src/compiler/GF/Data/ErrM.hs b/src/compiler/GF/Data/ErrM.hs new file mode 100644 index 000000000..e8cea12d4 --- /dev/null +++ b/src/compiler/GF/Data/ErrM.hs @@ -0,0 +1,38 @@ +---------------------------------------------------------------------- +-- | +-- Module : ErrM +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:00 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.5 $ +-- +-- hack for BNFC generated files. AR 21/9/2003 +----------------------------------------------------------------------------- + +module GF.Data.ErrM (Err(..)) where + +import Control.Monad (MonadPlus(..)) + +-- | like @Maybe@ type with error msgs +data Err a = Ok a | Bad String + deriving (Read, Show, Eq) + +instance Monad Err where + return = Ok + fail = Bad + Ok a >>= f = f a + Bad s >>= f = Bad s + +-- | added 2\/10\/2003 by PEB +instance Functor Err where + fmap f (Ok a) = Ok (f a) + fmap f (Bad s) = Bad s + +-- | added by KJ +instance MonadPlus Err where + mzero = Bad "error (no reason given)" + mplus (Ok a) _ = Ok a + mplus (Bad s) b = b diff --git a/src/compiler/GF/Data/Graph.hs b/src/compiler/GF/Data/Graph.hs new file mode 100644 index 000000000..bfb289860 --- /dev/null +++ b/src/compiler/GF/Data/Graph.hs @@ -0,0 +1,178 @@ +---------------------------------------------------------------------- +-- | +-- Module : Graph +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/10 16:43:44 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.2 $ +-- +-- A simple graph module. +----------------------------------------------------------------------------- +module GF.Data.Graph ( Graph(..), Node, Edge, NodeInfo + , newGraph, nodes, edges + , nmap, emap, newNode, newNodes, newEdge, newEdges + , insertEdgeWith + , removeNode, removeNodes + , nodeInfo + , getIncoming, getOutgoing, getNodeLabel + , inDegree, outDegree + , nodeLabel + , edgeFrom, edgeTo, edgeLabel + , reverseGraph, mergeGraphs, renameNodes + ) where + +import GF.Data.Utilities + +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +data Graph n a b = Graph [n] ![Node n a] ![Edge n b] + deriving (Eq,Show) + +type Node n a = (n,a) +type Edge n b = (n,n,b) + +type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b]) + +-- | Create a new empty graph. +newGraph :: [n] -> Graph n a b +newGraph ns = Graph ns [] [] + +-- | Get all the nodes in the graph. +nodes :: Graph n a b -> [Node n a] +nodes (Graph _ ns _) = ns + +-- | Get all the edges in the graph. +edges :: Graph n a b -> [Edge n b] +edges (Graph _ _ es) = es + +-- | Map a function over the node labels. +nmap :: (a -> c) -> Graph n a b -> Graph n c b +nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es + +-- | Map a function over the edge labels. +emap :: (b -> c) -> Graph n a b -> Graph n a c +emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es] + +-- | Add a node to the graph. +newNode :: a -- ^ Node label + -> Graph n a b + -> (Graph n a b,n) -- ^ Node graph and name of new node +newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c) + +newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a]) +newNodes ls g = (g', zip ns ls) + where (g',ns) = mapAccumL (flip newNode) g ls +-- lazy version: +--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns') +-- where (xs,cs') = splitAt (length ls) cs +-- ns' = zip xs ls + +newEdge :: Edge n b -> Graph n a b -> Graph n a b +newEdge e (Graph c ns es) = Graph c ns (e:es) + +newEdges :: [Edge n b] -> Graph n a b -> Graph n a b +newEdges es g = foldl' (flip newEdge) g es +-- lazy version: +-- newEdges es' (Graph c ns es) = Graph c ns (es'++es) + +insertEdgeWith :: Eq n => + (b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b +insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es) + where h [] = [e] + h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es' + | otherwise = e':h es' + +-- | Remove a node and all edges to and from that node. +removeNode :: Ord n => n -> Graph n a b -> Graph n a b +removeNode n = removeNodes (Set.singleton n) + +-- | Remove a set of nodes and all edges to and from those nodes. +removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b +removeNodes xs (Graph c ns es) = Graph c ns' es' + where + keepNode n = not (Set.member n xs) + ns' = [ x | x@(n,_) <- ns, keepNode n ] + es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ] + +-- | Get a map of node names to info about each node. +nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b +nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ] + where + inc = groupEdgesBy edgeTo g + out = groupEdgesBy edgeFrom g + fn m n = fromMaybe [] (Map.lookup n m) + +groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by + -> Graph n a b -> Map n [Edge n b] +groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g] + +lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b]) +lookupNode i n = fromJust $ Map.lookup n i + +getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b] +getIncoming i n = let (_,inc,_) = lookupNode i n in inc + +getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b] +getOutgoing i n = let (_,_,out) = lookupNode i n in out + +inDegree :: Ord n => NodeInfo n a b -> n -> Int +inDegree i n = length $ getIncoming i n + +outDegree :: Ord n => NodeInfo n a b -> n -> Int +outDegree i n = length $ getOutgoing i n + +getNodeLabel :: Ord n => NodeInfo n a b -> n -> a +getNodeLabel i n = let (l,_,_) = lookupNode i n in l + +nodeLabel :: Node n a -> a +nodeLabel = snd + +edgeFrom :: Edge n b -> n +edgeFrom (f,_,_) = f + +edgeTo :: Edge n b -> n +edgeTo (_,t,_) = t + +edgeLabel :: Edge n b -> b +edgeLabel (_,_,l) = l + +reverseGraph :: Graph n a b -> Graph n a b +reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ] + +-- | Add the nodes from the second graph to the first graph. +-- The nodes in the second graph will be renamed using the name +-- supply in the first graph. +-- This function is more efficient when the second graph +-- is smaller than the first. +mergeGraphs :: Ord m => Graph n a b -> Graph m a b + -> (Graph n a b, m -> n) -- ^ The new graph and a function translating + -- the old names of nodes in the second graph + -- to names in the new graph. +mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName) + where + (xs,c') = splitAt (length (nodes g2)) c + newNames = Map.fromList (zip (map fst (nodes g2)) xs) + newName n = fromJust $ Map.lookup n newNames + Graph _ ns2 es2 = renameNodes newName undefined g2 + +-- | Rename the nodes in the graph. +renameNodes :: (n -> m) -- ^ renaming function + -> [m] -- ^ infinite supply of fresh node names, to + -- use when adding nodes in the future. + -> Graph n a b -> Graph m a b +renameNodes newName c (Graph _ ns es) = Graph c ns' es' + where ns' = map' (\ (n,x) -> (newName n,x)) ns + es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es + +-- | A strict 'map' +map' :: (a -> b) -> [a] -> [b] +map' _ [] = [] +map' f (x:xs) = ((:) $! f x) $! map' f xs diff --git a/src/compiler/GF/Data/Graphviz.hs b/src/compiler/GF/Data/Graphviz.hs new file mode 100644 index 000000000..411f76898 --- /dev/null +++ b/src/compiler/GF/Data/Graphviz.hs @@ -0,0 +1,116 @@ +---------------------------------------------------------------------- +-- | +-- Module : Graphviz +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/09/15 18:10:44 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.2 $ +-- +-- Graphviz DOT format representation and printing. +----------------------------------------------------------------------------- + +module GF.Data.Graphviz ( + Graph(..), GraphType(..), + Node(..), Edge(..), + Attr, + addSubGraphs, + setName, + setAttr, + prGraphviz + ) where + +import Data.Char + +import GF.Data.Utilities + +-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs +data Graph = Graph { + gType :: GraphType, + gId :: Maybe String, + gAttrs :: [Attr], + gNodes :: [Node], + gEdges :: [Edge], + gSubgraphs :: [Graph] + } + deriving (Show) + +data GraphType = Directed | Undirected + deriving (Show) + +data Node = Node String [Attr] + deriving Show + +data Edge = Edge String String [Attr] + deriving Show + +type Attr = (String,String) + +-- +-- * Graph construction +-- + +addSubGraphs :: [Graph] -> Graph -> Graph +addSubGraphs gs g = g { gSubgraphs = gs ++ gSubgraphs g } + +setName :: String -> Graph -> Graph +setName n g = g { gId = Just n } + +setAttr :: String -> String -> Graph -> Graph +setAttr n v g = g { gAttrs = tableSet n v (gAttrs g) } + +-- +-- * Pretty-printing +-- + +prGraphviz :: Graph -> String +prGraphviz g@(Graph t i _ _ _ _) = + graphtype t ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}\n" + +prSubGraph :: Graph -> String +prSubGraph g@(Graph _ i _ _ _ _) = + "subgraph" ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}" + +prGraph :: Graph -> String +prGraph (Graph t id at ns es ss) = + unlines $ map (++";") (map prAttr at + ++ map prNode ns + ++ map (prEdge t) es + ++ map prSubGraph ss) + +graphtype :: GraphType -> String +graphtype Directed = "digraph" +graphtype Undirected = "graph" + +prNode :: Node -> String +prNode (Node n at) = esc n ++ " " ++ prAttrList at + +prEdge :: GraphType -> Edge -> String +prEdge t (Edge x y at) = esc x ++ " " ++ edgeop t ++ " " ++ esc y ++ " " ++ prAttrList at + +edgeop :: GraphType -> String +edgeop Directed = "->" +edgeop Undirected = "--" + +prAttrList :: [Attr] -> String +prAttrList [] = "" +prAttrList at = "[" ++ join "," (map prAttr at) ++ "]" + +prAttr :: Attr -> String +prAttr (n,v) = esc n ++ " = " ++ esc v + +esc :: String -> String +esc s | needEsc s = "\"" ++ concat [ if shouldEsc c then ['\\',c] else [c] | c <- s ] ++ "\"" + | otherwise = s + where shouldEsc = (`elem` ['"', '\\']) + +needEsc :: String -> Bool +needEsc [] = True +needEsc xs | all isDigit xs = False +needEsc (x:xs) = not (isIDFirst x && all isIDChar xs) + +isIDFirst, isIDChar :: Char -> Bool +isIDFirst c = c `elem` (['_']++['a'..'z']++['A'..'Z']) +isIDChar c = isIDFirst c || isDigit c diff --git a/src/compiler/GF/Data/MultiMap.hs b/src/compiler/GF/Data/MultiMap.hs new file mode 100644 index 000000000..e565f433b --- /dev/null +++ b/src/compiler/GF/Data/MultiMap.hs @@ -0,0 +1,47 @@ +module GF.Data.MultiMap where + +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set +import Prelude hiding (map) +import qualified Prelude + +type MultiMap k a = Map k (Set a) + +empty :: MultiMap k a +empty = Map.empty + +keys :: MultiMap k a -> [k] +keys = Map.keys + +elems :: MultiMap k a -> [a] +elems = concatMap Set.toList . Map.elems + +(!) :: Ord k => MultiMap k a -> k -> [a] +m ! k = Set.toList $ Map.findWithDefault Set.empty k m + +member :: (Ord k, Ord a) => k -> a -> MultiMap k a -> Bool +member k x m = x `Set.member` Map.findWithDefault Set.empty k m + +insert :: (Ord k, Ord a) => k -> a -> MultiMap k a -> MultiMap k a +insert k x m = Map.insertWith Set.union k (Set.singleton x) m + +insert' :: (Ord k, Ord a) => k -> a -> MultiMap k a -> Maybe (MultiMap k a) +insert' k x m | member k x m = Nothing -- FIXME: inefficient + | otherwise = Just (insert k x m) + +union :: (Ord k, Ord a) => MultiMap k a -> MultiMap k a -> MultiMap k a +union = Map.unionWith Set.union + +size :: MultiMap k a -> Int +size = sum . Prelude.map Set.size . Map.elems + +map :: (Ord a, Ord b) => (a -> b) -> MultiMap k a -> MultiMap k b +map f = Map.map (Set.map f) + +fromList :: (Ord k, Ord a) => [(k,a)] -> MultiMap k a +fromList xs = Map.fromListWith Set.union [(k, Set.singleton x) | (k,x) <- xs] + +toList :: MultiMap k a -> [(k,a)] +toList m = [(k,x) | (k,s) <- Map.toList m, x <- Set.toList s]
\ No newline at end of file diff --git a/src/compiler/GF/Data/Operations.hs b/src/compiler/GF/Data/Operations.hs new file mode 100644 index 000000000..7b2afc9fe --- /dev/null +++ b/src/compiler/GF/Data/Operations.hs @@ -0,0 +1,374 @@ +---------------------------------------------------------------------- +-- | +-- Module : Operations +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 16:12:41 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.22 $ +-- +-- some auxiliary GF operations. AR 19\/6\/1998 -- 6\/2\/2001 +-- +-- Copyright (c) Aarne Ranta 1998-2000, under GNU General Public License (see GPL) +----------------------------------------------------------------------------- + +module GF.Data.Operations (-- * misc functions + ifNull, onSnd, + + -- * the Error monad + Err(..), err, maybeErr, testErr, errVal, errIn, + lookupErr, + mapPairListM, mapPairsM, pairM, + singleton, mapsErr, mapsErrTree, + + -- ** checking + checkUnique, + + -- * binary search trees; now with FiniteMap + BinTree, emptyBinTree, isInBinTree, justLookupTree, + lookupTree, lookupTreeMany, lookupTreeManyAll, updateTree, + buildTree, filterBinTree, + sorted2tree, mapTree, mapMTree, tree2list, + + + -- * printing + indent, (+++), (++-), (++++), (+++++), + prUpper, prReplicate, prTList, prQuotedString, prParenth, prCurly, + prBracket, prArgList, prSemicList, prCurlyList, restoreEscapes, + numberedParagraphs, prConjList, prIfEmpty, wrapLines, + + -- * extra + combinations, + + -- * topological sorting with test of cyclicity + topoTest, + + -- * the generic fix point iterator + iterFix, + + -- * chop into separator-separated parts + chunks, readIntArg, + + -- * state monad with error; from Agda 6\/11\/2001 + STM(..), appSTM, stm, stmr, readSTM, updateSTM, writeSTM, done, + + -- * error monad class + ErrorMonad(..), checkAgain, checks, allChecks, doUntil + + ) where + +import Data.Char (isSpace, toUpper, isSpace, isDigit) +import Data.List (nub, sortBy, sort, deleteBy, nubBy) +import qualified Data.Map as Map +import Data.Map (Map) +import Control.Monad (liftM,liftM2, MonadPlus, mzero, mplus) + +import GF.Data.ErrM +import GF.Data.Relation + +infixr 5 +++ +infixr 5 ++- +infixr 5 ++++ +infixr 5 +++++ + +ifNull :: b -> ([a] -> b) -> [a] -> b +ifNull b f xs = if null xs then b else f xs + +onSnd :: (a -> b) -> (c,a) -> (c,b) +onSnd f (x, y) = (x, f y) + +-- the Error monad + +-- | analogue of @maybe@ +err :: (String -> b) -> (a -> b) -> Err a -> b +err d f e = case e of + Ok a -> f a + Bad s -> d s + +-- | add msg s to @Maybe@ failures +maybeErr :: String -> Maybe a -> Err a +maybeErr s = maybe (Bad s) Ok + +testErr :: Bool -> String -> Err () +testErr cond msg = if cond then return () else Bad msg + +errVal :: a -> Err a -> a +errVal a = err (const a) id + +errIn :: String -> Err a -> Err a +errIn msg = err (\s -> Bad (s ++++ "OCCURRED IN" ++++ msg)) return + +lookupErr :: (Eq a,Show a) => a -> [(a,b)] -> Err b +lookupErr a abs = maybeErr ("Unknown" +++ show a) (lookup a abs) + +mapPairListM :: Monad m => ((a,b) -> m c) -> [(a,b)] -> m [(a,c)] +mapPairListM f xys = mapM (\ p@(x,_) -> liftM ((,) x) (f p)) xys + +mapPairsM :: Monad m => (b -> m c) -> [(a,b)] -> m [(a,c)] +mapPairsM f xys = mapM (\ (x,y) -> liftM ((,) x) (f y)) xys + +pairM :: Monad a => (b -> a c) -> (b,b) -> a (c,c) +pairM op (t1,t2) = liftM2 (,) (op t1) (op t2) + +singleton :: a -> [a] +singleton = (:[]) + +-- checking + +checkUnique :: (Show a, Eq a) => [a] -> [String] +checkUnique ss = ["overloaded" +++ show s | s <- nub overloads] where + overloads = filter overloaded ss + overloaded s = length (filter (==s) ss) > 1 + +-- binary search trees + +type BinTree a b = Map a b + +emptyBinTree :: BinTree a b +emptyBinTree = Map.empty + +isInBinTree :: (Ord a) => a -> BinTree a b -> Bool +isInBinTree = Map.member + +justLookupTree :: (Monad m,Ord a) => a -> BinTree a b -> m b +justLookupTree = lookupTree (const []) + +lookupTree :: (Monad m,Ord a) => (a -> String) -> a -> BinTree a b -> m b +lookupTree pr x tree = case Map.lookup x tree of + Just y -> return y + _ -> fail ("no occurrence of element" +++ pr x) + +lookupTreeMany :: Ord a => (a -> String) -> [BinTree a b] -> a -> Err b +lookupTreeMany pr (t:ts) x = case lookupTree pr x t of + Ok v -> return v + _ -> lookupTreeMany pr ts x +lookupTreeMany pr [] x = Bad $ "failed to find" +++ pr x + +lookupTreeManyAll :: Ord a => (a -> String) -> [BinTree a b] -> a -> [b] +lookupTreeManyAll pr (t:ts) x = case lookupTree pr x t of + Ok v -> v : lookupTreeManyAll pr ts x + _ -> lookupTreeManyAll pr ts x +lookupTreeManyAll pr [] x = [] + +updateTree :: (Ord a) => (a,b) -> BinTree a b -> BinTree a b +updateTree (a,b) = Map.insert a b + +buildTree :: (Ord a) => [(a,b)] -> BinTree a b +buildTree = Map.fromList + +sorted2tree :: Ord a => [(a,b)] -> BinTree a b +sorted2tree = Map.fromAscList + +mapTree :: ((a,b) -> c) -> BinTree a b -> BinTree a c +mapTree f = Map.mapWithKey (\k v -> f (k,v)) + +mapMTree :: (Ord a,Monad m) => ((a,b) -> m c) -> BinTree a b -> m (BinTree a c) +mapMTree f t = liftM Map.fromList $ sequence [liftM ((,) k) (f (k,x)) | (k,x) <- Map.toList t] + +filterBinTree :: Ord a => (a -> b -> Bool) -> BinTree a b -> BinTree a b +filterBinTree = Map.filterWithKey + +tree2list :: BinTree a b -> [(a,b)] -- inorder +tree2list = Map.toList + +-- printing + +indent :: Int -> String -> String +indent i s = replicate i ' ' ++ s + +(+++), (++-), (++++), (+++++) :: String -> String -> String +a +++ b = a ++ " " ++ b +a ++- "" = a +a ++- b = a +++ b +a ++++ b = a ++ "\n" ++ b +a +++++ b = a ++ "\n\n" ++ b + +prUpper :: String -> String +prUpper s = s1 ++ s2' where + (s1,s2) = span isSpace s + s2' = case s2 of + c:t -> toUpper c : t + _ -> s2 + +prReplicate :: Int -> String -> String +prReplicate n s = concat (replicate n s) + +prTList :: String -> [String] -> String +prTList t ss = case ss of + [] -> "" + [s] -> s + s:ss -> s ++ t ++ prTList t ss + +prQuotedString :: String -> String +prQuotedString x = "\"" ++ restoreEscapes x ++ "\"" + +prParenth :: String -> String +prParenth s = if s == "" then "" else "(" ++ s ++ ")" + +prCurly, prBracket :: String -> String +prCurly s = "{" ++ s ++ "}" +prBracket s = "[" ++ s ++ "]" + +prArgList, prSemicList, prCurlyList :: [String] -> String +prArgList = prParenth . prTList "," +prSemicList = prTList " ; " +prCurlyList = prCurly . prSemicList + +restoreEscapes :: String -> String +restoreEscapes s = + case s of + [] -> [] + '"' : t -> '\\' : '"' : restoreEscapes t + '\\': t -> '\\' : '\\' : restoreEscapes t + c : t -> c : restoreEscapes t + +numberedParagraphs :: [[String]] -> [String] +numberedParagraphs t = case t of + [] -> [] + p:[] -> p + _ -> concat [(show n ++ ".") : s | (n,s) <- zip [1..] t] + +prConjList :: String -> [String] -> String +prConjList c [] = "" +prConjList c [s] = s +prConjList c [s,t] = s +++ c +++ t +prConjList c (s:tt) = s ++ "," +++ prConjList c tt + +prIfEmpty :: String -> String -> String -> String -> String +prIfEmpty em _ _ [] = em +prIfEmpty em nem1 nem2 s = nem1 ++ s ++ nem2 + +-- | Thomas Hallgren's wrap lines +wrapLines :: Int -> String -> String +wrapLines n "" = "" +wrapLines n s@(c:cs) = + if isSpace c + then c:wrapLines (n+1) cs + else case lex s of + [(w,rest)] -> if n'>=76 + then '\n':w++wrapLines l rest + else w++wrapLines n' rest + where n' = n+l + l = length w + _ -> s -- give up!! + +--- optWrapLines = if argFlag "wraplines" True then wrapLines 0 else id + +-- | 'combinations' is the same as @sequence@!!! +-- peb 30\/5-04 +combinations :: [[a]] -> [[a]] +combinations t = case t of + [] -> [[]] + aa:uu -> [a:u | a <- aa, u <- combinations uu] + +-- | topological sorting with test of cyclicity +topoTest :: Ord a => [(a,[a])] -> Either [a] [[a]] +topoTest = topologicalSort . mkRel' + +-- | the generic fix point iterator +iterFix :: Eq a => ([a] -> [a]) -> [a] -> [a] +iterFix more start = iter start start + where + iter old new = if (null new') + then old + else iter (new' ++ old) new' + where + new' = filter (`notElem` old) (more new) + +-- | chop into separator-separated parts +chunks :: Eq a => a -> [a] -> [[a]] +chunks sep ws = case span (/= sep) ws of + (a,_:b) -> a : bs where bs = chunks sep b + (a, []) -> if null a then [] else [a] + +readIntArg :: String -> Int +readIntArg n = if (not (null n) && all isDigit n) then read n else 0 + + +-- state monad with error; from Agda 6/11/2001 + +newtype STM s a = STM (s -> Err (a,s)) + +appSTM :: STM s a -> s -> Err (a,s) +appSTM (STM f) s = f s + +stm :: (s -> Err (a,s)) -> STM s a +stm = STM + +stmr :: (s -> (a,s)) -> STM s a +stmr f = stm (\s -> return (f s)) + +instance Monad (STM s) where + return a = STM (\s -> return (a,s)) + STM c >>= f = STM (\s -> do + (x,s') <- c s + let STM f' = f x + f' s') + +readSTM :: STM s s +readSTM = stmr (\s -> (s,s)) + +updateSTM :: (s -> s) -> STM s () +updateSTM f = stmr (\s -> ((),f s)) + +writeSTM :: s -> STM s () +writeSTM s = stmr (const ((),s)) + +done :: Monad m => m () +done = return () + +class Monad m => ErrorMonad m where + raise :: String -> m a + handle :: m a -> (String -> m a) -> m a + handle_ :: m a -> m a -> m a + handle_ a b = a `handle` (\_ -> b) + +instance ErrorMonad Err where + raise = Bad + handle a@(Ok _) _ = a + handle (Bad i) f = f i + +instance ErrorMonad (STM s) where + raise msg = STM (\s -> raise msg) + handle (STM f) g = STM (\s -> (f s) + `handle` (\e -> let STM g' = (g e) in + g' s)) + +-- error recovery with multiple reporting AR 30/5/2008 +mapsErr :: (a -> Err b) -> [a] -> Err [b] + +mapsErr f = seqs . map f where + seqs es = case es of + Ok v : ms -> case seqs ms of + Ok vs -> return (v : vs) + b -> b + Bad s : ms -> case seqs ms of + Ok vs -> Bad s + Bad ss -> Bad (s +++++ ss) + [] -> return [] + +mapsErrTree :: (Ord a) => ((a,b) -> Err (a,c)) -> BinTree a b -> Err (BinTree a c) +mapsErrTree f t = mapsErr f (tree2list t) >>= return . sorted2tree + + +-- | if the first check fails try another one +checkAgain :: ErrorMonad m => m a -> m a -> m a +checkAgain c1 c2 = handle_ c1 c2 + +checks :: ErrorMonad m => [m a] -> m a +checks [] = raise "no chance to pass" +checks cs = foldr1 checkAgain cs + +allChecks :: ErrorMonad m => [m a] -> m [a] +allChecks ms = case ms of + (m: ms) -> let rs = allChecks ms in handle_ (liftM2 (:) m rs) rs + _ -> return [] + +doUntil :: ErrorMonad m => (a -> Bool) -> [m a] -> m a +doUntil cond ms = case ms of + a:as -> do + v <- a + if cond v then return v else doUntil cond as + _ -> raise "no result" diff --git a/src/compiler/GF/Data/Relation.hs b/src/compiler/GF/Data/Relation.hs new file mode 100644 index 000000000..7024a482c --- /dev/null +++ b/src/compiler/GF/Data/Relation.hs @@ -0,0 +1,193 @@ +---------------------------------------------------------------------- +-- | +-- Module : Relation +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/26 17:13:13 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- A simple module for relations. +----------------------------------------------------------------------------- + +module GF.Data.Relation (Rel, mkRel, mkRel' + , allRelated , isRelatedTo + , transitiveClosure + , reflexiveClosure, reflexiveClosure_ + , symmetricClosure + , symmetricSubrelation, reflexiveSubrelation + , reflexiveElements + , equivalenceClasses + , isTransitive, isReflexive, isSymmetric + , isEquivalence + , isSubRelationOf + , topologicalSort) where + +import Data.Foldable (toList) +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Sequence (Seq) +import qualified Data.Sequence as Seq +import Data.Set (Set) +import qualified Data.Set as Set + +import GF.Data.Utilities + +type Rel a = Map a (Set a) + +-- | Creates a relation from a list of related pairs. +mkRel :: Ord a => [(a,a)] -> Rel a +mkRel ps = relates ps Map.empty + +-- | Creates a relation from a list pairs of elements and the elements +-- related to them. +mkRel' :: Ord a => [(a,[a])] -> Rel a +mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs] + +relToList :: Ord a => Rel a -> [(a,a)] +relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ] + +-- | Add a pair to the relation. +relate :: Ord a => a -> a -> Rel a -> Rel a +relate x y r = Map.insertWith Set.union x (Set.singleton y) r + +-- | Add a list of pairs to the relation. +relates :: Ord a => [(a,a)] -> Rel a -> Rel a +relates ps r = foldl (\r' (x,y) -> relate x y r') r ps + +-- | Checks if an element is related to another. +isRelatedTo :: Ord a => Rel a -> a -> a -> Bool +isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r) + +-- | Get the set of elements to which a given element is related. +allRelated :: Ord a => Rel a -> a -> Set a +allRelated r x = fromMaybe Set.empty (Map.lookup x r) + +-- | Get all elements in the relation. +domain :: Ord a => Rel a -> Set a +domain r = foldl Set.union (Map.keysSet r) (Map.elems r) + +reverseRel :: Ord a => Rel a -> Rel a +reverseRel r = mkRel [(y,x) | (x,y) <- relToList r] + +-- | Keep only pairs for which both elements are in the given set. +intersectSetRel :: Ord a => Set a -> Rel a -> Rel a +intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s) + +transitiveClosure :: Ord a => Rel a -> Rel a +transitiveClosure r = fix (Map.map growSet) r + where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys) + +reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined. + -> Rel a -> Rel a +reflexiveClosure_ u r = relates [(x,x) | x <- u] r + +-- | Uses 'domain' +reflexiveClosure :: Ord a => Rel a -> Rel a +reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r + +symmetricClosure :: Ord a => Rel a -> Rel a +symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r + +symmetricSubrelation :: Ord a => Rel a -> Rel a +symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r + +reflexiveSubrelation :: Ord a => Rel a -> Rel a +reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r + +-- | Get the set of elements which are related to themselves. +reflexiveElements :: Ord a => Rel a -> Set a +reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ] + +-- | Keep the related pairs for which the predicate is true. +filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a +filterRel p = fst . purgeEmpty . Map.mapWithKey (Set.filter . p) + +-- | Remove keys that map to no elements. +purgeEmpty :: Ord a => Rel a -> (Rel a, Set a) +purgeEmpty r = let (r',r'') = Map.partition (not . Set.null) r + in (r', Map.keysSet r'') + +-- | Get the equivalence classes from an equivalence relation. +equivalenceClasses :: Ord a => Rel a -> [Set a] +equivalenceClasses r = equivalenceClasses_ (Map.keys r) r + where equivalenceClasses_ [] _ = [] + equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r + where ys = allRelated r x + zs = [x' | x' <- xs, not (x' `Set.member` ys)] + +isTransitive :: Ord a => Rel a -> Bool +isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r, + y <- Set.toList ys, z <- Set.toList (allRelated r y)] + +isReflexive :: Ord a => Rel a -> Bool +isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r) + +isSymmetric :: Ord a => Rel a -> Bool +isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r] + +isEquivalence :: Ord a => Rel a -> Bool +isEquivalence r = isReflexive r && isSymmetric r && isTransitive r + +isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool +isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1) + +-- | Returns 'Left' if there are cycles, and 'Right' if there are cycles. +topologicalSort :: Ord a => Rel a -> Either [a] [[a]] +topologicalSort r = tsort r' noIncoming Seq.empty + where r' = relToRel' r + noIncoming = Seq.fromList [x | (x,(is,_)) <- Map.toList r', Set.null is] + +tsort :: Ord a => Rel' a -> Seq a -> Seq a -> Either [a] [[a]] +tsort r xs l = case Seq.viewl xs of + Seq.EmptyL | isEmpty' r -> Left (toList l) + | otherwise -> Right (findCycles (rel'ToRel r)) + x Seq.:< xs -> tsort r' (xs Seq.>< Seq.fromList new) (l Seq.|> x) + where (r',_,os) = remove x r + new = [o | o <- Set.toList os, Set.null (incoming o r')] + +findCycles :: Ord a => Rel a -> [[a]] +findCycles = map Set.toList . equivalenceClasses . reflexiveSubrelation . symmetricSubrelation . transitiveClosure + +-- +-- * Alternative representation that keeps both incoming and outgoing edges +-- + +-- | Keeps both incoming and outgoing edges. +type Rel' a = Map a (Set a, Set a) + +isEmpty' :: Ord a => Rel' a -> Bool +isEmpty' = Map.null + +relToRel' :: Ord a => Rel a -> Rel' a +relToRel' r = Map.unionWith (\ (i,_) (_,o) -> (i,o)) ir or + where ir = Map.map (\s -> (s,Set.empty)) $ reverseRel r + or = Map.map (\s -> (Set.empty,s)) $ r + +rel'ToRel :: Ord a => Rel' a -> Rel a +rel'ToRel = Map.map snd + +-- | Removes an element from a relation. +-- Returns the new relation, and the set of incoming and outgoing edges +-- of the removed element. +remove :: Ord a => a -> Rel' a -> (Rel' a, Set a, Set a) +remove x r = let (mss,r') = Map.updateLookupWithKey (\_ _ -> Nothing) x r + in case mss of + -- element was not in the relation + Nothing -> (r', Set.empty, Set.empty) + -- remove element from all incoming and outgoing sets + -- of other elements + Just (is,os) -> + let r'' = foldr (\i -> Map.adjust (\ (is',os') -> (is', Set.delete x os')) i) r' $ Set.toList is + r''' = foldr (\o -> Map.adjust (\ (is',os') -> (Set.delete x is', os')) o) r'' $ Set.toList os + in (r''', is, os) + +incoming :: Ord a => a -> Rel' a -> Set a +incoming x r = maybe Set.empty fst $ Map.lookup x r + +outgoing :: Ord a => a -> Rel' a -> Set a +outgoing x r = maybe Set.empty snd $ Map.lookup x r
\ No newline at end of file diff --git a/src/compiler/GF/Data/SortedList.hs b/src/compiler/GF/Data/SortedList.hs new file mode 100644 index 000000000..d77ff68d4 --- /dev/null +++ b/src/compiler/GF/Data/SortedList.hs @@ -0,0 +1,127 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Peter Ljunglöf +-- Stability : stable +-- Portability : portable +-- +-- > CVS $Date: 2005/04/21 16:22:08 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.3 $ +-- +-- Sets as sorted lists +-- +-- * /O(n)/ union, difference and intersection +-- +-- * /O(n log n)/ creating a set from a list (=sorting) +-- +-- * /O(n^2)/ fixed point iteration +----------------------------------------------------------------------------- + +module GF.Data.SortedList + ( -- * type declarations + SList, SMap, + -- * set operations + nubsort, union, + (<++>), (<\\>), (<**>), + limit, + hasCommonElements, subset, + -- * map operations + groupPairs, groupUnion, + unionMap, mergeMap + ) where + +import Data.List (groupBy) +import GF.Data.Utilities (split, foldMerge) + +-- | The list must be sorted and contain no duplicates. +type SList a = [a] + +-- | A sorted map also has unique keys, +-- i.e. 'map fst m :: SList a', if 'm :: SMap a b' +type SMap a b = SList (a, b) + +-- | Group a set of key-value pairs into a sorted map +groupPairs :: Ord a => SList (a, b) -> SMap a (SList b) +groupPairs = map mapFst . groupBy eqFst + where mapFst as = (fst (head as), map snd as) + eqFst a b = fst a == fst b + +-- | Group a set of key-(sets-of-values) pairs into a sorted map +groupUnion :: (Ord a, Ord b) => SList (a, SList b) -> SMap a (SList b) +groupUnion = map unionSnd . groupPairs + where unionSnd (a, bs) = (a, union bs) + +-- | True is the two sets has common elements +hasCommonElements :: Ord a => SList a -> SList a -> Bool +hasCommonElements as bs = not (null (as <**> bs)) + +-- | True if the first argument is a subset of the second argument +subset :: Ord a => SList a -> SList a -> Bool +xs `subset` ys = null (xs <\\> ys) + +-- | Create a set from any list. +-- This function can also be used as an alternative to @nub@ in @List.hs@ +nubsort :: Ord a => [a] -> SList a +nubsort = union . map return + +-- | the union of a list of sorted maps +unionMap :: Ord a => (b -> b -> b) + -> [SMap a b] -> SMap a b +unionMap plus = foldMerge (mergeMap plus) [] + +-- | merging two sorted maps +mergeMap :: Ord a => (b -> b -> b) + -> SMap a b -> SMap a b -> SMap a b +mergeMap plus [] abs = abs +mergeMap plus abs [] = abs +mergeMap plus abs@(ab@(a,bs):abs') cds@(cd@(c,ds):cds') + = case compare a c of + EQ -> (a, plus bs ds) : mergeMap plus abs' cds' + LT -> ab : mergeMap plus abs' cds + GT -> cd : mergeMap plus abs cds' + +-- | The union of a list of sets +union :: Ord a => [SList a] -> SList a +union = foldMerge (<++>) [] + +-- | The union of two sets +(<++>) :: Ord a => SList a -> SList a -> SList a +[] <++> bs = bs +as <++> [] = as +as@(a:as') <++> bs@(b:bs') = case compare a b of + LT -> a : (as' <++> bs) + GT -> b : (as <++> bs') + EQ -> a : (as' <++> bs') + +-- | The difference of two sets +(<\\>) :: Ord a => SList a -> SList a -> SList a +[] <\\> bs = [] +as <\\> [] = as +as@(a:as') <\\> bs@(b:bs') = case compare a b of + LT -> a : (as' <\\> bs) + GT -> (as <\\> bs') + EQ -> (as' <\\> bs') + +-- | The intersection of two sets +(<**>) :: Ord a => SList a -> SList a -> SList a +[] <**> bs = [] +as <**> [] = [] +as@(a:as') <**> bs@(b:bs') = case compare a b of + LT -> (as' <**> bs) + GT -> (as <**> bs') + EQ -> a : (as' <**> bs') + +-- | A fixed point iteration +limit :: Ord a => (a -> SList a) -- ^ The iterator function + -> SList a -- ^ The initial set + -> SList a -- ^ The result of the iteration +limit more start = limit' start start + where limit' chart agenda | null new' = chart + | otherwise = limit' (chart <++> new') new' + where new = union (map more agenda) + new'= new <\\> chart + + + + + diff --git a/src/compiler/GF/Data/Str.hs b/src/compiler/GF/Data/Str.hs new file mode 100644 index 000000000..6f65764c7 --- /dev/null +++ b/src/compiler/GF/Data/Str.hs @@ -0,0 +1,134 @@ +---------------------------------------------------------------------- +-- | +-- Module : Str +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:09 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Data.Str ( + Str (..), Tok (..), --- constructors needed in PrGrammar + str2strings, str2allStrings, str, sstr, sstrV, + isZeroTok, prStr, plusStr, glueStr, + strTok, + allItems +) where + +import GF.Data.Operations +import Data.List (isPrefixOf, isSuffixOf, intersperse) + +-- | abstract token list type. AR 2001, revised and simplified 20\/4\/2003 +newtype Str = Str [Tok] deriving (Read, Show, Eq, Ord) + +-- | notice that having both pre and post would leave to inconsistent situations: +-- +-- > pre {"x" ; "y" / "a"} ++ post {"b" ; "a" / "x"} +-- +-- always violates a condition expressed by the one or the other +data Tok = + TK String + | TN Ss [(Ss, [String])] -- ^ variants depending on next string +--- | TP Ss [(Ss, [String])] -- variants depending on previous string + deriving (Eq, Ord, Show, Read) + + +-- | a variant can itself be a token list, but for simplicity only a list of strings +-- i.e. not itself containing variants +type Ss = [String] + +-- matching functions in both ways + +matchPrefix :: Ss -> [(Ss,[String])] -> [String] -> Ss +matchPrefix s vs t = + head $ [u | + (u,as) <- vs, + any (\c -> isPrefixOf c (concat (unmarkup t))) as + ] ++ [s] + +matchSuffix :: String -> Ss -> [(Ss,[String])] -> Ss +matchSuffix t s vs = + head ([u | (u,as) <- vs, any (\c -> isSuffixOf c t) as] ++ [s]) + +unmarkup :: [String] -> [String] +unmarkup = filter (not . isXMLtag) where + isXMLtag s = case s of + '<':cs@(_:_) -> last cs == '>' + _ -> False + +str2strings :: Str -> Ss +str2strings (Str st) = alls st where + alls st = case st of + TK s : ts -> s : alls ts + TN ds vs : ts -> matchPrefix ds vs t ++ t where t = alls ts +---- u :TP ds vs: ts -> [u] ++ matchSuffix u ds vs ++ alls ts + [] -> [] + +str2allStrings :: Str -> [Ss] +str2allStrings (Str st) = alls st where + alls st = case st of + TK s : ts -> [s : t | t <- alls ts] + TN ds vs : [] -> [ds ++ v | v <- map fst vs] + TN ds vs : ts -> [matchPrefix ds vs t ++ t | t <- alls ts] + [] -> [[]] + +sstr :: Str -> String +sstr = unwords . str2strings + +-- | to handle a list of variants +sstrV :: [Str] -> String +sstrV ss = case ss of + [] -> "*" + _ -> unwords $ intersperse "/" $ map (unwords . str2strings) ss + +str :: String -> Str +str s = if null s then Str [] else Str [itS s] + +itS :: String -> Tok +itS s = TK s + +isZeroTok :: Str -> Bool +isZeroTok t = case t of + Str [] -> True + Str [TK []] -> True + _ -> False + +strTok :: Ss -> [(Ss,[String])] -> Str +strTok ds vs = Str [TN ds vs] + +prStr :: Str -> String +prStr = prQuotedString . sstr + +plusStr :: Str -> Str -> Str +plusStr (Str ss) (Str tt) = Str (ss ++ tt) + +glueStr :: Str -> Str -> Str +glueStr (Str ss) (Str tt) = Str $ case (ss,tt) of + ([],_) -> tt + (_,[]) -> ss + _ -> init ss ++ glueIt (last ss) (head tt) ++ tail tt + where + glueIt t u = case (t,u) of + (TK s, TK s') -> return $ TK $ s ++ s' + (TN ds vs, TN es ws) -> return $ TN (glues (matchPrefix ds vs es) es) + [(glues (matchPrefix ds vs w) w,cs) | (w,cs) <- ws] + (TN ds vs, TK s) -> map TK $ glues (matchPrefix ds vs [s]) [s] + (TK s, TN es ws) -> return $ TN (glues [s] es) [(glues [s] w, c) | (w,c) <- ws] + +glues :: [[a]] -> [[a]] -> [[a]] +glues ss tt = case (ss,tt) of + ([],_) -> tt + (_,[]) -> ss + _ -> init ss ++ [last ss ++ head tt] ++ tail tt + +-- | to create the list of all lexical items +allItems :: Str -> [String] +allItems (Str s) = concatMap allOne s where + allOne t = case t of + TK s -> [s] + TN ds vs -> ds ++ concatMap fst vs diff --git a/src/compiler/GF/Data/TrieMap.hs b/src/compiler/GF/Data/TrieMap.hs new file mode 100644 index 000000000..a6749d641 --- /dev/null +++ b/src/compiler/GF/Data/TrieMap.hs @@ -0,0 +1,66 @@ +module GF.Data.TrieMap
+ ( TrieMap
+
+ , empty
+ , singleton
+
+ , lookup
+
+ , null
+ , decompose
+
+ , insertWith
+
+ , unionWith
+ , unionsWith
+
+ , elems
+ ) where
+
+import Prelude hiding (lookup, null)
+import qualified Data.Map as Map
+
+data TrieMap k v = Tr (Maybe v) (Map.Map k (TrieMap k v))
+
+empty = Tr Nothing Map.empty
+
+singleton :: [k] -> a -> TrieMap k a
+singleton [] v = Tr (Just v) Map.empty
+singleton (k:ks) v = Tr Nothing (Map.singleton k (singleton ks v))
+
+lookup :: Ord k => [k] -> TrieMap k a -> Maybe a
+lookup [] (Tr mb_v m) = mb_v
+lookup (k:ks) (Tr mb_v m) = Map.lookup k m >>= lookup ks
+
+null :: TrieMap k v -> Bool
+null (Tr Nothing m) = Map.null m
+null _ = False
+
+decompose :: TrieMap k v -> (Maybe v, Map.Map k (TrieMap k v))
+decompose (Tr mb_v m) = (mb_v,m)
+
+insertWith :: Ord k => (v -> v -> v) -> [k] -> v -> TrieMap k v -> TrieMap k v
+insertWith f [] v0 (Tr mb_v m) = case mb_v of
+ Just v -> Tr (Just (f v0 v)) m
+ Nothing -> Tr (Just v0 ) m
+insertWith f (k:ks) v0 (Tr mb_v m) = case Map.lookup k m of
+ Nothing -> Tr mb_v (Map.insert k (singleton ks v0) m)
+ Just tr -> Tr mb_v (Map.insert k (insertWith f ks v0 tr) m)
+
+unionWith :: Ord k => (v -> v -> v) -> TrieMap k v -> TrieMap k v -> TrieMap k v
+unionWith f (Tr mb_v1 m1) (Tr mb_v2 m2) =
+ let mb_v = case (mb_v1,mb_v2) of
+ (Nothing,Nothing) -> Nothing
+ (Just v ,Nothing) -> Just v
+ (Nothing,Just v ) -> Just v
+ (Just v1,Just v2) -> Just (f v1 v2)
+ m = Map.unionWith (unionWith f) m1 m2
+ in Tr mb_v m
+
+unionsWith :: Ord k => (v -> v -> v) -> [TrieMap k v] -> TrieMap k v
+unionsWith f = foldl (unionWith f) empty
+
+elems :: TrieMap k v -> [v]
+elems tr = collect tr []
+ where
+ collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.fold collect xs m)
diff --git a/src/compiler/GF/Data/Utilities.hs b/src/compiler/GF/Data/Utilities.hs new file mode 100644 index 000000000..74d3ef81e --- /dev/null +++ b/src/compiler/GF/Data/Utilities.hs @@ -0,0 +1,190 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/26 18:47:16 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- Basic functions not in the standard libraries +----------------------------------------------------------------------------- + + +module GF.Data.Utilities where + +import Data.Maybe +import Data.List +import Control.Monad (MonadPlus(..),liftM) + +-- * functions on lists + +sameLength :: [a] -> [a] -> Bool +sameLength [] [] = True +sameLength (_:xs) (_:ys) = sameLength xs ys +sameLength _ _ = False + +notLongerThan, longerThan :: Int -> [a] -> Bool +notLongerThan n = null . snd . splitAt n +longerThan n = not . notLongerThan n + +lookupList :: Eq a => a -> [(a, b)] -> [b] +lookupList a [] = [] +lookupList a (p:ps) | a == fst p = snd p : lookupList a ps + | otherwise = lookupList a ps + +split :: [a] -> ([a], [a]) +split (x : y : as) = (x:xs, y:ys) + where (xs, ys) = split as +split as = (as, []) + +splitBy :: (a -> Bool) -> [a] -> ([a], [a]) +splitBy p [] = ([], []) +splitBy p (a : as) = if p a then (a:xs, ys) else (xs, a:ys) + where (xs, ys) = splitBy p as + +foldMerge :: (a -> a -> a) -> a -> [a] -> a +foldMerge merge zero = fm + where fm [] = zero + fm [a] = a + fm abs = let (as, bs) = split abs in fm as `merge` fm bs + +select :: [a] -> [(a, [a])] +select [] = [] +select (x:xs) = (x,xs) : [ (y,x:ys) | (y,ys) <- select xs ] + +updateNth :: (a -> a) -> Int -> [a] -> [a] +updateNth update 0 (a : as) = update a : as +updateNth update n (a : as) = a : updateNth update (n-1) as + +updateNthM :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNthM update 0 (a : as) = liftM (:as) (update a) +updateNthM update n (a : as) = liftM (a:) (updateNthM update (n-1) as) + +-- | Like 'init', but returns the empty list when the input is empty. +safeInit :: [a] -> [a] +safeInit [] = [] +safeInit xs = init xs + +-- | Like 'nub', but more efficient as it uses sorting internally. +sortNub :: Ord a => [a] -> [a] +sortNub = map head . group . sort + +-- | Like 'nubBy', but more efficient as it uses sorting internally. +sortNubBy :: (a -> a -> Ordering) -> [a] -> [a] +sortNubBy f = map head . sortGroupBy f + +-- | Sorts and then groups elements given and ordering of the +-- elements. +sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]] +sortGroupBy f = groupBy (compareEq f) . sortBy f + +-- | Take the union of a list of lists. +unionAll :: Eq a => [[a]] -> [a] +unionAll = nub . concat + +-- | Like 'lookup', but fails if the argument is not found, +-- instead of returning Nothing. +lookup' :: (Show a, Eq a) => a -> [(a,b)] -> b +lookup' x = fromMaybe (error $ "Not found: " ++ show x) . lookup x + +-- | Like 'find', but fails if nothing is found. +find' :: (a -> Bool) -> [a] -> a +find' p = fromJust . find p + +-- | Set a value in a lookup table. +tableSet :: Eq a => a -> b -> [(a,b)] -> [(a,b)] +tableSet x y [] = [(x,y)] +tableSet x y (p@(x',_):xs) | x' == x = (x,y):xs + | otherwise = p:tableSet x y xs + +-- | Group tuples by their first elements. +buildMultiMap :: Ord a => [(a,b)] -> [(a,[b])] +buildMultiMap = map (\g -> (fst (head g), map snd g) ) + . sortGroupBy (compareBy fst) + +-- | Replace all occurences of an element by another element. +replace :: Eq a => a -> a -> [a] -> [a] +replace x y = map (\z -> if z == x then y else z) + +-- * equality functions + +-- | Use an ordering function as an equality predicate. +compareEq :: (a -> a -> Ordering) -> a -> a -> Bool +compareEq f x y = case f x y of + EQ -> True + _ -> False + +-- * ordering functions + +compareBy :: Ord b => (a -> b) -> a -> a -> Ordering +compareBy f = both f compare + +both :: (a -> b) -> (b -> b -> c) -> a -> a -> c +both f g x y = g (f x) (f y) + +-- * functions on pairs + +mapFst :: (a -> a') -> (a, b) -> (a', b) +mapFst f (a, b) = (f a, b) + +mapSnd :: (b -> b') -> (a, b) -> (a, b') +mapSnd f (a, b) = (a, f b) + +-- * functions on monads + +-- | Return the given value if the boolean is true, els return 'mzero'. +whenMP :: MonadPlus m => Bool -> a -> m a +whenMP b x = if b then return x else mzero + +-- * functions on Maybes + +-- | Returns true if the argument is Nothing or Just [] +nothingOrNull :: Maybe [a] -> Bool +nothingOrNull = maybe True null + +-- * functions on functions + +-- | Apply all the functions in the list to the argument. +foldFuns :: [a -> a] -> a -> a +foldFuns fs x = foldl (flip ($)) x fs + +-- | Fixpoint iteration. +fix :: Eq a => (a -> a) -> a -> a +fix f x = let x' = f x in if x' == x then x else fix f x' + +-- * functions on strings + +-- | Join a number of lists by using the given glue +-- between the lists. +join :: [a] -- ^ glue + -> [[a]] -- ^ lists to join + -> [a] +join g = concat . intersperse g + +-- * ShowS-functions + +nl :: ShowS +nl = showChar '\n' + +sp :: ShowS +sp = showChar ' ' + +wrap :: String -> ShowS -> String -> ShowS +wrap o s c = showString o . s . showString c + +concatS :: [ShowS] -> ShowS +concatS = foldr (.) id + +unwordsS :: [ShowS] -> ShowS +unwordsS = joinS " " + +unlinesS :: [ShowS] -> ShowS +unlinesS = joinS "\n" + +joinS :: String -> [ShowS] -> ShowS +joinS glue = concatS . intersperse (showString glue) + + + diff --git a/src/compiler/GF/Data/XML.hs b/src/compiler/GF/Data/XML.hs new file mode 100644 index 000000000..bdc6f98a1 --- /dev/null +++ b/src/compiler/GF/Data/XML.hs @@ -0,0 +1,58 @@ +---------------------------------------------------------------------- +-- | +-- Module : XML +-- +-- Utilities for creating XML documents. +---------------------------------------------------------------------- +module GF.Data.XML (XML(..), Attr, comments, showXMLDoc, showsXMLDoc, showsXML, bottomUpXML) where + +import GF.Data.Utilities +import GF.Text.UTF8 + +data XML = Data String | CData String | Tag String [Attr] [XML] | ETag String [Attr] | Comment String | Empty + deriving (Ord,Eq,Show) + +type Attr = (String,String) + +comments :: [String] -> [XML] +comments = map Comment + +showXMLDoc :: XML -> String +showXMLDoc xml = showsXMLDoc xml "" + +showsXMLDoc :: XML -> ShowS +showsXMLDoc xml = encodeUTF8 . showString header . showsXML xml + where header = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + +showsXML :: XML -> ShowS +showsXML = showsX 0 where + showsX i x = ind i . case x of + (Data s) -> showString s + (CData s) -> showString "<![CDATA[" . showString s .showString "]]>" + (ETag t as) -> showChar '<' . showString t . showsAttrs as . showString "/>" + (Tag t as cs) -> + showChar '<' . showString t . showsAttrs as . showChar '>' . + concatS (map (showsX (i+1)) cs) . ind i . + showString "</" . showString t . showChar '>' + (Comment c) -> showString "<!-- " . showString c . showString " -->" + (Empty) -> id + ind i = showString ("\n" ++ replicate (2*i) ' ') + +showsAttrs :: [Attr] -> ShowS +showsAttrs = concatS . map (showChar ' ' .) . map showsAttr + +showsAttr :: Attr -> ShowS +showsAttr (n,v) = showString n . showString "=\"" . showString (escape v) . showString "\"" + +escape :: String -> String +escape = concatMap escChar + where + escChar '<' = "<" + escChar '>' = ">" + escChar '&' = "&" + escChar '"' = """ + escChar c = [c] + +bottomUpXML :: (XML -> XML) -> XML -> XML +bottomUpXML f (Tag n attrs cs) = f (Tag n attrs (map (bottomUpXML f) cs)) +bottomUpXML f x = f x diff --git a/src/compiler/GF/Data/Zipper.hs b/src/compiler/GF/Data/Zipper.hs new file mode 100644 index 000000000..a4491f76e --- /dev/null +++ b/src/compiler/GF/Data/Zipper.hs @@ -0,0 +1,257 @@ +---------------------------------------------------------------------- +-- | +-- Module : Zipper +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/06/11 20:27:05 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.9 $ +-- +-- Gérard Huet's zipper (JFP 7 (1997)). AR 10\/8\/2001 +----------------------------------------------------------------------------- + +module GF.Data.Zipper (-- * types + Tr(..), + Path(..), + Loc(..), + -- * basic (original) functions + leaf, + goLeft, goRight, goUp, goDown, + changeLoc, + changeNode, + forgetNode, + -- * added sequential representation + goAhead, + goBack, + -- ** n-ary versions + goAheadN, + goBackN, + -- * added mappings between locations and trees + loc2tree, + loc2treeMarked, + tree2loc, + goRoot, + goLast, + goPosition, + getPosition, + keepPosition, + -- * added some utilities + traverseCollect, + scanTree, + mapTr, + mapTrM, + mapPath, + mapPathM, + mapLoc, + mapLocM, + foldTr, + foldTrM, + mapSubtrees, + mapSubtreesM, + changeRoot, + nthSubtree, + arityTree + ) where + +import GF.Data.Operations + +newtype Tr a = Tr (a,[Tr a]) deriving (Show,Eq) + +data Path a = + Top + | Node ([Tr a], (Path a, a), [Tr a]) + deriving Show + +leaf :: a -> Tr a +leaf a = Tr (a,[]) + +newtype Loc a = Loc (Tr a, Path a) deriving Show + +goLeft, goRight, goUp, goDown :: Loc a -> Err (Loc a) +goLeft (Loc (t,p)) = case p of + Top -> Bad "left of top" + Node (l:left, upv, right) -> return $ Loc (l, Node (left,upv,t:right)) + Node _ -> Bad "left of first" +goRight (Loc (t,p)) = case p of + Top -> Bad "right of top" + Node (left, upv, r:right) -> return $ Loc (r, Node (t:left,upv,right)) + Node _ -> Bad "right of first" +goUp (Loc (t,p)) = case p of + Top -> Bad "up of top" + Node (left, (up,v), right) -> + return $ Loc (Tr (v, reverse left ++ (t:right)), up) +goDown (Loc (t,p)) = case t of + Tr (v,(t1:trees)) -> return $ Loc (t1,Node ([],(p,v),trees)) + _ -> Bad "down of empty" + +changeLoc :: Loc a -> Tr a -> Err (Loc a) +changeLoc (Loc (_,p)) t = return $ Loc (t,p) + +changeNode :: (a -> a) -> Loc a -> Loc a +changeNode f (Loc (Tr (n,ts),p)) = Loc (Tr (f n, ts),p) + +forgetNode :: Loc a -> Err (Loc a) +forgetNode (Loc (Tr (n,[t]),p)) = return $ Loc (t,p) +forgetNode _ = Bad $ "not a one-branch tree" + +-- added sequential representation + +-- | a successor function +goAhead :: Loc a -> Err (Loc a) +goAhead s@(Loc (t,p)) = case (t,p) of + (Tr (_,_:_),Node (_,_,_:_)) -> goDown s + (Tr (_,[]), _) -> upsRight s + (_, _) -> goDown s + where + upsRight t = case goRight t of + Ok t' -> return t' + Bad _ -> goUp t >>= upsRight + +-- | a predecessor function +goBack :: Loc a -> Err (Loc a) +goBack s@(Loc (t,p)) = case goLeft s of + Ok s' -> downRight s' + _ -> goUp s + where + downRight s = case goDown s of + Ok s' -> case goRight s' of + Ok s'' -> downRight s'' + _ -> downRight s' + _ -> return s + +-- n-ary versions + +goAheadN :: Int -> Loc a -> Err (Loc a) +goAheadN i st + | i < 1 = return st + | otherwise = goAhead st >>= goAheadN (i-1) + +goBackN :: Int -> Loc a -> Err (Loc a) +goBackN i st + | i < 1 = return st + | otherwise = goBack st >>= goBackN (i-1) + +-- added mappings between locations and trees + +loc2tree :: Loc a -> Tr a +loc2tree (Loc (t,p)) = case p of + Top -> t + Node (left,(p',v),right) -> + loc2tree (Loc (Tr (v, reverse left ++ (t : right)),p')) + +loc2treeMarked :: Loc a -> Tr (a, Bool) +loc2treeMarked (Loc (Tr (a,ts),p)) = + loc2tree (Loc (Tr (mark a, map (mapTr nomark) ts), mapPath nomark p)) + where + (mark, nomark) = (\a -> (a,True), \a -> (a, False)) + +tree2loc :: Tr a -> Loc a +tree2loc t = Loc (t,Top) + +goRoot :: Loc a -> Loc a +goRoot = tree2loc . loc2tree + +goLast :: Loc a -> Err (Loc a) +goLast = rep goAhead where + rep f s = err (const (return s)) (rep f) (f s) + +goPosition :: [Int] -> Loc a -> Err (Loc a) +goPosition p = go p . goRoot where + go [] s = return s + go (p:ps) s = goDown s >>= apply p goRight >>= go ps + +getPosition :: Loc a -> [Int] +getPosition = reverse . getp where + getp (Loc (t,p)) = case p of + Top -> [] + Node (left,(p',v),_) -> length left : getp (Loc (Tr (v, []),p')) + +keepPosition :: (Loc a -> Err (Loc a)) -> (Loc a -> Err (Loc a)) +keepPosition f s = do + let p = getPosition s + s' <- f s + goPosition p s' + +apply :: Monad m => Int -> (a -> m a) -> a -> m a +apply n f a = case n of + 0 -> return a + _ -> f a >>= apply (n-1) f + +-- added some utilities + +traverseCollect :: Path a -> [a] +traverseCollect p = reverse $ case p of + Top -> [] + Node (_, (p',v), _) -> v : traverseCollect p' + +scanTree :: Tr a -> [a] +scanTree (Tr (a,ts)) = a : concatMap scanTree ts + +mapTr :: (a -> b) -> Tr a -> Tr b +mapTr f (Tr (x,ts)) = Tr (f x, map (mapTr f) ts) + +mapTrM :: Monad m => (a -> m b) -> Tr a -> m (Tr b) +mapTrM f (Tr (x,ts)) = do + fx <- f x + fts <- mapM (mapTrM f) ts + return $ Tr (fx,fts) + +mapPath :: (a -> b) -> Path a -> Path b +mapPath f p = case p of + Node (ts1, (p,v), ts2) -> + Node (map (mapTr f) ts1, (mapPath f p, f v), map (mapTr f) ts2) + Top -> Top + +mapPathM :: Monad m => (a -> m b) -> Path a -> m (Path b) +mapPathM f p = case p of + Node (ts1, (p,v), ts2) -> do + ts1' <- mapM (mapTrM f) ts1 + p' <- mapPathM f p + v' <- f v + ts2' <- mapM (mapTrM f) ts2 + return $ Node (ts1', (p',v'), ts2') + Top -> return Top + +mapLoc :: (a -> b) -> Loc a -> Loc b +mapLoc f (Loc (t,p)) = Loc (mapTr f t, mapPath f p) + +mapLocM :: Monad m => (a -> m b) -> Loc a -> m (Loc b) +mapLocM f (Loc (t,p)) = do + t' <- mapTrM f t + p' <- mapPathM f p + return $ (Loc (t',p')) + +foldTr :: (a -> [b] -> b) -> Tr a -> b +foldTr f (Tr (x,ts)) = f x (map (foldTr f) ts) + +foldTrM :: Monad m => (a -> [b] -> m b) -> Tr a -> m b +foldTrM f (Tr (x,ts)) = do + fts <- mapM (foldTrM f) ts + f x fts + +mapSubtrees :: (Tr a -> Tr a) -> Tr a -> Tr a +mapSubtrees f t = let Tr (x,ts) = f t in Tr (x, map (mapSubtrees f) ts) + +mapSubtreesM :: Monad m => (Tr a -> m (Tr a)) -> Tr a -> m (Tr a) +mapSubtreesM f t = do + Tr (x,ts) <- f t + ts' <- mapM (mapSubtreesM f) ts + return $ Tr (x, ts') + +-- | change the root without moving the pointer +changeRoot :: (a -> a) -> Loc a -> Loc a +changeRoot f loc = case loc of + Loc (Tr (a,ts),Top) -> Loc (Tr (f a,ts),Top) + Loc (t, Node (left,pv,right)) -> Loc (t, Node (left,chPath pv,right)) + where + chPath pv = case pv of + (Top,a) -> (Top, f a) + (Node (left,pv,right),v) -> (Node (left, chPath pv,right),v) + +nthSubtree :: Int -> Tr a -> Err (Tr a) +nthSubtree n (Tr (a,ts)) = ts !? n + +arityTree :: Tr a -> Int +arityTree (Tr (_,ts)) = length ts diff --git a/src/compiler/GF/Grammar.hs b/src/compiler/GF/Grammar.hs new file mode 100644 index 000000000..c540f77b8 --- /dev/null +++ b/src/compiler/GF/Grammar.hs @@ -0,0 +1,29 @@ +---------------------------------------------------------------------- +-- | +-- Module : Abstract +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:18 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.4 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Grammar + ( module GF.Infra.Ident, + module GF.Grammar.Grammar, + module GF.Grammar.Values, + module GF.Grammar.Macros, + module GF.Grammar.MMacros, + module GF.Grammar.Printer + ) where + +import GF.Infra.Ident +import GF.Grammar.Grammar +import GF.Grammar.Values +import GF.Grammar.Macros +import GF.Grammar.MMacros +import GF.Grammar.Printer diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs new file mode 100644 index 000000000..fbad5ac7e --- /dev/null +++ b/src/compiler/GF/Grammar/Binary.hs @@ -0,0 +1,261 @@ +----------------------------------------------------------------------
+-- |
+-- Module : GF.Grammar.Binary
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-----------------------------------------------------------------------------
+
+module GF.Grammar.Binary where
+
+import Data.Binary
+import qualified Data.Map as Map
+import qualified Data.ByteString.Char8 as BS
+
+import GF.Data.Operations
+import GF.Infra.Ident
+import GF.Infra.Option
+import GF.Infra.Modules
+import GF.Grammar.Grammar
+
+instance Binary Ident where
+ put id = put (ident2bs id)
+ get = do bs <- get
+ if bs == BS.pack "_"
+ then return identW
+ else return (identC bs)
+
+instance (Ord i, Binary i, Binary a) => Binary (MGrammar i a) where
+ put (MGrammar ms) = put ms
+ get = fmap MGrammar get
+
+instance (Ord i, Binary i, Binary a) => Binary (ModInfo i a) where
+ put mi = do put (mtype mi,mstatus mi,flags mi,extend mi,mwith mi,opens mi,mexdeps mi,jments mi,positions mi)
+ get = do (mtype,mstatus,flags,extend,mwith,opens,med,jments,positions) <- get
+ return (ModInfo mtype mstatus flags extend mwith opens med jments positions)
+
+instance (Binary i) => Binary (ModuleType i) where
+ put MTAbstract = putWord8 0
+ put MTResource = putWord8 2
+ put (MTConcrete i) = putWord8 3 >> put i
+ put MTInterface = putWord8 4
+ put (MTInstance i) = putWord8 5 >> put i
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return MTAbstract
+ 2 -> return MTResource
+ 3 -> get >>= return . MTConcrete
+ 4 -> return MTInterface
+ 5 -> get >>= return . MTInstance
+ _ -> decodingError
+
+instance (Binary i) => Binary (MInclude i) where
+ put MIAll = putWord8 0
+ put (MIOnly xs) = putWord8 1 >> put xs
+ put (MIExcept xs) = putWord8 2 >> put xs
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return MIAll
+ 1 -> fmap MIOnly get
+ 2 -> fmap MIExcept get
+ _ -> decodingError
+
+instance Binary i => Binary (OpenSpec i) where
+ put (OSimple i) = putWord8 0 >> put i
+ put (OQualif i j) = putWord8 1 >> put (i,j)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> get >>= return . OSimple
+ 1 -> get >>= \(i,j) -> return (OQualif i j)
+ _ -> decodingError
+
+instance Binary ModuleStatus where
+ put MSComplete = putWord8 0
+ put MSIncomplete = putWord8 1
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return MSComplete
+ 1 -> return MSIncomplete
+ _ -> decodingError
+
+instance Binary Options where
+ put = put . optionsGFO
+ get = do opts <- get
+ case parseModuleOptions ["--" ++ flag ++ "=" ++ value | (flag,value) <- opts] of
+ Ok x -> return x
+ Bad msg -> fail msg
+
+instance Binary Info where
+ put (AbsCat x y) = putWord8 0 >> put (x,y)
+ put (AbsFun x y z) = putWord8 1 >> put (x,y,z)
+ put (ResParam x y) = putWord8 2 >> put (x,y)
+ put (ResValue x) = putWord8 3 >> put x
+ put (ResOper x y) = putWord8 4 >> put (x,y)
+ put (ResOverload x y)= putWord8 5 >> put (x,y)
+ put (CncCat x y z) = putWord8 6 >> put (x,y,z)
+ put (CncFun x y z) = putWord8 7 >> put (x,y,z)
+ put (AnyInd x y) = putWord8 8 >> put (x,y)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> get >>= \(x,y) -> return (AbsCat x y)
+ 1 -> get >>= \(x,y,z) -> return (AbsFun x y z)
+ 2 -> get >>= \(x,y) -> return (ResParam x y)
+ 3 -> get >>= \x -> return (ResValue x)
+ 4 -> get >>= \(x,y) -> return (ResOper x y)
+ 5 -> get >>= \(x,y) -> return (ResOverload x y)
+ 6 -> get >>= \(x,y,z) -> return (CncCat x y z)
+ 7 -> get >>= \(x,y,z) -> return (CncFun x y z)
+ 8 -> get >>= \(x,y) -> return (AnyInd x y)
+ _ -> decodingError
+
+instance Binary BindType where
+ put Explicit = putWord8 0
+ put Implicit = putWord8 1
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return Explicit
+ 1 -> return Implicit
+ _ -> decodingError
+
+instance Binary Term where
+ put (Vr x) = putWord8 0 >> put x
+ put (Cn x) = putWord8 1 >> put x
+ put (Con x) = putWord8 2 >> put x
+ put (Sort x) = putWord8 3 >> put x
+ put (EInt x) = putWord8 4 >> put x
+ put (EFloat x) = putWord8 5 >> put x
+ put (K x) = putWord8 6 >> put x
+ put (Empty) = putWord8 7
+ put (App x y) = putWord8 8 >> put (x,y)
+ put (Abs x y z) = putWord8 9 >> put (x,y,z)
+ put (Meta x) = putWord8 10 >> put x
+ put (Prod w x y z)= putWord8 11 >> put (w,x,y,z)
+ put (Typed x y) = putWord8 12 >> put (x,y)
+ put (Example x y) = putWord8 13 >> put (x,y)
+ put (RecType x) = putWord8 14 >> put x
+ put (R x) = putWord8 15 >> put x
+ put (P x y) = putWord8 16 >> put (x,y)
+ put (ExtR x y) = putWord8 17 >> put (x,y)
+ put (Table x y) = putWord8 18 >> put (x,y)
+ put (T x y) = putWord8 19 >> put (x,y)
+ put (V x y) = putWord8 20 >> put (x,y)
+ put (S x y) = putWord8 21 >> put (x,y)
+ put (Let x y) = putWord8 22 >> put (x,y)
+ put (Q x y) = putWord8 23 >> put (x,y)
+ put (QC x y) = putWord8 24 >> put (x,y)
+ put (C x y) = putWord8 25 >> put (x,y)
+ put (Glue x y) = putWord8 26 >> put (x,y)
+ put (EPatt x) = putWord8 27 >> put x
+ put (EPattType x) = putWord8 28 >> put x
+ put (FV x) = putWord8 29 >> put x
+ put (Alts x) = putWord8 30 >> put x
+ put (Strs x) = putWord8 31 >> put x
+ put (ELin x y) = putWord8 32 >> put (x,y)
+
+ get = do tag <- getWord8
+ case tag of
+ 0 -> get >>= \x -> return (Vr x)
+ 1 -> get >>= \x -> return (Cn x)
+ 2 -> get >>= \x -> return (Con x)
+ 3 -> get >>= \x -> return (Sort x)
+ 4 -> get >>= \x -> return (EInt x)
+ 5 -> get >>= \x -> return (EFloat x)
+ 6 -> get >>= \x -> return (K x)
+ 7 -> return (Empty)
+ 8 -> get >>= \(x,y) -> return (App x y)
+ 9 -> get >>= \(x,y,z) -> return (Abs x y z)
+ 10 -> get >>= \x -> return (Meta x)
+ 11 -> get >>= \(w,x,y,z)->return (Prod w x y z)
+ 12 -> get >>= \(x,y) -> return (Typed x y)
+ 13 -> get >>= \(x,y) -> return (Example x y)
+ 14 -> get >>= \x -> return (RecType x)
+ 15 -> get >>= \x -> return (R x)
+ 16 -> get >>= \(x,y) -> return (P x y)
+ 17 -> get >>= \(x,y) -> return (ExtR x y)
+ 18 -> get >>= \(x,y) -> return (Table x y)
+ 19 -> get >>= \(x,y) -> return (T x y)
+ 20 -> get >>= \(x,y) -> return (V x y)
+ 21 -> get >>= \(x,y) -> return (S x y)
+ 22 -> get >>= \(x,y) -> return (Let x y)
+ 23 -> get >>= \(x,y) -> return (Q x y)
+ 24 -> get >>= \(x,y) -> return (QC x y)
+ 25 -> get >>= \(x,y) -> return (C x y)
+ 26 -> get >>= \(x,y) -> return (Glue x y)
+ 27 -> get >>= \x -> return (EPatt x)
+ 28 -> get >>= \x -> return (EPattType x)
+ 29 -> get >>= \x -> return (FV x)
+ 30 -> get >>= \x -> return (Alts x)
+ 31 -> get >>= \x -> return (Strs x)
+ 32 -> get >>= \(x,y) -> return (ELin x y)
+ _ -> decodingError
+
+instance Binary Patt where
+ put (PC x y) = putWord8 0 >> put (x,y)
+ put (PP x y z) = putWord8 1 >> put (x,y,z)
+ put (PV x) = putWord8 2 >> put x
+ put (PW) = putWord8 3
+ put (PR x) = putWord8 4 >> put x
+ put (PString x) = putWord8 5 >> put x
+ put (PInt x) = putWord8 6 >> put x
+ put (PFloat x) = putWord8 7 >> put x
+ put (PT x y) = putWord8 8 >> put (x,y)
+ put (PAs x y) = putWord8 10 >> put (x,y)
+ put (PNeg x) = putWord8 11 >> put x
+ put (PAlt x y) = putWord8 12 >> put (x,y)
+ put (PSeq x y) = putWord8 13 >> put (x,y)
+ put (PRep x) = putWord8 14 >> put x
+ put (PChar) = putWord8 15
+ put (PChars x) = putWord8 16 >> put x
+ put (PMacro x) = putWord8 17 >> put x
+ put (PM x y) = putWord8 18 >> put (x,y)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> get >>= \(x,y) -> return (PC x y)
+ 1 -> get >>= \(x,y,z) -> return (PP x y z)
+ 2 -> get >>= \x -> return (PV x)
+ 3 -> return (PW)
+ 4 -> get >>= \x -> return (PR x)
+ 5 -> get >>= \x -> return (PString x)
+ 6 -> get >>= \x -> return (PInt x)
+ 7 -> get >>= \x -> return (PFloat x)
+ 8 -> get >>= \(x,y) -> return (PT x y)
+ 10 -> get >>= \(x,y) -> return (PAs x y)
+ 11 -> get >>= \x -> return (PNeg x)
+ 12 -> get >>= \(x,y) -> return (PAlt x y)
+ 13 -> get >>= \(x,y) -> return (PSeq x y)
+ 14 -> get >>= \x -> return (PRep x)
+ 15 -> return (PChar)
+ 16 -> get >>= \x -> return (PChars x)
+ 17 -> get >>= \x -> return (PMacro x)
+ 18 -> get >>= \(x,y) -> return (PM x y)
+ _ -> decodingError
+
+instance Binary TInfo where
+ put TRaw = putWord8 0
+ put (TTyped t) = putWord8 1 >> put t
+ put (TComp t) = putWord8 2 >> put t
+ put (TWild t) = putWord8 3 >> put t
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return TRaw
+ 1 -> fmap TTyped get
+ 2 -> fmap TComp get
+ 3 -> fmap TWild get
+ _ -> decodingError
+
+instance Binary Label where
+ put (LIdent bs) = putWord8 0 >> put bs
+ put (LVar i) = putWord8 1 >> put i
+ get = do tag <- getWord8
+ case tag of
+ 0 -> fmap LIdent get
+ 1 -> fmap LVar get
+ _ -> decodingError
+
+decodeModHeader :: FilePath -> IO SourceModule
+decodeModHeader fpath = do
+ (m,mtype,mstatus,flags,extend,mwith,opens,med) <- decodeFile fpath
+ return (m,ModInfo mtype mstatus flags extend mwith opens med Map.empty Map.empty)
+
+decodingError = fail "This GFO file was compiled with different version of GF"
diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs new file mode 100644 index 000000000..a1d716994 --- /dev/null +++ b/src/compiler/GF/Grammar/CF.hs @@ -0,0 +1,128 @@ +---------------------------------------------------------------------- +-- | +-- Module : CF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/15 17:56:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.13 $ +-- +-- parsing CF grammars and converting them to GF +----------------------------------------------------------------------------- + +module GF.Grammar.CF (getCF) where + +import GF.Grammar.Grammar +import GF.Grammar.Macros +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Infra.Option + +import GF.Data.Operations + +import Data.Char +import Data.List +import qualified Data.ByteString.Char8 as BS + +getCF :: String -> String -> Err SourceGrammar +getCF name = fmap (cf2gf name) . pCF + +--------------------- +-- the parser ------- +--------------------- + +pCF :: String -> Err CF +pCF s = do + rules <- mapM getCFRule $ filter isRule $ lines s + return $ concat rules + where + isRule line = case dropWhile isSpace line of + '-':'-':_ -> False + _ -> not $ all isSpace line + +-- rules have an amazingly easy parser, if we use the format +-- fun. C -> item1 item2 ... where unquoted items are treated as cats +-- Actually would be nice to add profiles to this. + +getCFRule :: String -> Err [CFRule] +getCFRule s = getcf (wrds s) where + getcf ws = case ws of + fun : cat : a : its | isArrow a -> + Ok [(init fun, (cat, map mkIt its))] + cat : a : its | isArrow a -> + Ok [(mkFun cat it, (cat, map mkIt it)) | it <- chunk its] + _ -> Bad (" invalid rule:" +++ s) + isArrow a = elem a ["->", "::="] + mkIt w = case w of + ('"':w@(_:_)) -> Right (init w) + _ -> Left w + chunk its = case its of + [] -> [[]] + _ -> chunks "|" its + mkFun cat its = case its of + [] -> cat ++ "_" + _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style + clean = filter isAlphaNum -- to form valid identifiers + wrds = takeWhile (/= ";") . words -- to permit semicolon in the end + +type CF = [CFRule] + +type CFRule = (CFFun, (CFCat, [CFItem])) + +type CFItem = Either CFCat String + +type CFCat = String +type CFFun = String + +-------------------------- +-- the compiler ---------- +-------------------------- + +cf2gf :: String -> CF -> SourceGrammar +cf2gf name cf = MGrammar [ + (aname, addFlag (modifyFlags (\fs -> fs{optStartCat = Just cat})) + (emptyModInfo{mtype = MTAbstract, jments = abs})), + (cname, emptyModInfo{mtype = MTConcrete aname, jments = cnc}) + ] + where + (abs,cnc,cat) = cf2grammar cf + aname = identS $ name ++ "Abs" + cname = identS name + + +cf2grammar :: CF -> (BinTree Ident Info, BinTree Ident Info, String) +cf2grammar rules = (buildTree abs, buildTree conc, cat) where + abs = cats ++ funs + conc = lincats ++ lins + cat = case rules of + (_,(c,_)):_ -> c -- the value category of the first rule + _ -> error "empty CF" + cats = [(cat, AbsCat (Just []) (Just [])) | + cat <- nub (concat (map cf2cat rules))] ----notPredef cat + lincats = [(cat, CncCat (Just defLinType) Nothing Nothing) | (cat,AbsCat _ _) <- cats] + (funs,lins) = unzip (map cf2rule rules) + +cf2cat :: CFRule -> [Ident] +cf2cat (_,(cat, items)) = map identS $ cat : [c | Left c <- items] + +cf2rule :: CFRule -> ((Ident,Info),(Ident,Info)) +cf2rule (fun, (cat, items)) = (def,ldef) where + f = identS fun + def = (f, AbsFun (Just (mkProd args' (Cn (identS cat)) [])) Nothing Nothing) + args0 = zip (map (identS . ("x" ++) . show) [0..]) items + args = [((Explicit,v), Cn (identS c)) | (v, Left c) <- args0] + args' = [(Explicit,identS "_", Cn (identS c)) | (_, Left c) <- args0] + ldef = (f, CncFun + Nothing + (Just (mkAbs (map fst args) + (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))) + Nothing) + mkIt (v, Left _) = P (Vr v) theLinLabel + mkIt (_, Right a) = K a + foldconcat [] = K "" + foldconcat tt = foldr1 C tt + +identS = identC . BS.pack + diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs new file mode 100644 index 000000000..8d1468d9d --- /dev/null +++ b/src/compiler/GF/Grammar/Grammar.hs @@ -0,0 +1,230 @@ +---------------------------------------------------------------------- +-- | +-- Module : Grammar +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:20 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- GF source abstract syntax used internally in compilation. +-- +-- AR 23\/1\/2000 -- 30\/5\/2001 -- 4\/5\/2003 +----------------------------------------------------------------------------- + +module GF.Grammar.Grammar (SourceGrammar, + emptySourceGrammar, + SourceModInfo, + SourceModule, + mapSourceModule, + Info(..), + Type, + Cat, + Fun, + QIdent, + BindType(..), + Term(..), + Patt(..), + TInfo(..), + Label(..), + MetaId, + Hypo, + Context, + Equation, + Labelling, + Assign, + Case, + LocalDef, + Param, + Altern, + Substitution, + varLabel, tupleLabel, linLabel, theLinLabel, + ident2label, label2ident + ) where + +import GF.Infra.Ident +import GF.Infra.Option --- +import GF.Infra.Modules + +import GF.Data.Operations + +import qualified Data.ByteString.Char8 as BS + +-- | grammar as presented to the compiler +type SourceGrammar = MGrammar Ident Info + +emptySourceGrammar = MGrammar [] + +type SourceModInfo = ModInfo Ident Info + +type SourceModule = (Ident, SourceModInfo) + +mapSourceModule :: (SourceModInfo -> SourceModInfo) -> (SourceModule -> SourceModule) +mapSourceModule f (i,mi) = (i, f mi) + +-- | the constructors are judgements in +-- +-- - abstract syntax (/ABS/) +-- +-- - resource (/RES/) +-- +-- - concrete syntax (/CNC/) +-- +-- and indirection to module (/INDIR/) +data Info = +-- judgements in abstract syntax + AbsCat (Maybe Context) (Maybe [Term]) -- ^ (/ABS/) the second parameter is list of constructors - must be 'Id' or 'QId' + | AbsFun (Maybe Type) (Maybe Int) (Maybe [Equation]) -- ^ (/ABS/) type, arrity and definition of function + +-- judgements in resource + | ResParam (Maybe [Param]) (Maybe [Term]) -- ^ (/RES/) the second parameter is list of all possible values + | ResValue Type -- ^ (/RES/) to mark parameter constructors for lookup + | ResOper (Maybe Type) (Maybe Term) -- ^ (/RES/) + + | ResOverload [Ident] [(Type,Term)] -- ^ (/RES/) idents: modules inherited + +-- judgements in concrete syntax + | CncCat (Maybe Type) (Maybe Term) (Maybe Term) -- ^ (/CNC/) lindef ini'zed, + | CncFun (Maybe (Ident,Context,Type)) (Maybe Term) (Maybe Term) -- ^ (/CNC/) type info added at 'TC' + +-- indirection to module Ident + | AnyInd Bool Ident -- ^ (/INDIR/) the 'Bool' says if canonical + deriving Show + +type Type = Term +type Cat = QIdent +type Fun = QIdent + +type QIdent = (Ident,Ident) + +data BindType = + Explicit + | Implicit + deriving (Eq,Ord,Show) + +data Term = + Vr Ident -- ^ variable + | Cn Ident -- ^ constant + | Con Ident -- ^ constructor + | Sort Ident -- ^ basic type + | EInt Integer -- ^ integer literal + | EFloat Double -- ^ floating point literal + | K String -- ^ string literal or token: @\"foo\"@ + | Empty -- ^ the empty string @[]@ + + | App Term Term -- ^ application: @f a@ + | Abs BindType Ident Term -- ^ abstraction: @\x -> b@ + | Meta {-# UNPACK #-} !MetaId -- ^ metavariable: @?i@ (only parsable: ? = ?0) + | ImplArg Term -- ^ placeholder for implicit argument @{t}@ + | Prod BindType Ident Term Term -- ^ function type: @(x : A) -> B@, @A -> B@, @({x} : A) -> B@ + | Typed Term Term -- ^ type-annotated term +-- +-- /below this, the constructors are only for concrete syntax/ + | Example Term String -- ^ example-based term: @in M.C "foo" + | RecType [Labelling] -- ^ record type: @{ p : A ; ...}@ + | R [Assign] -- ^ record: @{ p = a ; ...}@ + | P Term Label -- ^ projection: @r.p@ + | ExtR Term Term -- ^ extension: @R ** {x : A}@ (both types and terms) + + | Table Term Term -- ^ table type: @P => A@ + | T TInfo [Case] -- ^ table: @table {p => c ; ...}@ + | V Type [Term] -- ^ table given as course of values: @table T [c1 ; ... ; cn]@ + | S Term Term -- ^ selection: @t ! p@ + + | Let LocalDef Term -- ^ local definition: @let {t : T = a} in b@ + + | Q Ident Ident -- ^ qualified constant from a package + | QC Ident Ident -- ^ qualified constructor from a package + + | C Term Term -- ^ concatenation: @s ++ t@ + | Glue Term Term -- ^ agglutination: @s + t@ + + | EPatt Patt -- ^ pattern (in macro definition): # p + | EPattType Term -- ^ pattern type: pattern T + + | ELincat Ident Term -- ^ boxed linearization type of Ident + | ELin Ident Term -- ^ boxed linearization of type Ident + + | FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@ + + | Alts (Term, [(Term, Term)]) -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@ + | Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@ + + deriving (Show, Eq, Ord) + +data Patt = + PC Ident [Patt] -- ^ constructor pattern: @C p1 ... pn@ @C@ + | PP Ident Ident [Patt] -- ^ package constructor pattern: @P.C p1 ... pn@ @P.C@ + | PV Ident -- ^ variable pattern: @x@ + | PW -- ^ wild card pattern: @_@ + | PR [(Label,Patt)] -- ^ record pattern: @{r = p ; ...}@ -- only concrete + | PString String -- ^ string literal pattern: @\"foo\"@ -- only abstract + | PInt Integer -- ^ integer literal pattern: @12@ -- only abstract + | PFloat Double -- ^ float literal pattern: @1.2@ -- only abstract + | PT Type Patt -- ^ type-annotated pattern + + | PAs Ident Patt -- ^ as-pattern: x@p + + | PImplArg Patt -- ^ placeholder for pattern for implicit argument @{p}@ + + -- regular expression patterns + | PNeg Patt -- ^ negated pattern: -p + | PAlt Patt Patt -- ^ disjunctive pattern: p1 | p2 + | PSeq Patt Patt -- ^ sequence of token parts: p + q + | PRep Patt -- ^ repetition of token part: p* + | PChar -- ^ string of length one: ? + | PChars [Char] -- ^ character list: ["aeiou"] + | PMacro Ident -- #p + | PM Ident Ident -- #m.p + + deriving (Show, Eq, Ord) + +-- | to guide computation and type checking of tables +data TInfo = + TRaw -- ^ received from parser; can be anything + | TTyped Type -- ^ type annontated, but can be anything + | TComp Type -- ^ expanded + | TWild Type -- ^ just one wild card pattern, no need to expand + deriving (Show, Eq, Ord) + +-- | record label +data Label = + LIdent BS.ByteString + | LVar Int + deriving (Show, Eq, Ord) + +type MetaId = Int + +type Hypo = (BindType,Ident,Term) -- (x:A) (_:A) A ({x}:A) +type Context = [Hypo] -- (x:A)(y:B) (x,y:A) (_,_:A) +type Equation = ([Patt],Term) + +type Labelling = (Label, Term) +type Assign = (Label, (Maybe Type, Term)) +type Case = (Patt, Term) +type Cases = ([Patt], Term) +type LocalDef = (Ident, (Maybe Type, Term)) + +type Param = (Ident, Context) +type Altern = (Term, [(Term, Term)]) + +type Substitution = [(Ident, Term)] + +varLabel :: Int -> Label +varLabel = LVar + +tupleLabel, linLabel :: Int -> Label +tupleLabel i = LIdent $! BS.pack ('p':show i) +linLabel i = LIdent $! BS.pack ('s':show i) + +theLinLabel :: Label +theLinLabel = LIdent (BS.singleton 's') + +ident2label :: Ident -> Label +ident2label c = LIdent (ident2bs c) + +label2ident :: Label -> Ident +label2ident (LIdent s) = identC s +label2ident (LVar i) = identC (BS.pack ('$':show i)) diff --git a/src/compiler/GF/Grammar/Lexer.hs b/src/compiler/GF/Grammar/Lexer.hs new file mode 100644 index 000000000..7cacb0588 --- /dev/null +++ b/src/compiler/GF/Grammar/Lexer.hs @@ -0,0 +1,478 @@ +{-# OPTIONS -fglasgow-exts -cpp #-}
+{-# LINE 3 "src\GF\Grammar\Lexer.x" #-}
+
+module GF.Grammar.Lexer
+ ( Token(..), Posn(..)
+ , P, runP, lexer, getPosn, failLoc
+ , isReservedWord
+ ) where
+
+import GF.Infra.Ident
+import GF.Data.Operations
+import qualified Data.ByteString.Char8 as BS
+import qualified Data.Map as Map
+
+
+#if __GLASGOW_HASKELL__ >= 603
+#include "ghcconfig.h"
+#elif defined(__GLASGOW_HASKELL__)
+#include "config.h"
+#endif
+#if __GLASGOW_HASKELL__ >= 503
+import Data.Array
+import Data.Char (ord)
+import Data.Array.Base (unsafeAt)
+#else
+import Array
+import Char (ord)
+#endif
+#if __GLASGOW_HASKELL__ >= 503
+import GHC.Exts
+#else
+import GlaExts
+#endif
+alex_base :: AlexAddr
+alex_base = AlexA# "\x01\x00\x00\x00\x15\x00\x00\x00\x39\x00\x00\x00\x3a\x00\x00\x00\x18\x00\x00\x00\x19\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x44\x00\x00\x00\x45\x00\x00\x00\x1b\x00\x00\x00\x1c\x00\x00\x00\x1d\x00\x00\x00\x42\x00\x00\x00\x00\x00\x00\x00\x26\x00\x00\x00\x27\x00\x00\x00\xf5\xff\xff\xff\x16\x00\x00\x00\x9c\x00\x00\x00\x00\x00\x00\x00\x2c\x00\x00\x00\x2e\x00\x00\x00\x6c\x01\x00\x00\x3c\x02\x00\x00\x00\x00\x00\x00\x17\x01\x00\x00\xe7\x01\x00\x00\xd5\x00\x00\x00\x35\x00\x00\x00\xe7\x00\x00\x00\xf2\x00\x00\x00\x1d\x01\x00\x00\xc2\x01\x00\x00\xcc\x01\x00\x00"#
+
+alex_table :: AlexAddr
+alex_table = AlexA# "\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0d\x00\x0d\x00\x0d\x00\x0d\x00\x0d\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0d\x00\x0e\x00\x1a\x00\x0e\x00\x0e\x00\x0e\x00\xff\xff\x15\x00\x0e\x00\x0e\x00\x0f\x00\x10\x00\x0e\x00\x05\x00\x0e\x00\x0e\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x0e\x00\x0e\x00\x0e\x00\x12\x00\x0e\x00\x0e\x00\x0e\x00\x04\x00\xff\xff\xff\xff\x02\x00\x02\x00\x09\x00\x09\x00\x09\x00\x0a\x00\x0d\x00\x0d\x00\x0d\x00\x0d\x00\x0d\x00\x0e\x00\x0e\x00\x0e\x00\x14\x00\x0e\x00\x14\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x11\x00\x0e\x00\xff\xff\x13\x00\xff\xff\x0d\x00\x20\x00\x00\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x00\x00\x00\x00\x09\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x0e\x00\x0e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x06\x00\x07\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x1b\x00\xff\xff\x00\x00\x00\x00\x18\x00\x1b\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\xff\xff\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x00\x00\x21\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x1c\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x22\x00\x00\x00\xff\xff\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x1f\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x1c\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x00\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00\x18\x00"#
+
+alex_check :: AlexAddr
+alex_check = AlexA# "\xff\xff\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00\x07\x00\x08\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x0e\x00\x0f\x00\x10\x00\x11\x00\x12\x00\x13\x00\x14\x00\x15\x00\x16\x00\x17\x00\x18\x00\x19\x00\x1a\x00\x1b\x00\x1c\x00\x1d\x00\x1e\x00\x1f\x00\x20\x00\x21\x00\x22\x00\x23\x00\x24\x00\x25\x00\x26\x00\x27\x00\x28\x00\x29\x00\x2a\x00\x2b\x00\x2c\x00\x2d\x00\x2e\x00\x2f\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x3a\x00\x3b\x00\x3c\x00\x3d\x00\x3e\x00\x3f\x00\x40\x00\x2d\x00\x0a\x00\x0a\x00\x2d\x00\x2d\x00\x2d\x00\x2d\x00\x2d\x00\x2d\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x2a\x00\x5c\x00\x2b\x00\x27\x00\x3e\x00\x27\x00\xff\xff\x3e\x00\xff\xff\xff\xff\xff\xff\xff\xff\x5b\x00\x5c\x00\x5d\x00\x5e\x00\x5f\x00\x60\x00\x20\x00\x2e\x00\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\x2d\x00\x2d\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7b\x00\x7c\x00\x7d\x00\x7e\x00\x7f\x00\x80\x00\x81\x00\x82\x00\x83\x00\x84\x00\x85\x00\x86\x00\x87\x00\x88\x00\x89\x00\x8a\x00\x8b\x00\x8c\x00\x8d\x00\x8e\x00\x8f\x00\x90\x00\x91\x00\x92\x00\x93\x00\x94\x00\x95\x00\x96\x00\x97\x00\x98\x00\x99\x00\x9a\x00\x9b\x00\x9c\x00\x9d\x00\x9e\x00\x9f\x00\xa0\x00\xa1\x00\xa2\x00\xa3\x00\xa4\x00\xa5\x00\xa6\x00\xa7\x00\xa8\x00\xa9\x00\xaa\x00\xab\x00\xac\x00\xad\x00\xae\x00\xaf\x00\xb0\x00\xb1\x00\xb2\x00\xb3\x00\xb4\x00\xb5\x00\xb6\x00\xb7\x00\xb8\x00\xb9\x00\xba\x00\xbb\x00\xbc\x00\xbd\x00\xbe\x00\xbf\x00\x7d\x00\x7d\x00\x27\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xd7\x00\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x22\x00\xf7\x00\xff\xff\xff\xff\x5f\x00\x27\x00\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x0a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x5c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x22\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x6e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x74\x00\xff\xff\xff\xff\x65\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\xff\xff\xff\xff\xff\xff\xff\xff\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x2d\x00\xff\xff\x0a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\x22\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\xff\xff\xff\xff\xff\xff\xff\xff\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\xff\xff\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\xff\xff\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00"#
+
+alex_deflt :: AlexAddr
+alex_deflt = AlexA# "\x17\x00\xff\xff\x03\x00\x03\x00\xff\xff\xff\xff\x0b\x00\xff\xff\x0b\x00\x0b\x00\x0b\x00\x0b\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x16\x00\x16\x00\xff\xff\xff\xff\xff\xff\x1b\x00\x1b\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+
+alex_accept = listArray (0::Int,34) [[],[],[(AlexAccSkip)],[(AlexAccSkip)],[],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAccSkip)],[],[],[],[],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_4))],[],[],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_6))],[],[],[],[(AlexAcc (alex_action_7))],[(AlexAcc (alex_action_8))],[(AlexAcc (alex_action_8))],[],[],[]]
+{-# LINE 42 "src\GF\Grammar\Lexer.x" #-}
+
+
+tok f p s = f s
+
+data Token
+ = T_exclmark
+ | T_patt
+ | T_int_label
+ | T_oparen
+ | T_cparen
+ | T_star
+ | T_starstar
+ | T_plus
+ | T_plusplus
+ | T_comma
+ | T_minus
+ | T_rarrow
+ | T_dot
+ | T_alt
+ | T_colon
+ | T_semicolon
+ | T_less
+ | T_equal
+ | T_big_rarrow
+ | T_great
+ | T_questmark
+ | T_obrack
+ | T_lam
+ | T_lamlam
+ | T_cbrack
+ | T_ocurly
+ | T_bar
+ | T_ccurly
+ | T_underscore
+ | T_at
+ | T_PType
+ | T_Str
+ | T_Strs
+ | T_Tok
+ | T_Type
+ | T_abstract
+ | T_case
+ | T_cat
+ | T_concrete
+ | T_data
+ | T_def
+ | T_flags
+ | T_fn
+ | T_fun
+ | T_in
+ | T_incomplete
+ | T_instance
+ | T_interface
+ | T_let
+ | T_lin
+ | T_lincat
+ | T_lindef
+ | T_of
+ | T_open
+ | T_oper
+ | T_param
+ | T_pattern
+ | T_pre
+ | T_printname
+ | T_resource
+ | T_strs
+ | T_table
+ | T_transfer
+ | T_variants
+ | T_where
+ | T_with
+ | T_String String -- string literals
+ | T_Integer Integer -- integer literals
+ | T_Double Double -- double precision float literals
+ | T_LString String
+ | T_Ident Ident
+ | T_EOF
+
+eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
+eitherResIdent tv s =
+ case Map.lookup s resWords of
+ Just t -> t
+ Nothing -> tv s
+
+isReservedWord :: BS.ByteString -> Bool
+isReservedWord s = Map.member s resWords
+
+resWords = Map.fromList
+ [ b "!" T_exclmark
+ , b "#" T_patt
+ , b "$" T_int_label
+ , b "(" T_oparen
+ , b ")" T_cparen
+ , b "*" T_star
+ , b "**" T_starstar
+ , b "+" T_plus
+ , b "++" T_plusplus
+ , b "," T_comma
+ , b "-" T_minus
+ , b "->" T_rarrow
+ , b "." T_dot
+ , b "/" T_alt
+ , b ":" T_colon
+ , b ";" T_semicolon
+ , b "<" T_less
+ , b "=" T_equal
+ , b "=>" T_big_rarrow
+ , b ">" T_great
+ , b "?" T_questmark
+ , b "[" T_obrack
+ , b "]" T_cbrack
+ , b "\\" T_lam
+ , b "\\\\" T_lamlam
+ , b "{" T_ocurly
+ , b "}" T_ccurly
+ , b "|" T_bar
+ , b "_" T_underscore
+ , b "@" T_at
+ , b "PType" T_PType
+ , b "Str" T_Str
+ , b "Strs" T_Strs
+ , b "Tok" T_Tok
+ , b "Type" T_Type
+ , b "abstract" T_abstract
+ , b "case" T_case
+ , b "cat" T_cat
+ , b "concrete" T_concrete
+ , b "data" T_data
+ , b "def" T_def
+ , b "flags" T_flags
+ , b "fn" T_fn
+ , b "fun" T_fun
+ , b "in" T_in
+ , b "incomplete" T_incomplete
+ , b "instance" T_instance
+ , b "interface" T_interface
+ , b "let" T_let
+ , b "lin" T_lin
+ , b "lincat" T_lincat
+ , b "lindef" T_lindef
+ , b "of" T_of
+ , b "open" T_open
+ , b "oper" T_oper
+ , b "param" T_param
+ , b "pattern" T_pattern
+ , b "pre" T_pre
+ , b "printname" T_printname
+ , b "resource" T_resource
+ , b "strs" T_strs
+ , b "table" T_table
+ , b "transfer" T_transfer
+ , b "variants" T_variants
+ , b "where" T_where
+ , b "with" T_with
+ ]
+ where b s t = (BS.pack s, t)
+
+unescapeInitTail :: String -> String
+unescapeInitTail = unesc . tail where
+ unesc s = case s of
+ '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
+ '\\':'n':cs -> '\n' : unesc cs
+ '\\':'t':cs -> '\t' : unesc cs
+ '"':[] -> []
+ c:cs -> c : unesc cs
+ _ -> []
+
+-------------------------------------------------------------------
+-- Alex wrapper code.
+-- A modified "posn" wrapper.
+-------------------------------------------------------------------
+
+data Posn = Pn {-# UNPACK #-} !Int
+ {-# UNPACK #-} !Int
+
+alexMove :: Posn -> Char -> Posn
+alexMove (Pn l c) '\n' = Pn (l+1) 1
+alexMove (Pn l c) _ = Pn l (c+1)
+
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar (AI p _ s) =
+ case BS.uncons s of
+ Nothing -> Nothing
+ Just (c,s) ->
+ let p' = alexMove p c
+ in p' `seq` Just (c, (AI p' c s))
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (AI p c s) = c
+
+data AlexInput = AI {-# UNPACK #-} !Posn -- current position,
+ {-# UNPACK #-} !Char -- previous char
+ {-# UNPACK #-} !BS.ByteString -- current input string
+
+data ParseResult a
+ = POk AlexInput a
+ | PFailed Posn -- The position of the error
+ String -- The error message
+
+newtype P a = P { unP :: AlexInput -> ParseResult a }
+
+instance Monad P where
+ return a = a `seq` (P $ \s -> POk s a)
+ (P m) >>= k = P $ \ s -> case m s of
+ POk s1 a -> unP (k a) s1
+ PFailed posn err -> PFailed posn err
+ fail msg = P $ \(AI posn _ _) -> PFailed posn msg
+
+runP :: P a -> BS.ByteString -> Either (Posn,String) a
+runP (P f) txt =
+ case f (AI (Pn 1 0) ' ' txt) of
+ POk _ x -> Right x
+ PFailed pos msg -> Left (pos,msg)
+
+failLoc :: Posn -> String -> P a
+failLoc pos msg = P $ \_ -> PFailed pos msg
+
+lexer :: (Token -> P a) -> P a
+lexer cont = P go
+ where
+ go inp@(AI pos _ str) =
+ case alexScan inp 0 of
+ AlexEOF -> unP (cont T_EOF) inp
+ AlexError (AI pos _ _) -> PFailed pos "lexical error"
+ AlexSkip inp' len -> go inp'
+ AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
+
+getPosn :: P Posn
+getPosn = P $ \inp@(AI pos _ _) -> POk inp pos
+
+
+alex_action_3 = tok (eitherResIdent (T_Ident . identC))
+alex_action_4 = tok (eitherResIdent (T_LString . BS.unpack))
+alex_action_5 = tok (eitherResIdent (T_Ident . identC))
+alex_action_6 = tok (T_String . unescapeInitTail . BS.unpack)
+alex_action_7 = tok (T_Integer . read . BS.unpack)
+alex_action_8 = tok (T_Double . read . BS.unpack)
+{-# LINE 1 "templates/GenericTemplate.hs" #-}
+{-# LINE 1 "templates/GenericTemplate.hs" #-}
+{-# LINE 1 "<built-in>" #-}
+{-# LINE 1 "<command line>" #-}
+{-# LINE 1 "templates/GenericTemplate.hs" #-}
+-- -----------------------------------------------------------------------------
+-- ALEX TEMPLATE
+--
+-- This code is in the PUBLIC DOMAIN; you may copy it freely and use
+-- it for any purpose whatsoever.
+
+-- -----------------------------------------------------------------------------
+-- INTERNALS and main scanner engine
+
+{-# LINE 35 "templates/GenericTemplate.hs" #-}
+
+{-# LINE 45 "templates/GenericTemplate.hs" #-}
+
+
+data AlexAddr = AlexA# Addr#
+
+#if __GLASGOW_HASKELL__ < 503
+uncheckedShiftL# = shiftL#
+#endif
+
+{-# INLINE alexIndexInt16OffAddr #-}
+alexIndexInt16OffAddr (AlexA# arr) off =
+#ifdef WORDS_BIGENDIAN
+ narrow16Int# i
+ where
+ i = word2Int# ((high `uncheckedShiftL#` 8#) `or#` low)
+ high = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#)))
+ low = int2Word# (ord# (indexCharOffAddr# arr off'))
+ off' = off *# 2#
+#else
+ indexInt16OffAddr# arr off
+#endif
+
+
+
+
+
+{-# INLINE alexIndexInt32OffAddr #-}
+alexIndexInt32OffAddr (AlexA# arr) off =
+#ifdef WORDS_BIGENDIAN
+ narrow32Int# i
+ where
+ i = word2Int# ((b3 `uncheckedShiftL#` 24#) `or#`
+ (b2 `uncheckedShiftL#` 16#) `or#`
+ (b1 `uncheckedShiftL#` 8#) `or#` b0)
+ b3 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 3#)))
+ b2 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 2#)))
+ b1 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#)))
+ b0 = int2Word# (ord# (indexCharOffAddr# arr off'))
+ off' = off *# 4#
+#else
+ indexInt32OffAddr# arr off
+#endif
+
+
+
+
+
+#if __GLASGOW_HASKELL__ < 503
+quickIndex arr i = arr ! i
+#else
+-- GHC >= 503, unsafeAt is available from Data.Array.Base.
+quickIndex = unsafeAt
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- Main lexing routines
+
+data AlexReturn a
+ = AlexEOF
+ | AlexError !AlexInput
+ | AlexSkip !AlexInput !Int
+ | AlexToken !AlexInput !Int a
+
+-- alexScan :: AlexInput -> StartCode -> AlexReturn a
+alexScan input (I# (sc))
+ = alexScanUser undefined input (I# (sc))
+
+alexScanUser user input (I# (sc))
+ = case alex_scan_tkn user input 0# input sc AlexNone of
+ (AlexNone, input') ->
+ case alexGetChar input of
+ Nothing ->
+
+
+
+ AlexEOF
+ Just _ ->
+
+
+
+ AlexError input'
+
+ (AlexLastSkip input len, _) ->
+
+
+
+ AlexSkip input len
+
+ (AlexLastAcc k input len, _) ->
+
+
+
+ AlexToken input len k
+
+
+-- Push the input through the DFA, remembering the most recent accepting
+-- state it encountered.
+
+alex_scan_tkn user orig_input len input s last_acc =
+ input `seq` -- strict in the input
+ let
+ new_acc = check_accs (alex_accept `quickIndex` (I# (s)))
+ in
+ new_acc `seq`
+ case alexGetChar input of
+ Nothing -> (new_acc, input)
+ Just (c, new_input) ->
+
+
+
+ let
+ base = alexIndexInt32OffAddr alex_base s
+ (I# (ord_c)) = ord c
+ offset = (base +# ord_c)
+ check = alexIndexInt16OffAddr alex_check offset
+
+ new_s = if (offset >=# 0#) && (check ==# ord_c)
+ then alexIndexInt16OffAddr alex_table offset
+ else alexIndexInt16OffAddr alex_deflt s
+ in
+ case new_s of
+ -1# -> (new_acc, input)
+ -- on an error, we want to keep the input *before* the
+ -- character that failed, not after.
+ _ -> alex_scan_tkn user orig_input (len +# 1#)
+ new_input new_s new_acc
+
+ where
+ check_accs [] = last_acc
+ check_accs (AlexAcc a : _) = AlexLastAcc a input (I# (len))
+ check_accs (AlexAccSkip : _) = AlexLastSkip input (I# (len))
+ check_accs (AlexAccPred a pred : rest)
+ | pred user orig_input (I# (len)) input
+ = AlexLastAcc a input (I# (len))
+ check_accs (AlexAccSkipPred pred : rest)
+ | pred user orig_input (I# (len)) input
+ = AlexLastSkip input (I# (len))
+ check_accs (_ : rest) = check_accs rest
+
+data AlexLastAcc a
+ = AlexNone
+ | AlexLastAcc a !AlexInput !Int
+ | AlexLastSkip !AlexInput !Int
+
+data AlexAcc a user
+ = AlexAcc a
+ | AlexAccSkip
+ | AlexAccPred a (AlexAccPred user)
+ | AlexAccSkipPred (AlexAccPred user)
+
+type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool
+
+-- -----------------------------------------------------------------------------
+-- Predicates on a rule
+
+alexAndPred p1 p2 user in1 len in2
+ = p1 user in1 len in2 && p2 user in1 len in2
+
+--alexPrevCharIsPred :: Char -> AlexAccPred _
+alexPrevCharIs c _ input _ _ = c == alexInputPrevChar input
+
+--alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _
+alexPrevCharIsOneOf arr _ input _ _ = arr ! alexInputPrevChar input
+
+--alexRightContext :: Int -> AlexAccPred _
+alexRightContext (I# (sc)) user _ _ input =
+ case alex_scan_tkn user input 0# input sc AlexNone of
+ (AlexNone, _) -> False
+ _ -> True
+ -- TODO: there's no need to find the longest
+ -- match when checking the right context, just
+ -- the first match will do.
+
+-- used by wrappers
+iUnbox (I# (i)) = i
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x new file mode 100644 index 000000000..d6f49bbb1 --- /dev/null +++ b/src/compiler/GF/Grammar/Lexer.x @@ -0,0 +1,272 @@ +-- -*- haskell -*- +-- This Alex file was machine-generated by the BNF converter +{ +module GF.Grammar.Lexer + ( Token(..), Posn(..) + , P, runP, lexer, getPosn, failLoc + , isReservedWord + ) where + +import GF.Infra.Ident +import GF.Data.Operations +import qualified Data.ByteString.Char8 as BS +import qualified Data.Map as Map + +} + + +$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME +$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME +$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME +$d = [0-9] -- digit +$i = [$l $d _ '] -- identifier character +$u = [\0-\255] -- universal: any character + +@rsyms = -- symbols and non-identifier-like reserved words + \; | \= | \{ | \} | \( | \) | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ + +:- +"--" [.]* ; -- Toss single line comments +"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ; + +$white+ ; +@rsyms { tok (eitherResIdent (T_Ident . identC)) } +\' ($u # \')* \' { tok (eitherResIdent (T_LString . BS.unpack)) } +(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC)) } + +\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) } + +$d+ { tok (T_Integer . read . BS.unpack) } +$d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) } + +{ + +tok f p s = f s + +data Token + = T_exclmark + | T_patt + | T_int_label + | T_oparen + | T_cparen + | T_star + | T_starstar + | T_plus + | T_plusplus + | T_comma + | T_minus + | T_rarrow + | T_dot + | T_alt + | T_colon + | T_semicolon + | T_less + | T_equal + | T_big_rarrow + | T_great + | T_questmark + | T_obrack + | T_lam + | T_lamlam + | T_cbrack + | T_ocurly + | T_bar + | T_ccurly + | T_underscore + | T_at + | T_PType + | T_Str + | T_Strs + | T_Tok + | T_Type + | T_abstract + | T_case + | T_cat + | T_concrete + | T_data + | T_def + | T_flags + | T_fn + | T_fun + | T_in + | T_incomplete + | T_instance + | T_interface + | T_let + | T_lin + | T_lincat + | T_lindef + | T_of + | T_open + | T_oper + | T_param + | T_pattern + | T_pre + | T_printname + | T_resource + | T_strs + | T_table + | T_transfer + | T_variants + | T_where + | T_with + | T_String String -- string literals + | T_Integer Integer -- integer literals + | T_Double Double -- double precision float literals + | T_LString String + | T_Ident Ident + | T_EOF + +eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token +eitherResIdent tv s = + case Map.lookup s resWords of + Just t -> t + Nothing -> tv s + +isReservedWord :: BS.ByteString -> Bool +isReservedWord s = Map.member s resWords + +resWords = Map.fromList + [ b "!" T_exclmark + , b "#" T_patt + , b "$" T_int_label + , b "(" T_oparen + , b ")" T_cparen + , b "*" T_star + , b "**" T_starstar + , b "+" T_plus + , b "++" T_plusplus + , b "," T_comma + , b "-" T_minus + , b "->" T_rarrow + , b "." T_dot + , b "/" T_alt + , b ":" T_colon + , b ";" T_semicolon + , b "<" T_less + , b "=" T_equal + , b "=>" T_big_rarrow + , b ">" T_great + , b "?" T_questmark + , b "[" T_obrack + , b "]" T_cbrack + , b "\\" T_lam + , b "\\\\" T_lamlam + , b "{" T_ocurly + , b "}" T_ccurly + , b "|" T_bar + , b "_" T_underscore + , b "@" T_at + , b "PType" T_PType + , b "Str" T_Str + , b "Strs" T_Strs + , b "Tok" T_Tok + , b "Type" T_Type + , b "abstract" T_abstract + , b "case" T_case + , b "cat" T_cat + , b "concrete" T_concrete + , b "data" T_data + , b "def" T_def + , b "flags" T_flags + , b "fn" T_fn + , b "fun" T_fun + , b "in" T_in + , b "incomplete" T_incomplete + , b "instance" T_instance + , b "interface" T_interface + , b "let" T_let + , b "lin" T_lin + , b "lincat" T_lincat + , b "lindef" T_lindef + , b "of" T_of + , b "open" T_open + , b "oper" T_oper + , b "param" T_param + , b "pattern" T_pattern + , b "pre" T_pre + , b "printname" T_printname + , b "resource" T_resource + , b "strs" T_strs + , b "table" T_table + , b "transfer" T_transfer + , b "variants" T_variants + , b "where" T_where + , b "with" T_with + ] + where b s t = (BS.pack s, t) + +unescapeInitTail :: String -> String +unescapeInitTail = unesc . tail where + unesc s = case s of + '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs + '\\':'n':cs -> '\n' : unesc cs + '\\':'t':cs -> '\t' : unesc cs + '"':[] -> [] + c:cs -> c : unesc cs + _ -> [] + +------------------------------------------------------------------- +-- Alex wrapper code. +-- A modified "posn" wrapper. +------------------------------------------------------------------- + +data Posn = Pn {-# UNPACK #-} !Int + {-# UNPACK #-} !Int + +alexMove :: Posn -> Char -> Posn +alexMove (Pn l c) '\n' = Pn (l+1) 1 +alexMove (Pn l c) _ = Pn l (c+1) + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (AI p _ s) = + case BS.uncons s of + Nothing -> Nothing + Just (c,s) -> + let p' = alexMove p c + in p' `seq` Just (c, (AI p' c s)) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (AI p c s) = c + +data AlexInput = AI {-# UNPACK #-} !Posn -- current position, + {-# UNPACK #-} !Char -- previous char + {-# UNPACK #-} !BS.ByteString -- current input string + +data ParseResult a + = POk a + | PFailed Posn -- The position of the error + String -- The error message + +newtype P a = P { unP :: AlexInput -> ParseResult a } + +instance Monad P where + return a = a `seq` (P $ \s -> POk a) + (P m) >>= k = P $ \ s -> case m s of + POk a -> unP (k a) s + PFailed posn err -> PFailed posn err + fail msg = P $ \(AI posn _ _) -> PFailed posn msg + +runP :: P a -> BS.ByteString -> Either (Posn,String) a +runP (P f) txt = + case f (AI (Pn 1 0) ' ' txt) of + POk x -> Right x + PFailed pos msg -> Left (pos,msg) + +failLoc :: Posn -> String -> P a +failLoc pos msg = P $ \_ -> PFailed pos msg + +lexer :: (Token -> P a) -> P a +lexer cont = P go + where + go inp@(AI pos _ str) = + case alexScan inp 0 of + AlexEOF -> unP (cont T_EOF) inp + AlexError (AI pos _ _) -> PFailed pos "lexical error" + AlexSkip inp' len -> go inp' + AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp' + +getPosn :: P Posn +getPosn = P $ \inp@(AI pos _ _) -> POk pos + +} diff --git a/src/compiler/GF/Grammar/Lockfield.hs b/src/compiler/GF/Grammar/Lockfield.hs new file mode 100644 index 000000000..3e78a48b6 --- /dev/null +++ b/src/compiler/GF/Grammar/Lockfield.hs @@ -0,0 +1,52 @@ +---------------------------------------------------------------------- +-- | +-- Module : Lockfield +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 23:24:34 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.7 $ +-- +-- Creating and using lock fields in reused resource grammars. +-- +-- AR 8\/2\/2005 detached from 'compile/MkResource' +----------------------------------------------------------------------------- + +module GF.Grammar.Lockfield (lockRecType, unlockRecord, lockLabel, isLockLabel) where + +import qualified Data.ByteString.Char8 as BS + +import GF.Infra.Ident +import GF.Grammar.Grammar +import GF.Grammar.Macros + +import GF.Data.Operations + +lockRecType :: Ident -> Type -> Err Type +lockRecType c t@(RecType rs) = + let lab = lockLabel c in + return $ if elem lab (map fst rs) || elem (showIdent c) ["String","Int"] + then t --- don't add an extra copy of lock field, nor predef cats + else RecType (rs ++ [(lockLabel c, RecType [])]) +lockRecType c t = plusRecType t $ RecType [(lockLabel c, RecType [])] + +unlockRecord :: Ident -> Term -> Err Term +unlockRecord c ft = do + let (xs,t) = termFormCnc ft + let lock = R [(lockLabel c, (Just (RecType []),R []))] + case plusRecord t lock of + Ok t' -> return $ mkAbs xs t' + _ -> return $ mkAbs xs (ExtR t lock) + +lockLabel :: Ident -> Label +lockLabel c = LIdent $! BS.append lockPrefix (ident2bs c) + +isLockLabel :: Label -> Bool +isLockLabel l = case l of + LIdent c -> BS.isPrefixOf lockPrefix c + _ -> False + + +lockPrefix = BS.pack "lock_" diff --git a/src/compiler/GF/Grammar/Lookup.hs b/src/compiler/GF/Grammar/Lookup.hs new file mode 100644 index 000000000..074f0c5ec --- /dev/null +++ b/src/compiler/GF/Grammar/Lookup.hs @@ -0,0 +1,188 @@ +{-# LANGUAGE PatternGuards #-} +---------------------------------------------------------------------- +-- | +-- Module : Lookup +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/27 13:21:53 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.15 $ +-- +-- Lookup in source (concrete and resource) when compiling. +-- +-- lookup in resource and concrete in compiling; for abstract, use 'Look' +----------------------------------------------------------------------------- + +module GF.Grammar.Lookup ( + lookupIdent, + lookupIdentInfo, + lookupOrigInfo, + allOrigInfos, + lookupResDef, + lookupResType, + lookupOverload, + lookupParamValues, + allParamValues, + lookupAbsDef, + lookupLincat, + lookupFunType, + lookupCatContext + ) where + +import GF.Data.Operations +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Grammar.Macros +import GF.Grammar.Grammar +import GF.Grammar.Printer +import GF.Grammar.Predef +import GF.Grammar.Lockfield + +import Data.List (nub,sortBy) +import Control.Monad +import Text.PrettyPrint + +-- whether lock fields are added in reuse +lock c = lockRecType c -- return +unlock c = unlockRecord c -- return + +-- to look up a constant etc in a search tree --- why here? AR 29/5/2008 +lookupIdent :: Ident -> BinTree Ident b -> Err b +lookupIdent c t = + case lookupTree showIdent c t of + Ok v -> return v + Bad _ -> Bad ("unknown identifier" +++ showIdent c) + +lookupIdentInfo :: ModInfo Ident a -> Ident -> Err a +lookupIdentInfo mo i = lookupIdent i (jments mo) + +lookupResDef :: SourceGrammar -> Ident -> Ident -> Err Term +lookupResDef gr m c + | isPredefCat c = lock c defLinType + | otherwise = look m c + where + look m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + ResOper _ (Just t) -> return t + ResOper _ Nothing -> return (Q m c) + CncCat (Just ty) _ _ -> lock c ty + CncCat _ _ _ -> lock c defLinType + + CncFun (Just (cat,_,_)) (Just tr) _ -> unlock cat tr + CncFun _ (Just tr) _ -> return tr + + AnyInd _ n -> look n c + ResParam _ _ -> return (QC m c) + ResValue _ -> return (QC m c) + _ -> Bad $ render (ppIdent c <+> text "is not defined in resource" <+> ppIdent m) + +lookupResType :: SourceGrammar -> Ident -> Ident -> Err Type +lookupResType gr m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + ResOper (Just t) _ -> return t + + -- used in reused concrete + CncCat _ _ _ -> return typeType + CncFun (Just (cat,cont,val)) _ _ -> do + val' <- lock cat val + return $ mkProd cont val' [] + AnyInd _ n -> lookupResType gr n c + ResParam _ _ -> return typePType + ResValue t -> return t + _ -> Bad $ render (ppIdent c <+> text "has no type defined in resource" <+> ppIdent m) + +lookupOverload :: SourceGrammar -> Ident -> Ident -> Err [([Type],(Type,Term))] +lookupOverload gr m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + ResOverload os tysts -> do + tss <- mapM (\x -> lookupOverload gr x c) os + return $ [let (args,val) = typeFormCnc ty in (map (\(b,x,t) -> t) args,(val,tr)) | + (ty,tr) <- tysts] ++ + concat tss + + AnyInd _ n -> lookupOverload gr n c + _ -> Bad $ render (ppIdent c <+> text "is not an overloaded operation") + +-- | returns the original 'Info' and the module where it was found +lookupOrigInfo :: SourceGrammar -> Ident -> Ident -> Err (Ident,Info) +lookupOrigInfo gr m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + AnyInd _ n -> lookupOrigInfo gr n c + i -> return (m,i) + +allOrigInfos :: SourceGrammar -> Ident -> [(Ident,Info)] +allOrigInfos gr m = errVal [] $ do + mo <- lookupModule gr m + return [(c,i) | (c,_) <- tree2list (jments mo), Ok (_,i) <- [look c]] + where + look = lookupOrigInfo gr m + +lookupParamValues :: SourceGrammar -> Ident -> Ident -> Err [Term] +lookupParamValues gr m c = do + (_,info) <- lookupOrigInfo gr m c + case info of + ResParam _ (Just pvs) -> return pvs + _ -> Bad $ render (ppIdent c <+> text "has no parameter values defined in resource" <+> ppIdent m) + +allParamValues :: SourceGrammar -> Type -> Err [Term] +allParamValues cnc ptyp = case ptyp of + _ | Just n <- isTypeInts ptyp -> return [EInt i | i <- [0..n]] + QC p c -> lookupParamValues cnc p c + Q p c -> lookupResDef cnc p c >>= allParamValues cnc + RecType r -> do + let (ls,tys) = unzip $ sortByFst r + tss <- mapM (allParamValues cnc) tys + return [R (zipAssign ls ts) | ts <- combinations tss] + _ -> Bad (render (text "cannot find parameter values for" <+> ppTerm Unqualified 0 ptyp)) + where + -- to normalize records and record types + sortByFst = sortBy (\ x y -> compare (fst x) (fst y)) + +lookupAbsDef :: SourceGrammar -> Ident -> Ident -> Err (Maybe Int,Maybe [Equation]) +lookupAbsDef gr m c = errIn (render (text "looking up absdef of" <+> ppIdent c)) $ do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + AbsFun _ a d -> return (a,d) + AnyInd _ n -> lookupAbsDef gr n c + _ -> return (Nothing,Nothing) + +lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type +lookupLincat gr m c | isPredefCat c = return defLinType --- ad hoc; not needed? +lookupLincat gr m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + CncCat (Just t) _ _ -> return t + AnyInd _ n -> lookupLincat gr n c + _ -> Bad (render (ppIdent c <+> text "has no linearization type in" <+> ppIdent m)) + +-- | this is needed at compile time +lookupFunType :: SourceGrammar -> Ident -> Ident -> Err Type +lookupFunType gr m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + AbsFun (Just t) _ _ -> return t + AnyInd _ n -> lookupFunType gr n c + _ -> Bad (render (text "cannot find type of" <+> ppIdent c)) + +-- | this is needed at compile time +lookupCatContext :: SourceGrammar -> Ident -> Ident -> Err Context +lookupCatContext gr m c = do + mo <- lookupModule gr m + info <- lookupIdentInfo mo c + case info of + AbsCat (Just co) _ -> return co + AnyInd _ n -> lookupCatContext gr n c + _ -> Bad (render (text "unknown category" <+> ppIdent c)) diff --git a/src/compiler/GF/Grammar/MMacros.hs b/src/compiler/GF/Grammar/MMacros.hs new file mode 100644 index 000000000..a7f746b66 --- /dev/null +++ b/src/compiler/GF/Grammar/MMacros.hs @@ -0,0 +1,279 @@ +---------------------------------------------------------------------- +-- | +-- Module : MMacros +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/10 12:49:13 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.9 $ +-- +-- some more abstractions on grammars, esp. for Edit +----------------------------------------------------------------------------- + +module GF.Grammar.MMacros where + +import GF.Data.Operations +--import GF.Data.Zipper + +import GF.Grammar.Grammar +import GF.Grammar.Printer +import GF.Infra.Ident +import GF.Compile.Refresh +import GF.Grammar.Values +----import GrammarST +import GF.Grammar.Macros + +import Control.Monad +import qualified Data.ByteString.Char8 as BS +import Text.PrettyPrint + +{- +nodeTree :: Tree -> TrNode +argsTree :: Tree -> [Tree] + +nodeTree (Tr (n,_)) = n +argsTree (Tr (_,ts)) = ts + +isFocusNode :: TrNode -> Bool +bindsNode :: TrNode -> Binds +atomNode :: TrNode -> Atom +valNode :: TrNode -> Val +constrsNode :: TrNode -> Constraints +metaSubstsNode :: TrNode -> MetaSubst + +isFocusNode (N (_,_,_,_,b)) = b +bindsNode (N (b,_,_,_,_)) = b +atomNode (N (_,a,_,_,_)) = a +valNode (N (_,_,v,_,_)) = v +constrsNode (N (_,_,_,(c,_),_)) = c +metaSubstsNode (N (_,_,_,(_,m),_)) = m + +atomTree :: Tree -> Atom +valTree :: Tree -> Val + +atomTree = atomNode . nodeTree +valTree = valNode . nodeTree + +mkNode :: Binds -> Atom -> Val -> (Constraints, MetaSubst) -> TrNode +mkNode binds atom vtyp cs = N (binds,atom,vtyp,cs,False) + +metasTree :: Tree -> [MetaId] +metasTree = concatMap metasNode . scanTree where + metasNode n = [m | AtM m <- [atomNode n]] ++ map fst (metaSubstsNode n) + +varsTree :: Tree -> [(Var,Val)] +varsTree t = [(x,v) | N (_,AtV x,v,_,_) <- scanTree t] + +constrsTree :: Tree -> Constraints +constrsTree = constrsNode . nodeTree + +allConstrsTree :: Tree -> Constraints +allConstrsTree = concatMap constrsNode . scanTree + +changeConstrs :: (Constraints -> Constraints) -> TrNode -> TrNode +changeConstrs f (N (b,a,v,(c,m),x)) = N (b,a,v,(f c, m),x) + +changeMetaSubst :: (MetaSubst -> MetaSubst) -> TrNode -> TrNode +changeMetaSubst f (N (b,a,v,(c,m),x)) = N (b,a,v,(c, f m),x) + +changeAtom :: (Atom -> Atom) -> TrNode -> TrNode +changeAtom f (N (b,a,v,(c,m),x)) = N (b,f a,v,(c, m),x) + +-- * on the way to Edit + +uTree :: Tree +uTree = Tr (uNode, []) -- unknown tree + +uNode :: TrNode +uNode = mkNode [] uAtom uVal ([],[]) + + +uAtom :: Atom +uAtom = AtM meta0 + +mAtom :: Atom +mAtom = AtM meta0 +-} + +type Var = Ident + +uVal :: Val +uVal = vClos uExp + +vClos :: Exp -> Val +vClos = VClos [] + +uExp :: Exp +uExp = Meta meta0 + +mExp, mExp0 :: Exp +mExp = Meta meta0 +mExp0 = mExp + +meta2exp :: MetaId -> Exp +meta2exp = Meta +{- +atomC :: Fun -> Atom +atomC = AtC + +funAtom :: Atom -> Err Fun +funAtom a = case a of + AtC f -> return f + _ -> prtBad "not function head" a + +atomIsMeta :: Atom -> Bool +atomIsMeta atom = case atom of + AtM _ -> True + _ -> False + +getMetaAtom :: Atom -> Err MetaId +getMetaAtom a = case a of + AtM m -> return m + _ -> Bad "the active node is not meta" +-} +cat2val :: Context -> Cat -> Val +cat2val cont cat = vClos $ mkApp (uncurry Q cat) [Meta i | i <- [1..length cont]] + +val2cat :: Val -> Err Cat +val2cat v = liftM valCat (val2exp v) + +substTerm :: [Ident] -> Substitution -> Term -> Term +substTerm ss g c = case c of + Vr x -> maybe c id $ lookup x g + App f a -> App (substTerm ss g f) (substTerm ss g a) + Abs b x t -> let y = mkFreshVarX ss x in + Abs b y (substTerm (y:ss) ((x, Vr y):g) t) + Prod b x a t -> let y = mkFreshVarX ss x in + Prod b y (substTerm ss g a) (substTerm (y:ss) ((x,Vr y):g) t) + _ -> c + +metaSubstExp :: MetaSubst -> [(MetaId,Exp)] +metaSubstExp msubst = [(m, errVal (meta2exp m) (val2expSafe v)) | (m,v) <- msubst] + +-- * belong here rather than to computation + +substitute :: [Var] -> Substitution -> Exp -> Err Exp +substitute v s = return . substTerm v s + +alphaConv :: [Var] -> (Var,Var) -> Exp -> Err Exp --- +alphaConv oldvars (x,x') = substitute (x:x':oldvars) [(x,Vr x')] + +alphaFresh :: [Var] -> Exp -> Err Exp +alphaFresh vs = refreshTermN $ maxVarIndex vs + +-- | done in a state monad +alphaFreshAll :: [Var] -> [Exp] -> Err [Exp] +alphaFreshAll vs = mapM $ alphaFresh vs + +-- | for display +val2exp :: Val -> Err Exp +val2exp = val2expP False + +-- | for type checking +val2expSafe :: Val -> Err Exp +val2expSafe = val2expP True + +val2expP :: Bool -> Val -> Err Exp +val2expP safe v = case v of + + VClos g@(_:_) e@(Meta _) -> if safe + then Bad (render (text "unsafe value substitution" <+> ppValue Unqualified 0 v)) + else substVal g e + VClos g e -> substVal g e + VApp f c -> liftM2 App (val2expP safe f) (val2expP safe c) + VCn c -> return $ uncurry Q c + VGen i x -> if safe + then Bad (render (text "unsafe val2exp" <+> ppValue Unqualified 0 v)) + else return $ Vr $ x --- in editing, no alpha conversions presentv + VRecType xs->do xs <- mapM (\(l,v) -> val2expP safe v >>= \e -> return (l,e)) xs + return (RecType xs) + VType -> return typeType + where + substVal g e = mapPairsM (val2expP safe) g >>= return . (\s -> substTerm [] s e) + +isConstVal :: Val -> Bool +isConstVal v = case v of + VApp f c -> isConstVal f && isConstVal c + VCn _ -> True + VClos [] e -> null $ freeVarsExp e + _ -> False --- could be more liberal + +mkProdVal :: Binds -> Val -> Err Val --- +mkProdVal bs v = do + bs' <- mapPairsM val2exp bs + v' <- val2exp v + return $ vClos $ foldr (uncurry (Prod Explicit)) v' bs' + +freeVarsExp :: Exp -> [Ident] +freeVarsExp e = case e of + Vr x -> [x] + App f c -> freeVarsExp f ++ freeVarsExp c + Abs _ x b -> filter (/=x) (freeVarsExp b) + Prod _ x a b -> freeVarsExp a ++ filter (/=x) (freeVarsExp b) + _ -> [] --- thus applies to abstract syntax only + +int2var :: Int -> Ident +int2var = identC . BS.pack . ('$':) . show + +meta0 :: MetaId +meta0 = 0 + +termMeta0 :: Term +termMeta0 = Meta meta0 + +identVar :: Term -> Err Ident +identVar (Vr x) = return x +identVar _ = Bad "not a variable" + + +-- | light-weight rename for user interaction; also change names of internal vars +qualifTerm :: Ident -> Term -> Term +qualifTerm m = qualif [] where + qualif xs t = case t of + Abs b x t -> let x' = chV x in Abs b x' $ qualif (x':xs) t + Prod b x a t -> Prod b x (qualif xs a) $ qualif (x:xs) t + Vr x -> let x' = chV x in if (elem x' xs) then (Vr x') else (Q m x) + Cn c -> Q m c + Con c -> QC m c + _ -> composSafeOp (qualif xs) t + chV x = string2var $ ident2bs x + +string2var :: BS.ByteString -> Ident +string2var s = case BS.unpack s of + c:'_':i -> identV (BS.singleton c) (readIntArg i) --- + _ -> identC s + +-- | reindex variables so that they tell nesting depth level +reindexTerm :: Term -> Term +reindexTerm = qualif (0,[]) where + qualif dg@(d,g) t = case t of + Abs b x t -> let x' = ind x d in Abs b x' $ qualif (d+1, (x,x'):g) t + Prod b x a t -> let x' = ind x d in Prod b x' (qualif dg a) $ qualif (d+1, (x,x'):g) t + Vr x -> Vr $ look x g + _ -> composSafeOp (qualif dg) t + look x = maybe x id . lookup x --- if x is not in scope it is unchanged + ind x d = identC $ ident2bs x `BS.append` BS.singleton '_' `BS.append` BS.pack (show d) + +{- +-- this method works for context-free abstract syntax +-- and is meant to be used in simple embedded GF applications + +exp2tree :: Exp -> Err Tree +exp2tree e = do + (bs,f,xs) <- termForm e + cont <- case bs of + [] -> return [] + _ -> prtBad "cannot convert bindings in" e + at <- case f of + Q m c -> return $ AtC (m,c) + QC m c -> return $ AtC (m,c) + Meta m -> return $ AtM m + K s -> return $ AtL s + EInt n -> return $ AtI n + EFloat n -> return $ AtF n + _ -> prtBad "cannot convert to atom" f + ts <- mapM exp2tree xs + return $ Tr (N (cont,at,uVal,([],[]),True),ts) +-} diff --git a/src/compiler/GF/Grammar/Macros.hs b/src/compiler/GF/Grammar/Macros.hs new file mode 100644 index 000000000..799cd9ec5 --- /dev/null +++ b/src/compiler/GF/Grammar/Macros.hs @@ -0,0 +1,627 @@ +---------------------------------------------------------------------- +-- | +-- Module : Macros +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/11 16:38:00 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.24 $ +-- +-- Macros for constructing and analysing source code terms. +-- +-- operations on terms and types not involving lookup in or reference to grammars +-- +-- AR 7\/12\/1999 - 9\/5\/2000 -- 4\/6\/2001 +----------------------------------------------------------------------------- + +module GF.Grammar.Macros where + +import GF.Data.Operations +import GF.Data.Str +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Grammar.Grammar +import GF.Grammar.Values +import GF.Grammar.Predef +import GF.Grammar.Printer + +import Control.Monad (liftM, liftM2) +import Data.Char (isDigit) +import Data.List (sortBy,nub) +import Text.PrettyPrint + +typeForm :: Type -> (Context, Cat, [Term]) +typeForm t = + case t of + Prod b x a t -> + let (x', cat, args) = typeForm t + in ((b,x,a):x', cat, args) + App c a -> + let (_, cat, args) = typeForm c + in ([],cat,args ++ [a]) + Q m c -> ([],(m,c),[]) + QC m c -> ([],(m,c),[]) + Sort c -> ([],(identW, c),[]) + _ -> error (render (text "no normal form of type" <+> ppTerm Unqualified 0 t)) + +typeFormCnc :: Type -> (Context, Type) +typeFormCnc t = + case t of + Prod b x a t -> let (x', v) = typeFormCnc t + in ((b,x,a):x',v) + _ -> ([],t) + +valCat :: Type -> Cat +valCat typ = + let (_,cat,_) = typeForm typ + in cat + +valType :: Type -> Type +valType typ = + let (_,cat,xx) = typeForm typ --- not optimal to do in this way + in mkApp (uncurry Q cat) xx + +valTypeCnc :: Type -> Type +valTypeCnc typ = snd (typeFormCnc typ) + +typeSkeleton :: Type -> ([(Int,Cat)],Cat) +typeSkeleton typ = + let (cont,cat,_) = typeForm typ + args = map (\(b,x,t) -> typeSkeleton t) cont + in ([(length c, v) | (c,v) <- args], cat) + +catSkeleton :: Type -> ([Cat],Cat) +catSkeleton typ = + let (args,val) = typeSkeleton typ + in (map snd args, val) + +funsToAndFrom :: Type -> (Cat, [(Cat,[Int])]) +funsToAndFrom t = + let (cs,v) = catSkeleton t + cis = zip cs [0..] + in (v, [(c,[i | (c',i) <- cis, c' == c]) | c <- cs]) + +isRecursiveType :: Type -> Bool +isRecursiveType t = + let (cc,c) = catSkeleton t -- thus recursivity on Cat level + in any (== c) cc + +isHigherOrderType :: Type -> Bool +isHigherOrderType t = errVal True $ do -- pessimistic choice + co <- contextOfType t + return $ not $ null [x | (_,x,Prod _ _ _ _) <- co] + +contextOfType :: Type -> Err Context +contextOfType typ = case typ of + Prod b x a t -> liftM ((b,x,a):) $ contextOfType t + _ -> return [] + +termForm :: Term -> Err ([(BindType,Ident)], Term, [Term]) +termForm t = case t of + Abs b x t -> + do (x', fun, args) <- termForm t + return ((b,x):x', fun, args) + App c a -> + do (_,fun, args) <- termForm c + return ([],fun,args ++ [a]) + _ -> + return ([],t,[]) + +termFormCnc :: Term -> ([(BindType,Ident)], Term) +termFormCnc t = case t of + Abs b x t -> ((b,x):xs, t') where (xs,t') = termFormCnc t + _ -> ([],t) + +appForm :: Term -> (Term, [Term]) +appForm t = case t of + App c a -> (fun, args ++ [a]) where (fun, args) = appForm c + _ -> (t,[]) + +mkProdSimple :: Context -> Term -> Term +mkProdSimple c t = mkProd c t [] + +mkProd :: Context -> Term -> [Term] -> Term +mkProd [] typ args = mkApp typ args +mkProd ((b,x,a):dd) typ args = Prod b x a (mkProd dd typ args) + +mkTerm :: ([(BindType,Ident)], Term, [Term]) -> Term +mkTerm (xx,t,aa) = mkAbs xx (mkApp t aa) + +mkApp :: Term -> [Term] -> Term +mkApp = foldl App + +mkAbs :: [(BindType,Ident)] -> Term -> Term +mkAbs xx t = foldr (uncurry Abs) t xx + +appCons :: Ident -> [Term] -> Term +appCons = mkApp . Cn + +mkLet :: [LocalDef] -> Term -> Term +mkLet defs t = foldr Let t defs + +mkLetUntyped :: Context -> Term -> Term +mkLetUntyped defs = mkLet [(x,(Nothing,t)) | (_,x,t) <- defs] + +isVariable :: Term -> Bool +isVariable (Vr _ ) = True +isVariable _ = False + +eqIdent :: Ident -> Ident -> Bool +eqIdent = (==) + +uType :: Type +uType = Cn cUndefinedType + +assign :: Label -> Term -> Assign +assign l t = (l,(Nothing,t)) + +assignT :: Label -> Type -> Term -> Assign +assignT l a t = (l,(Just a,t)) + +unzipR :: [Assign] -> ([Label],[Term]) +unzipR r = (ls, map snd ts) where (ls,ts) = unzip r + +mkAssign :: [(Label,Term)] -> [Assign] +mkAssign lts = [assign l t | (l,t) <- lts] + +zipAssign :: [Label] -> [Term] -> [Assign] +zipAssign ls ts = [assign l t | (l,t) <- zip ls ts] + +mapAssignM :: Monad m => (Term -> m c) -> [Assign] -> m [(Label,(Maybe c,c))] +mapAssignM f = mapM (\ (ls,tv) -> liftM ((,) ls) (g tv)) + where g (t,v) = liftM2 (,) (maybe (return Nothing) (liftM Just . f) t) (f v) + +mkRecordN :: Int -> (Int -> Label) -> [Term] -> Term +mkRecordN int lab typs = R [ assign (lab i) t | (i,t) <- zip [int..] typs] + +mkRecord :: (Int -> Label) -> [Term] -> Term +mkRecord = mkRecordN 0 + +mkRecTypeN :: Int -> (Int -> Label) -> [Type] -> Type +mkRecTypeN int lab typs = RecType [ (lab i, t) | (i,t) <- zip [int..] typs] + +mkRecType :: (Int -> Label) -> [Type] -> Type +mkRecType = mkRecTypeN 0 + +record2subst :: Term -> Err Substitution +record2subst t = case t of + R fs -> return [(identC x, t) | (LIdent x,(_,t)) <- fs] + _ -> Bad (render (text "record expected, found" <+> ppTerm Unqualified 0 t)) + +typeType, typePType, typeStr, typeTok, typeStrs :: Term + +typeType = Sort cType +typePType = Sort cPType +typeStr = Sort cStr +typeTok = Sort cTok +typeStrs = Sort cStrs + +typeString, typeFloat, typeInt :: Term +typeInts :: Integer -> Term +typePBool :: Term +typeError :: Term + +typeString = cnPredef cString +typeInt = cnPredef cInt +typeFloat = cnPredef cFloat +typeInts i = App (cnPredef cInts) (EInt i) +typePBool = cnPredef cPBool +typeError = cnPredef cErrorType + +isTypeInts :: Term -> Maybe Integer +isTypeInts (App c (EInt i)) | c == cnPredef cInts = Just i +isTypeInts _ = Nothing + +isPredefConstant :: Term -> Bool +isPredefConstant t = case t of + Q mod _ | mod == cPredef || mod == cPredefAbs -> True + _ -> False + +cnPredef :: Ident -> Term +cnPredef f = Q cPredef f + +mkSelects :: Term -> [Term] -> Term +mkSelects t tt = foldl S t tt + +mkTable :: [Term] -> Term -> Term +mkTable tt t = foldr Table t tt + +mkCTable :: [(BindType,Ident)] -> Term -> Term +mkCTable ids v = foldr ccase v ids where + ccase (_,x) t = T TRaw [(PV x,t)] + +mkHypo :: Term -> Hypo +mkHypo typ = (Explicit,identW, typ) + +eqStrIdent :: Ident -> Ident -> Bool +eqStrIdent = (==) + +tuple2record :: [Term] -> [Assign] +tuple2record ts = [assign (tupleLabel i) t | (i,t) <- zip [1..] ts] + +tuple2recordType :: [Term] -> [Labelling] +tuple2recordType ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts] + +tuple2recordPatt :: [Patt] -> [(Label,Patt)] +tuple2recordPatt ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts] + +mkCases :: Ident -> Term -> Term +mkCases x t = T TRaw [(PV x, t)] + +mkWildCases :: Term -> Term +mkWildCases = mkCases identW + +mkFunType :: [Type] -> Type -> Type +mkFunType tt t = mkProd [(Explicit,identW, ty) | ty <- tt] t [] -- nondep prod + +plusRecType :: Type -> Type -> Err Type +plusRecType t1 t2 = case (t1, t2) of + (RecType r1, RecType r2) -> case + filter (`elem` (map fst r1)) (map fst r2) of + [] -> return (RecType (r1 ++ r2)) + ls -> Bad $ render (text "clashing labels" <+> hsep (map ppLabel ls)) + _ -> Bad $ render (text "cannot add record types" <+> ppTerm Unqualified 0 t1 <+> text "and" <+> ppTerm Unqualified 0 t2) + +plusRecord :: Term -> Term -> Err Term +plusRecord t1 t2 = + case (t1,t2) of + (R r1, R r2 ) -> return (R ([(l,v) | -- overshadowing of old fields + (l,v) <- r1, not (elem l (map fst r2)) ] ++ r2)) + (_, FV rs) -> mapM (plusRecord t1) rs >>= return . FV + (FV rs,_ ) -> mapM (`plusRecord` t2) rs >>= return . FV + _ -> Bad $ render (text "cannot add records" <+> ppTerm Unqualified 0 t1 <+> text "and" <+> ppTerm Unqualified 0 t2) + +-- | default linearization type +defLinType :: Type +defLinType = RecType [(theLinLabel, typeStr)] + +-- | refreshing variables +mkFreshVar :: [Ident] -> Ident +mkFreshVar olds = varX (maxVarIndex olds + 1) + +-- | trying to preserve a given symbol +mkFreshVarX :: [Ident] -> Ident -> Ident +mkFreshVarX olds x = if (elem x olds) then (varX (maxVarIndex olds + 1)) else x + +maxVarIndex :: [Ident] -> Int +maxVarIndex = maximum . ((-1):) . map varIndex + +mkFreshVars :: Int -> [Ident] -> [Ident] +mkFreshVars n olds = [varX (maxVarIndex olds + i) | i <- [1..n]] + +-- | quick hack for refining with var in editor +freshAsTerm :: String -> Term +freshAsTerm s = Vr (varX (readIntArg s)) + +-- | create a terminal for concrete syntax +string2term :: String -> Term +string2term = K + +int2term :: Integer -> Term +int2term = EInt + +float2term :: Double -> Term +float2term = EFloat + +-- | create a terminal from identifier +ident2terminal :: Ident -> Term +ident2terminal = K . showIdent + +symbolOfIdent :: Ident -> String +symbolOfIdent = showIdent + +symid :: Ident -> String +symid = symbolOfIdent + +justIdentOf :: Term -> Maybe Ident +justIdentOf (Vr x) = Just x +justIdentOf (Cn x) = Just x +justIdentOf _ = Nothing + +linTypeStr :: Type +linTypeStr = mkRecType linLabel [typeStr] -- default lintype {s :: Str} + +linAsStr :: String -> Term +linAsStr s = mkRecord linLabel [K s] -- default linearization {s = s} + +term2patt :: Term -> Err Patt +term2patt trm = case termForm trm of + Ok ([], Vr x, []) | x == identW -> return PW + | otherwise -> return (PV x) + Ok ([], Con c, aa) -> do + aa' <- mapM term2patt aa + return (PC c aa') + Ok ([], QC p c, aa) -> do + aa' <- mapM term2patt aa + return (PP p c aa') + + Ok ([], Q p c, []) -> do + return (PM p c) + + Ok ([], R r, []) -> do + let (ll,aa) = unzipR r + aa' <- mapM term2patt aa + return (PR (zip ll aa')) + Ok ([],EInt i,[]) -> return $ PInt i + Ok ([],EFloat i,[]) -> return $ PFloat i + Ok ([],K s, []) -> return $ PString s + +--- encodings due to excessive use of term-patt convs. AR 7/1/2005 + Ok ([], Cn id, [Vr a,b]) | id == cAs -> do + b' <- term2patt b + return (PAs a b') + Ok ([], Cn id, [a]) | id == cNeg -> do + a' <- term2patt a + return (PNeg a') + Ok ([], Cn id, [a]) | id == cRep -> do + a' <- term2patt a + return (PRep a') + Ok ([], Cn id, []) | id == cRep -> do + return PChar + Ok ([], Cn id,[K s]) | id == cChars -> do + return $ PChars s + Ok ([], Cn id, [a,b]) | id == cSeq -> do + a' <- term2patt a + b' <- term2patt b + return (PSeq a' b') + Ok ([], Cn id, [a,b]) | id == cAlt -> do + a' <- term2patt a + b' <- term2patt b + return (PAlt a' b') + + Ok ([], Cn c, []) -> do + return (PMacro c) + + _ -> Bad $ render (text "no pattern corresponds to term" <+> ppTerm Unqualified 0 trm) + +patt2term :: Patt -> Term +patt2term pt = case pt of + PV x -> Vr x + PW -> Vr identW --- not parsable, should not occur + PMacro c -> Cn c + PM p c -> Q p c + + PC c pp -> mkApp (Con c) (map patt2term pp) + PP p c pp -> mkApp (QC p c) (map patt2term pp) + + PR r -> R [assign l (patt2term p) | (l,p) <- r] + PT _ p -> patt2term p + PInt i -> EInt i + PFloat i -> EFloat i + PString s -> K s + + PAs x p -> appCons cAs [Vr x, patt2term p] --- an encoding + PChar -> appCons cChar [] --- an encoding + PChars s -> appCons cChars [K s] --- an encoding + PSeq a b -> appCons cSeq [(patt2term a), (patt2term b)] --- an encoding + PAlt a b -> appCons cAlt [(patt2term a), (patt2term b)] --- an encoding + PRep a -> appCons cRep [(patt2term a)] --- an encoding + PNeg a -> appCons cNeg [(patt2term a)] --- an encoding + + +redirectTerm :: Ident -> Term -> Term +redirectTerm n t = case t of + QC _ f -> QC n f + Q _ f -> Q n f + _ -> composSafeOp (redirectTerm n) t + +-- | to gather ultimate cases in a table; preserves pattern list +allCaseValues :: Term -> [([Patt],Term)] +allCaseValues trm = case trm of + T _ cs -> [(p:ps, t) | (p,t0) <- cs, (ps,t) <- allCaseValues t0] + _ -> [([],trm)] + +-- | to get a string from a term that represents a sequence of terminals +strsFromTerm :: Term -> Err [Str] +strsFromTerm t = case t of + K s -> return [str s] + Empty -> return [str []] + C s t -> do + s' <- strsFromTerm s + t' <- strsFromTerm t + return [plusStr x y | x <- s', y <- t'] + Glue s t -> do + s' <- strsFromTerm s + t' <- strsFromTerm t + return [glueStr x y | x <- s', y <- t'] + Alts (d,vs) -> do + d0 <- strsFromTerm d + v0 <- mapM (strsFromTerm . fst) vs + c0 <- mapM (strsFromTerm . snd) vs + let vs' = zip v0 c0 + return [strTok (str2strings def) vars | + def <- d0, + vars <- [[(str2strings v, map sstr c) | (v,c) <- zip vv c0] | + vv <- combinations v0] + ] + FV ts -> mapM strsFromTerm ts >>= return . concat + Strs ts -> mapM strsFromTerm ts >>= return . concat + _ -> Bad (render (text "cannot get Str from term" <+> ppTerm Unqualified 0 t)) + +-- | to print an Str-denoting term as a string; if the term is of wrong type, the error msg +stringFromTerm :: Term -> String +stringFromTerm = err id (ifNull "" (sstr . head)) . strsFromTerm + + +-- | to define compositional term functions +composSafeOp :: (Term -> Term) -> Term -> Term +composSafeOp op trm = case composOp (mkMonadic op) trm of + Ok t -> t + _ -> error "the operation is safe isn't it ?" + where + mkMonadic f = return . f + +-- | to define compositional term functions +composOp :: Monad m => (Term -> m Term) -> Term -> m Term +composOp co trm = + case trm of + App c a -> + do c' <- co c + a' <- co a + return (App c' a') + Abs b x t -> + do t' <- co t + return (Abs b x t') + Prod b x a t -> + do a' <- co a + t' <- co t + return (Prod b x a' t') + S c a -> + do c' <- co c + a' <- co a + return (S c' a') + Table a c -> + do a' <- co a + c' <- co c + return (Table a' c') + R r -> + do r' <- mapAssignM co r + return (R r') + RecType r -> + do r' <- mapPairListM (co . snd) r + return (RecType r') + P t i -> + do t' <- co t + return (P t' i) + ExtR a c -> + do a' <- co a + c' <- co c + return (ExtR a' c') + + T i cc -> + do cc' <- mapPairListM (co . snd) cc + i' <- changeTableType co i + return (T i' cc') + + V ty vs -> + do ty' <- co ty + vs' <- mapM co vs + return (V ty' vs') + + Let (x,(mt,a)) b -> + do a' <- co a + mt' <- case mt of + Just t -> co t >>= (return . Just) + _ -> return mt + b' <- co b + return (Let (x,(mt',a')) b') + + C s1 s2 -> + do v1 <- co s1 + v2 <- co s2 + return (C v1 v2) + Glue s1 s2 -> + do v1 <- co s1 + v2 <- co s2 + return (Glue v1 v2) + Alts (t,aa) -> + do t' <- co t + aa' <- mapM (pairM co) aa + return (Alts (t',aa')) + FV ts -> mapM co ts >>= return . FV + Strs tt -> mapM co tt >>= return . Strs + + EPattType ty -> + do ty' <- co ty + return (EPattType ty') + + ELincat c ty -> + do ty' <- co ty + return (ELincat c ty') + + ELin c ty -> + do ty' <- co ty + return (ELin c ty') + + _ -> return trm -- covers K, Vr, Cn, Sort, EPatt + +getTableType :: TInfo -> Err Type +getTableType i = case i of + TTyped ty -> return ty + TComp ty -> return ty + TWild ty -> return ty + _ -> Bad "the table is untyped" + +changeTableType :: Monad m => (Type -> m Type) -> TInfo -> m TInfo +changeTableType co i = case i of + TTyped ty -> co ty >>= return . TTyped + TComp ty -> co ty >>= return . TComp + TWild ty -> co ty >>= return . TWild + _ -> return i + +collectOp :: (Term -> [a]) -> Term -> [a] +collectOp co trm = case trm of + App c a -> co c ++ co a + Abs _ _ b -> co b + Prod _ _ a b -> co a ++ co b + S c a -> co c ++ co a + Table a c -> co a ++ co c + ExtR a c -> co a ++ co c + R r -> concatMap (\ (_,(mt,a)) -> maybe [] co mt ++ co a) r + RecType r -> concatMap (co . snd) r + P t i -> co t + T _ cc -> concatMap (co . snd) cc -- not from patterns --- nor from type annot + V _ cc -> concatMap co cc --- nor from type annot + Let (x,(mt,a)) b -> maybe [] co mt ++ co a ++ co b + C s1 s2 -> co s1 ++ co s2 + Glue s1 s2 -> co s1 ++ co s2 + Alts (t,aa) -> let (x,y) = unzip aa in co t ++ concatMap co (x ++ y) + FV ts -> concatMap co ts + Strs tt -> concatMap co tt + _ -> [] -- covers K, Vr, Cn, Sort + +-- | to find the word items in a term +wordsInTerm :: Term -> [String] +wordsInTerm trm = filter (not . null) $ case trm of + K s -> [s] + S c _ -> wo c + Alts (t,aa) -> wo t ++ concatMap (wo . fst) aa + _ -> collectOp wo trm + where wo = wordsInTerm + +noExist :: Term +noExist = FV [] + +defaultLinType :: Type +defaultLinType = mkRecType linLabel [typeStr] + +-- normalize records and record types; put s first + +sortRec :: [(Label,a)] -> [(Label,a)] +sortRec = sortBy ordLabel where + ordLabel (r1,_) (r2,_) = + case (showIdent (label2ident r1), showIdent (label2ident r2)) of + ("s",_) -> LT + (_,"s") -> GT + (s1,s2) -> compare s1 s2 + +-- | dependency check, detecting circularities and returning topo-sorted list + +allDependencies :: (Ident -> Bool) -> BinTree Ident Info -> [(Ident,[Ident])] +allDependencies ism b = + [(f, nub (concatMap opty (pts i))) | (f,i) <- tree2list b] + where + opersIn t = case t of + Q n c | ism n -> [c] + QC n c | ism n -> [c] + _ -> collectOp opersIn t + opty (Just ty) = opersIn ty + opty _ = [] + pts i = case i of + ResOper pty pt -> [pty,pt] + ResParam (Just ps) _ -> [Just t | (_,cont) <- ps, (_,_,t) <- cont] + CncCat pty _ _ -> [pty] + CncFun _ pt _ -> [pt] ---- (Maybe (Ident,(Context,Type)) + AbsFun pty _ ptr -> [pty] --- ptr is def, which can be mutual + AbsCat (Just co) _ -> [Just ty | (_,_,ty) <- co] + _ -> [] + +topoSortJments :: SourceModule -> Err [(Ident,Info)] +topoSortJments (m,mi) = do + is <- either + return + (\cyc -> Bad (render (text "circular definitions:" <+> fsep (map ppIdent (head cyc))))) + (topoTest (allDependencies (==m) (jments mi))) + return (reverse [(i,info) | i <- is, Ok info <- [lookupTree showIdent i (jments mi)]]) diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y new file mode 100644 index 000000000..320053674 --- /dev/null +++ b/src/compiler/GF/Grammar/Parser.y @@ -0,0 +1,739 @@ +{ +{-# OPTIONS -fno-warn-overlapping-patterns #-} +module GF.Grammar.Parser + ( P, runP + , pModDef + , pModHeader + , pExp + ) where + +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Infra.Option +import GF.Data.Operations +import GF.Grammar.Predef +import GF.Grammar.Grammar +import GF.Grammar.Macros +import GF.Grammar.Lexer +import qualified Data.ByteString.Char8 as BS +import GF.Compile.Update (buildAnyTree) +} + +%name pModDef ModDef +%partial pModHeader ModHeader +%name pExp Exp + +-- no lexer declaration +%monad { P } { >>= } { return } +%lexer { lexer } { T_EOF } +%tokentype { Token } + + +%token + '!' { T_exclmark } + '#' { T_patt } + '$' { T_int_label } + '(' { T_oparen } + ')' { T_cparen } + '*' { T_star } + '**' { T_starstar } + '+' { T_plus } + '++' { T_plusplus } + ',' { T_comma } + '-' { T_minus } + '->' { T_rarrow } + '.' { T_dot } + '/' { T_alt } + ':' { T_colon } + ';' { T_semicolon } + '<' { T_less } + '=' { T_equal } + '=>' { T_big_rarrow} + '>' { T_great } + '?' { T_questmark } + '@' { T_at } + '[' { T_obrack } + ']' { T_cbrack } + '{' { T_ocurly } + '}' { T_ccurly } + '\\' { T_lam } + '\\\\' { T_lamlam } + '_' { T_underscore} + '|' { T_bar } + 'PType' { T_PType } + 'Str' { T_Str } + 'Strs' { T_Strs } + 'Tok' { T_Tok } + 'Type' { T_Type } + 'abstract' { T_abstract } + 'case' { T_case } + 'cat' { T_cat } + 'concrete' { T_concrete } + 'data' { T_data } + 'def' { T_def } + 'flags' { T_flags } + 'fun' { T_fun } + 'in' { T_in } + 'incomplete' { T_incomplete} + 'instance' { T_instance } + 'interface' { T_interface } + 'let' { T_let } + 'lin' { T_lin } + 'lincat' { T_lincat } + 'lindef' { T_lindef } + 'of' { T_of } + 'open' { T_open } + 'oper' { T_oper } + 'param' { T_param } + 'pattern' { T_pattern } + 'pre' { T_pre } + 'printname' { T_printname } + 'resource' { T_resource } + 'strs' { T_strs } + 'table' { T_table } + 'variants' { T_variants } + 'where' { T_where } + 'with' { T_with } + +Integer { (T_Integer $$) } +Double { (T_Double $$) } +String { (T_String $$) } +LString { (T_LString $$) } +Ident { (T_Ident $$) } + + +%% + +ModDef :: { SourceModule } +ModDef + : ComplMod ModType '=' ModBody {% + do let mstat = $1 + (mtype,id) = $2 + (extends,with,content) = $4 + (opens,jments,opts) = case content of { Just c -> c; Nothing -> ([],[],noOptions) } + mapM_ (checkInfoType mtype) jments + defs <- case buildAnyTree id [(i,d) | (i,_,d) <- jments] of + Ok x -> return x + Bad msg -> fail msg + let poss = buildTree [(i,(fname,mkSrcSpan p)) | (i,p,_) <- jments] + fname = showIdent id ++ ".gf" + + mkSrcSpan :: (Posn, Posn) -> (Int,Int) + mkSrcSpan (Pn l1 _, Pn l2 _) = (l1,l2) + + return (id, ModInfo mtype mstat opts extends with opens [] defs poss) } + +ModHeader :: { SourceModule } +ModHeader + : ComplMod ModType '=' ModHeaderBody { let { mstat = $1 ; + (mtype,id) = $2 ; + (extends,with,opens) = $4 } + in (id, ModInfo mtype mstat noOptions extends with opens [] emptyBinTree emptyBinTree) } + +ComplMod :: { ModuleStatus } +ComplMod + : {- empty -} { MSComplete } + | 'incomplete' { MSIncomplete } + +ModType :: { (ModuleType Ident,Ident) } +ModType + : 'abstract' Ident { (MTAbstract, $2) } + | 'resource' Ident { (MTResource, $2) } + | 'interface' Ident { (MTInterface, $2) } + | 'concrete' Ident 'of' Ident { (MTConcrete $4, $2) } + | 'instance' Ident 'of' Ident { (MTInstance $4, $2) } + +ModHeaderBody :: { ( [(Ident,MInclude Ident)] + , Maybe (Ident,MInclude Ident,[(Ident,Ident)]) + , [OpenSpec Ident] + ) } +ModHeaderBody + : ListIncluded '**' Included 'with' ListInst '**' ModOpen { ($1, Just (fst $3,snd $3,$5), $7) } + | ListIncluded '**' Included 'with' ListInst { ($1, Just (fst $3,snd $3,$5), []) } + | ListIncluded '**' ModOpen { ($1, Nothing, $3) } + | ListIncluded { ($1, Nothing, []) } + | Included 'with' ListInst '**' ModOpen { ([], Just (fst $1,snd $1,$3), $5) } + | Included 'with' ListInst { ([], Just (fst $1,snd $1,$3), []) } + | ModOpen { ([], Nothing, $1) } + +ModOpen :: { [OpenSpec Ident] } +ModOpen + : { [] } + | 'open' ListOpen { $2 } + +ModBody :: { ( [(Ident,MInclude Ident)] + , Maybe (Ident,MInclude Ident,[(Ident,Ident)]) + , Maybe ([OpenSpec Ident],[(Ident,SrcSpan,Info)],Options) + ) } +ModBody + : ListIncluded '**' Included 'with' ListInst '**' ModContent { ($1, Just (fst $3,snd $3,$5), Just $7) } + | ListIncluded '**' Included 'with' ListInst { ($1, Just (fst $3,snd $3,$5), Nothing) } + | ListIncluded '**' ModContent { ($1, Nothing, Just $3) } + | ListIncluded { ($1, Nothing, Nothing) } + | Included 'with' ListInst '**' ModContent { ([], Just (fst $1,snd $1,$3), Just $5) } + | Included 'with' ListInst { ([], Just (fst $1,snd $1,$3), Nothing) } + | ModContent { ([], Nothing, Just $1) } + | ModBody ';' { $1 } + +ModContent :: { ([OpenSpec Ident],[(Ident,SrcSpan,Info)],Options) } +ModContent + : '{' ListTopDef '}' { ([],[d | Left ds <- $2, d <- ds],concatOptions [o | Right o <- $2]) } + | 'open' ListOpen 'in' '{' ListTopDef '}' { ($2,[d | Left ds <- $5, d <- ds],concatOptions [o | Right o <- $5]) } + +ListTopDef :: { [Either [(Ident,SrcSpan,Info)] Options] } +ListTopDef + : {- empty -} { [] } + | TopDef ListTopDef { $1 : $2 } + +ListOpen :: { [OpenSpec Ident] } +ListOpen + : Open { [$1] } + | Open ',' ListOpen { $1 : $3 } + +Open :: { OpenSpec Ident } +Open + : Ident { OSimple $1 } + | '(' Ident '=' Ident ')' { OQualif $2 $4 } + +ListInst :: { [(Ident,Ident)] } +ListInst + : Inst { [$1] } + | Inst ',' ListInst { $1 : $3 } + +Inst :: { (Ident,Ident) } +Inst + : '(' Ident '=' Ident ')' { ($2,$4) } + +ListIncluded :: { [(Ident,MInclude Ident)] } +ListIncluded + : Included { [$1] } + | Included ',' ListIncluded { $1 : $3 } + +Included :: { (Ident,MInclude Ident) } +Included + : Ident { ($1,MIAll ) } + | Ident '[' ListIdent ']' { ($1,MIOnly $3) } + | Ident '-' '[' ListIdent ']' { ($1,MIExcept $4) } + +TopDef :: { Either [(Ident,SrcSpan,Info)] Options } +TopDef + : 'cat' ListCatDef { Left $2 } + | 'fun' ListFunDef { Left $2 } + | 'def' ListDefDef { Left $2 } + | 'data' ListDataDef { Left $2 } + | 'param' ListParamDef { Left $2 } + | 'oper' ListOperDef { Left $2 } + | 'lincat' ListTermDef { Left [(f, pos, CncCat (Just e) Nothing Nothing ) | (f,pos,e) <- $2] } + | 'lindef' ListTermDef { Left [(f, pos, CncCat Nothing (Just e) Nothing ) | (f,pos,e) <- $2] } + | 'lin' ListLinDef { Left $2 } + | 'printname' 'cat' ListTermDef { Left [(f, pos, CncCat Nothing Nothing (Just e)) | (f,pos,e) <- $3] } + | 'printname' 'fun' ListTermDef { Left [(f, pos, CncFun Nothing Nothing (Just e)) | (f,pos,e) <- $3] } + | 'flags' ListFlagDef { Right $2 } + +CatDef :: { [(Ident,SrcSpan,Info)] } +CatDef + : Posn Ident ListDDecl Posn { [($2, ($1,$4), AbsCat (Just $3) Nothing)] } + | Posn '[' Ident ListDDecl ']' Posn { listCatDef $3 ($1,$6) $4 0 } + | Posn '[' Ident ListDDecl ']' '{' Integer '}' Posn { listCatDef $3 ($1,$9) $4 (fromIntegral $7) } + +FunDef :: { [(Ident,SrcSpan,Info)] } +FunDef + : Posn ListIdent ':' Exp Posn { [(fun, ($1,$5), AbsFun (Just $4) Nothing (Just [])) | fun <- $2] } + +DefDef :: { [(Ident,SrcSpan,Info)] } +DefDef + : Posn ListName '=' Exp Posn { [(f, ($1,$5),AbsFun Nothing (Just 0) (Just [([],$4)])) | f <- $2] } + | Posn Name ListPatt '=' Exp Posn { [($2,($1,$6),AbsFun Nothing (Just (length $3)) (Just [($3,$5)]))] } + +DataDef :: { [(Ident,SrcSpan,Info)] } +DataDef + : Posn Ident '=' ListDataConstr Posn { ($2, ($1,$5), AbsCat Nothing (Just (map Cn $4))) : + [(fun, ($1,$5), AbsFun Nothing Nothing Nothing) | fun <- $4] } + | Posn ListIdent ':' Exp Posn { -- (snd (valCat $4), ($1,$5), AbsCat Nothing (Just (map Cn $2))) : + [(fun, ($1,$5), AbsFun (Just $4) Nothing Nothing) | fun <- $2] } + +ParamDef :: { [(Ident,SrcSpan,Info)] } +ParamDef + : Posn Ident '=' ListParConstr Posn { ($2, ($1,$5), ResParam (Just $4) Nothing) : + [(f, ($1,$5), ResValue (mkProdSimple co (Cn $2))) | (f,co) <- $4] } + | Posn Ident Posn { [($2, ($1,$3), ResParam Nothing Nothing)] } + +OperDef :: { [(Ident,SrcSpan,Info)] } +OperDef + : Posn ListName ':' Exp Posn { [(i, ($1,$5), info) | i <- $2, info <- mkOverload (Just $4) Nothing ] } + | Posn ListName '=' Exp Posn { [(i, ($1,$5), info) | i <- $2, info <- mkOverload Nothing (Just $4)] } + | Posn Name ListArg '=' Exp Posn { [(i, ($1,$6), info) | i <- [$2], info <- mkOverload Nothing (Just (mkAbs $3 $5))] } + | Posn ListName ':' Exp '=' Exp Posn { [(i, ($1,$7), info) | i <- $2, info <- mkOverload (Just $4) (Just $6)] } + +LinDef :: { [(Ident,SrcSpan,Info)] } +LinDef + : Posn ListName '=' Exp Posn { [(f, ($1,$5), CncFun Nothing (Just $4) Nothing) | f <- $2] } + | Posn Name ListArg '=' Exp Posn { [($2, ($1,$6), CncFun Nothing (Just (mkAbs $3 $5)) Nothing)] } + +TermDef :: { [(Ident,SrcSpan,Term)] } +TermDef + : Posn ListName '=' Exp Posn { [(i,($1,$5),$4) | i <- $2] } + +FlagDef :: { Options } +FlagDef + : Posn Ident '=' Ident Posn {% case parseModuleOptions ["--" ++ showIdent $2 ++ "=" ++ showIdent $4] of + Ok x -> return x + Bad msg -> failLoc $1 msg } + +ListDataConstr :: { [Ident] } +ListDataConstr + : Ident { [$1] } + | Ident '|' ListDataConstr { $1 : $3 } + +ParConstr :: { Param } +ParConstr + : Ident ListDDecl { ($1,$2) } + +ListLinDef :: { [(Ident,SrcSpan,Info)] } +ListLinDef + : LinDef ';' { $1 } + | LinDef ';' ListLinDef { $1 ++ $3 } + +ListDefDef :: { [(Ident,SrcSpan,Info)] } +ListDefDef + : DefDef ';' { $1 } + | DefDef ';' ListDefDef { $1 ++ $3 } + +ListOperDef :: { [(Ident,SrcSpan,Info)] } +ListOperDef + : OperDef ';' { $1 } + | OperDef ';' ListOperDef { $1 ++ $3 } + +ListCatDef :: { [(Ident,SrcSpan,Info)] } +ListCatDef + : CatDef ';' { $1 } + | CatDef ';' ListCatDef { $1 ++ $3 } + +ListFunDef :: { [(Ident,SrcSpan,Info)] } +ListFunDef + : FunDef ';' { $1 } + | FunDef ';' ListFunDef { $1 ++ $3 } + +ListDataDef :: { [(Ident,SrcSpan,Info)] } +ListDataDef + : DataDef ';' { $1 } + | DataDef ';' ListDataDef { $1 ++ $3 } + +ListParamDef :: { [(Ident,SrcSpan,Info)] } +ListParamDef + : ParamDef ';' { $1 } + | ParamDef ';' ListParamDef { $1 ++ $3 } + +ListTermDef :: { [(Ident,SrcSpan,Term)] } +ListTermDef + : TermDef ';' { $1 } + | TermDef ';' ListTermDef { $1 ++ $3 } + +ListFlagDef :: { Options } +ListFlagDef + : FlagDef ';' { $1 } + | FlagDef ';' ListFlagDef { addOptions $1 $3 } + +ListParConstr :: { [Param] } +ListParConstr + : ParConstr { [$1] } + | ParConstr '|' ListParConstr { $1 : $3 } + +ListIdent :: { [Ident] } +ListIdent + : Ident { [$1] } + | Ident ',' ListIdent { $1 : $3 } + +ListIdent2 :: { [Ident] } +ListIdent2 + : Ident { [$1] } + | Ident ListIdent2 { $1 : $2 } + +Name :: { Ident } +Name + : Ident { $1 } + | '[' Ident ']' { mkListId $2 } + +ListName :: { [Ident] } +ListName + : Name { [$1] } + | Name ',' ListName { $1 : $3 } + +LocDef :: { [(Ident, Maybe Type, Maybe Term)] } +LocDef + : ListIdent ':' Exp { [(lab,Just $3,Nothing) | lab <- $1] } + | ListIdent '=' Exp { [(lab,Nothing,Just $3) | lab <- $1] } + | ListIdent ':' Exp '=' Exp { [(lab,Just $3,Just $5) | lab <- $1] } + +ListLocDef :: { [(Ident, Maybe Type, Maybe Term)] } +ListLocDef + : {- empty -} { [] } + | LocDef { $1 } + | LocDef ';' ListLocDef { $1 ++ $3 } + +Exp :: { Term } +Exp + : Exp1 '|' Exp { FV [$1,$3] } + | '\\' ListBind '->' Exp { mkAbs $2 $4 } + | '\\\\' ListBind '=>' Exp { mkCTable $2 $4 } + | Decl '->' Exp { mkProdSimple $1 $3 } + | Exp3 '=>' Exp { Table $1 $3 } + | 'let' '{' ListLocDef '}' 'in' Exp {% + do defs <- mapM tryLoc $3 + return $ mkLet defs $6 } + | 'let' ListLocDef 'in' Exp {% + do defs <- mapM tryLoc $2 + return $ mkLet defs $4 } + | Exp3 'where' '{' ListLocDef '}' {% + do defs <- mapM tryLoc $4 + return $ mkLet defs $1 } + | 'in' Exp5 String { Example $2 $3 } + | Exp1 { $1 } + +Exp1 :: { Term } +Exp1 + : Exp2 '++' Exp1 { C $1 $3 } + | Exp2 { $1 } + +Exp2 :: { Term } +Exp2 + : Exp3 '+' Exp2 { Glue $1 $3 } + | Exp3 { $1 } + +Exp3 :: { Term } +Exp3 + : Exp3 '!' Exp4 { S $1 $3 } + | 'table' '{' ListCase '}' { T TRaw $3 } + | 'table' Exp6 '{' ListCase '}' { T (TTyped $2) $4 } + | 'table' Exp6 '[' ListExp ']' { V $2 $4 } + | Exp3 '*' Exp4 { case $1 of + RecType xs -> RecType (xs ++ [(tupleLabel (length xs+1),$3)]) + t -> RecType [(tupleLabel 1,$1), (tupleLabel 2,$3)] } + | Exp3 '**' Exp4 { ExtR $1 $3 } + | Exp4 { $1 } + +Exp4 :: { Term } +Exp4 + : Exp4 Exp5 { App $1 $2 } + | Exp4 '{' Exp '}' { App $1 (ImplArg $3) } + | 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of + Typed _ t -> TTyped t + _ -> TRaw + in S (T annot $5) $2 } + | 'variants' '{' ListExp '}' { FV $3 } + | 'pre' '{' ListCase '}' {% mkAlts $3 } + | 'pre' '{' String ';' ListAltern '}' { Alts (K $3, $5) } + | 'pre' '{' Ident ';' ListAltern '}' { Alts (Vr $3, $5) } + | 'strs' '{' ListExp '}' { Strs $3 } + | '#' Patt2 { EPatt $2 } + | 'pattern' Exp5 { EPattType $2 } + | 'lincat' Ident Exp5 { ELincat $2 $3 } + | 'lin' Ident Exp5 { ELin $2 $3 } + | Exp5 { $1 } + +Exp5 :: { Term } +Exp5 + : Exp5 '.' Label { P $1 $3 } + | Exp6 { $1 } + +Exp6 :: { Term } +Exp6 + : Ident { Vr $1 } + | Sort { Sort $1 } + | String { K $1 } + | Integer { EInt $1 } + | Double { EFloat $1 } + | '?' { Meta 0 } + | '[' ']' { Empty } + | '[' Ident Exps ']' { foldl App (Vr (mkListId $2)) $3 } + | '[' String ']' { case $2 of + [] -> Empty + str -> foldr1 C (map K (words str)) } + | '{' ListLocDef '}' {% mkR $2 } + | '<' ListTupleComp '>' { R (tuple2record $2) } + | '<' Exp ':' Exp '>' { Typed $2 $4 } + | LString { K $1 } + | '(' Exp ')' { $2 } + +ListExp :: { [Term] } +ListExp + : {- empty -} { [] } + | Exp { [$1] } + | Exp ';' ListExp { $1 : $3 } + +Exps :: { [Term] } +Exps + : {- empty -} { [] } + | Exp6 Exps { $1 : $2 } + +Patt :: { Patt } +Patt + : Patt '|' Patt1 { PAlt $1 $3 } + | Patt '+' Patt1 { PSeq $1 $3 } + | Patt1 { $1 } + +Patt1 :: { Patt } +Patt1 + : Ident ListPatt { PC $1 $2 } + | Ident '.' Ident ListPatt { PP $1 $3 $4 } + | Patt2 '*' { PRep $1 } + | Ident '@' Patt2 { PAs $1 $3 } + | '-' Patt2 { PNeg $2 } + | Patt2 { $1 } + +Patt2 :: { Patt } +Patt2 + : '?' { PChar } + | '[' String ']' { PChars $2 } + | '#' Ident { PMacro $2 } + | '#' Ident '.' Ident { PM $2 $4 } + | '_' { PW } + | Ident { PV $1 } + | Ident '.' Ident { PP $1 $3 [] } + | Integer { PInt $1 } + | Double { PFloat $1 } + | String { PString $1 } + | '{' ListPattAss '}' { PR $2 } + | '<' ListPattTupleComp '>' { (PR . tuple2recordPatt) $2 } + | '(' Patt ')' { $2 } + +PattAss :: { [(Label,Patt)] } +PattAss + : ListIdent '=' Patt { [(LIdent (ident2bs i),$3) | i <- $1] } + +Label :: { Label } +Label + : Ident { LIdent (ident2bs $1) } + | '$' Integer { LVar (fromIntegral $2) } + +Sort :: { Ident } +Sort + : 'Type' { cType } + | 'PType' { cPType } + | 'Tok' { cTok } + | 'Str' { cStr } + | 'Strs' { cStrs } + +ListPattAss :: { [(Label,Patt)] } +ListPattAss + : {- empty -} { [] } + | PattAss { $1 } + | PattAss ';' ListPattAss { $1 ++ $3 } + +ListPatt :: { [Patt] } +ListPatt + : PattArg { [$1] } + | PattArg ListPatt { $1 : $2 } + +PattArg :: { Patt } + : Patt2 { $1 } + | '{' Patt2 '}' { PImplArg $2 } + +Arg :: { [(BindType,Ident)] } +Arg + : Ident { [(Explicit,$1 )] } + | '_' { [(Explicit,identW)] } + | '{' ListIdent2 '}' { [(Implicit,v) | v <- $2] } + +ListArg :: { [(BindType,Ident)] } +ListArg + : Arg { $1 } + | Arg ListArg { $1 ++ $2 } + +Bind :: { [(BindType,Ident)] } +Bind + : Ident { [(Explicit,$1 )] } + | '_' { [(Explicit,identW)] } + | '{' ListIdent '}' { [(Implicit,v) | v <- $2] } + +ListBind :: { [(BindType,Ident)] } +ListBind + : Bind { $1 } + | Bind ',' ListBind { $1 ++ $3 } + +Decl :: { [Hypo] } +Decl + : '(' ListBind ':' Exp ')' { [(b,x,$4) | (b,x) <- $2] } + | Exp4 { [mkHypo $1] } + +ListTupleComp :: { [Term] } +ListTupleComp + : {- empty -} { [] } + | Exp { [$1] } + | Exp ',' ListTupleComp { $1 : $3 } + +ListPattTupleComp :: { [Patt] } +ListPattTupleComp + : {- empty -} { [] } + | Patt { [$1] } + | Patt ',' ListPattTupleComp { $1 : $3 } + +Case :: { Case } +Case + : Patt '=>' Exp { ($1,$3) } + +ListCase :: { [Case] } +ListCase + : Case { [$1] } + | Case ';' ListCase { $1 : $3 } + +Altern :: { (Term,Term) } +Altern + : Exp '/' Exp { ($1,$3) } + +ListAltern :: { [(Term,Term)] } +ListAltern + : Altern { [$1] } + | Altern ';' ListAltern { $1 : $3 } + +DDecl :: { [Hypo] } +DDecl + : '(' ListBind ':' Exp ')' { [(b,x,$4) | (b,x) <- $2] } + | Exp6 { [mkHypo $1] } + +ListDDecl :: { [Hypo] } +ListDDecl + : {- empty -} { [] } + | DDecl ListDDecl { $1 ++ $2 } + +Posn :: { Posn } +Posn + : {- empty -} {% getPosn } + + +{ + +happyError :: P a +happyError = fail "parse error" + +mkListId,mkConsId,mkBaseId :: Ident -> Ident +mkListId = prefixId (BS.pack "List") +mkConsId = prefixId (BS.pack "Cons") +mkBaseId = prefixId (BS.pack "Base") + +prefixId :: BS.ByteString -> Ident -> Ident +prefixId pref id = identC (BS.append pref (ident2bs id)) + +listCatDef :: Ident -> SrcSpan -> Context -> Int -> [(Ident,SrcSpan,Info)] +listCatDef id pos cont size = [catd,nilfund,consfund] + where + listId = mkListId id + baseId = mkBaseId id + consId = mkConsId id + + catd = (listId, pos, AbsCat (Just cont') (Just [Cn baseId,Cn consId])) + nilfund = (baseId, pos, AbsFun (Just niltyp) Nothing Nothing) + consfund = (consId, pos, AbsFun (Just constyp) Nothing Nothing) + + cont' = [(b,mkId x i,ty) | (i,(b,x,ty)) <- zip [0..] cont] + xs = map (\(b,x,t) -> Vr x) cont' + cd = mkHypo (mkApp (Vr id) xs) + lc = mkApp (Vr listId) xs + + niltyp = mkProdSimple (cont' ++ replicate size cd) lc + constyp = mkProdSimple (cont' ++ [cd, mkHypo lc]) lc + + mkId x i = if isWildIdent x then (varX i) else x + +tryLoc (c,mty,Just e) = return (c,(mty,e)) +tryLoc (c,_ ,_ ) = fail ("local definition of" +++ showIdent c +++ "without value") + +mkR [] = return $ RecType [] --- empty record always interpreted as record type +mkR fs@(f:_) = + case f of + (lab,Just ty,Nothing) -> mapM tryRT fs >>= return . RecType + _ -> mapM tryR fs >>= return . R + where + tryRT (lab,Just ty,Nothing) = return (ident2label lab,ty) + tryRT (lab,_ ,_ ) = fail $ "illegal record type field" +++ showIdent lab --- manifest fields ?! + + tryR (lab,mty,Just t) = return (ident2label lab,(mty,t)) + tryR (lab,_ ,_ ) = fail $ "illegal record field" +++ showIdent lab + +mkOverload pdt pdf@(Just df) = + case appForm df of + (keyw, ts@(_:_)) | isOverloading keyw -> + case last ts of + R fs -> [ResOverload [m | Vr m <- ts] [(ty,fu) | (_,(Just ty,fu)) <- fs]] + _ -> [ResOper pdt pdf] + _ -> [ResOper pdt pdf] + + -- to enable separare type signature --- not type-checked +mkOverload pdt@(Just df) pdf = + case appForm df of + (keyw, ts@(_:_)) | isOverloading keyw -> + case last ts of + RecType _ -> [] + _ -> [ResOper pdt pdf] + _ -> [ResOper pdt pdf] +mkOverload pdt pdf = [ResOper pdt pdf] + +isOverloading t = + case t of + Vr keyw | showIdent keyw == "overload" -> True -- overload is a "soft keyword" + _ -> False + + +type SrcSpan = (Posn,Posn) + + +checkInfoType MTAbstract (id,pos,info) = + case info of + AbsCat _ _ -> return () + AbsFun _ _ _ -> return () + _ -> failLoc (fst pos) "illegal definition in abstract module" +checkInfoType MTResource (id,pos,info) = + case info of + ResParam _ _ -> return () + ResValue _ -> return () + ResOper _ _ -> return () + ResOverload _ _ -> return () + _ -> failLoc (fst pos) "illegal definition in resource module" +checkInfoType MTInterface (id,pos,info) = + case info of + ResParam _ _ -> return () + ResValue _ -> return () + ResOper _ _ -> return () + ResOverload _ _ -> return () + _ -> failLoc (fst pos) "illegal definition in interface module" +checkInfoType (MTConcrete _) (id,pos,info) = + case info of + CncCat _ _ _ -> return () + CncFun _ _ _ -> return () + ResParam _ _ -> return () + ResValue _ -> return () + ResOper _ _ -> return () + ResOverload _ _ -> return () + _ -> failLoc (fst pos) "illegal definition in concrete module" +checkInfoType (MTInstance _) (id,pos,info) = + case info of + ResParam _ _ -> return () + ResValue _ -> return () + ResOper _ _ -> return () + _ -> failLoc (fst pos) "illegal definition in instance module" + + +mkAlts cs = case cs of + _:_ -> do + def <- mkDef (last cs) + alts <- mapM mkAlt (init cs) + return (Alts (def,alts)) + _ -> fail "empty alts" + where + mkDef (_,t) = return t + mkAlt (p,t) = do + ss <- mkStrs p + return (t,ss) + mkStrs p = case p of + PAlt a b -> do + Strs as <- mkStrs a + Strs bs <- mkStrs b + return $ Strs $ as ++ bs + PString s -> return $ Strs [K s] + PV x -> return (Vr x) --- for macros; not yet complete + PMacro x -> return (Vr x) --- for macros; not yet complete + PM m c -> return (Q m c) --- for macros; not yet complete + _ -> fail "no strs from pattern" + +} + diff --git a/src/compiler/GF/Grammar/PatternMatch.hs b/src/compiler/GF/Grammar/PatternMatch.hs new file mode 100644 index 000000000..b8f7eff7d --- /dev/null +++ b/src/compiler/GF/Grammar/PatternMatch.hs @@ -0,0 +1,165 @@ +---------------------------------------------------------------------- +-- | +-- Module : PatternMatch +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/12 12:38:29 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.7 $ +-- +-- pattern matching for both concrete and abstract syntax. AR -- 16\/6\/2003 +----------------------------------------------------------------------------- + +module GF.Grammar.PatternMatch (matchPattern, + testOvershadow, + findMatch + ) where + +import GF.Data.Operations +import GF.Grammar.Grammar +import GF.Infra.Ident +import GF.Grammar.Macros +import GF.Grammar.Printer + +import Data.List +import Control.Monad +import Text.PrettyPrint +import Debug.Trace + +matchPattern :: [(Patt,Term)] -> Term -> Err (Term, Substitution) +matchPattern pts term = + if not (isInConstantForm term) + then Bad (render (text "variables occur in" <+> ppTerm Unqualified 0 term)) + else do + term' <- mkK term + errIn (render (text "trying patterns" <+> hsep (punctuate comma (map (ppPatt Unqualified 0 . fst) pts)))) $ + findMatch [([p],t) | (p,t) <- pts] [term'] + where + -- to capture all Str with string pattern matching + mkK s = case s of + C _ _ -> do + s' <- getS s + return (K (unwords s')) + _ -> return s + + getS s = case s of + K w -> return [w] + C v w -> liftM2 (++) (getS v) (getS w) + Empty -> return [] + _ -> Bad (render (text "cannot get string from" <+> ppTerm Unqualified 0 s)) + +testOvershadow :: [Patt] -> [Term] -> Err [Patt] +testOvershadow pts vs = do + let numpts = zip pts [0..] + let cases = [(p,EInt i) | (p,i) <- numpts] + ts <- mapM (liftM fst . matchPattern cases) vs + return [p | (p,i) <- numpts, notElem i [i | EInt i <- ts] ] + +findMatch :: [([Patt],Term)] -> [Term] -> Err (Term, Substitution) +findMatch cases terms = case cases of + [] -> Bad (render (text "no applicable case for" <+> hsep (punctuate comma (map (ppTerm Unqualified 0) terms)))) + (patts,_):_ | length patts /= length terms -> + Bad (render (text "wrong number of args for patterns :" <+> hsep (map (ppPatt Unqualified 0) patts) <+> + text "cannot take" <+> hsep (map (ppTerm Unqualified 0) terms))) + (patts,val):cc -> case mapM tryMatch (zip patts terms) of + Ok substs -> return (val, concat substs) + _ -> findMatch cc terms + +tryMatch :: (Patt, Term) -> Err [(Ident, Term)] +tryMatch (p,t) = do + t' <- termForm t + trym p t' + where + + isInConstantFormt = True -- tested already in matchPattern + trym p t' = + case (p,t') of + (_,(x,Empty,y)) -> trym p (x,K [],y) -- because "" = [""] = [] + (PW, _) | isInConstantFormt -> return [] -- optimization with wildcard + (PV x, _) | isInConstantFormt -> return [(x,t)] + (PString s, ([],K i,[])) | s==i -> return [] + (PInt s, ([],EInt i,[])) | s==i -> return [] + (PFloat s,([],EFloat i,[])) | s==i -> return [] --- rounding? + (PC p pp, ([], Con f, tt)) | + p `eqStrIdent` f && length pp == length tt -> + do matches <- mapM tryMatch (zip pp tt) + return (concat matches) + + (PP q p pp, ([], QC r f, tt)) | + -- q `eqStrIdent` r && --- not for inherited AR 10/10/2005 + p `eqStrIdent` f && length pp == length tt -> + do matches <- mapM tryMatch (zip pp tt) + return (concat matches) + ---- hack for AppPredef bug + (PP q p pp, ([], Q r f, tt)) | + -- q `eqStrIdent` r && --- + p `eqStrIdent` f && length pp == length tt -> + do matches <- mapM tryMatch (zip pp tt) + return (concat matches) + + (PR r, ([],R r',[])) | + all (`elem` map fst r') (map fst r) -> + do matches <- mapM tryMatch + [(p,snd a) | (l,p) <- r, let Just a = lookup l r'] + return (concat matches) + (PT _ p',_) -> trym p' t' + + (PAs x p',_) -> do + subst <- trym p' t' + return $ (x,t) : subst + + (PAlt p1 p2,_) -> checks [trym p1 t', trym p2 t'] + + (PNeg p',_) -> case tryMatch (p',t) of + Bad _ -> return [] + _ -> Bad (render (text "no match with negative pattern" <+> ppPatt Unqualified 0 p)) + + (PSeq p1 p2, ([],K s, [])) -> do + let cuts = [splitAt n s | n <- [0 .. length s]] + matches <- checks [mapM tryMatch [(p1,K s1),(p2,K s2)] | (s1,s2) <- cuts] + return (concat matches) + + (PRep p1, ([],K s, [])) -> checks [ + trym (foldr (const (PSeq p1)) (PString "") + [1..n]) t' | n <- [0 .. length s] + ] >> + return [] + + (PChar, ([],K [_], [])) -> return [] + (PChars cs, ([],K [c], [])) | elem c cs -> return [] + + _ -> Bad (render (text "no match in case expr for" <+> ppTerm Unqualified 0 t)) + +isInConstantForm :: Term -> Bool +isInConstantForm trm = case trm of + Cn _ -> True + Con _ -> True + Q _ _ -> True + QC _ _ -> True + Abs _ _ _ -> True + C c a -> isInConstantForm c && isInConstantForm a + App c a -> isInConstantForm c && isInConstantForm a + R r -> all (isInConstantForm . snd . snd) r + K _ -> True + Empty -> True + EInt _ -> True + _ -> False ---- isInArgVarForm trm + +varsOfPatt :: Patt -> [Ident] +varsOfPatt p = case p of + PV x -> [x] + PC _ ps -> concat $ map varsOfPatt ps + PP _ _ ps -> concat $ map varsOfPatt ps + PR r -> concat $ map (varsOfPatt . snd) r + PT _ q -> varsOfPatt q + _ -> [] + +-- | to search matching parameter combinations in tables +isMatchingForms :: [Patt] -> [Term] -> Bool +isMatchingForms ps ts = all match (zip ps ts') where + match (PC c cs, (Cn d, ds)) = c == d && isMatchingForms cs ds + match _ = True + ts' = map appForm ts + diff --git a/src/compiler/GF/Grammar/Predef.hs b/src/compiler/GF/Grammar/Predef.hs new file mode 100644 index 000000000..045df06ca --- /dev/null +++ b/src/compiler/GF/Grammar/Predef.hs @@ -0,0 +1,180 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Grammar.Predef +-- Maintainer : kr.angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Predefined identifiers and labels which the compiler knows +---------------------------------------------------------------------- + + +module GF.Grammar.Predef + ( cType + , cPType + , cTok + , cStr + , cStrs + , cPredefAbs, cPredefCnc, cPredef + , cInt + , cFloat + , cString + , cInts + , cPBool + , cErrorType + , cOverload + , cUndefinedType + , isPredefCat + + , cPTrue, cPFalse + + , cLength, cDrop, cTake, cTk, cDp, cEqStr, cOccur + , cOccurs, cEqInt, cLessInt, cPlus, cShow, cRead + , cToStr, cMapStr, cError + + -- hacks + , cMeta, cAs, cChar, cChars, cSeq, cAlt, cRep + , cNeg, cCNC, cConflict + ) where + +import GF.Infra.Ident +import qualified Data.ByteString.Char8 as BS + +cType :: Ident +cType = identC (BS.pack "Type") + +cPType :: Ident +cPType = identC (BS.pack "PType") + +cTok :: Ident +cTok = identC (BS.pack "Tok") + +cStr :: Ident +cStr = identC (BS.pack "Str") + +cStrs :: Ident +cStrs = identC (BS.pack "Strs") + +cPredefAbs :: Ident +cPredefAbs = identC (BS.pack "PredefAbs") + +cPredefCnc :: Ident +cPredefCnc = identC (BS.pack "PredefCnc") + +cPredef :: Ident +cPredef = identC (BS.pack "Predef") + +cInt :: Ident +cInt = identC (BS.pack "Int") + +cFloat :: Ident +cFloat = identC (BS.pack "Float") + +cString :: Ident +cString = identC (BS.pack "String") + +cInts :: Ident +cInts = identC (BS.pack "Ints") + +cPBool :: Ident +cPBool = identC (BS.pack "PBool") + +cErrorType :: Ident +cErrorType = identC (BS.pack "Error") + +cOverload :: Ident +cOverload = identC (BS.pack "overload") + +cUndefinedType :: Ident +cUndefinedType = identC (BS.pack "UndefinedType") + +isPredefCat :: Ident -> Bool +isPredefCat c = elem c [cInt,cString,cFloat] + +cPTrue :: Ident +cPTrue = identC (BS.pack "PTrue") + +cPFalse :: Ident +cPFalse = identC (BS.pack "PFalse") + +cLength :: Ident +cLength = identC (BS.pack "length") + +cDrop :: Ident +cDrop = identC (BS.pack "drop") + +cTake :: Ident +cTake = identC (BS.pack "take") + +cTk :: Ident +cTk = identC (BS.pack "tk") + +cDp :: Ident +cDp = identC (BS.pack "dp") + +cEqStr :: Ident +cEqStr = identC (BS.pack "eqStr") + +cOccur :: Ident +cOccur = identC (BS.pack "occur") + +cOccurs :: Ident +cOccurs = identC (BS.pack "occurs") + +cEqInt :: Ident +cEqInt = identC (BS.pack "eqInt") + +cLessInt :: Ident +cLessInt = identC (BS.pack "lessInt") + +cPlus :: Ident +cPlus = identC (BS.pack "plus") + +cShow :: Ident +cShow = identC (BS.pack "show") + +cRead :: Ident +cRead = identC (BS.pack "read") + +cToStr :: Ident +cToStr = identC (BS.pack "toStr") + +cMapStr :: Ident +cMapStr = identC (BS.pack "mapStr") + +cError :: Ident +cError = identC (BS.pack "error") + + +--- hacks: dummy identifiers used in various places +--- Not very nice! + +cMeta :: Ident +cMeta = identC (BS.singleton '?') + +cAs :: Ident +cAs = identC (BS.singleton '@') + +cChar :: Ident +cChar = identC (BS.singleton '?') + +cChars :: Ident +cChars = identC (BS.pack "[]") + +cSeq :: Ident +cSeq = identC (BS.pack "+") + +cAlt :: Ident +cAlt = identC (BS.pack "|") + +cRep :: Ident +cRep = identC (BS.pack "*") + +cNeg :: Ident +cNeg = identC (BS.pack "-") + +cCNC :: Ident +cCNC = identC (BS.pack "CNC") + +cConflict :: Ident +cConflict = IC (BS.pack "#conflict") diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs new file mode 100644 index 000000000..06cac9705 --- /dev/null +++ b/src/compiler/GF/Grammar/Printer.hs @@ -0,0 +1,317 @@ +----------------------------------------------------------------------
+-- |
+-- Module : GF.Grammar.Printer
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-----------------------------------------------------------------------------
+
+module GF.Grammar.Printer
+ ( TermPrintQual(..)
+ , ppIdent
+ , ppLabel
+ , ppModule
+ , ppJudgement
+ , ppTerm
+ , ppTermTabular
+ , ppPatt
+ , ppValue
+ , ppConstrs
+
+ , showTerm, TermPrintStyle(..)
+ ) where
+
+import GF.Infra.Ident
+import GF.Infra.Modules
+import GF.Infra.Option
+import GF.Grammar.Values
+import GF.Grammar.Grammar
+import GF.Data.Operations
+import Text.PrettyPrint
+
+import Data.Maybe (maybe)
+import Data.List (intersperse)
+
+data TermPrintQual = Qualified | Unqualified
+
+ppModule :: TermPrintQual -> SourceModule -> Doc
+ppModule q (mn, ModInfo mtype mstat opts exts with opens _ jments _) =
+ hdr $$ nest 2 (ppOptions opts $$ vcat (map (ppJudgement q) defs)) $$ ftr
+ where
+ defs = tree2list jments
+
+ hdr = complModDoc <+> modTypeDoc <+> equals <+>
+ hsep (intersperse (text "**") $
+ filter (not . isEmpty) $ [ commaPunct ppExtends exts
+ , maybe empty ppWith with
+ , if null opens
+ then lbrace
+ else text "open" <+> commaPunct ppOpenSpec opens <+> text "in" <+> lbrace
+ ])
+
+ ftr = rbrace
+
+ complModDoc =
+ case mstat of
+ MSComplete -> empty
+ MSIncomplete -> text "incomplete"
+
+ modTypeDoc =
+ case mtype of
+ MTAbstract -> text "abstract" <+> ppIdent mn
+ MTResource -> text "resource" <+> ppIdent mn
+ MTConcrete abs -> text "concrete" <+> ppIdent mn <+> text "of" <+> ppIdent abs
+ MTInterface -> text "interface" <+> ppIdent mn
+ MTInstance int -> text "instance" <+> ppIdent mn <+> text "of" <+> ppIdent int
+
+ ppExtends (id,MIAll ) = ppIdent id
+ ppExtends (id,MIOnly incs) = ppIdent id <+> brackets (commaPunct ppIdent incs)
+ ppExtends (id,MIExcept incs) = ppIdent id <+> char '-' <+> brackets (commaPunct ppIdent incs)
+
+ ppWith (id,ext,opens) = ppExtends (id,ext) <+> text "with" <+> commaPunct ppInstSpec opens
+
+ppOptions opts =
+ text "flags" $$
+ nest 2 (vcat [text option <+> equals <+> str value <+> semi | (option,value) <- optionsGFO opts])
+
+ppJudgement q (id, AbsCat pcont pconstrs) =
+ text "cat" <+> ppIdent id <+>
+ (case pcont of
+ Just cont -> hsep (map (ppDecl q) cont)
+ Nothing -> empty) <+> semi $$
+ case pconstrs of
+ Just costrs -> text "data" <+> ppIdent id <+> equals <+> fsep (intersperse (char '|') (map (ppTerm q 0) costrs)) <+> semi
+ Nothing -> empty
+ppJudgement q (id, AbsFun ptype _ pexp) =
+ (case ptype of
+ Just typ -> text "fun" <+> ppIdent id <+> colon <+> ppTerm q 0 typ <+> semi
+ Nothing -> empty) $$
+ (case pexp of
+ Just [] -> empty
+ Just eqs -> text "def" <+> vcat [ppIdent id <+> hsep (map (ppPatt q 2) ps) <+> equals <+> ppTerm q 0 e <+> semi | (ps,e) <- eqs]
+ Nothing -> empty)
+ppJudgement q (id, ResParam pparams _) =
+ text "param" <+> ppIdent id <+>
+ (case pparams of
+ Just ps -> equals <+> fsep (intersperse (char '|') (map (ppParam q) ps))
+ _ -> empty) <+> semi
+ppJudgement q (id, ResValue pvalue) = empty
+ppJudgement q (id, ResOper ptype pexp) =
+ text "oper" <+> ppIdent id <+>
+ (case ptype of {Just t -> colon <+> ppTerm q 0 t; Nothing -> empty} $$
+ case pexp of {Just e -> equals <+> ppTerm q 0 e; Nothing -> empty}) <+> semi
+ppJudgement q (id, ResOverload ids defs) =
+ text "oper" <+> ppIdent id <+> equals <+>
+ (text "overload" <+> lbrace $$
+ nest 2 (vcat [ppIdent id <+> (colon <+> ppTerm q 0 ty $$ equals <+> ppTerm q 0 e) | (ty,e) <- defs]) $$
+ rbrace) <+> semi
+ppJudgement q (id, CncCat ptype pexp pprn) =
+ (case ptype of
+ Just typ -> text "lincat" <+> ppIdent id <+> equals <+> ppTerm q 0 typ <+> semi
+ Nothing -> empty) $$
+ (case pexp of
+ Just exp -> text "lindef" <+> ppIdent id <+> equals <+> ppTerm q 0 exp <+> semi
+ Nothing -> empty) $$
+ (case pprn of
+ Just prn -> text "printname" <+> text "cat" <+> ppIdent id <+> equals <+> ppTerm q 0 prn <+> semi
+ Nothing -> empty)
+ppJudgement q (id, CncFun ptype pdef pprn) =
+ (case pdef of
+ Just e -> let (xs,e') = getAbs e
+ in text "lin" <+> ppIdent id <+> hsep (map ppBind xs) <+> equals <+> ppTerm q 0 e' <+> semi
+ Nothing -> empty) $$
+ (case pprn of
+ Just prn -> text "printname" <+> text "fun" <+> ppIdent id <+> equals <+> ppTerm q 0 prn <+> semi
+ Nothing -> empty)
+ppJudgement q (id, AnyInd cann mid) = text "ind" <+> ppIdent id <+> equals <+> (if cann then text "canonical" else empty) <+> ppIdent mid <+> semi
+
+ppTerm q d (Abs b v e) = let (xs,e') = getAbs (Abs b v e)
+ in prec d 0 (char '\\' <> commaPunct ppBind xs <+> text "->" <+> ppTerm q 0 e')
+ppTerm q d (T TRaw xs) = case getCTable (T TRaw xs) of
+ ([],_) -> text "table" <+> lbrace $$
+ nest 2 (vcat (punctuate semi (map (ppCase q) xs))) $$
+ rbrace
+ (vs,e) -> prec d 0 (text "\\\\" <> commaPunct ppIdent vs <+> text "=>" <+> ppTerm q 0 e)
+ppTerm q d (T (TTyped t) xs) = text "table" <+> ppTerm q 0 t <+> lbrace $$
+ nest 2 (vcat (punctuate semi (map (ppCase q) xs))) $$
+ rbrace
+ppTerm q d (T (TComp t) xs) = text "table" <+> ppTerm q 0 t <+> lbrace $$
+ nest 2 (vcat (punctuate semi (map (ppCase q) xs))) $$
+ rbrace
+ppTerm q d (T (TWild t) xs) = text "table" <+> ppTerm q 0 t <+> lbrace $$
+ nest 2 (vcat (punctuate semi (map (ppCase q) xs))) $$
+ rbrace
+ppTerm q d (Prod bt x a b)= if x == identW && bt == Explicit
+ then prec d 0 (ppTerm q 4 a <+> text "->" <+> ppTerm q 0 b)
+ else prec d 0 (parens (ppBind (bt,x) <+> colon <+> ppTerm q 0 a) <+> text "->" <+> ppTerm q 0 b)
+ppTerm q d (Table kt vt)=prec d 0 (ppTerm q 3 kt <+> text "=>" <+> ppTerm q 0 vt)
+ppTerm q d (Let l e) = let (ls,e') = getLet e
+ in prec d 0 (text "let" <+> vcat (map (ppLocDef q) (l:ls)) $$ text "in" <+> ppTerm q 0 e')
+ppTerm q d (Example e s)=prec d 0 (text "in" <+> ppTerm q 5 e <+> str s)
+ppTerm q d (C e1 e2) =prec d 1 (ppTerm q 2 e1 <+> text "++" <+> ppTerm q 1 e2)
+ppTerm q d (Glue e1 e2) =prec d 2 (ppTerm q 3 e1 <+> char '+' <+> ppTerm q 2 e2)
+ppTerm q d (S x y) = case x of
+ T annot xs -> let e = case annot of
+ TRaw -> y
+ TTyped t -> Typed y t
+ TComp t -> Typed y t
+ TWild t -> Typed y t
+ in text "case" <+> ppTerm q 0 e <+> text "of" <+> lbrace $$
+ nest 2 (vcat (punctuate semi (map (ppCase q) xs))) $$
+ rbrace
+ _ -> prec d 3 (ppTerm q 3 x <+> text "!" <+> ppTerm q 4 y)
+ppTerm q d (ExtR x y) = prec d 3 (ppTerm q 3 x <+> text "**" <+> ppTerm q 4 y)
+ppTerm q d (App x y) = prec d 4 (ppTerm q 4 x <+> ppTerm q 5 y)
+ppTerm q d (V e es) = text "table" <+> ppTerm q 6 e <+> lbrace $$
+ nest 2 (fsep (punctuate semi (map (ppTerm q 0) es))) $$
+ rbrace
+ppTerm q d (FV es) = text "variants" <+> braces (fsep (punctuate semi (map (ppTerm q 0) es)))
+ppTerm q d (Alts (e,xs))=text "pre" <+> braces (ppTerm q 0 e <> semi <+> fsep (punctuate semi (map (ppAltern q) xs)))
+ppTerm q d (Strs es) = text "strs" <+> braces (fsep (punctuate semi (map (ppTerm q 0) es)))
+ppTerm q d (EPatt p) = prec d 4 (char '#' <+> ppPatt q 2 p)
+ppTerm q d (EPattType t)=prec d 4 (text "pattern" <+> ppTerm q 0 t)
+ppTerm q d (P t l) = prec d 5 (ppTerm q 5 t <> char '.' <> ppLabel l)
+ppTerm q d (Cn id) = ppIdent id
+ppTerm q d (Vr id) = ppIdent id
+ppTerm q d (Q m id) = ppQIdent q m id
+ppTerm q d (QC m id) = ppQIdent q m id
+ppTerm q d (Sort id) = ppIdent id
+ppTerm q d (K s) = str s
+ppTerm q d (EInt n) = integer n
+ppTerm q d (EFloat f) = double f
+ppTerm q d (Meta _) = char '?'
+ppTerm q d (Empty) = text "[]"
+ppTerm q d (R xs) = braces (fsep (punctuate semi [ppLabel l <+>
+ fsep [case mb_t of {Just t -> colon <+> ppTerm q 0 t; Nothing -> empty},
+ equals <+> ppTerm q 0 e] | (l,(mb_t,e)) <- xs]))
+ppTerm q d (RecType xs)= braces (fsep (punctuate semi [ppLabel l <+> colon <+> ppTerm q 0 t | (l,t) <- xs]))
+ppTerm q d (Typed e t) = char '<' <> ppTerm q 0 e <+> colon <+> ppTerm q 0 t <> char '>'
+
+ppTermTabular :: TermPrintQual -> Term -> [(Doc,Doc)]
+ppTermTabular q = pr where
+ pr t = case t of
+ R rs ->
+ [(ppLabel lab <+> char '.' <+> path, str) | (lab,(_,val)) <- rs, (path,str) <- pr val]
+ T _ cs ->
+ [(ppPatt q 0 patt <+> text "=>" <+> path, str) | (patt, val ) <- cs, (path,str) <- pr val]
+ V _ cs ->
+ [(char '#' <> int i <+> text "=>" <+> path, str) | (i, val ) <- zip [0..] cs, (path,str) <- pr val]
+ _ -> [(empty,ps t)]
+ ps t = case t of
+ K s -> text s
+ C s u -> ps s <+> ps u
+ FV ts -> hsep (intersperse (char '/') (map ps ts))
+ _ -> ppTerm q 0 t
+
+ppEquation q (ps,e) = hcat (map (ppPatt q 2) ps) <+> text "->" <+> ppTerm q 0 e
+
+ppCase q (p,e) = ppPatt q 0 p <+> text "=>" <+> ppTerm q 0 e
+
+ppPatt q d (PAlt p1 p2) = prec d 0 (ppPatt q 0 p1 <+> char '|' <+> ppPatt q 1 p2)
+ppPatt q d (PSeq p1 p2) = prec d 0 (ppPatt q 0 p1 <+> char '+' <+> ppPatt q 1 p2)
+ppPatt q d (PC f ps) = if null ps
+ then ppIdent f
+ else prec d 1 (ppIdent f <+> hsep (map (ppPatt q 2) ps))
+ppPatt q d (PP f g ps) = if null ps
+ then ppQIdent q f g
+ else prec d 1 (ppQIdent q f g <+> hsep (map (ppPatt q 2) ps))
+ppPatt q d (PRep p) = prec d 1 (ppPatt q 2 p <> char '*')
+ppPatt q d (PAs f p) = prec d 1 (ppIdent f <> char '@' <> ppPatt q 2 p)
+ppPatt q d (PNeg p) = prec d 1 (char '-' <> ppPatt q 2 p)
+ppPatt q d (PChar) = char '?'
+ppPatt q d (PChars s) = brackets (str s)
+ppPatt q d (PMacro id) = char '#' <> ppIdent id
+ppPatt q d (PM m id) = char '#' <> ppIdent m <> char '.' <> ppIdent id
+ppPatt q d PW = char '_'
+ppPatt q d (PV id) = ppIdent id
+ppPatt q d (PInt n) = integer n
+ppPatt q d (PFloat f) = double f
+ppPatt q d (PString s) = str s
+ppPatt q d (PR xs) = braces (hsep (punctuate semi [ppLabel l <+> equals <+> ppPatt q 0 e | (l,e) <- xs]))
+
+ppValue :: TermPrintQual -> Int -> Val -> Doc
+ppValue q d (VGen i x) = ppIdent x <> text "{-" <> int i <> text "-}" ---- latter part for debugging
+ppValue q d (VApp u v) = prec d 4 (ppValue q 4 u <+> ppValue q 5 v)
+ppValue q d (VCn (_,c)) = ppIdent c
+ppValue q d (VClos env e) = case e of
+ Meta _ -> ppTerm q d e <> ppEnv env
+ _ -> ppTerm q d e ---- ++ prEnv env ---- for debugging
+ppValue q d (VRecType xs) = braces (hsep (punctuate comma [ppLabel l <> char '=' <> ppValue q 0 v | (l,v) <- xs]))
+ppValue q d VType = text "Type"
+
+ppConstrs :: Constraints -> [Doc]
+ppConstrs = map (\(v,w) -> braces (ppValue Unqualified 0 v <+> text "<>" <+> ppValue Unqualified 0 w))
+
+ppEnv :: Env -> Doc
+ppEnv e = hcat (map (\(x,t) -> braces (ppIdent x <> text ":=" <> ppValue Unqualified 0 t)) e)
+
+str s = doubleQuotes (text s)
+
+ppDecl q (_,id,typ)
+ | id == identW = ppTerm q 4 typ
+ | otherwise = parens (ppIdent id <+> colon <+> ppTerm q 0 typ)
+
+ppDDecl q (_,id,typ)
+ | id == identW = ppTerm q 6 typ
+ | otherwise = parens (ppIdent id <+> colon <+> ppTerm q 0 typ)
+
+ppIdent = text . showIdent
+
+ppQIdent q m id =
+ case q of
+ Qualified -> ppIdent m <> char '.' <> ppIdent id
+ Unqualified -> ppIdent id
+
+ppLabel = ppIdent . label2ident
+
+ppOpenSpec (OSimple id) = ppIdent id
+ppOpenSpec (OQualif id n) = parens (ppIdent id <+> equals <+> ppIdent n)
+
+ppInstSpec (id,n) = parens (ppIdent id <+> equals <+> ppIdent n)
+
+ppLocDef q (id, (mbt, e)) =
+ ppIdent id <+>
+ (case mbt of {Just t -> colon <+> ppTerm q 0 t; Nothing -> empty} <+> equals <+> ppTerm q 0 e) <+> semi
+
+ppBind (Explicit,v) = ppIdent v
+ppBind (Implicit,v) = braces (ppIdent v)
+
+ppAltern q (x,y) = ppTerm q 0 x <+> char '/' <+> ppTerm q 0 y
+
+ppParam q (id,cxt) = ppIdent id <+> hsep (map (ppDDecl q) cxt)
+
+commaPunct f ds = (hcat (punctuate comma (map f ds)))
+
+prec d1 d2 doc
+ | d1 > d2 = parens doc
+ | otherwise = doc
+
+getAbs :: Term -> ([(BindType,Ident)], Term)
+getAbs (Abs bt v e) = let (xs,e') = getAbs e
+ in ((bt,v):xs,e')
+getAbs e = ([],e)
+
+getCTable :: Term -> ([Ident], Term)
+getCTable (T TRaw [(PV v,e)]) = let (vs,e') = getCTable e
+ in (v:vs,e')
+getCTable (T TRaw [(PW, e)]) = let (vs,e') = getCTable e
+ in (identW:vs,e')
+getCTable e = ([],e)
+
+getLet :: Term -> ([LocalDef], Term)
+getLet (Let l e) = let (ls,e') = getLet e
+ in (l:ls,e')
+getLet e = ([],e)
+
+showTerm :: TermPrintStyle -> TermPrintQual -> Term -> String
+showTerm style q t = render $
+ case style of
+ TermPrintTable -> vcat [p <+> s | (p,s) <- ppTermTabular q t]
+ TermPrintAll -> vcat [ s | (p,s) <- ppTermTabular q t]
+ TermPrintDefault -> ppTerm q 0 t
+
+data TermPrintStyle
+ = TermPrintTable
+ | TermPrintAll
+ | TermPrintDefault
diff --git a/src/compiler/GF/Grammar/Unify.hs b/src/compiler/GF/Grammar/Unify.hs new file mode 100644 index 000000000..9bb49cfe2 --- /dev/null +++ b/src/compiler/GF/Grammar/Unify.hs @@ -0,0 +1,97 @@ +---------------------------------------------------------------------- +-- | +-- Module : Unify +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:31 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.4 $ +-- +-- (c) Petri Mäenpää & Aarne Ranta, 1998--2001 +-- +-- brute-force adaptation of the old-GF program AR 21\/12\/2001 --- +-- the only use is in 'TypeCheck.splitConstraints' +----------------------------------------------------------------------------- + +module GF.Grammar.Unify (unifyVal) where + +import GF.Grammar +import GF.Data.Operations + +import Text.PrettyPrint +import Data.List (partition) + +unifyVal :: Constraints -> Err (Constraints,MetaSubst) +unifyVal cs0 = do + let (cs1,cs2) = partition notSolvable cs0 + let (us,vs) = unzip cs2 + us' <- mapM val2exp us + vs' <- mapM val2exp vs + let (ms,cs) = unifyAll (zip us' vs') [] + return (cs1 ++ [(VClos [] t, VClos [] u) | (t,u) <- cs], + [(m, VClos [] t) | (m,t) <- ms]) + where + notSolvable (v,w) = case (v,w) of -- don't consider nonempty closures + (VClos (_:_) _,_) -> True + (_,VClos (_:_) _) -> True + _ -> False + +type Unifier = [(MetaId, Term)] +type Constrs = [(Term, Term)] + +unifyAll :: Constrs -> Unifier -> (Unifier,Constrs) +unifyAll [] g = (g, []) +unifyAll ((a@(s, t)) : l) g = + let (g1, c) = unifyAll l g + in case unify s t g1 of + Ok g2 -> (g2, c) + _ -> (g1, a : c) + +unify :: Term -> Term -> Unifier -> Err Unifier +unify e1 e2 g = + case (e1, e2) of + (Meta s, t) -> do + tg <- subst_all g t + let sg = maybe e1 id (lookup s g) + if (sg == Meta s) then extend g s tg else unify sg tg g + (t, Meta s) -> unify e2 e1 g + (Q _ a, Q _ b) | (a == b) -> return g ---- qualif? + (QC _ a, QC _ b) | (a == b) -> return g ---- + (Vr x, Vr y) | (x == y) -> return g + (Abs _ x b, Abs _ y c) -> do let c' = substTerm [x] [(y,Vr x)] c + unify b c' g + (App c a, App d b) -> case unify c d g of + Ok g1 -> unify a b g1 + _ -> Bad (render (text "fail unify" <+> ppTerm Unqualified 0 e1)) + (RecType xs,RecType ys) | xs == ys -> return g + _ -> Bad (render (text "fail unify" <+> ppTerm Unqualified 0 e1)) + +extend :: Unifier -> MetaId -> Term -> Err Unifier +extend g s t | (t == Meta s) = return g + | occCheck s t = Bad (render (text "occurs check" <+> ppTerm Unqualified 0 t)) + | True = return ((s, t) : g) + +subst_all :: Unifier -> Term -> Err Term +subst_all s u = + case (s,u) of + ([], t) -> return t + (a : l, t) -> do + t' <- (subst_all l t) --- successive substs - why ? + return $ substMetas [a] t' + +substMetas :: [(MetaId,Term)] -> Term -> Term +substMetas subst trm = case trm of + Meta x -> case lookup x subst of + Just t -> t + _ -> trm + _ -> composSafeOp (substMetas subst) trm + +occCheck :: MetaId -> Term -> Bool +occCheck s u = case u of + Meta v -> s == v + App c a -> occCheck s c || occCheck s a + Abs _ x b -> occCheck s b + _ -> False + diff --git a/src/compiler/GF/Grammar/Values.hs b/src/compiler/GF/Grammar/Values.hs new file mode 100644 index 000000000..1a68ddc89 --- /dev/null +++ b/src/compiler/GF/Grammar/Values.hs @@ -0,0 +1,96 @@ +---------------------------------------------------------------------- +-- | +-- Module : Values +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:32 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.7 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Grammar.Values (-- * values used in TC type checking + Exp, Val(..), Env, + -- * annotated tree used in editing +--Z Tree, TrNode(..), Atom(..), + Binds, Constraints, MetaSubst, + -- * for TC + valAbsInt, valAbsFloat, valAbsString, vType, + isPredefCat, + eType, +--Z tree2exp, loc2treeFocus + ) where + +import GF.Data.Operations +---Z import GF.Data.Zipper + +import GF.Infra.Ident +import GF.Grammar.Grammar +import GF.Grammar.Predef + +-- values used in TC type checking + +type Exp = Term + +data Val = VGen Int Ident | VApp Val Val | VCn QIdent | VRecType [(Label,Val)] | VType | VClos Env Exp + deriving (Eq,Show) + +type Env = [(Ident,Val)] + +{- +-- annotated tree used in editing + +type Tree = Tr TrNode + +newtype TrNode = N (Binds,Atom,Val,(Constraints,MetaSubst),Bool) + deriving (Eq,Show) + +data Atom = + AtC Fun | AtM MetaId | AtV Ident | AtL String | AtI Integer | AtF Double + deriving (Eq,Show) +-} +type Binds = [(Ident,Val)] +type Constraints = [(Val,Val)] +type MetaSubst = [(MetaId,Val)] + + +-- for TC + +valAbsInt :: Val +valAbsInt = VCn (cPredefAbs, cInt) + +valAbsFloat :: Val +valAbsFloat = VCn (cPredefAbs, cFloat) + +valAbsString :: Val +valAbsString = VCn (cPredefAbs, cString) + +vType :: Val +vType = VType + +eType :: Exp +eType = Sort cType + +{- +tree2exp :: Tree -> Exp +tree2exp (Tr (N (bi,at,_,_,_),ts)) = foldr Abs (foldl App at' ts') bi' where + at' = case at of + AtC (m,c) -> Q m c + AtV i -> Vr i + AtM m -> Meta m + AtL s -> K s + AtI s -> EInt s + AtF s -> EFloat s + bi' = map fst bi + ts' = map tree2exp ts + +loc2treeFocus :: Loc TrNode -> Tree +loc2treeFocus (Loc (Tr (a,ts),p)) = + loc2tree (Loc (Tr (mark a, map (mapTr nomark) ts), mapPath nomark p)) + where + (mark, nomark) = (\(N (a,b,c,d,_)) -> N(a,b,c,d,True), + \(N (a,b,c,d,_)) -> N(a,b,c,d,False)) +-} diff --git a/src/compiler/GF/Infra/CheckM.hs b/src/compiler/GF/Infra/CheckM.hs new file mode 100644 index 000000000..8a1b42cdf --- /dev/null +++ b/src/compiler/GF/Infra/CheckM.hs @@ -0,0 +1,77 @@ +---------------------------------------------------------------------- +-- | +-- Module : CheckM +-- Maintainer : (Maintainer) +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:33 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.5 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Infra.CheckM + (Check, Message, runCheck, + checkError, checkCond, checkWarn, + checkErr, checkIn, checkMap + ) where + +import GF.Data.Operations +import GF.Infra.Ident +import GF.Grammar.Grammar +import GF.Grammar.Printer + +import qualified Data.Map as Map +import Text.PrettyPrint + +type Message = Doc +data CheckResult a + = Fail [Message] + | Success a [Message] +newtype Check a = Check {unCheck :: Context -> [Message] -> CheckResult a} + +instance Monad Check where + return x = Check (\ctxt msgs -> Success x msgs) + f >>= g = Check (\ctxt msgs -> case unCheck f ctxt msgs of + Success x msgs -> unCheck (g x) ctxt msgs + Fail msgs -> Fail msgs) + +instance ErrorMonad Check where + raise s = checkError (text s) + handle f h = Check (\ctxt msgs -> case unCheck f ctxt msgs of + Success x msgs -> Success x msgs + Fail (msg:msgs) -> unCheck (h (render msg)) ctxt msgs) + +checkError :: Message -> Check a +checkError msg = Check (\ctxt msgs -> Fail (msg : msgs)) + +checkCond :: Message -> Bool -> Check () +checkCond s b = if b then return () else checkError s + +-- | warnings should be reversed in the end +checkWarn :: Message -> Check () +checkWarn msg = Check (\ctxt msgs -> Success () ((text "Warning:" <+> msg) : msgs)) + +runCheck :: Check a -> Err (a,String) +runCheck c = + case unCheck c [] [] of + Fail msgs -> Bad ( render (vcat (reverse msgs))) + Success v msgs -> Ok (v, render (vcat (reverse msgs))) + +checkMap :: (Ord a) => (a -> b -> Check b) -> Map.Map a b -> Check (Map.Map a b) +checkMap f map = do xs <- mapM (\(k,v) -> do v <- f k v + return (k,v)) (Map.toList map) + return (Map.fromAscList xs) + +checkErr :: Err a -> Check a +checkErr (Ok x) = return x +checkErr (Bad err) = checkError (text err) + +checkIn :: Doc -> Check a -> Check a +checkIn msg c = Check $ \ctxt msgs -> + case unCheck c ctxt [] of + Fail msgs' -> Fail ((msg $$ nest 3 (vcat (reverse msgs'))) : msgs) + Success v msgs' | null msgs' -> Success v msgs + | otherwise -> Success v ((msg $$ nest 3 (vcat (reverse msgs'))) : msgs) diff --git a/src/compiler/GF/Infra/CompactPrint.hs b/src/compiler/GF/Infra/CompactPrint.hs new file mode 100644 index 000000000..486c9e183 --- /dev/null +++ b/src/compiler/GF/Infra/CompactPrint.hs @@ -0,0 +1,22 @@ +module GF.Infra.CompactPrint where +import Data.Char + +compactPrint = compactPrintCustom keywordGF (const False) + +compactPrintGFCC = compactPrintCustom (const False) keywordGFCC + +compactPrintCustom pre post = dps . concat . map (spaceIf pre post) . words + +dps = dropWhile isSpace + +spaceIf pre post w = case w of + _ | pre w -> "\n" ++ w + _ | post w -> w ++ "\n" + c:_ | isAlpha c || isDigit c -> " " ++ w + '_':_ -> " " ++ w + _ -> w + +keywordGF w = elem w ["cat","fun","lin","lincat","lindef","oper","param"] +keywordGFCC w = + last w == ';' || + elem w ["flags","fun","cat","lin","oper","lincat","lindef","printname","param"] diff --git a/src/compiler/GF/Infra/Dependencies.hs b/src/compiler/GF/Infra/Dependencies.hs new file mode 100644 index 000000000..af2088711 --- /dev/null +++ b/src/compiler/GF/Infra/Dependencies.hs @@ -0,0 +1,61 @@ +module GF.Infra.Dependencies ( + depGraph + ) where + +import GF.Grammar.Grammar +import GF.Infra.Modules +import GF.Infra.Ident + +depGraph :: SourceGrammar -> String +depGraph = prDepGraph . grammar2moddeps + +prDepGraph :: [(Ident,ModDeps)] -> String +prDepGraph deps = unlines $ [ + "digraph {" + ] ++ + map mkNode deps ++ + concatMap mkArrows deps ++ [ + "}" + ] + where + mkNode (i,dep) = unwords [showIdent i, "[",nodeAttr (modtype dep),"]"] + nodeAttr ty = case ty of + MTAbstract -> "style = \"solid\", shape = \"box\"" + MTConcrete _ -> "style = \"solid\", shape = \"ellipse\"" + _ -> "style = \"dashed\", shape = \"ellipse\"" + mkArrows (i,dep) = + [unwords [showIdent i,"->",showIdent j,"[",arrowAttr "of","]"] | j <- ofs dep] ++ + [unwords [showIdent i,"->",showIdent j,"[",arrowAttr "ex","]"] | j <- extendeds dep] ++ + [unwords [showIdent i,"->",showIdent j,"[",arrowAttr "op","]"] | j <- openeds dep] ++ + [unwords [showIdent i,"->",showIdent j,"[",arrowAttr "ed","]"] | j <- extrads dep] + arrowAttr s = case s of + "of" -> "style = \"solid\", arrowhead = \"empty\"" + "ex" -> "style = \"solid\"" + "op" -> "style = \"dashed\"" + "ed" -> "style = \"dotted\"" + +data ModDeps = ModDeps { + modtype :: ModuleType Ident, + ofs :: [Ident], + extendeds :: [Ident], + openeds :: [Ident], + extrads :: [Ident], + functors :: [Ident], + interfaces :: [Ident], + instances :: [Ident] + } + +noModDeps = ModDeps MTAbstract [] [] [] [] [] [] [] + +grammar2moddeps :: SourceGrammar -> [(Ident,ModDeps)] +grammar2moddeps gr = [(i,depMod m) | (i,m) <- modules gr] where + depMod m = noModDeps{ + modtype = mtype m, + ofs = case mtype m of + MTConcrete i -> [i] + MTInstance i -> [i] + _ -> [], + extendeds = map fst (extend m), + openeds = map openedModule (opens m), + extrads = mexdeps m + } diff --git a/src/compiler/GF/Infra/GetOpt.hs b/src/compiler/GF/Infra/GetOpt.hs new file mode 100644 index 000000000..ede561c90 --- /dev/null +++ b/src/compiler/GF/Infra/GetOpt.hs @@ -0,0 +1,381 @@ +-- This is a version of System.Console.GetOpt which has been hacked to +-- support long options with a single dash. Since we don't want the annoying +-- clash with short options that start with the same character as a long +-- one, we don't allow short options to be given together (e.g. -zxf), +-- nor do we allow options to be given as any unique prefix. + +----------------------------------------------------------------------------- +-- | +-- Module : System.Console.GetOpt +-- Copyright : (c) Sven Panne 2002-2005 +-- License : BSD-style (see the file libraries/base/LICENSE) +-- +-- Maintainer : libraries@haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- This library provides facilities for parsing the command-line options +-- in a standalone program. It is essentially a Haskell port of the GNU +-- @getopt@ library. +-- +----------------------------------------------------------------------------- + +{- +Sven Panne <Sven.Panne@informatik.uni-muenchen.de> Oct. 1996 (small +changes Dec. 1997) + +Two rather obscure features are missing: The Bash 2.0 non-option hack +(if you don't already know it, you probably don't want to hear about +it...) and the recognition of long options with a single dash +(e.g. '-help' is recognised as '--help', as long as there is no short +option 'h'). + +Other differences between GNU's getopt and this implementation: + +* To enforce a coherent description of options and arguments, there + are explanation fields in the option/argument descriptor. + +* Error messages are now more informative, but no longer POSIX + compliant... :-( + +And a final Haskell advertisement: The GNU C implementation uses well +over 1100 lines, we need only 195 here, including a 46 line example! +:-) +-} + +--module System.Console.GetOpt ( +module GF.Infra.GetOpt ( + -- * GetOpt + getOpt, getOpt', + usageInfo, + ArgOrder(..), + OptDescr(..), + ArgDescr(..), + + -- * Examples + + -- |To hopefully illuminate the role of the different data structures, + -- here are the command-line options for a (very simple) compiler, + -- done in two different ways. + -- The difference arises because the type of 'getOpt' is + -- parameterized by the type of values derived from flags. + + -- ** Interpreting flags as concrete values + -- $example1 + + -- ** Interpreting flags as transformations of an options record + -- $example2 +) where + +import Prelude -- necessary to get dependencies right + +import Data.List ( isPrefixOf, find ) + +-- |What to do with options following non-options +data ArgOrder a + = RequireOrder -- ^ no option processing after first non-option + | Permute -- ^ freely intersperse options and non-options + | ReturnInOrder (String -> a) -- ^ wrap non-options into options + +{-| +Each 'OptDescr' describes a single option. + +The arguments to 'Option' are: + +* list of short option characters + +* list of long option strings (without \"--\") + +* argument descriptor + +* explanation of option for user +-} +data OptDescr a = -- description of a single options: + Option [Char] -- list of short option characters + [String] -- list of long option strings (without "--") + (ArgDescr a) -- argument descriptor + String -- explanation of option for user + +-- |Describes whether an option takes an argument or not, and if so +-- how the argument is injected into a value of type @a@. +data ArgDescr a + = NoArg a -- ^ no argument expected + | ReqArg (String -> a) String -- ^ option requires argument + | OptArg (Maybe String -> a) String -- ^ optional argument + +data OptKind a -- kind of cmd line arg (internal use only): + = Opt a -- an option + | UnreqOpt String -- an un-recognized option + | NonOpt String -- a non-option + | EndOfOpts -- end-of-options marker (i.e. "--") + | OptErr String -- something went wrong... + +-- | Return a string describing the usage of a command, derived from +-- the header (first argument) and the options described by the +-- second argument. +usageInfo :: String -- header + -> [OptDescr a] -- option descriptors + -> String -- nicely formatted decription of options +usageInfo header optDescr = unlines (header:table) + where (ss,ls,ds) = (unzip3 . concatMap fmtOpt) optDescr + table = zipWith3 paste (sameLen ss) (sameLen ls) ds + paste x y z = " " ++ x ++ " " ++ y ++ " " ++ z + sameLen xs = flushLeft ((maximum . map length) xs) xs + flushLeft n xs = [ take n (x ++ repeat ' ') | x <- xs ] + +fmtOpt :: OptDescr a -> [(String,String,String)] +fmtOpt (Option sos los ad descr) = + case lines descr of + [] -> [(sosFmt,losFmt,"")] + (d:ds) -> (sosFmt,losFmt,d) : [ ("","",d') | d' <- ds ] + where sepBy _ [] = "" + sepBy _ [x] = x + sepBy ch (x:xs) = x ++ ch:' ':sepBy ch xs + sosFmt = sepBy ',' (map (fmtShort ad) sos) + losFmt = sepBy ',' (map (fmtLong ad) los) + +fmtShort :: ArgDescr a -> Char -> String +fmtShort (NoArg _ ) so = "-" ++ [so] +fmtShort (ReqArg _ ad) so = "-" ++ [so] ++ " " ++ ad +fmtShort (OptArg _ ad) so = "-" ++ [so] ++ "[" ++ ad ++ "]" + +fmtLong :: ArgDescr a -> String -> String +fmtLong (NoArg _ ) lo = "--" ++ lo +fmtLong (ReqArg _ ad) lo = "--" ++ lo ++ "=" ++ ad +fmtLong (OptArg _ ad) lo = "--" ++ lo ++ "[=" ++ ad ++ "]" + +{-| +Process the command-line, and return the list of values that matched +(and those that didn\'t). The arguments are: + +* The order requirements (see 'ArgOrder') + +* The option descriptions (see 'OptDescr') + +* The actual command line arguments (presumably got from + 'System.Environment.getArgs'). + +'getOpt' returns a triple consisting of the option arguments, a list +of non-options, and a list of error messages. +-} +getOpt :: ArgOrder a -- non-option handling + -> [OptDescr a] -- option descriptors + -> [String] -- the command-line arguments + -> ([a],[String],[String]) -- (options,non-options,error messages) +getOpt ordering optDescr args = (os,xs,es ++ map errUnrec us) + where (os,xs,us,es) = getOpt' ordering optDescr args + +{-| +This is almost the same as 'getOpt', but returns a quadruple +consisting of the option arguments, a list of non-options, a list of +unrecognized options, and a list of error messages. +-} +getOpt' :: ArgOrder a -- non-option handling + -> [OptDescr a] -- option descriptors + -> [String] -- the command-line arguments + -> ([a],[String], [String] ,[String]) -- (options,non-options,unrecognized,error messages) +getOpt' _ _ [] = ([],[],[],[]) +getOpt' ordering optDescr (arg:args) = procNextOpt opt ordering + where procNextOpt (Opt o) _ = (o:os,xs,us,es) + procNextOpt (UnreqOpt u) _ = (os,xs,u:us,es) + procNextOpt (NonOpt x) RequireOrder = ([],x:rest,[],[]) + procNextOpt (NonOpt x) Permute = (os,x:xs,us,es) + procNextOpt (NonOpt x) (ReturnInOrder f) = (f x :os, xs,us,es) + procNextOpt EndOfOpts RequireOrder = ([],rest,[],[]) + procNextOpt EndOfOpts Permute = ([],rest,[],[]) + procNextOpt EndOfOpts (ReturnInOrder f) = (map f rest,[],[],[]) + procNextOpt (OptErr e) _ = (os,xs,us,e:es) + + (opt,rest) = getNext arg args optDescr + (os,xs,us,es) = getOpt' ordering optDescr rest + +-- take a look at the next cmd line arg and decide what to do with it +getNext :: String -> [String] -> [OptDescr a] -> (OptKind a,[String]) +getNext ('-':'-':[]) rest _ = (EndOfOpts,rest) +getNext ('-':'-':xs) rest optDescr = longOpt xs rest optDescr +getNext ('-' :xs) rest optDescr = longOpt xs rest optDescr +getNext a rest _ = (NonOpt a,rest) + +-- handle long option +longOpt :: String -> [String] -> [OptDescr a] -> (OptKind a,[String]) +longOpt ls rs optDescr = long ads arg rs + where (opt,arg) = break (=='=') ls + options = [ o | o@(Option ss xs _ _) <- optDescr + , opt `elem` map (:[]) ss || opt `elem` xs ] + ads = [ ad | Option _ _ ad _ <- options ] + optStr = ("--"++opt) + + long (_:_:_) _ rest = (errAmbig options optStr,rest) + long [NoArg a ] [] rest = (Opt a,rest) + long [NoArg _ ] ('=':_) rest = (errNoArg optStr,rest) + long [ReqArg _ d] [] [] = (errReq d optStr,[]) + long [ReqArg f _] [] (r:rest) = (Opt (f r),rest) + long [ReqArg f _] ('=':xs) rest = (Opt (f xs),rest) + long [OptArg f _] [] rest = (Opt (f Nothing),rest) + long [OptArg f _] ('=':xs) rest = (Opt (f (Just xs)),rest) + long _ _ rest = (UnreqOpt ("--"++ls),rest) + + +-- miscellaneous error formatting + +errAmbig :: [OptDescr a] -> String -> OptKind a +errAmbig ods optStr = OptErr (usageInfo header ods) + where header = "option `" ++ optStr ++ "' is ambiguous; could be one of:" + +errReq :: String -> String -> OptKind a +errReq d optStr = OptErr ("option `" ++ optStr ++ "' requires an argument " ++ d ++ "\n") + +errUnrec :: String -> String +errUnrec optStr = "unrecognized option `" ++ optStr ++ "'\n" + +errNoArg :: String -> OptKind a +errNoArg optStr = OptErr ("option `" ++ optStr ++ "' doesn't allow an argument\n") + +{- +----------------------------------------------------------------------------------------- +-- and here a small and hopefully enlightening example: + +data Flag = Verbose | Version | Name String | Output String | Arg String deriving Show + +options :: [OptDescr Flag] +options = + [Option ['v'] ["verbose"] (NoArg Verbose) "verbosely list files", + Option ['V','?'] ["version","release"] (NoArg Version) "show version info", + Option ['o'] ["output"] (OptArg out "FILE") "use FILE for dump", + Option ['n'] ["name"] (ReqArg Name "USER") "only dump USER's files"] + +out :: Maybe String -> Flag +out Nothing = Output "stdout" +out (Just o) = Output o + +test :: ArgOrder Flag -> [String] -> String +test order cmdline = case getOpt order options cmdline of + (o,n,[] ) -> "options=" ++ show o ++ " args=" ++ show n ++ "\n" + (_,_,errs) -> concat errs ++ usageInfo header options + where header = "Usage: foobar [OPTION...] files..." + +-- example runs: +-- putStr (test RequireOrder ["foo","-v"]) +-- ==> options=[] args=["foo", "-v"] +-- putStr (test Permute ["foo","-v"]) +-- ==> options=[Verbose] args=["foo"] +-- putStr (test (ReturnInOrder Arg) ["foo","-v"]) +-- ==> options=[Arg "foo", Verbose] args=[] +-- putStr (test Permute ["foo","--","-v"]) +-- ==> options=[] args=["foo", "-v"] +-- putStr (test Permute ["-?o","--name","bar","--na=baz"]) +-- ==> options=[Version, Output "stdout", Name "bar", Name "baz"] args=[] +-- putStr (test Permute ["--ver","foo"]) +-- ==> option `--ver' is ambiguous; could be one of: +-- -v --verbose verbosely list files +-- -V, -? --version, --release show version info +-- Usage: foobar [OPTION...] files... +-- -v --verbose verbosely list files +-- -V, -? --version, --release show version info +-- -o[FILE] --output[=FILE] use FILE for dump +-- -n USER --name=USER only dump USER's files +----------------------------------------------------------------------------------------- +-} + +{- $example1 + +A simple choice for the type associated with flags is to define a type +@Flag@ as an algebraic type representing the possible flags and their +arguments: + +> module Opts1 where +> +> import System.Console.GetOpt +> import Data.Maybe ( fromMaybe ) +> +> data Flag +> = Verbose | Version +> | Input String | Output String | LibDir String +> deriving Show +> +> options :: [OptDescr Flag] +> options = +> [ Option ['v'] ["verbose"] (NoArg Verbose) "chatty output on stderr" +> , Option ['V','?'] ["version"] (NoArg Version) "show version number" +> , Option ['o'] ["output"] (OptArg outp "FILE") "output FILE" +> , Option ['c'] [] (OptArg inp "FILE") "input FILE" +> , Option ['L'] ["libdir"] (ReqArg LibDir "DIR") "library directory" +> ] +> +> inp,outp :: Maybe String -> Flag +> outp = Output . fromMaybe "stdout" +> inp = Input . fromMaybe "stdin" +> +> compilerOpts :: [String] -> IO ([Flag], [String]) +> compilerOpts argv = +> case getOpt Permute options argv of +> (o,n,[] ) -> return (o,n) +> (_,_,errs) -> ioError (userError (concat errs ++ usageInfo header options)) +> where header = "Usage: ic [OPTION...] files..." + +Then the rest of the program will use the constructed list of flags +to determine it\'s behaviour. + +-} + +{- $example2 + +A different approach is to group the option values in a record of type +@Options@, and have each flag yield a function of type +@Options -> Options@ transforming this record. + +> module Opts2 where +> +> import System.Console.GetOpt +> import Data.Maybe ( fromMaybe ) +> +> data Options = Options +> { optVerbose :: Bool +> , optShowVersion :: Bool +> , optOutput :: Maybe FilePath +> , optInput :: Maybe FilePath +> , optLibDirs :: [FilePath] +> } deriving Show +> +> defaultOptions = Options +> { optVerbose = False +> , optShowVersion = False +> , optOutput = Nothing +> , optInput = Nothing +> , optLibDirs = [] +> } +> +> options :: [OptDescr (Options -> Options)] +> options = +> [ Option ['v'] ["verbose"] +> (NoArg (\ opts -> opts { optVerbose = True })) +> "chatty output on stderr" +> , Option ['V','?'] ["version"] +> (NoArg (\ opts -> opts { optShowVersion = True })) +> "show version number" +> , Option ['o'] ["output"] +> (OptArg ((\ f opts -> opts { optOutput = Just f }) . fromMaybe "output") +> "FILE") +> "output FILE" +> , Option ['c'] [] +> (OptArg ((\ f opts -> opts { optInput = Just f }) . fromMaybe "input") +> "FILE") +> "input FILE" +> , Option ['L'] ["libdir"] +> (ReqArg (\ d opts -> opts { optLibDirs = optLibDirs opts ++ [d] }) "DIR") +> "library directory" +> ] +> +> compilerOpts :: [String] -> IO (Options, [String]) +> compilerOpts argv = +> case getOpt Permute options argv of +> (o,n,[] ) -> return (foldl (flip id) defaultOptions o, n) +> (_,_,errs) -> ioError (userError (concat errs ++ usageInfo header options)) +> where header = "Usage: ic [OPTION...] files..." + +Similarly, each flag could yield a monadic function transforming a record, +of type @Options -> IO Options@ (or any other monad), allowing option +processing to perform actions of the chosen monad, e.g. printing help or +version messages, checking that file arguments exist, etc. + +-} diff --git a/src/compiler/GF/Infra/Ident.hs b/src/compiler/GF/Infra/Ident.hs new file mode 100644 index 000000000..efe6f9261 --- /dev/null +++ b/src/compiler/GF/Infra/Ident.hs @@ -0,0 +1,152 @@ +---------------------------------------------------------------------- +-- | +-- Module : Ident +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/15 11:43:33 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.8 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Infra.Ident (-- * Identifiers + Ident(..), ident2bs, showIdent, + identC, identV, identA, identAV, identW, + argIdent, varStr, varX, isWildIdent, varIndex, + -- * refreshing identifiers + IdState, initIdStateN, initIdState, + lookVar, refVar, refVarPlus + ) where + +import GF.Data.Operations +import qualified Data.ByteString.Char8 as BS +-- import Monad + + +-- | the constructors labelled /INTERNAL/ are +-- internal representation never returned by the parser +data Ident = + IC {-# UNPACK #-} !BS.ByteString -- ^ raw identifier after parsing, resolved in Rename + | IW -- ^ wildcard +-- +-- below this constructor: internal representation never returned by the parser + | IV {-# UNPACK #-} !BS.ByteString {-# UNPACK #-} !Int -- ^ /INTERNAL/ variable + | IA {-# UNPACK #-} !BS.ByteString {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat at position + | IAV {-# UNPACK #-} !BS.ByteString {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat with bindings at position +-- + + deriving (Eq, Ord, Show, Read) + +ident2bs :: Ident -> BS.ByteString +ident2bs i = case i of + IC s -> s + IV s n -> BS.append s (BS.pack ('_':show n)) + IA s j -> BS.append s (BS.pack ('_':show j)) + IAV s b j -> BS.append s (BS.pack ('_':show b ++ '_':show j)) + IW -> BS.pack "_" + +showIdent :: Ident -> String +showIdent i = BS.unpack $! ident2bs i + +identC :: BS.ByteString -> Ident +identV :: BS.ByteString -> Int -> Ident +identA :: BS.ByteString -> Int -> Ident +identAV:: BS.ByteString -> Int -> Int -> Ident +identW :: Ident +(identC, identV, identA, identAV, identW) = + (IC, IV, IA, IAV, IW) + +-- normal identifier +-- ident s = IC s + +-- | to mark argument variables +argIdent :: Int -> Ident -> Int -> Ident +argIdent 0 (IC c) i = identA c i +argIdent b (IC c) i = identAV c b i + +-- | used in lin defaults +varStr :: Ident +varStr = identA (BS.pack "str") 0 + +-- | refreshing variables +varX :: Int -> Ident +varX = identV (BS.pack "x") + +isWildIdent :: Ident -> Bool +isWildIdent x = case x of + IW -> True + IC s | s == BS.pack "_" -> True + _ -> False + +varIndex :: Ident -> Int +varIndex (IV _ n) = n +varIndex _ = -1 --- other than IV should not count + +-- refreshing identifiers + +type IdState = ([(Ident,Ident)],Int) + +initIdStateN :: Int -> IdState +initIdStateN i = ([],i) + +initIdState :: IdState +initIdState = initIdStateN 0 + +lookVar :: Ident -> STM IdState Ident +lookVar a@(IA _ _) = return a +lookVar x = do + (sys,_) <- readSTM + stm (\s -> maybe (Bad ("cannot find" +++ show x +++ prParenth (show sys))) + return $ + lookup x sys >>= (\y -> return (y,s))) + +refVar :: Ident -> STM IdState Ident +----refVar IW = return IW --- no update of wildcard +refVar x = do + (_,m) <- readSTM + let x' = IV (ident2bs x) m + updateSTM (\(sys,mx) -> ((x, x'):sys, mx + 1)) + return x' + +refVarPlus :: Ident -> STM IdState Ident +----refVarPlus IW = refVar (identC "h") +refVarPlus x = refVar x + + +{- +------------------------------ +-- to test + +refreshExp :: Exp -> Err Exp +refreshExp e = err Bad (return . fst) (appSTM (refresh e) initState) + +refresh :: Exp -> STM State Exp +refresh e = case e of + Atom x -> lookVar x >>= return . Atom + App f a -> liftM2 App (refresh f) (refresh a) + Abs x b -> liftM2 Abs (refVar x) (refresh b) + Fun xs a b -> do + a' <- refresh a + xs' <- mapM refVar xs + b' <- refresh b + return $ Fun xs' a' b' + +data Exp = + Atom Ident + | App Exp Exp + | Abs Ident Exp + | Fun [Ident] Exp Exp + deriving Show + +exp1 = Abs (IC "y") (Atom (IC "y")) +exp2 = Abs (IC "y") (App (Atom (IC "y")) (Atom (IC "y"))) +exp3 = Abs (IC "y") (Abs (IC "z") (App (Atom (IC "y")) (Atom (IC "z")))) +exp4 = Abs (IC "y") (Abs (IC "y") (App (Atom (IC "y")) (Atom (IC "z")))) +exp5 = Abs (IC "y") (Abs (IC "y") (App (Atom (IC "y")) (Atom (IC "y")))) +exp6 = Abs (IC "y") (Fun [IC "x", IC "y"] (Atom (IC "y")) (Atom (IC "y"))) +exp7 = Abs (IL "8") (Atom (IC "y")) + +-} diff --git a/src/compiler/GF/Infra/Modules.hs b/src/compiler/GF/Infra/Modules.hs new file mode 100644 index 000000000..0710b8f40 --- /dev/null +++ b/src/compiler/GF/Infra/Modules.hs @@ -0,0 +1,349 @@ +---------------------------------------------------------------------- +-- | +-- Module : Modules +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/09 15:14:30 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.26 $ +-- +-- Datastructures and functions for modules, common to GF and GFC. +-- +-- AR 29\/4\/2003 +-- +-- The same structure will be used in both source code and canonical. +-- The parameters tell what kind of data is involved. +-- Invariant: modules are stored in dependency order +----------------------------------------------------------------------------- + +module GF.Infra.Modules ( + MGrammar(..), ModInfo(..), ModuleType(..), + MInclude (..), + extends, isInherited,inheritAll, + updateMGrammar, updateModule, replaceJudgements, addFlag, + addOpenQualif, flagsModule, allFlags, mapModules, + OpenSpec(..), + ModuleStatus(..), + openedModule, depPathModule, allDepsModule, partOfGrammar, + allExtends, allExtendSpecs, allExtendsPlus, allExtensions, + searchPathModule, addModule, + emptyMGrammar, emptyModInfo, + IdentM(..), + abstractOfConcrete, abstractModOfConcrete, + lookupModule, lookupModuleType, lookupInfo, + lookupPosition, ppPosition, + isModAbs, isModRes, isModCnc, + sameMType, isCompilableModule, isCompleteModule, + allAbstracts, greatestAbstract, allResources, + greatestResource, allConcretes, allConcreteModules + ) where + +import GF.Infra.Ident +import GF.Infra.Option +import GF.Data.Operations + +import Data.List +import Text.PrettyPrint + +-- AR 29/4/2003 + +-- The same structure will be used in both source code and canonical. +-- The parameters tell what kind of data is involved. +-- Invariant: modules are stored in dependency order + +newtype MGrammar i a = MGrammar {modules :: [(i,ModInfo i a)]} + deriving Show + +data ModInfo i a = ModInfo { + mtype :: ModuleType i , + mstatus :: ModuleStatus , + flags :: Options, + extend :: [(i,MInclude i)], + mwith :: Maybe (i,MInclude i,[(i,i)]), + opens :: [OpenSpec i] , + mexdeps :: [i] , + jments :: BinTree i a , + positions :: BinTree i (String,(Int,Int)) -- file, first line, last line + } + deriving Show + +-- | encoding the type of the module +data ModuleType i = + MTAbstract + | MTResource + | MTConcrete i + -- ^ up to this, also used in GFC. Below, source only. + | MTInterface + | MTInstance i + deriving (Eq,Ord,Show) + +data MInclude i = MIAll | MIOnly [i] | MIExcept [i] + deriving (Eq,Ord,Show) + +extends :: ModInfo i a -> [i] +extends = map fst . extend + +isInherited :: Eq i => MInclude i -> i -> Bool +isInherited c i = case c of + MIAll -> True + MIOnly is -> elem i is + MIExcept is -> notElem i is + +inheritAll :: i -> (i,MInclude i) +inheritAll i = (i,MIAll) + +-- destructive update + +-- | dep order preserved since old cannot depend on new +updateMGrammar :: Ord i => MGrammar i a -> MGrammar i a -> MGrammar i a +updateMGrammar old new = MGrammar $ + [(i,m) | (i,m) <- os, notElem i (map fst ns)] ++ ns + where + os = modules old + ns = modules new + +updateModule :: Ord i => ModInfo i t -> i -> t -> ModInfo i t +updateModule (ModInfo mt ms fs me mw ops med js ps) i t = ModInfo mt ms fs me mw ops med (updateTree (i,t) js) ps + +replaceJudgements :: ModInfo i t -> BinTree i t -> ModInfo i t +replaceJudgements (ModInfo mt ms fs me mw ops med _ ps) js = ModInfo mt ms fs me mw ops med js ps + +addOpenQualif :: i -> i -> ModInfo i t -> ModInfo i t +addOpenQualif i j (ModInfo mt ms fs me mw ops med js ps) = ModInfo mt ms fs me mw (OQualif i j : ops) med js ps + +addFlag :: Options -> ModInfo i t -> ModInfo i t +addFlag f mo = mo {flags = flags mo `addOptions` f} + +flagsModule :: (i,ModInfo i a) -> Options +flagsModule (_,mi) = flags mi + +allFlags :: MGrammar i a -> Options +allFlags gr = concatOptions [flags m | (_,m) <- modules gr] + +mapModules :: (ModInfo i a -> ModInfo i a) -> MGrammar i a -> MGrammar i a +mapModules f (MGrammar ms) = MGrammar (map (onSnd f) ms) + +data OpenSpec i = + OSimple i + | OQualif i i + deriving (Eq,Ord,Show) + +data ModuleStatus = + MSComplete + | MSIncomplete + deriving (Eq,Ord,Show) + +openedModule :: OpenSpec i -> i +openedModule o = case o of + OSimple m -> m + OQualif _ m -> m + +-- | initial dependency list +depPathModule :: Ord i => ModInfo i a -> [OpenSpec i] +depPathModule m = fors m ++ exts m ++ opens m + where + fors m = + case mtype m of + MTConcrete i -> [OSimple i] + MTInstance i -> [OSimple i] + _ -> [] + exts m = map OSimple (extends m) + +-- | all dependencies +allDepsModule :: Ord i => MGrammar i a -> ModInfo i a -> [OpenSpec i] +allDepsModule gr m = iterFix add os0 where + os0 = depPathModule m + add os = [m | o <- os, Just n <- [lookup (openedModule o) mods], + m <- depPathModule n] + mods = modules gr + +-- | select just those modules that a given one depends on, including itself +partOfGrammar :: Ord i => MGrammar i a -> (i,ModInfo i a) -> MGrammar i a +partOfGrammar gr (i,m) = MGrammar [mo | mo@(j,_) <- mods, elem j modsFor] + where + mods = modules gr + modsFor = (i:) $ map openedModule $ allDepsModule gr m + +-- | all modules that a module extends, directly or indirectly, without restricts +allExtends :: (Show i,Ord i) => MGrammar i a -> i -> [i] +allExtends gr i = + case lookupModule gr i of + Ok m -> case extends m of + [] -> [i] + is -> i : concatMap (allExtends gr) is + _ -> [] + +-- | all modules that a module extends, directly or indirectly, with restricts +allExtendSpecs :: (Show i,Ord i) => MGrammar i a -> i -> [(i,MInclude i)] +allExtendSpecs gr i = + case lookupModule gr i of + Ok m -> case extend m of + [] -> [(i,MIAll)] + is -> (i,MIAll) : concatMap (allExtendSpecs gr . fst) is + _ -> [] + +-- | this plus that an instance extends its interface +allExtendsPlus :: (Show i,Ord i) => MGrammar i a -> i -> [i] +allExtendsPlus gr i = + case lookupModule gr i of + Ok m -> i : concatMap (allExtendsPlus gr) (exts m) + _ -> [] + where + exts m = extends m ++ [j | MTInstance j <- [mtype m]] + +-- | conversely: all modules that extend a given module, incl. instances of interface +allExtensions :: (Show i,Ord i) => MGrammar i a -> i -> [i] +allExtensions gr i = + case lookupModule gr i of + Ok m -> let es = exts i in es ++ concatMap (allExtensions gr) es + _ -> [] + where + exts i = [j | (j,m) <- mods, elem i (extends m) + || elem (MTInstance i) [mtype m]] + mods = modules gr + +-- | initial search path: the nonqualified dependencies +searchPathModule :: Ord i => ModInfo i a -> [i] +searchPathModule m = [i | OSimple i <- depPathModule m] + +-- | a new module can safely be added to the end, since nothing old can depend on it +addModule :: Ord i => + MGrammar i a -> i -> ModInfo i a -> MGrammar i a +addModule gr name mi = MGrammar $ (modules gr ++ [(name,mi)]) + +emptyMGrammar :: MGrammar i a +emptyMGrammar = MGrammar [] + +emptyModInfo :: ModInfo i a +emptyModInfo = ModInfo MTResource MSComplete noOptions [] Nothing [] [] emptyBinTree emptyBinTree + +-- | we store the module type with the identifier +data IdentM i = IdentM { + identM :: i , + typeM :: ModuleType i + } + deriving (Eq,Ord,Show) + +abstractOfConcrete :: (Show i, Eq i) => MGrammar i a -> i -> Err i +abstractOfConcrete gr c = do + n <- lookupModule gr c + case mtype n of + MTConcrete a -> return a + _ -> Bad $ "expected concrete" +++ show c + +abstractModOfConcrete :: (Show i, Eq i) => + MGrammar i a -> i -> Err (ModInfo i a) +abstractModOfConcrete gr c = do + a <- abstractOfConcrete gr c + lookupModule gr a + + +-- the canonical file name + +--- canonFileName s = prt s ++ ".gfc" + +lookupModule :: (Show i,Eq i) => MGrammar i a -> i -> Err (ModInfo i a) +lookupModule gr m = case lookup m (modules gr) of + Just i -> return i + _ -> Bad $ "unknown module" +++ show m + +++ "among" +++ unwords (map (show . fst) (modules gr)) ---- debug + +lookupModuleType :: (Show i,Eq i) => MGrammar i a -> i -> Err (ModuleType i) +lookupModuleType gr m = do + mi <- lookupModule gr m + return $ mtype mi + +lookupInfo :: (Show i, Ord i) => ModInfo i a -> i -> Err a +lookupInfo mo i = lookupTree show i (jments mo) + +lookupPosition :: (Show i, Ord i) => ModInfo i a -> i -> Err (String,(Int,Int)) +lookupPosition mo i = lookupTree show i (positions mo) + +ppPosition :: (Show i, Ord i) => ModInfo i a -> i -> Doc +ppPosition mo i = case lookupPosition mo i of + Ok (f,(b,e)) | b == e -> text "in" <+> text f <> text ", line" <+> int b + | otherwise -> text "in" <+> text f <> text ", lines" <+> int b <> text "-" <> int e + _ -> empty + +isModAbs :: ModInfo i a -> Bool +isModAbs m = case mtype m of + MTAbstract -> True +---- MTUnion t -> isModAbs t + _ -> False + +isModRes :: ModInfo i a -> Bool +isModRes m = case mtype m of + MTResource -> True + MTInterface -> True --- + MTInstance _ -> True + _ -> False + +isModCnc :: ModInfo i a -> Bool +isModCnc m = case mtype m of + MTConcrete _ -> True + _ -> False + +sameMType :: Eq i => ModuleType i -> ModuleType i -> Bool +sameMType m n = case (n,m) of + (MTConcrete _, MTConcrete _) -> True + + (MTInstance _, MTInstance _) -> True + (MTInstance _, MTResource) -> True + (MTInstance _, MTConcrete _) -> True + + (MTInterface, MTInstance _) -> True + (MTInterface, MTResource) -> True -- for reuse + (MTInterface, MTAbstract) -> True -- for reuse + (MTInterface, MTConcrete _) -> True -- for reuse + + (MTResource, MTInstance _) -> True + (MTResource, MTConcrete _) -> True -- for reuse + + _ -> m == n + +-- | don't generate code for interfaces and for incomplete modules +isCompilableModule :: ModInfo i a -> Bool +isCompilableModule m = + case mtype m of + MTInterface -> False + _ -> mstatus m == MSComplete + +-- | interface and "incomplete M" are not complete +isCompleteModule :: (Eq i) => ModInfo i a -> Bool +isCompleteModule m = mstatus m == MSComplete && mtype m /= MTInterface + + +-- | all abstract modules sorted from least to most dependent +allAbstracts :: (Ord i, Show i) => MGrammar i a -> [i] +allAbstracts gr = + case topoTest [(i,extends m) | (i,m) <- modules gr, mtype m == MTAbstract] of + Left is -> is + Right cycles -> error $ "Cyclic abstract modules: " ++ show cycles + +-- | the last abstract in dependency order (head of list) +greatestAbstract :: (Ord i, Show i) => MGrammar i a -> Maybe i +greatestAbstract gr = case allAbstracts gr of + [] -> Nothing + as -> return $ last as + +-- | all resource modules +allResources :: MGrammar i a -> [i] +allResources gr = [i | (i,m) <- modules gr, isModRes m || isModCnc m] + +-- | the greatest resource in dependency order +greatestResource :: MGrammar i a -> Maybe i +greatestResource gr = case allResources gr of + [] -> Nothing + a -> return $ head a ---- why not last as in Abstract? works though AR 24/5/2008 + +-- | all concretes for a given abstract +allConcretes :: Eq i => MGrammar i a -> i -> [i] +allConcretes gr a = + [i | (i, m) <- modules gr, mtype m == MTConcrete a, isCompleteModule m] + +-- | all concrete modules for any abstract +allConcreteModules :: Eq i => MGrammar i a -> [i] +allConcreteModules gr = + [i | (i, m) <- modules gr, MTConcrete _ <- [mtype m], isCompleteModule m] diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs new file mode 100644 index 000000000..dc15d1929 --- /dev/null +++ b/src/compiler/GF/Infra/Option.hs @@ -0,0 +1,609 @@ +module GF.Infra.Option + ( + -- * Option types + Options, + Flags(..), + Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), + SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..), + Dump(..), Printer(..), Recomp(..), BuildParser(..), + -- * Option parsing + parseOptions, parseModuleOptions, fixRelativeLibPaths, + -- * Option pretty-printing + optionsGFO, + optionsPGF, + -- * Option manipulation + addOptions, concatOptions, noOptions, + modifyFlags, + helpMessage, + -- * Checking specific options + flag, cfgTransform, haskellOption, readOutputFormat, + isLexicalCat, encodings, + -- * Setting specific options + setOptimization, setCFGTransform, + -- * Convenience methods for checking options + verbAtLeast, dump + ) where + +import Control.Monad +import Data.Char (toLower) +import Data.List +import Data.Maybe +import GF.Infra.GetOpt +--import System.Console.GetOpt +import System.FilePath + +import GF.Data.ErrM + +import Data.Set (Set) +import qualified Data.Set as Set + + + + +usageHeader :: String +usageHeader = unlines + ["Usage: gfc [OPTIONS] [FILE [...]]", + "", + "How each FILE is handled depends on the file name suffix:", + "", + ".gf Normal or old GF source, will be compiled.", + ".gfo Compiled GF source, will be loaded as is.", + ".gfe Example-based GF source, will be converted to .gf and compiled.", + ".ebnf Extended BNF format, will be converted to .gf and compiled.", + ".cf Context-free (BNF) format, will be converted to .gf and compiled.", + "", + "If multiple FILES are given, they must be normal GF source, .gfo or .gfe files.", + "For the other input formats, only one file can be given.", + "", + "Command-line options:"] + + +helpMessage :: String +helpMessage = usageInfo usageHeader optDescr + + +-- FIXME: do we really want multi-line errors? +errors :: [String] -> Err a +errors = fail . unlines + +-- Types + +data Mode = ModeVersion | ModeHelp | ModeInteractive | ModeRun | ModeCompiler + deriving (Show,Eq,Ord) + +data Verbosity = Quiet | Normal | Verbose | Debug + deriving (Show,Eq,Ord,Enum,Bounded) + +data Phase = Preproc | Convert | Compile | Link + deriving (Show,Eq,Ord) + +data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 + deriving (Eq,Ord) + +data OutputFormat = FmtPGFPretty + | FmtPMCFGPretty + | FmtJavaScript + | FmtHaskell + | FmtProlog + | FmtProlog_Abs + | FmtBNF + | FmtEBNF + | FmtRegular + | FmtNoLR + | FmtSRGS_XML + | FmtSRGS_XML_NonRec + | FmtSRGS_ABNF + | FmtSRGS_ABNF_NonRec + | FmtJSGF + | FmtGSL + | FmtVoiceXML + | FmtSLF + | FmtRegExp + | FmtFA + deriving (Eq,Ord) + +data SISRFormat = + -- | SISR Working draft 1 April 2003 + -- <http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/> + SISR_WD20030401 + | SISR_1_0 + deriving (Show,Eq,Ord) + +data Optimization = OptStem | OptCSE | OptExpand | OptParametrize + deriving (Show,Eq,Ord) + +data CFGTransform = CFGNoLR + | CFGRegular + | CFGTopDownFilter + | CFGBottomUpFilter + | CFGStartCatOnly + | CFGMergeIdentical + | CFGRemoveCycles + deriving (Show,Eq,Ord) + +data HaskellOption = HaskellNoPrefix | HaskellGADT | HaskellLexical + deriving (Show,Eq,Ord) + +data Warning = WarnMissingLincat + deriving (Show,Eq,Ord) + +data Dump = DumpSource | DumpRebuild | DumpExtend | DumpRename | DumpTypeCheck | DumpRefresh | DumpOptimize | DumpCanon + deriving (Show,Eq,Ord) + +-- | Pretty-printing options +data Printer = PrinterStrip -- ^ Remove name qualifiers. + deriving (Show,Eq,Ord) + +data Recomp = AlwaysRecomp | RecompIfNewer | NeverRecomp + deriving (Show,Eq,Ord) + +data BuildParser = BuildParser | DontBuildParser | BuildParserOnDemand + deriving (Show,Eq,Ord) + +data Flags = Flags { + optMode :: Mode, + optStopAfterPhase :: Phase, + optVerbosity :: Verbosity, + optProf :: Bool, + optShowCPUTime :: Bool, + optEmitGFO :: Bool, + optOutputFormats :: [OutputFormat], + optSISR :: Maybe SISRFormat, + optHaskellOptions :: Set HaskellOption, + optLexicalCats :: Set String, + optGFODir :: Maybe FilePath, + optOutputFile :: Maybe FilePath, + optOutputDir :: Maybe FilePath, + optGFLibPath :: Maybe FilePath, + optRecomp :: Recomp, + optPrinter :: [Printer], + optProb :: Bool, + optRetainResource :: Bool, + optName :: Maybe String, + optAbsName :: Maybe String, + optCncName :: Maybe String, + optResName :: Maybe String, + optPreprocessors :: [String], + optEncoding :: Encoding, + optOptimizations :: Set Optimization, + optCFGTransforms :: Set CFGTransform, + optLibraryPath :: [FilePath], + optStartCat :: Maybe String, + optSpeechLanguage :: Maybe String, + optLexer :: Maybe String, + optUnlexer :: Maybe String, + optErasing :: Bool, + optBuildParser :: BuildParser, + optWarnings :: [Warning], + optDump :: [Dump] + } + deriving (Show) + +newtype Options = Options (Flags -> Flags) + +instance Show Options where + show (Options o) = show (o defaultFlags) + +-- Option parsing + +parseOptions :: [String] -- ^ list of string arguments + -> Err (Options, [FilePath]) +parseOptions args + | not (null errs) = errors errs + | otherwise = do opts <- liftM concatOptions $ sequence optss + return (opts, files) + where + (optss, files, errs) = getOpt RequireOrder optDescr args + +parseModuleOptions :: [String] -- ^ list of string arguments + -> Err Options +parseModuleOptions args = do + (opts,nonopts) <- parseOptions args + if null nonopts + then return opts + else errors $ map ("Non-option among module options: " ++) nonopts + +fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) + where + fixPathFlags f@(Flags{optLibraryPath=path}) = f{optLibraryPath=concatMap (\dir -> [curr_dir </> dir, lib_dir </> dir]) path} + +-- Showing options + +-- | Pretty-print the options that are preserved in .gfo files. +optionsGFO :: Options -> [(String,String)] +optionsGFO opts = optionsPGF opts + ++ [("coding", show (flag optEncoding opts))] + +-- | Pretty-print the options that are preserved in .pgf files. +optionsPGF :: Options -> [(String,String)] +optionsPGF opts = + maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts) + ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts) + ++ (if flag optErasing opts then [("erasing","on")] else []) + ++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else []) + +-- Option manipulation + +flag :: (Flags -> a) -> Options -> a +flag f (Options o) = f (o defaultFlags) + +addOptions :: Options -> Options -> Options +addOptions (Options o1) (Options o2) = Options (o2 . o1) + +noOptions :: Options +noOptions = Options id + +concatOptions :: [Options] -> Options +concatOptions = foldr addOptions noOptions + +modifyFlags :: (Flags -> Flags) -> Options +modifyFlags = Options + +-- Default options + +defaultFlags :: Flags +defaultFlags = Flags { + optMode = ModeInteractive, + optStopAfterPhase = Compile, + optVerbosity = Normal, + optProf = False, + optShowCPUTime = False, + optEmitGFO = True, + optOutputFormats = [], + optSISR = Nothing, + optHaskellOptions = Set.empty, + optLexicalCats = Set.empty, + optGFODir = Nothing, + optOutputFile = Nothing, + optOutputDir = Nothing, + optGFLibPath = Nothing, + optRecomp = RecompIfNewer, + optPrinter = [], + optProb = False, + optRetainResource = False, + + optName = Nothing, + optAbsName = Nothing, + optCncName = Nothing, + optResName = Nothing, + optPreprocessors = [], + optEncoding = ISO_8859_1, + optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], + optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter, + CFGTopDownFilter, CFGMergeIdentical], + optLibraryPath = [], + optStartCat = Nothing, + optSpeechLanguage = Nothing, + optLexer = Nothing, + optUnlexer = Nothing, + optErasing = True, + optBuildParser = BuildParser, + optWarnings = [], + optDump = [] + } + +-- Option descriptions + +optDescr :: [OptDescr (Err Options)] +optDescr = + [ + Option ['?','h'] ["help"] (NoArg (mode ModeHelp)) "Show help message.", + Option ['V'] ["version"] (NoArg (mode ModeVersion)) "Display GF version number.", + Option ['v'] ["verbose"] (OptArg verbosity "N") "Set verbosity (default 1). -v alone is the same as -v 2.", + Option ['q','s'] ["quiet"] (NoArg (verbosity (Just "0"))) "Quiet, same as -v 0.", + Option [] ["batch"] (NoArg (mode ModeCompiler)) "Run in batch compiler mode.", + Option [] ["interactive"] (NoArg (mode ModeInteractive)) "Run in interactive mode (default).", + Option [] ["run"] (NoArg (mode ModeRun)) "Run in interactive mode, showing output only (no other messages).", + Option ['E'] [] (NoArg (phase Preproc)) "Stop after preprocessing (with --preproc).", + Option ['C'] [] (NoArg (phase Convert)) "Stop after conversion to .gf.", + Option ['c'] [] (NoArg (phase Compile)) "Stop after compiling to .gfo (default) .", + Option [] ["make"] (NoArg (liftM2 addOptions (mode ModeCompiler) (phase Link))) "Build .pgf file and other output files and exit.", + Option [] ["prof"] (NoArg (prof True)) "Dump profiling information when compiling to PMCFG", + Option [] ["cpu"] (NoArg (cpu True)) "Show compilation CPU time statistics.", + Option [] ["no-cpu"] (NoArg (cpu False)) "Don't show compilation CPU time statistics (default).", + Option [] ["emit-gfo"] (NoArg (emitGFO True)) "Create .gfo files (default).", + Option [] ["no-emit-gfo"] (NoArg (emitGFO False)) "Do not create .gfo files.", + Option [] ["gfo-dir"] (ReqArg gfoDir "DIR") "Directory to put .gfo files in (default = '.').", + Option ['f'] ["output-format"] (ReqArg outFmt "FMT") + (unlines ["Output format. FMT can be one of:", + "Multiple concrete: pgf (default), gar, js, prolog, ...", + "Single concrete only: cf, bnf, lbnf, gsl, srgs_xml, srgs_abnf, ...", + "Abstract only: haskell, prolog_abs, ..."]), + Option [] ["sisr"] (ReqArg sisrFmt "FMT") + (unlines ["Include SISR tags in generated speech recognition grammars.", + "FMT can be one of: old, 1.0"]), + Option [] ["haskell"] (ReqArg hsOption "OPTION") + ("Turn on an optional feature when generating Haskell data types. OPTION = " + ++ concat (intersperse " | " (map fst haskellOptionNames))), + Option [] ["lexical"] (ReqArg lexicalCat "CAT[,CAT[...]]") + "Treat CAT as a lexical category.", + Option ['o'] ["output-file"] (ReqArg outFile "FILE") + "Save output in FILE (default is out.X, where X depends on output format.", + Option ['D'] ["output-dir"] (ReqArg outDir "DIR") + "Save output files (other than .gfo files) in DIR.", + Option [] ["gf-lib-path"] (ReqArg gfLibPath "DIR") + "Overides the value of GF_LIB_PATH.", + Option [] ["src","force-recomp"] (NoArg (recomp AlwaysRecomp)) + "Always recompile from source.", + Option [] ["gfo","recomp-if-newer"] (NoArg (recomp RecompIfNewer)) + "(default) Recompile from source if the source is newer than the .gfo file.", + Option [] ["gfo","no-recomp"] (NoArg (recomp NeverRecomp)) + "Never recompile from source, if there is already .gfo file.", + Option [] ["strip"] (NoArg (printer PrinterStrip)) + "Remove name qualifiers when pretty-printing.", + Option [] ["retain"] (NoArg (set $ \o -> o { optRetainResource = True })) "Retain opers.", + Option [] ["prob"] (NoArg (prob True)) "Read probabilities from '--# prob' pragmas.", + Option ['n'] ["name"] (ReqArg name "NAME") + (unlines ["Use NAME as the name of the output. This is used in the output file names, ", + "with suffixes depending on the formats, and, when relevant, ", + "internally in the output."]), + Option [] ["abs"] (ReqArg absName "NAME") + ("Use NAME as the name of the abstract syntax module generated from " + ++ "a grammar in GF 1 format."), + Option [] ["cnc"] (ReqArg cncName "NAME") + ("Use NAME as the name of the concrete syntax module generated from " + ++ "a grammar in GF 1 format."), + Option [] ["res"] (ReqArg resName "NAME") + ("Use NAME as the name of the resource module generated from " + ++ "a grammar in GF 1 format."), + Option ['i'] [] (ReqArg addLibDir "DIR") "Add DIR to the library search path.", + Option [] ["path"] (ReqArg setLibPath "DIR:DIR:...") "Set the library search path.", + Option [] ["preproc"] (ReqArg preproc "CMD") + (unlines ["Use CMD to preprocess input files.", + "Multiple preprocessors can be used by giving this option multiple times."]), + Option [] ["coding"] (ReqArg coding "ENCODING") + ("Character encoding of the source grammar, ENCODING = " + ++ concat (intersperse " | " (map fst encodings)) ++ "."), + Option [] ["erasing"] (onOff erasing False) "Generate erasing grammar (default off).", + Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand", + Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.", + Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.", + Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.", + Option [] ["unlexer"] (ReqArg unlexer "UNLEXER") "Use unlexer UNLEXER.", + Option [] ["optimize"] (ReqArg optimize "OPT") + "Select an optimization package. OPT = all | values | parametrize | none", + Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).", + Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).", + Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...", + dumpOption "source" DumpSource, + dumpOption "rebuild" DumpRebuild, + dumpOption "extend" DumpExtend, + dumpOption "rename" DumpRename, + dumpOption "tc" DumpTypeCheck, + dumpOption "refresh" DumpRefresh, + dumpOption "opt" DumpOptimize, + dumpOption "canon" DumpCanon + + ] + where phase x = set $ \o -> o { optStopAfterPhase = x } + mode x = set $ \o -> o { optMode = x } + verbosity mv = case mv of + Nothing -> set $ \o -> o { optVerbosity = Verbose } + Just v -> case readMaybe v >>= toEnumBounded of + Just i -> set $ \o -> o { optVerbosity = i } + Nothing -> fail $ "Bad verbosity: " ++ show v + prof x = set $ \o -> o { optProf = x } + cpu x = set $ \o -> o { optShowCPUTime = x } + emitGFO x = set $ \o -> o { optEmitGFO = x } + gfoDir x = set $ \o -> o { optGFODir = Just x } + outFmt x = readOutputFormat x >>= \f -> + set $ \o -> o { optOutputFormats = optOutputFormats o ++ [f] } + sisrFmt x = case x of + "old" -> set $ \o -> o { optSISR = Just SISR_WD20030401 } + "1.0" -> set $ \o -> o { optSISR = Just SISR_1_0 } + _ -> fail $ "Unknown SISR format: " ++ show x + hsOption x = case lookup x haskellOptionNames of + Just p -> set $ \o -> o { optHaskellOptions = Set.insert p (optHaskellOptions o) } + Nothing -> fail $ "Unknown Haskell option: " ++ x + ++ " Known: " ++ show (map fst haskellOptionNames) + lexicalCat x = set $ \o -> o { optLexicalCats = foldr Set.insert (optLexicalCats o) (splitBy (==',') x) } + outFile x = set $ \o -> o { optOutputFile = Just x } + outDir x = set $ \o -> o { optOutputDir = Just x } + gfLibPath x = set $ \o -> o { optGFLibPath = Just x } + recomp x = set $ \o -> o { optRecomp = x } + printer x = set $ \o -> o { optPrinter = x : optPrinter o } + prob x = set $ \o -> o { optProb = x } + + name x = set $ \o -> o { optName = Just x } + absName x = set $ \o -> o { optAbsName = Just x } + cncName x = set $ \o -> o { optCncName = Just x } + resName x = set $ \o -> o { optResName = Just x } + addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } + setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } + preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } + coding x = case lookup x encodings of + Just c -> set $ \o -> o { optEncoding = c } + Nothing -> fail $ "Unknown character encoding: " ++ x + erasing x = set $ \o -> o { optErasing = x } + buildParser x = do v <- case x of + "on" -> return BuildParser + "off" -> return DontBuildParser + "ondemand" -> return BuildParserOnDemand + set $ \o -> o { optBuildParser = v } + startcat x = set $ \o -> o { optStartCat = Just x } + language x = set $ \o -> o { optSpeechLanguage = Just x } + lexer x = set $ \o -> o { optLexer = Just x } + unlexer x = set $ \o -> o { optUnlexer = Just x } + + optimize x = case lookup x optimizationPackages of + Just p -> set $ \o -> o { optOptimizations = p } + Nothing -> fail $ "Unknown optimization package: " ++ x + + toggleOptimize x b = set $ setOptimization' x b + + cfgTransform x = let (x', b) = case x of + 'n':'o':'-':rest -> (rest, False) + _ -> (x, True) + in case lookup x' cfgTransformNames of + Just t -> set $ setCFGTransform' t b + Nothing -> fail $ "Unknown CFG transformation: " ++ x' + ++ " Known: " ++ show (map fst cfgTransformNames) + + dumpOption s d = Option [] ["dump-"++s] (NoArg (set $ \o -> o { optDump = d:optDump o})) ("Dump output of the " ++ s ++ " phase.") + + set = return . Options + +outputFormats :: [(String,OutputFormat)] +outputFormats = + [("pgf_pretty", FmtPGFPretty), + ("pmcfg_pretty", FmtPMCFGPretty), + ("js", FmtJavaScript), + ("haskell", FmtHaskell), + ("prolog", FmtProlog), + ("prolog_abs", FmtProlog_Abs), + ("bnf", FmtBNF), + ("ebnf", FmtEBNF), + ("regular", FmtRegular), + ("nolr", FmtNoLR), + ("srgs_xml", FmtSRGS_XML), + ("srgs_xml_nonrec", FmtSRGS_XML_NonRec), + ("srgs_abnf", FmtSRGS_ABNF), + ("srgs_abnf_nonrec", FmtSRGS_ABNF_NonRec), + ("jsgf", FmtJSGF), + ("gsl", FmtGSL), + ("vxml", FmtVoiceXML), + ("slf", FmtSLF), + ("regexp", FmtRegExp), + ("fa", FmtFA)] + +instance Show OutputFormat where + show = lookupShow outputFormats + +instance Read OutputFormat where + readsPrec = lookupReadsPrec outputFormats + +optimizationPackages :: [(String, Set Optimization)] +optimizationPackages = + [("all", Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize]), + ("values", Set.fromList [OptStem,OptCSE,OptExpand]), + ("noexpand", Set.fromList [OptStem,OptCSE]), + + -- deprecated + ("all_subs", Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize]), + ("parametrize", Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize]), + ("none", Set.fromList [OptStem,OptCSE,OptExpand]) + ] + +cfgTransformNames :: [(String, CFGTransform)] +cfgTransformNames = + [("nolr", CFGNoLR), + ("regular", CFGRegular), + ("topdown", CFGTopDownFilter), + ("bottomup", CFGBottomUpFilter), + ("startcatonly", CFGStartCatOnly), + ("merge", CFGMergeIdentical), + ("removecycles", CFGRemoveCycles)] + +haskellOptionNames :: [(String, HaskellOption)] +haskellOptionNames = + [("noprefix", HaskellNoPrefix), + ("gadt", HaskellGADT), + ("lexical", HaskellLexical)] + +encodings :: [(String,Encoding)] +encodings = + [("utf8", UTF_8), + ("cp1250", CP_1250), + ("cp1251", CP_1251), + ("cp1252", CP_1252), + ("latin1", ISO_8859_1) + ] + +instance Show Encoding where + show = lookupShow encodings + +lookupShow :: Eq a => [(String,a)] -> a -> String +lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs] + +lookupReadsPrec :: [(String,a)] -> Int -> ReadS a +lookupReadsPrec xs _ s = [(z,rest) | (x,rest) <- lex s, (y,z) <- xs, y == x] + +onOff :: Monad m => (Bool -> m a) -> Bool -> ArgDescr (m a) +onOff f def = OptArg g "[on,off]" + where g ma = maybe (return def) readOnOff ma >>= f + readOnOff x = case map toLower x of + "on" -> return True + "off" -> return False + _ -> fail $ "Expected [on,off], got: " ++ show x + +readOutputFormat :: Monad m => String -> m OutputFormat +readOutputFormat s = + maybe (fail $ "Unknown output format: " ++ show s) return $ lookup s outputFormats + +-- FIXME: this is a copy of the function in GF.Devel.UseIO. +splitInModuleSearchPath :: String -> [FilePath] +splitInModuleSearchPath s = case break isPathSep s of + (f,_:cs) -> f : splitInModuleSearchPath cs + (f,_) -> [f] + where + isPathSep :: Char -> Bool + isPathSep c = c == ':' || c == ';' + +-- +-- * Convenience functions for checking options +-- + +verbAtLeast :: Options -> Verbosity -> Bool +verbAtLeast opts v = flag optVerbosity opts >= v + +dump :: Options -> Dump -> Bool +dump opts d = flag ((d `elem`) . optDump) opts + +cfgTransform :: Options -> CFGTransform -> Bool +cfgTransform opts t = Set.member t (flag optCFGTransforms opts) + +haskellOption :: Options -> HaskellOption -> Bool +haskellOption opts o = Set.member o (flag optHaskellOptions opts) + +isLexicalCat :: Options -> String -> Bool +isLexicalCat opts c = Set.member c (flag optLexicalCats opts) + +-- +-- * Convenience functions for setting options +-- + +setOptimization :: Optimization -> Bool -> Options +setOptimization o b = modifyFlags (setOptimization' o b) + +setOptimization' :: Optimization -> Bool -> Flags -> Flags +setOptimization' o b f = f { optOptimizations = toggle o b (optOptimizations f)} + +setCFGTransform :: CFGTransform -> Bool -> Options +setCFGTransform t b = modifyFlags (setCFGTransform' t b) + +setCFGTransform' :: CFGTransform -> Bool -> Flags -> Flags +setCFGTransform' t b f = f { optCFGTransforms = toggle t b (optCFGTransforms f) } + +toggle :: Ord a => a -> Bool -> Set a -> Set a +toggle o True = Set.insert o +toggle o False = Set.delete o + +-- +-- * General utilities +-- + +readMaybe :: Read a => String -> Maybe a +readMaybe s = case reads s of + [(x,"")] -> Just x + _ -> Nothing + +toEnumBounded :: (Bounded a, Enum a, Ord a) => Int -> Maybe a +toEnumBounded i = let mi = minBound + ma = maxBound `asTypeOf` mi + in if i >= fromEnum mi && i <= fromEnum ma + then Just (toEnum i `asTypeOf` mi) + else Nothing + +splitBy :: (a -> Bool) -> [a] -> [[a]] +splitBy _ [] = [] +splitBy p s = case break p s of + (l, _ : t@(_ : _)) -> l : splitBy p t + (l, _) -> [l] + +instance Functor OptDescr where + fmap f (Option cs ss d s) = Option cs ss (fmap f d) s + +instance Functor ArgDescr where + fmap f (NoArg x) = NoArg (f x) + fmap f (ReqArg g s) = ReqArg (f . g) s + fmap f (OptArg g s) = OptArg (f . g) s diff --git a/src/compiler/GF/Infra/UseIO.hs b/src/compiler/GF/Infra/UseIO.hs new file mode 100644 index 000000000..bb1a75b6e --- /dev/null +++ b/src/compiler/GF/Infra/UseIO.hs @@ -0,0 +1,186 @@ +{-# OPTIONS -cpp #-} +---------------------------------------------------------------------- +-- | +-- Module : UseIO +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/08 09:01:25 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.17 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Infra.UseIO where + +import GF.Data.Operations +import GF.Infra.Option +import Paths_gf(getDataDir) + +import System.Directory +import System.FilePath +import System.IO +import System.IO.Error +import System.Environment +import System.Exit +import System.CPUTime +import Text.Printf +import Control.Monad +import Control.Exception(evaluate) +import qualified Data.ByteString.Char8 as BS +import Data.List(nub) + +putShow' :: Show a => (c -> a) -> c -> IO () +putShow' f = putStrLn . show . length . show . f + +putIfVerb :: Options -> String -> IO () +putIfVerb opts msg = + when (verbAtLeast opts Verbose) $ putStrLn msg + +putIfVerbW :: Options -> String -> IO () +putIfVerbW opts msg = + when (verbAtLeast opts Verbose) $ putStr (' ' : msg) + +errOptIO :: Options -> a -> Err a -> IO a +errOptIO os e m = case m of + Ok x -> return x + Bad k -> do + putIfVerb os k + return e + +type FileName = String +type InitPath = String +type FullPath = String + +gfLibraryPath = "GF_LIB_PATH" +gfGrammarPathVar = "GF_GRAMMAR_PATH" + +getLibraryDirectory :: Options -> IO FilePath +getLibraryDirectory opts = + case flag optGFLibPath opts of + Just path -> return path + Nothing -> catch + (getEnv gfLibraryPath) + (\ex -> getDataDir >>= \path -> return (path </> "lib")) + +getGrammarPath :: FilePath -> IO [FilePath] +getGrammarPath lib_dir = do + catch (fmap splitSearchPath $ getEnv gfGrammarPathVar) (\_ -> return [lib_dir </> "prelude"]) -- e.g. GF_GRAMMAR_PATH + +-- | extends the search path with the +-- 'gfLibraryPath' and 'gfGrammarPathVar' +-- environment variables. Returns only existing paths. +extendPathEnv :: Options -> IO [FilePath] +extendPathEnv opts = do + opt_path <- return $ flag optLibraryPath opts -- e.g. paths given as options + lib_dir <- getLibraryDirectory opts -- e.g. GF_LIB_PATH + grm_path <- getGrammarPath lib_dir -- e.g. GF_GRAMMAR_PATH + let paths = opt_path ++ [lib_dir] ++ grm_path + ps <- liftM concat $ mapM allSubdirs paths + mapM canonicalizePath ps + where + allSubdirs :: FilePath -> IO [FilePath] + allSubdirs [] = return [[]] + allSubdirs p = case last p of + '*' -> do let path = init p + fs <- getSubdirs path + return [path </> f | f <- fs] + _ -> do exists <- doesDirectoryExist p + if exists + then return [p] + else return [] + +getSubdirs :: FilePath -> IO [FilePath] +getSubdirs dir = do + fs <- catch (getDirectoryContents dir) (const $ return []) + foldM (\fs f -> do let fpath = dir </> f + p <- getPermissions fpath + if searchable p && not (take 1 f==".") + then return (fpath:fs) + else return fs ) [] fs + +justModuleName :: FilePath -> String +justModuleName = dropExtension . takeFileName + +splitInModuleSearchPath :: String -> [FilePath] +splitInModuleSearchPath s = case break isPathSep s of + (f,_:cs) -> f : splitInModuleSearchPath cs + (f,_) -> [f] + where + isPathSep :: Char -> Bool + isPathSep c = c == ':' || c == ';' + +-- + +putStrFlush :: String -> IO () +putStrFlush s = putStr s >> hFlush stdout + +putStrLnFlush :: String -> IO () +putStrLnFlush s = putStrLn s >> hFlush stdout + +-- * IO monad with error; adapted from state monad + +newtype IOE a = IOE (IO (Err a)) + +appIOE :: IOE a -> IO (Err a) +appIOE (IOE iea) = iea + +ioe :: IO (Err a) -> IOE a +ioe = IOE + +ioeIO :: IO a -> IOE a +ioeIO io = ioe (io >>= return . return) + +ioeErr :: Err a -> IOE a +ioeErr = ioe . return + +instance Monad IOE where + return a = ioe (return (return a)) + IOE c >>= f = IOE $ do + x <- c -- Err a + appIOE $ err ioeBad f x -- f :: a -> IOE a + +ioeBad :: String -> IOE a +ioeBad = ioe . return . Bad + +useIOE :: a -> IOE a -> IO a +useIOE a ioe = appIOE ioe >>= err (\s -> putStrLn s >> return a) return + +foldIOE :: (a -> b -> IOE a) -> a -> [b] -> IOE (a, Maybe String) +foldIOE f s xs = case xs of + [] -> return (s,Nothing) + x:xx -> do + ev <- ioeIO $ appIOE (f s x) + case ev of + Ok v -> foldIOE f v xx + Bad m -> return $ (s, Just m) + +dieIOE :: IOE a -> IO a +dieIOE x = appIOE x >>= err die return + +die :: String -> IO a +die s = do hPutStrLn stderr s + exitFailure + +putStrLnE :: String -> IOE () +putStrLnE = ioeIO . putStrLnFlush + +putStrE :: String -> IOE () +putStrE = ioeIO . putStrFlush + +putPointE :: Verbosity -> Options -> String -> IOE a -> IOE a +putPointE v opts msg act = do + when (verbAtLeast opts v) $ ioeIO $ putStrFlush msg + + t1 <- ioeIO $ getCPUTime + a <- act >>= ioeIO . evaluate + t2 <- ioeIO $ getCPUTime + + if flag optShowCPUTime opts + then do let msec = (t2 - t1) `div` 1000000000 + putStrLnE (printf " %5d msec" msec) + else when (verbAtLeast opts v) $ putStrLnE "" + + return a diff --git a/src/compiler/GF/JavaScript/AbsJS.hs b/src/compiler/GF/JavaScript/AbsJS.hs new file mode 100644 index 000000000..2632ade48 --- /dev/null +++ b/src/compiler/GF/JavaScript/AbsJS.hs @@ -0,0 +1,60 @@ +module GF.JavaScript.AbsJS where + +-- Haskell module generated by the BNF converter + +newtype Ident = Ident String deriving (Eq,Ord,Show) +data Program = + Program [Element] + deriving (Eq,Ord,Show) + +data Element = + FunDef Ident [Ident] [Stmt] + | ElStmt Stmt + deriving (Eq,Ord,Show) + +data Stmt = + SCompound [Stmt] + | SReturnVoid + | SReturn Expr + | SDeclOrExpr DeclOrExpr + deriving (Eq,Ord,Show) + +data DeclOrExpr = + Decl [DeclVar] + | DExpr Expr + deriving (Eq,Ord,Show) + +data DeclVar = + DVar Ident + | DInit Ident Expr + deriving (Eq,Ord,Show) + +data Expr = + EAssign Expr Expr + | ENew Ident [Expr] + | EMember Expr Ident + | EIndex Expr Expr + | ECall Expr [Expr] + | EVar Ident + | EInt Int + | EDbl Double + | EStr String + | ETrue + | EFalse + | ENull + | EThis + | EFun [Ident] [Stmt] + | EArray [Expr] + | EObj [Property] + | ESeq [Expr] + deriving (Eq,Ord,Show) + +data Property = + Prop PropertyName Expr + deriving (Eq,Ord,Show) + +data PropertyName = + IdentPropName Ident + | StringPropName String + deriving (Eq,Ord,Show) + diff --git a/src/compiler/GF/JavaScript/JS.cf b/src/compiler/GF/JavaScript/JS.cf new file mode 100644 index 000000000..fe31a2074 --- /dev/null +++ b/src/compiler/GF/JavaScript/JS.cf @@ -0,0 +1,55 @@ +entrypoints Program; + +Program. Program ::= [Element]; + +FunDef. Element ::= "function" Ident "(" [Ident] ")" "{" [Stmt] "}" ; +ElStmt. Element ::= Stmt; +separator Element "" ; + +separator Ident "," ; + +SCompound. Stmt ::= "{" [Stmt] "}" ; +SReturnVoid. Stmt ::= "return" ";" ; +SReturn. Stmt ::= "return" Expr ";" ; +SDeclOrExpr. Stmt ::= DeclOrExpr ";" ; +separator Stmt "" ; + +Decl. DeclOrExpr ::= "var" [DeclVar]; +DExpr. DeclOrExpr ::= Expr1 ; + +DVar. DeclVar ::= Ident ; +DInit. DeclVar ::= Ident "=" Expr ; +separator DeclVar "," ; + +EAssign. Expr13 ::= Expr14 "=" Expr13 ; + +ENew. Expr14 ::= "new" Ident "(" [Expr] ")" ; + +EMember. Expr15 ::= Expr15 "." Ident ; +EIndex. Expr15 ::= Expr15 "[" Expr "]" ; +ECall. Expr15 ::= Expr15 "(" [Expr] ")" ; + +EVar. Expr16 ::= Ident ; +EInt. Expr16 ::= Integer ; +EDbl. Expr16 ::= Double ; +EStr. Expr16 ::= String ; +ETrue. Expr16 ::= "true" ; +EFalse. Expr16 ::= "false" ; +ENull. Expr16 ::= "null" ; +EThis. Expr16 ::= "this" ; +EFun. Expr16 ::= "function" "(" [Ident] ")" "{" [Stmt] "}" ; +EArray. Expr16 ::= "[" [Expr] "]" ; +EObj. Expr16 ::= "{" [Property] "}" ; + +eseq1. Expr16 ::= "(" Expr "," [Expr] ")"; +internal ESeq. Expr16 ::= "(" [Expr] ")" ; +define eseq1 x xs = ESeq (x:xs); + +separator Expr "," ; +coercions Expr 16 ; + +Prop. Property ::= PropertyName ":" Expr ; +separator Property "," ; + +IdentPropName. PropertyName ::= Ident ; +StringPropName. PropertyName ::= String ; diff --git a/src/compiler/GF/JavaScript/LexJS.x b/src/compiler/GF/JavaScript/LexJS.x new file mode 100644 index 000000000..10ba66d69 --- /dev/null +++ b/src/compiler/GF/JavaScript/LexJS.x @@ -0,0 +1,132 @@ +-- -*- haskell -*- +-- This Alex file was machine-generated by the BNF converter +{ +{-# OPTIONS -fno-warn-incomplete-patterns #-} +module GF.JavaScript.LexJS where + + +} + + +$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME +$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME +$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME +$d = [0-9] -- digit +$i = [$l $d _ '] -- identifier character +$u = [\0-\255] -- universal: any character + +@rsyms = -- symbols and non-identifier-like reserved words + \( | \) | \{ | \} | \, | \; | \= | \. | \[ | \] | \: + +:- + +$white+ ; +@rsyms { tok (\p s -> PT p (TS $ share s)) } + +$l $i* { tok (\p s -> PT p (eitherResIdent (TV . share) s)) } +\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \"{ tok (\p s -> PT p (TL $ share $ unescapeInitTail s)) } + +$d+ { tok (\p s -> PT p (TI $ share s)) } +$d+ \. $d+ (e (\-)? $d+)? { tok (\p s -> PT p (TD $ share s)) } + +{ + +tok f p s = f p s + +share :: String -> String +share = id + +data Tok = + TS !String -- reserved words and symbols + | TL !String -- string literals + | TI !String -- integer literals + | TV !String -- identifiers + | TD !String -- double precision float literals + | TC !String -- character literals + + deriving (Eq,Show,Ord) + +data Token = + PT Posn Tok + | Err Posn + deriving (Eq,Show,Ord) + +tokenPos (PT (Pn _ l _) _ :_) = "line " ++ show l +tokenPos (Err (Pn _ l _) :_) = "line " ++ show l +tokenPos _ = "end of file" + +posLineCol (Pn _ l c) = (l,c) +mkPosToken t@(PT p _) = (posLineCol p, prToken t) + +prToken t = case t of + PT _ (TS s) -> s + PT _ (TI s) -> s + PT _ (TV s) -> s + PT _ (TD s) -> s + PT _ (TC s) -> s + + _ -> show t + +data BTree = N | B String Tok BTree BTree deriving (Show) + +eitherResIdent :: (String -> Tok) -> String -> Tok +eitherResIdent tv s = treeFind resWords + where + treeFind N = tv s + treeFind (B a t left right) | s < a = treeFind left + | s > a = treeFind right + | s == a = t + +resWords = b "return" (b "new" (b "function" (b "false" N N) N) (b "null" N N)) (b "true" (b "this" N N) (b "var" N N)) + where b s = B s (TS s) + +unescapeInitTail :: String -> String +unescapeInitTail = unesc . tail where + unesc s = case s of + '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs + '\\':'n':cs -> '\n' : unesc cs + '\\':'t':cs -> '\t' : unesc cs + '"':[] -> [] + c:cs -> c : unesc cs + _ -> [] + +------------------------------------------------------------------- +-- Alex wrapper code. +-- A modified "posn" wrapper. +------------------------------------------------------------------- + +data Posn = Pn !Int !Int !Int + deriving (Eq, Show,Ord) + +alexStartPos :: Posn +alexStartPos = Pn 0 1 1 + +alexMove :: Posn -> Char -> Posn +alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) +alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 +alexMove (Pn a l c) _ = Pn (a+1) l (c+1) + +type AlexInput = (Posn, -- current position, + Char, -- previous char + String) -- current input string + +tokens :: String -> [Token] +tokens str = go (alexStartPos, '\n', str) + where + go :: (Posn, Char, String) -> [Token] + go inp@(pos, _, str) = + case alexScan inp 0 of + AlexEOF -> [] + AlexError (pos, _, _) -> [Err pos] + AlexSkip inp' len -> go inp' + AlexToken inp' len act -> act pos (take len str) : (go inp') + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (p, c, []) = Nothing +alexGetChar (p, _, (c:s)) = + let p' = alexMove p c + in p' `seq` Just (c, (p', c, s)) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (p, c, s) = c +} diff --git a/src/compiler/GF/JavaScript/Makefile b/src/compiler/GF/JavaScript/Makefile new file mode 100644 index 000000000..10f867b06 --- /dev/null +++ b/src/compiler/GF/JavaScript/Makefile @@ -0,0 +1,14 @@ +all: + happy -gca ParJS.y + alex -g LexJS.x + +bnfc: + (cd ../.. && bnfc -p GF.JavaScript GF/JavaScript/JS.cf) + -rm -f *.bak + +clean: + -rm -f *.log *.aux *.hi *.o *.dvi + -rm -f DocJS.ps +distclean: clean + -rm -f DocJS.* LexJS.* ParJS.* LayoutJS.* SkelJS.* PrintJS.* TestJS.* AbsJS.* TestJS ErrM.* SharedString.* JS.dtd XMLJS.* Makefile* + diff --git a/src/compiler/GF/JavaScript/ParJS.y b/src/compiler/GF/JavaScript/ParJS.y new file mode 100644 index 000000000..bf0614757 --- /dev/null +++ b/src/compiler/GF/JavaScript/ParJS.y @@ -0,0 +1,225 @@ +-- This Happy file was machine-generated by the BNF converter +{ +{-# OPTIONS -fno-warn-incomplete-patterns -fno-warn-overlapping-patterns #-} +module GF.JavaScript.ParJS where +import GF.JavaScript.AbsJS +import GF.JavaScript.LexJS +import GF.Data.ErrM +} + +%name pProgram Program + +-- no lexer declaration +%monad { Err } { thenM } { returnM } +%tokentype { Token } + +%token + '(' { PT _ (TS "(") } + ')' { PT _ (TS ")") } + '{' { PT _ (TS "{") } + '}' { PT _ (TS "}") } + ',' { PT _ (TS ",") } + ';' { PT _ (TS ";") } + '=' { PT _ (TS "=") } + '.' { PT _ (TS ".") } + '[' { PT _ (TS "[") } + ']' { PT _ (TS "]") } + ':' { PT _ (TS ":") } + 'false' { PT _ (TS "false") } + 'function' { PT _ (TS "function") } + 'new' { PT _ (TS "new") } + 'null' { PT _ (TS "null") } + 'return' { PT _ (TS "return") } + 'this' { PT _ (TS "this") } + 'true' { PT _ (TS "true") } + 'var' { PT _ (TS "var") } + +L_ident { PT _ (TV $$) } +L_integ { PT _ (TI $$) } +L_doubl { PT _ (TD $$) } +L_quoted { PT _ (TL $$) } +L_err { _ } + + +%% + +Ident :: { Ident } : L_ident { Ident $1 } +Integer :: { Integer } : L_integ { (read $1) :: Integer } +Double :: { Double } : L_doubl { (read $1) :: Double } +String :: { String } : L_quoted { $1 } + +Program :: { Program } +Program : ListElement { Program (reverse $1) } + + +Element :: { Element } +Element : 'function' Ident '(' ListIdent ')' '{' ListStmt '}' { FunDef $2 $4 (reverse $7) } + | Stmt { ElStmt $1 } + + +ListElement :: { [Element] } +ListElement : {- empty -} { [] } + | ListElement Element { flip (:) $1 $2 } + + +ListIdent :: { [Ident] } +ListIdent : {- empty -} { [] } + | Ident { (:[]) $1 } + | Ident ',' ListIdent { (:) $1 $3 } + + +Stmt :: { Stmt } +Stmt : '{' ListStmt '}' { SCompound (reverse $2) } + | 'return' ';' { SReturnVoid } + | 'return' Expr ';' { SReturn $2 } + | DeclOrExpr ';' { SDeclOrExpr $1 } + + +ListStmt :: { [Stmt] } +ListStmt : {- empty -} { [] } + | ListStmt Stmt { flip (:) $1 $2 } + + +DeclOrExpr :: { DeclOrExpr } +DeclOrExpr : 'var' ListDeclVar { Decl $2 } + | Expr1 { DExpr $1 } + + +DeclVar :: { DeclVar } +DeclVar : Ident { DVar $1 } + | Ident '=' Expr { DInit $1 $3 } + + +ListDeclVar :: { [DeclVar] } +ListDeclVar : {- empty -} { [] } + | DeclVar { (:[]) $1 } + | DeclVar ',' ListDeclVar { (:) $1 $3 } + + +Expr13 :: { Expr } +Expr13 : Expr14 '=' Expr13 { EAssign $1 $3 } + | Expr14 { $1 } + + +Expr14 :: { Expr } +Expr14 : 'new' Ident '(' ListExpr ')' { ENew $2 $4 } + | Expr15 { $1 } + + +Expr15 :: { Expr } +Expr15 : Expr15 '.' Ident { EMember $1 $3 } + | Expr15 '[' Expr ']' { EIndex $1 $3 } + | Expr15 '(' ListExpr ')' { ECall $1 $3 } + | Expr16 { $1 } + + +Expr16 :: { Expr } +Expr16 : Ident { EVar $1 } + | Integer { EInt $1 } + | Double { EDbl $1 } + | String { EStr $1 } + | 'true' { ETrue } + | 'false' { EFalse } + | 'null' { ENull } + | 'this' { EThis } + | 'function' '(' ListIdent ')' '{' ListStmt '}' { EFun $3 (reverse $6) } + | '[' ListExpr ']' { EArray $2 } + | '{' ListProperty '}' { EObj $2 } + | '(' Expr ',' ListExpr ')' { eseq1_ $2 $4 } + | '(' Expr ')' { $2 } + + +ListExpr :: { [Expr] } +ListExpr : {- empty -} { [] } + | Expr { (:[]) $1 } + | Expr ',' ListExpr { (:) $1 $3 } + + +Expr :: { Expr } +Expr : Expr1 { $1 } + + +Expr1 :: { Expr } +Expr1 : Expr2 { $1 } + + +Expr2 :: { Expr } +Expr2 : Expr3 { $1 } + + +Expr3 :: { Expr } +Expr3 : Expr4 { $1 } + + +Expr4 :: { Expr } +Expr4 : Expr5 { $1 } + + +Expr5 :: { Expr } +Expr5 : Expr6 { $1 } + + +Expr6 :: { Expr } +Expr6 : Expr7 { $1 } + + +Expr7 :: { Expr } +Expr7 : Expr8 { $1 } + + +Expr8 :: { Expr } +Expr8 : Expr9 { $1 } + + +Expr9 :: { Expr } +Expr9 : Expr10 { $1 } + + +Expr10 :: { Expr } +Expr10 : Expr11 { $1 } + + +Expr11 :: { Expr } +Expr11 : Expr12 { $1 } + + +Expr12 :: { Expr } +Expr12 : Expr13 { $1 } + + +Property :: { Property } +Property : PropertyName ':' Expr { Prop $1 $3 } + + +ListProperty :: { [Property] } +ListProperty : {- empty -} { [] } + | Property { (:[]) $1 } + | Property ',' ListProperty { (:) $1 $3 } + + +PropertyName :: { PropertyName } +PropertyName : Ident { IdentPropName $1 } + | String { StringPropName $1 } + + + +{ + +returnM :: a -> Err a +returnM = return + +thenM :: Err a -> (a -> Err b) -> Err b +thenM = (>>=) + +happyError :: [Token] -> Err a +happyError ts = + Bad $ "syntax error at " ++ tokenPos ts ++ + case ts of + [] -> [] + [Err _] -> " due to lexer error" + _ -> " before " ++ unwords (map prToken (take 4 ts)) + +myLexer = tokens +eseq1_ x_ xs_ = ESeq (x_ : xs_) +} + diff --git a/src/compiler/GF/JavaScript/PrintJS.hs b/src/compiler/GF/JavaScript/PrintJS.hs new file mode 100644 index 000000000..4e04e3cbf --- /dev/null +++ b/src/compiler/GF/JavaScript/PrintJS.hs @@ -0,0 +1,169 @@ +{-# OPTIONS -fno-warn-incomplete-patterns #-} +module GF.JavaScript.PrintJS (printTree, Doc, Print(..)) where + +-- pretty-printer generated by the BNF converter + +import GF.JavaScript.AbsJS +import Data.Char + +-- the top-level printing method +printTree :: Print a => a -> String +printTree = render . prt 0 + +type Doc = [ShowS] -> [ShowS] + +doc :: ShowS -> Doc +doc = (:) + +render :: Doc -> String +render d = rend 0 (map ($ "") $ d []) "" where + rend i ss = case ss of + t:ts | not (spaceAfter t) -> showString t . rend i ts + t:ts@(t':_) | not (spaceBefore t') -> showString t . rend i ts + t:ts -> space t . rend i ts + [] -> id + new i = showChar '\n' . replicateS (2*i) (showChar ' ') . dropWhile isSpace + space t = showString t . (\s -> if null s then "" else (' ':s)) + +spaceAfter :: String -> Bool +spaceAfter = (`notElem` [".","(","[","{","\n"]) + +spaceBefore :: String -> Bool +spaceBefore = (`notElem` [",",".",":",";","(",")","[","]","{","}","\n"]) + +parenth :: Doc -> Doc +parenth ss = doc (showChar '(') . ss . doc (showChar ')') + +concatS :: [ShowS] -> ShowS +concatS = foldr (.) id + +concatD :: [Doc] -> Doc +concatD = foldr (.) id + +replicateS :: Int -> ShowS -> ShowS +replicateS n f = concatS (replicate n f) + +-- the printer class does the job +class Print a where + prt :: Int -> a -> Doc + prtList :: [a] -> Doc + prtList = concatD . map (prt 0) + +instance Print a => Print [a] where + prt _ = prtList + +instance Print Char where + prt _ s = doc (showChar '\'' . mkEsc '\'' s . showChar '\'') + prtList s = doc (showChar '"' . concatS (map (mkEsc '"') s) . showChar '"') + +mkEsc :: Char -> Char -> ShowS +mkEsc q s = case s of + _ | s == q -> showChar '\\' . showChar s + '\\'-> showString "\\\\" + '\n' -> showString "\\n" + '\t' -> showString "\\t" + _ -> showChar s + +prPrec :: Int -> Int -> Doc -> Doc +prPrec i j = if j<i then parenth else id + + +instance Print Int where + prt _ x = doc (shows x) + + +instance Print Double where + prt _ x = doc (shows x) + + +instance Print Ident where + prt _ (Ident i) = doc (showString i) + prtList es = case es of + [] -> (concatD []) + [x] -> (concatD [prt 0 x]) + x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs]) + + + +instance Print Program where + prt i e = case e of + Program elements -> prPrec i 0 (concatD [prt 0 elements]) + + +instance Print Element where + prt i e = case e of + FunDef id ids stmts -> prPrec i 0 (concatD [doc (showString "function") , prt 0 id , doc (showString "(") , prt 0 ids , doc (showString ")") , doc (showString "{") , prt 0 stmts , doc (showString "}")]) + ElStmt stmt -> prPrec i 0 (concatD [prt 0 stmt]) + + prtList es = case es of + [] -> (concatD []) + x:xs -> (concatD [prt 0 x , doc (showString "\n"), prt 0 xs]) -- HACKED! + +instance Print Stmt where + prt i e = case e of + SCompound stmts -> prPrec i 0 (concatD [doc (showString "{") , prt 0 stmts , doc (showString "}")]) + SReturnVoid -> prPrec i 0 (concatD [doc (showString "return") , doc (showString ";")]) + SReturn expr -> prPrec i 0 (concatD [doc (showString "return") , prt 0 expr , doc (showString ";")]) + SDeclOrExpr declorexpr -> prPrec i 0 (concatD [prt 0 declorexpr , doc (showString ";")]) + + prtList es = case es of + [] -> (concatD []) + x:xs -> (concatD [prt 0 x , prt 0 xs]) + +instance Print DeclOrExpr where + prt i e = case e of + Decl declvars -> prPrec i 0 (concatD [doc (showString "var") , prt 0 declvars]) + DExpr expr -> prPrec i 0 (concatD [prt 1 expr]) + + +instance Print DeclVar where + prt i e = case e of + DVar id -> prPrec i 0 (concatD [prt 0 id]) + DInit id expr -> prPrec i 0 (concatD [prt 0 id , doc (showString "=") , prt 0 expr]) + + prtList es = case es of + [] -> (concatD []) + [x] -> (concatD [prt 0 x]) + x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs]) + +instance Print Expr where + prt i e = case e of + EAssign expr0 expr -> prPrec i 13 (concatD [prt 14 expr0 , doc (showString "=") , prt 13 expr]) + ENew id exprs -> prPrec i 14 (concatD [doc (showString "new") , prt 0 id , doc (showString "(") , prt 0 exprs , doc (showString ")")]) + EMember expr id -> prPrec i 15 (concatD [prt 15 expr , doc (showString ".") , prt 0 id]) + EIndex expr0 expr -> prPrec i 15 (concatD [prt 15 expr0 , doc (showString "[") , prt 0 expr , doc (showString "]")]) + ECall expr exprs -> prPrec i 15 (concatD [prt 15 expr , doc (showString "(") , prt 0 exprs , doc (showString ")")]) + EVar id -> prPrec i 16 (concatD [prt 0 id]) + EInt n -> prPrec i 16 (concatD [prt 0 n]) + EDbl d -> prPrec i 16 (concatD [prt 0 d]) + EStr str -> prPrec i 16 (concatD [prt 0 str]) + ETrue -> prPrec i 16 (concatD [doc (showString "true")]) + EFalse -> prPrec i 16 (concatD [doc (showString "false")]) + ENull -> prPrec i 16 (concatD [doc (showString "null")]) + EThis -> prPrec i 16 (concatD [doc (showString "this")]) + EFun ids stmts -> prPrec i 16 (concatD [doc (showString "function") , doc (showString "(") , prt 0 ids , doc (showString ")") , doc (showString "{") , prt 0 stmts , doc (showString "}")]) + EArray exprs -> prPrec i 16 (concatD [doc (showString "[") , prt 0 exprs , doc (showString "]")]) + EObj propertys -> prPrec i 16 (concatD [doc (showString "{") , prt 0 propertys , doc (showString "}")]) + ESeq exprs -> prPrec i 16 (concatD [doc (showString "(") , prt 0 exprs , doc (showString ")")]) + + prtList es = case es of + [] -> (concatD []) + [x] -> (concatD [prt 0 x]) + x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs]) + +instance Print Property where + prt i e = case e of + Prop propertyname expr -> prPrec i 0 (concatD [prt 0 propertyname , doc (showString ":") , prt 0 expr]) + + prtList es = case es of + [] -> (concatD []) + [x] -> (concatD [prt 0 x]) + x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs]) + +instance Print PropertyName where + prt i e = case e of + IdentPropName id -> prPrec i 0 (concatD [prt 0 id]) + StringPropName str -> prPrec i 0 (concatD [prt 0 str]) + + + diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs new file mode 100644 index 000000000..52d9dee6b --- /dev/null +++ b/src/compiler/GF/Quiz.hs @@ -0,0 +1,98 @@ +---------------------------------------------------------------------- +-- | +-- Module : TeachYourself +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:46:13 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.7 $ +-- +-- translation and morphology quiz. AR 10\/5\/2000 -- 12\/4\/2002 -- 14\/6\/2008 +-------------------------------------------------------------------------------- + +module GF.Quiz ( + mkQuiz, + translationList, + morphologyList + ) where + +import PGF +import PGF.ShowLinearize +import GF.Data.Operations +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Text.Coding + +import System.Random + +import Data.List (nub) + +-- translation and morphology quiz. AR 10/5/2000 -- 12/4/2002 + +-- generic quiz function + +mkQuiz :: Encoding -> String -> [(String,[String])] -> IO () +mkQuiz cod msg tts = do + let qas = [ (encodeUnicode cod q, mkAnswer cod as) | (q,as) <- tts] + teachDialogue qas msg + +translationList :: + PGF -> Language -> Language -> Type -> Int -> IO [(String,[String])] +translationList pgf ig og typ number = do + ts <- generateRandom pgf typ >>= return . take number + return $ map mkOne $ ts + where + mkOne t = (norml (linearize pgf ig t), map (norml . linearize pgf og) (homonyms t)) + homonyms = nub . parse pgf ig typ . linearize pgf ig + +morphologyList :: PGF -> Language -> Type -> Int -> IO [(String,[String])] +morphologyList pgf ig typ number = do + ts <- generateRandom pgf typ >>= return . take (max 1 number) + gen <- newStdGen + let ss = map (tabularLinearize pgf ig) ts + let size = length (head ss) + let forms = take number $ randomRs (0,size-1) gen + return [(head (snd (head pws)) +++ par, ws) | + (pws,i) <- zip ss forms, let (par,ws) = pws !! i] + +-- | compare answer to the list of right answers, increase score and give feedback +mkAnswer :: Encoding -> [String] -> String -> (Integer, String) +mkAnswer cod as s = + if (elem (norm s) as) + then (1,"Yes.") + else (0,"No, not" +++ s ++ ", but" ++++ enc (unlines as)) + where + norm = unwords . words . decodeUnicode cod + enc = encodeUnicode cod + +norml = unwords . words + + +-- * a generic quiz session + +type QuestionsAndAnswers = [(String, String -> (Integer,String))] + +teachDialogue :: QuestionsAndAnswers -> String -> IO () +teachDialogue qas welc = do + putStrLn $ welc ++++ genericTeachWelcome + teach (0,0) qas + where + teach _ [] = do putStrLn "Sorry, ran out of problems" + teach (score,total) ((question,grade):quas) = do + putStr ("\n" ++ question ++ "\n> ") + answer <- getLine + if (answer == ".") then return () else do + let (result, feedback) = grade answer + score' = score + result + total' = total + 1 + putStr (feedback ++++ "Score" +++ show score' ++ "/" ++ show total') + if (total' > 9 && fromInteger score' / fromInteger total' >= 0.75) + then do putStrLn "\nCongratulations - you passed!" + else teach (score',total') quas + + genericTeachWelcome = + "The quiz is over when you have done at least 10 examples" ++++ + "with at least 75 % success." +++++ + "You can interrupt the quiz by entering a line consisting of a dot ('.').\n" diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Speech/CFG.hs new file mode 100644 index 000000000..9ec8416c5 --- /dev/null +++ b/src/compiler/GF/Speech/CFG.hs @@ -0,0 +1,372 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.CFG +-- +-- Context-free grammar representation and manipulation. +---------------------------------------------------------------------- +module GF.Speech.CFG where + +import GF.Data.Utilities +import PGF.CId +import GF.Infra.Option +import GF.Data.Relation + +import Control.Monad +import Control.Monad.State (State, get, put, evalState) +import qualified Data.ByteString.Char8 as BS +import Data.Map (Map) +import qualified Data.Map as Map +import Data.List +import Data.Maybe (fromMaybe) +import Data.Monoid (mconcat) +import Data.Set (Set) +import qualified Data.Set as Set + +-- +-- * Types +-- + +type Cat = String +type Token = String + +data Symbol c t = NonTerminal c | Terminal t + deriving (Eq, Ord, Show) + +type CFSymbol = Symbol Cat Token + +data CFRule = CFRule { + lhsCat :: Cat, + ruleRhs :: [CFSymbol], + ruleName :: CFTerm + } + deriving (Eq, Ord, Show) + +data CFTerm + = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments + | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id. + | CFApp CFTerm CFTerm -- ^ Application + | CFRes Int -- ^ The result of the n:th (0-based) non-terminal + | CFVar Int -- ^ A lambda-bound variable + | CFMeta CId -- ^ A metavariable + deriving (Eq, Ord, Show) + +data CFG = CFG { cfgStartCat :: Cat, + cfgExternalCats :: Set Cat, + cfgRules :: Map Cat (Set CFRule) } + deriving (Eq, Ord, Show) + +-- +-- * Grammar filtering +-- + +-- | Removes all directly and indirectly cyclic productions. +-- FIXME: this may be too aggressive, only one production +-- needs to be removed to break a given cycle. But which +-- one should we pick? +-- FIXME: Does not (yet) remove productions which are cyclic +-- because of empty productions. +removeCycles :: CFG -> CFG +removeCycles = onRules f + where f rs = filter (not . isCycle) rs + where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs] + isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c + isCycle _ = False + +-- | Better bottom-up filter that also removes categories which contain no finite +-- strings. +bottomUpFilter :: CFG -> CFG +bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty }) + where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr + okSym g = symbol (`elem` allCats g) (const True) + +-- | Removes categories which are not reachable from any external category. +topDownFilter :: CFG -> CFG +topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg + where + rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ] + uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats + keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg + +-- | Merges categories with identical right-hand-sides. +-- FIXME: handle probabilities +mergeIdentical :: CFG -> CFG +mergeIdentical g = onRules (map subst) g + where + -- maps categories to their replacement + m = Map.fromList [(y,concat (intersperse "+" xs)) + | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs] + -- build data to compare for each category: a set of name,rhs pairs + rulesKey = Set.map (\ (CFRule _ r n) -> (n,r)) + subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n + substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m + +-- | Keeps only the start category as an external category. +purgeExternalCats :: CFG -> CFG +purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) } + +-- +-- * Removing left recursion +-- + +-- The LC_LR algorithm from +-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf +removeLeftRecursion :: CFG -> CFG +removeLeftRecursion gr + = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] } + where + scheme1 = [CFRule a [x,NonTerminal a_x] n' | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + not (isLeftRecursive x), + let a_x = mkCat (NonTerminal a) x, + -- this is an extension of LC_LR to avoid generating + -- A-X categories for which there are no productions: + a_x `Set.member` newCats, + let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0)) + (\_ -> CFRes 0) x] + scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' | + a <- retainedLeftRecursive, + b@(NonTerminal b') <- properLeftCornersOf a, + isLeftRecursive b, + CFRule _ (x:beta) n <- catRules gr b', + let a_x = mkCat (NonTerminal a) x, + let a_b = mkCat (NonTerminal a) b, + let i = length $ filterCats beta, + let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n))) + (\_ -> CFApp (CFRes i) n) x] + scheme3 = [CFRule a_x beta n' | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + CFRule _ (x':beta) n <- catRules gr a, + x == x', + let a_x = mkCat (NonTerminal a) x, + let n' = symbol (\_ -> CFAbs 1 (shiftTerm n)) + (\_ -> n) x] + scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats + + newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3)) + + shiftTerm :: CFTerm -> CFTerm + shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts) + shiftTerm (CFRes 0) = CFVar 1 + shiftTerm (CFRes n) = CFRes (n-1) + shiftTerm t = t + -- note: the rest don't occur in the original grammar + + cats = allCats gr + rules = allRules gr + + directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr] + leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner + properLeftCorner = transitiveClosure directLeftCorner + properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal + isProperLeftCornerOf = flip (isRelatedTo properLeftCorner) + + leftRecursive = reflexiveElements properLeftCorner + isLeftRecursive = (`Set.member` leftRecursive) + + retained = cfgStartCat gr `Set.insert` + Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr), + NonTerminal a <- ruleRhs r] + isRetained = (`Set.member` retained) + + retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained + + mkCat :: CFSymbol -> CFSymbol -> Cat + mkCat x y = showSymbol x ++ "-" ++ showSymbol y + where showSymbol = symbol id show + +-- | Get the sets of mutually recursive non-terminals for a grammar. +mutRecCats :: Bool -- ^ If true, all categories will be in some set. + -- If false, only recursive categories will be included. + -> CFG -> [Set Cat] +mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r + where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss] + refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation + +-- +-- * Approximate context-free grammars with regular grammars. +-- + +makeSimpleRegular :: CFG -> CFG +makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles + +-- Use the transformation algorithm from \"Regular Approximation of Context-free +-- Grammars through Approximation\", Mohri and Nederhof, 2000 +-- to create an over-generating regular grammar for a context-free +-- grammar +makeRegular :: CFG -> CFG +makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) } + where trSet cs | allXLinear cs rs = rs + | otherwise = concatMap handleCat (Set.toList cs) + where rs = catSetRules g cs + handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e + ++ concatMap (makeRightLinearRules c) (catRules g c) + where c' = newCat c + makeRightLinearRules b' (CFRule c ss n) = + case ys of + [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left + (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n + ++ makeRightLinearRules (newCat b) (CFRule c zs n) + where (xs,ys) = break (`catElem` cs) ss + -- don't add rules on the form A -> A + newRule c rhs n | rhs == [NonTerminal c] = [] + | otherwise = [CFRule c rhs n] + newCat c = c ++ "$" + +-- +-- * CFG Utilities +-- + +mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG +mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs } + +groupProds :: [CFRule] -> Map Cat (Set CFRule) +groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) + +-- | Gets all rules in a CFG. +allRules :: CFG -> [CFRule] +allRules = concat . map Set.toList . Map.elems . cfgRules + +-- | Gets all rules in a CFG, grouped by their LHS categories. +allRulesGrouped :: CFG -> [(Cat,[CFRule])] +allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules + +-- | Gets all categories which have rules. +allCats :: CFG -> [Cat] +allCats = Map.keys . cfgRules + +-- | Gets all categories which have rules or occur in a RHS. +allCats' :: CFG -> [Cat] +allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union` + Set.fromList [c | rs <- Map.elems (cfgRules cfg), + r <- Set.toList rs, + NonTerminal c <- ruleRhs r]) + +-- | Gets all rules for the given category. +catRules :: CFG -> Cat -> [CFRule] +catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr) + +-- | Gets all rules for categories in the given set. +catSetRules :: CFG -> Set Cat -> [CFRule] +catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr + +mapCFGCats :: (Cat -> Cat) -> CFG -> CFG +mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg)) + (Set.map f (cfgExternalCats cfg)) + [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg] + +onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG +onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) } + +onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG +onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg } + +-- | Clean up CFG after rules have been removed. +cleanCFG :: CFG -> CFG +cleanCFG = onCFG (Map.filter (not . Set.null)) + +-- | Combine two CFGs. +unionCFG :: CFG -> CFG -> CFG +unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x + +filterCFG :: (CFRule -> Bool) -> CFG -> CFG +filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p)) + +filterCFGCats :: (Cat -> Bool) -> CFG -> CFG +filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c)) + +countCats :: CFG -> Int +countCats = Map.size . cfgRules . cleanCFG + +countRules :: CFG -> Int +countRules = length . allRules + +prCFG :: CFG -> String +prCFG = prProductions . map prRule . allRules + where + prRule r = (lhsCat r, unwords (map prSym (ruleRhs r))) + prSym = symbol id (\t -> "\""++ t ++"\"") + +prProductions :: [(Cat,String)] -> String +prProductions prods = + unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods] + where + maxLHSWidth = maximum $ 0:(map (length . fst) prods) + rpad n s = s ++ replicate (n - length s) ' ' + +prCFTerm :: CFTerm -> String +prCFTerm = pr 0 + where + pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")") + pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t) + pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")") + pr _ (CFRes i) = "$" ++ show i + pr _ (CFVar i) = "x" ++ show i + pr _ (CFMeta c) = "?" ++ showCId c + paren 0 x = x + paren 1 x = "(" ++ x ++ ")" + +-- +-- * CFRule Utilities +-- + +ruleFun :: CFRule -> CId +ruleFun (CFRule _ _ t) = f t + where f (CFObj n _) = n + f (CFApp _ x) = f x + f (CFAbs _ x) = f x + f _ = mkCId "" + +-- | Check if any of the categories used on the right-hand side +-- are in the given list of categories. +anyUsedBy :: [Cat] -> CFRule -> Bool +anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) + +mkCFTerm :: String -> CFTerm +mkCFTerm n = CFObj (mkCId n) [] + +ruleIsNonRecursive :: Set Cat -> CFRule -> Bool +ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs + +-- | Check if all the rules are right-linear, or all the rules are +-- left-linear, with respect to given categories. +allXLinear :: Set Cat -> [CFRule] -> Bool +allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs + +-- | Checks if a context-free rule is right-linear. +isRightLinear :: Set Cat -- ^ The categories to consider + -> CFRule -- ^ The rule to check for right-linearity + -> Bool +isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs + +-- | Checks if a context-free rule is left-linear. +isLeftLinear :: Set Cat -- ^ The categories to consider + -> CFRule -- ^ The rule to check for left-linearity + -> Bool +isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs + + +-- +-- * Symbol utilities +-- + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (NonTerminal cat) = fc cat +symbol fc ft (Terminal tok) = ft tok + +mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t' +mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft) + +filterCats :: [Symbol c t] -> [c] +filterCats syms = [ cat | NonTerminal cat <- syms ] + +filterToks :: [Symbol c t] -> [t] +filterToks syms = [ tok | Terminal tok <- syms ] + +-- | Checks if a symbol is a non-terminal of one of the given categories. +catElem :: Ord c => Symbol c t -> Set c -> Bool +catElem s cs = symbol (`Set.member` cs) (const False) s + +noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool +noCatsInSet cs = not . any (`catElem` cs) diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs new file mode 100644 index 000000000..3045ac842 --- /dev/null +++ b/src/compiler/GF/Speech/CFGToFA.hs @@ -0,0 +1,244 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.CFGToFA +-- +-- Approximates CFGs with finite state networks. +---------------------------------------------------------------------- +module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular, + MFA(..), cfgToMFA, cfgToFA') where + +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import PGF.CId +import PGF.Data +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.PGFToCFG +import GF.Infra.Ident (Ident) + +import GF.Data.Graph +import GF.Data.Relation +import GF.Speech.FiniteState +import GF.Speech.CFG + +data Recursivity = RightR | LeftR | NotR + +data MutRecSet = MutRecSet { + mrCats :: Set Cat, + mrNonRecRules :: [CFRule], + mrRecRules :: [CFRule], + mrRec :: Recursivity + } + + +type MutRecSets = Map Cat MutRecSet + +-- +-- * Multiple DFA type +-- + +data MFA = MFA Cat [(Cat,DFA CFSymbol)] + + + +cfgToFA :: CFG -> DFA Token +cfgToFA = minimize . compileAutomaton . makeSimpleRegular + + +-- +-- * Compile strongly regular grammars to NFAs +-- + +-- Convert a strongly regular grammar to a finite automaton. +compileAutomaton :: CFG -> NFA Token +compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa + where + (fa,s,f) = newFA_ + ns = mutRecSets g $ mutRecCats False g + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000. +make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State + -> NFA Token -> NFA Token +make_fa c@(g,ns) q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [Terminal t] -> newTransition q0 q1 (Just t) fa + [NonTerminal a] -> + case Map.lookup a ns of + -- a is recursive + Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> + case mrRec n of + -- the set Ni is right-recursive or cyclic + RightR -> + let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, + let (xs,NonTerminal d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + -- the set Ni is left-recursive + LeftR -> + let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] + ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats ni fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + -- a is not recursive + Nothing -> let rs = catRules g a + in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs + (x:beta) -> let (fa',q) = newState () fa + in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa' + where + make_fa_ = make_fa c + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs + +-- +-- * Compile a strongly regular grammar to a DFA with sub-automata +-- + +cfgToMFA :: CFG -> MFA +cfgToMFA = buildMFA . makeSimpleRegular + +-- | Build a DFA by building and expanding an MFA +cfgToFA' :: CFG -> DFA Token +cfgToFA' = mfaToDFA . cfgToMFA + +buildMFA :: CFG -> MFA +buildMFA g = sortSubLats $ removeUnusedSubLats mfa + where fas = compileAutomata g + mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas] + +mfaStartDFA :: MFA -> DFA CFSymbol +mfaStartDFA (MFA start subs) = + fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs + +mfaToDFA :: MFA -> DFA Token +mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa + where + subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs] + getSub l = fromJust $ Map.lookup l subs' + expand (FA (Graph c ns es) s f) + = foldl' expandEdge (FA (Graph c ns []) s f) es + expandEdge fa (f,t,x) = + case x of + Nothing -> newTransition f t Nothing fa + Just (Terminal s) -> newTransition f t (Just s) fa + Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l) + +removeUnusedSubLats :: MFA -> MFA +removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c] + where + usedMap = subLatUseMap mfa + used = growUsedSet (Set.singleton start) + isUsed c = c `Set.member` used + growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s) + +subLatUseMap :: MFA -> Map Cat (Set Cat) +subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs] + +usedSubLats :: DFA CFSymbol -> Set Cat +usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa] + +-- | Sort sub-networks topologically. +sortSubLats :: MFA -> MFA +sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs) + where + usedByMap = revMultiMap (subLatUseMap mfa) + sortLats _ [] = [] + sortLats ub ls = xs ++ sortLats ub' ys + where (xs,ys) = partition ((==0) . indeg) ls + ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub + indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub + +-- | Convert a strongly regular grammar to a number of finite automata, +-- one for each non-terminal. +-- The edges in the automata accept tokens, or name another automaton to use. +compileAutomata :: CFG + -> [(Cat,NFA CFSymbol)] + -- ^ A map of non-terminals and their automata. +compileAutomata g = [(c, makeOneFA c) | c <- allCats g] + where + mrs = mutRecSets g $ mutRecCats True g + makeOneFA c = make_fa1 mr s [NonTerminal c] f fa + where (fa,s,f) = newFA_ + mr = fromJust (Map.lookup c mrs) + + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000, +-- adapted to build a finite automaton for a single (mutually recursive) set only. +-- Categories not in the set will result in category-labelled edges. +make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which + -- we are building the automaton. + -> State -- ^ State to come from + -> [CFSymbol] -- ^ Symbols to accept + -> State -- ^ State to end up in + -> NFA CFSymbol -- ^ FA to add to. + -> NFA CFSymbol +make_fa1 mr q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [t@(Terminal _)] -> newTransition q0 q1 (Just t) fa + [c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa + [NonTerminal a] -> + case mrRec mr of + NotR -> -- the set is a non-recursive (always singleton) set of categories + -- so the set of category rules is the set of rules for the whole set + make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa + RightR -> -- the set is right-recursive or cyclic + let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, + let (xs,NonTerminal d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + LeftR -> -- the set is left-recursive + let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats (mrCats mr) fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + (x:beta) -> let (fa',q) = newState () fa + in make_fas [(q0,[x],q),(q,beta,q1)] fa' + where + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs + +mutRecSets :: CFG -> [Set Cat] -> MutRecSets +mutRecSets g = Map.fromList . concatMap mkMutRecSet + where + mkMutRecSet cs = [ (c,ms) | c <- csl ] + where csl = Set.toList cs + rs = catSetRules g cs + (nrs,rrs) = partition (ruleIsNonRecursive cs) rs + ms = MutRecSet { + mrCats = cs, + mrNonRecRules = nrs, + mrRecRules = rrs, + mrRec = rec + } + rec | null rrs = NotR + | all (isRightLinear cs) rrs = RightR + | otherwise = LeftR + +-- +-- * Utilities +-- + +-- | Add a state for the given NFA for each of the categories +-- in the given set. Returns a map of categories to their +-- corresponding states. +addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State) +addStatesForCats cs fa = (fa', m) + where (fa', ns) = newStates (replicate (Set.size cs) ()) fa + m = Map.fromList (zip (Set.toList cs) (map fst ns)) + +revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a) +revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s] diff --git a/src/compiler/GF/Speech/FiniteState.hs b/src/compiler/GF/Speech/FiniteState.hs new file mode 100644 index 000000000..136d773a2 --- /dev/null +++ b/src/compiler/GF/Speech/FiniteState.hs @@ -0,0 +1,329 @@ +---------------------------------------------------------------------- +-- | +-- Module : FiniteState +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/10 16:43:44 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.16 $ +-- +-- A simple finite state network module. +----------------------------------------------------------------------------- +module GF.Speech.FiniteState (FA(..), State, NFA, DFA, + startState, finalStates, + states, transitions, + isInternal, + newFA, newFA_, + addFinalState, + newState, newStates, + newTransition, newTransitions, + insertTransitionWith, insertTransitionsWith, + mapStates, mapTransitions, + modifyTransitions, + nonLoopTransitionsTo, nonLoopTransitionsFrom, + loops, + removeState, + oneFinalState, + insertNFA, + onGraph, + moveLabelsToNodes, removeTrivialEmptyNodes, + minimize, + dfa2nfa, + unusedNames, renameStates, + prFAGraphviz, faToGraphviz) where + +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import GF.Data.Utilities +import GF.Data.Graph +import qualified GF.Data.Graphviz as Dot + +type State = Int + +-- | Type parameters: node id type, state label type, edge label type +-- Data constructor arguments: nodes and edges, start state, final states +data FA n a b = FA !(Graph n a b) !n ![n] + +type NFA a = FA State () (Maybe a) + +type DFA a = FA State () a + + +startState :: FA n a b -> n +startState (FA _ s _) = s + +finalStates :: FA n a b -> [n] +finalStates (FA _ _ ss) = ss + +states :: FA n a b -> [(n,a)] +states (FA g _ _) = nodes g + +transitions :: FA n a b -> [(n,n,b)] +transitions (FA g _ _) = edges g + +newFA :: Enum n => a -- ^ Start node label + -> FA n a b +newFA l = FA g s [] + where (g,s) = newNode l (newGraph [toEnum 0..]) + +-- | Create a new finite automaton with an initial and a final state. +newFA_ :: Enum n => (FA n () b, n, n) +newFA_ = (fa'', s, f) + where fa = newFA () + s = startState fa + (fa',f) = newState () fa + fa'' = addFinalState f fa' + +addFinalState :: n -> FA n a b -> FA n a b +addFinalState f (FA g s ss) = FA g s (f:ss) + +newState :: a -> FA n a b -> (FA n a b, n) +newState x (FA g s ss) = (FA g' s ss, n) + where (g',n) = newNode x g + +newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)]) +newStates xs (FA g s ss) = (FA g' s ss, ns) + where (g',ns) = newNodes xs g + +newTransition :: n -> n -> b -> FA n a b -> FA n a b +newTransition f t l = onGraph (newEdge (f,t,l)) + +newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b +newTransitions es = onGraph (newEdges es) + +insertTransitionWith :: Eq n => + (b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b +insertTransitionWith f t = onGraph (insertEdgeWith f t) + +insertTransitionsWith :: Eq n => + (b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b +insertTransitionsWith f ts fa = + foldl' (flip (insertTransitionWith f)) fa ts + +mapStates :: (a -> c) -> FA n a b -> FA n c b +mapStates f = onGraph (nmap f) + +mapTransitions :: (b -> c) -> FA n a b -> FA n a c +mapTransitions f = onGraph (emap f) + +modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b +modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es)) + +removeState :: Ord n => n -> FA n a b -> FA n a b +removeState n = onGraph (removeNode n) + +minimize :: Ord a => NFA a -> DFA a +minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA + +unusedNames :: FA n a b -> [n] +unusedNames (FA (Graph names _ _) _ _) = names + +-- | Gets all incoming transitions to a given state, excluding +-- transtions from the state itself. +nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)] +nonLoopTransitionsTo s fa = + [(f,l) | (f,t,l) <- transitions fa, t == s && f /= s] + +nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)] +nonLoopTransitionsFrom s fa = + [(t,l) | (f,t,l) <- transitions fa, f == s && t /= s] + +loops :: Eq n => n -> FA n a b -> [b] +loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s] + +-- | Give new names to all nodes. +renameStates :: Ord x => [y] -- ^ Infinite supply of new names + -> FA x a b + -> FA y a b +renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs' + where (ns,rest) = splitAt (length (nodes g)) supply + newNodes = Map.fromList (zip (map fst (nodes g)) ns) + newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes + s' = newName s + fs' = map newName fs + +-- | Insert an NFA into another +insertNFA :: NFA a -- ^ NFA to insert into + -> (State, State) -- ^ States to insert between + -> NFA a -- ^ NFA to insert. + -> NFA a +insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2) + = FA (newEdges es g') s1 fs1 + where + es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2] + (g',ren) = mergeGraphs g1 g2 + +onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d +onGraph f (FA g s ss) = FA (f g) s ss + + +-- | Make the finite automaton have a single final state +-- by adding a new final state and adding an edge +-- from the old final states to the new state. +oneFinalState :: a -- ^ Label to give the new node + -> b -- ^ Label to give the new edges + -> FA n a b -- ^ The old network + -> FA n a b -- ^ The new network +oneFinalState nl el fa = + let (FA g s fs,nf) = newState nl fa + es = [ (f,nf,el) | f <- fs ] + in FA (newEdges es g) s [nf] + +-- | Transform a standard finite automaton with labelled edges +-- to one where the labels are on the nodes instead. This can add +-- up to one extra node per edge. +moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) () +moveLabelsToNodes = onGraph f + where f g@(Graph c _ _) = Graph c' ns (concat ess) + where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)] + (c',is') = mapAccumL fixIncoming c is + (ns,ess) = unzip (concat is') + + +-- | Remove empty nodes which are not start or final, and have +-- exactly one outgoing edge or exactly one incoming edge. +removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) () +removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes + +-- | Move edges to empty nodes to point to the next node(s). +-- This is not done if the pointed-to node is a final node. +skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) () +skipSimpleEmptyNodes fa = onGraph og fa + where + og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es') + where + es' = concatMap changeEdge es + info = nodeInfo g + changeEdge e@(f,t,()) + | isNothing (getNodeLabel info t) + -- && (i * o <= i + o) + && not (isFinal fa t) + = [ (f,t',()) | (_,t',()) <- getOutgoing info t] + | otherwise = [e] +-- where i = inDegree info t +-- o = outDegree info t + +isInternal :: Eq n => FA n a b -> n -> Bool +isInternal (FA _ start final) n = n /= start && n `notElem` final + +isFinal :: Eq n => FA n a b -> n -> Bool +isFinal (FA _ _ final) n = n `elem` final + +-- | Remove all internal nodes with no incoming edges +-- or no outgoing edges. +pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) () +pruneUnusable fa = onGraph f fa + where + f g = if Set.null rns then g else f (removeNodes rns g) + where info = nodeInfo g + rns = Set.fromList [ n | (n,_) <- nodes g, + isInternal fa n, + inDegree info n == 0 + || outDegree info n == 0] + +fixIncoming :: (Ord n, Eq a) => [n] + -> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges + -> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their + -- incoming edges. +fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts) + where ls = nub $ map edgeLabel es + (cs',cs'') = splitAt (length ls) cs + newNodes = zip cs' ls + es' = [ (x,n,()) | x <- map fst newNodes ] + -- separate cyclic and non-cyclic edges + (cyc,ncyc) = partition (\ (f,_,_) -> f == n) es + -- keep all incoming non-cyclic edges with the right label + to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l'] + -- for each cyclic edge with the right label, + -- add an edge from each of the new nodes (including this one) + ++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes] + newContexts = [ (v, to v) | v <- newNodes ] + +alphabet :: Eq b => Graph n a (Maybe b) -> [b] +alphabet = nub . catMaybes . map edgeLabel . edges + +determinize :: Ord a => NFA a -> DFA a +determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty + (ns',es') = (Set.toList ns, Set.toList es) + final = filter isDFAFinal ns' + fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final + in renameStates [0..] fa + where info = nodeInfo g +-- reach = nodesReachable out + start = closure info $ Set.singleton s + isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n)) + h currentStates oldStates es + | Set.null currentStates = (oldStates,es) + | otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es' + where + allOldStates = oldStates `Set.union` currentStates + (newStates,es') = new (Set.toList currentStates) Set.empty es + uniqueNewStates = newStates Set.\\ allOldStates + -- Get the sets of states reachable from the given states + -- by consuming one symbol, and the associated edges. + new [] rs es = (rs,es) + new (n:ns) rs es = new ns rs' es' + where cs = reachable info n --reachable reach n + rs' = rs `Set.union` Set.fromList (map snd cs) + es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs] + + +-- | Get all the nodes reachable from a list of nodes by only empty edges. +closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n +closure info x = closure_ x x + where closure_ acc check | Set.null check = acc + | otherwise = closure_ acc' check' + where + reach = Set.fromList [y | x <- Set.toList check, + (_,y,Nothing) <- getOutgoing info x] + acc' = acc `Set.union` reach + check' = reach Set.\\ acc + +-- | Get a map of labels to sets of all nodes reachable +-- from a the set of nodes by one edge with the given +-- label and then any number of empty edges. +reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)] +reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns +reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n] + +reverseNFA :: NFA a -> NFA a +reverseNFA (FA g s fs) = FA g''' s' [s] + where g' = reverseGraph g + (g'',s') = newNode () g' + g''' = newEdges [(s',f,Nothing) | f <- fs] g'' + +dfa2nfa :: DFA a -> NFA a +dfa2nfa = mapTransitions Just + +-- +-- * Visualization +-- + +prFAGraphviz :: (Eq n,Show n) => FA n String String -> String +prFAGraphviz = Dot.prGraphviz . faToGraphviz + +prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String +prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show + +faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph +faToGraphviz (FA (Graph _ ns es) s f) + = Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) [] + where mkNode (n,l) = Dot.Node (show n) attrs + where attrs = [("label",l)] + ++ if n == s then [("shape","box")] else [] + ++ if n `elem` f then [("style","bold")] else [] + mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)] + +-- +-- * Utilities +-- + +lookups :: Ord k => [k] -> Map k a -> [a] +lookups xs m = mapMaybe (flip Map.lookup m) xs diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs new file mode 100644 index 000000000..8f26ea64c --- /dev/null +++ b/src/compiler/GF/Speech/GSL.hs @@ -0,0 +1,95 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.GSL +-- +-- This module prints a CFG as a Nuance GSL 2.0 grammar. +-- +----------------------------------------------------------------------------- + +module GF.Speech.GSL (gslPrinter) where + +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.SRG +import GF.Speech.RegExp +import GF.Infra.Option +import GF.Infra.Ident +import PGF.CId +import PGF.Data + +import Data.Char (toUpper,toLower) +import Data.List (partition) +import Text.PrettyPrint.HughesPJ + +width :: Int +width = 75 + +gslPrinter :: Options -> PGF -> CId -> String +gslPrinter opts pgf cnc = renderStyle st $ prGSL $ makeNonLeftRecursiveSRG opts pgf cnc + where st = style { lineLength = width } + +prGSL :: SRG -> Doc +prGSL srg = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text ";GSL2.0" $$ + comment ("Nuance speech recognition grammar for " ++ srgName srg) $$ + comment ("Generated by GF") + mainCat = text ".MAIN" <+> prCat (srgStartCat srg) + prRule (SRGRule cat rhs) = prCat cat <+> union (map prAlt rhs) + -- FIXME: use the probability + prAlt (SRGAlt mp _ rhs) = prItem rhs + + +prItem :: SRGItem -> Doc +prItem = f + where + f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes) + where (es,nes) = partition isEpsilon xs + f (REConcat [x]) = f x + f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")" + f (RERepeat x) = text "*" <> f x + f (RESymbol s) = prSymbol s + +union :: [Doc] -> Doc +union [x] = x +union xs = text "[" <> fsep xs <> text "]" + +prSymbol :: Symbol SRGNT Token -> Doc +prSymbol = symbol (prCat . fst) (doubleQuotes . showToken) + +-- GSL requires an upper case letter in category names +prCat :: Cat -> Doc +prCat = text . firstToUpper + + +firstToUpper :: String -> String +firstToUpper [] = [] +firstToUpper (x:xs) = toUpper x : xs + +{- +rmPunctCFG :: CGrammar -> CGrammar +rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g] + +keepSymbol :: Symbol c Token -> Bool +keepSymbol (Tok t) = not (all isPunct (prt t)) +keepSymbol _ = True +-} + +-- Nuance does not like upper case characters in tokens +showToken :: Token -> Doc +showToken = text . map toLower + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.:;.,?!()[]{}" + +comment :: String -> Doc +comment s = text ";" <+> text s + + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs new file mode 100644 index 000000000..2cfeea5f5 --- /dev/null +++ b/src/compiler/GF/Speech/JSGF.hs @@ -0,0 +1,113 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.JSGF +-- +-- This module prints a CFG as a JSGF grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +-- +-- FIXME: convert to UTF-8 +----------------------------------------------------------------------------- + +module GF.Speech.JSGF (jsgfPrinter) where + +import GF.Data.Utilities +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.RegExp +import GF.Speech.SISR +import GF.Speech.SRG +import PGF.CId +import PGF.Data + +import Data.Char +import Data.List +import Data.Maybe +import Text.PrettyPrint.HughesPJ +import Debug.Trace + +width :: Int +width = 75 + +jsgfPrinter :: Options + -> PGF + -> CId -> String +jsgfPrinter opts pgf cnc = renderStyle st $ prJSGF sisr $ makeNonLeftRecursiveSRG opts pgf cnc + where st = style { lineLength = width } + sisr = flag optSISR opts + +prJSGF :: Maybe SISRFormat -> SRG -> Doc +prJSGF sisr srg + = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$ + comment ("JSGF speech recognition grammar for " ++ srgName srg) $$ + comment "Generated by GF" $$ + text ("grammar " ++ srgName srg ++ ";") + lang = maybe empty text (srgLanguage srg) + mainCat = rule True "MAIN" [prCat (srgStartCat srg)] + prRule (SRGRule cat rhs) = rule (isExternalCat srg cat) cat (map prAlt rhs) + prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] + where initTag | isEmpty t = empty + | otherwise = text "<NULL>" <+> t + where t = tag sisr (profileInitSISR n) + finalTag = tag sisr (profileFinalSISR n) + p = if isEmpty initTag && isEmpty finalTag then id else parens + +prCat :: Cat -> Doc +prCat c = char '<' <> text c <> char '>' + +prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc +prItem sisr t = f 0 + where + f _ (REUnion []) = text "<VOID>" + f p (REUnion xs) + | not (null es) = brackets (f 0 (REUnion nes)) + | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) + where (es,nes) = partition isEpsilon xs + f _ (REConcat []) = text "<NULL>" + f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) + f p (RERepeat x) = f 3 x <> char '*' + f _ (RESymbol s) = prSymbol sisr t s + +prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc +prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) +prSymbol _ cn (Terminal t) | all isPunct t = empty -- removes punctuation + | otherwise = text t -- FIXME: quote if there is whitespace or odd chars + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc +tag Nothing _ = empty +tag (Just fmt) t = case t fmt of + [] -> empty + ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}' + where e [] = [] + e ('}':xs) = '\\':'}':e xs + e ('\n':xs) = ' ' : e (dropWhile isSpace xs) + e (x:xs) = x:e xs + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +comment :: String -> Doc +comment s = text "//" <+> text s + +alts :: [Doc] -> Doc +alts = fsep . prepunctuate (text "| ") + +rule :: Bool -> Cat -> [Doc] -> Doc +rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' + where p = if pub then text "public" else empty + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +prepunctuate :: Doc -> [Doc] -> [Doc] +prepunctuate _ [] = [] +prepunctuate p (x:xs) = x : map (p <>) xs + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y + diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs new file mode 100644 index 000000000..d22a4ea8d --- /dev/null +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -0,0 +1,116 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.PGFToCFG +-- +-- Approximates PGF grammars with context-free grammars. +---------------------------------------------------------------------- +module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where + +import PGF.CId +import PGF.Data as PGF +import PGF.Macros +import GF.Infra.Ident +import GF.Speech.CFG + +import Data.Array.IArray as Array +import Data.List +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import Data.Maybe +import Data.Set (Set) +import qualified Data.Set as Set + +bnfPrinter :: PGF -> CId -> String +bnfPrinter = toBNF id + +toBNF :: (CFG -> CFG) -> PGF -> CId -> String +toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc + + +pgfToCFG :: PGF + -> CId -- ^ Concrete syntax name + -> CFG +pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules) + where + pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) + + rules :: [(FCat,Production)] + rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo) + , prod <- Set.toList set] + + fcatCats :: Map FCat Cat + fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) + | (c,fcs) <- Map.toList (startCats pinfo), + (fc,i) <- zip fcs [1..]] + + fcatCat :: FCat -> Cat + fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats + + fcatToCat :: FCat -> FIndex -> Cat + fcatToCat c l = fcatCat c ++ row + where row = if catLinArity c == 1 then "" else "_" ++ show l + + -- gets the number of fields in the lincat for the given category + catLinArity :: FCat -> Int + catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + + topdownRules cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply funid args) rules = (functions pinfo ! funid,args) : rules + g (FCoerce cat) rules = f cat rules + + + extCats :: Set Cat + extCats = Set.fromList $ map lhsCat startRules + + startRules :: [CFRule] + startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) + | (c,fcs) <- Map.toList (startCats pinfo), + fc <- fcs, not (isLiteralFCat fc), + r <- [0..catLinArity fc-1]] + + fruleToCFRule :: (FCat,Production) -> [CFRule] + fruleToCFRule (c,FApply funid args) = + [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps)) + | (l,seqid) <- Array.assocs rhs + , let row = sequences pinfo ! seqid + , not (containsLiterals row)] + where + FFun f ps rhs = functions pinfo ! funid + + mkRhs :: Array FPointPos FSymbol -> [CFSymbol] + mkRhs = concatMap fsymbolToSymbol . Array.elems + + containsLiterals :: Array FPointPos FSymbol -> Bool + containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] || + not (null [n | FSymLit n _ <- Array.elems row]) -- only this is needed for PMCFG. + -- The first line is for backward compat. + + fsymbolToSymbol :: FSymbol -> [CFSymbol] + fsymbolToSymbol (FSymCat n l) = [NonTerminal (fcatToCat (args!!n) l)] + fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)] + fsymbolToSymbol (FSymKS ts) = map Terminal ts + + fixProfile :: Array FPointPos FSymbol -> Profile -> Profile + fixProfile row = concatMap positions + where + nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt] + positions i = [k | (k,j) <- nts, j == i] + + getPos (FSymCat j _) = [j] + getPos (FSymLit j _) = [j] + getPos _ = [] + + profilesToTerm :: [Profile] -> CFTerm + profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps) + where (argTypes,_) = catSkeleton $ lookType pgf f + + profileToTerm :: CId -> Profile -> CFTerm + profileToTerm t [] = CFMeta t + profileToTerm _ xs = CFRes (last xs) -- FIXME: unify + fruleToCFRule (c,FCoerce c') = + [CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) + | l <- [0..catLinArity c-1]] diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs new file mode 100644 index 000000000..0fc35d541 --- /dev/null +++ b/src/compiler/GF/Speech/PrRegExp.hs @@ -0,0 +1,27 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.PrRegExp +-- +-- This module prints a grammar as a regular expression. +----------------------------------------------------------------------------- + +module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where + +import GF.Speech.CFG +import GF.Speech.CFGToFA +import GF.Speech.PGFToCFG +import GF.Speech.RegExp +import PGF + +regexpPrinter :: PGF -> CId -> String +regexpPrinter pgf cnc = (++"\n") $ prRE id $ dfa2re $ cfgToFA $ pgfToCFG pgf cnc + +multiRegexpPrinter :: PGF -> CId -> String +multiRegexpPrinter pgf cnc = prREs $ mfa2res $ cfgToMFA $ pgfToCFG pgf cnc + +prREs :: [(String,RE CFSymbol)] -> String +prREs res = unlines [l ++ " = " ++ prRE id (mapRE showLabel re) | (l,re) <- res] + where showLabel = symbol (\l -> "<" ++ l ++ ">") id + +mfa2res :: MFA -> [(String,RE CFSymbol)] +mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas] diff --git a/src/compiler/GF/Speech/RegExp.hs b/src/compiler/GF/Speech/RegExp.hs new file mode 100644 index 000000000..2592b3d57 --- /dev/null +++ b/src/compiler/GF/Speech/RegExp.hs @@ -0,0 +1,144 @@ +module GF.Speech.RegExp (RE(..), + epsilonRE, nullRE, + isEpsilon, isNull, + unionRE, concatRE, seqRE, + repeatRE, minimizeRE, + mapRE, mapRE', joinRE, + symbolsRE, + dfa2re, prRE) where + +import Data.List + +import GF.Data.Utilities +import GF.Speech.FiniteState + +data RE a = + REUnion [RE a] -- ^ REUnion [] is null + | REConcat [RE a] -- ^ REConcat [] is epsilon + | RERepeat (RE a) + | RESymbol a + deriving (Eq,Ord,Show) + + +dfa2re :: (Ord a) => DFA a -> RE a +dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops + . oneFinalState () epsilonRE . mapTransitions RESymbol + where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa + merge es = [(f,t,unionRE ls) + | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]] + +elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a) +elimStates fa = + case [s | (s,_) <- states fa, isInternal fa s] of + [] -> fa + sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa + where sAs = nonLoopTransitionsTo sE fa + sBs = nonLoopTransitionsFrom sE fa + r2 = unionRE $ loops sE fa + ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs] + r r1 r3 = concatRE [r1, repeatRE r2, r3] + +epsilonRE :: RE a +epsilonRE = REConcat [] + +nullRE :: RE a +nullRE = REUnion [] + +isNull :: RE a -> Bool +isNull (REUnion []) = True +isNull _ = False + +isEpsilon :: RE a -> Bool +isEpsilon (REConcat []) = True +isEpsilon _ = False + +unionRE :: Ord a => [RE a] -> RE a +unionRE = unionOrId . sortNub . concatMap toList + where + toList (REUnion xs) = xs + toList x = [x] + unionOrId [r] = r + unionOrId rs = REUnion rs + +concatRE :: [RE a] -> RE a +concatRE xs | any isNull xs = nullRE + | otherwise = case concatMap toList xs of + [r] -> r + rs -> REConcat rs + where + toList (REConcat xs) = xs + toList x = [x] + +seqRE :: [a] -> RE a +seqRE = concatRE . map RESymbol + +repeatRE :: RE a -> RE a +repeatRE x | isNull x || isEpsilon x = epsilonRE + | otherwise = RERepeat x + +finalRE :: Ord a => DFA (RE a) -> RE a +finalRE fa = concatRE [repeatRE r1, r2, + repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])] + where + s0 = startState fa + [sF] = finalStates fa + r1 = unionRE $ loops s0 fa + r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa + r3 = unionRE $ loops sF fa + r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa + +reverseRE :: RE a -> RE a +reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs +reverseRE (REUnion xs) = REUnion (map reverseRE xs) +reverseRE (RERepeat x) = RERepeat (reverseRE x) +reverseRE x = x + +minimizeRE :: Ord a => RE a -> RE a +minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward + +mergeForward :: Ord a => RE a -> RE a +mergeForward (REUnion xs) = + unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)] +mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)] +mergeForward (RERepeat r) = repeatRE (mergeForward r) +mergeForward r = r + +firstRE :: RE a -> (RE a, RE a) +firstRE (REConcat (x:xs)) = (x, REConcat xs) +firstRE r = (r,epsilonRE) + +mapRE :: (a -> b) -> RE a -> RE b +mapRE f = mapRE' (RESymbol . f) + +mapRE' :: (a -> RE b) -> RE a -> RE b +mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs) +mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs) +mapRE' f (RERepeat x) = RERepeat (mapRE' f x) +mapRE' f (RESymbol s) = f s + +joinRE :: RE (RE a) -> RE a +joinRE (REConcat xs) = REConcat (map joinRE xs) +joinRE (REUnion xs) = REUnion (map joinRE xs) +joinRE (RERepeat xs) = RERepeat (joinRE xs) +joinRE (RESymbol ss) = ss + +symbolsRE :: RE a -> [a] +symbolsRE (REConcat xs) = concatMap symbolsRE xs +symbolsRE (REUnion xs) = concatMap symbolsRE xs +symbolsRE (RERepeat x) = symbolsRE x +symbolsRE (RESymbol x) = [x] + +-- Debugging + +prRE :: (a -> String) -> RE a -> String +prRE = prRE' 0 + +prRE' :: Int -> (a -> String) -> RE a -> String +prRE' _ _ (REUnion []) = "<NULL>" +prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs))) +prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs)) +prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*" +prRE' _ f (RESymbol s) = f s + +p n m s | n >= m = "(" ++ s ++ ")" + | True = s diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs new file mode 100644 index 000000000..f966d96b9 --- /dev/null +++ b/src/compiler/GF/Speech/SISR.hs @@ -0,0 +1,77 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.SISR +-- +-- Abstract syntax and pretty printer for SISR, +-- (Semantic Interpretation for Speech Recognition) +---------------------------------------------------------------------- +module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR, + topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where + +import Data.List + +import GF.Data.Utilities +import GF.Infra.Ident +import GF.Infra.Option (SISRFormat(..)) +import GF.Speech.CFG +import GF.Speech.SRG (SRGNT) +import PGF.CId + +import qualified GF.JavaScript.AbsJS as JS +import qualified GF.JavaScript.PrintJS as JS + +type SISRTag = [JS.DeclOrExpr] + + +prSISR :: SISRTag -> String +prSISR = JS.printTree + +topCatSISR :: String -> SISRFormat -> SISRTag +topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c] + +profileInitSISR :: CFTerm -> SISRFormat -> SISRTag +profileInitSISR t fmt + | null (usedArgs t) = [] + | otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]] + +usedArgs :: CFTerm -> [Int] +usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts) +usedArgs (CFAbs _ x) = usedArgs x +usedArgs (CFApp x y) = usedArgs x `union` usedArgs y +usedArgs (CFRes i) = [i] +usedArgs _ = [] + +catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag +catSISR t (c,i) fmt + | i `elem` usedArgs t = map JS.DExpr + [JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c] + | otherwise = [] + +profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag +profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term] + where + f (CFObj n ts) = tree (showCId n) (map f ts) + f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)] + f (CFApp x y) = JS.ECall (f x) [f y] + f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) + f (CFVar v) = JS.EVar (var v) + f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (showCId typ))] + +fmtOut SISR_WD20030401 = JS.EVar (JS.Ident "$") +fmtOut SISR_1_0 = JS.EVar (JS.Ident "out") + +fmtRef SISR_WD20030401 c = JS.EVar (JS.Ident ("$" ++ c)) +fmtRef SISR_1_0 c = field (JS.EVar (JS.Ident "rules")) c + +args = JS.Ident "a" + +var v = JS.Ident ("x" ++ show v) + +field x y = JS.EMember x (JS.Ident y) + +ass = JS.EAssign + +tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)] + +obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps] + diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs new file mode 100644 index 000000000..84633149b --- /dev/null +++ b/src/compiler/GF/Speech/SLF.hs @@ -0,0 +1,178 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.SLF +-- +-- This module converts a CFG to an SLF finite-state network +-- for use with the ATK recognizer. The SLF format is described +-- in the HTK manual, and an example for use in ATK is shown +-- in the ATK manual. +-- +----------------------------------------------------------------------------- + +module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter, + slfSubPrinter,slfSubGraphvizPrinter) where + +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.FiniteState +import GF.Speech.CFG +import GF.Speech.CFGToFA +import GF.Speech.PGFToCFG +import qualified GF.Data.Graphviz as Dot +import PGF +import PGF.CId + +import Control.Monad +import qualified Control.Monad.State as STM +import Data.Char (toUpper) +import Data.List +import Data.Maybe + +data SLFs = SLFs [(String,SLF)] SLF + +data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] } + +data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String } + | SLFSubLat { nId :: Int, nLat :: String } + +-- | An SLF word is a word, or the empty string. +type SLFWord = Maybe String + +data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int } + +type SLF_FA = FA State (Maybe CFSymbol) () + +mkFAs :: PGF -> CId -> (SLF_FA, [(String,SLF_FA)]) +mkFAs pgf cnc = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs]) + where MFA start subs = {- renameSubs $ -} cfgToMFA $ pgfToCFG pgf cnc + main = let (fa,s,f) = newFA_ in newTransition s f (NonTerminal start) fa + +slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) () +slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing () + . moveLabelsToNodes . dfa2nfa + +-- | Give sequential names to subnetworks. +renameSubs :: MFA -> MFA +renameSubs (MFA start subs) = MFA (newName start) subs' + where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]] + newName s = lookup' s newNames + subs' = [(newName s,renameLabels n) | (s,n) <- subs] + renameLabels = mapTransitions (mapSymbol newName id) + +-- +-- * SLF graphviz printing (without sub-networks) +-- + +slfGraphvizPrinter :: PGF -> CId -> String +slfGraphvizPrinter pgf cnc + = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc + where + gvFA = mapStates (fromMaybe "") . mapTransitions (const "") + +-- +-- * SLF graphviz printing (with sub-networks) +-- + +slfSubGraphvizPrinter :: PGF -> CId -> String +slfSubGraphvizPrinter pgf cnc = Dot.prGraphviz g + where (main, subs) = mkFAs pgf cnc + g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..] + ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs + m = gvSLFFA Nothing main + +gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph +gvSLFFA n fa = + liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv) + . mapTransitions (const "")) (rename fa) + where mfaLabelToGv = symbol ("#"++) id + mkCluster Nothing = id + mkCluster (Just x) + = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x + rename fa = do + names <- STM.get + let fa' = renameStates names fa + names' = unusedNames fa' + STM.put names' + return fa' + +-- +-- * SLF printing (without sub-networks) +-- + +slfPrinter :: PGF -> CId -> String +slfPrinter pgf cnc + = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc + +-- +-- * SLF printing (with sub-networks) +-- + +-- | Make a network with subnetworks in SLF +slfSubPrinter :: PGF -> CId -> String +slfSubPrinter pgf cnc = prSLFs slfs + where + (main,subs) = mkFAs pgf cnc + slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main) + faToSLF = automatonToSLF mfaNodeToSLFNode + +automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF +automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es } + where ns = map (uncurry mkNode) (states fa) + es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa) + +mfaNodeToSLFNode :: Int -> Maybe CFSymbol -> SLFNode +mfaNodeToSLFNode i l = case l of + Nothing -> mkSLFNode i Nothing + Just (Terminal x) -> mkSLFNode i (Just x) + Just (NonTerminal s) -> mkSLFSubLat i s + +mkSLFNode :: Int -> Maybe String -> SLFNode +mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing } +mkSLFNode i (Just w) + | isNonWord w = SLFNode { nId = i, + nWord = Nothing, + nTag = Just w } + | otherwise = SLFNode { nId = i, + nWord = Just (map toUpper w), + nTag = Just w } + +mkSLFSubLat :: Int -> String -> SLFNode +mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub } + +mkSLFEdge :: Int -> (Int,Int) -> SLFEdge +mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t } + +prSLFs :: SLFs -> String +prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) "" + where prSub (n,s) = showString "SUBLAT=" . shows n + . nl . prOneSLF s . showString "." . nl + +prSLF :: SLF -> String +prSLF slf = prOneSLF slf "" + +prOneSLF :: SLF -> ShowS +prOneSLF (SLF { slfNodes = ns, slfEdges = es}) + = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl + where + header = prFields [("N",show (length ns)),("L", show (length es))] . nl + prNode (SLFNode { nId = i, nWord = w, nTag = t }) + = prFields $ [("I",show i),("W",showWord w)] + ++ maybe [] (\t -> [("s",t)]) t + prNode (SLFSubLat { nId = i, nLat = l }) + = prFields [("I",show i),("L",show l)] + prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))] + +-- | Check if a word should not correspond to a word in the SLF file. +isNonWord :: String -> Bool +isNonWord = any isPunct + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!()[]{}" + +showWord :: SLFWord -> String +showWord Nothing = "!NULL" +showWord (Just w) | null w = "!NULL" + | otherwise = w + +prFields :: [(String,String)] -> ShowS +prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ] diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs new file mode 100644 index 000000000..2270ec7a1 --- /dev/null +++ b/src/compiler/GF/Speech/SRG.hs @@ -0,0 +1,205 @@ +---------------------------------------------------------------------- +-- | +-- Module : SRG +-- +-- Representation of, conversion to, and utilities for +-- printing of a general Speech Recognition Grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +---------------------------------------------------------------------- +module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol + , SRGNT, CFTerm + , ebnfPrinter + , makeNonLeftRecursiveSRG + , makeNonRecursiveSRG + , getSpeechLanguage + , isExternalCat + , lookupFM_ + ) where + +import GF.Data.Operations +import GF.Data.Utilities +import GF.Infra.Ident +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.PGFToCFG +import GF.Data.Relation +import GF.Speech.FiniteState +import GF.Speech.RegExp +import GF.Speech.CFGToFA +import GF.Infra.Option +import PGF.CId +import PGF.Data +import PGF.Macros + +import Data.List +import Data.Maybe (fromMaybe, maybeToList) +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import Debug.Trace + +data SRG = SRG { srgName :: String -- ^ grammar name + , srgStartCat :: Cat -- ^ start category name + , srgExternalCats :: Set Cat + , srgLanguage :: Maybe String -- ^ The language for which the grammar + -- is intended, e.g. en-UK + , srgRules :: [SRGRule] + } + deriving (Eq,Show) + +data SRGRule = SRGRule Cat [SRGAlt] + deriving (Eq,Show) + +-- | maybe a probability, a rule name and an EBNF right-hand side +data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem + deriving (Eq,Show) + +type SRGItem = RE SRGSymbol + +type SRGSymbol = Symbol SRGNT Token + +-- | An SRG non-terminal. Category name and its number in the profile. +type SRGNT = (Cat, Int) + +ebnfPrinter :: Options -> PGF -> CId -> String +ebnfPrinter opts pgf cnc = prSRG opts $ makeSRG opts pgf cnc + +-- | Create a compact filtered non-left-recursive SRG. +makeNonLeftRecursiveSRG :: Options -> PGF -> CId -> SRG +makeNonLeftRecursiveSRG opts = makeSRG opts' + where + opts' = setDefaultCFGTransform opts CFGNoLR True + +makeSRG :: Options -> PGF -> CId -> SRG +makeSRG opts = mkSRG cfgToSRG preprocess + where + cfgToSRG cfg = [cfRulesToSRGRule rs | (_,rs) <- allRulesGrouped cfg] + preprocess = maybeTransform opts CFGMergeIdentical mergeIdentical + . maybeTransform opts CFGNoLR removeLeftRecursion + . maybeTransform opts CFGRegular makeRegular + . maybeTransform opts CFGTopDownFilter topDownFilter + . maybeTransform opts CFGBottomUpFilter bottomUpFilter + . maybeTransform opts CFGRemoveCycles removeCycles + . maybeTransform opts CFGStartCatOnly purgeExternalCats + +setDefaultCFGTransform :: Options -> CFGTransform -> Bool -> Options +setDefaultCFGTransform opts t b = setCFGTransform t b `addOptions` opts + +maybeTransform :: Options -> CFGTransform -> (CFG -> CFG) -> (CFG -> CFG) +maybeTransform opts t f = if cfgTransform opts t then f else id + +traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g + +stats g = "Categories: " ++ show (countCats g) + ++ ", External categories: " ++ show (Set.size (cfgExternalCats g)) + ++ ", Rules: " ++ show (countRules g) + +makeNonRecursiveSRG :: Options + -> PGF + -> CId -- ^ Concrete syntax name. + -> SRG +makeNonRecursiveSRG opts = mkSRG cfgToSRG id + where + cfgToSRG cfg = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas] + where + MFA _ dfas = cfgToMFA cfg + dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re + dummyCFTerm = CFMeta (mkCId "dummy") + dummySRGNT = mapSymbol (\c -> (c,0)) id + +mkSRG :: (CFG -> [SRGRule]) -> (CFG -> CFG) -> PGF -> CId -> SRG +mkSRG mkRules preprocess pgf cnc = + SRG { srgName = showCId cnc, + srgStartCat = cfgStartCat cfg, + srgExternalCats = cfgExternalCats cfg, + srgLanguage = getSpeechLanguage pgf cnc, + srgRules = mkRules cfg } + where cfg = renameCats (showCId cnc) $ preprocess $ pgfToCFG pgf cnc + +-- | Renames all external cats C to C_cat, and all internal cats C_X (where X is any string), +-- to C_N where N is an integer. +renameCats :: String -> CFG -> CFG +renameCats prefix cfg = mapCFGCats renameCat cfg + where renameCat c | isExternal c = c ++ "_cat" + | otherwise = Map.findWithDefault (badCat c) c names + isExternal c = c `Set.member` cfgExternalCats cfg + catsByPrefix = buildMultiMap [(takeWhile (/='_') cat, cat) | cat <- allCats' cfg, not (isExternal cat)] + names = Map.fromList [(c,pref++"_"++show i) | (pref,cs) <- catsByPrefix, (c,i) <- zip cs [1..]] + badCat c = error ("GF.Speech.SRG.renameCats: " ++ c ++ "\n" ++ prCFG cfg) + +getSpeechLanguage :: PGF -> CId -> Maybe String +getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language") + +cfRulesToSRGRule :: [CFRule] -> SRGRule +cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs + where + alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs] + rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ] + + mkSRGSymbols _ [] = [] + mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss + mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss + +srgLHSCat :: SRGRule -> Cat +srgLHSCat (SRGRule c _) = c + +isExternalCat :: SRG -> Cat -> Bool +isExternalCat srg c = c `Set.member` srgExternalCats srg + +-- +-- * Size-optimized EBNF SRGs +-- + +srgItem :: [[SRGSymbol]] -> SRGItem +srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) +-- non-optimizing version: +--srgItem = unionRE . map seqRE + +-- | Merges a list of right-hand sides which all have the same +-- sequence of non-terminals. +mergeItems :: [[SRGSymbol]] -> SRGItem +mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens + +groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]] +groupTokens [] = [] +groupTokens (Terminal t:ss) = case groupTokens ss of + Terminal ts:ss' -> Terminal (t:ts):ss' + ss' -> Terminal [t]:ss' +groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss + +ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol +ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal))) + +-- +-- * Utilities for building and printing SRGs +-- + +prSRG :: Options -> SRG -> String +prSRG opts srg = prProductions $ map prRule $ ext ++ int + where + sisr = flag optSISR opts + (ext,int) = partition (isExternalCat srg . srgLHSCat) (srgRules srg) + prRule (SRGRule c alts) = (c,unwords (intersperse "|" (concatMap prAlt alts))) + prAlt (SRGAlt _ t rhs) = + -- FIXME: hack: we high-jack the --sisr flag to add + -- a simple lambda calculus format for semantic interpretation + -- Maybe the --sisr flag should be renamed. + case sisr of + Just _ -> + -- copy tags to each part of a top-level union, + -- to get simpler output + case rhs of + REUnion xs -> map prOneAlt xs + _ -> [prOneAlt rhs] + where prOneAlt a = prRE prSym a ++ " { " ++ prCFTerm t ++ " }" + Nothing -> [prRE prSym rhs] + prSym = symbol fst (\t -> "\""++ t ++"\"") + +lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt +lookupFM_ fm k = Map.findWithDefault err k fm + where err = error $ "Key not found: " ++ show k + ++ "\namong " ++ show (Map.keys fm) diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs new file mode 100644 index 000000000..2df1316a8 --- /dev/null +++ b/src/compiler/GF/Speech/SRGS_ABNF.hs @@ -0,0 +1,127 @@ +---------------------------------------------------------------------- +-- | +-- Module : PrJSRGS_ABNF +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/01 20:09:04 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.16 $ +-- +-- This module prints a CFG as a JSGF grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +-- +-- FIXME: convert to UTF-8 +----------------------------------------------------------------------------- + +module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where + +import GF.Data.Utilities +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.SISR as SISR +import GF.Speech.SRG +import GF.Speech.RegExp +import PGF (PGF, CId) + +import Data.Char +import Data.List +import Data.Maybe +import Text.PrettyPrint.HughesPJ +import Debug.Trace + +width :: Int +width = 75 + +srgsAbnfPrinter :: Options + -> PGF -> CId -> String +srgsAbnfPrinter opts pgf cnc = showDoc $ prABNF sisr $ makeNonLeftRecursiveSRG opts pgf cnc + where sisr = flag optSISR opts + +srgsAbnfNonRecursivePrinter :: Options -> PGF -> CId -> String +srgsAbnfNonRecursivePrinter opts pgf cnc = showDoc $ prABNF Nothing $ makeNonRecursiveSRG opts pgf cnc + +showDoc = renderStyle (style { lineLength = width }) + +prABNF :: Maybe SISRFormat -> SRG -> Doc +prABNF sisr srg + = header $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text "#ABNF 1.0 UTF-8;" $$ + meta "description" ("Speech recognition grammar for " ++ srgName srg) $$ + meta "generator" "Grammatical Framework" $$ + language $$ tagFormat $$ mainCat + language = maybe empty (\l -> text "language" <+> text l <> char ';') (srgLanguage srg) + tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';' + | otherwise = empty + mainCat = text "root" <+> prCat (srgStartCat srg) <> char ';' + prRule (SRGRule cat alts) = rule (isExternalCat srg cat) cat (map prAlt alts) + prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] + where initTag = tag sisr (profileInitSISR n) + finalTag = tag sisr (profileFinalSISR n) + p = if isEmpty initTag && isEmpty finalTag then id else parens + +prCat :: Cat -> Doc +prCat c = char '$' <> text c + +prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc +prItem sisr t = f 0 + where + f _ (REUnion []) = text "$VOID" + f p (REUnion xs) + | not (null es) = brackets (f 0 (REUnion nes)) + | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) + where (es,nes) = partition isEpsilon xs + f _ (REConcat []) = text "$NULL" + f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) + f p (RERepeat x) = f 3 x <> text "<0->" + f _ (RESymbol s) = prSymbol sisr t s + + +prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc +prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) +prSymbol _ cn (Terminal t) + | all isPunct t = empty -- removes punctuation + | otherwise = text t -- FIXME: quote if there is whitespace or odd chars + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc +tag Nothing _ = empty +tag (Just fmt) t = + case t fmt of + [] -> empty + -- grr, silly SRGS ABNF does not have an escaping mechanism + ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}" + | otherwise -> text "{" <+> text x <+> text "}" + where x = prSISR ts + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +comment :: String -> Doc +comment s = text "//" <+> text s + +alts :: [Doc] -> Doc +alts = fsep . prepunctuate (text "| ") + +rule :: Bool -> Cat -> [Doc] -> Doc +rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' + where p = if pub then text "public" else empty + +meta :: String -> String -> Doc +meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';' + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +prepunctuate :: Doc -> [Doc] -> [Doc] +prepunctuate _ [] = [] +prepunctuate p (x:xs) = x : map (p <>) xs + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y + diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs new file mode 100644 index 000000000..1f94de66d --- /dev/null +++ b/src/compiler/GF/Speech/SRGS_XML.hs @@ -0,0 +1,105 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.SRGS_XML +-- +-- Prints an SRGS XML speech recognition grammars. +---------------------------------------------------------------------- +module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where + +import GF.Data.Utilities +import GF.Data.XML +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.RegExp +import GF.Speech.SISR as SISR +import GF.Speech.SRG +import PGF (PGF, CId) + +import Control.Monad +import Data.Char (toUpper,toLower) +import Data.List +import Data.Maybe +import qualified Data.Map as Map + +srgsXmlPrinter :: Options + -> PGF -> CId -> String +srgsXmlPrinter opts pgf cnc = prSrgsXml sisr $ makeNonLeftRecursiveSRG opts pgf cnc + where sisr = flag optSISR opts + +srgsXmlNonRecursivePrinter :: Options -> PGF -> CId -> String +srgsXmlNonRecursivePrinter opts pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG opts pgf cnc + + +prSrgsXml :: Maybe SISRFormat -> SRG -> String +prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr) + where + xmlGr = grammar sisr (srgStartCat srg) (srgLanguage srg) $ + [meta "description" + ("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."), + meta "generator" "Grammatical Framework"] + ++ map ruleToXML (srgRules srg) + ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts) + where pub = if isExternalCat srg cat then [("scope","public")] else [] + prRhs rhss = [oneOf (map (mkProd sisr) rhss)] + +mkProd :: Maybe SISRFormat -> SRGAlt -> XML +mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf) + where x = mkItem sisr n rhs + ti = tag sisr (profileInitSISR n) + tf = tag sisr (profileFinalSISR n) + +mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML +mkItem sisr cn = f + where + f (REUnion []) = ETag "ruleref" [("special","VOID")] + f (REUnion xs) + | not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)] + | otherwise = oneOf (map f xs) + where (es,nes) = partition isEpsilon xs + f (REConcat []) = ETag "ruleref" [("special","NULL")] + f (REConcat xs) = Tag "item" [] (map f xs) + f (RERepeat x) = Tag "item" [("repeat","0-")] [f x] + f (RESymbol s) = symItem sisr cn s + +symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML +symItem sisr cn (NonTerminal n@(c,_)) = + Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n) +symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)] + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML] +tag Nothing _ = [] +tag (Just fmt) t = case t fmt of + [] -> [] + ts -> [Tag "tag" [] [Data (prSISR ts)]] + +showToken :: Token -> String +showToken t = t + +oneOf :: [XML] -> XML +oneOf = Tag "one-of" [] + +grammar :: Maybe SISRFormat + -> String -- ^ root + -> Maybe String -- ^language + -> [XML] -> XML +grammar sisr root ml = + Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"), + ("version","1.0"), + ("mode","voice"), + ("root",root)] + ++ (if isJust sisr then [("tag-format","semantics/1.0")] else []) + ++ maybe [] (\l -> [("xml:lang", l)]) ml + +meta :: String -> String -> XML +meta n c = ETag "meta" [("name",n),("content",c)] + +optimizeSRGS :: XML -> XML +optimizeSRGS = bottomUpXML f + where f (Tag "item" [] [x@(Tag "item" _ _)]) = x + f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x + f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs + f (Tag "item" as xs) = Tag "item" as (map g xs) + where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x + g x = x + f (Tag "one-of" [] [x]) = x + f x = x diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs new file mode 100644 index 000000000..134964062 --- /dev/null +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -0,0 +1,243 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.VoiceXML +-- +-- Creates VoiceXML dialogue systems from PGF grammars. +----------------------------------------------------------------------------- +module GF.Speech.VoiceXML (grammar2vxml) where + +import GF.Data.Operations +import GF.Data.Str (sstrV) +import GF.Data.Utilities +import GF.Data.XML +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Speech.SRG (getSpeechLanguage) +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.Linearize (realize) + +import Control.Monad (liftM) +import Data.List (isPrefixOf, find, intersperse) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) + +import Debug.Trace + +-- | the main function +grammar2vxml :: PGF -> CId -> String +grammar2vxml pgf cnc = showsXMLDoc (skel2vxml name language start skel qs) "" + where skel = pgfSkeleton pgf + name = showCId cnc + qs = catQuestions pgf cnc (map fst skel) + language = getSpeechLanguage pgf cnc + start = lookStartCat pgf + +-- +-- * VSkeleton: a simple description of the abstract syntax. +-- + +type Skeleton = [(CId, [(CId, [CId])])] + +pgfSkeleton :: PGF -> Skeleton +pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType pgf f))) | f <- fs]) + | (c,fs) <- Map.toList (catfuns (abstract pgf)), + not (isLiteralCat c)] + +-- +-- * Questions to ask +-- + +type CatQuestions = [(CId,String)] + +catQuestions :: PGF -> CId -> [CId] -> CatQuestions +catQuestions pgf cnc cats = [(c,catQuestion pgf cnc c) | c <- cats] + +catQuestion :: PGF -> CId -> CId -> String +catQuestion pgf cnc cat = realize (lookPrintName pgf cnc cat) + + +{- +lin :: StateGrammar -> String -> Err String +lin gr fun = do + tree <- string2treeErr gr fun + let ls = map unt $ linTree2strings noMark g c tree + case ls of + [] -> fail $ "No linearization of " ++ fun + l:_ -> return l + where c = cncId gr + g = stateGrammarST gr + unt = formatAsText +-} + +getCatQuestion :: CId -> CatQuestions -> String +getCatQuestion c qs = + fromMaybe (error "No question for category " ++ showCId c) (lookup c qs) + +-- +-- * Generate VoiceXML +-- + +skel2vxml :: String -> Maybe String -> CId -> Skeleton -> CatQuestions -> XML +skel2vxml name language start skel qs = + vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel) + where + gr = grammarURI name + startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)] + [param "old" "{ name : '?' }"]] + +grammarURI :: String -> String +grammarURI name = name ++ ".grxml" + + +catForms :: String -> CatQuestions -> CId -> [(CId, [CId])] -> [XML] +catForms gr qs cat fs = + comments [showCId cat ++ " category."] + ++ [cat2form gr qs cat fs] + +cat2form :: String -> CatQuestions -> CId -> [(CId, [CId])] -> XML +cat2form gr qs cat fs = + form (catFormId cat) $ + [var "old" Nothing, + blockCond "old.name != '?'" [assign "term" "old"], + field "term" [] + [promptString (getCatQuestion cat qs), + vxmlGrammar (gr++"#"++catFormId cat) + ] + ] + ++ concatMap (uncurry (fun2sub gr cat)) fs + ++ [block [return_ ["term"]{-]-}]] + +fun2sub :: String -> CId -> CId -> [CId] -> [XML] +fun2sub gr cat fun args = + comments [showCId fun ++ " : (" + ++ concat (intersperse ", " (map showCId args)) + ++ ") " ++ showCId cat] ++ ss + where + ss = zipWith mkSub [0..] args + mkSub n t = subdialog s [("src","#"++catFormId t), + ("cond","term.name == "++string (showCId fun))] + [param "old" v, + filled [] [assign v (s++".term")]] + where s = showCId fun ++ "_" ++ show n + v = "term.args["++show n++"]" + +catFormId :: CId -> String +catFormId c = showCId c ++ "_cat" + + +-- +-- * VoiceXML stuff +-- + +vxml :: Maybe String -> [XML] -> XML +vxml ml = Tag "vxml" $ [("version","2.0"), + ("xmlns","http://www.w3.org/2001/vxml")] + ++ maybe [] (\l -> [("xml:lang", l)]) ml + +form :: String -> [XML] -> XML +form id xs = Tag "form" [("id", id)] xs + +field :: String -> [(String,String)] -> [XML] -> XML +field name attrs = Tag "field" ([("name",name)]++attrs) + +subdialog :: String -> [(String,String)] -> [XML] -> XML +subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs) + +filled :: [(String,String)] -> [XML] -> XML +filled = Tag "filled" + +vxmlGrammar :: String -> XML +vxmlGrammar uri = ETag "grammar" [("src",uri)] + +prompt :: [XML] -> XML +prompt = Tag "prompt" [] + +promptString :: String -> XML +promptString p = prompt [Data p] + +reprompt :: XML +reprompt = ETag "reprompt" [] + +assign :: String -> String -> XML +assign n e = ETag "assign" [("name",n),("expr",e)] + +value :: String -> XML +value expr = ETag "value" [("expr",expr)] + +if_ :: String -> [XML] -> XML +if_ c b = if_else c b [] + +if_else :: String -> [XML] -> [XML] -> XML +if_else c t f = cond [(c,t)] f + +cond :: [(String,[XML])] -> [XML] -> XML +cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es) + where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest] + ++ if null els then [] else (Tag "else" [] []:els) + +goto_item :: String -> XML +goto_item nextitem = ETag "goto" [("nextitem",nextitem)] + +return_ :: [String] -> XML +return_ names = ETag "return" [("namelist", unwords names)] + +block :: [XML] -> XML +block = Tag "block" [] + +blockCond :: String -> [XML] -> XML +blockCond cond = Tag "block" [("cond", cond)] + +throw :: String -> String -> XML +throw event msg = Tag "throw" [("event",event),("message",msg)] [] + +nomatch :: [XML] -> XML +nomatch = Tag "nomatch" [] + +help :: [XML] -> XML +help = Tag "help" [] + +param :: String -> String -> XML +param name expr = ETag "param" [("name",name),("expr",expr)] + +var :: String -> Maybe String -> XML +var name expr = ETag "var" ([("name",name)]++e) + where e = maybe [] ((:[]) . (,) "expr") expr + +script :: String -> XML +script s = Tag "script" [] [CData s] + +scriptURI :: String -> XML +scriptURI uri = Tag "script" [("uri", uri)] [] + +-- +-- * ECMAScript stuff +-- + +string :: String -> String +string s = "'" ++ concatMap esc s ++ "'" + where esc '\'' = "\\'" + esc c = [c] + +{- +-- +-- * List stuff +-- + +isListCat :: (CId, [(CId, [CId])]) -> Bool +isListCat (cat,rules) = "List" `isPrefixOf` showIdent cat && length rules == 2 + && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs + where c = drop 4 (showIdent cat) + fs = map (showIdent . fst) rules + +isBaseFun :: CId -> Bool +isBaseFun f = "Base" `isPrefixOf` showIdent f + +isConsFun :: CId -> Bool +isConsFun f = "Cons" `isPrefixOf` showIdent f + +baseSize :: (CId, [(CId, [CId])]) -> Int +baseSize (_,rules) = length bs + where Just (_,bs) = find (isBaseFun . fst) rules +-} diff --git a/src/compiler/GF/System/NoReadline.hs b/src/compiler/GF/System/NoReadline.hs new file mode 100644 index 000000000..1f1050e8c --- /dev/null +++ b/src/compiler/GF/System/NoReadline.hs @@ -0,0 +1,33 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.System.NoReadline +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/10 15:04:01 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- Do not use readline. +----------------------------------------------------------------------------- + +module GF.System.NoReadline (fetchCommand, setCompletionFunction, filenameCompletionFunction) where + +import System.IO.Error (try) +import System.IO (stdout,hFlush) + +fetchCommand :: String -> IO (String) +fetchCommand s = do + putStr s + hFlush stdout + res <- try getLine + case res of + Left e -> return "q" + Right l -> return l + +setCompletionFunction :: Maybe (String -> String -> Int -> IO [String]) -> IO () +setCompletionFunction _ = return () + +filenameCompletionFunction :: String -> IO [String] +filenameCompletionFunction _ = return [] diff --git a/src/compiler/GF/System/NoSignal.hs b/src/compiler/GF/System/NoSignal.hs new file mode 100644 index 000000000..5d82a431e --- /dev/null +++ b/src/compiler/GF/System/NoSignal.hs @@ -0,0 +1,29 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.System.NoSignal +-- Maintainer : Bjorn Bringert +-- Stability : (stability) +-- Portability : (portability) +-- +-- > CVS $Date: 2005/11/11 11:12:50 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- Dummy implementation of signal handling. +----------------------------------------------------------------------------- + +module GF.System.NoSignal where + +import Control.Exception (Exception,catch) +import Prelude hiding (catch) + +{-# NOINLINE runInterruptibly #-} +runInterruptibly :: IO a -> IO (Either Exception a) +--runInterruptibly = fmap Right +runInterruptibly a = + p `catch` h + where p = a >>= \x -> return $! Right $! x + h e = return $ Left e + +blockInterrupt :: IO a -> IO a +blockInterrupt = id diff --git a/src/compiler/GF/System/Readline.hs b/src/compiler/GF/System/Readline.hs new file mode 100644 index 000000000..ee38cdc0b --- /dev/null +++ b/src/compiler/GF/System/Readline.hs @@ -0,0 +1,35 @@ +{-# OPTIONS -cpp #-} + +---------------------------------------------------------------------- +-- | +-- Module : GF.System.Readline +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/10 15:04:01 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.2 $ +-- +-- Uses the right readline library to read user input. +----------------------------------------------------------------------------- + +module GF.System.Readline (fetchCommand, setCompletionFunction, filenameCompletionFunction) where + +#ifdef USE_HASKELINE + +import GF.System.UseHaskeline + +#elif USE_READLINE + +import GF.System.UseReadline + +#elif USE_EDITLINE + +import GF.System.UseEditline + +#else + +import GF.System.NoReadline + +#endif diff --git a/src/compiler/GF/System/Signal.hs b/src/compiler/GF/System/Signal.hs new file mode 100644 index 000000000..fe8a12483 --- /dev/null +++ b/src/compiler/GF/System/Signal.hs @@ -0,0 +1,27 @@ +{-# OPTIONS -cpp #-} + +---------------------------------------------------------------------- +-- | +-- Module : GF.System.Signal +-- Maintainer : Bjorn Bringert +-- Stability : (stability) +-- Portability : (portability) +-- +-- > CVS $Date: 2005/11/11 11:12:50 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.3 $ +-- +-- Import the right singal handling module. +----------------------------------------------------------------------------- + +module GF.System.Signal (runInterruptibly,blockInterrupt) where + +#ifdef USE_INTERRUPT + +import GF.System.UseSignal (runInterruptibly,blockInterrupt) + +#else + +import GF.System.NoSignal (runInterruptibly,blockInterrupt) + +#endif diff --git a/src/compiler/GF/System/UseEditline.hs b/src/compiler/GF/System/UseEditline.hs new file mode 100644 index 000000000..6d51a1be3 --- /dev/null +++ b/src/compiler/GF/System/UseEditline.hs @@ -0,0 +1,36 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.System.UseReadline +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/10 15:04:01 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- Use GNU readline +----------------------------------------------------------------------------- + +module GF.System.UseEditline (fetchCommand, setCompletionFunction, filenameCompletionFunction) where + +import System.Console.Editline.Readline + +fetchCommand :: String -> IO (String) +fetchCommand s = do + setCompletionAppendCharacter Nothing + --setBasicQuoteCharacters "" + res <- readline s + case res of + Nothing -> return "q" + Just s -> do addHistory s + return s + +setCompletionFunction :: Maybe (String -> String -> Int -> IO [String]) -> IO () +setCompletionFunction Nothing = setCompletionEntryFunction Nothing +setCompletionFunction (Just fn) = setCompletionEntryFunction (Just my_fn) + where + my_fn prefix = do + s <- getLineBuffer + p <- getPoint + fn s prefix p diff --git a/src/compiler/GF/System/UseHaskeline.hs b/src/compiler/GF/System/UseHaskeline.hs new file mode 100644 index 000000000..140407439 --- /dev/null +++ b/src/compiler/GF/System/UseHaskeline.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.System.UseReadline +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/10 15:04:01 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- Use GNU readline +----------------------------------------------------------------------------- + +module GF.System.UseHaskeline (fetchCommand, setCompletionFunction, filenameCompletionFunction) where + +import System.Console.Haskeline +import System.Directory + +fetchCommand :: String -> IO (String) +fetchCommand s = do + settings <- getGFSettings + res <- runInputT settings (getInputLine s) + case res of + Nothing -> return "q" + Just s -> return s + +getGFSettings :: IO (Settings IO) +getGFSettings = do + path <- getAppUserDataDirectory "gf_history" + return $ + Settings { + complete = completeFilename, + historyFile = Just path, + autoAddHistory = True + } + + +setCompletionFunction :: Maybe (String -> String -> Int -> IO [String]) -> IO () +setCompletionFunction _ = return () + +filenameCompletionFunction :: String -> IO [String] +filenameCompletionFunction _ = return [] diff --git a/src/compiler/GF/System/UseReadline.hs b/src/compiler/GF/System/UseReadline.hs new file mode 100644 index 000000000..a0e051601 --- /dev/null +++ b/src/compiler/GF/System/UseReadline.hs @@ -0,0 +1,36 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.System.UseReadline +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/10 15:04:01 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- Use GNU readline +----------------------------------------------------------------------------- + +module GF.System.UseReadline (fetchCommand, setCompletionFunction, filenameCompletionFunction) where + +import System.Console.Readline + +fetchCommand :: String -> IO (String) +fetchCommand s = do + setCompletionAppendCharacter Nothing + setBasicQuoteCharacters "" + res <- readline s + case res of + Nothing -> return "q" + Just s -> do addHistory s + return s + +setCompletionFunction :: Maybe (String -> String -> Int -> IO [String]) -> IO () +setCompletionFunction Nothing = setCompletionEntryFunction Nothing +setCompletionFunction (Just fn) = setCompletionEntryFunction (Just my_fn) + where + my_fn prefix = do + s <- getLineBuffer + p <- getPoint + fn s prefix p diff --git a/src/compiler/GF/System/UseSignal.hs b/src/compiler/GF/System/UseSignal.hs new file mode 100644 index 000000000..20c70a568 --- /dev/null +++ b/src/compiler/GF/System/UseSignal.hs @@ -0,0 +1,72 @@ +{-# OPTIONS -cpp #-} +---------------------------------------------------------------------- +-- | +-- Module : GF.System.UseSignal +-- Maintainer : Bjorn Bringert +-- Stability : (stability) +-- Portability : (portability) +-- +-- > CVS $Date: 2005/11/11 11:12:50 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.1 $ +-- +-- Allows SIGINT (Ctrl-C) to interrupt computations. +----------------------------------------------------------------------------- + +module GF.System.UseSignal where + +import Control.Concurrent (myThreadId, killThread) +import Control.Exception (SomeException,catch) +import Prelude hiding (catch) +import System.IO + +#ifdef mingw32_HOST_OS +import GHC.ConsoleHandler + +myInstallHandler handler = installHandler handler +myCatch = Catch . const +myIgnore = Ignore +#else +import System.Posix.Signals + +myInstallHandler handler = installHandler sigINT handler Nothing +myCatch = Catch +myIgnore = Ignore +#endif + +{-# NOINLINE runInterruptibly #-} + +-- | Run an IO action, and allow it to be interrupted +-- by a SIGINT to the current process. Returns +-- an exception if the process did not complete +-- normally. +-- NOTES: +-- * This will replace any existing SIGINT +-- handler during the action. After the computation +-- has completed the existing handler will be restored. +-- * If the IO action is lazy (e.g. using readFile, +-- unsafeInterleaveIO etc.) the lazy computation will +-- not be interruptible, as it will be performed +-- after the signal handler has been removed. +runInterruptibly :: IO a -> IO (Either SomeException a) +runInterruptibly a = + do t <- myThreadId + oldH <- myInstallHandler (myCatch (killThread t)) + x <- p `catch` h + myInstallHandler oldH + return x + where p = a >>= \x -> return $! Right $! x + h e = return $ Left e + +-- | Like 'runInterruptibly', but always returns (), whether +-- the computation fails or not. +runInterruptibly_ :: IO () -> IO () +runInterruptibly_ = fmap (either (const ()) id) . runInterruptibly + +-- | Run an action with SIGINT blocked. +blockInterrupt :: IO a -> IO a +blockInterrupt a = + do oldH <- myInstallHandler myIgnore + x <- a + myInstallHandler oldH + return x diff --git a/src/compiler/GF/Text/CP1250.hs b/src/compiler/GF/Text/CP1250.hs new file mode 100644 index 000000000..474c04ace --- /dev/null +++ b/src/compiler/GF/Text/CP1250.hs @@ -0,0 +1,77 @@ +module GF.Text.CP1250 where
+
+import Data.Char
+
+decodeCP1250 = map convert where
+ convert c
+ | c == '\x80' = chr 0x20AC
+ | c == '\x82' = chr 0x201A
+ | c == '\x84' = chr 0x201E
+ | c == '\x85' = chr 0x2026
+ | c == '\x86' = chr 0x2020
+ | c == '\x87' = chr 0x2021
+ | c == '\x89' = chr 0x2030
+ | c == '\x8A' = chr 0x0160
+ | c == '\x8B' = chr 0x2039
+ | c == '\x8C' = chr 0x015A
+ | c == '\x8D' = chr 0x0164
+ | c == '\x8E' = chr 0x017D
+ | c == '\x8F' = chr 0x0179
+ | c == '\x91' = chr 0x2018
+ | c == '\x92' = chr 0x2019
+ | c == '\x93' = chr 0x201C
+ | c == '\x94' = chr 0x201D
+ | c == '\x95' = chr 0x2022
+ | c == '\x96' = chr 0x2013
+ | c == '\x97' = chr 0x2014
+ | c == '\x99' = chr 0x2122
+ | c == '\x9A' = chr 0x0161
+ | c == '\x9B' = chr 0x203A
+ | c == '\x9C' = chr 0x015B
+ | c == '\x9D' = chr 0x0165
+ | c == '\x9E' = chr 0x017E
+ | c == '\x9F' = chr 0x017A
+ | c == '\xA1' = chr 0x02C7
+ | c == '\xA5' = chr 0x0104
+ | c == '\xB9' = chr 0x0105
+ | c == '\xBC' = chr 0x013D
+ | c == '\xBE' = chr 0x013E
+ | otherwise = c
+
+
+encodeCP1250 = map convert where
+ convert c
+ | oc == 0x20AC = '\x80'
+ | oc == 0x201A = '\x82'
+ | oc == 0x201E = '\x84'
+ | oc == 0x2026 = '\x85'
+ | oc == 0x2020 = '\x86'
+ | oc == 0x2021 = '\x87'
+ | oc == 0x2030 = '\x89'
+ | oc == 0x0160 = '\x8A'
+ | oc == 0x2039 = '\x8B'
+ | oc == 0x015A = '\x8C'
+ | oc == 0x0164 = '\x8D'
+ | oc == 0x017D = '\x8E'
+ | oc == 0x0179 = '\x8F'
+ | oc == 0x2018 = '\x91'
+ | oc == 0x2019 = '\x92'
+ | oc == 0x201C = '\x93'
+ | oc == 0x201D = '\x94'
+ | oc == 0x2022 = '\x95'
+ | oc == 0x2013 = '\x96'
+ | oc == 0x2014 = '\x97'
+ | oc == 0x2122 = '\x99'
+ | oc == 0x0161 = '\x9A'
+ | oc == 0x203A = '\x9B'
+ | oc == 0x015B = '\x9C'
+ | oc == 0x0165 = '\x9D'
+ | oc == 0x017E = '\x9E'
+ | oc == 0x017A = '\x9F'
+ | oc == 0x02C7 = '\xA1'
+ | oc == 0x0104 = '\xA5'
+ | oc == 0x0105 = '\xB9'
+ | oc == 0x013D = '\xBC'
+ | oc == 0x013E = '\xBE'
+ | otherwise = c
+ where oc = ord c
diff --git a/src/compiler/GF/Text/CP1251.hs b/src/compiler/GF/Text/CP1251.hs new file mode 100644 index 000000000..7c277abab --- /dev/null +++ b/src/compiler/GF/Text/CP1251.hs @@ -0,0 +1,74 @@ +module GF.Text.CP1251 where
+
+import Data.Char
+
+decodeCP1251 = map convert where
+ convert c
+ | c >= '\xC0' && c <= '\xFF' = chr (ord c + (0x410-0xC0))
+ | c == '\xA8' = chr 0x401 -- cyrillic capital letter lo
+ | c == '\x80' = chr 0x402
+ | c == '\x81' = chr 0x403
+ | c == '\xAA' = chr 0x404
+ | c == '\xBD' = chr 0x405
+ | c == '\xB2' = chr 0x406
+ | c == '\xAF' = chr 0x407
+ | c == '\xA3' = chr 0x408
+ | c == '\x8A' = chr 0x409
+ | c == '\x8C' = chr 0x40A
+ | c == '\x8E' = chr 0x40B
+ | c == '\x8D' = chr 0x40C
+ | c == '\xA1' = chr 0x40E
+ | c == '\x8F' = chr 0x40F
+ | c == '\xB8' = chr 0x451 -- cyrillic small letter lo
+ | c == '\x90' = chr 0x452
+ | c == '\x83' = chr 0x453
+ | c == '\xBA' = chr 0x454
+ | c == '\xBE' = chr 0x455
+ | c == '\xB3' = chr 0x456
+ | c == '\xBF' = chr 0x457
+ | c == '\xBC' = chr 0x458
+ | c == '\x9A' = chr 0x459
+ | c == '\x9C' = chr 0x45A
+ | c == '\x9E' = chr 0x45B
+ | c == '\x9D' = chr 0x45C
+ | c == '\xA2' = chr 0x45E
+ | c == '\x9F' = chr 0x45F
+ | c == '\xA5' = chr 0x490
+ | c == '\xB4' = chr 0x491
+ | otherwise = c
+
+encodeCP1251 = map convert where
+ convert c
+ | oc >= 0x410 && oc <= 0x44F = chr (oc - (0x410-0xC0))
+ | oc == 0x401 = '\xA8' -- cyrillic capital letter lo
+ | oc == 0x402 = '\x80'
+ | oc == 0x403 = '\x81'
+ | oc == 0x404 = '\xAA'
+ | oc == 0x405 = '\xBD'
+ | oc == 0x406 = '\xB2'
+ | oc == 0x407 = '\xAF'
+ | oc == 0x408 = '\xA3'
+ | oc == 0x409 = '\x8A'
+ | oc == 0x40A = '\x8C'
+ | oc == 0x40B = '\x8E'
+ | oc == 0x40C = '\x8D'
+ | oc == 0x40E = '\xA1'
+ | oc == 0x40F = '\x8F'
+ | oc == 0x451 = '\xB8' -- cyrillic small letter lo
+ | oc == 0x452 = '\x90'
+ | oc == 0x453 = '\x83'
+ | oc == 0x454 = '\xBA'
+ | oc == 0x455 = '\xBE'
+ | oc == 0x456 = '\xB3'
+ | oc == 0x457 = '\xBF'
+ | oc == 0x458 = '\xBC'
+ | oc == 0x459 = '\x9A'
+ | oc == 0x45A = '\x9C'
+ | oc == 0x45B = '\x9E'
+ | oc == 0x45C = '\x9D'
+ | oc == 0x45E = '\xA2'
+ | oc == 0x45F = '\x9F'
+ | oc == 0x490 = '\xA5'
+ | oc == 0x491 = '\xB4'
+ | otherwise = c
+ where oc = ord c
diff --git a/src/compiler/GF/Text/CP1252.hs b/src/compiler/GF/Text/CP1252.hs new file mode 100644 index 000000000..1e5affe53 --- /dev/null +++ b/src/compiler/GF/Text/CP1252.hs @@ -0,0 +1,6 @@ +module GF.Text.CP1252 where
+
+import Data.Char
+
+decodeCP1252 = map id
+encodeCP1252 = map (\x -> if x <= '\255' then x else '?')
diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs new file mode 100644 index 000000000..e3cd7b0ea --- /dev/null +++ b/src/compiler/GF/Text/Coding.hs @@ -0,0 +1,21 @@ +module GF.Text.Coding where + +import GF.Infra.Option +import GF.Text.UTF8 +import GF.Text.CP1250 +import GF.Text.CP1251 +import GF.Text.CP1252 + +encodeUnicode e = case e of + UTF_8 -> encodeUTF8 + CP_1250 -> encodeCP1250 + CP_1251 -> encodeCP1251 + CP_1252 -> encodeCP1252 + _ -> id + +decodeUnicode e = case e of + UTF_8 -> decodeUTF8 + CP_1250 -> decodeCP1250 + CP_1251 -> decodeCP1251 + CP_1252 -> decodeCP1252 + _ -> id diff --git a/src/compiler/GF/Text/Lexing.hs b/src/compiler/GF/Text/Lexing.hs new file mode 100644 index 000000000..3300d311e --- /dev/null +++ b/src/compiler/GF/Text/Lexing.hs @@ -0,0 +1,131 @@ +module GF.Text.Lexing (stringOp,opInEnv) where + +import GF.Text.Transliterations +import GF.Text.UTF8 +import GF.Text.CP1251 + +import Data.Char +import Data.List (intersperse) + +-- lexers and unlexers - they work on space-separated word strings + +stringOp :: String -> Maybe (String -> String) +stringOp name = case name of + "chars" -> Just $ appLexer (filter (not . all isSpace) . map return) + "lextext" -> Just $ appLexer lexText + "lexcode" -> Just $ appLexer lexCode + "lexmixed" -> Just $ appLexer lexMixed + "words" -> Just $ appLexer words + "bind" -> Just $ appUnlexer bindTok + "unchars" -> Just $ appUnlexer concat + "unlextext" -> Just $ appUnlexer unlexText + "unlexcode" -> Just $ appUnlexer unlexCode + "unlexmixed" -> Just $ appUnlexer unlexMixed + "unwords" -> Just $ appUnlexer unwords + "to_html" -> Just wrapHTML + "to_utf8" -> Just encodeUTF8 + "from_utf8" -> Just decodeUTF8 + "to_cp1251" -> Just encodeCP1251 + "from_cp1251" -> Just decodeCP1251 + _ -> transliterate name + +-- perform op in environments beg--end, t.ex. between "--" +--- suboptimal implementation +opInEnv :: String -> String -> (String -> String) -> (String -> String) +opInEnv beg end op = concat . altern False . chop (lbeg, beg) [] where + chop mk@(lg, mark) s0 s = + let (tag,rest) = splitAt lg s in + if tag==mark then (reverse s0) : mark : chop (switch mk) [] rest + else case s of + c:cs -> chop mk (c:s0) cs + [] -> [reverse s0] + switch (lg,mark) = if mark==beg then (lend,end) else (lbeg,beg) + (lbeg,lend) = (length beg, length end) + altern m ts = case ts of + t:ws | not m && t==beg -> t : altern True ws + t:ws | m && t==end -> t : altern False ws + t:ws -> (if m then op t else t) : altern m ws + [] -> [] + +appLexer :: (String -> [String]) -> String -> String +appLexer f = unwords . filter (not . null) . f + +appUnlexer :: ([String] -> String) -> String -> String +appUnlexer f = unlines . map (f . words) . lines + +wrapHTML :: String -> String +wrapHTML = unlines . tag . intersperse "<br>" . lines where + tag ss = "<html>":"<head>":"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />":"</head>":"<body>" : ss ++ ["</body>","</html>"] + +lexText :: String -> [String] +lexText = uncap . lext where + lext s = case s of + c:cs | isMajorPunct c -> [c] : uncap (lext cs) + c:cs | isMinorPunct c -> [c] : lext cs + c:cs | isSpace c -> lext cs + _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs + _ -> [s] + uncap s = case s of + (c:cs):ws -> (toLower c : cs):ws + _ -> s + +-- | Haskell lexer, usable for much code +lexCode :: String -> [String] +lexCode ss = case lex ss of + [(w@(_:_),ws)] -> w : lexCode ws + _ -> [] + +-- | LaTeX style lexer, with "math" environment using Code between $...$ +lexMixed :: String -> [String] +lexMixed = concat . alternate False where + alternate env s = case s of + _:_ -> case break (=='$') s of + (t,[]) -> lex env t : [] + (t,c:m) -> lex env t : [[c]] : alternate (not env) m + _ -> [] + lex env = if env then lexCode else lexText + +bindTok :: [String] -> String +bindTok ws = case ws of + w:"&+":ws2 -> w ++ bindTok ws2 + w:[] -> w + w:ws2 -> w ++ " " ++ bindTok ws2 + [] -> "" + +unlexText :: [String] -> String +unlexText = cap . unlext where + unlext s = case s of + w:[] -> w + w:[c]:[] | isPunct c -> w ++ [c] + w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ cap (unlext cs) + w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs + w:ws -> w ++ " " ++ unlext ws + _ -> [] + cap s = case s of + c:cs -> toUpper c : cs + _ -> s + +unlexCode :: [String] -> String +unlexCode s = case s of + w:[] -> w + [c]:cs | isParen c -> [c] ++ unlexCode cs + w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs + w:ws -> w ++ " " ++ unlexCode ws + _ -> [] + + +unlexMixed :: [String] -> String +unlexMixed = concat . alternate False where + alternate env s = case s of + _:_ -> case break (=="$") s of + (t,[]) -> unlex env t : [] + (t,c:m) -> unlex env t : sep env c : alternate (not env) m + _ -> [] + unlex env = if env then unlexCode else unlexText + sep env c = if env then c ++ " " else " " ++ c + +isPunct = flip elem ".?!,:;" +isMajorPunct = flip elem ".?!" +isMinorPunct = flip elem ",:;" +isParen = flip elem "()[]{}" +isClosing = flip elem ")]}" diff --git a/src/compiler/GF/Text/Transliterations.hs b/src/compiler/GF/Text/Transliterations.hs new file mode 100644 index 000000000..e2747f506 --- /dev/null +++ b/src/compiler/GF/Text/Transliterations.hs @@ -0,0 +1,206 @@ +module GF.Text.Transliterations ( + transliterate, + transliteration, + characterTable, + transliterationPrintNames + ) where + +import GF.Text.UTF8 + +import Data.Char +import Numeric +import qualified Data.Map as Map + +-- transliterations between ASCII and a Unicode character set + +-- current transliterations: devanagari, thai + +-- to add a new one: define the Unicode range and the corresponding ASCII strings, +-- which may be one or more characters long + +-- conventions to be followed: +-- each character is either [letter] or [letter+nonletters] +-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations +-- characters can be invisible: ignored in translation to unicode + +transliterate :: String -> Maybe (String -> String) +transliterate s = case s of + 'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t + 't':'o':'_':t -> fmap appTransToUnicode $ transliteration t + _ -> Nothing + +transliteration :: String -> Maybe Transliteration +transliteration s = Map.lookup s allTransliterations + +allTransliterations = Map.fromAscList [ + ("ancientgreek", transAncientGreek), + ("arabic", transArabic), + ("devanagari", transDevanagari), + ("greek", transGreek), + ("hebrew", transHebrew), + ("persian", transPersian), + ("telugu", transTelugu), + ("thai", transThai) + ---- "urdu", transUrdu + ] + +-- used in command options and help +transliterationPrintNames = [(t,printname p) | (t,p) <- Map.toList allTransliterations] + +characterTable :: Transliteration -> String +characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where + prOne (i,s) = unwords ["|", showHex i "", "|", [toEnum i], "|", s, "|"] + +data Transliteration = Trans { + trans_to_unicode :: Map.Map String Int, + trans_from_unicode :: Map.Map Int String, + invisible_chars :: [String], + printname :: String + } + +appTransToUnicode :: Transliteration -> String -> String +appTransToUnicode trans = + concat . + map (\c -> maybe c (return . toEnum) $ + Map.lookup c (trans_to_unicode trans) + ) . + filter (flip notElem (invisible_chars trans)) . + unchar + +appTransFromUnicode :: Transliteration -> String -> String +appTransFromUnicode trans = + concat . + map (maybe "?" id . + flip Map.lookup (trans_from_unicode trans) + ) . + map fromEnum + + +mkTransliteration :: String -> [String] -> [Int] -> Transliteration +mkTransliteration name ts us = + Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) [] name + where + tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"] + uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"] + + +unchar :: String -> [String] +unchar s = case s of + c:d:cs + | isAlpha d -> [c] : unchar (d:cs) + | isSpace d -> [c]:[d]: unchar cs + | otherwise -> let (ds,cs2) = break (\x -> isAlpha x || isSpace x) cs in + (c:d:ds) : unchar cs2 + [_] -> [s] + _ -> [] + +transThai :: Transliteration +transThai = mkTransliteration "Thai" allTrans allCodes where + allTrans = words $ + "- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' " ++ + "t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' " ++ + "p3 m y r - l - w s- s. s h l' O h' - " ++ + "a. a a: a+ i i: v v: u u: - - - - - - " ++ + "e e' o: a% a& L R S T1 T2 T3 T4 K - - - " ++ + "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - " + allCodes = [0x0e00 .. 0x0e7f] + +transDevanagari :: Transliteration +transDevanagari = + (mkTransliteration "Devanagari" + allTransUrduHindi allCodes){invisible_chars = ["a"]} where + allCodes = [0x0900 .. 0x095f] + +allTransUrduHindi = words $ + "- M N - - a- A- i- I- u- U- R- - - - e- " ++ + "E- - - o- O- k K g G N: c C j J n: t. " ++ + "T. d. D. n. t T d D n - p P b B m y " ++ + "r - l - - v S s. s h - - r: - A i " ++ + "I u U R - - - e E o O - - - - - " ++ + "- - - - - - - - - - - z r. - - - " + +transUrdu :: Transliteration +transUrdu = + (mkTransliteration "Urdu" allTransUrduHindi allCodes){invisible_chars = ["a"]} where + allCodes = [0x0900 .. 0x095f] ---- TODO: this is devanagari + +transArabic :: Transliteration +transArabic = mkTransliteration "Arabic" allTrans allCodes where + allTrans = words $ + " V A: A? w? A- y? A b t. t v g H K d " ++ -- 0621 - 062f + "W r z s C S D T Z c G " ++ -- 0630 - 063a + " f q k l m n h w y. y a. u. i. a u " ++ -- 0641 - 064f + "i v2 o a: V+ V- i: a+ " ++ -- 0650 - 0657 + "A* " -- 0671 (used by AED) + allCodes = [0x0621..0x062f] ++ [0x0630..0x063a] ++ + [0x0641..0x064f] ++ [0x0650..0x0657] ++ [0x0671] + +transPersian :: Transliteration +transPersian = (mkTransliteration "Persian/Farsi" allTrans allCodes) + {invisible_chars = ["a","u","i"]} where + allTrans = words $ + " V A: A? w? A- y? A b t. t t- j H K d " ++ -- 0621 - 062f + "W r z s C S D T Z c G " ++ -- 0630 - 063a + " f q k l m n h v y. y a. u. i. a u " ++ -- 0641 - 064f + "i v2 o a: V+ V- i: a+ " ++ -- 0650 - 0657 + "p c^ J g " + allCodes = [0x0621..0x062f] ++ [0x0630..0x063a] ++ + [0x0641..0x064f] ++ [0x0650..0x0657] ++ + [0x067e,0x0686,0x0698,0x06af] + +transHebrew :: Transliteration +transHebrew = mkTransliteration "unvocalized Hebrew" allTrans allCodes where + allTrans = words $ + "A b g d h w z H T y K k l M m N " ++ + "n S O P p Z. Z q r s t - - - - - " ++ + "w2 w3 y2 g1 g2" + allCodes = [0x05d0..0x05f4] + +transTelugu :: Transliteration +transTelugu = mkTransliteration "Telugu" allTrans allCodes where + allTrans = words $ + "- c1 c2 c3 - A A: I I: U U: R_ L_ - E E: " ++ + "A' - O O: A_ k k. g g. n. c c. j j. n' T " ++ + "T. d d. N t t. d d. n - p p. b b. m y " ++ + "r R l L - v s' S s h - - - c5 a: i " ++ + "i: u u: r_ r. - e e: a' - o o: a_ c6 - - " ++ + "- - - - - c7 c8 z Z - - - - - - - " ++ + "R+ L+ l+ l* - - n0 n1 n2 n3 n4 n5 n6 n7 n8 n9 " + allCodes = [0x0c00 .. 0x0c7f] + +transGreek :: Transliteration +transGreek = mkTransliteration "modern Greek" allTrans allCodes where + allTrans = words $ + "- - - - - - A' - E' H' I' - O' - Y' W' " ++ + "i= A B G D E Z H V I K L M N X O " ++ + "P R - S T Y F C Q W I- Y- a' e' h' i' " ++ + "y= a b g d e z h v i k l m n x o " ++ + "p r s* s t y f c q w i- y- o' y' w' - " + allCodes = [0x0380 .. 0x03cf] + +transAncientGreek :: Transliteration +transAncientGreek = mkTransliteration "ancient Greek" allTrans allCodes where + allTrans = words $ + "- - - - - - - - - - - - - - - - " ++ + "i= A B G D E Z H V I K L M N X O " ++ + "P R - S T Y F C Q W I- Y- - - - - " ++ + "y= a b g d e z h v i k l m n x o " ++ + "p r s* s t y f c q w i- y- - - - - " ++ + "a) a( a)` a(` a)' a(' a)~ a(~ A) A( A)` A(` A)' A(' A)~ A(~ " ++ + "e) e( e)` e(` e)' e(' - - E) E( E)` E(` E)' E(' - - " ++ + "h) h( h)` h(` h)' h(' h)~ h(~ H) H( H)` H(` H)' H(' H)~ H(~ " ++ + "i) i( i)` i(` i)' i(' i)~ i(~ I) I( I)` I(` I)' I(' I)~ I(~ " ++ + "o) o( o)` o(` o)' o(' - - O) O( O)` O(` O)' O(' - - " ++ + "y) y( y)` y(` y)' y(' y)~ y(~ - Y( - Y(` - Y(' - Y(~ " ++ + "w) w( w)` w(` w)' w(' w)~ w(~ W) W( W)` W(` W)' W(' W)~ W(~ " ++ + "a` a' e` e' h` h' i` i' o` o' y` y' w` w' - - " ++ + "a|( a|) a|)` a|(` a|)' a|(' a|)~ a|(~ - - - - - - - - " ++ -- 1f80- + "h|( h|) h|)` h|(` h|)' h|(' h|)~ h|(~ - - - - - - - - " ++ -- 1f90- + "w|( w|) w|)` w|(` w|)' w|(' w|)~ w|(~ - - - - - - - - " ++ -- 1fa0- + "a. a_ a|` a| a|' - a~ a|~ - - - - - - - - " ++ -- 1fb0- + "- - h|` h| h|' - h~ h|~ - - - - - - - - " ++ -- 1fc0- + "i. i_ i=` i=' - - i~ i=~ - - - - - - - - " ++ -- 1fd0- + "y. y_ y=` y=' r) r( y~ y|~ - - - - - - - - " ++ -- 1fe0- + "- - w|` w| w|' - w~ w|~ - - - - - - - - " -- 1ff0- + allCodes = [0x0380 .. 0x03cf] ++ [0x1f00 .. 0x1fff] + diff --git a/src/compiler/GF/Text/UTF8.hs b/src/compiler/GF/Text/UTF8.hs new file mode 100644 index 000000000..5e9687684 --- /dev/null +++ b/src/compiler/GF/Text/UTF8.hs @@ -0,0 +1,48 @@ +---------------------------------------------------------------------- +-- | +-- Module : UTF8 +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:42 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.5 $ +-- +-- From the Char module supplied with HBC. +-- code by Thomas Hallgren (Jul 10 1999) +----------------------------------------------------------------------------- + +module GF.Text.UTF8 (decodeUTF8, encodeUTF8) where + +-- | Take a Unicode string and encode it as a string +-- with the UTF8 method. +decodeUTF8 :: String -> String +decodeUTF8 "" = "" +decodeUTF8 (c:cs) | c < '\x80' = c : decodeUTF8 cs +decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' && + '\x80' <= c' && c' <= '\xbf' = + toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs +decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' && + '\x80' <= c' && c' <= '\xbf' && + '\x80' <= c'' && c'' <= '\xbf' = + toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs +decodeUTF8 s = s ---- AR workaround 22/6/2006 +----decodeUTF8 _ = error "UniChar.decodeUTF8: bad data" + +encodeUTF8 :: String -> String +encodeUTF8 "" = "" +encodeUTF8 (c:cs) = + if c > '\x0000' && c < '\x0080' then + c : encodeUTF8 cs + else if c < toEnum 0x0800 then + let i = fromEnum c + in toEnum (0xc0 + i `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + encodeUTF8 cs + else + let i = fromEnum c + in toEnum (0xe0 + i `div` 0x1000) : + toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + encodeUTF8 cs diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs new file mode 100644 index 000000000..8037d4f1a --- /dev/null +++ b/src/compiler/GFC.hs @@ -0,0 +1,88 @@ +module GFC (mainGFC) where +-- module Main where + +import PGF +import PGF.CId +import PGF.Data +import GF.Compile +import GF.Compile.Export + +import GF.Grammar.CF ---- should this be on a deeper level? AR 15/10/2008 + +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Data.ErrM + +import Data.Maybe +import Data.Binary +import System.FilePath +import System.IO + + +mainGFC :: Options -> [FilePath] -> IOE () +mainGFC opts fs = + case () of + _ | null fs -> fail $ "No input files." + _ | all (extensionIs ".cf") fs -> compileCFFiles opts fs + _ | all (\f -> extensionIs ".gf" f || extensionIs ".gfo" f) fs -> compileSourceFiles opts fs + _ | all (extensionIs ".pgf") fs -> unionPGFFiles opts fs + _ -> fail $ "Don't know what to do with these input files: " ++ unwords fs + where extensionIs ext = (== ext) . takeExtension + +compileSourceFiles :: Options -> [FilePath] -> IOE () +compileSourceFiles opts fs = + do gr <- batchCompile opts fs + let cnc = justModuleName (last fs) + if flag optStopAfterPhase opts == Compile + then return () + else do pgf <- link opts cnc gr + writePGF opts pgf + writeOutputs opts pgf + +compileCFFiles :: Options -> [FilePath] -> IOE () +compileCFFiles opts fs = + do s <- ioeIO $ fmap unlines $ mapM readFile fs + let cnc = justModuleName (last fs) + gf <- ioeErr $ getCF cnc s + gr <- compileSourceGrammar opts gf + if flag optStopAfterPhase opts == Compile + then return () + else do pgf <- link opts cnc gr + writePGF opts pgf + writeOutputs opts pgf + +unionPGFFiles :: Options -> [FilePath] -> IOE () +unionPGFFiles opts fs = + do pgfs <- mapM readPGFVerbose fs + let pgf = foldl1 unionPGF pgfs + pgfFile = grammarName opts pgf <.> "pgf" + if pgfFile `elem` fs + then putStrLnE $ "Refusing to overwrite " ++ pgfFile + else writePGF opts pgf + writeOutputs opts pgf + where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f + +writeOutputs :: Options -> PGF -> IOE () +writeOutputs opts pgf = do + sequence_ [writeOutput opts name str + | fmt <- flag optOutputFormats opts, + (name,str) <- exportPGF opts fmt pgf] + +writePGF :: Options -> PGF -> IOE () +writePGF opts pgf = do + let outfile = grammarName opts pgf <.> "pgf" + putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ encodeFile outfile pgf + +grammarName :: Options -> PGF -> String +grammarName opts pgf = fromMaybe (showCId (absname pgf)) (flag optName opts) + +writeOutput :: Options -> FilePath-> String -> IOE () +writeOutput opts file str = + do let path = case flag optOutputDir opts of + Nothing -> file + Just dir -> dir </> file + writeOutputFile opts path str + +writeOutputFile :: Options -> FilePath -> String -> IOE () +writeOutputFile opts outfile output = + do putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ writeFile outfile output diff --git a/src/compiler/GFI.hs b/src/compiler/GFI.hs new file mode 100644 index 000000000..2ea22efa6 --- /dev/null +++ b/src/compiler/GFI.hs @@ -0,0 +1,363 @@ +{-# LANGUAGE ScopedTypeVariables, CPP #-} +module GFI (mainGFI,mainRunGFI) where + +import GF.Command.Interpreter +import GF.Command.Importing +import GF.Command.Commands +import GF.Command.Abstract +import GF.Command.Parse +import GF.Data.ErrM +import GF.Grammar hiding (Ident) +import GF.Grammar.Parser (runP, pExp) +import GF.Compile.Rename +import GF.Compile.Concrete.Compute (computeConcrete) +import GF.Compile.Concrete.TypeCheck (inferLType) +import GF.Infra.Dependencies +import GF.Infra.CheckM +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Infra.Modules (greatestResource) +import GF.System.Readline + +import GF.Text.Coding +import GF.Compile.Coding + +import PGF +import PGF.Data +import PGF.Macros + +import Data.Char +import Data.Maybe +import Data.List(isPrefixOf) +import qualified Data.Map as Map +import qualified Data.ByteString.Char8 as BS +import qualified Text.ParserCombinators.ReadP as RP +import System.Cmd +import System.CPUTime +import System.Directory +import Control.Exception +import Control.Monad +import Data.Version +import GF.System.Signal +--import System.IO.Error (try) +#ifdef mingw32_HOST_OS +import System.Win32.Console +import System.Win32.NLS +#endif + +import Paths_gf + +mainRunGFI :: Options -> [FilePath] -> IO () +mainRunGFI opts files = do + let opts1 = addOptions (modifyFlags (\f -> f{optVerbosity=Quiet})) opts + gfenv <- emptyGFEnv + gfenv <- importInEnv gfenv opts1 files + loop opts1 gfenv + return () + +mainGFI :: Options -> [FilePath] -> IO () +mainGFI opts files = do + putStrLn welcome + gfenv <- emptyGFEnv + gfenv <- importInEnv gfenv opts files + loop opts gfenv + return () + +loopOptNewCPU opts gfenv' + | not (verbAtLeast opts Normal) = return gfenv' + | otherwise = do + cpu' <- getCPUTime + putStrLnFlush (show ((cpu' - cputime gfenv') `div` 1000000000) ++ " msec") + return $ gfenv' {cputime = cpu'} + +loop :: Options -> GFEnv -> IO GFEnv +loop opts gfenv0 = do + let loopNewCPU = loopOptNewCPU opts + let isv = verbAtLeast opts Normal + let ifv act = if isv then act else return () + let env = commandenv gfenv0 + let sgr = sourcegrammar gfenv0 + setCompletionFunction (Just (wordCompletion gfenv0)) + let fetch = case flag optMode opts of + ModeRun -> tryGetLine + _ -> fetchCommand (prompt env) + s0 <- fetch + let gfenv = gfenv0 {history = s0 : history gfenv0} + let + enc = encode gfenv + s = decode gfenv s0 + pwords = case words s of + w:ws -> getCommandOp w :ws + ws -> ws + + -- special commands, requiring source grammar in env + + case pwords of + + "q":_ -> ifv (putStrLn "See you.") >> return gfenv + + _ -> do + r <- runInterruptibly $ case pwords of + + "!":ws -> do + system $ unwords ws + loopNewCPU gfenv + "cc":ws -> do + let + pOpts style q ("-table" :ws) = pOpts TermPrintTable q ws + pOpts style q ("-all" :ws) = pOpts TermPrintAll q ws + pOpts style q ("-default":ws) = pOpts TermPrintDefault q ws + pOpts style q ("-unqual" :ws) = pOpts style Unqualified ws + pOpts style q ("-qual" :ws) = pOpts style Qualified ws + pOpts style q ws = (style,q,unwords ws) + + (style,q,s) = pOpts TermPrintDefault Qualified (tail (words s0)) + + checkComputeTerm gr t = do + mo <- maybe (Bad "no source grammar in scope") return $ greatestResource gr + ((t,_),_) <- runCheck $ do t <- renameSourceTerm gr mo t + inferLType gr [] t + computeConcrete sgr t + + case runP pExp (BS.pack s) of + Left (_,msg) -> putStrLn msg + Right t -> case checkComputeTerm sgr (codeTerm (decode gfenv) t) of + Ok x -> putStrLn $ enc (showTerm style q x) + Bad s -> putStrLn $ enc s + loopNewCPU gfenv + "dg":ws -> do + writeFile "_gfdepgraph.dot" (depGraph sgr) + putStrLn "wrote graph in file _gfdepgraph.dot" + loopNewCPU gfenv + "i":args -> do + gfenv' <- case parseOptions args of + Ok (opts',files) -> do + curr_dir <- getCurrentDirectory + lib_dir <- getLibraryDirectory (addOptions opts opts') + importInEnv gfenv (addOptions opts (fixRelativeLibPaths curr_dir lib_dir opts')) files + Bad err -> do + putStrLn $ "Command parse error: " ++ err + return gfenv + loopNewCPU gfenv' + + -- other special commands, working on GFEnv + "e":_ -> loopNewCPU $ gfenv { + commandenv=emptyCommandEnv, sourcegrammar = emptySourceGrammar + } + + "dc":f:ws -> do + case readCommandLine (unwords ws) of + Just comm -> loopNewCPU $ gfenv { + commandenv = env { + commandmacros = Map.insert f comm (commandmacros env) + } + } + _ -> putStrLn "command definition not parsed" >> loopNewCPU gfenv + + "dt":f:ws -> do + case readExpr (unwords ws) of + Just exp -> loopNewCPU $ gfenv { + commandenv = env { + expmacros = Map.insert f exp (expmacros env) + } + } + _ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv + + "ph":_ -> + mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv + "se":c:_ -> + case lookup c encodings of + Just cod -> do +#ifdef mingw32_HOST_OS + case c of + 'c':'p':c -> case reads c of + [(cp,"")] -> setConsoleCP cp >> setConsoleOutputCP cp + _ -> return () + "utf8" -> setConsoleCP 65001 >> setConsoleOutputCP 65001 + _ -> return () +#endif + loopNewCPU $ gfenv {coding = cod} + Nothing -> do putStrLn "unknown encoding" + loopNewCPU gfenv + + -- ordinary commands, working on CommandEnv + _ -> do + interpretCommandLine enc env s + loopNewCPU gfenv +-- gfenv' <- return $ either (const gfenv) id r + gfenv' <- either (\e -> (print e >> return gfenv)) return r + loop opts gfenv' + +importInEnv :: GFEnv -> Options -> [FilePath] -> IO GFEnv +importInEnv gfenv opts files + | flag optRetainResource opts = + do src <- importSource (sourcegrammar gfenv) opts files + return $ gfenv {sourcegrammar = src} + | otherwise = + do let opts' = addOptions (setOptimization OptCSE False) opts + pgf0 = multigrammar (commandenv gfenv) + pgf1 <- importGrammar pgf0 opts' files + if (verbAtLeast opts Normal) + then putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf1) + else return () + return $ gfenv { commandenv = mkCommandEnv (coding gfenv) pgf1 } + +tryGetLine = do + res <- try getLine + case res of + Left (e :: SomeException) -> return "q" + Right l -> return l + +welcome = unlines [ + " ", + " * * * ", + " * * ", + " * * ", + " * ", + " * ", + " * * * * * * * ", + " * * * ", + " * * * * * * ", + " * * * ", + " * * * ", + " ", + "This is GF version "++showVersion version++". ", + "License: see help -license. ", + "Differences from GF 2.9: see help -changes.", + "Bug reports: http://code.google.com/p/grammatical-framework/issues/list" + ] + +prompt env + | abs == wildCId = "> " + | otherwise = showCId abs ++ "> " + where + abs = abstractName (multigrammar env) + +data GFEnv = GFEnv { + sourcegrammar :: SourceGrammar, -- gfo grammar -retain + commandenv :: CommandEnv, + history :: [String], + cputime :: Integer, + coding :: Encoding + } + +emptyGFEnv :: IO GFEnv +emptyGFEnv = do +#ifdef mingw32_HOST_OS + codepage <- getACP + let coding = fromMaybe UTF_8 (lookup ("cp"++show codepage) encodings) +#else + let coding = UTF_8 +#endif + return $ GFEnv emptySourceGrammar (mkCommandEnv coding emptyPGF) [] 0 coding + +encode = encodeUnicode . coding +decode = decodeUnicode . coding + +wordCompletion gfenv line0 prefix0 p = + case wc_type (take p line) of + CmplCmd pref + -> ret ' ' [name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name] + CmplStr (Just (Command _ opts _)) s + -> do mb_state0 <- try (evaluate (initState pgf (optLang opts) (optType opts))) + case mb_state0 of + Right state0 -> let ws = words (take (length s - length prefix) s) + in case loop state0 ws of + Nothing -> ret ' ' [] + Just state -> let compls = getCompletions state prefix + in ret ' ' (map (encode gfenv) (Map.keys compls)) + Left (_ :: SomeException) -> ret ' ' [] + CmplOpt (Just (Command n _ _)) pref + -> case Map.lookup n (commands cmdEnv) of + Just inf -> do let flg_compls = ['-':flg | (flg,_) <- flags inf, isPrefixOf pref flg] + opt_compls = ['-':opt | (opt,_) <- options inf, isPrefixOf pref opt] + ret (if null flg_compls then ' ' else '=') + (flg_compls++opt_compls) + Nothing -> ret ' ' [] + CmplIdent (Just (Command "i" _ _)) _ -- HACK: file name completion for command i + -> filenameCompletionFunction prefix + CmplIdent _ pref + -> do mb_abs <- try (evaluate (abstract pgf)) + case mb_abs of + Right abs -> ret ' ' [name | cid <- Map.keys (funs abs), let name = showCId cid, isPrefixOf pref name] + Left (_ :: SomeException) -> ret ' ' [] + _ -> ret ' ' [] + where + line = decode gfenv line0 + prefix = decode gfenv prefix0 + + pgf = multigrammar cmdEnv + cmdEnv = commandenv gfenv + optLang opts = valCIdOpts "lang" (head (languages pgf)) opts + optType opts = + let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts + in case readType str of + Just ty -> ty + Nothing -> error ("Can't parse '"++str++"' as type") + + loop ps [] = Just ps + loop ps (t:ts) = case nextState ps t of + Left es -> Nothing + Right ps -> loop ps ts + + ret c [x] = return [x++[c]] + ret _ xs = return xs + + +data CompletionType + = CmplCmd Ident + | CmplStr (Maybe Command) String + | CmplOpt (Maybe Command) Ident + | CmplIdent (Maybe Command) Ident + deriving Show + +wc_type :: String -> CompletionType +wc_type = cmd_name + where + cmd_name cs = + let cs1 = dropWhile isSpace cs + in go cs1 cs1 + where + go x [] = CmplCmd x + go x (c:cs) + | isIdent c = go x cs + | otherwise = cmd x cs + + cmd x [] = ret CmplIdent x "" 0 + cmd _ ('|':cs) = cmd_name cs + cmd _ (';':cs) = cmd_name cs + cmd x ('"':cs) = str x cs cs + cmd x ('-':cs) = option x cs cs + cmd x (c :cs) + | isIdent c = ident x (c:cs) cs + | otherwise = cmd x cs + + option x y [] = ret CmplOpt x y 1 + option x y ('=':cs) = optValue x y cs + option x y (c :cs) + | isIdent c = option x y cs + | otherwise = cmd x cs + + optValue x y ('"':cs) = str x y cs + optValue x y cs = cmd x cs + + ident x y [] = ret CmplIdent x y 0 + ident x y (c:cs) + | isIdent c = ident x y cs + | otherwise = cmd x cs + + str x y [] = ret CmplStr x y 1 + str x y ('\"':cs) = cmd x cs + str x y ('\\':c:cs) = str x y cs + str x y (c:cs) = str x y cs + + ret f x y d = f cmd y + where + x1 = take (length x - length y - d) x + x2 = takeWhile (\c -> isIdent c || isSpace c || c == '-' || c == '=' || c == '"') x1 + + cmd = case [x | (x,cs) <- RP.readP_to_S pCommand x2, all isSpace cs] of + [x] -> Just x + _ -> Nothing + + isIdent c = c == '_' || c == '\'' || isAlphaNum c |
