diff options
| author | aarne <unknown> | 2004-06-21 08:53:58 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2004-06-21 08:53:58 +0000 |
| commit | b248e6e25e5b58163cc9b897be7eb0b4bf6dbdc6 (patch) | |
| tree | 74d74c06998903a16c5909eafc9835e1ca68491d | |
| parent | a134a1fd65c80bae1e37d304fc674453e126c504 (diff) | |
for release meeting
| -rw-r--r-- | doc/release2.html | 546 | ||||
| -rw-r--r-- | src/GF/API.hs | 7 | ||||
| -rw-r--r-- | src/GF/Compile/Compile.hs | 11 | ||||
| -rw-r--r-- | src/GF/Infra/UseIO.hs | 7 | ||||
| -rw-r--r-- | src/GF/Shell/TeachYourself.hs | 8 |
5 files changed, 570 insertions, 9 deletions
diff --git a/doc/release2.html b/doc/release2.html new file mode 100644 index 000000000..d34b49cc1 --- /dev/null +++ b/doc/release2.html @@ -0,0 +1,546 @@ +<html> + +<body bgcolor="#FFFFFF" text="#000000"> + +<center> + +<h1>Grammatical Framework Version 2</h1> + +Release of Version 2.0 + +<p> + +Planned: 24 June 2004 + +<p> + +<a href="http://www.cs.chalmers.se/~aarne">Aarne Ranta</a> + +</center> + + +<!-- NEW --> + +<h2>Highlights</h2> + +Module system. + +<p> + +Separate compilation to canonical GF. + +<p> + +Improved GUI. + +<p> + +Improved parser generation. + +<p> + +Improved shell (new commands and options, help, error messages). + +<p> + +Accurate <a href="DocGF.pdf">language specification</a> +(also of GFC). + +<p> + +Extended resource library. + +<p> + +Extended Numerals library. + + +<!-- NEW --> + + + +<h2>Module system</h2> + +<li> Separate modules for <tt>abstract</tt>, + <tt>concrete</tt>, and <tt>resource</tt>. +<li> Replaces the file-based <tt>include</tt> system +<li> Name space handling with qualified names +<li> Hierarchic structure (single inheritance <tt>**</tt>) + + cross-cutting reuse (<tt>open</tt>) +<li> Separate compilation, one module per file +<li> Reuse of <tt>abstract</tt>+<tt>concrete</tt> as <tt>resource</tt> +<li> Parametrized modules: + <tt>interface</tt>, <tt>instance</tt>, <tt>incomplete</tt>. +<li> New experimental module types: <tt>transfer</tt>, + <tt>union</tt>. + + +<!-- NEW --> + +<h4>Canonical format GFC</h4> + +<li> The target of GF compiler; to reuse, just read in. + +<li> Readable by Haskell/Java/C++/C applications (by BNFC generated parsers). + + + +<!-- NEW --> + +<h4>New features in expression language</h4> + +In addition to the module system: + +<p> + +<li> Disjunctive patterns <tt>P | ... | Q</tt>. +<li> String patterns <tt>"foo"</tt>. +<li> (?) Integer patterns <tt>74</tt>. +<li> Binding token <tt>&+</tt> to glue separate tokens at unlexing phase, + and unlexer to resolve this. 
+<li> New syntax alternatives for local definitions: <tt>let</tt> without + braces and <tt>where</tt>. +<li> Pattern variables can be used on lhs's of <tt>oper</tt> definitions. +<li> New Unicode transliterations (by Harald Hammarström). + + +<!-- NEW --> + +<h4>New shell commands and command functionalities</h4> + +<li> <tt>pi</tt> = <tt>print_info</tt>: information on an identifier in scope. +<li> <tt>h</tt> = <tt>help</tt> now in long or short form, + and on individual commands. +<li> <tt>gt</tt> = <tt>generate_trees</tt>: all trees of a given + category or instantiations of a given incomplete term, up to a + given depth. +<li> <tt>gr</tt> = <tt>generate_random</tt> can now be given + an incomplete term as an argument, to constrain generation. +<li> <tt>so</tt> = <tt>show_opers</tt> shows all <tt>oper</tt> + operations with a given value type. +<li> <tt>pm</tt> = <tt>print_multi</tt> prints the multilingual + grammar resident in the current state to a ready-compiled + <tt>.gfcm</tt> file. +<li> All commands have both long and short names (see help). Short + names are easier to type, whereas long names + make scripts more readable. +<li> Meaningless command options generate warnings. + + +<!-- NEW --> + +<h4>New editor features</h4> + +<li> Active text field: click the middle button in the focus to send + in refinement through the parser. +<li> Clipboard: copy complex terms into the refine menu. +<li> Two-step refinements generated by the "Generate" operation. + +<!-- NEW --> + +<h4>Improved implementation</h4> + +<li> Haskell source code is organized into subdirectories. +<li> BNF Converter is used for defining the languages GF and GFC, which also + gives reliable LaTeX documentation. +<li> Lexical rules sorted out by option <tt>-cflexer</tt> for efficient + parsing with large lexica. +<li> GHC optimizations and strictness flags are used for improving performance. + + +<!-- NEW --> + +<h4>New parser (work in progress)</h4> + +<li> By Peter Ljunglöf, based on MCFG. 
+<li> Much more efficient for morphology and discontinuous constituents. +<li> Treatment of cyclic rules. +<li> Currently lots of alternative parsers via flags <tt>-parser=newX</tt>. + + +<!-- NEW --> + +<h2>Status (21/6/2004)</h2> + +Grammar compiler, editor GUIs, and shell work for all platforms +(with restrictions for Solaris). + +<p> + +The updated <tt>HelpFile</tt> (accessible through <tt>h</tt> command) +marks unsupported features present in GF 1.2 with <tt>*</tt>. +They will be supported again if interested users appear. + +<p> + +GF1 grammars can be automatically translated to GF2 (although the +result is not as good +as manual, since indentation and comments are destroyed). The results can be +saved in GF2 files, but this is not necessary. +Some rarely used GF1 features are no longer supported (see next section). + +<p> + +It is also possible to write a GF2 grammar back to GF1, with the +command <tt>pg -printer=old</tt>. + + +<!-- NEW --> + +Resource libraries +and some example grammars have been +converted. Most old example grammars work without any changes. +There is a new resource API with +many new constructions. + +<p> + +A make facility works, finding out which modules have to be recompiled. + +<p> + +Soundness checking of module dependencies and completeness is not +complete. This means that some errors may show up too late. + +<p> + +The environment variable <tt>GF_LIB_PATH</tt> needs some more work. + +<p> + +Latex and XML printing of grammars do not work yet. + + + +<!-- NEW --> + +<h2>How to use GF 1.* files</h2> + +Backward compatibility with respect to old GF grammars has been +a central goal. All GF grammars, from version 0.9, should work in +the old way in GF2. The main exceptions are some features that +are rarely used. +<ul> +<li> The <tt>package</tt> system introduced in GF 1.2, cannot be + interpreted in the module system of GF 2.0, since packages are in + mutual scope with the top level. 
+<li> <tt>tokenizer</tt> pragmas cannot be parsed any more. In GF + 1.2, they are already replaced by <tt>lexer</tt> flags. +<li> <tt>var</tt> pragmas cannot be parsed any more. +</ul> + +<p> + +Very old GF grammars (from versions before 0.9), with the completely +different notation, do not work. They should be first converted to +GF1 by using GF version 1.2. + + +<!-- NEW --> + + +The import command <tt>i</tt> can be given the option <tt>-old</tt>. E.g. +<pre> + i -old tut1.Eng.g2 +</pre> +But this is no longer necessary: GF2 detects automatically if a grammar +is in the GF1 format. + +<p> + +Importing a set of GF2 files generates, internally, three modules: +<pre> + abstract tut1 = ... + resource ResEng = ... + concrete Eng of tut1 = open ResEng in ... +</pre> +(The names are different if the file name has fewer parts.) + + +<p> + +The option <tt>-o</tt> causes GF2 to write these modules into files. + + +<!-- NEW --> + +The flags <tt>-abs</tt>, <tt>-cnc</tt>, and <tt>-res</tt> can be used +to give custom names to the modules. In particular, it is good to use +the <tt>-abs</tt> flag to guarantee that the abstract syntax module +has the same name for all grammars in a multilingual environment: +<pre> + i -old -abs=Numerals hungarian.gf + i -old -abs=Numerals tamil.gf + i -old -abs=Numerals sanskrit.gf +</pre> + +<p> + +The same flags as in the import command can be used when invoking +GF2 from the system shell. Many grammars can be imported on the same command +line, e.g. +<pre> + % gf2 -old -abs=Tutorial tut1.Eng.gf tut1.Fin.gf tut1.Fra.gf +</pre> + +<p> + +To write a GF2 grammar back to GF1 (as one big file), use the command +<pre> + > pg -old +</pre> + + + +<!-- NEW --> + + + +GF2 has more reserved words than GF 1.2. When old files are read, a preprocessor +replaces every identifier that has the shape of a new reserved word +with a variant where the last letter is replaced by <tt>Z</tt>, e.g. +<tt>instance</tt> is replaced by <tt>instancZ</tt>. 
This method is of course +unsafe and should be replaced by something better. + + + + +<!-- NEW --> + +<h2>Abstract, concrete, and resource modules</h2> + +Judgement forms are sorted as follows: +<ul> +<li> abstract: + <tt>cat</tt>, <tt>fun</tt>, <tt>def</tt>, <tt>data</tt>, <tt>flags</tt> +<li> concrete: + <tt>lincat</tt>, <tt>lin</tt>, <tt>printname</tt>, <tt>flags</tt> +<li> resource: + <tt>param</tt>, <tt>oper</tt>, <tt>flags</tt> +<li> +</ul> + + +<!-- NEW --> + +Example: +<pre> + abstract Sums = { + cat + Exp ; + fun + One : Exp ; + plus : Exp -> Exp -> Exp ; + } + + concrete EnglishSums of Sums = open ResEng in { + lincat + Exp = {s : Str ; n : Number} ; + lin + One = expSg "one" ; + plus x y = expSg ("the" ++ "sum" ++ "of" ++ x.s ++ "and" ++ y.s) ; + } + + resource ResEng = { + param + Number = Sg | Pl ; + oper + expSg : Str -> {s : Str ; n : Number} = \s -> {s = s ; n = Sg} ; + } +</pre> + + + +<!-- NEW --> + +<h2>Opening and extending modules</h2> + +A <tt>concrete</tt> or <tt>resource</tt> can <b>open</b> a +<tt>resource</tt>. This means that +<ul> +<li> the names defined in <tt>resource</tt> can be used ("become visible") +<li> but: these names are not included in ("exported from") the opening module +</ul> +A module of any type can moreover <b>extend</b> a module of the same type. 
+This means that +<ul> +<li> the names defined in the extended module can be used ("become visible") +<li> and also: these names are included in ("exported from") the extending module +</ul> +Examples of extension: +<pre> + abstract Products = Sums ** { + fun times : Exp -> Exp -> Exp ; + } + -- names exported: Exp, plus, times + + concrete English of Products = EnglishSums ** open ResEng in { + lin times x y = expSg ("the" ++ "product" ++ "of" ++ x.s ++ "and" ++ y.s) ; + } +</pre> +Another important difference: +<li> extension is single +<li> opening can be multiple: <tt>open Foo, Bar, Baz in {...}</tt> + +<!-- NEW --> + +Moreover: +<li> opening can be <b>qualified</b> +<p> +Example of qualified opening: +<pre> + concrete NumberSystems of Systems = open (Bin = Binary), (Dec = Decimal) in { + lin + BZero = Bin.Zero ; + DZero = Dec.Zero + } +</pre> + + +<!-- NEW --> + +<h2>Compiling modules</h2> + +Separate compilation assumes there is <b>one module per file</b>. + +<p> + +The <b>module header</b> is the beginning of the module code up to the +first left bracket (<tt>{</tt>). 
The header gives +<ul> +<li> the module type: <tt>abstract</tt>, <tt>concrete</tt> (<tt>of</tt> <i>A</i>), + or <tt>resource</tt> +<li> the name of the module (next to the module type keyword) +<li> the name of the extended module (between <tt>=</tt> and <tt>**</tt>) +<li> the names of opened modules +</ul> + +<!-- NEW --> + + +<b>filename</b> = <b>modulename</b> <tt>.</tt> <b>extension</b> + +<p> + +File name extensions: +<ul> +<li> <tt>gf</tt>: GF source file (uses GF syntax, is type checked and compiled) +<li> <tt>gfc</tt>: canonical GF file (uses GFC syntax, is simply read +in instead of compiled; produced from all kinds of modules) +<li> <tt>gfr</tt>: GF resource file (uses GF syntax, is only read in; produced from +<tt>resource</tt> modules) +<li> <tt>gfcm</tt>: canonical multilingual GF file +(uses GFC syntax, is only read in; produced +from a set of <tt>abstract</tt> and <tt>concrete</tt> modules) +</ul> +Only <tt>gf</tt> files should ever be written/edited manually! + + + +<!-- NEW --> + + +What the make facility does when compiling <tt>Foo.gf</tt> +<ol> +<li> read the module header of <tt>Foo.gf</tt>, and recursively all headers from +the modules it <b>depends</b> on (i.e. extends or opens) +<li> build a dependency graph of these modules, and do topological sorting +<li> starting from the first module in topological order, +compare the modification times of each <tt>gf</tt> and <tt>gfc</tt> file: +<ul> +<li> if <tt>gf</tt> is later, compile the module and all modules depending on it +<li> if <tt>gfc</tt> is later, just read in the module +</ul> +</ol> +Inside the GF shell, also time stamps of modules read into memory are +taken into account. Thus a module need not be read from a file if the +module is in the memory and the file has not been modified. + + +<!-- NEW --> + +If the compilation of a grammar fails at some module, the state of the +GF shell contains all modules read up to that point. 
This makes it +faster to compile the faulty module again after fixing it. + +<p> + +Use the command <tt>po</tt> = <tt>print_options</tt> to see what +modules are in the state. + +<p> + +To force compilation: +<ul> +<li> The flag <i>-src</i> in the import command forces compilation from + source even if more recent object files exist. This is useful + when testing new versions of GF. +<li> The flag <i>-retain</i> in the import command forces reading in + <tt>gfr</tt> files in addition to <tt>gfc</tt> files. This is useful + when testing operations with the <tt>cc</tt> command. +</ul> + +<!-- NEW --> + +<h2>Module search paths</h2> + +Modules can reside in different directories. Use the <tt>path</tt> +flag to extend the directory search path. For instance, +<pre> + -path=.:../resource/russian:../prelude +</pre> +enables files to be found in three different directories. +By default, only the current directory is included. +If a <tt>path</tt> flag is given, the current directory +<tt>.</tt> must be explicitly included if it is wanted. + +<p> + +The <tt>path</tt> flag can be set in any of the following +places: +<ul> +<li> when invoking GF: <tt>gf -path=xxx</tt> +<li> when importing a module: <tt>i -path=xxx Foo.gf</tt> +<li> as a pragma in a topmost file: <tt>--# -path=xxx</tt> +</ul> +A flag set on a command line overrides ones set in files. + +<p> + +The value of the environment variable <tt>GF_LIB_PATH</tt> is +appended to the user-given path. 
+ + +<!-- NEW --> + +<h2>To do</h2> + +Testing + +<p> + +Documentation + +<p> + +Packaging + + + +<!-- NEW --> + +<h2>Nasty details</h2> + + +<li> Readline in Solaris + +<li> Proper treatment file search paths + +<li> Unicode fonts in GUIs + +<li> directionality of Semitic alphabets + + + +</body> +</html> diff --git a/src/GF/API.hs b/src/GF/API.hs index c3d160bcd..ca97af146 100644 --- a/src/GF/API.hs +++ b/src/GF/API.hs @@ -148,8 +148,9 @@ string2srcTerm gr m s = do randomTreesIO :: Options -> GFGrammar -> Int -> IO [Tree] randomTreesIO opts gr n = do gen <- myStdGen mx - t <- err (\s -> putStrLnFlush s >> return []) (return . singleton) $ - mkRandomTree gen mx g catfun + t <- err (\s -> putS s >> return []) + (return . singleton) $ + mkRandomTree gen mx g catfun ts <- if n==1 then return [] else randomTreesIO opts gr (n-1) return $ t ++ ts where @@ -158,6 +159,8 @@ randomTreesIO opts gr n = do _ -> Left $ firstAbsCat opts gr g = grammar gr mx = optIntOrN opts flagDepth 41 + putS s = if oElem beSilent opts then return () else putStrLnFlush s + generateTrees :: Options -> GFGrammar -> Maybe Tree -> [Tree] generateTrees opts gr mt = diff --git a/src/GF/Compile/Compile.hs b/src/GF/Compile/Compile.hs index fa2e65a3c..78f3a1bb1 100644 --- a/src/GF/Compile/Compile.hs +++ b/src/GF/Compile/Compile.hs @@ -35,6 +35,10 @@ import Arch import Monad +-- environment variable for grammar search path + +gfGrammarPathVar = "GF_LIB_PATH" + -- in batch mode: write code in a file batchCompile f = liftM fst $ compileModule defOpts emptyShellState f @@ -86,9 +90,10 @@ compileModule opts1 st0 file = do let opts = addOptions opts1 opts0 let ps0 = pathListOpts opts let fpath = justInitPath file - let ps = if useFileOpt - then (map (prefixPathName fpath) ps0) - else ps0 + let ps1 = if useFileOpt + then (map (prefixPathName fpath) ps0) + else ps0 + ps <- ioeIO $ extendPathEnv gfGrammarPathVar ps1 let ioeIOIf = if oElem beSilent opts then (const (return ())) else ioeIO ioeIOIf $ putStrLn $ 
"module search path:" +++ show ps ---- let putp = putPointE opts diff --git a/src/GF/Infra/UseIO.hs b/src/GF/Infra/UseIO.hs index 243ead306..3dc41fadc 100644 --- a/src/GF/Infra/UseIO.hs +++ b/src/GF/Infra/UseIO.hs @@ -81,6 +81,13 @@ doesFileExistPath paths file = do mpfile <- ioeIO $ getFilePath paths file return $ maybe False (const True) mpfile +-- path in environment variable has lower priority +extendPathEnv :: String -> [FilePath] -> IO [FilePath] +extendPathEnv var ps = do + s <- catch (getEnv var) (const (return "")) + let fs = pFilePaths s + return $ ps ++ fs + pFilePaths :: String -> [FilePath] pFilePaths s = case span (/=':') s of (f,_:cs) -> f : pFilePaths cs diff --git a/src/GF/Shell/TeachYourself.hs b/src/GF/Shell/TeachYourself.hs index 623bd7b72..e3576e7ed 100644 --- a/src/GF/Shell/TeachYourself.hs +++ b/src/GF/Shell/TeachYourself.hs @@ -24,7 +24,7 @@ teachTranslation opts ig og = do transTrainList :: Options -> GFGrammar -> GFGrammar -> Integer -> IO [(String,[String])] transTrainList opts ig og number = do - ts <- randomTreesIO opts ig (fromInteger number) + ts <- randomTreesIO (addOption beSilent opts) ig (fromInteger number) return $ map mkOne $ ts where cat = firstCatOpts opts ig @@ -39,7 +39,7 @@ teachMorpho opts ig = useIOE () $ do morphoTrainList :: Options -> GFGrammar -> Integer -> IOE [(String,[String])] morphoTrainList opts ig number = do - ts <- ioeIO $ randomTreesIO opts ig (fromInteger number) + ts <- ioeIO $ randomTreesIO (addOption beSilent opts) ig (fromInteger number) gen <- ioeIO $ myStdGen (fromInteger number) mkOnes gen ts where @@ -49,9 +49,9 @@ morphoTrainList opts ig number = do let (i,gen') = randomR (0, length pss - 1) gen (ps,ss) <- ioeErr $ pss !? i (_,ss0) <- ioeErr $ pss !? 
0 - let bas = concat $ take 1 ss0 + let bas = unwords ss0 --- concat $ take 1 ss0 more <- mkOnes gen' ts - return $ (bas +++ ":" +++ unwords (map prt_ ps), return (concat ss)) : more + return $ (bas +++ ":" +++ unwords (map prt_ ps), return (unwords ss)) : more mkOnes gen [] = return [] gr = grammar ig |
