summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release2.html546
-rw-r--r--src/GF/API.hs7
-rw-r--r--src/GF/Compile/Compile.hs11
-rw-r--r--src/GF/Infra/UseIO.hs7
-rw-r--r--src/GF/Shell/TeachYourself.hs8
5 files changed, 570 insertions, 9 deletions
diff --git a/doc/release2.html b/doc/release2.html
new file mode 100644
index 000000000..d34b49cc1
--- /dev/null
+++ b/doc/release2.html
@@ -0,0 +1,546 @@
+<html>
+
+<body bgcolor="#FFFFFF" text="#000000">
+
+<center>
+
+<h1>Grammatical Framework Version 2</h1>
+
+Release of Version 2.0
+
+<p>
+
+Planned: 24 June 2004
+
+<p>
+
+<a href="http://www.cs.chalmers.se/~aarne">Aarne Ranta</a>
+
+</center>
+
+
+<!-- NEW -->
+
+<h2>Highlights</h2>
+
+Module system.
+
+<p>
+
+Separate compilation to canonical GF.
+
+<p>
+
+Improved GUI.
+
+<p>
+
+Improved parser generation.
+
+<p>
+
+Improved shell (new commands and options, help, error messages).
+
+<p>
+
+Accurate <a href="DocGF.pdf">language specification</a>
+(also of GFC).
+
+<p>
+
+Extended resource library.
+
+<p>
+
+Extended Numerals library.
+
+
+<!-- NEW -->
+
+
+
+<h2>Module system</h2>
+
+<li> Separate modules for <tt>abstract</tt>,
+ <tt>concrete</tt>, and <tt>resource</tt>.
+<li> Replaces the file-based <tt>include</tt> system
+<li> Name space handling with qualified names
+<li> Hierarchic structure (single inheritance <tt>**</tt>) +
+ cross-cutting reuse (<tt>open</tt>)
+<li> Separate compilation, one module per file
+<li> Reuse of <tt>abstract</tt>+<tt>concrete</tt> as <tt>resource</tt>
+<li> Parametrized modules:
+ <tt>interface</tt>, <tt>instance</tt>, <tt>incomplete</tt>.
+<li> New experimental module types: <tt>transfer</tt>,
+ <tt>union</tt>.
+
+
+<!-- NEW -->
+
+<h4>Canonical format GFC</h4>
+
+<li> The target of GF compiler; to reuse, just read in.
+
+<li> Readable by Haskell/Java/C++/C applications (by BNFC generated parsers).
+
+
+
+<!-- NEW -->
+
+<h4>New features in expression language</h4>
+
+In addition to the module system:
+
+<p>
+
+<li> Disjunctive patterns <tt>P | ... | Q</tt>.
+<li> String patterns <tt>"foo"</tt>.
+<li> (?) Integer patterns <tt>74</tt>.
+<li> Binding token <tt>&+</tt> to glue separate tokens at unlexing phase,
+ and unlexer to resolve this.
+<li> New syntax alternatives for local definitions: <tt>let</tt> without
+ braces and <tt>where</tt>.
+<li> Pattern variables can be used on lhs's of <tt>oper</tt> definitions.
+<li> New Unicode transliterations (by Harald Hammarström).
+
+
+<!-- NEW -->
+
+<h4>New shell commands and command functionalities</h4>
+
+<li> <tt>pi</tt> = <tt>print_info</tt>: information on an identifier in scope.
+<li> <tt>h</tt> = <tt>help</tt> now in long or short form,
+ and on individual commands.
+<li> <tt>gt</tt> = <tt>generate_trees</tt>: all trees of a given
+ category or instantiations of a given incomplete term, up to a
+ given depth.
+<li> <tt>gr</tt> = <tt>generate_random</tt> can now be given
+ an incomplete term as an argument, to constrain generation.
+<li> <tt>so</tt> = <tt>show_opers</tt> shows all <tt>oper</tt>
+ operations with a given value type.
+<li> <tt>pm</tt> = <tt>print_multi</tt> prints the multilingual
+ grammar resident in the current state to a ready-compiled
+ <tt>.gfcm</tt> file.
+<li> All commands have both long and short names (see help). Short
+ names are easier to type, whereas long names
+ make scripts more readable.
+<li> Meaningless command options generate warnings.
+
+
+<!-- NEW -->
+
+<h4>New editor features</h4>
+
+<li> Active text field: click the middle button in the focus to send
+ in refinement through the parser.
+<li> Clipboard: copy complex terms into the refine menu.
+<li> Two-step refinements generated by the "Generate" operation.
+
+<!-- NEW -->
+
+<h4>Improved implementation</h4>
+
+<li> Haskell source code is organized into subdirectories.
+<li> BNF Converter is used for defining the languages GF and GFC, which also
+ give reliable LaTeX documentation.
+<li> Lexical rules sorted out by option <tt>-cflexer</tt> for efficient
+ parsing with large lexica.
+<li> GHC optimizations and strictness flags are used for improving performance.
+
+
+<!-- NEW -->
+
+<h4>New parser (work in progress)</h4>
+
+<li> By Peter Ljunglöf, based on MCFG.
+<li> Much more efficient for morphology and discontinuous constituents.
+<li> Treatment of cyclic rules.
+<li> Currently lots of alternative parsers via flags <tt>-parser=newX</tt>.
+
+
+<!-- NEW -->
+
+<h2>Status (21/6/2004)</h2>
+
+Grammar compiler, editor GUIs, and shell work for all platforms
+(with restrictions for Solaris).
+
+<p>
+
+The updated <tt>HelpFile</tt> (accessible through <tt>h</tt> command)
+marks unsupported features present in GF 1.2 with <tt>*</tt>.
+They will be supported again if interested users appear.
+
+<p>
+
+GF1 grammars can be automatically translated to GF2 (although the
+result is not as good
+as manual, since indentation and comments are destroyed). The results can be
+saved in GF2 files, but this is not necessary.
+Some rarely used GF1 features are no longer supported (see next section).
+
+<p>
+
+It is also possible to write a GF2 grammar back to GF1, with the
+command <tt>pg -printer=old</tt>.
+
+
+<!-- NEW -->
+
+Resource libraries
+and some example grammars have been
+converted. Most old example grammars work without any changes.
+There is a new resource API with
+many new constructions.
+
+<p>
+
+A make facility works, finding out which modules have to be recompiled.
+
+<p>
+
+Soundness checking of module dependencies and completeness is not
+complete. This means that some errors may show up too late.
+
+<p>
+
+The environment variable <tt>GF_LIB_PATH</tt> needs some more work.
+
+<p>
+
+Latex and XML printing of grammars do not work yet.
+
+
+
+<!-- NEW -->
+
+<h2>How to use GF 1.* files</h2>
+
+Backward compatibility with respect to old GF grammars has been
+a central goal. All GF grammars, from version 0.9, should work in
+the old way in GF2. The main exceptions are some features that
+are rarely used.
+<ul>
+<li> The <tt>package</tt> system introduced in GF 1.2, cannot be
+ interpreted in the module system of GF 2.0, since packages are in
+ mutual scope with the top level.
+<li> <tt>tokenizer</tt> pragmas cannot be parsed any more. In GF
+ 1.2, they are already replaced by <tt>lexer</tt> flags.
+<li> <tt>var</tt> pragmas cannot be parsed any more.
+</ul>
+
+<p>
+
+Very old GF grammars (from versions before 0.9), with the completely
+different notation, do not work. They should be first converted to
+GF1 by using GF version 1.2.
+
+
+<!-- NEW -->
+
+
+The import command <tt>i</tt> can be given the option <tt>-old</tt>. E.g.
+<pre>
+ i -old tut1.Eng.gf
+</pre>
+But this is no longer necessary: GF2 automatically detects whether a grammar
+is in the GF1 format.
+
+<p>
+
+Importing a set of GF1 files generates, internally, three modules:
+<pre>
+ abstract tut1 = ...
+ resource ResEng = ...
+ concrete Eng of tut1 = open ResEng in ...
+</pre>
+(The names are different if the file name has fewer parts.)
+
+
+<p>
+
+The option <tt>-o</tt> causes GF2 to write these modules into files.
+
+
+<!-- NEW -->
+
+The flags <tt>-abs</tt>, <tt>-cnc</tt>, and <tt>-res</tt> can be used
+to give custom names to the modules. In particular, it is good to use
+the <tt>-abs</tt> flag to guarantee that the abstract syntax module
+has the same name for all grammars in a multilingual environment:
+<pre>
+ i -old -abs=Numerals hungarian.gf
+ i -old -abs=Numerals tamil.gf
+ i -old -abs=Numerals sanskrit.gf
+</pre>
+
+<p>
+
+The same flags as in the import command can be used when invoking
+GF2 from the system shell. Many grammars can be imported on the same command
+line, e.g.
+<pre>
+ % gf2 -old -abs=Tutorial tut1.Eng.gf tut1.Fin.gf tut1.Fra.gf
+</pre>
+
+<p>
+
+To write a GF2 grammar back to GF1 (as one big file), use the command
+<pre>
+ > pg -old
+</pre>
+
+
+
+<!-- NEW -->
+
+
+
+GF2 has more reserved words than GF 1.2. When old files are read, a preprocessor
+replaces every identifier that has the shape of a new reserved word
+with a variant where the last letter is replaced by <tt>Z</tt>, e.g.
+<tt>instance</tt> is replaced by <tt>instancZ</tt>. This method is of course
+unsafe and should be replaced by something better.
+
+
+
+
+<!-- NEW -->
+
+<h2>Abstract, concrete, and resource modules</h2>
+
+Judgement forms are sorted as follows:
+<ul>
+<li> abstract:
+ <tt>cat</tt>, <tt>fun</tt>, <tt>def</tt>, <tt>data</tt>, <tt>flags</tt>
+<li> concrete:
+ <tt>lincat</tt>, <tt>lin</tt>, <tt>printname</tt>, <tt>flags</tt>
+<li> resource:
+ <tt>param</tt>, <tt>oper</tt>, <tt>flags</tt>
+<li>
+</ul>
+
+
+<!-- NEW -->
+
+Example:
+<pre>
+ abstract Sums = {
+ cat
+ Exp ;
+ fun
+ One : Exp ;
+ plus : Exp -> Exp -> Exp ;
+ }
+
+ concrete EnglishSums of Sums = open ResEng in {
+ lincat
+ Exp = {s : Str ; n : Number} ;
+ lin
+ One = expSg "one" ;
+ plus x y = expSg ("the" ++ "sum" ++ "of" ++ x.s ++ "and" ++ y.s) ;
+ }
+
+ resource ResEng = {
+ param
+ Number = Sg | Pl ;
+ oper
+ expSg : Str -> {s : Str ; n : Number} = \s -> {s = s ; n = Sg} ;
+ }
+</pre>
+
+
+
+<!-- NEW -->
+
+<h2>Opening and extending modules</h2>
+
+A <tt>concrete</tt> or <tt>resource</tt> can <b>open</b> a
+<tt>resource</tt>. This means that
+<ul>
+<li> the names defined in <tt>resource</tt> can be used ("become visible")
+<li> but: these names are not included in ("exported from") the opening module
+</ul>
+A module of any type can moreover <b>extend</b> a module of the same type.
+This means that
+<ul>
+<li> the names defined in the extended module can be used ("become visible")
+<li> and also: these names are included in ("exported from") the extending module
+</ul>
+Examples of extension:
+<pre>
+ abstract Products = Sums ** {
+ fun times : Exp -> Exp -> Exp ;
+ }
+ -- names exported: Exp, plus, times
+
+ concrete English of Products = EnglishSums ** open ResEng in {
+ lin times x y = expSg ("the" ++ "product" ++ "of" ++ x.s ++ "and" ++ y.s) ;
+ }
+</pre>
+Another important difference:
+<li> extension is single
+<li> opening can be multiple: <tt>open Foo, Bar, Baz in {...}</tt>
+
+<!-- NEW -->
+
+Moreover:
+<li> opening can be <b>qualified</b>
+<p>
+Example of qualified opening:
+<pre>
+ concrete NumberSystems of Systems = open (Bin = Binary), (Dec = Decimal) in {
+ lin
+ BZero = Bin.Zero ;
+ DZero = Dec.Zero
+ }
+</pre>
+
+
+<!-- NEW -->
+
+<h2>Compiling modules</h2>
+
+Separate compilation assumes there is <b>one module per file</b>.
+
+<p>
+
+The <b>module header</b> is the beginning of the module code up to the
+first left bracket (<tt>{</tt>). The header gives
+<ul>
+<li> the module type: <tt>abstract</tt>, <tt>concrete</tt> (<tt>of</tt> <i>A</i>),
+ or <tt>resource</tt>
+<li> the name of the module (next to the module type keyword)
+<li> the name of extended module (between <tt>=</tt> and <tt>**</tt>)
+<li> the names of opened modules
+</ul>
+
+<!-- NEW -->
+
+
+<b>filename</b> = <b>modulename</b> <tt>.</tt> <b>extension</b>
+
+<p>
+
+File name extensions:
+<ul>
+<li> <tt>gf</tt>: GF source file (uses GF syntax, is type checked and compiled)
+<li> <tt>gfc</tt>: canonical GF file (uses GFC syntax, is simply read
+in instead of compiled; produced from all kinds of modules)
+<li> <tt>gfr</tt>: GF resource file (uses GF syntax, is only read in; produced from
+<tt>resource</tt> modules)
+<li> <tt>gfcm</tt>: canonical multilingual GF file
+(uses GFC syntax, is only read in; produced
+from a set of <tt>abstract</tt> and <tt>concrete</tt> modules)
+</ul>
+Only <tt>gf</tt> files should ever be written/edited manually!
+
+
+
+<!-- NEW -->
+
+
+What the make facility does when compiling <tt>Foo.gf</tt>
+<ol>
+<li> read the module header of <tt>Foo.gf</tt>, and recursively all headers from
+the modules it <b>depends</b> on (i.e. extends or opens)
+<li> build a dependency graph of these modules, and do topological sorting
+<li> starting from the first module in topological order,
+compare the modification times of each <tt>gf</tt> and <tt>gfc</tt> file:
+<ul>
+<li> if <tt>gf</tt> is later, compile the module and all modules depending on it
+<li> if <tt>gfc</tt> is later, just read in the module
+</ul>
+</ol>
+Inside the GF shell, also time stamps of modules read into memory are
+taken into account. Thus a module need not be read from a file if the
+module is in the memory and the file has not been modified.
+
+
+<!-- NEW -->
+
+If the compilation of a grammar fails at some module, the state of the
+GF shell contains all modules read up to that point. This makes it
+faster to compile the faulty module again after fixing it.
+
+<p>
+
+Use the command <tt>po</tt> = <tt>print_options</tt> to see what
+modules are in the state.
+
+<p>
+
+To force compilation:
+<ul>
+<li> The flag <i>-src</i> in the import command forces compilation from
+ source even if more recent object files exist. This is useful
+ when testing new versions of GF.
+<li> The flag <i>-retain</i> in the import command forces reading in
+ <tt>gfr</tt> files in addition to <tt>gfc</tt> files. This is useful
+ when testing operations with the <tt>cc</tt> command.
+</ul>
+
+<!-- NEW -->
+
+<h2>Module search paths</h2>
+
+Modules can reside in different directories. Use the <tt>path</tt>
+flag to extend the directory search path. For instance,
+<pre>
+ -path=.:../resource/russian:../prelude
+</pre>
+enables files to be found in three different directories.
+By default, only the current directory is included.
+If a <tt>path</tt> flag is given, the current directory
+<tt>.</tt> must be explicitly included if it is wanted.
+
+<p>
+
+The <tt>path</tt> flag can be set in any of the following
+places:
+<ul>
+<li> when invoking GF: <tt>gf -path=xxx</tt>
+<li> when importing a module: <tt>i -path=xxx Foo.gf</tt>
+<li> as a pragma in a topmost file: <tt>--# -path=xxx</tt>
+</ul>
+A flag set on a command line overrides ones set in files.
+
+<p>
+
+The value of the environment variable <tt>GF_LIB_PATH</tt> is
+appended to the user-given path.
+
+
+<!-- NEW -->
+
+<h2>To do</h2>
+
+Testing
+
+<p>
+
+Documentation
+
+<p>
+
+Packaging
+
+
+
+<!-- NEW -->
+
+<h2>Nasty details</h2>
+
+
+<li> Readline in Solaris
+
+<li> Proper treatment of file search paths
+
+<li> Unicode fonts in GUIs
+
+<li> directionality of Semitic alphabets
+
+
+
+</body>
+</html>
diff --git a/src/GF/API.hs b/src/GF/API.hs
index c3d160bcd..ca97af146 100644
--- a/src/GF/API.hs
+++ b/src/GF/API.hs
@@ -148,8 +148,9 @@ string2srcTerm gr m s = do
randomTreesIO :: Options -> GFGrammar -> Int -> IO [Tree]
randomTreesIO opts gr n = do
gen <- myStdGen mx
- t <- err (\s -> putStrLnFlush s >> return []) (return . singleton) $
- mkRandomTree gen mx g catfun
+ t <- err (\s -> putS s >> return [])
+ (return . singleton) $
+ mkRandomTree gen mx g catfun
ts <- if n==1 then return [] else randomTreesIO opts gr (n-1)
return $ t ++ ts
where
@@ -158,6 +159,8 @@ randomTreesIO opts gr n = do
_ -> Left $ firstAbsCat opts gr
g = grammar gr
mx = optIntOrN opts flagDepth 41
+ putS s = if oElem beSilent opts then return () else putStrLnFlush s
+
generateTrees :: Options -> GFGrammar -> Maybe Tree -> [Tree]
generateTrees opts gr mt =
diff --git a/src/GF/Compile/Compile.hs b/src/GF/Compile/Compile.hs
index fa2e65a3c..78f3a1bb1 100644
--- a/src/GF/Compile/Compile.hs
+++ b/src/GF/Compile/Compile.hs
@@ -35,6 +35,10 @@ import Arch
import Monad
+-- environment variable for grammar search path
+
+gfGrammarPathVar = "GF_LIB_PATH"
+
-- in batch mode: write code in a file
batchCompile f = liftM fst $ compileModule defOpts emptyShellState f
@@ -86,9 +90,10 @@ compileModule opts1 st0 file = do
let opts = addOptions opts1 opts0
let ps0 = pathListOpts opts
let fpath = justInitPath file
- let ps = if useFileOpt
- then (map (prefixPathName fpath) ps0)
- else ps0
+ let ps1 = if useFileOpt
+ then (map (prefixPathName fpath) ps0)
+ else ps0
+ ps <- ioeIO $ extendPathEnv gfGrammarPathVar ps1
let ioeIOIf = if oElem beSilent opts then (const (return ())) else ioeIO
ioeIOIf $ putStrLn $ "module search path:" +++ show ps ----
let putp = putPointE opts
diff --git a/src/GF/Infra/UseIO.hs b/src/GF/Infra/UseIO.hs
index 243ead306..3dc41fadc 100644
--- a/src/GF/Infra/UseIO.hs
+++ b/src/GF/Infra/UseIO.hs
@@ -81,6 +81,13 @@ doesFileExistPath paths file = do
mpfile <- ioeIO $ getFilePath paths file
return $ maybe False (const True) mpfile
+-- path in environment variable has lower priority
+extendPathEnv :: String -> [FilePath] -> IO [FilePath]
+extendPathEnv var ps = do
+ s <- catch (getEnv var) (const (return ""))
+ let fs = pFilePaths s
+ return $ ps ++ fs
+
pFilePaths :: String -> [FilePath]
pFilePaths s = case span (/=':') s of
(f,_:cs) -> f : pFilePaths cs
diff --git a/src/GF/Shell/TeachYourself.hs b/src/GF/Shell/TeachYourself.hs
index 623bd7b72..e3576e7ed 100644
--- a/src/GF/Shell/TeachYourself.hs
+++ b/src/GF/Shell/TeachYourself.hs
@@ -24,7 +24,7 @@ teachTranslation opts ig og = do
transTrainList ::
Options -> GFGrammar -> GFGrammar -> Integer -> IO [(String,[String])]
transTrainList opts ig og number = do
- ts <- randomTreesIO opts ig (fromInteger number)
+ ts <- randomTreesIO (addOption beSilent opts) ig (fromInteger number)
return $ map mkOne $ ts
where
cat = firstCatOpts opts ig
@@ -39,7 +39,7 @@ teachMorpho opts ig = useIOE () $ do
morphoTrainList :: Options -> GFGrammar -> Integer -> IOE [(String,[String])]
morphoTrainList opts ig number = do
- ts <- ioeIO $ randomTreesIO opts ig (fromInteger number)
+ ts <- ioeIO $ randomTreesIO (addOption beSilent opts) ig (fromInteger number)
gen <- ioeIO $ myStdGen (fromInteger number)
mkOnes gen ts
where
@@ -49,9 +49,9 @@ morphoTrainList opts ig number = do
let (i,gen') = randomR (0, length pss - 1) gen
(ps,ss) <- ioeErr $ pss !? i
(_,ss0) <- ioeErr $ pss !? 0
- let bas = concat $ take 1 ss0
+ let bas = unwords ss0 --- concat $ take 1 ss0
more <- mkOnes gen' ts
- return $ (bas +++ ":" +++ unwords (map prt_ ps), return (concat ss)) : more
+ return $ (bas +++ ":" +++ unwords (map prt_ ps), return (unwords ss)) : more
mkOnes gen [] = return []
gr = grammar ig