From ce15ec7b787479ca4c7295863ea7fa5cfdd16755 Mon Sep 17 00:00:00 2001 From: aarne Date: Wed, 22 Dec 2010 14:08:42 +0000 Subject: moved parts of doc to deprecated/doc --- deprecated/Resource-HOWTO.html | 967 ++++++ deprecated/Resource-HOWTO.txt | 827 +++++ deprecated/Syntax.png | Bin 0 -> 104604 bytes deprecated/doc/2341.html | 259 ++ deprecated/doc/DocGF.pdf | Bin 0 -> 56906 bytes deprecated/doc/DocGF.tex | 569 ++++ deprecated/doc/German.png | Bin 0 -> 21000 bytes deprecated/doc/Grammar.dot | 75 + deprecated/doc/Grammar.png | Bin 0 -> 78790 bytes deprecated/doc/TODO | 231 ++ deprecated/doc/compiling-gf.txt | 750 +++++ deprecated/doc/eu-langs.dot | 79 + deprecated/doc/eu-langs.png | Bin 0 -> 85484 bytes deprecated/doc/food-translet.png | Bin 0 -> 22916 bytes deprecated/doc/food1.png | Bin 0 -> 22805 bytes deprecated/doc/food2.png | Bin 0 -> 31506 bytes deprecated/doc/gf-compiler.dot | 88 + deprecated/doc/gf-compiler.png | Bin 0 -> 27451 bytes deprecated/doc/gf-formalism.html | 350 +++ deprecated/doc/gf-formalism.txt | 279 ++ deprecated/doc/gf-ideas.html | 311 ++ deprecated/doc/gf-ideas.txt | 231 ++ deprecated/doc/gf-statistics.txt | 289 ++ deprecated/doc/gf-summerschool.txt | 533 ++++ deprecated/doc/gf3-release.html | 73 + deprecated/doc/gf3-release.txt | 58 + deprecated/doc/school-langs.dot | 106 + deprecated/doc/school-langs.png | Bin 0 -> 131704 bytes deprecated/doc/summer-align.png | Bin 0 -> 449911 bytes deprecated/doc/summer-langs.png | Bin 0 -> 1885485 bytes deprecated/doc/vr.html | 46 + deprecated/doc/vr.txt | 32 + doc/10lang-small.png | Bin 66840 -> 0 bytes doc/2341.html | 259 -- doc/DocGF.pdf | Bin 56906 -> 0 bytes doc/DocGF.tex | 569 ---- doc/German.png | Bin 21000 -> 0 bytes doc/Grammar.dot | 75 - doc/Grammar.png | Bin 78790 -> 0 bytes doc/Resource-HOWTO.html | 967 ------ doc/Resource-HOWTO.txt | 827 ----- doc/Syntax.png | Bin 104604 -> 0 bytes doc/TODO | 231 -- doc/categories.png | Bin 4241 -> 0 bytes doc/compiling-gf.txt | 750 ----- doc/eu-langs.dot | 
79 - doc/eu-langs.png | Bin 85484 -> 0 bytes doc/food-js.png | Bin 19002 -> 0 bytes doc/food-magnet.png | Bin 98845 -> 0 bytes doc/food-translet.png | Bin 22916 -> 0 bytes doc/food1.png | Bin 22805 -> 0 bytes doc/food2.png | Bin 31506 -> 0 bytes doc/foodmarket.png | Bin 2099 -> 0 bytes doc/gf-compiler.dot | 88 - doc/gf-compiler.png | Bin 27451 -> 0 bytes doc/gf-formalism.html | 350 --- doc/gf-formalism.txt | 279 -- doc/gf-ideas.html | 311 -- doc/gf-ideas.txt | 231 -- doc/gf-people.html | 27 +- doc/gf-quickstart.html | 42 +- doc/gf-refman.html | 2 +- doc/gf-statistics.txt | 289 -- doc/gf-summerschool.txt | 533 ---- doc/gf-tutorial.html | 5857 ------------------------------------ doc/gf-tutorial.txt | 5022 ------------------------------- doc/gf3-release.html | 73 - doc/gf3-release.txt | 58 - doc/index.html | 155 +- doc/iphone.jpg | Bin 17150 -> 0 bytes doc/mytree.png | Bin 2230 -> 0 bytes doc/school-langs.dot | 106 - doc/school-langs.png | Bin 131704 -> 0 bytes doc/summer-align.png | Bin 449911 -> 0 bytes doc/summer-langs.png | Bin 1885485 -> 0 bytes doc/tutorial/10lang-small.png | Bin 0 -> 66840 bytes doc/tutorial/categories.png | Bin 0 -> 4241 bytes doc/tutorial/food-js.png | Bin 0 -> 19002 bytes doc/tutorial/food-magnet.png | Bin 0 -> 98845 bytes doc/tutorial/foodmarket.png | Bin 0 -> 2099 bytes doc/tutorial/gf-tutorial.html | 5442 +++++++++++++++++++++++++++++++++ doc/tutorial/gf-tutorial.txt | 5022 +++++++++++++++++++++++++++++++ doc/tutorial/iphone.jpg | Bin 0 -> 17150 bytes doc/tutorial/mytree.png | Bin 0 -> 2230 bytes doc/vr.html | 46 - doc/vr.txt | 32 - index.html | 8 +- 87 files changed, 16654 insertions(+), 17229 deletions(-) create mode 100644 deprecated/Resource-HOWTO.html create mode 100644 deprecated/Resource-HOWTO.txt create mode 100644 deprecated/Syntax.png create mode 100644 deprecated/doc/2341.html create mode 100644 deprecated/doc/DocGF.pdf create mode 100644 deprecated/doc/DocGF.tex create mode 100644 deprecated/doc/German.png create mode 100644 
deprecated/doc/Grammar.dot create mode 100644 deprecated/doc/Grammar.png create mode 100644 deprecated/doc/TODO create mode 100644 deprecated/doc/compiling-gf.txt create mode 100644 deprecated/doc/eu-langs.dot create mode 100644 deprecated/doc/eu-langs.png create mode 100644 deprecated/doc/food-translet.png create mode 100644 deprecated/doc/food1.png create mode 100644 deprecated/doc/food2.png create mode 100644 deprecated/doc/gf-compiler.dot create mode 100644 deprecated/doc/gf-compiler.png create mode 100644 deprecated/doc/gf-formalism.html create mode 100644 deprecated/doc/gf-formalism.txt create mode 100644 deprecated/doc/gf-ideas.html create mode 100644 deprecated/doc/gf-ideas.txt create mode 100644 deprecated/doc/gf-statistics.txt create mode 100644 deprecated/doc/gf-summerschool.txt create mode 100644 deprecated/doc/gf3-release.html create mode 100644 deprecated/doc/gf3-release.txt create mode 100644 deprecated/doc/school-langs.dot create mode 100644 deprecated/doc/school-langs.png create mode 100644 deprecated/doc/summer-align.png create mode 100644 deprecated/doc/summer-langs.png create mode 100644 deprecated/doc/vr.html create mode 100644 deprecated/doc/vr.txt delete mode 100644 doc/10lang-small.png delete mode 100644 doc/2341.html delete mode 100644 doc/DocGF.pdf delete mode 100644 doc/DocGF.tex delete mode 100644 doc/German.png delete mode 100644 doc/Grammar.dot delete mode 100644 doc/Grammar.png delete mode 100644 doc/Resource-HOWTO.html delete mode 100644 doc/Resource-HOWTO.txt delete mode 100644 doc/Syntax.png delete mode 100644 doc/TODO delete mode 100644 doc/categories.png delete mode 100644 doc/compiling-gf.txt delete mode 100644 doc/eu-langs.dot delete mode 100644 doc/eu-langs.png delete mode 100644 doc/food-js.png delete mode 100644 doc/food-magnet.png delete mode 100644 doc/food-translet.png delete mode 100644 doc/food1.png delete mode 100644 doc/food2.png delete mode 100644 doc/foodmarket.png delete mode 100644 doc/gf-compiler.dot delete mode 
100644 doc/gf-compiler.png delete mode 100644 doc/gf-formalism.html delete mode 100644 doc/gf-formalism.txt delete mode 100644 doc/gf-ideas.html delete mode 100644 doc/gf-ideas.txt delete mode 100644 doc/gf-statistics.txt delete mode 100644 doc/gf-summerschool.txt delete mode 100644 doc/gf-tutorial.html delete mode 100644 doc/gf-tutorial.txt delete mode 100644 doc/gf3-release.html delete mode 100644 doc/gf3-release.txt delete mode 100644 doc/iphone.jpg delete mode 100644 doc/mytree.png delete mode 100644 doc/school-langs.dot delete mode 100644 doc/school-langs.png delete mode 100644 doc/summer-align.png delete mode 100644 doc/summer-langs.png create mode 100644 doc/tutorial/10lang-small.png create mode 100644 doc/tutorial/categories.png create mode 100644 doc/tutorial/food-js.png create mode 100644 doc/tutorial/food-magnet.png create mode 100644 doc/tutorial/foodmarket.png create mode 100644 doc/tutorial/gf-tutorial.html create mode 100644 doc/tutorial/gf-tutorial.txt create mode 100644 doc/tutorial/iphone.jpg create mode 100644 doc/tutorial/mytree.png delete mode 100644 doc/vr.html delete mode 100644 doc/vr.txt diff --git a/deprecated/Resource-HOWTO.html b/deprecated/Resource-HOWTO.html new file mode 100644 index 000000000..ce2c15137 --- /dev/null +++ b/deprecated/Resource-HOWTO.html @@ -0,0 +1,967 @@ + + + + +Resource grammar writing HOWTO + +

Resource grammar writing HOWTO

+ +Author: Aarne Ranta <aarne (at) cs.chalmers.se>
+Last update: Mon Sep 22 14:28:01 2008 +
+ +

+
+

+ + +

+
+

+

+History +

+

+September 2008: updated for Version 1.5. +

+

+October 2007: updated for Version 1.2. +

+

+January 2006: first version. +

+

+The purpose of this document is to tell how to implement the GF +resource grammar API for a new language. We will not cover how +to use the resource grammar, nor how to change the API. But we +will give some hints on how to extend the API. +

+

+A manual for using the resource grammar is found in +

+

+www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html. +

+

+A tutorial on GF, also introducing the idea of resource grammars, is found in +

+

+www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html. +

+

+This document concerns the API v. 1.5, while the current stable release is 1.4. +You can find the code for the stable release in +

+

+www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/ +

+

+and the next release in +

+

+www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/ +

+

+It is recommended to build new grammars to match the next release. +

+ +

The resource grammar structure

+

+The library is divided into a bunch of modules, whose dependencies +are given in the following figure. +

+

+ +

+

+Modules of different kinds are distinguished as follows: +

+ + +

+Put in another way: +

+ + +

+The dashed ellipses form the main parts of the implementation, on which the resource +grammar programmer has to work. She also has to work on the Paradigms +module. The rest of the modules can be produced mechanically from corresponding +modules for other languages, by just changing the language codes appearing in +their module headers. +

+

+The module structure is rather flat: most modules are direct +parents of Grammar. The idea +is that the implementors can concentrate on one linguistic aspect at a time, or +also distribute the work among several authors. The module Cat +defines the "glue" that ties the aspects together - a type system +to which all the other modules conform, so that e.g. NP means +the same thing in those modules that use NPs and those that +construct them. +

+ +

Library API modules

+

+For the user of the library, these modules are the most important ones. +In a typical application, it is enough to open Paradigms and Syntax. +The module Try combines these two, making it possible to experiment +with combinations of syntactic and lexical constructors by using the +cc command in the GF shell. Here are short explanations of each API module: +

+ + + +

Phrase category modules

+

+The immediate parents of Grammar will be called phrase category modules, +since each of them concentrates on a particular phrase category (nouns, verbs, +adjectives, sentences,...). A phrase category module tells +how to construct phrases in that category. You will find out that +all functions in any of these modules have the same value type (or maybe +one of a small number of different types). Thus we have +

+ + + +

Infrastructure modules

+

+Expressions of each phrase category are constructed in the corresponding +phrase category module. But their use mostly takes place in other modules. +For instance, noun phrases, which are constructed in Noun, are +used as arguments of functions of almost all other phrase category modules. +How can we build all these modules independently of each other? +

+

+As usual in typeful programming, the only thing you need to know +about an object you use is its type. When writing a linearization rule +for a GF abstract syntax function, the only thing you need to know is +the linearization types of its value and argument categories. To achieve +the division of the resource grammar into several parallel phrase category modules, +what we need is an underlying definition of the linearization types. This +definition is given as the implementation of +

+ + +

+Any resource grammar implementation has first to agree on how to implement +Cat. Luckily enough, even this can be done incrementally: you +can skip the lincat definition of a category and use the default +{s : Str} until you need to change it to something else. In +English, for instance, many categories do have this linearization type. +

+ +

Lexical modules

+

+What is lexical and what is syntactic is not as clearcut in GF as in +some other grammar formalisms. Logically, lexical means atom, i.e. a +fun with no arguments. Linguistically, one may add to this +that the lin consists of only one token (or of a table whose values +are single tokens). Even in the restricted lexicon included in the resource +API, the latter rule is sometimes violated in some languages. For instance, +Structural.both7and_DConj is an atom, but its linearization is +two words e.g. both - and. +

+

+Another characterization of lexical is that lexical units can be added +almost ad libitum, and they cannot be defined in terms of already +given rules. The lexical modules of the resource API are thus more like +samples than complete lists. There are two such modules: +

+ + +

+The module Structural aims for completeness, and is likely to +be extended in future releases of the resource. The module Lexicon +gives a "random" list of words, which enables testing the syntax. +It also provides a check list for morphology, since those words are likely to include +most morphological patterns of the language. +

+

+In the case of Lexicon it may come out clearer than anywhere else +in the API that it is impossible to give exact translation equivalents in +different languages on the level of a resource grammar. This is no problem, +since application grammars can use the resource in different ways for +different languages. +

+ +

Language-dependent syntax modules

+

+In addition to the common API, there is room for language-dependent extensions +of the resource. The top level of each language looks as follows (with German +as example): +

+
+    abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
+
+

+where ExtraGerAbs is a collection of syntactic structures specific to German, +and IrregGerAbs is a dictionary of irregular words of German +(at the moment, just verbs). Each of these language-specific grammars has +the potential to grow into a full-scale grammar of the language. These grammars +can also be used as libraries, but the possibility of using functors is lost. +

+

+To give a better overview of language-specific structures, +modules like ExtraGerAbs +are built from a language-independent module ExtraAbs +by restricted inheritance: +

+
+    abstract ExtraGerAbs = Extra [f,g,...]
+
+

+Thus any category and function in Extra may be shared by a subset of all +languages. One can see this set-up as a matrix, which tells +what Extra structures +are implemented in what languages. For the common API in Grammar, the matrix +is filled with 1's (everything is implemented in every language). +

+

+In a minimal resource grammar implementation, the language-dependent +extensions are just empty modules, but it is good to provide them for +the sake of uniformity. +

+ +

The present-tense fragment

+

+Some lines in the resource library are suffixed with the comment +

+
+    --# notpresent
+
+

+which is used by a preprocessor to exclude those lines from +a reduced version of the full resource. This present-tense-only +version is useful for applications in most technical text, since +it reduces the grammar size and compilation time. It can also +be useful to exclude those lines in a first version of a resource +implementation. To compile a grammar with present-tense-only, use +

+
+    make Present
+
+

+with resource/Makefile. +

+ +

Phases of the work

+ +

Putting up a directory

+

+Unless you are writing an instance of a parametrized implementation +(Romance or Scandinavian), which will be covered later, the +simplest way is to follow roughly the following procedure. Assume you +are building a grammar for the German language. Here are the first steps, +which we actually followed ourselves when building the German implementation +of resource v. 1.0 on Ubuntu Linux. We have slightly modified them to +match resource v. 1.5 and GF v. 3.0. +

+
    +
  1. Create a sister directory for GF/lib/resource/english, named + german. +
    +         cd GF/lib/resource/
    +         mkdir german
    +         cd german
    +
    +

    +
  2. Check out the [ISO 639 3-letter language code + http://www.w3.org/WAI/ER/IG/ert/iso639.htm] + for German: both Ger and Deu are given, and we pick Ger. + (We use the 3-letter codes rather than the more common 2-letter codes, + since they will suffice for many more languages!) +

    +
  3. Copy the *Eng.gf files from english to german, + and rename them: +
    +         cp ../english/*Eng.gf .
    +         rename 's/Eng/Ger/' *Eng.gf
    +
    + If you don't have the rename command, you can use a bash script with mv. +
+ +
    +
  1. Change the Eng module references to Ger references + in all files: +
    +         sed -i 's/English/German/g' *Ger.gf
    +         sed -i 's/Eng/Ger/g' *Ger.gf
    +
    + The first line prevents changing the word English, which appears + here and there in comments, to Gerlish. The sed command syntax + may vary depending on your operating system. +

    +
  2. This may of course change unwanted occurrences of the + string Eng - verify this by +
    +         grep Ger *.gf
    +
    + But you will have to make lots of manual changes in all files anyway! +

    +
  3. Comment out the contents of these files: +
    +         sed -i 's/^/--/' *Ger.gf
    +
    + This will give you a set of templates out of which the grammar + will grow as you uncomment and modify the files rule by rule. +

    +
  4. In all .gf files, uncomment the module headers and brackets, + leaving the module bodies commented. Unfortunately, there is no + simple way to do this automatically (or to avoid commenting these + lines in the previous step) - but uncommenting the first + and the last lines will actually do the job for many of the files. +

    +
  5. Uncomment the contents of the main grammar file: +
    +         sed -i 's/^--//' LangGer.gf
    +
    +

    +
  6. Now you can open the grammar LangGer in GF: +
    +         gf LangGer.gf
    +
    + You will get lots of warnings on missing rules, but the grammar will compile. +

    +
  7. At all the following steps you will now have a valid, but incomplete + GF grammar. The GF command +
    +         pg -missing
    +
    + tells you what exactly is missing. +
+ +

+Here is the module structure of LangGer. It has been simplified by leaving out +the majority of the phrase category modules. Each of them has the same dependencies +as VerbGer, whose complete dependencies are shown as an example. +

+

+ +

+ +

Direction of work

+

+The real work starts now. There are many ways to proceed, the most obvious ones being +

+ + +

+The practical working direction is thus a saw-like motion between the morphological +and top-level modules. Here is a possible course of the work that gives enough +test data and enough general view at any point: +

+
    +
  1. Define Cat.N and the required parameter types in ResGer. As we define +
    +    lincat N  = {s : Number => Case => Str ; g : Gender} ;
    +
    +we need the parameter types Number, Case, and Gender. The definition +of Number in common/ParamX +works for German, so we +use it and just define Case and Gender in ResGer. +

    +
  2. Define some cases of mkN in ParadigmsGer. In this way you can +already implement a huge amount of nouns correctly in LexiconGer. Actually +just adding the worst-case instance of mkN (the one taking the most +arguments) should suffice for every noun - but, +since it is tedious to use, you +might proceed to the next step before returning to morphology and defining the +real work horse, mkN taking two forms and a gender. +

    +
  3. While doing this, you may want to test the resource independently. Do this by + starting the GF shell in the resource directory, by the commands +
    +    > i -retain german/ParadigmsGer
    +    > cc -table mkN "Kirche"
    +
    +

    +
  4. Proceed to determiners and pronouns in +NounGer (DetCN UsePron DetQuant NumSg DefArt IndefArt UseN) and +StructuralGer (i_Pron this_Quant). You also need some categories and +parameter types. At this point, it is maybe not possible to find out the final +linearization types of CN, NP, Det, and Quant, but at least you should +be able to correctly inflect noun phrases such as every airplane: +
    +    > i german/LangGer.gf
    +    > l -table DetCN every_Det (UseN airplane_N)
    +  
    +    Nom: jeder Flugzeug
    +    Acc: jeden Flugzeug
    +    Dat: jedem Flugzeug
    +    Gen: jedes Flugzeugs
    +
    +

    +
  5. Proceed to verbs: define CatGer.V, ResGer.VForm, and +ParadigmsGer.mkV. You may choose to exclude notpresent +cases at this point. But anyway, you will be able to inflect a good +number of verbs in Lexicon, such as +live_V (mkV "leben"). +

    +
  6. Now you can soon form your first sentences: define VP and +Cl in CatGer, VerbGer.UseV, and SentenceGer.PredVP. +Even if you have excluded the tenses, you will be able to produce +
    +    > i -preproc=./mkPresent german/LangGer.gf
    +    > l -table PredVP (UsePron i_Pron) (UseV live_V)
    +  
    +    Pres Simul Pos Main: ich lebe
    +    Pres Simul Pos Inv:  lebe ich
    +    Pres Simul Pos Sub:  ich lebe
    +    Pres Simul Neg Main: ich lebe nicht
    +    Pres Simul Neg Inv:  lebe ich nicht
    +    Pres Simul Neg Sub:  ich nicht lebe
    +
    +You should also be able to parse: +
    +    > p -cat=Cl "ich lebe"
    +    PredVP (UsePron i_Pron) (UseV live_V)
    +
    +

    +
  7. Transitive verbs +(CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a) +are a natural next step, so that you can +produce ich liebe dich ("I love you"). +

    +
  8. Adjectives (CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA) +will force you to think about strong and weak declensions, so that you can +correctly inflect mein neuer Wagen, dieser neue Wagen +("my new car, this new car"). +

    +
  9. Once you have implemented the set +(Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP), +you have overcome most of the difficulties. You know roughly what parameters +and dependencies there are in your language, and you can now proceed very +much in the order you please. +
+ + +

The develop-test cycle

+

+The following develop-test cycle will +be applied most of the time, both in the first steps described above +and in later steps where you are more on your own. +

+
    +
  1. Select a phrase category module, e.g. NounGer, and uncomment some + linearization rules (for instance, DetCN, as above). +

    +
  2. Write down some German examples of this rule, for instance translations + of "the dog", "the house", "the big house", etc. Write these in all their + different forms (two numbers and four cases). +

    +
  3. Think about the categories involved (CN, NP, N, Det) and the + variations they have. Encode this in the lincats of CatGer. + You may have to define some new parameter types in ResGer. +

    +
  4. To be able to test the construction, + define some words you need to instantiate it + in LexiconGer. You will also need some regular inflection patterns + in ParadigmsGer. +

    +
  5. Test by parsing, linearization, + and random generation. In particular, linearization to a table should + be used so that you see all forms produced; the treebank option + preserves the tree +
    +      > gr -cat=NP -number=20 | l -table -treebank
    +
    +

    +
  6. Save some tree-linearization pairs for later regression testing. You can save + a gold standard treebank and use the Unix diff command to compare later + linearizations produced from the same list of trees. If you save the trees + in a file trees, you can do as follows: +
    +      > rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
    +
    +

    +
  7. A file with trees testing all resource functions is included in the resource, + entitled resource/exx-resource.gft. A treebank can be created from this by + the Unix command +
    +    % runghc Make.hs test langs=Ger
    +
    +
+ +

+You are likely to run this cycle a few times for each linearization rule +you implement, and some hundreds of times altogether. There are roughly +70 cats and +600 funs in Lang at the moment (170 of the funs are outside the two +lexicon modules). +

+ +

Auxiliary modules

+

+These auxiliary resource modules will be written by you. +

+ + +

+These modules are language-independent and provided by the existing resource +package. +

+ + +

+An important decision is what rules to implement in terms of operations in +ResGer. The golden rule of functional programming says: +

+ + +

+This rule suggests that an operation should be created if it is to be +used at least twice. At the same time, a sound principle of vicinity says: +

+ + +

+From these two principles, we have derived the following practice: +

+ + +

+This discipline is very different from the one followed in early +versions of the library (up to 0.9). We then valued the principle of +abstraction more than vicinity, creating layers of abstraction for +almost everything. This led in practice to the duplication of almost +all code on the lin and oper levels, and made the code +hard to understand and maintain. +

+ +

Morphology and lexicon

+

+The paradigms needed to implement +LexiconGer are defined in +ParadigmsGer. +This module provides high-level ways to define the linearization of +lexical items, of categories N, A, V and their complement-taking +variants. +

+

+For ease of use, the Paradigms modules follow a certain +naming convention. Thus they provide, for each lexical category, such as N, +the overloaded functions, such as mkN, with the following cases: +

+ + +

+For the complement-taking variants, such as V2, we provide +

+ + +

+The golden rule for the design of paradigms is that +

+ + +

+The discipline of data abstraction moreover requires that the user of the resource +is not given access to parameter constructors, but only to constants that denote +them. This gives the resource grammarian the freedom to change the underlying +data representation if needed. It means that the ParadigmsGer module has +to define constants for those parameter types and constructors that +the application grammarian may need to use, e.g. +

+
+    oper 
+      Case : Type ;
+      nominative, accusative, genitive, dative : Case ;
+
+

+These constants are defined in terms of parameter types and constructors +in ResGer and MorphoGer, which modules are not +visible to the application grammarian. +

+ +

Lock fields

+

+An important difference between MorphoGer and +ParadigmsGer is that the former uses "raw" record types +for word classes, whereas the latter uses category symbols defined in +CatGer. When these category symbols are used to denote +record types in a resource module, such as ParadigmsGer, +a lock field is added to the record, so that categories +with the same implementation are not confused with each other. +(This is inspired by the newtype discipline in Haskell.) +For instance, the lincats of adverbs and conjunctions are the same +in CommonX (and therefore in CatGer, which inherits it): +

+
+    lincat Adv  = {s : Str} ;
+    lincat Conj = {s : Str} ;
+
+

+But when these category symbols are used to denote their linearization +types in a resource module, these definitions are translated to +

+
+    oper Adv  : Type = {s : Str  ; lock_Adv  : {}} ;
+    oper Conj : Type = {s : Str  ; lock_Conj : {}} ;
+
+

+In this way, the user of a resource grammar cannot confuse adverbs with +conjunctions. In other words, the lock fields force the type checker +to function as a grammaticality checker. +

+

+When the resource grammar is opened in an application grammar, the +lock fields are never seen (except possibly in type error messages), +and the application grammarian should never write them herself. If she +has to do this, it is a sign that the resource grammar is incomplete, and +the proper way to proceed is to fix the resource grammar. +

+

+The resource grammarian has to provide the dummy lock field values +in her hidden definitions of constants in Paradigms. For instance, +

+
+    mkAdv : Str -> Adv ;
+    -- mkAdv s = {s = s ; lock_Adv = <>} ;
+
+

+ +

Lexicon construction

+

+The lexicon belonging to LangGer consists of two modules: +

+ + +

+The reason why MorphoGer has to be used in StructuralGer +is that ParadigmsGer does not contain constructors for closed +word classes such as pronouns and determiners. The reason why we +recommend ParadigmsGer for building LexiconGer is that +the coverage of the paradigms gets thereby tested and that the +use of the paradigms in LexiconGer gives a good set of examples for +those who want to build new lexica. +

+ +

Lexicon extension

+ +

The irregularity lexicon

+

+It is useful in most languages to provide a separate module of irregular +verbs and other words which are difficult for a lexicographer +to handle. There are usually a limited number of such words - a +few hundred perhaps. Building such a lexicon separately also +makes it less important to cover everything by the +worst-case variants of the paradigms mkV etc. +

+ +

Lexicon extraction from a word list

+

+You can often find resources such as lists of +irregular verbs on the internet. For instance, the +Irregular German Verb page +previously found at +http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html +gives a list of verbs in the +traditional tabular format, which begins as follows: +

+
+    backen (du bäckst, er bäckt)                   backte [buk]              gebacken
+    befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
+    beginnen                                       begann (begönne; begänne) begonnen
+    beißen                                         biß                       gebissen
+
+

+All you have to do is to write a suitable verb paradigm +

+
+    irregV : (x1,_,_,_,_,x6 : Str) -> V ;
+
+

+and a Perl or Python or Haskell script that transforms +the table to +

+
+    backen_V   = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
+    befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
+
+

+

+When using ready-made word lists, you should think about +copyright issues. All resource grammar material should +be provided under the GNU Lesser General Public License (LGPL). +

+ +

Lexicon extraction from raw text data

+

+This is a cheap technique to build a lexicon of thousands +of words, if text data is available in digital format. +See the Extract Homepage +for details. +

+ +

Bootstrapping with smart paradigms

+

+This is another cheap technique, where you need as input a list of words with +part-of-speech marking. You initialize the lexicon by using the one-argument +mkN etc paradigms, and add forms to those words that do not come out right. +This procedure is described in the paper +

+

+A. Ranta. +How predictable is Finnish morphology? An experiment on lexicon construction. +In J. Nivre, M. Dahllöf and B. Megyesi (eds), +Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein, +University of Uppsala, +2008. +Available from the series homepage +

+ +

Extending the resource grammar API

+

+Sooner or later it will happen that the resource grammar API +does not suffice for all applications. A common reason is +that it does not include idiomatic expressions in a given language. +The solution then is in the first place to build language-specific +extension modules, like ExtraGer. +

+ +

Using parametrized modules

+ +

Writing an instance of parametrized resource grammar implementation

+

+Above we have looked at how a resource implementation is built by +the copy and paste method (from English to German), that is, formally +speaking, from scratch. A more elegant solution available for +families of languages such as Romance and Scandinavian is to +use parametrized modules. The advantages are +

+ + +

+Here is a set of +slides +on the topic. +

+ +

Parametrizing a resource grammar implementation

+

+This is the most demanding form of resource grammar writing. +We do not recommend the method of parametrizing from the +beginning: it is easier to have one language first implemented +in the conventional way and then add another language of the +same family by parametrization. This means that the copy and +paste method is still used, but this time the differences +are put into an interface module. +

+ +

Character encoding and transliterations

+

+This section is relevant for languages using a non-ASCII character set. +

+ +

Coding conventions in GF

+

+From version 3.0, GF follows a simple encoding convention: +

+ + +

+Most current resource grammars use isolatin-1 in the source, but this does +not affect their use in parallel with grammars written in other encodings. +In fact, a grammar can be put together from modules using different codings. +

+

+Warning. While string literals may contain any characters, identifiers +must be isolatin-1 letters (or digits, underscores, or dashes). This has to +do with the restrictions of the lexer tool that is used. +

+ +

Transliterations

+

+While UTF-8 is well supported by most web browsers, its use in terminals and +text editors may cause disappointment. Many grammarians therefore prefer to +use ASCII transliterations. GF 3.0beta2 provides the following built-in +transliterations: +

+ + +

+New transliterations can be defined in the GF source file +GF/Text/Transliterations.hs. +This file also gives instructions on how new ones are added. +

+ + + + diff --git a/deprecated/Resource-HOWTO.txt b/deprecated/Resource-HOWTO.txt new file mode 100644 index 000000000..8e50974a7 --- /dev/null +++ b/deprecated/Resource-HOWTO.txt @@ -0,0 +1,827 @@ +Resource grammar writing HOWTO +Author: Aarne Ranta +Last update: %%date(%c) + +% NOTE: this is a txt2tags file. +% Create an html file from this file using: +% txt2tags --toc -thtml Resource-HOWTO.txt + +%!target:html + +**History** + +September 2008: updated for Version 1.5. + +October 2007: updated for Version 1.2. + +January 2006: first version. + + +The purpose of this document is to tell how to implement the GF +resource grammar API for a new language. We will //not// cover how +to use the resource grammar, nor how to change the API. But we +will give some hints how to extend the API. + +A manual for using the resource grammar is found in + +[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html`` ../lib/resource/doc/synopsis.html]. + +A tutorial on GF, also introducing the idea of resource grammars, is found in + +[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html`` ./gf-tutorial.html]. + +This document concerns the API v. 1.5, while the current stable release is 1.4. +You can find the code for the stable release in + +[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/`` ../lib/resource] + +and the next release in + +[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/`` ../next-lib/src] + +It is recommended to build new grammars to match the next release. + + + + +==The resource grammar structure== + +The library is divided into a bunch of modules, whose dependencies +are given in the following figure. 
+ +[Syntax.png] + +Modules of different kinds are distinguished as follows: +- solid contours: module seen by end users +- dashed contours: internal module +- ellipse: abstract/concrete pair of modules +- rectangle: resource or instance +- diamond: interface + + +Put in another way: +- solid rectangles and diamonds: user-accessible library API +- solid ellipses: user-accessible top-level grammar for parsing and linearization +- dashed contours: not visible to users + + +The dashed ellipses form the main parts of the implementation, on which the resource +grammar programmer has to work with. She also has to work on the ``Paradigms`` +module. The rest of the modules can be produced mechanically from corresponding +modules for other languages, by just changing the language codes appearing in +their module headers. + +The module structure is rather flat: most modules are direct +parents of ``Grammar``. The idea +is that the implementors can concentrate on one linguistic aspect at a time, or +also distribute the work among several authors. The module ``Cat`` +defines the "glue" that ties the aspects together - a type system +to which all the other modules conform, so that e.g. ``NP`` means +the same thing in those modules that use ``NP``s and those that +constructs them. + + +===Library API modules=== + +For the user of the library, these modules are the most important ones. +In a typical application, it is enough to open ``Paradigms`` and ``Syntax``. +The module ``Try`` combines these two, making it possible to experiment +with combinations of syntactic and lexical constructors by using the +``cc`` command in the GF shell. 
Here are short explanations of each API module: +- ``Try``: the whole resource library for a language (``Paradigms``, ``Syntax``, + ``Irreg``, and ``Extra``); + produced mechanically as a collection of modules +- ``Syntax``: language-independent categories, syntax functions, and structural words; + produced mechanically as a collection of modules +- ``Constructors``: language-independent syntax functions and structural words; + produced mechanically via functor instantiation +- ``Paradigms``: language-dependent morphological paradigms + + + + + +===Phrase category modules=== + +The immediate parents of ``Grammar`` will be called **phrase category modules**, +since each of them concentrates on a particular phrase category (nouns, verbs, +adjectives, sentences,...). A phrase category module tells +//how to construct phrases in that category//. You will find out that +all functions in any of these modules have the same value type (or maybe +one of a small number of different types). Thus we have +- ``Noun``: construction of nouns and noun phrases +- ``Adjective``: construction of adjectival phrases +- ``Verb``: construction of verb phrases +- ``Adverb``: construction of adverbial phrases +- ``Numeral``: construction of cardinal and ordinal numerals +- ``Sentence``: construction of sentences and imperatives +- ``Question``: construction of questions +- ``Relative``: construction of relative clauses +- ``Conjunction``: coordination of phrases +- ``Phrase``: construction of the major units of text and speech +- ``Text``: construction of texts as sequences of phrases +- ``Idiom``: idiomatic expressions such as existentials + + + + +===Infrastructure modules=== + +Expressions of each phrase category are constructed in the corresponding +phrase category module. But their //use// takes mostly place in other modules. +For instance, noun phrases, which are constructed in ``Noun``, are +used as arguments of functions of almost all other phrase category modules. 
+How can we build all these modules independently of each other? + +As usual in typeful programming, the //only// thing you need to know +about an object you use is its type. When writing a linearization rule +for a GF abstract syntax function, the only thing you need to know is +the linearization types of its value and argument categories. To achieve +the division of the resource grammar to several parallel phrase category modules, +what we need is an underlying definition of the linearization types. This +definition is given as the implementation of +- ``Cat``: syntactic categories of the resource grammar + + +Any resource grammar implementation has first to agree on how to implement +``Cat``. Luckily enough, even this can be done incrementally: you +can skip the ``lincat`` definition of a category and use the default +``{s : Str}`` until you need to change it to something else. In +English, for instance, many categories do have this linearization type. + + + +===Lexical modules=== + +What is lexical and what is syntactic is not as clearcut in GF as in +some other grammar formalisms. Logically, lexical means atom, i.e. a +``fun`` with no arguments. Linguistically, one may add to this +that the ``lin`` consists of only one token (or of a table whose values +are single tokens). Even in the restricted lexicon included in the resource +API, the latter rule is sometimes violated in some languages. For instance, +``Structural.both7and_DConj`` is an atom, but its linearization is +two words e.g. //both - and//. + +Another characterization of lexical is that lexical units can be added +almost //ad libitum//, and they cannot be defined in terms of already +given rules. The lexical modules of the resource API are thus more like +samples than complete lists. There are two such modules: +- ``Structural``: structural words (determiners, conjunctions,...) +- ``Lexicon``: basic everyday content words (nouns, verbs,...) 
+ + +The module ``Structural`` aims for completeness, and is likely to +be extended in future releases of the resource. The module ``Lexicon`` +gives a "random" list of words, which enables testing the syntax. +It also provides a check list for morphology, since those words are likely to include +most morphological patterns of the language. + +In the case of ``Lexicon`` it may come out clearer than anywhere else +in the API that it is impossible to give exact translation equivalents in +different languages on the level of a resource grammar. This is no problem, +since application grammars can use the resource in different ways for +different languages. + + +==Language-dependent syntax modules== + +In addition to the common API, there is room for language-dependent extensions +of the resource. The top level of each languages looks as follows (with German +as example): +``` + abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs +``` +where ``ExtraGerAbs`` is a collection of syntactic structures specific to German, +and ``IrregGerAbs`` is a dictionary of irregular words of German +(at the moment, just verbs). Each of these language-specific grammars has +the potential to grow into a full-scale grammar of the language. These grammar +can also be used as libraries, but the possibility of using functors is lost. + +To give a better overview of language-specific structures, +modules like ``ExtraGerAbs`` +are built from a language-independent module ``ExtraAbs`` +by restricted inheritance: +``` + abstract ExtraGerAbs = Extra [f,g,...] +``` +Thus any category and function in ``Extra`` may be shared by a subset of all +languages. One can see this set-up as a matrix, which tells +what ``Extra`` structures +are implemented in what languages. For the common API in ``Grammar``, the matrix +is filled with 1's (everything is implemented in every language). 
+ +In a minimal resource grammar implementation, the language-dependent +extensions are just empty modules, but it is good to provide them for +the sake of uniformity. + + + +===The present-tense fragment=== + +Some lines in the resource library are suffixed with the comment +``` + --# notpresent +``` +which is used by a preprocessor to exclude those lines from +a reduced version of the full resource. This present-tense-only +version is useful for applications in most technical text, since +they reduce the grammar size and compilation time. It can also +be useful to exclude those lines in a first version of resource +implementation. To compile a grammar with present-tense-only, use +``` + make Present +``` +with ``resource/Makefile``. + + + +==Phases of the work== + +===Putting up a directory=== + +Unless you are writing an instance of a parametrized implementation +(Romance or Scandinavian), which will be covered later, the +simplest way is to follow roughly the following procedure. Assume you +are building a grammar for the German language. Here are the first steps, +which we actually followed ourselves when building the German implementation +of resource v. 1.0 at Ubuntu linux. We have slightly modified them to +match resource v. 1.5 and GF v. 3.0. + ++ Create a sister directory for ``GF/lib/resource/english``, named + ``german``. +``` + cd GF/lib/resource/ + mkdir german + cd german +``` + ++ Check out the [ISO 639 3-letter language code + http://www.w3.org/WAI/ER/IG/ert/iso639.htm] + for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``. + (We use the 3-letter codes rather than the more common 2-letter codes, + since they will suffice for many more languages!) + ++ Copy the ``*Eng.gf`` files from ``english`` ``german``, + and rename them: +``` + cp ../english/*Eng.gf . + rename 's/Eng/Ger/' *Eng.gf +``` + If you don't have the ``rename`` command, you can use a bash script with ``mv``. 
+ + ++ Change the ``Eng`` module references to ``Ger`` references + in all files: +``` + sed -i 's/English/German/g' *Ger.gf + sed -i 's/Eng/Ger/g' *Ger.gf +``` + The first line prevents changing the word ``English``, which appears + here and there in comments, to ``Gerlish``. The ``sed`` command syntax + may vary depending on your operating system. + ++ This may of course change unwanted occurrences of the + string ``Eng`` - verify this by +``` + grep Ger *.gf +``` + But you will have to make lots of manual changes in all files anyway! + ++ Comment out the contents of these files: +``` + sed -i 's/^/--/' *Ger.gf +``` + This will give you a set of templates out of which the grammar + will grow as you uncomment and modify the files rule by rule. + ++ In all ``.gf`` files, uncomment the module headers and brackets, + leaving the module bodies commented. Unfortunately, there is no + simple way to do this automatically (or to avoid commenting these + lines in the previous step) - but uncommenting the first + and the last lines will actually do the job for many of the files. + ++ Uncomment the contents of the main grammar file: +``` + sed -i 's/^--//' LangGer.gf +``` + ++ Now you can open the grammar ``LangGer`` in GF: +``` + gf LangGer.gf +``` + You will get lots of warnings on missing rules, but the grammar will compile. + ++ At all the following steps you will now have a valid, but incomplete + GF grammar. The GF command +``` + pg -missing +``` + tells you what exactly is missing. + + +Here is the module structure of ``LangGer``. It has been simplified by leaving out +the majority of the phrase category modules. Each of them has the same dependencies +as ``VerbGer``, whose complete dependencies are shown as an example. + +[German.png] + + +===Direction of work=== + +The real work starts now. 
There are many ways to proceed, the most obvious ones being +- Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then + ``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time + building complete phrases, and add them with more content as you proceed. + **This approach is not recommended**. It is impossible to test the rules if + you have no words to apply the constructions to. + +- Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you + need to write ``ParadigmsGer``, which in turn needs parts of + ``MorphoGer`` and ``ResGer``. + **This approach is not recommended**. You can get stuck to details of + morphology such as irregular words, and you don't have enough grasp about + the type system to decide what forms to cover in morphology. + + +The practical working direction is thus a saw-like motion between the morphological +and top-level modules. Here is a possible course of the work that gives enough +test data and enough general view at any point: ++ Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define +``` + lincat N = {s : Number => Case => Str ; g : Gender} ; +``` +we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition +of ``Number`` in [``common/ParamX`` ../lib/resource/common/ParamX.gf] +works for German, so we +use it and just define ``Case`` and ``Gender`` in ``ResGer``. + ++ Define some cases of ``mkN`` in ``ParadigmsGer``. In this way you can +already implement a huge amount of nouns correctly in ``LexiconGer``. Actually +just adding the worst-case instance of ``mkN`` (the one taking the most +arguments) should suffice for every noun - but, +since it is tedious to use, you +might proceed to the next step before returning to morphology and defining the +real work horse, ``mkN`` taking two forms and a gender. + ++ While doing this, you may want to test the resource independently. 
Do this by + starting the GF shell in the ``resource`` directory, by the commands +``` + > i -retain german/ParadigmsGer + > cc -table mkN "Kirche" +``` + ++ Proceed to determiners and pronouns in +``NounGer`` (``DetCN UsePron DetQuant NumSg DefArt IndefArt UseN``) and +``StructuralGer`` (``i_Pron this_Quant``). You also need some categories and +parameter types. At this point, it is maybe not possible to find out the final +linearization types of ``CN``, ``NP``, ``Det``, and ``Quant``, but at least you should +be able to correctly inflect noun phrases such as //every airplane//: +``` + > i german/LangGer.gf + > l -table DetCN every_Det (UseN airplane_N) + + Nom: jeder Flugzeug + Acc: jeden Flugzeug + Dat: jedem Flugzeug + Gen: jedes Flugzeugs +``` + ++ Proceed to verbs: define ``CatGer.V``, ``ResGer.VForm``, and +``ParadigmsGer.mkV``. You may choose to exclude ``notpresent`` +cases at this point. But anyway, you will be able to inflect a good +number of verbs in ``Lexicon``, such as +``live_V`` (``mkV "leben"``). + ++ Now you can soon form your first sentences: define ``VP`` and +``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``. +Even if you have excluded the tenses, you will be able to produce +``` + > i -preproc=./mkPresent german/LangGer.gf + > l -table PredVP (UsePron i_Pron) (UseV live_V) + + Pres Simul Pos Main: ich lebe + Pres Simul Pos Inv: lebe ich + Pres Simul Pos Sub: ich lebe + Pres Simul Neg Main: ich lebe nicht + Pres Simul Neg Inv: lebe ich nicht + Pres Simul Neg Sub: ich nicht lebe +``` +You should also be able to parse: +``` + > p -cat=Cl "ich lebe" + PredVP (UsePron i_Pron) (UseV live_V) +``` + ++ Transitive verbs +(``CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a``) +are a natural next step, so that you can +produce ``ich liebe dich`` ("I love you"). 
+ +Adjectives (``CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA``) +will force you to think about strong and weak declensions, so that you can +correctly inflect //mein neuer Wagen, dieser neue Wagen// +("my new car, this new car"). + ++ Once you have implemented the set +(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP``), +you have overcome most of the difficulties. You know roughly what parameters +and dependencies there are in your language, and you can now proceed very +much in the order you please. + + + +===The develop-test cycle=== + +The following develop-test cycle will +be applied most of the time, both in the first steps described above +and in later steps where you are more on your own. + ++ Select a phrase category module, e.g. ``NounGer``, and uncomment some + linearization rules (for instance, ``DetCN``, as above). + ++ Write down some German examples of this rule, for instance translations + of "the dog", "the house", "the big house", etc. Write these in all their + different forms (two numbers and four cases). + ++ Think about the categories involved (``CN, NP, N, Det``) and the + variations they have. Encode this in the lincats of ``CatGer``. + You may have to define some new parameter types in ``ResGer``. + ++ To be able to test the construction, + define some words you need to instantiate it + in ``LexiconGer``. You will also need some regular inflection patterns + in ``ParadigmsGer``. + ++ Test by parsing, linearization, + and random generation. In particular, linearization to a table should + be used so that you see all forms produced; the ``treebank`` option + preserves the tree +``` + > gr -cat=NP -number=20 | l -table -treebank +``` + ++ Save some tree-linearization pairs for later regression testing. You can save + a gold standard treebank and use the Unix ``diff`` command to compare later + linearizations produced from the same list of trees. 
If you save the trees + in a file ``trees``, you can do as follows: +``` + > rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank +``` + ++ A file with trees testing all resource functions is included in the resource, + entitled ``resource/exx-resource.gft``. A treebank can be created from this by + the Unix command +``` + % runghc Make.hs test langs=Ger +``` + + + +You are likely to run this cycle a few times for each linearization rule +you implement, and some hundreds of times altogether. There are roughly +70 ``cat``s and +600 ``fun``s in ``Lang`` at the moment (170 of the ``fun``s are outside the two +lexicon modules). + + +===Auxiliary modules=== + +These auxiliary ``resource`` modules will be written by you. + +- ``ResGer``: parameter types and auxiliary operations +(a resource for the resource grammar!) +- ``ParadigmsGer``: complete inflection engine and most important regular paradigms +- ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need +not be separate from ``ResGer``. + + +These modules are language-independent and provided by the existing resource +package. + +- ``ParamX``: parameter types used in many languages +- ``CommonX``: implementation of language-uniform categories + such as ``Text`` and ``Phr``, as well as of + the logical tense, anteriority, and polarity parameters +- ``Coordination``: operations to deal with lists and coordination +- ``Prelude``: general-purpose operations on strings, records, + truth values, etc. +- ``Predef``: general-purpose operations with hard-coded definitions + + +An important decision is what rules to implement in terms of operations in +``ResGer``. The **golden rule of functional programming** says: +- //Whenever you find yourself programming by copy and paste, write a function instead!//. + + +This rule suggests that an operation should be created if it is to be +used at least twice. 
At the same time, a sound principle of **vicinity** says: +- //It should not require too much browsing to understand what a piece of code does.// + + +From these two principles, we have derived the following practice: +- If an operation is needed //in two different modules//, + it should be created as an ``oper`` in ``ResGer``. An example is ``mkClause``, + used in ``Sentence``, ``Question``, and ``Relative``. +- If an operation is needed //twice in the same module//, but never + outside, it should be created in the same module. Many examples are + found in ``Numerals``. +- If an operation is needed //twice in the same judgement//, but never + outside, it should be created by a ``let`` definition. +- If an operation is only needed once, it should not be created as an ``oper``, + but rather inlined. However, a ``let`` definition may well be in place just + to make the code readable. + Most functions in phrase category modules + are implemented in this way. + + +This discipline is very different from the one followed in early +versions of the library (up to 0.9). We then valued the principle of +abstraction more than vicinity, creating layers of abstraction for +almost everything. This led in practice to the duplication of almost +all code on the ``lin`` and ``oper`` levels, and made the code +hard to understand and maintain. + + + +===Morphology and lexicon=== + +The paradigms needed to implement +``LexiconGer`` are defined in +``ParadigmsGer``. +This module provides high-level ways to define the linearization of +lexical items, of categories ``N, A, V`` and their complement-taking +variants. + +For ease of use, the ``Paradigms`` modules follow a certain +naming convention. Thus they provide, for each lexical category, such as ``N``, +an overloaded function, such as ``mkN``, with the following cases: + +- the worst-case construction of ``N``. Its type signature + has the form +``` + mkN : Str -> ... -> Str -> P -> ... 
-> Q -> N +``` + with as many string and parameter arguments as can ever be needed to + construct an ``N``. +- the most regular cases, with just one string argument: +``` + mkN : Str -> N +``` +- A language-dependent (small) set of functions to handle mild irregularities + and common exceptions. + + +For the complement-taking variants, such as ``V2``, we provide +- a case that takes a ``V`` and all necessary arguments, such + as case and preposition: +``` + mkV2 : V -> Case -> Str -> V2 ; +``` +- a case that takes a ``Str`` and produces a transitive verb with the direct + object case: +``` + mkV2 : Str -> V2 ; +``` +- A language-dependent (small) set of functions to handle common special cases, + such as transitive verbs that are not regular: +``` + mkV2 : V -> V2 ; +``` + + +The golden rule for the design of paradigms is that +- //The user of the library will only need function applications with constants and strings, never any records or tables.// + + +The discipline of data abstraction moreover requires that the user of the resource +is not given access to parameter constructors, but only to constants that denote +them. This gives the resource grammarian the freedom to change the underlying +data representation if needed. It means that the ``ParadigmsGer`` module has +to define constants for those parameter types and constructors that +the application grammarian may need to use, e.g. +``` + oper + Case : Type ; + nominative, accusative, genitive, dative : Case ; +``` +These constants are defined in terms of parameter types and constructors +in ``ResGer`` and ``MorphoGer``, which modules are not +visible to the application grammarian. + + +===Lock fields=== + +An important difference between ``MorphoGer`` and +``ParadigmsGer`` is that the former uses "raw" record types +for word classes, whereas the latter used category symbols defined in +``CatGer``. 
When these category symbols are used to denote +record types in a resource module, such as ``ParadigmsGer``, +a **lock field** is added to the record, so that categories +with the same implementation are not confused with each other. +(This is inspired by the ``newtype`` discipline in Haskell.) +For instance, the lincats of adverbs and conjunctions are the same +in ``CommonX`` (and therefore in ``CatGer``, which inherits it): +``` + lincat Adv = {s : Str} ; + lincat Conj = {s : Str} ; +``` +But when these category symbols are used to denote their linearization +types in a resource module, these definitions are translated to +``` + oper Adv : Type = {s : Str ; lock_Adv : {}} ; + oper Conj : Type = {s : Str ; lock_Conj : {}} ; +``` +In this way, the user of a resource grammar cannot confuse adverbs with +conjunctions. In other words, the lock fields force the type checker +to function as a grammaticality checker. + +When the resource grammar is ``open``ed in an application grammar, the +lock fields are never seen (except possibly in type error messages), +and the application grammarian should never write them herself. If she +has to do this, it is a sign that the resource grammar is incomplete, and +the proper way to proceed is to fix the resource grammar. + +The resource grammarian has to provide the dummy lock field values +in her hidden definitions of constants in ``Paradigms``. For instance, +``` + mkAdv : Str -> Adv ; + -- mkAdv s = {s = s ; lock_Adv = <>} ; +``` + + +===Lexicon construction=== + +The lexicon belonging to ``LangGer`` consists of two modules: +- ``StructuralGer``, structural words, built by using both + ``ParadigmsGer`` and ``MorphoGer``. +- ``LexiconGer``, content words, built by using ``ParadigmsGer`` only. + + +The reason why ``MorphoGer`` has to be used in ``StructuralGer`` +is that ``ParadigmsGer`` does not contain constructors for closed +word classes such as pronouns and determiners. 
The reason why we +recommend ``ParadigmsGer`` for building ``LexiconGer`` is that +the coverage of the paradigms gets thereby tested and that the +use of the paradigms in ``LexiconGer`` gives a good set of examples for +those who want to build new lexica. + + + + + +==Lexicon extension== + +===The irregularity lexicon=== + +It is useful in most languages to provide a separate module of irregular +verbs and other words which are difficult for a lexicographer +to handle. There are usually a limited number of such words - a +few hundred perhaps. Building such a lexicon separately also +makes it less important to cover //everything// by the +worst-case variants of the paradigms ``mkV`` etc. + + + +===Lexicon extraction from a word list=== + +You can often find resources such as lists of +irregular verbs on the internet. For instance, the +Irregular German Verb page +previously found at +``http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html`` +gives a list of verbs in the +traditional tabular format, which begins as follows: +``` + backen (du bäckst, er bäckt) backte [buk] gebacken + befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen + beginnen begann (begönne; begänne) begonnen + beißen biß gebissen +``` +All you have to do is to write a suitable verb paradigm +``` + irregV : (x1,_,_,_,_,x6 : Str) -> V ; +``` +and a Perl or Python or Haskell script that transforms +the table to +``` + backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ; + befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ; +``` + +When using ready-made word lists, you should think about +copyright issues. All resource grammar material should +be provided under the GNU Lesser General Public License (LGPL). + + + +===Lexicon extraction from raw text data=== + +This is a cheap technique to build a lexicon of thousands +of words, if text data is available in digital format. 
+See the [Extract Homepage http://www.cs.chalmers.se/~markus/extract/] +homepage for details. + + +===Bootstrapping with smart paradigms=== + +This is another cheap technique, where you need as input a list of words with +part-of-speech marking. You initialize the lexicon by using the one-argument +``mkN`` etc paradigms, and add forms to those words that do not come out right. +This procedure is described in the paper + +A. Ranta. +How predictable is Finnish morphology? An experiment on lexicon construction. +In J. Nivre, M. Dahllöf and B. Megyesi (eds), +//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//, +University of Uppsala, +2008. +Available from the [series homepage http://publications.uu.se/abstract.xsql?dbid=8933] + + + + +==Extending the resource grammar API== + +Sooner or later it will happen that the resource grammar API +does not suffice for all applications. A common reason is +that it does not include idiomatic expressions in a given language. +The solution then is in the first place to build language-specific +extension modules, like ``ExtraGer``. + +==Using parametrized modules== + +===Writing an instance of parametrized resource grammar implementation=== + +Above we have looked at how a resource implementation is built by +the copy and paste method (from English to German), that is, formally +speaking, from scratch. A more elegant solution available for +families of languages such as Romance and Scandinavian is to +use parametrized modules. The advantages are +- theoretical: linguistic generalizations and insights +- practical: maintainability improves with fewer components + + +Here is a set of +[slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf] +on the topic. + + +===Parametrizing a resource grammar implementation=== + +This is the most demanding form of resource grammar writing. 
+We do //not// recommend the method of parametrizing from the +beginning: it is easier to have one language first implemented +in the conventional way and then add another language of the +same family by parametrization. This means that the copy and +paste method is still used, but this time the differences +are put into an ``interface`` module. + + +==Character encoding and transliterations== + +This section is relevant for languages using a non-ASCII character set. + +==Coding conventions in GF== + +From version 3.0, GF follows a simple encoding convention: +- GF source files may follow any encoding, such as isolatin-1 or UTF-8; + the default is isolatin-1, and UTF-8 must be indicated by the judgement +``` + flags coding = utf8 ; +``` + in each source module. +- for internal processing, all characters are converted to 16-bit unicode, + as the first step of grammar compilation guided by the ``coding`` flag +- as the last step of compilation, all characters are converted to UTF-8 +- thus, GF object files (``gfo``) and the Portable Grammar Format (``pgf``) + are in UTF-8 + + +Most current resource grammars use isolatin-1 in the source, but this does +not affect their use in parallel with grammars written in other encodings. +In fact, a grammar can be put together from modules using different codings. + +**Warning**. While string literals may contain any characters, identifiers +must be isolatin-1 letters (or digits, underscores, or dashes). This has to +do with the restrictions of the lexer tool that is used. + + +==Transliterations== + +While UTF-8 is well supported by most web browsers, its use in terminals and +text editors may cause disappointment. Many grammarians therefore prefer to +use ASCII transliterations. GF 3.0beta2 provides the following built-in +transliterations: +- Arabic +- Devanagari (Hindi) +- Thai + + +New transliterations can be defined in the GF source file +[``GF/Text/Transliterations.hs`` ../src/GF/Text/Transliterations.hs]. 
+This file also gives instructions on how new ones are added. + + + + + diff --git a/deprecated/Syntax.png b/deprecated/Syntax.png new file mode 100644 index 000000000..f36c098f6 Binary files /dev/null and b/deprecated/Syntax.png differ diff --git a/deprecated/doc/2341.html b/deprecated/doc/2341.html new file mode 100644 index 000000000..ff3e9644d --- /dev/null +++ b/deprecated/doc/2341.html @@ -0,0 +1,259 @@ + + + +af_tunni : lámma kún síddi? boqól afartón i ków + +

+albanian : dy mijë tre qind e dyzet e një + +

+amharic : ሁለት ሺህ ሦስት መቶ ኣርባ ኣንድ + +

+arabic_classical : الفان و ثلاث مائة و واحد و أربعون + +

+arabic_modern : ﺍﻟﻔﻴﻦ ﻭ ﺛﻼﺛﻤﺎﺋﺔ ﻭ ﻭﺍﺣﺪ ﻭ ﺃﺭﺑﻌﻴﻦ + +

+basque : bi mila ta hirurehun berrogei ta bat + +

+bearlake_slave : nákee lamíl tai lak'o, óno, di,i, honéno, ?ó, l-ée + +

+bulgarian : две жиляди триста четирисет и едно + +

+catalan : dos mil tres-cents quaranta - u + +

+chinese : 贰 仟 零 叁 佰 肆 拾 壹 + +

+croatian : dva hiljade tri stotine četrdeset i jedan + +

+czech : dva tisíce tr^i sta čtyr^icet jeden + +

+dagur : hoire miange guarebe jau duci neke + +

+danish : to tusind og tre hundrede og en og fyrre + +

+decimal : 2341 + +

+dutch : twee duizend drie honderd een en veertig + +

+english : two thousand three hundred and forty - one + +

+finnish : kaksi tuhatta kolme sataa neljä kymmentä yksi + +

+french : deux mille trois cent quarante et un + +

+french_swiss : deux mille trois cent quarante et un + +

+fulfulde : ujine d.id.i temed.d.e tati e chappand.e nai e go'o + +

+geez : ዕሽራ ወ ሠላስቱ ምእት አርብዓ ወ አሐዱ + +

+german : zwei tausend drei hundert ein und vierzig + +

+greek_classical : δισχίλιοι τριακόσιοι τετταράκοντα εἵς + +

+greek_modern : δύο χιλιάδες τριακόσια σαράντα ένα + +

+guahibo : aniha sunu akueya sia yana bae kae + +

+guarani : moko~i ma mpohapy sa~ irundy kua~ petei~ + +

+hebrew_biblical : אלפים ו שלש מאות ו ארבעים ו אחד + +

+hindi : दो हज़ार तीन सौ एक्तालीस + +

+hungarian : két ezer három száz negyven egy + +

+icelandic : tvö Þúsund Þrjú hundrað fjörutíu og einn + +

+irish : dhá mhíle trí chead dhá fhichead a haon + +

+italian : due mila tre cento quaranta uno + +

+japanese : にせん さんびゃく よんぢゅう いち + +

+kabardian : m&yn&yt' s'a&ys' p'L-'&s'ra z&ra + +

+kambera : dua riu tailu ngahu patu kambulu hau + +

+kawaiisu : N +

+khmer : bīra bā'na pī raya sē sipa mwya + +

+khowar : joo hazâr troi shọr oché joo bîsher î + +

+kodagu : i:ra:yrat mu:nu:yt.a na:padï + +

+kolyma_yukaghir : N +

+kulung : ni habau su chhum lik i + +

+kwami : dùbúk póllów dálmágí kúnún kán kúu pòD^òw kán múndí + +

+kwaza : N +

+lalo : `n. t'w sa há i tjhí tjh`& + +

+lamani : di hajaar do se caaLise par ek + +

+latvian : divtu^kstoš trīssimt četrdesmit viens + +

+lithuanian : dù tú:kstanc^iu, try:s s^imtai~ ke:turiasdes^imt víenas + +

+lotuxo : tausand ârrexai ikO EssIxa xunixoi ikO atOmwana aNwan x' âbotye + +

+maale : lam?ó $íya haitsó s'ééta ?oydí-támmi pétte + +

+malay : dua ribu tiga ratus empat puluh satu + +

+maltese : elfejn tliet mija u wieh-ed u erbgh-in + +

+mapuche : epu warangka külá pataka meli mari kiñe + +

+margi : dúbú s`&d>àN ghàrú mák`&r agá fód>ú kùmì gà s'&r pátlú* + +

+maybrat : N +

+miya : d'&bu ts`&r '`&náa d>àriy kìdi '`&náa díb>i f`&d>& bèh&n wut'& + +

+mongolian : qoyar mingGan Gurban ĵa'un döčin nigän + +

+nenets : side juonar n-ahar jur t-êt ju' ~ob + +

+norwegian_book : to tusen og tre hundre og førti et + +

+old_church_slavonic : дъвѣ тысѭшти триѥ съта четыре десѧте и ѥдинъ + +

+oromo : kuma lama fi dhibba sadii fi afurtamii tokko + +

+pashto : دوه زره دري سوه او يو څلوۍښت + +

+polish : dwa tysiace trzysta czterdziesci jeden + +

+portuguese : dois mil trezentos quarenta e um + +

+quechua : iskay warank'a kinsa pachak tawa chunka jukniyuq + +

+romanian : două mii trei sute patruzeci şi unu + +

+russian : две тысячи триста сорок один + +

+sango : ngbangbu bale óse na ndó ní ngbangbu otá na ndó ní bale osió na ndó ní ÓkO + +

+sanskrit : त्रि शतान्य एकचत्वारिंशच च द्वे सहस्रे + +

+slovak : dva tisic tri sto styridsat jedna + +

+sorani : دۇ ههزار سىسهد ځل و يهك + +

+spanish : dos mil trescientos cuarenta y uno + +

+stieng : baar ban pê riêng puôn jo't muôi + +

+swahili : elfu mbili mia tatu arobaini na moja + +

+swedish : två tusen tre hundra fyrtio ett + +

+tamil : இரணௌடௌ ஆயாரதௌதீ மீனௌ ந஽ரீ ந஽ரௌ பதௌ ஓனௌரீ + +

+tampere : kaks tuhatta kolme sataa nel kyt yks + +

+tibetan : t̆ong ṭ'a' n̆yī d́ang sumğya d́ang z̆hyib chu źhye chi' + +

+totonac : maa t~u3 mil lii ~a tuhun pus^um tun + +

+tuda_daza : dubu cu sao kidra ago.zo. sao mOrta tozo sao tro + +

+tukang_besi : dua riwu tolu hatu hato hulu sa'asa + +

+turkish : iki bin üç yüz kırk bir + +

+votic : kahsi tuhatta keVmsata: nelläts^ümmet ühsi + +

+welsh : dau fil tri chan un a deugain + +

+yasin_burushaski : altó hazár iskí tha altó-áltar hek + +

+zaiwa : i55 hing55 sum11 syo31 mi11 cue31 ra11 + + + + diff --git a/deprecated/doc/DocGF.pdf b/deprecated/doc/DocGF.pdf new file mode 100644 index 000000000..27e4262db Binary files /dev/null and b/deprecated/doc/DocGF.pdf differ diff --git a/deprecated/doc/DocGF.tex b/deprecated/doc/DocGF.tex new file mode 100644 index 000000000..6388d3548 --- /dev/null +++ b/deprecated/doc/DocGF.tex @@ -0,0 +1,569 @@ +\batchmode +%This Latex file is machine-generated by the BNF-converter + +\documentclass[a4paper,11pt]{article} +\author{BNF-converter} +\title{The Language GF} +\setlength{\parindent}{0mm} +\setlength{\parskip}{1mm} +\begin{document} + +\maketitle + +\newcommand{\emptyP}{\mbox{$\epsilon$}} +\newcommand{\terminal}[1]{\mbox{{\texttt {#1}}}} +\newcommand{\nonterminal}[1]{\mbox{$\langle \mbox{{\sl #1 }} \! \rangle$}} +\newcommand{\arrow}{\mbox{::=}} +\newcommand{\delimit}{\mbox{$|$}} +\newcommand{\reserved}[1]{\mbox{{\texttt {#1}}}} +\newcommand{\literal}[1]{\mbox{{\texttt {#1}}}} +\newcommand{\symb}[1]{\mbox{{\texttt {#1}}}} + +This document was automatically generated by the {\em BNF-Converter}. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place). + +\section*{The lexical structure of GF} +\subsection*{Identifiers} +Identifiers \nonterminal{Ident} are unquoted strings beginning with a letter, +followed by any combination of letters, digits, and the characters {\tt \_ '}, +reserved words excluded. + + +\subsection*{Literals} +Integer literals \nonterminal{Int}\ are nonempty sequences of digits. + + +String literals \nonterminal{String}\ have the form +\terminal{"}$x$\terminal{"}, where $x$ is any sequence of any characters +except \terminal{"}\ unless preceded by \verb6\6. 
+ + + + +LString literals are recognized by the regular expression +\(\mbox{`''} ({\nonterminal{anychar}} - \mbox{`''})* \mbox{`''}\) + + +\subsection*{Reserved words and symbols} +The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions. + +The reserved words used in GF are the following: \\ + +\begin{tabular}{lll} +{\reserved{Lin}} &{\reserved{PType}} &{\reserved{Str}} \\ +{\reserved{Strs}} &{\reserved{Tok}} &{\reserved{Type}} \\ +{\reserved{abstract}} &{\reserved{case}} &{\reserved{cat}} \\ +{\reserved{concrete}} &{\reserved{data}} &{\reserved{def}} \\ +{\reserved{flags}} &{\reserved{fn}} &{\reserved{fun}} \\ +{\reserved{grammar}} &{\reserved{in}} &{\reserved{include}} \\ +{\reserved{incomplete}} &{\reserved{instance}} &{\reserved{interface}} \\ +{\reserved{let}} &{\reserved{lin}} &{\reserved{lincat}} \\ +{\reserved{lindef}} &{\reserved{lintype}} &{\reserved{of}} \\ +{\reserved{open}} &{\reserved{oper}} &{\reserved{out}} \\ +{\reserved{package}} &{\reserved{param}} &{\reserved{pattern}} \\ +{\reserved{pre}} &{\reserved{printname}} &{\reserved{resource}} \\ +{\reserved{reuse}} &{\reserved{strs}} &{\reserved{table}} \\ +{\reserved{tokenizer}} &{\reserved{transfer}} &{\reserved{union}} \\ +{\reserved{var}} &{\reserved{variants}} &{\reserved{where}} \\ +{\reserved{with}} & & \\ +\end{tabular}\\ + +The symbols used in GF are the following: \\ + +\begin{tabular}{lll} +{\symb{;}} &{\symb{{$=$}}} &{\symb{\{}} \\ +{\symb{\}}} &{\symb{(}} &{\symb{)}} \\ +{\symb{:}} &{\symb{{$-$}{$>$}}} &{\symb{**}} \\ +{\symb{,}} &{\symb{[}} &{\symb{]}} \\ +{\symb{.}} &{\symb{{$|$}}} &{\symb{\%}} \\ +{\symb{?}} &{\symb{{$<$}}} &{\symb{{$>$}}} \\ +{\symb{@}} &{\symb{!}} &{\symb{*}} \\ 
+{\symb{$\backslash$}} &{\symb{{$=$}{$>$}}} &{\symb{{$+$}{$+$}}} \\ +{\symb{{$+$}}} &{\symb{\_}} &{\symb{\$}} \\ +{\symb{/}} &{\symb{{$-$}}} & \\ +\end{tabular}\\ + +\subsection*{Comments} +Single-line comments begin with {\symb{{$-$}{$-$}}}. \\Multiple-line comments are enclosed with {\symb{\{{$-$}}} and {\symb{{$-$}\}}}. + +\section*{The syntactic structure of GF} +Non-terminals are enclosed between $\langle$ and $\rangle$. +The symbols {\arrow} (production), {\delimit} (union) +and {\emptyP} (empty rule) belong to the BNF notation. +All other symbols are terminals.\\ + +\begin{tabular}{lll} +{\nonterminal{Grammar}} & {\arrow} &{\nonterminal{ListModDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListModDef}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{ModDef}} {\nonterminal{ListModDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ModDef}} & {\arrow} &{\nonterminal{ModDef}} {\terminal{;}} \\ + & {\delimit} &{\terminal{grammar}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\terminal{abstract}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{;}} {\nonterminal{ListConcSpec}} {\terminal{\}}} \\ + & {\delimit} &{\nonterminal{ComplMod}} {\nonterminal{ModType}} {\terminal{{$=$}}} {\nonterminal{ModBody}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ConcSpec}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ConcExp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListConcSpec}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{ConcSpec}} \\ + & {\delimit} &{\nonterminal{ConcSpec}} {\terminal{;}} {\nonterminal{ListConcSpec}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ConcExp}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListTransfer}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListTransfer}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Transfer}} {\nonterminal{ListTransfer}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} 
+{\nonterminal{Transfer}} & {\arrow} &{\terminal{(}} {\terminal{transfer}} {\terminal{in}} {\nonterminal{Open}} {\terminal{)}} \\ + & {\delimit} &{\terminal{(}} {\terminal{transfer}} {\terminal{out}} {\nonterminal{Open}} {\terminal{)}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ModType}} & {\arrow} &{\terminal{abstract}} {\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{resource}} {\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{interface}} {\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{concrete}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{instance}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{transfer}} {\nonterminal{Ident}} {\terminal{:}} {\nonterminal{Open}} {\terminal{{$-$}{$>$}}} {\nonterminal{Open}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ModBody}} & {\arrow} &{\nonterminal{Extend}} {\nonterminal{Opens}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\ + & {\delimit} &{\nonterminal{ListIdent}} {\terminal{**}} {\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\ + & {\delimit} &{\terminal{reuse}} {\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{union}} {\nonterminal{ListIncluded}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListTopDef}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{TopDef}} {\nonterminal{ListTopDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Extend}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{**}} \\ + & {\delimit} &{\emptyP} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListOpen}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Open}} \\ + & {\delimit} &{\nonterminal{Open}} {\terminal{,}} {\nonterminal{ListOpen}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Opens}} & {\arrow} &{\emptyP} \\ + & {\delimit} 
&{\terminal{open}} {\nonterminal{ListOpen}} {\terminal{in}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Open}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{)}} \\ + & {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{)}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ComplMod}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\terminal{incomplete}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{QualOpen}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\terminal{incomplete}} \\ + & {\delimit} &{\terminal{interface}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListIncluded}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Included}} \\ + & {\delimit} &{\nonterminal{Included}} {\terminal{,}} {\nonterminal{ListIncluded}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Included}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{[}} {\nonterminal{ListIdent}} {\terminal{]}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Def}} & {\arrow} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Name}} {\nonterminal{ListPatt}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{TopDef}} & {\arrow} &{\terminal{cat}} {\nonterminal{ListCatDef}} \\ + & {\delimit} &{\terminal{fun}} {\nonterminal{ListFunDef}} \\ + & {\delimit} &{\terminal{data}} {\nonterminal{ListFunDef}} \\ + & {\delimit} &{\terminal{def}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{data}} {\nonterminal{ListDataDef}} \\ + & {\delimit} &{\terminal{transfer}} 
{\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{param}} {\nonterminal{ListParDef}} \\ + & {\delimit} &{\terminal{oper}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{lincat}} {\nonterminal{ListPrintDef}} \\ + & {\delimit} &{\terminal{lindef}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{lin}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{printname}} {\terminal{cat}} {\nonterminal{ListPrintDef}} \\ + & {\delimit} &{\terminal{printname}} {\terminal{fun}} {\nonterminal{ListPrintDef}} \\ + & {\delimit} &{\terminal{flags}} {\nonterminal{ListFlagDef}} \\ + & {\delimit} &{\terminal{printname}} {\nonterminal{ListPrintDef}} \\ + & {\delimit} &{\terminal{lintype}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{pattern}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{package}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} {\terminal{;}} \\ + & {\delimit} &{\terminal{var}} {\nonterminal{ListDef}} \\ + & {\delimit} &{\terminal{tokenizer}} {\nonterminal{Ident}} {\terminal{;}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{CatDef}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\ + & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} \\ + & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} {\terminal{\{}} {\nonterminal{Integer}} {\terminal{\}}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{FunDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{DataDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListDataConstr}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{DataConstr}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} 
+{\nonterminal{ListDataConstr}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{DataConstr}} \\ + & {\delimit} &{\nonterminal{DataConstr}} {\terminal{{$|$}}} {\nonterminal{ListDataConstr}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ParDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListParConstr}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\ + & {\delimit} &{\nonterminal{Ident}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ParConstr}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{PrintDef}} & {\arrow} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{FlagDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListDef}} & {\arrow} &{\nonterminal{Def}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{Def}} {\terminal{;}} {\nonterminal{ListDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListCatDef}} & {\arrow} &{\nonterminal{CatDef}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{CatDef}} {\terminal{;}} {\nonterminal{ListCatDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListFunDef}} & {\arrow} &{\nonterminal{FunDef}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{FunDef}} {\terminal{;}} {\nonterminal{ListFunDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListDataDef}} & {\arrow} &{\nonterminal{DataDef}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{DataDef}} {\terminal{;}} {\nonterminal{ListDataDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListParDef}} & {\arrow} &{\nonterminal{ParDef}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{ParDef}} {\terminal{;}} {\nonterminal{ListParDef}} \\ +\end{tabular}\\ + 
+\begin{tabular}{lll} +{\nonterminal{ListPrintDef}} & {\arrow} &{\nonterminal{PrintDef}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{PrintDef}} {\terminal{;}} {\nonterminal{ListPrintDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListFlagDef}} & {\arrow} &{\nonterminal{FlagDef}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{FlagDef}} {\terminal{;}} {\nonterminal{ListFlagDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListParConstr}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{ParConstr}} \\ + & {\delimit} &{\nonterminal{ParConstr}} {\terminal{{$|$}}} {\nonterminal{ListParConstr}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListIdent}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{,}} {\nonterminal{ListIdent}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Name}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\terminal{]}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListName}} & {\arrow} &{\nonterminal{Name}} \\ + & {\delimit} &{\nonterminal{Name}} {\terminal{,}} {\nonterminal{ListName}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{LocDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListLocDef}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{LocDef}} \\ + & {\delimit} &{\nonterminal{LocDef}} {\terminal{;}} {\nonterminal{ListLocDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Exp4}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{\%}} \\ + & 
{\delimit} &{\nonterminal{Sort}} \\ + & {\delimit} &{\nonterminal{String}} \\ + & {\delimit} &{\nonterminal{Integer}} \\ + & {\delimit} &{\terminal{?}} \\ + & {\delimit} &{\terminal{[}} {\terminal{]}} \\ + & {\delimit} &{\terminal{data}} \\ + & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{Exps}} {\terminal{]}} \\ + & {\delimit} &{\terminal{[}} {\nonterminal{String}} {\terminal{]}} \\ + & {\delimit} &{\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListTupleComp}} {\terminal{{$>$}}} \\ + & {\delimit} &{\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\ + & {\delimit} &{\terminal{{$<$}}} {\nonterminal{Exp}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$>$}}} \\ + & {\delimit} &{\terminal{(}} {\nonterminal{Exp}} {\terminal{)}} \\ + & {\delimit} &{\nonterminal{LString}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Exp3}} & {\arrow} &{\nonterminal{Exp3}} {\terminal{.}} {\nonterminal{Label}} \\ + & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\%}} \\ + & {\delimit} &{\nonterminal{Exp4}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Exp2}} & {\arrow} &{\nonterminal{Exp2}} {\nonterminal{Exp3}} \\ + & {\delimit} &{\terminal{table}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{[}} {\nonterminal{ListExp}} {\terminal{]}} \\ + & {\delimit} &{\terminal{case}} {\nonterminal{Exp}} {\terminal{of}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{variants}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{pre}} {\terminal{\{}} 
{\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListAltern}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{strs}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{@}} {\nonterminal{Exp4}} \\ + & {\delimit} &{\nonterminal{Exp3}} \\ + & {\delimit} &{\terminal{Lin}} {\nonterminal{Ident}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Exp1}} & {\arrow} &{\nonterminal{Exp1}} {\terminal{!}} {\nonterminal{Exp2}} \\ + & {\delimit} &{\nonterminal{Exp1}} {\terminal{*}} {\nonterminal{Exp2}} \\ + & {\delimit} &{\nonterminal{Exp1}} {\terminal{**}} {\nonterminal{Exp2}} \\ + & {\delimit} &{\nonterminal{Exp2}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Exp}} & {\arrow} &{\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\terminal{$\backslash$}} {\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Decl}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}{$+$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}}} {\nonterminal{Exp}} \\ + & {\delimit} &{\terminal{let}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} {\terminal{in}} {\nonterminal{Exp}} \\ + & {\delimit} &{\terminal{let}} {\nonterminal{ListLocDef}} {\terminal{in}} {\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Exp1}} {\terminal{where}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{fn}} {\terminal{\{}} {\nonterminal{ListEquation}} {\terminal{\}}} \\ + & {\delimit} &{\nonterminal{Exp1}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListExp}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Exp}} \\ + & {\delimit} &{\nonterminal{Exp}} {\terminal{;}} 
{\nonterminal{ListExp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Exps}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Exp4}} {\nonterminal{Exps}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Patt1}} & {\arrow} &{\terminal{\_}} \\ + & {\delimit} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\ + & {\delimit} &{\nonterminal{Integer}} \\ + & {\delimit} &{\nonterminal{String}} \\ + & {\delimit} &{\terminal{\{}} {\nonterminal{ListPattAss}} {\terminal{\}}} \\ + & {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListPattTupleComp}} {\terminal{{$>$}}} \\ + & {\delimit} &{\terminal{(}} {\nonterminal{Patt}} {\terminal{)}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Patt}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListPatt}} \\ + & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\nonterminal{ListPatt}} \\ + & {\delimit} &{\nonterminal{Patt1}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{PattAss}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Patt}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Label}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{\$}} {\nonterminal{Integer}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Sort}} & {\arrow} &{\terminal{Type}} \\ + & {\delimit} &{\terminal{PType}} \\ + & {\delimit} &{\terminal{Tok}} \\ + & {\delimit} &{\terminal{Str}} \\ + & {\delimit} &{\terminal{Strs}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListPattAss}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{PattAss}} \\ + & {\delimit} &{\nonterminal{PattAss}} {\terminal{;}} {\nonterminal{ListPattAss}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{PattAlt}} & {\arrow} &{\nonterminal{Patt}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListPatt}} 
& {\arrow} &{\nonterminal{Patt1}} \\ + & {\delimit} &{\nonterminal{Patt1}} {\nonterminal{ListPatt}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListPattAlt}} & {\arrow} &{\nonterminal{PattAlt}} \\ + & {\delimit} &{\nonterminal{PattAlt}} {\terminal{{$|$}}} {\nonterminal{ListPattAlt}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Bind}} & {\arrow} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{\_}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListBind}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Bind}} \\ + & {\delimit} &{\nonterminal{Bind}} {\terminal{,}} {\nonterminal{ListBind}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Decl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\ + & {\delimit} &{\nonterminal{Exp2}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{TupleComp}} & {\arrow} &{\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{PattTupleComp}} & {\arrow} &{\nonterminal{Patt}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListTupleComp}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{TupleComp}} \\ + & {\delimit} &{\nonterminal{TupleComp}} {\terminal{,}} {\nonterminal{ListTupleComp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListPattTupleComp}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{PattTupleComp}} \\ + & {\delimit} &{\nonterminal{PattTupleComp}} {\terminal{,}} {\nonterminal{ListPattTupleComp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Case}} & {\arrow} &{\nonterminal{ListPattAlt}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListCase}} & {\arrow} &{\nonterminal{Case}} \\ + & {\delimit} &{\nonterminal{Case}} {\terminal{;}} {\nonterminal{ListCase}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Equation}} & {\arrow} &{\nonterminal{ListPatt}} 
{\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListEquation}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Equation}} \\ + & {\delimit} &{\nonterminal{Equation}} {\terminal{;}} {\nonterminal{ListEquation}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Altern}} & {\arrow} &{\nonterminal{Exp}} {\terminal{/}} {\nonterminal{Exp}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListAltern}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{Altern}} \\ + & {\delimit} &{\nonterminal{Altern}} {\terminal{;}} {\nonterminal{ListAltern}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{DDecl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\ + & {\delimit} &{\nonterminal{Exp4}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListDDecl}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\nonterminal{DDecl}} {\nonterminal{ListDDecl}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{OldGrammar}} & {\arrow} &{\nonterminal{Include}} {\nonterminal{ListTopDef}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{Include}} & {\arrow} &{\emptyP} \\ + & {\delimit} &{\terminal{include}} {\nonterminal{ListFileName}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{FileName}} & {\arrow} &{\nonterminal{String}} \\ + & {\delimit} &{\nonterminal{Ident}} \\ + & {\delimit} &{\terminal{/}} {\nonterminal{FileName}} \\ + & {\delimit} &{\terminal{.}} {\nonterminal{FileName}} \\ + & {\delimit} &{\terminal{{$-$}}} {\nonterminal{FileName}} \\ + & {\delimit} &{\nonterminal{Ident}} {\nonterminal{FileName}} \\ +\end{tabular}\\ + +\begin{tabular}{lll} +{\nonterminal{ListFileName}} & {\arrow} &{\nonterminal{FileName}} {\terminal{;}} \\ + & {\delimit} &{\nonterminal{FileName}} {\terminal{;}} {\nonterminal{ListFileName}} \\ +\end{tabular}\\ + + + +\end{document} + diff --git a/deprecated/doc/German.png 
b/deprecated/doc/German.png new file mode 100644 index 000000000..7c6303897 Binary files /dev/null and b/deprecated/doc/German.png differ diff --git a/deprecated/doc/Grammar.dot b/deprecated/doc/Grammar.dot new file mode 100644 index 000000000..cb2998eb3 --- /dev/null +++ b/deprecated/doc/Grammar.dot @@ -0,0 +1,75 @@ +digraph { + +size = "12,8" ; + +Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"]; + +Lang -> Grammar [style = "solid"]; +Lang -> Lexicon [style = "solid"]; + +Grammar [style = "solid", shape = "ellipse", URL = "Lang.gf"]; + + +Grammar -> Noun [style = "solid"]; +Grammar -> Verb [style = "solid"]; +Grammar -> Adjective [style = "solid"]; +Grammar -> Adverb [style = "solid"]; +Grammar -> Numeral [style = "solid"]; +Grammar -> Sentence [style = "solid"]; +Grammar -> Question [style = "solid"]; +Grammar -> Relative [style = "solid"]; +Grammar -> Conjunction [style = "solid"]; +Grammar -> Phrase [style = "solid"]; +Grammar -> Text [style = "solid"]; +Grammar -> Idiom [style = "solid"]; +Grammar -> Structural [style = "solid"]; + + +Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"]; +Noun -> Cat [style = "solid"]; + +Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"]; +Verb -> Cat [style = "solid"]; + +Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"]; +Adjective -> Cat [style = "solid"]; + +Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"]; +Adverb -> Cat [style = "solid"]; + +Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"]; +Numeral -> Cat [style = "solid"]; + +Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"]; +Sentence -> Cat [style = "solid"]; + +Question [style = "solid", shape = "ellipse", URL = "Question.gf"]; +Question -> Cat [style = "solid"]; + +Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"]; +Relative -> Cat [style = "solid"]; + +Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"]; +Conjunction -> Cat [style = 
"solid"]; + +Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; +Phrase -> Cat [style = "solid"]; + +Text [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; +Text -> Cat [style = "solid"]; + +Idiom [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; +Idiom -> Cat [style = "solid"]; + +Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"]; +Structural -> Cat [style = "solid"]; + +Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"]; +Lexicon -> Cat [style = "solid"]; + +Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"]; +Cat -> Common [style = "solid"]; + +Common [style = "solid", shape = "ellipse", URL = "Tense.gf"]; + +} diff --git a/deprecated/doc/Grammar.png b/deprecated/doc/Grammar.png new file mode 100644 index 000000000..ada2847d7 Binary files /dev/null and b/deprecated/doc/Grammar.png differ diff --git a/deprecated/doc/TODO b/deprecated/doc/TODO new file mode 100644 index 000000000..c92f4c8fa --- /dev/null +++ b/deprecated/doc/TODO @@ -0,0 +1,231 @@ + +* Some notes on the syntax of this file, making it possible to use todoo-mode.el: + +- Items start with "* " +- Sub-items start with "- " +- It should be noted somewhere in the item, who has reported the item + Suggestion: Add "[who]" at the beginning of the item title + (then one can use "assign item" in todoo-mode) +- Each item should have a priority + Suggestion: Add "URGENT", "IMPORTANT" or "WISH" at the beginning of + the item title +- Sort the items in priority order + (todoo-mode can move an item up or down) + +---------------------------------------------------------------------- + + +* [peb] URGENT: Error messages for syntax errors + + When a syntax error is reported, it should be noted which file it + is. 
Otherwise it is impossible to know where the error is + (if one uses the -s flag): + + > i -s Domain/MP3/Domain_MP_Semantics.gf + syntax error at line 33 before ve , Proposition , + + There's no problem with other kinds of errors: + + > i -s Domain/MP3/Domain_MP_Semantics.gf + checking module Godis_Semantics + Happened in linearization of userMove : + product expected instead of { + pl : Str + } + + +* [peb] IMPORTANT: Add the -path of a module to daughter modules + + Then the main module does not have to know where all grandchildren are: + + file A.gf: + abstract A = B ** {...} + + file B.gf: + --# -path=./resource + abstract B = Lang ** {...} + + I.e.: the file A.gf should not need to know that B.gf uses the + resource library. + + +* [peb] IMPORTANT: incomplete concrete and interfaces + +- The following works in GF: + + incomplete concrete TestDI of TestA = open (C=TestCI) in { + lincat A = TestCI.A ** {p : Str}; + lin f = TestCI.f ** {p = "f"}; + g = TestCI.g ** {p = "g"}; + } + + > i -src TestDE.gf + +- BUT, if we exchange "TestCI" for "C" we get an error: + + incomplete concrete TestDI of TestA = open (C=TestCI) in { + lincat A = C.A ** {p : Str}; + lin f = C.f ** {p = "f"}; + g = C.g ** {p = "g"}; + } + + > i -src TestDE.gf + compiling TestDE.gf... 
failed to find C + OCCURRED IN + atomic term C given TestCE TestCI TestCE TestDE + OCCURRED IN + renaming definition of f + OCCURRED IN + renaming module TestDE + +- the other modules: + + abstract TestA = { + cat A; + fun f, g : A; + } + + instance TestBE of TestBI = { + oper hello = "hello"; + bye = "bye"; + } + + interface TestBI = { + oper hello : Str; + bye : Str; + } + + concrete TestCE of TestA = TestCI with (TestBI = TestBE); + + incomplete concrete TestCI of TestA = open TestBI in { + lincat A = {s : Str}; + lin f = {s = hello}; + g = {s = bye}; + } + + concrete TestDE of TestA = TestDI with (TestCI = TestCE); + +* [peb] IMPORTANT: Missing things in the help command + + > h -printer + (the flag -printer=cfgm is missing) + + > h -cat + WARNING: invalid option: cat + + > h -lang + WARNING: invalid option: lang + + > h -language + WARNING: invalid option: language + + > h -parser + WARNING: invalid option: parser + + > h -aslkdjaslkdjss + WARNING: invalid option: aslkdjaslkdjss + Command not found. + (it should note: "option not found") + + > h -optimize + WARNING: invalid option: optimize + + > h -startcat + WARNING: invalid option: startcat + + > h h + h, help: h Command? + (it should also mention "h -option") + + +* [peb] IMPORTANT: Set GF_LIb-PATH within GF + + > sf libpath=~/GF/lib + + +* [peb] IMPORTANT: Set the starting category with "sf" + + > sf startcat=X + + +* [peb] IMPORTANT: import-flags + +- There are some inconsistencies when importing grammars: + + 1. when doing "pg -printer=cfg", one must have used "i -conversion=finite", + since "pg" doesn't care about the flags that are set in the grammar file + + 2. when doing "pm -printer=cfgm", one must have set the flag + "conversion=finite" within the grammar file, since "pm" doesn't + care about the flags to the import command + + (I guess it's me (peb) who should fix this, but I don't know where + the different flags reside...) 
+ +- Also, it must be decided in what cases flags can override other flags: + + a) in the grammar file, e.g. "flags conversion=finite;" + b) on the command line, e.g. "> sf conversion=finite" + c) as argument to a command, e.g. "> i -conversion=finite file.gf" + +- A related issue is to decide the scope of flags: + + Some flags are (or should be) local to the module + (e.g. -coding and -path) + Other flags override daughter flags for daughter modules + (e.g. -startcat and -conversion) + +* [bringert] IMPORTANT: get right startcat flag when printing CFGM + GF.CFGM.PrintCFGrammar.prCanonAsCFGM currently only gets the startcat + flag from the top-level concrete module. This might be easier + to fix if the multi grammar printers had access to more than just + the CanonGrammar. + +* [peb] WISH: generalizing incomplete concrete + + I want to be able to open an incomplete concrete module + inside another incomplete concrete. + Then I can instantiate both incompletes at the same time. + +* [peb] WISH: _tmpi, _tmpo + + The files _tmpi and _tmpo are never removed when quitting GF. + Further suggestion: put them in /tmp or similar. + + peb: när man använder "|" till ett systemanrop, t.ex: + pg | ! sort + så skapas filerna _tmpi och _tmpo. Men de tas aldrig bort. + + peb: ännu bättre: ta bort filerna efteråt. + + aarne: Sant: när GF quittas (om detta inte sker onormalt). + Eller när kommandot har kört färdigt (om det terminerar). + + peb: Bäst(?): skapa filerna i /tmp eller liknande. + + aarne: Ibland får man skrivrättighetsproblem - och det är + inte kul om man måste ange en tmp-path. Och olika + användare och gf-processer måste ha unika filnamn. + Och vet inte hur det funkar på windows... + + aarne: Ett till alternativ skulle vara att använda handles + utan några tmp-filer alls. Men jag har inte hunnit + ta reda på hur det går till. + + björn: Lite slumpmässiga tankar: + + man kan använda System.Directory.getTemporaryDirectory, så slipper man iaf bry sig om olika plattformsproblem.
+ + sen kan man använda System.IO.openTempFile för att skapa en temporär fil. Den tas dock inte bort när programmet avslutas, så det får man fixa själv. + + System.Posix.Temp.mkstemp gör nåt liknande, men dokumentationen är dålig. + + biblioteket HsShellScript har lite funktioner för sånt här, se + http://www.volker-wysk.de/hsshellscript/apidoc/HsShellScript.html#16 + + +* [peb] WISH: Hierarchic modules + + Suggestion by peb: + The module A.B.C is located in the file A/B/C.gf + + Main advantage: you no longer need to state "--# -path=..." in + modules + +- How can this be combined with several modules inside one file? diff --git a/deprecated/doc/compiling-gf.txt b/deprecated/doc/compiling-gf.txt new file mode 100644 index 000000000..9e438f40f --- /dev/null +++ b/deprecated/doc/compiling-gf.txt @@ -0,0 +1,750 @@ +Compiling GF +Aarne Ranta +Proglog meeting, 1 November 2006 + +% to compile: txt2tags -thtml compiling-gf.txt ; htmls compiling-gf.html + +%!target:html +%!postproc(html): #NEW + +#NEW + +==The compilation task== + +GF is a grammar formalism, i.e. a special purpose programming language +for writing grammars. + +Other grammar formalisms: +- BNF, YACC, Happy (grammars for programming languages); +- PATR, HPSG, LFG (grammars for natural languages). + + +The grammar compiler prepares a GF grammar for two computational tasks: +- linearization: take syntax trees to strings +- parsing: take strings to syntax trees + + +The grammar gives a declarative description of these functionalities, +on a high abstraction level that improves grammar writing +productivity. + +For efficiency, the grammar is compiled to lower-level formats. + +Type checking is another essential compilation phase. Its purpose is +twofold, as usual: +- checking the correctness of the grammar +- type-annotating expressions for code generation + + +#NEW + +==Characteristics of GF language== + +Functional language with types, both built-in and user-defined.
+``` + Str : Type + + param Number = Sg | Pl + + param AdjForm = ASg Gender | APl + + Noun : Type = {s : Number => Str ; g : Gender} +``` +Pattern matching. +``` + svart_A = table { + ASg _ => "svart" ; + _ => "svarta" + } +``` +Higher-order functions. + +Dependent types. +``` + flip : (a, b, c : Type) -> (a -> b -> c) -> b -> a -> c = + \_,_,_,f,y,x -> f x y ; +``` + + +#NEW + +==The module system of GF== + +Main division: abstract syntax and concrete syntax +``` + abstract Greeting = { + cat Greet ; + fun Hello : Greet ; + } + + concrete GreetingEng of Greeting = { + lincat Greet = {s : Str} ; + lin Hello = {s = "hello"} ; + } + + concrete GreetingIta of Greeting = { + param Politeness = Familiar | Polite ; + lincat Greet = {s : Politeness => Str} ; + lin Hello = {s = table { + Familiar => "ciao" ; + Polite => "buongiorno" + } } ; + } +``` +Other features of the module system: +- extension and opening +- parametrized modules (cf. ML: signatures, structures, functors) + + + + +#NEW + +==GF vs. Haskell== + +Some things that (standard) Haskell hasn't: +- records and record subtyping +- regular expression patterns +- dependent types +- ML-style modules + + +Some things that GF hasn't: +- infinite (recursive) data types +- recursive functions +- classes, polymorphism + + +#NEW + +==GF vs. most linguistic grammar formalisms== + +GF separates abstract syntax from concrete syntax. + +GF has a module system with separate compilation. + +GF is generation-oriented (as opposed to parsing). + +GF has unidirectional matching (as opposed to unification). + +GF has a static type system (as opposed to a type-free universe). + +"I was - and I still am - firmly convinced that a program composed +out of statically type-checked parts is more likely to faithfully +express a well-thought-out design than a program relying on +weakly-typed interfaces or dynamically-checked interfaces." +(B. Stroustrup, 1994, p.
107) + + + +#NEW + +==The computation model: abstract syntax== + +An abstract syntax defines a free algebra of trees (using +dependent types, recursion, higher-order abstract syntax: +GF includes a complete Logical Framework). +``` + cat C (x_1 : A_1)...(x_n : A_n) + a_1 : A_1 + ... + a_n : A_n{x_1 := a_1,...,x_n-1 := a_n-1} + ---------------------------------------------------- + (C a_1 ... a_n) : Type + + + fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A + a_1 : A_1 + ... + a_n : A_n{x_1 := a_1,...,x_n-1 := a_n-1} + ---------------------------------------------------- + (f a_1 ... a_n) : A{x_1 := a_1,...,x_n := a_n} + + + A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A + ---------------------------- ---------------------- ------------------------ + (x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := a} +``` +Notice that all syntax trees are in eta-long form. + + +#NEW + +==The computation model: concrete syntax== + +A concrete syntax defines a homomorphism (compositional mapping) +from the abstract syntax to a system of concrete syntax objects. +``` + cat C _ + -------------------- + lincat C = C* : Type + + fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A + ----------------------------------------------- + lin f = f* : A_1* -> ... -> A_n* -> A* + + (f a_1 ... a_n)* = f* a_1* ... a_n* +``` +The homomorphism can as such be used as a linearization function. + +It is a functional program, but a restricted one, since it works +in the end on finite data structures only. + +But a more efficient program is obtained via compilation to +GFC = Canonical GF: the "machine code" of GF. + +The parsing problem of GFC can be reduced to that of MPCFG (Multiple +Parallel Context Free Grammars), see P. Ljunglöf's thesis (2004).
+ + + +#NEW + +==The core type system of concrete syntax: basic types== + +``` + param P P : PType + PType : Type --------- --------- + P : PType P : Type + + s : Str t : Str + Str : Type "foo" : Str [] : Str ---------------- + s ++ t : Str +``` + + +#NEW + +==The core type system of concrete syntax: functions and tables== + +``` + A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A + ---------------------------- ---------------------- ------------------------ + (x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := a} + + + P : PType A : Type t : P => A p : P + -------------------- ----------------- + P => A : Type t ! p : A + + v_1,...,v_n : A + ---------------------------------------------- P = {C_1,...,C_n} + table {C_1 => v_1 ; ... ; C_n => v_n} : P => A +``` +Pattern matching is treated as an abbreviation for tables. Notice that +``` + case e of {...} == table {...} ! e +``` + + +#NEW + +==The core type system of concrete syntax: records== + +``` + A_1,...,A_n : Type + ------------------------------------ n >= 0 + {r_1 : A_1 ; ... ; r_n : A_n} : Type + + + a_1 : A_1 ... a_n : A_n + ------------------------------------------------------------ + {r_1 = a_1 ; ... ; r_n = a_n} : {r_1 : A_1 ; ... ; r_n : A_n} + + + r : {r_1 : A_1 ; ... ; r_n : A_n} + ----------------------------------- i = 1,...,n + r.r_i : A_i +``` +Subtyping: if ``r : R ** {r : A}`` then ``r : R`` + + + +#NEW + +==Computation rules== + +``` + (\x -> b) a = b{x := a} + + (table {C_1 => v_1 ; ... ; C_n => v_n} : P => A) ! C_i = v_i + + {r_1 = a_1 ; ... ; r_n = a_n}.r_i = a_i +``` + + + +#NEW + +==Canonical GF== + +Concrete syntax type system: +``` + A_1 : Type ... A_n : Type + Str : Type Int : Type ------------------------- $i : A + [A_1, ..., A_n] : Type + + + a_1 : A_1 ... a_n : A_n t : [A_1, ..., A_n] + --------------------------------- ------------------- i = 1,..,n + [a_1, ..., a_n] : [A_1, ..., A_n] t ! i : A_i +``` +Tuples represent both records and tables.
+ +There are no functions. + +Linearization: +``` + lin f = f* + + (f a_1 ... a_n)* = f*{$1 = a_1*, ..., $n = a_n*} +``` + + +#NEW + +==The compilation task, again== + +1. From a GF source grammar, derive a canonical GF grammar. + +2. From the canonical GF grammar, derive an MPCFG grammar. + +The canonical GF grammar can be used for linearization, with +linear time complexity (w.r.t. the size of the tree). + +The MPCFG grammar can be used for parsing, with (unbounded) +polynomial time complexity (w.r.t. the size of the string). + +For these target formats, we have also built interpreters in +different programming languages (C, C++, Haskell, Java, Prolog). + +Moreover, we generate supplementary formats such as grammars +required by various speech recognition systems. + + +#NEW + +==An overview of compilation phases== + +Legend: +- ellipse node: representation saved in a file +- plain text node: internal representation +- solid arrow or ellipse: essential phase or format +- dashed arrow or ellipse: optional phase or format +- arrow label: the module implementing the phase + + +[gf-compiler.png] + + +#NEW + +==Using the compiler== + +Batch mode (cf. GHC). + +Interactive mode, building the grammar incrementally from +different files, with the possibility of testing them +(cf. GHCI). + +The interactive mode was first, built on the model of ALF-2 +(L. Magnusson), and there was no file output of compiled +grammars. + + +#NEW + +==Modules and separate compilation== + +The above diagram shows what happens to each module. +(But not quite, since some of the back-end formats must be +built for sets of modules: GFCC and the parser formats.) + +When the grammar compiler is called, it has a main module as its +argument. It then builds recursively a dependency graph with all +the other modules, and decides which ones must be recompiled. +The behaviour is rather similar to GHC.
+ +Separate compilation is //extremely important// when developing +big grammars, especially when using grammar libraries. Example: compiling +the GF resource grammar library takes 5 minutes, whereas reading +in the compiled image takes 10 seconds. + + +#NEW + +==Module dependencies and recompilation== + +(For later use, not for the Proglog talk) + +For each module M, there are 3 kinds of files: +- M.gf, source file +- M.gfc, compiled file ("object file") +- M.gfr, type-checked and optimized source file (for resource modules only) + + +The compiler reads gf files and writes gfc files (and gfr files if appropriate) + +The Main module is the one used as argument when calling GF. + +A module M (immediately) depends on the module K, if either +- M is a concrete of K +- M is an instance of K +- M extends K +- M opens K +- M is a completion of K with something +- M is a completion of some module with K instantiated with something + + +A module M (transitively) depends on the module K, if either +- M immediately depends on K +- M depends on some L such that L immediately depends on K + + +Immediate dependence is readable from the module header without parsing +the whole module. + +The compiler reads recursively the headers of all modules that Main depends on. + +These modules are arranged in a dependency graph, which is checked to be acyclic. + +To decide whether a module M has to be compiled, do: ++ Get the time stamps t() of M.gf and M.gfc (if a file doesn't exist, its + time is minus infinity). ++ If t(M.gf) > t(M.gfc), M must be compiled. ++ If M depends on K and K must be compiled, then M must be compiled. ++ If M depends on K and t(K.gf) > t(M.gfc), then M must be compiled. + + +Decorate the dependency graph by information on whether the gf or the gfc (and gfr) +format is to be read. + +Topologically sort the decorated graph, and read each file in the chosen format. 
+ +The gfr file is generated for these module types only: +- resource +- instance + + +When reading K.gfc, also K.gfr is read if some M depending on K has to be compiled. +In other cases, it is enough to read K.gfc. + +In an interactive GF session, some modules may be in memory already. +When read to the memory, each module M is given time stamp t(M.m). +The additional rule now is: +- If M.gfc is to be read, and t(M.m) > t(M.gfc), don't read M.gfc. + + + + +#NEW + +==Techniques used== + +The compiler is written in Haskell, with some C foreign function calls +in the interactive version (readline, killing threads). + +BNFC is used for generating both the parsers and printers. +This has helped to make the formats portable. + +"Almost compositional functions" (``composOp``) are used in +many compiler passes, making them easier to write and understand. +A ``grep`` on the sources reveals 40 uses (outside the definition +of ``composOp`` itself). + +The key algorithmic ideas are +- type-driven partial evaluation in GF-to-GFC generation +- common subexpression elimination as back-end optimization +- some ideas in GFC-to-MCFG encoding + + +#NEW + +==Type-driven partial evaluation== + +Each abstract syntax category in GF has a corresponding linearization type: +``` + cat C + lincat C = T +``` +The general form of a GF rule pair is +``` + fun f : C1 -> ... -> Cn -> C + lin f = t +``` +with the typing condition following the ``lincat`` definitions +``` + t : T1 -> ... -> Tn -> T +``` +The term ``t`` is in general built by using abstraction methods such +as pattern matching, higher-order functions, local definitions, +and library functions. + +The compilation technique proceeds as follows: +- use eta-expansion on ``t`` to determine the canonical form of the term +``` + \ $C1, ...., $Cn -> (t $C1 .... $Cn) +``` +with unique variables ``$C1 .... 
$Cn`` for the arguments; repeat this +inside the term for records and tables +- evaluate the resulting term using the computation rules of GF +- what remains is a canonical term with ``$C1 .... $Cn`` the only +variables (the run-time input of the linearization function) + + +#NEW + +==Eta-expanding records and tables== + +For records that are valid via subtyping, eta expansion +eliminates superfluous fields: +``` + {r1 = t1 ; r2 = t2} : {r1 : T1} ----> {r1 = t1} +``` +For tables, the effect is always expansion, since +pattern matching can be used to represent tables +compactly: +``` + table {n => "fish"} : Number => Str ---> + + table { + Sg => "fish" ; + Pl => "fish" + } +``` +This can be helped by back-end optimizations (see below). + + +#NEW + +==Eliminating functions== + +"Everything is finite": parameter types, records, tables; +finite number of string tokens per grammar. + +But "infinite types" such as function types are useful when +writing grammars, to enable abstractions. + +Since function types do not appear in linearization types, +we want functions to be eliminated from linearization terms. + +This is similar to the **subformula property** in logic. +Also the main problem is similar: a function depending on +a run-time variable, +``` + (table {P => f ; Q => g} ! x) a +``` +This is not a redex, but we can make it closer to one by moving +the application inside the table, +``` + table {P => f a ; Q => g a} ! x +``` +This transformation is the same as Prawitz's (1965) elimination +of maximal segments in natural deduction: +``` + A B + C -> D C C -> D C + A B --------- --------- + A v B C -> D C -> D A v B D D + --------------------- ===> ------------------------- + C -> D C D + -------------------- + D +``` + + + +#NEW + +==Size effects of partial evaluation== + +Irrelevant table branches are thrown away, which can reduce the size. + +But, since tables are expanded and auxiliary functions are inlined, +the size can grow exponentially.
+ +How can we keep the first property and eliminate the second? + + +#NEW + +==Parametrization of tables== + +Algorithm: for each branch in a table, consider replacing the +argument by a variable: +``` + table { table { + P => t ; ---> x => t[P->x] ; + Q => u x => u[Q->x] + } } +``` +If the resulting branches are all equal, you can replace the table +by a lambda abstract +``` + \\x => t[P->x] +``` +If each created variable ``x`` is unique in the grammar, computation +with the lambda abstract is efficient. + + + +#NEW + +==Course-of-values tables== + +By maintaining a canonical order of parameters in a type, we can +eliminate the left hand sides of branches. +``` + table { table T [ + P => t ; ---> t ; + Q => u u + } ] +``` +The treatment is similar to ``Enum`` instances in Haskell. + +In the end, all parameter types can be translated to +initial segments of integers. + + +#NEW + +==Common subexpression elimination== + +Algorithm: ++ Go through all terms and subterms in a module, creating + a symbol table mapping terms to the number of occurrences. ++ For each subterm appearing at least twice, create a fresh + constant defined as that subterm. ++ Go through all rules (incl. rules for the new constants), + replacing largest possible subterms with such new constants. + + +This algorithm, in a way, creates the strongest possible abstractions. + +In general, the new constants have open terms as definitions. +But since all variables (and constants) are unique, they can +be computed by simple replacement. + + + +#NEW + +==Size effects of optimizations== + +Example: the German resource grammar +``LangGer`` + +|| optimization | lines | characters | size % | blow-up | +| none | 5394 | 3208435 | 100 | 25 | +| all | 5394 | 750277 | 23 | 6 | +| none_subs | 5772 | 1290866 | 40 | 10 | +| all_subs | 5644 | 414119 | 13 | 3 | +| gfcc | 3279 | 190004 | 6 | 1.5 | +| gf source | 3976 | 121939 | 4 | 1 | + + +Optimization "all" means parametrization + course-of-values. 
+ +The source code size is an estimate, since it includes +potentially irrelevant library modules, and comments. + +The GFCC format is not reusable in separate compilation. + + + +#NEW + +==The shared prefix optimization== + +This is currently performed in GFCC only. + +The idea works for languages that have a rich morphology +based on suffixes. Then we can replace a course of values +with a pair of a prefix and a suffix set: +``` + ["apa", "apan", "apor", "aporna"] ---> + ("ap" + ["a", "an", "or", "orna"]) +``` +The real gain comes via common subexpression elimination: +``` + _34 = ["a", "an", "or", "orna"] + apa = ("ap" + _34) + blomma = ("blomm" + _34) + flicka = ("flick" + _34) +``` +Notice that it now matters a lot how grammars are written. +For instance, if German verbs are treated as a one-dimensional +table, +``` + ["lieben", "liebe", "liebst", ...., "geliebt", "geliebter",...] +``` +no shared prefix optimization is possible. A better form is +separate tables for non-"ge" and "ge" forms: +``` + [["lieben", "liebe", "liebst", ....], ["geliebt", "geliebter",...]] +``` + + +#NEW + +==Reuse of grammars as libraries== + +The idea of resource grammars: take care of all aspects of +surface grammaticality (inflection, agreement, word order). + +Reuse in application grammar: via translations +``` + cat C ---> oper C : Type = T + lincat C = T + + fun f : A ---> oper f : A* = t + lin f = t +``` +The user only needs to know the type signatures (abstract syntax). + +However, this does not quite guarantee grammaticality, because +different categories can have the same lincat: +``` + lincat Conj = {s : Str} + lincat Adv = {s : Str} +``` +Thus someone may by accident use "and" as an adverb! + + +#NEW + +==Forcing the type checker to act as a grammar checker== + +We just have to make linearization types unique for each category. + +The technique is reminiscent of Haskell's ``newtype`` but uses +records instead: we add **lock fields** e.g. 
+``` + lincat Conj = {s : Str ; lock_Conj : {}} + lincat Adv = {s : Str ; lock_Adv : {}} +``` +Thanks to record subtyping, the translation is simple: +``` + fun f : C1 -> ... -> Cn -> C + lin f = t + + ---> + + oper f : C1* -> ... -> Cn* -> C* = + \x1,...,xn -> (t x1 ... xn) ** {lock_C = {}} +``` + +#NEW + +==Things to do== + +Better compression of gfc file format. + +Type checking of dependent-type pattern matching in abstract syntax. + +Compilation-related modules that need rewriting +- ``ReadFiles``: clarify the logic of dependencies +- ``Compile``: clarify the logic of what to do with each module +- ``Compute``: make the evaluation more efficient +- ``Parsing/*``, ``OldParsing/*``, ``Conversion/*``: reduce the number + of parser formats and algorithms diff --git a/deprecated/doc/eu-langs.dot b/deprecated/doc/eu-langs.dot new file mode 100644 index 000000000..115ce0040 --- /dev/null +++ b/deprecated/doc/eu-langs.dot @@ -0,0 +1,79 @@ +graph{ + +size = "7,7" ; + +overlap = scale ; + +"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ; + +"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ; +"1" -- "Abs" [style = "solid"]; + +"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ; +"2" -- "Abs" [style = "solid"]; + +"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ; +"3" -- "Abs" [style = "solid"]; + +"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ; +"4" -- "Abs" [style = "solid"]; + +"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ; +"5" -- "Abs" [style = "solid"]; + +"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ; +"6" -- "Abs" [style = "solid"]; + +"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ; +"7" -- "Abs" [style = "solid"]; + +"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ; +"8" -- "Abs" [style = "solid"]; + +"9" [label 
= "French", style = "solid", shape = "ellipse", color = "green"] ; +"9" -- "Abs" [style = "solid"]; + +"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ; +"10" -- "Abs" [style = "solid"]; + +"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ; +"11" -- "Abs" [style = "solid"]; + +"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "12" [style = "solid"]; + +"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "13" [style = "solid"]; + +"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "14" [style = "solid"]; + +"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "15" [style = "solid"]; + +"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "16" [style = "solid"]; + +"17" [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "17" [style = "solid"]; + +"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "18" [style = "solid"]; + +"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "19" [style = "solid"]; + +"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "20" [style = "solid"]; + +"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "21" [style = "solid"]; + +"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ; +"Abs" -- "22" [style = "solid"]; + +"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ; +"Abs" -- "23" [style = "solid"]; + + +} diff --git a/deprecated/doc/eu-langs.png b/deprecated/doc/eu-langs.png new file mode 100644 index 000000000..8c46a19db Binary files /dev/null and b/deprecated/doc/eu-langs.png differ diff --git a/deprecated/doc/food-translet.png b/deprecated/doc/food-translet.png new file 
mode 100644 index 000000000..dd622a4bf Binary files /dev/null and b/deprecated/doc/food-translet.png differ diff --git a/deprecated/doc/food1.png b/deprecated/doc/food1.png new file mode 100644 index 000000000..767069dab Binary files /dev/null and b/deprecated/doc/food1.png differ diff --git a/deprecated/doc/food2.png b/deprecated/doc/food2.png new file mode 100644 index 000000000..b36a01b22 Binary files /dev/null and b/deprecated/doc/food2.png differ diff --git a/deprecated/doc/gf-compiler.dot b/deprecated/doc/gf-compiler.dot new file mode 100644 index 000000000..f8ce1aaae --- /dev/null +++ b/deprecated/doc/gf-compiler.dot @@ -0,0 +1,88 @@ +digraph { + + gfe [label = "file.gfe", style = "dashed", shape = "ellipse"]; + gfe -> gf1 [label = " MkConcrete", style = "dashed"]; + +gf1 [label = "file.gf", style = "solid", shape = "ellipse"]; +gf1 -> gf2 [label = " LexGF", style = "solid"]; + +gf2 [label = "token list", style = "solid", shape = "plaintext"]; +gf2 -> gf3 [label = " ParGF", style = "solid"]; + +gf3 [label = "source tree", style = "solid", shape = "plaintext"]; +gf3 -> gf4 [label = " SourceToGrammar", style = "solid"]; + + cf [label = "file.cf", style = "dashed", shape = "ellipse"]; + cf -> gf4 [label = " CF.PPrCF", style = "dashed"]; + + ebnf [label = "file.ebnf", style = "dashed", shape = "ellipse"]; + ebnf -> gf4 [label = " CF.EBNF", style = "dashed"]; + + +gf4 [label = "GF tree", style = "solid", shape = "plaintext"]; +gf4 -> gf5 [label = " Extend", style = "solid"]; + +gf5 [label = "inheritance-linked GF tree", style = "solid", shape = "plaintext"]; +gf5 -> gf6 [label = " Rename", style = "solid"]; + +gf6 [label = "name-resolved GF tree", style = "solid", shape = "plaintext"]; +gf6 -> gf7 [label = " CheckGrammar", style = "solid"]; + +gf7 [label = "type-annotated GF tree", style = "solid", shape = "plaintext"]; +gf7 -> gf8 [label = " Optimize", style = "solid"]; + +gf8 [label = "optimized GF tree", style = "solid", shape = "plaintext"]; +gf8 -> gf9 
[label = " GrammarToCanon", style = "solid"]; + +gf9 [label = "GFC tree", style = "solid", shape = "plaintext"]; +gf9 -> gfc [label = " BackOpt", style = "solid"]; + +gfc [label = "optimized GFC tree", style = "solid", shape = "box"]; +gfc -> gf11 [label = " PrintGFC", style = "solid"]; + +gf11 [label = "file.gfc", style = "solid", shape = "ellipse"]; + + + gfcc [label = "file.gfcc", style = "solid", shape = "ellipse"]; + gfc -> gfcc [label = " CanonToGFCC", style = "solid"]; + + mcfg [label = "file.gfcm", style = "dashed", shape = "ellipse"]; + gfc -> mcfg [label = " PrintGFC", style = "dashed"]; + + bnf [label = "file.cf", style = "dashed", shape = "ellipse"]; + gfc -> bnf [label = " CF.PrLBNF", style = "dashed"]; + + happy [label = "file.y (Happy)", style = "dashed", shape = "ellipse"]; + bnf -> happy [label = " bnfc", style = "dashed"]; + + bison [label = "file.y (Bison)", style = "dashed", shape = "ellipse"]; + bnf -> bison [label = " bnfc", style = "dashed"]; + + cup [label = "parser.java (CUP)", style = "dashed", shape = "ellipse"]; + bnf -> cup [label = " bnfc", style = "dashed"]; + + xml [label = "file.dtd (XML)", style = "dashed", shape = "ellipse"]; + bnf -> xml [label = " bnfc", style = "dashed"]; + + cfg [label = "CFG tree", style = "solid", shape = "plaintext"]; + gfc -> cfg [label = " Conversions.GFC", style = "dashed"]; + + cfgm [label = "file.cfgm", style = "dashed", shape = "ellipse"]; + cfg -> cfgm [label = " Conversions.GFC", style = "dashed"]; + + srg [label = "Non-LR CFG", style = "solid", shape = "plaintext"]; + cfg -> srg [label = " Speech.SRG", style = "dashed"]; + + gsl [label = "file.gsl", style = "dashed", shape = "ellipse"]; + srg -> gsl [label = " Speech.PrGSL", style = "dashed"]; + + jsgf [label = "file.jsgf", style = "dashed", shape = "ellipse"]; + srg -> jsgf [label = " Speech.PrJSGF", style = "dashed"]; + + fa [label = "DFA", style = "solid", shape = "plaintext"]; + cfg -> fa [label = " Speech.CFGToFiniteState", style = "dashed"]; 
+ + slf [label = "file.slf", style = "dashed", shape = "ellipse"]; + fa -> slf [label = " Speech.PrSLF", style = "dashed"]; + +} diff --git a/deprecated/doc/gf-compiler.png b/deprecated/doc/gf-compiler.png new file mode 100644 index 000000000..6949c37b5 Binary files /dev/null and b/deprecated/doc/gf-compiler.png differ diff --git a/deprecated/doc/gf-formalism.html b/deprecated/doc/gf-formalism.html new file mode 100644 index 000000000..52d9256aa --- /dev/null +++ b/deprecated/doc/gf-formalism.html @@ -0,0 +1,350 @@ + + + + +A Birds-Eye View of GF as a Grammar Formalism + +

A Birds-Eye View of GF as a Grammar Formalism

+ +Author: Aarne Ranta
+Last update: Thu Feb 2 14:16:01 2006 +
+ +

+
+

+ + +

+
+

+

+ +

+

+Abstract. This document gives a general description of the +Grammatical Framework (GF), with comparisons to other grammar +formalisms such as CG, ACG, HPSG, and LFG. +

+

+ +

+ +

GF in a few words

+

+Grammatical Framework (GF) is a grammar formalism +based on constructive type theory. +

+

+GF makes a distinction between abstract syntax and concrete syntax. +

+

+The abstract syntax part of GF is a logical framework, with +dependent types and higher-order functions. +

+

+The concrete syntax is a system of records containing strings and features. +

+

+A GF grammar defines a reversible homomorphism from an abstract syntax to a +concrete syntax. +

+

+A multilingual GF grammar is a set of concrete syntaxes associated with +one abstract syntax. +

+

+GF grammars are written in a high-level functional programming language, +which is compiled into a core language (GFC). +

+

+GF grammars can be used as resources, i.e. as libraries for writing +new grammars; these are compiled and optimized by the method of +grammar composition. +

+

+GF has a module system that supports grammar engineering and separate +compilation. +

+

+ +

+ +

History of GF

+

+1988. Intuitionistic Categorial Grammar; type theory as abstract syntax, +playing the role of Montague's analysis trees. Grammars implemented in Prolog. +

+

+1994. Type-Theoretical Grammar. Abstract syntax organized as a system of +combinators. Grammars implemented in ALF. +

+

+1996. Multilingual Type-Theoretical Grammar. Rules for generating six +languages from the same abstract syntax. Grammars implemented in ALF, ML, and +Haskell. +

+

+1998. The first implementation of GF as a language of its own. +

+

+2000. New version of GF: high-level functional source language, records used +for concrete syntax. +

+

+2003. The module system. +

+

+2004. Ljunglöf's thesis Expressivity and Complexity of GF. +

+

+ +

+ +

Some key ingredients of GF in other grammar formalisms

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
/GFACGLFGHPSGCG
abstract vs concrete syntaxXX?--
type theoryXX--X
records and featuresX-XX-
+ +

+

+ +

+ +

Examples of descriptions in each formalism

+

+To be written... +

+

+ +

+ +

Lambda terms and records

+

+In CS, abstract syntax is trees and concrete syntax is strings. +This works more or less for programming languages. +

+

+In CG, all syntax is lambda terms. +

+

+In Montague grammar, abstract syntax is lambda terms and +concrete syntax is trees. Abstract syntax as lambda terms +can be considered well-established. +

+

+In PATR and HPSG, concrete syntax is records. This can be considered +well-established for natural languages. +

+

+In ACG, both are lambda terms. This is more general than GF, +but reversibility requires a linearity restriction, which can be +unnatural for grammar writing. +

+

+In GF, linearization from lambda terms to records is reversible, +and grammar writing is not restricted to linear terms. +

+

+Grammar composition in ACG is just function composition. In GF, +it is more restricted... +

+

+ +

+ +

The structure of GF formalisms

+

+The following diagram (to be drawn properly!) describes the +levels. +

+
+         |   programming language design
+         V
+    GF source language
+         |
+         |   type-directed partial evaluation
+         V
+    GFC assembly language
+         |
+         |   Ljunglöf's translation
+         V
+    MCFG parser
+
+

+The last two phases are nontrivial mathematical properties. +

+

+In most grammar formalisms, grammarians have to work on the GFC +(or MCFG) level. +

+

+Maybe they use macros - they are therefore like macro assemblers. But there +are no separately compiled library modules, no type checking, etc. +

+

+ +

+ +

The expressivity of GF

+

+Parsing complexity is the same as MCFG: polynomial, with +unrestricted exponent depending on grammar. +This is between TAG and HPSG. +

+

+If semantic well-formedness (type theory) is taken into account, +then arbitrary logic can be expressed. The well-formedness of +abstract syntax is decidable, but the well-formedness of a +concrete-syntax string can require an arbitrary proof construction +and is therefore undecidable. +

+

+Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF +has the goal of assigning intended trees for strings. This is +generalized to shared trees for different languages. +

+

+The high-level language strives after the properties of +writability and readability (programming language notions). +

+

+ +

+ +

Grammars and parsing

+

+In many projects, a grammar is just seen as a declarative parsing program. +

+

+For GF, a grammar is primarily the definition of a language. +

+

+Detaching grammars from parsers is a good idea, giving +

+ + +

+Separating abstract from concrete syntax is a prerequisite for this: +we want parsers to return abstract syntax objects, and these must exist +independently of parse trees. +

+

+A possible radical approach to parsing: +use a grammar to generate a treebank and machine-learn +a statistical parser from this. +

+

+Comparison: Steedman in CCG has done something like this. +

+

+ +

+ +

Grammars as software libraries

+

+Reuse for different purposes. +

+

+Grammar composition. +

+

+ +

+ +

Multilinguality

+

+In application grammars, the AS is a semantic +model, and a CS covers domain terminology and idioms. +

+

+This can give publication-quality translation on +limited domains (e.g. the WebALT project). +

+

+Resource grammars with grammar composition lead to +compile-time transfer. +

+

+When is run-time transfer necessary? +

+

+Cf. CLE (Core Language Engine). +

+

+ +

+ +

Parametrized modules

+

+This notion comes from the ML language in the 1980's. +

+

+It can be used for sharing even more code between languages +than their AS. +

+

+Especially, for related languages (Scandinavian, Romance). +

+

+Cf. grammar porting in CLE: what they do with untyped +macro packages GF does with typable interfaces. +

+ + + + diff --git a/deprecated/doc/gf-formalism.txt b/deprecated/doc/gf-formalism.txt new file mode 100644 index 000000000..3b6963d11 --- /dev/null +++ b/deprecated/doc/gf-formalism.txt @@ -0,0 +1,279 @@ +A Birds-Eye View of GF as a Grammar Formalism +Author: Aarne Ranta +Last update: %%date(%c) + +% NOTE: this is a txt2tags file. +% Create an html file from this file using: +% txt2tags -thtml --toc gf-formalism.txt + +%!target:html + +%!postproc(html): #NEW + +[Logos/gf0.png] + +//Abstract. This document gives a general description of the// +//Grammatical Framework (GF), with comparisons to other grammar// +//formalisms such as CG, ACG, HPSG, and LFG.// + + +#NEW + +==Logical Frameworks and Grammar Formalisms== + +Logic - formalization of mathematics (mathematical language?) + +Linguistics - formalization of natural language + +Since math lang is a subset, we can expect similarities. + +But in natural language we have +- masses of empirical data +- no right of reform + + + +#NEW + +==High-level programming== + +We have to write a lot of program code when formalizing language. + +We need a language with proper abstractions. + +Cf. Paul Graham on Prolog: very high-level, but wrong abstractions. + +Typed functional languages work well in maths. + +We have developed one for linguistics +- some extra constructs, e.g. inflection tables +- constraint of reversibility (nontrivial math problem) + + +Writing a grammar of e.g. French clitics should not be a topic +on which one can write a paper - it should be easy to render in code +the known facts about languages! + + + +#NEW + +==GF in a few words== + +Grammatical Framework (GF) is a grammar formalism +based on **constructive type theory**. + +GF makes a distinction between **abstract syntax** and **concrete syntax**. + +The abstract syntax part of GF is a **logical framework**, with +dependent types and higher-order functions. + +The concrete syntax is a system of **records** containing strings and features. 
+ +A GF grammar defines a **reversible homomorphism** from an abstract syntax to a +concrete syntax. + +A **multilingual GF grammar** is a set of concrete syntaxes associated with +one abstract syntax. + +GF grammars are written in a high-level **functional programming language**, +which is compiled into a **core language** (GFC). + +GF grammars can be used as **resources**, i.e. as libraries for writing +new grammars; these are compiled and optimized by the method of +**grammar composition**. + +GF has a **module system** that supports grammar engineering and separate +compilation. + + +#NEW + +==History of GF== + +1988. Intuitionistic Categorial Grammar; type theory as abstract syntax, +playing the role of Montague's analysis trees. Grammars implemented in Prolog. + +1994. Type-Theoretical Grammar. Abstract syntax organized as a system of +combinators. Grammars implemented in ALF. + +1996. Multilingual Type-Theoretical Grammar. Rules for generating six +languages from the same abstract syntax. Grammars implemented in ALF, ML, and +Haskell. + +1998. The first implementation of GF as a language of its own. + +2000. New version of GF: high-level functional source language, records used +for concrete syntax. + +2003. The module system. + +2004. Ljunglöf's thesis //Expressivity and Complexity of GF//. + + + +#NEW + +==Some key ingredients of GF in other grammar formalisms== + +- [GF ]: Grammatical Framework +- [CG ]: categorial grammar +- [ACG ]: abstract categorial grammar +- [HPSG ]: head-driven phrase structure grammar +- [LFG ]: lexical functional grammar + + +| / | GF | ACG | LFG | HPSG | CG | +| abstract vs concrete syntax | X | X | ? | - | - | +| type theory | X | X | - | - | X | +| records and features | X | - | X | X | - | + + +#NEW + +==Examples of descriptions in each formalism== + +To be written... + + +#NEW + +==Lambda terms and records== + +In CS, abstract syntax is trees and concrete syntax is strings. +This works more or less for programming languages. 
+ +In CG, all syntax is lambda terms. + +In Montague grammar, abstract syntax is lambda terms and +concrete syntax is trees. Abstract syntax as lambda terms +can be considered well-established. + +In PATR and HPSG, concrete syntax is records. This can be considered +well-established for natural languages. + +In ACG, both are lambda terms. This is more general than GF, +but reversibility requires a linearity restriction, which can be +unnatural for grammar writing. + +In GF, linearization from lambda terms to records is reversible, +and grammar writing is not restricted to linear terms. + +Grammar composition in ACG is just function composition. In GF, +it is more restricted... + + +#NEW + +==The structure of GF formalisms== + +The following diagram (to be drawn properly!) describes the +levels. +``` + | programming language design + V + GF source language + | + | type-directed partial evaluation + V + GFC assembly language + | + | Ljunglöf's translation + V + MCFG parser +``` +The last two phases are nontrivial mathematical properties. + +In most grammar formalisms, grammarians have to work on the GFC +(or MCFG) level. + +Maybe they use macros - they are therefore like macro assemblers. But there +are no separately compiled library modules, no type checking, etc. + + +#NEW + +==The expressivity of GF== + +Parsing complexity is the same as MCFG: polynomial, with +unrestricted exponent depending on grammar. +This is between TAG and HPSG. + +If semantic well-formedness (type theory) is taken into account, +then arbitrary logic can be expressed. The well-formedness of +abstract syntax is decidable, but the well-formedness of a +concrete-syntax string can require an arbitrary proof construction +and is therefore undecidable. + +Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF +has the goal of assigning intended trees for strings. This is +generalized to shared trees for different languages. 
+ +The high-level language strives after the properties of +writability and readability (programming language notions). + + +#NEW + +==Grammars and parsing== + +In many projects, a grammar is just seen as a **declarative parsing program**. + +For GF, a grammar is primarily the **definition of a language**. + +Detaching grammars from parsers is a good idea, giving +- more efficient and robust parsing (statistical etc) +- cleaner grammars + + +Separating abstract from concrete syntax is a prerequisite for this: +we want parsers to return abstract syntax objects, and these must exist +independently of parse trees. + +A possible radical approach to parsing: +use a grammar to generate a treebank and machine-learn +a statistical parser from this. + +Comparison: Steedman in CCG has done something like this. + + +#NEW + +==Grammars as software libraries== + +Reuse for different purposes. + +Grammar composition. + + +#NEW + +==Multilinguality== + +In **application grammars**, the AS is a semantic +model, and a CS covers domain terminology and idioms. + +This can give publication-quality translation on +limited domains (e.g. the WebALT project). + +Resource grammars with grammar composition lead to +**compile-time transfer**. + +When is **run-time transfer** necessary? + +Cf. CLE (Core Language Engine). + + +#NEW + +==Parametrized modules== + +This notion comes from the ML language in the 1980's. + +It can be used for sharing even more code between languages +than their AS. + +Especially, for related languages (Scandinavian, Romance). + +Cf. grammar porting in CLE: what they do with untyped +macro packages GF does with typable interfaces. diff --git a/deprecated/doc/gf-ideas.html b/deprecated/doc/gf-ideas.html new file mode 100644 index 000000000..8119740fa --- /dev/null +++ b/deprecated/doc/gf-ideas.html @@ -0,0 +1,311 @@ + + + + + +GF Project Ideas + + +

+

+ +
+

+ +

+

GF Project Ideas

+ +Resource Grammars, Web Applications, etc
+contact: Aarne Ranta (aarne at chalmers dot se) +
+ +

+
+

+ + +

+
+

+ +

Resource Grammar Implementations

+

+GF Resource Grammar Library is an open-source computational grammar resource +that currently covers 12 languages. +The Library is a collaborative effort to which programmers from many countries +have contributed. The next goal is to extend the library +to all of the 23 official EU languages. Also other languages +are welcome all the time. The following diagram shows the current status of the +library. Each of the red and yellow ones is a potential project. +

+

+

+ +
+

+

+red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu +

+

+The linguistic coverage of the library includes the inflectional morphology +and basic syntax of each language. It can be used in GF applications +and also ported to other formats. It can also be used for building other +linguistic resources, such as morphological lexica and parsers. +The library is licensed under LGPL. +

+ +

Tasks

+

+Writing a grammar for a language is usually easier if other languages +from the same family already have grammars. The colours have the same +meaning as in the diagram above; in addition, we use boldface for the +red, still unimplemented languages and italics for the +orange languages in progress. Thus, in particular, each of the languages +coloured red below is a possible programming project. +

+

+Baltic: +

+ + +

+Celtic: +

+ + +

+Fenno-Ugric: +

+ + +

+Germanic: +

+ + +

+Hellenic: +

+ + +

+Indo-Iranian: +

+ + +

+Romance: +

+ + +

+Semitic: +

+ + +

+Slavonic: +

+ + +

+Tai: +

+ + +

+Turkic: +

+ + + +

Who is qualified

+

+Writing a resource grammar implementation requires good general programming +skills, and a good explicit knowledge of the grammar of the target language. +A typical participant could be +

+ + +

+But it is the quality of the assignment that is assessed, not any formal +requirements. The "typical participant" was described to give an idea of +who is likely to succeed in this. +

+ +

The Summer School

+

+A Summer School on resource grammars and applications will +be organized at the campus of Chalmers University of Technology in Gothenburg, +Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in +a resource grammar project; the participants are assumed to learn GF before +the Summer School, but how far they have come in their projects may vary. +

+

+More information on the Summer School web page: +

+

+http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html +

+ +

Other project ideas

+ +

GF interpreter in Java

+

+The idea is to write a run-time system for GF grammars in Java. This enables +the use of embedded grammars in Java applications. This project is +a fresh-up of earlier work, +now using the new run-time format PGF and addressing a new parsing algorithm. +

+

+Requirements: Java, Haskell, basics of compilers and parsing algorithms. +

+ +

GF interpreter in C#

+

+The idea is to write a run-time system for GF grammars in C#. This enables +the use of embedded grammars in C# applications. This project is +similar to earlier work +on Java, now addressing C# and using the new run-time format PGF. +

+

+Requirements: C#, Haskell, basics of compilers and parsing algorithms. +

+ +

GF localization library

+

+This is an idea for a software localization library using GF grammars. +The library should replace strings by grammar rules, which can be conceived +as very smart templates always guaranteeing grammatically correct output. +The library should be based on the +GF Resource Grammar Library, providing infrastructure +currently for 12 languages. +

+

+Requirements: GF, some natural languages, some localization platform +

+ +

Multilingual grammar applications for mobile phones

+

+GF grammars can be compiled into programs that can be run on different +platforms, such as web browsers and mobile phones. An example is a +numeral translator running on both these platforms. +

+

+The proposed project is rather open: find some cool applications of +the technology that are useful or entertaining for mobile phone users. A +part of the project is to investigate implementation issues such as making +the best use of the phone's resources. Possible applications have +something to do with translation; one suggestion is an SMS editor/translator. +

+

+Requirements: GF, JavaScript, some phone application development tools +

+ +

Multilingual grammar applications for the web

+

+This project is rather open: find some cool applications of +the technology that are useful or entertaining on the web. Examples include +

+ + +

+Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI +

+ +

GMail gadget for GF

+

+It is possible to add custom gadgets to GMail. If you are going to write +e-mail in a foreign language then you probably will need help from a +dictionary or you may want to check something in the grammar. GF provides +all resources that you may need but you have to think about how to +design a gadget that fits well in the GMail environment and what +functionality from GF you want to expose. +

+

+Requirements: GF, Google Web Toolkit +

+ +

Dissemination and intellectual property

+

+All code suggested here will be released under the LGPL just like +the current resource grammars and run-time GF libraries, +with the copyright held by respective authors. +

+

+As a rule, the code will be distributed via the GF web site. +

+ + + + diff --git a/deprecated/doc/gf-ideas.txt b/deprecated/doc/gf-ideas.txt new file mode 100644 index 000000000..3f62196b9 --- /dev/null +++ b/deprecated/doc/gf-ideas.txt @@ -0,0 +1,231 @@ +GF Project Ideas +Resource Grammars, Web Applications, etc +contact: Aarne Ranta (aarne at chalmers dot se) + +%!Encoding : iso-8859-1 + +%!target:html +%!postproc(html): #BECE
+%!postproc(html): #ENCE
+%!postproc(html): #GRAY +%!postproc(html): #EGRAY +%!postproc(html): #RED +%!postproc(html): #YELLOW +%!postproc(html): #ERED +%!postproc(html): #EYELLOW + +#BECE +[Logos/gf0.png] +#ENCE + + +==Resource Grammar Implementations== + +GF Resource Grammar Library is an open-source computational grammar resource +that currently covers 12 languages. +The Library is a collaborative effort to which programmers from many countries +have contributed. The next goal is to extend the library +to all of the 23 official EU languages. Also other languages +are welcome all the time. The following diagram shows the current status of the +library. Each of the red and yellow ones is a potential project. + +#BECE +[school-langs.png] +#ENCE + + +//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu// + +The linguistic coverage of the library includes the inflectional morphology +and basic syntax of each language. It can be used in GF applications +and also ported to other formats. It can also be used for building other +linguistic resources, such as morphological lexica and parsers. +The library is licensed under LGPL. + + +===Tasks=== + +Writing a grammar for a language is usually easier if other languages +from the same family already have grammars. The colours have the same +meaning as in the diagram above; in addition, we use boldface for the +red, still unimplemented languages and italics for the +orange languages in progress. Thus, in particular, each of the languages +coloured red below is a possible programming project. 
+ +Baltic: +- #RED Latvian #ERED +- #RED Lithuanian #ERED + + +Celtic: +- #RED Irish #ERED + + +Fenno-Ugric: +- #RED Estonian #ERED +- #GRAY Finnish #EGRAY +- #RED Hungarian #ERED + + +Germanic: +- #GRAY Danish #EGRAY +- #RED Dutch #ERED +- #GRAY English #EGRAY +- #GRAY German #EGRAY +- #GRAY Norwegian #EGRAY +- #GRAY Swedish #EGRAY + + +Hellenic: +- #RED Greek #ERED + + +Indo-Iranian: +- #YELLOW Hindi #EYELLOW +- #YELLOW Urdu #EYELLOW + + +Romance: +- #GRAY Catalan #EGRAY +- #GRAY French #EGRAY +- #GRAY Italian #EGRAY +- #RED Portuguese #ERED +- #YELLOW Romanian #EYELLOW +- #GRAY Spanish #EGRAY + + +Semitic: +- #YELLOW Arabic #EYELLOW +- #RED Maltese #ERED + + +Slavonic: +- #GRAY Bulgarian #EGRAY +- #RED Czech #ERED +- #YELLOW Polish #EYELLOW +- #GRAY Russian #EGRAY +- #RED Slovak #ERED +- #RED Slovenian #ERED + + +Tai: +- #YELLOW Thai #EYELLOW + + +Turkic: +- #YELLOW Turkish #EYELLOW + + +===Who is qualified=== + +Writing a resource grammar implementation requires good general programming +skills, and a good explicit knowledge of the grammar of the target language. +A typical participant could be +- native or fluent speaker of the target language +- interested in languages on the theoretical level, and preferably familiar + with many languages (to be able to think about them on an abstract level) +- familiar with functional programming languages such as ML or Haskell + (GF itself is a language similar to these) +- on Master's or PhD level in linguistics, computer science, or mathematics + + +But it is the quality of the assignment that is assessed, not any formal +requirements. The "typical participant" was described to give an idea of +who is likely to succeed in this. + + +===The Summer School=== + +A Summer School on resource grammars and applications will +be organized at the campus of Chalmers University of Technology in Gothenburg, +Sweden, on 17-28 August 2009. 
It can be seen as a natural checkpoint in +a resource grammar project; the participants are assumed to learn GF before +the Summer School, but how far they have come in their projects may vary. + +More information on the Summer School web page: + +[``http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html`` http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html] + + +==Other project ideas== + +===GF interpreter in Java=== + +The idea is to write a run-time system for GF grammars in Java. This enables +the use of **embedded grammars** in Java applications. This project is +a fresh-up of [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html], +now using the new run-time format PGF and addressing a new parsing algorithm. + +Requirements: Java, Haskell, basics of compilers and parsing algorithms. + + +===GF interpreter in C#=== + +The idea is to write a run-time system for GF grammars in C#. This enables +the use of **embedded grammars** in C# applications. This project is +similar to [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html] +on Java, now addressing C# and using the new run-time format PGF. + +Requirements: C#, Haskell, basics of compilers and parsing algorithms. + + +===GF localization library=== + +This is an idea for a software localization library using GF grammars. +The library should replace strings by grammar rules, which can be conceived +as very smart templates always guaranteeing grammatically correct output. +The library should be based on the +[GF Resource Grammar Library http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html], providing infrastructure +currently for 12 languages. 
+ +Requirements: GF, some natural languages, some localization platform + + +===Multilingual grammar applications for mobile phones=== + +GF grammars can be compiled into programs that can be run on different +platforms, such as web browsers and mobile phones. An example is a +[numeral translator http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html] running on both these platforms. + +The proposed project is rather open: find some cool applications of +the technology that are useful or entertaining for mobile phone users. A +part of the project is to investigate implementation issues such as making +the best use of the phone's resources. Possible applications have +something to do with translation; one suggestion is an SMS editor/translator. + +Requirements: GF, JavaScript, some phone application development tools + + +===Multilingual grammar applications for the web=== + +This project is rather open: find some cool applications of +the technology that are useful or entertaining on the web. Examples include +- translators: see [demo http://129.16.250.57:41296/translate] +- multilingual wikis: see [demo http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi] +- fridge magnets: see [demo http://129.16.250.57:41296/fridge] + + +Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI + + +===GMail gadget for GF=== + +It is possible to add custom gadgets to GMail. If you are going to write +e-mail in a foreign language then you probably will need help from a +dictionary or you may want to check something in the grammar. GF provides +all resources that you may need but you have to think about how to +design a gadget that fits well in the GMail environment and what +functionality from GF you want to expose. 
+ +Requirements: GF, Google Web Toolkit + + + +==Dissemination and intellectual property== + +All code suggested here will be released under the LGPL just like +the current resource grammars and run-time GF libraries, +with the copyright held by respective authors. + +As a rule, the code will be distributed via the GF web site. + diff --git a/deprecated/doc/gf-statistics.txt b/deprecated/doc/gf-statistics.txt new file mode 100644 index 000000000..499ad7d09 --- /dev/null +++ b/deprecated/doc/gf-statistics.txt @@ -0,0 +1,289 @@ +(Adapted from KeY statistics by Vladimir Klebanov) + +This is GF right now: + +Total Physical Source Lines of Code (SLOC) = 42,467 + +Development Effort Estimate, Person-Years (Person-Months) = 10.24 (122.932) + (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05)) + +Schedule Estimate, Years (Months) = 1.30 (15.56) + (Basic COCOMO model, Months = 2.5 * (person-months**0.38)) + +Estimated Average Number of Developers (Effort/Schedule) = 7.90 + +Total Estimated Cost to Develop = $ 1,383,870 + (average salary = $56,286/year, overhead = 2.40). + +SLOCCount, Copyright (C) 2001-2004 David A. 
Wheeler + + + +----------- basis of counting: Haskell code + BNFC code - generated Happy parsers + +-- GF/src% wc -l *.hs GF/*.hs GF/*/*.hs GF/*/*/*.hs GF/*/*.cf JavaGUI/*.java +-- date Fri Jun 3 10:00:31 CEST 2005 + + 104 GF.hs + 402 GF/API.hs + 98 GF/GFModes.hs + 379 GF/Shell.hs + 4 GF/Today.hs + 43 GF/API/BatchTranslate.hs + 145 GF/API/GrammarToHaskell.hs + 77 GF/API/IOGrammar.hs + 25 GF/API/MyParser.hs + 177 GF/Canon/AbsGFC.hs + 37 GF/Canon/ByLine.hs + 192 GF/Canon/CanonToGrammar.hs + 293 GF/Canon/CMacros.hs + 79 GF/Canon/GetGFC.hs + 86 GF/Canon/GFC.hs + 291 GF/Canon/LexGFC.hs + 201 GF/Canon/Look.hs + 235 GF/Canon/MkGFC.hs + 46 GF/Canon/PrExp.hs + 352 GF/Canon/PrintGFC.hs + 147 GF/Canon/Share.hs + 207 GF/Canon/SkelGFC.hs + 46 GF/Canon/TestGFC.hs + 49 GF/Canon/Unlex.hs + 202 GF/CF/CanonToCF.hs + 213 GF/CF/CF.hs + 217 GF/CF/CFIdent.hs + 62 GF/CF/CFtoGrammar.hs + 47 GF/CF/CFtoSRG.hs + 206 GF/CF/ChartParser.hs + 191 GF/CF/EBNF.hs + 45 GF/CFGM/AbsCFG.hs + 312 GF/CFGM/LexCFG.hs + 157 GF/CFGM/PrintCFG.hs + 109 GF/CFGM/PrintCFGrammar.hs + 85 GF/CF/PPrCF.hs + 150 GF/CF/PrLBNF.hs + 106 GF/CF/Profile.hs + 141 GF/Compile/BackOpt.hs + 763 GF/Compile/CheckGrammar.hs + 337 GF/Compile/Compile.hs + 136 GF/Compile/Extend.hs + 124 GF/Compile/GetGrammar.hs + 282 GF/Compile/GrammarToCanon.hs + 93 GF/Compile/MkConcrete.hs + 128 GF/Compile/MkResource.hs + 83 GF/Compile/MkUnion.hs + 146 GF/Compile/ModDeps.hs + 294 GF/Compile/NewRename.hs + 227 GF/Compile/Optimize.hs + 76 GF/Compile/PGrammar.hs + 84 GF/Compile/PrOld.hs + 119 GF/Compile/Rebuild.hs + 63 GF/Compile/RemoveLiT.hs + 274 GF/Compile/Rename.hs + 535 GF/Compile/ShellState.hs + 135 GF/Compile/Update.hs + 129 GF/Conversion/GFC.hs + 149 GF/Conversion/GFCtoSimple.hs + 53 GF/Conversion/MCFGtoCFG.hs + 46 GF/Conversion/RemoveEpsilon.hs + 102 GF/Conversion/RemoveErasing.hs + 82 GF/Conversion/RemoveSingletons.hs + 137 GF/Conversion/SimpleToFinite.hs + 26 GF/Conversion/SimpleToMCFG.hs + 230 GF/Conversion/Types.hs + 143 GF/Data/Assoc.hs + 
118 GF/Data/BacktrackM.hs + 20 GF/Data/ErrM.hs + 119 GF/Data/GeneralDeduction.hs + 30 GF/Data/Glue.hs + 67 GF/Data/IncrementalDeduction.hs + 61 GF/Data/Map.hs + 662 GF/Data/Operations.hs + 127 GF/Data/OrdMap2.hs + 120 GF/Data/OrdSet.hs + 193 GF/Data/Parsers.hs + 64 GF/Data/RedBlack.hs + 150 GF/Data/RedBlackSet.hs + 19 GF/Data/SharedString.hs + 127 GF/Data/SortedList.hs + 134 GF/Data/Str.hs + 120 GF/Data/Trie2.hs + 129 GF/Data/Trie.hs + 71 GF/Data/Utilities.hs + 243 GF/Data/Zipper.hs + 78 GF/Embed/EmbedAPI.hs + 113 GF/Embed/EmbedCustom.hs + 137 GF/Embed/EmbedParsing.hs + 50 GF/Formalism/CFG.hs + 51 GF/Formalism/GCFG.hs + 58 GF/Formalism/MCFG.hs + 246 GF/Formalism/SimpleGFC.hs + 349 GF/Formalism/Utilities.hs + 30 GF/Fudgets/ArchEdit.hs + 134 GF/Fudgets/CommandF.hs + 51 GF/Fudgets/EventF.hs + 59 GF/Fudgets/FudgetOps.hs + 37 GF/Fudgets/UnicodeF.hs + 86 GF/Grammar/AbsCompute.hs + 38 GF/Grammar/Abstract.hs + 149 GF/Grammar/AppPredefined.hs + 312 GF/Grammar/Compute.hs + 215 GF/Grammar/Grammar.hs + 46 GF/Grammar/Lockfield.hs + 189 GF/Grammar/LookAbs.hs + 182 GF/Grammar/Lookup.hs + 745 GF/Grammar/Macros.hs + 340 GF/Grammar/MMacros.hs + 115 GF/Grammar/PatternMatch.hs + 279 GF/Grammar/PrGrammar.hs + 121 GF/Grammar/Refresh.hs + 44 GF/Grammar/ReservedWords.hs + 251 GF/Grammar/TC.hs + 301 GF/Grammar/TypeCheck.hs + 96 GF/Grammar/Unify.hs + 101 GF/Grammar/Values.hs + 89 GF/Infra/CheckM.hs + 43 GF/Infra/Comments.hs + 152 GF/Infra/Ident.hs + 390 GF/Infra/Modules.hs + 358 GF/Infra/Option.hs + 179 GF/Infra/Print.hs + 331 GF/Infra/ReadFiles.hs + 337 GF/Infra/UseIO.hs + 153 GF/OldParsing/CFGrammar.hs + 283 GF/OldParsing/ConvertFiniteGFC.hs + 121 GF/OldParsing/ConvertFiniteSimple.hs + 34 GF/OldParsing/ConvertGFCtoMCFG.hs + 122 GF/OldParsing/ConvertGFCtoSimple.hs + 44 GF/OldParsing/ConvertGrammar.hs + 52 GF/OldParsing/ConvertMCFGtoCFG.hs + 30 GF/OldParsing/ConvertSimpleToMCFG.hs + 43 GF/OldParsing/GCFG.hs + 86 GF/OldParsing/GeneralChart.hs + 148 GF/OldParsing/GrammarTypes.hs + 50 
GF/OldParsing/IncrementalChart.hs + 206 GF/OldParsing/MCFGrammar.hs + 43 GF/OldParsing/ParseCFG.hs + 82 GF/OldParsing/ParseCF.hs + 177 GF/OldParsing/ParseGFC.hs + 37 GF/OldParsing/ParseMCFG.hs + 161 GF/OldParsing/SimpleGFC.hs + 188 GF/OldParsing/Utilities.hs + 51 GF/Parsing/CFG.hs + 66 GF/Parsing/CF.hs + 151 GF/Parsing/GFC.hs + 64 GF/Parsing/MCFG.hs + 83 GF/Printing/PrintParser.hs + 127 GF/Printing/PrintSimplifiedTerm.hs + 190 GF/Shell/CommandL.hs + 556 GF/Shell/Commands.hs + 524 GF/Shell/HelpFile.hs + 79 GF/Shell/JGF.hs + 171 GF/Shell/PShell.hs + 221 GF/Shell/ShellCommands.hs + 66 GF/Shell/SubShell.hs + 87 GF/Shell/TeachYourself.hs + 296 GF/Source/AbsGF.hs + 229 GF/Source/GrammarToSource.hs + 312 GF/Source/LexGF.hs + 528 GF/Source/PrintGF.hs + 353 GF/Source/SkelGF.hs + 657 GF/Source/SourceToGrammar.hs + 58 GF/Source/TestGF.hs + 72 GF/Speech/PrGSL.hs + 65 GF/Speech/PrJSGF.hs + 128 GF/Speech/SRG.hs + 103 GF/Speech/TransformCFG.hs + 30 GF/System/ArchEdit.hs + 90 GF/System/Arch.hs + 27 GF/System/NoReadline.hs + 27 GF/System/Readline.hs + 73 GF/System/Tracing.hs + 25 GF/System/UseReadline.hs + 63 GF/Text/Arabic.hs + 97 GF/Text/Devanagari.hs + 72 GF/Text/Ethiopic.hs + 99 GF/Text/ExtendedArabic.hs + 37 GF/Text/ExtraDiacritics.hs + 172 GF/Text/Greek.hs + 53 GF/Text/Hebrew.hs + 95 GF/Text/Hiragana.hs + 69 GF/Text/LatinASupplement.hs + 47 GF/Text/OCSCyrillic.hs + 45 GF/Text/Russian.hs + 77 GF/Text/Tamil.hs + 125 GF/Text/Text.hs + 69 GF/Text/Unicode.hs + 47 GF/Text/UTF8.hs + 56 GF/Translate/GFT.hs + 427 GF/UseGrammar/Custom.hs + 435 GF/UseGrammar/Editing.hs + 180 GF/UseGrammar/Generate.hs + 71 GF/UseGrammar/GetTree.hs + 143 GF/UseGrammar/Information.hs + 228 GF/UseGrammar/Linear.hs + 130 GF/UseGrammar/Morphology.hs + 70 GF/UseGrammar/Paraphrases.hs + 157 GF/UseGrammar/Parsing.hs + 66 GF/UseGrammar/Randomized.hs + 170 GF/UseGrammar/Session.hs + 186 GF/UseGrammar/Tokenize.hs + 43 GF/UseGrammar/Transfer.hs + 122 GF/Visualization/NewVisualizationGrammar.hs + 123 
GF/Visualization/VisualizeGrammar.hs + 63 GF/Conversion/SimpleToMCFG/Coercions.hs + 256 GF/Conversion/SimpleToMCFG/Nondet.hs + 129 GF/Conversion/SimpleToMCFG/Strict.hs + 71 GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs + 281 GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs + 277 GF/OldParsing/ConvertGFCtoMCFG/Old.hs + 189 GF/OldParsing/ConvertGFCtoMCFG/Strict.hs + 70 GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs + 245 GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs + 277 GF/OldParsing/ConvertSimpleToMCFG/Old.hs + 139 GF/OldParsing/ConvertSimpleToMCFG/Strict.hs + 83 GF/OldParsing/ParseCFG/General.hs + 142 GF/OldParsing/ParseCFG/Incremental.hs + 156 GF/OldParsing/ParseMCFG/Basic.hs + 103 GF/Parsing/CFG/General.hs + 150 GF/Parsing/CFG/Incremental.hs + 98 GF/Parsing/CFG/PInfo.hs + 226 GF/Parsing/MCFG/Active2.hs + 304 GF/Parsing/MCFG/Active.hs + 144 GF/Parsing/MCFG/Incremental2.hs + 163 GF/Parsing/MCFG/Incremental.hs + 128 GF/Parsing/MCFG/Naive.hs + 163 GF/Parsing/MCFG/PInfo.hs + 194 GF/Parsing/MCFG/Range.hs + 183 GF/Parsing/MCFG/ViaCFG.hs + 167 GF/Canon/GFC.cf + 36 GF/CFGM/CFG.cf + 321 GF/Source/GF.cf + 272 JavaGUI/DynamicTree2.java + 272 JavaGUI/DynamicTree.java + 2357 JavaGUI/GFEditor2.java + 1420 JavaGUI/GFEditor.java + 30 JavaGUI/GrammarFilter.java + 13 JavaGUI/LinPosition.java + 18 JavaGUI/MarkedArea.java + 1552 JavaGUI/Numerals.java + 22 JavaGUI/Utils.java + 5956 total + 48713 total + +- 2131 GF/Canon/ParGFC.hs + 3336 GF/Source/ParGF.hs + 779 GF/CFGM/ParCFG.hs + + 42467 total + +-------- + +sloccount sloc = + let + ksloc = sloc / 1000 + effort = 2.4 * (ksloc ** 1.05) + schedule = 2.5 * (effort ** 0.38) + develops = effort / schedule + cost = 56286 * (effort/12) * 2.4 + in + [sloc,ksloc,effort,effort/12,schedule,schedule/12,develops,cost] diff --git a/deprecated/doc/gf-summerschool.txt b/deprecated/doc/gf-summerschool.txt new file mode 100644 index 000000000..0acf9177d --- /dev/null +++ b/deprecated/doc/gf-summerschool.txt @@ -0,0 +1,533 @@ +GF Resource Grammar Summer School 
+Gothenburg, 17-28 August 2009 +Aarne Ranta (aarne at chalmers.se) + +%!Encoding : iso-8859-1 + +%!target:html +%!postproc(html): #BECE
+%!postproc(html): #ENCE
+%!postproc(html): #GRAY +%!postproc(html): #EGRAY +%!postproc(html): #RED +%!postproc(html): #YELLOW +%!postproc(html): #ERED + +#BECE +[school-langs.png] +#ENCE + + +//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu// + + +==News== + +An on-line course //GF for Resource Grammar Writers// will start on +Monday 20 April at 15.30 CEST. The slides and recordings of the five +45-minute lectures will be made available via this web page. If requested, +the course may be repeated in the beginning of the summer school. + + +==Executive summary== + +GF Resource Grammar Library is an open-source computational grammar resource +that currently covers 12 languages. +The Summer School is a part of a collaborative effort to extend the library +to all of the 23 official EU languages. Also other languages +chosen by the participants are welcome. + +The missing EU languages are: +Czech, Dutch, Estonian, Greek, Hungarian, Irish, Latvian, Lithuanian, +Maltese, Portuguese, Slovak, and Slovenian. There is also more work to +be done on Polish and Romanian. + +The linguistic coverage of the library includes the inflectional morphology +and basic syntax of each language. It can be used in GF applications +and also ported to other formats. It can also be used for building other +linguistic resources, such as morphological lexica and parsers. +The library is licensed under LGPL. + +In the summer school, each language will be implemented by one or two students +working together. A morphology implementation will be credited +as a Chalmers course worth 7.5 ETCS points; adding a syntax implementation +will be worth more. The estimated total work load is 1-2 months for the +morphology, and 3-6 months for the whole grammar. + +Participation in the course is free. Registration is done via the courses's +Google group, [``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]. 
The registration deadline is 15 June 2009. + +Some travel grants will be available. They are distributed on the basis of a +GF programming contest in April and May. + +The summer school will be held on 17-28 August 2009, at the campus of +Chalmers University of Technology in Gothenburg, Sweden. + + +[align6.png] + +//Word alignment produced by GF from the resource grammar in Bulgarian, English, Italian, German, Finnish, French, and Swedish.// + +==Introduction== + +Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this +document. There is a growing need of linguistic resources for these +languages, to help in tasks such as translation and information retrieval. +These resources should be **portable** and **freely accessible**. +Languages marked in red in the diagram are of particular interest for +the summer school, since they are those on which the effort will be concentrated. + +GF (Grammatical Framework, +[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf]) +is a **functional programming language** designed for writing natural +language grammars. It provides an efficient platform for this task, due to +its modern characteristics: +- It is a functional programming language, similar to Haskell and ML. +- It has a static type system and type checker. +- It has a powerful module system supporting separate compilation + and data abstraction. +- It has an optimizing compiler to **Portable Grammar Format** (PGF). +- PGF can be further compiled to other formats, such as JavaScript and + speech recognition language models. +- GF has a **resource grammar library** giving access to the morphology and + basic syntax of 12 languages. + + +In addition to "ordinary" grammars for single languages, GF +supports **multilingual grammars**. A multilingual GF grammar consists of an +**abstract syntax** and a set of **concrete syntaxes**. +An abstract syntax is system of **trees**, serving as a semantic +model or an ontology. 
A concrete syntax is a mapping from abstract syntax +trees to strings of a particular language. + +These mappings defined in concrete syntax are **reversible**: they +can be used both for **generating** strings from trees, and for +**parsing** strings into trees. Combinations of generation and +parsing can be used for **translation**, where the abstract +syntax works as an **interlingua**. Thus GF has been used as a +framework for building translation systems in several areas +of application and large sets of languages. + + + +==The GF resource grammar library== + +The GF resource grammar library is a set of grammars usable as libraries when +building translation systems and other applications. +The library currently covers +the 9 languages coloured in green in the diagram above; in addition, +Catalan, Norwegian, and Russian are covered, and there is ongoing work on +Arabic, Hindi/Urdu, Polish, Romanian, and Thai. + +The purpose of the resource grammar library is to define the "low-level" structure +of a language: inflection, word order, agreement. This structure belongs to what +linguists call morphology and syntax. It can be very complex and requires +a lot of knowledge. Yet, when translating from one language to +another, knowing morphology and syntax is but a part of what is needed. +The translator (whether human +or machine) must understand the meaning of what is translated, and must also know +the idiomatic way to express the meaning in the target language. This knowledge +can be very domain-dependent and requires in general an expert in the field to +reach high quality: a mathematician in the field of mathematics, a meteorologist +in the field of weather reports, etc. + +The problem is to find a person who is an expert in both the domain of translation +and in the low-level linguistic details. It is the rareness of this combination +that has made it difficult to build interlingua-based translation systems. 
+The GF resource grammar library has the mission of helping in this task. +It encapsulates the low-level linguistics in program modules +accessed through easy-to-use interfaces. +Experts on different domains can build translation systems by using the library, +without knowing low-level linguistics. The idea is much the same as when a +programmer builds a graphical user interface (GUI) from high-level elements such as +buttons and menus, without having to care about pixels or geometrical forms. + + +===Missing EU languages, by the family=== + +Writing a grammar for a language is usually easier if other languages +from the same family already have grammars. The colours have the same +meaning as in the diagram above. + +Baltic: +#RED Latvian #ERED +#RED Lithuanian #ERED + +Celtic: +#RED Irish #ERED + +Fenno-Ugric: +#RED Estonian #ERED +#GRAY Finnish #EGRAY +#RED Hungarian #ERED + +Germanic: +#GRAY Danish #EGRAY +#RED Dutch #ERED +#GRAY English #EGRAY +#GRAY German #EGRAY +#GRAY Swedish #EGRAY + +Hellenic: +#RED Greek #ERED + +Romance: +#GRAY French #EGRAY +#GRAY Italian #EGRAY +#RED Portuguese #ERED +#YELLOW Romanian #ERED +#GRAY Spanish #EGRAY + +Semitic: +#RED Maltese #ERED + +Slavonic: +#GRAY Bulgarian #EGRAY +#RED Czech #ERED +#YELLOW Polish #ERED +#RED Slovak #ERED +#RED Slovenian #ERED + + + + + + +===Applications of the library=== + +In addition to translation, the library is also useful in **localization**, +that is, porting a piece of software to new languages. 
+The GF resource grammar library has been used in three major projects that need +interlingua-based translation or localization of systems to new languages: +- in KeY, + [``http://www.key-project.org/`` http://www.key-project.org/], + for writing formal and informal software specifications (3 languages) +- in WebALT, + [``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html], + for translating mathematical exercises to 7 languages +- in TALK [``http://www.talk-project.org`` http://www.talk-project.org], + where the library was used for localizing spoken dialogue systems + to six languages + + +The library is also a generic **linguistic resource**, +which can be used for tasks +such as language teaching and information retrieval. The liberal license (LGPL) +makes it usable for anyone and for any task. GF also has tools supporting the +use of grammars in programs written in other +programming languages: C, C++, Haskell, +Java, JavaScript, and Prolog. In connection with the TALK project, +support has also been +developed for translating GF grammars to language models used in speech +recognition (GSL/Nuance, HTK/ATK, SRGS, JSGF). + + + +===The structure of the library=== + +The library has the following main parts: +- **Inflection paradigms**, covering the inflection of each language. +- **Core Syntax**, covering a large set of syntax rule that + can be implemented for all languages involved. +- **Common Test Lexicon**, giving ca. 500 common words that can be used for + testing the library. +- **Language-Specific Syntax Extensions**, covering syntax rules that are + not implementable for all languages. +- **Language-Specific Lexica**, word lists for each language, with + accurate morphological and syntactic information. + + +The goal of the summer school is to implement, for each language, at least +the first three components. The latter three are more open-ended in character. 
+ + +==The summer school== + +The goal of the summer school is to extend the GF resource grammar library +to covering all 23 EU languages, which means we need 15 new languages. +We also welcome other languages than these 23, +if there are interested participants. + +The amount of work and skill is between a Master's thesis and a PhD thesis. +The Russian implementation was made by Janna Khegai as a part of her +PhD thesis; the thesis contains other material, too. +The Arabic implementation was started by Ali El Dada in his Master's thesis, +but the thesis does not cover the whole API. The realistic amount of work is +somewhere between 3 and 8 person months, +but this is very much language-dependent. +Dutch, for instance, can profit from previous implementations of German and +Scandinavian languages, and will probably require less work. +Latvian and Lithuanian are the first languages of the Baltic family and +will probably require more work. + +In any case, the proposed allocation of work power is 2 participants per +language. They will do 1 months' worth of home work, followed +by 2 weeks of summer school, followed by 4 months work at home. +Who are these participants? + + +===Selecting participants=== + +Persons interested to participate in the Summer School should sign up in +the **Google Group** of the course, + +[``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/] + +The registration deadline is 15 June 2009. + +Notice: you can sign up in the Google +group even if you are not planning to attend the summer school, but are +just interested in the topic. There will be a separate registration to the +school itself later. + +The participants are recommended to learn GF in advance, by self-study from the +[tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html]. +This should take a couple of weeks. An **on-line course** will be +arranged on 20-29 April to help in getting started with GF. 
+ +At the end of the on-line course, a **programming assignment** will be published. +This assignment will test skills required in resource grammar programming. +Work on the assignment will take a couple of weeks. +Those who are interested in getting a travel grant will submit +their sample resource grammar fragment +to the Summer School Committee by 12 May. +The Committee then decides who is given a travel grant of up to 1000 EUR. + +Notice: you can participate in the summer school without following the on-line +course or participating in the contest. These things are required only if you +want a travel grant. If requested by enough many participants, the lectures of +the on-line course will be repeated in the beginning of the summer school. + +The summer school itself is devoted for working on resource grammars. +In addition to grammar writing itself, testing and evaluation is +performed. One way to do this is via adding new languages +to resource grammar applications - in particular, to the WebALT mathematical +exercise translator. + +The resource grammars are expected to be completed by December 2009. They will +be published at GF website and licensed under LGPL. + +The participants are encouraged to contact each other and even work in groups. + + + +===Who is qualified=== + +Writing a resource grammar implementation requires good general programming +skills, and a good explicit knowledge of the grammar of the target language. +A typical participant could be +- native or fluent speaker of the target language +- interested in languages on the theoretical level, and preferably familiar + with many languages (to be able to think about them on an abstract level) +- familiar with functional programming languages such as ML or Haskell + (GF itself is a language similar to these) +- on Master's or PhD level in linguistics, computer science, or mathematics + + +But it is the quality of the assignment that is assessed, not any formal +requirements. 
The "typical participant" was described to give an idea of +who is likely to succeed in this. + + +===Costs=== + +The summer school is free of charge. + +Some travel grants are given, on the basis of a programming contest, +to cover travel and accommodation costs up to 1000 EUR +per person. + +The number of grants will be decided during Spring 2009, and the grant +holders will be notified before the beginning of June. + +Special terms will apply to students in +[GSLT http://www.gslt.hum.gu.se/] and +[NGSLT http://ngslt.org/]. + + + + + +===Teachers=== + +A list of teachers will be published here later. Some of the local teachers +probably involved are the following: +- Krasimir Angelov +- Robin Cooper +- Håkan Burden +- Markus Forsberg +- Harald Hammarström +- Peter Ljunglöf +- Aarne Ranta + + +More teachers are welcome! If you are interested, please contact us so that +we can discuss your involvement and travel arrangements. + +In addition to teachers, we will look for consultants who can help to assess +the results for each language. Please contact us! + + + +===The Summer School Committee=== + +This committee consists of a number of teachers and informants, +who will select the participants. It will be selected by April 2009. + + +===Time and Place=== + +The summer school will +be organized at the campus of Chalmers University of Technology in Gothenburg, +Sweden, on 17-28 August 2009. + +Time schedule: +- February: announcement of summer school +- 20-29 April: on-line course +- 12 May: submission deadline for assignment work +- 31 May: review of assignments, notifications of acceptance +- 15 June: **registration deadline** +- 17-28 August: Summer School +- September-December: homework on resource grammars +- December: release of the extended Resource Grammar Library + + +===Dissemination and intellectual property=== + +The new resource grammars will be released under the LGPL just like +the current resource grammars, +with the copyright held by respective authors.
+ +The grammars will be distributed via the GF web site. + + + +==Why I should participate== + +Seven reasons: ++ participation in a pioneering language technology work in an + enthusiastic atmosphere ++ work and fun with people from all over Europe and the world ++ job opportunities and business ideas ++ credits: the school project will be established as a course at Chalmers worth + 7.5 or 15 ECTS points per person, depending on the work accomplished; also + extensions to Master's thesis will be considered (special credit arrangements + for [GSLT http://www.gslt.hum.gu.se/] and [NGSLT http://ngslt.org/]) ++ merits: the resulting grammar can easily lead to a published paper (see below) ++ contribution to the multilingual and multicultural development of Europe and the + world ++ free trip and stay in Gothenburg (for travel grant students) + + +==More information== + +[Course Google Group http://groups.google.com/group/gf-resource-school-2009/] + +[GF web page http://digitalgrammars.com/gf/] + +[GF tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html] + +[GF resource synopsis http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html] + +[Resource-HOWTO document http://digitalgrammars.com/gf/doc/Resource-HOWTO.html] + + +===Contact=== + +Håkan Burden: burden at chalmers se + +Aarne Ranta: aarne at chalmers se + + + +===Selected publications from earlier resource grammar projects=== + +K. Angelov. +Type-Theoretical Bulgarian Grammar. +In B. Nordström and A. Ranta (eds), +//Advances in Natural Language Processing (GoTAL 2008)//, +LNCS/LNAI 5221, Springer, +2008. + +B. Bringert. +//Programming Language Techniques for Natural Language Applications//. +PhD thesis, Computer Science, University of Gothenburg, +2008. + +A. El Dada and A. Ranta. +Implementing an Open Source Arabic Resource Grammar in GF. +In M. Mughazy (ed), +//Perspectives on Arabic Linguistics XX.
Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26// +John Benjamins Publishing Company. +2007. + +A. El Dada. +Implementation of the Arabic Numerals and their Syntax in GF. +Computational Approaches to Semitic Languages: Common Issues and Resources, + ACL-2007 Workshop, +June 28, 2007, Prague. +2007. + +H. Hammarstrm and A. Ranta. +Cardinal Numerals Revisited in GF. +//Workshop on Numerals in the World's Languages//. +Dept. of Linguistics Max Planck Institute for Evolutionary Anthropology, Leipzig, +2004. + +M. Humayoun, H. Hammarstrm, and A. Ranta. +Urdu Morphology, Orthography and Lexicon Extraction. +//CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages//, +July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University. +2007. + +K. Johannisson. +//Formal and Informal Software Specifications.// +Phd thesis, Computer Science, University of Gothenburg, +2005. + +J. Khegai. +GF parallel resource grammars and Russian. +In proceedings of ACL2006 + (The joint conference of the International Committee on Computational + Linguistics and the Association for Computational Linguistics) (pp. 475-482), + Sydney, Australia, July 2006. + +J. Khegai. +//Language engineering in Grammatical Framework (GF)//. +Phd thesis, Computer Science, Chalmers University of Technology, +2006. + +W. Ng'ang'a. +Multilingual content development for eLearning in Africa. +eLearning Africa: 1st Pan-African Conference on ICT for Development, + Education and Training. 24-26 May 2006, Addis Ababa, Ethiopia. +2006. + +N. Perera and A. Ranta. +Dialogue System Localization with the GF Resource Grammar Library. +//SPEECHGRAM 2007: ACL Workshop on Grammar-Based Approaches to Spoken Language Processing//, +June 29, 2007, Prague. +2007. + +A. Ranta. +Modular Grammar Engineering in GF. +//Research on Language and Computation//, +5:133-158, 2007. + +A. Ranta. +How predictable is Finnish morphology? 
An experiment on lexicon construction. +In J. Nivre, M. Dahllöf and B. Megyesi (eds), +//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//, +University of Uppsala, +2008. + +A. Ranta. Grammars as Software Libraries. +To appear in +Y. Bertot, G. Huet, J-J. Lévy, and G. Plotkin (eds.), +//From Semantics to Computer Science//, +Cambridge University Press, Cambridge, 2009. + +A. Ranta and K. Angelov. +Implementing Controlled Languages in GF. +To appear in the proceedings of //CNL 2009//. + diff --git a/deprecated/doc/gf3-release.html b/deprecated/doc/gf3-release.html new file mode 100644 index 000000000..75557c94a --- /dev/null +++ b/deprecated/doc/gf3-release.html @@ -0,0 +1,73 @@ + + + + +GF 3.0 + +

GF 3.0

+ +Krasimir Angelov, Björn Bringert, and Aarne Ranta
+Beta release, 27 June 2008 +
+ +

+GF Version 3.0 is a major revision of GF. The source language is a superset of the +language in 2.9, which means backward compatibility. But the target languages, the +compiler implementation, and the functionalities (e.g. the shell) have undergone +radical changes. +

+

New features

+

+Here is a summary of the main novelties visible to the user: +

+
    +
  • Size: the source code and the executable binary size have gone + down to about half of their size in 2.9. +
  • Portability: the new back end format PGF (Portable Grammar Format) is + much simpler than the old GFC format, and therefore easier to port to new + platforms. +
  • Multilingual web page support: as an example of portability, GF 3.0 provides a + compiler from PGF to JavaScript. There are also JavaScript libraries for creating + translators and syntax editors as client-side web applications. +
  • Incremental parsing: there is a possibility of word completion when + input strings are sent to the parser. +
  • Application programmer's interfaces: both source-GF and PGF formats, + the shell, and the compiler are accessible via high-level APIs. +
  • Resource library version 1.4: more coverage, more languages; some of + the new GF language features are exploited. +
  • Uniform character encoding: UTF8 in generated files, user-definable in + source files +
+ +

Non-supported features

+

+There are some features of GF 2.9 that will not work in the 3.0 beta release. +

+
    +
  • Java Editor GUI: we now see the JavaScript editor as the main form of + syntax editing. +
  • Pre-module multi-file grammar format: the grammar format of GF before version 2.0 + is not yet supported. +
  • Context-free and EBNF input grammar formats. +
  • Probabilistic GF grammars. +
  • Some output formats: LBNF. +
  • Some GF shell commands: while the main ones will be supported with their familiar + syntax and options, some old commands have not been included. The GF shell + command help -changes gives the actual list. +
+ +

+Users who want to have these features are welcome to contact us, +and even more welcome to contribute code that restores them! +

+

GF language extensions

+

+Operations for defining patterns. +

+

+Inheritance of overload groups. +

+ + + + diff --git a/deprecated/doc/gf3-release.txt b/deprecated/doc/gf3-release.txt new file mode 100644 index 000000000..631752c90 --- /dev/null +++ b/deprecated/doc/gf3-release.txt @@ -0,0 +1,58 @@ +GF 3.0 +Krasimir Angelov, Björn Bringert, and Aarne Ranta +Beta release, 27 June 2008 + + +GF Version 3.0 is a major revision of GF. The source language is a superset of the +language in 2.9, which means backward compatibility. But the target languages, the +compiler implementation, and the functionalities (e.g. the shell) have undergone +radical changes. + + +==New features== + +Here is a summary of the main novelties visible to the user: +- **Size**: the source code and the executable binary size have gone + down to about half of their size in 2.9. +- **Portability**: the new back end format PGF (Portable Grammar Format) is + much simpler than the old GFC format, and therefore easier to port to new + platforms. +- **Multilingual web page support**: as an example of portability, GF 3.0 provides a + compiler from PGF to JavaScript. There are also JavaScript libraries for creating + translators and syntax editors as client-side web applications. +- **Incremental parsing**: there is a possibility of word completion when + input strings are sent to the parser. +- **Application programmer's interfaces**: both source-GF and PGF formats, + the shell, and the compiler are accessible via high-level APIs. +- **Resource library version 1.4**: more coverage, more languages; some of + the new GF language features are exploited. +- **Uniform character encoding**: UTF8 in generated files, user-definable in + source files + + +==Non-supported features== + +There are some features of GF 2.9 that will //not// work in the 3.0 beta release. +- Java Editor GUI: we now see the JavaScript editor as the main form of + syntax editing. +- Pre-module multi-file grammar format: the grammar format of GF before version 2.0 + is not yet supported. +- Context-free and EBNF input grammar formats.
+- Probabilistic GF grammars. +- Some output formats: LBNF. +- Some GF shell commands: while the main ones will be supported with their familiar + syntax and options, some old commands have not been included. The GF shell + command ``help -changes`` gives the actual list. + + +Users who want to have these features are welcome to contact us, +and even more welcome to contribute code that restores them! + + +==GF language extensions== + +Operations for defining patterns. + +Inheritance of overload groups. + + diff --git a/deprecated/doc/school-langs.dot b/deprecated/doc/school-langs.dot new file mode 100644 index 000000000..88e0a9c96 --- /dev/null +++ b/deprecated/doc/school-langs.dot @@ -0,0 +1,106 @@ +graph{ + +size = "8,8" ; + +overlap = scale ; + +"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ; + +"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ; +"1" -- "Abs" [style = "solid"]; + +"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ; +"2" -- "Abs" [style = "solid"]; + +"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ; +"3" -- "Abs" [style = "solid"]; + +"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ; +"4" -- "Abs" [style = "solid"]; + +"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ; +"5" -- "Abs" [style = "solid"]; + +"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ; +"6" -- "Abs" [style = "solid"]; + +"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ; +"7" -- "Abs" [style = "solid"]; + +"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ; +"8" -- "Abs" [style = "solid"]; + +"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ; +"9" -- "Abs" [style = "solid"]; + +"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ; +"10" -- "Abs" [style = "solid"]; + +"11" [label = 
"Latvian", style = "solid", shape = "ellipse", color = "red"] ; +"11" -- "Abs" [style = "solid"]; + +"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "12" [style = "solid"]; + +"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "13" [style = "solid"]; + +"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "14" [style = "solid"]; + +"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "15" [style = "solid"]; + +"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "16" [style = "solid"]; + +"17" [label = "Polish", style = "solid", shape = "ellipse", color = "orange"] ; +"Abs" -- "17" [style = "solid"]; + +"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "18" [style = "solid"]; + +"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "19" [style = "solid"]; + +"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ; +"Abs" -- "20" [style = "solid"]; + +"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "orange"] ; +"Abs" -- "21" [style = "solid"]; + +"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ; +"Abs" -- "22" [style = "solid"]; + +"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ; +"Abs" -- "23" [style = "solid"]; + +"24" [label = "Catalan", style = "dotted", shape = "ellipse", color = "green"] ; +"Abs" -- "24" [style = "solid"]; + +"25" [label = "Norwegian", style = "dotted", shape = "ellipse", color = "green"] ; +"Abs" -- "25" [style = "solid"]; + +"26" [label = "Russian", style = "dotted", shape = "ellipse", color = "green"] ; +"Abs" -- "26" [style = "solid"]; + +"27" [label = "Interlingua", style = "dotted", shape = "ellipse", color = "green"] ; +"Abs" -- "27" [style = "solid"]; + +"28" [label = "Latin", 
style = "dotted", shape = "ellipse", color = "orange"] ; +"Abs" -- "28" [style = "solid"]; +"29" [label = "Turkish", style = "dotted", shape = "ellipse", color = "orange"] ; +"Abs" -- "29" [style = "solid"]; +"30" [label = "Hindi", style = "dotted", shape = "ellipse", color = "orange"] ; +"Abs" -- "30" [style = "solid"]; +"31" [label = "Thai", style = "dotted", shape = "ellipse", color = "orange"] ; +"Abs" -- "31" [style = "solid"]; +"32" [label = "Urdu", style = "dotted", shape = "ellipse", color = "orange"] ; +"Abs" -- "32" [style = "solid"]; +"33" [label = "Telugu", style = "dotted", shape = "ellipse", color = "red"] ; +"Abs" -- "33" [style = "solid"]; +"34" [label = "Arabic", style = "dotted", shape = "ellipse", color = "orange"] ; +"Abs" -- "34" [style = "solid"]; + + +} diff --git a/deprecated/doc/school-langs.png b/deprecated/doc/school-langs.png new file mode 100644 index 000000000..7230e0bff Binary files /dev/null and b/deprecated/doc/school-langs.png differ diff --git a/deprecated/doc/summer-align.png b/deprecated/doc/summer-align.png new file mode 100644 index 000000000..796754408 Binary files /dev/null and b/deprecated/doc/summer-align.png differ diff --git a/deprecated/doc/summer-langs.png b/deprecated/doc/summer-langs.png new file mode 100644 index 000000000..729af722a Binary files /dev/null and b/deprecated/doc/summer-langs.png differ diff --git a/deprecated/doc/vr.html b/deprecated/doc/vr.html new file mode 100644 index 000000000..e5dee1885 --- /dev/null +++ b/deprecated/doc/vr.html @@ -0,0 +1,46 @@ + + + + +Library-Based Grammar Engineering + +

Library-Based Grammar Engineering

+ +VR Project 2006-2008
+
+ +

Staff

+

+Lars Borin (co-leader) +

+

+Robin Cooper (co-leader) +

+

+Aarne Ranta (project responsible) +

+

+Sibylle Schupp (co-leader) +

+

Publications

+

+Ali El Dada, MSc Thesis +

+

+Muhammad Humayoun, MSc Thesis +

+

+Janna Khegai, +Language Engineering in GF, PhD Thesis, Chalmers. 2006. +

+

Links

+

+GF +

+

+Functional Morphology +

+ + + + diff --git a/deprecated/doc/vr.txt b/deprecated/doc/vr.txt new file mode 100644 index 000000000..9b5045978 --- /dev/null +++ b/deprecated/doc/vr.txt @@ -0,0 +1,32 @@ +Library-Based Grammar Engineering +VR Project 2006-2008 + + +=Staff= + +Lars Borin (co-leader) + +Robin Cooper (co-leader) + +Aarne Ranta (project responsible) + +Sibylle Schupp (co-leader) + + + +=Publications= + +Ali El Dada, MSc Thesis + +Muhammad Humayoun, MSc Thesis + +Janna Khegai, +Language Engineering in GF, PhD Thesis, Chalmers. 2006. + + + +=Links= + +[GF http://www.cs.chalmers.se/~aarne/GF/] + +[Functional Morphology http://www.cs.chalmers.se/~markus/FM/] diff --git a/doc/10lang-small.png b/doc/10lang-small.png deleted file mode 100644 index 49a3d0a98..000000000 Binary files a/doc/10lang-small.png and /dev/null differ diff --git a/doc/2341.html b/doc/2341.html deleted file mode 100644 index ff3e9644d..000000000 --- a/doc/2341.html +++ /dev/null @@ -1,259 +0,0 @@ - - - -af_tunni : lámma kún síddi? boqól afartón i ków - -

-albanian : dy mijë tre qind e dyzet e një - -

-amharic : ሁለት ሺህ ሦስት መቶ ኣርባ ኣንድ - -

-arabic_classical : الفان و ثلاث مائة و واحد و أربعون - -

-arabic_modern : ﺍﻟﻔﻴﻦ ﻭ ﺛﻼﺛﻤﺎﺋﺔ ﻭ ﻭﺍﺣﺪ ﻭ ﺃﺭﺑﻌﻴﻦ - -

-basque : bi mila ta hirurehun berrogei ta bat - -

-bearlake_slave : nákee lamíl tai lak'o, óno, di,i, honéno, ?ó, l-ée - -

-bulgarian : две жиляди триста четирисет и едно - -

-catalan : dos mil tres-cents quaranta - u - -

-chinese : 贰 仟 零 叁 佰 肆 拾 壹 - -

-croatian : dva hiljade tri stotine četrdeset i jedan - -

-czech : dva tisíce tr^i sta čtyr^icet jeden - -

-dagur : hoire miange guarebe jau duci neke - -

-danish : to tusind og tre hundrede og en og fyrre - -

-decimal : 2341 - -

-dutch : twee duizend drie honderd een en veertig - -

-english : two thousand three hundred and forty - one - -

-finnish : kaksi tuhatta kolme sataa neljä kymmentä yksi - -

-french : deux mille trois cent quarante et un - -

-french_swiss : deux mille trois cent quarante et un - -

-fulfulde : ujine d.id.i temed.d.e tati e chappand.e nai e go'o - -

-geez : ዕሽራ ወ ሠላስቱ ምእት አርብዓ ወ አሐዱ - -

-german : zwei tausend drei hundert ein und vierzig - -

-greek_classical : δισχίλιοι τριακόσιοι τετταράκοντα εἵς - -

-greek_modern : δύο χιλιάδες τριακόσια σαράντα ένα - -

-guahibo : aniha sunu akueya sia yana bae kae - -

-guarani : moko~i ma mpohapy sa~ irundy kua~ petei~ - -

-hebrew_biblical : אלפים ו שלש מאות ו ארבעים ו אחד - -

-hindi : दो हज़ार तीन सौ एक्तालीस - -

-hungarian : két ezer három száz negyven egy - -

-icelandic : tvö Þúsund Þrjú hundrað fjörutíu og einn - -

-irish : dhá mhíle trí chead dhá fhichead a haon - -

-italian : due mila tre cento quaranta uno - -

-japanese : にせん さんびゃく よんぢゅう いち - -

-kabardian : m&yn&yt' s'a&ys' p'L-'&s'ra z&ra - -

-kambera : dua riu tailu ngahu patu kambulu hau - -

-kawaiisu : N -

-khmer : bīra bā'na pī raya sē sipa mwya - -

-khowar : joo hazâr troi shọr oché joo bîsher î - -

-kodagu : i:ra:yrat mu:nu:yt.a na:padï - -

-kolyma_yukaghir : N -

-kulung : ni habau su chhum lik i - -

-kwami : dùbúk póllów dálmágí kúnún kán kúu pòD^òw kán múndí - -

-kwaza : N -

-lalo : `n. t'w sa há i tjhí tjh`& - -

-lamani : di hajaar do se caaLise par ek - -

-latvian : divtu^kstoš trīssimt četrdesmit viens - -

-lithuanian : dù tú:kstanc^iu, try:s s^imtai~ ke:turiasdes^imt víenas - -

-lotuxo : tausand ârrexai ikO EssIxa xunixoi ikO atOmwana aNwan x' âbotye - -

-maale : lam?ó $íya haitsó s'ééta ?oydí-támmi pétte - -

-malay : dua ribu tiga ratus empat puluh satu - -

-maltese : elfejn tliet mija u wieh-ed u erbgh-in - -

-mapuche : epu warangka külá pataka meli mari kiñe - -

-margi : dúbú s`&d>àN ghàrú mák`&r agá fód>ú kùmì gà s'&r pátlú* - -

-maybrat : N -

-miya : d'&bu ts`&r '`&náa d>àriy kìdi '`&náa díb>i f`&d>& bèh&n wut'& - -

-mongolian : qoyar mingGan Gurban ĵa'un döčin nigän - -

-nenets : side juonar n-ahar jur t-êt ju' ~ob - -

-norwegian_book : to tusen og tre hundre og førti et - -

-old_church_slavonic : дъвѣ тысѭшти триѥ съта четыре десѧте и ѥдинъ - -

-oromo : kuma lama fi dhibba sadii fi afurtamii tokko - -

-pashto : دوه زره دري سوه او يو څلوۍښت - -

-polish : dwa tysiace trzysta czterdziesci jeden - -

-portuguese : dois mil trezentos quarenta e um - -

-quechua : iskay warank'a kinsa pachak tawa chunka jukniyuq - -

-romanian : două mii trei sute patruzeci şi unu - -

-russian : две тысячи триста сорок один - -

-sango : ngbangbu bale óse na ndó ní ngbangbu otá na ndó ní bale osió na ndó ní ÓkO - -

-sanskrit : त्रि शतान्य एकचत्वारिंशच च द्वे सहस्रे - -

-slovak : dva tisic tri sto styridsat jedna - -

-sorani : دۇ ههزار سىسهد ځل و يهك - -

-spanish : dos mil trescientos cuarenta y uno - -

-stieng : baar ban pê riêng puôn jo't muôi - -

-swahili : elfu mbili mia tatu arobaini na moja - -

-swedish : två tusen tre hundra fyrtio ett - -

-tamil : இரணௌடௌ ஆயாரதௌதீ மீனௌ ந஽ரீ ந஽ரௌ பதௌ ஓனௌரீ - -

-tampere : kaks tuhatta kolme sataa nel kyt yks - -

-tibetan : t̆ong ṭ'a' n̆yī d́ang sumğya d́ang z̆hyib chu źhye chi' - -

-totonac : maa t~u3 mil lii ~a tuhun pus^um tun - -

-tuda_daza : dubu cu sao kidra ago.zo. sao mOrta tozo sao tro - -

-tukang_besi : dua riwu tolu hatu hato hulu sa'asa - -

-turkish : iki bin üç yüz kırk bir - -

-votic : kahsi tuhatta keVmsata: nelläts^ümmet ühsi - -

-welsh : dau fil tri chan un a deugain - -

-yasin_burushaski : altó hazár iskí tha altó-áltar hek - -

-zaiwa : i55 hing55 sum11 syo31 mi11 cue31 ra11 - - - - diff --git a/doc/DocGF.pdf b/doc/DocGF.pdf deleted file mode 100644 index 27e4262db..000000000 Binary files a/doc/DocGF.pdf and /dev/null differ diff --git a/doc/DocGF.tex b/doc/DocGF.tex deleted file mode 100644 index 6388d3548..000000000 --- a/doc/DocGF.tex +++ /dev/null @@ -1,569 +0,0 @@ -\batchmode -%This Latex file is machine-generated by the BNF-converter - -\documentclass[a4paper,11pt]{article} -\author{BNF-converter} -\title{The Language GF} -\setlength{\parindent}{0mm} -\setlength{\parskip}{1mm} -\begin{document} - -\maketitle - -\newcommand{\emptyP}{\mbox{$\epsilon$}} -\newcommand{\terminal}[1]{\mbox{{\texttt {#1}}}} -\newcommand{\nonterminal}[1]{\mbox{$\langle \mbox{{\sl #1 }} \! \rangle$}} -\newcommand{\arrow}{\mbox{::=}} -\newcommand{\delimit}{\mbox{$|$}} -\newcommand{\reserved}[1]{\mbox{{\texttt {#1}}}} -\newcommand{\literal}[1]{\mbox{{\texttt {#1}}}} -\newcommand{\symb}[1]{\mbox{{\texttt {#1}}}} - -This document was automatically generated by the {\em BNF-Converter}. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place). - -\section*{The lexical structure of GF} -\subsection*{Identifiers} -Identifiers \nonterminal{Ident} are unquoted strings beginning with a letter, -followed by any combination of letters, digits, and the characters {\tt \_ '}, -reserved words excluded. - - -\subsection*{Literals} -Integer literals \nonterminal{Int}\ are nonempty sequences of digits. - - -String literals \nonterminal{String}\ have the form -\terminal{"}$x$\terminal{"}, where $x$ is any sequence of any characters -except \terminal{"}\ unless preceded by \verb6\6. 
- - - - -LString literals are recognized by the regular expression -\(\mbox{`''} ({\nonterminal{anychar}} - \mbox{`''})* \mbox{`''}\) - - -\subsection*{Reserved words and symbols} -The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions. - -The reserved words used in GF are the following: \\ - -\begin{tabular}{lll} -{\reserved{Lin}} &{\reserved{PType}} &{\reserved{Str}} \\ -{\reserved{Strs}} &{\reserved{Tok}} &{\reserved{Type}} \\ -{\reserved{abstract}} &{\reserved{case}} &{\reserved{cat}} \\ -{\reserved{concrete}} &{\reserved{data}} &{\reserved{def}} \\ -{\reserved{flags}} &{\reserved{fn}} &{\reserved{fun}} \\ -{\reserved{grammar}} &{\reserved{in}} &{\reserved{include}} \\ -{\reserved{incomplete}} &{\reserved{instance}} &{\reserved{interface}} \\ -{\reserved{let}} &{\reserved{lin}} &{\reserved{lincat}} \\ -{\reserved{lindef}} &{\reserved{lintype}} &{\reserved{of}} \\ -{\reserved{open}} &{\reserved{oper}} &{\reserved{out}} \\ -{\reserved{package}} &{\reserved{param}} &{\reserved{pattern}} \\ -{\reserved{pre}} &{\reserved{printname}} &{\reserved{resource}} \\ -{\reserved{reuse}} &{\reserved{strs}} &{\reserved{table}} \\ -{\reserved{tokenizer}} &{\reserved{transfer}} &{\reserved{union}} \\ -{\reserved{var}} &{\reserved{variants}} &{\reserved{where}} \\ -{\reserved{with}} & & \\ -\end{tabular}\\ - -The symbols used in GF are the following: \\ - -\begin{tabular}{lll} -{\symb{;}} &{\symb{{$=$}}} &{\symb{\{}} \\ -{\symb{\}}} &{\symb{(}} &{\symb{)}} \\ -{\symb{:}} &{\symb{{$-$}{$>$}}} &{\symb{**}} \\ -{\symb{,}} &{\symb{[}} &{\symb{]}} \\ -{\symb{.}} &{\symb{{$|$}}} &{\symb{\%}} \\ -{\symb{?}} &{\symb{{$<$}}} &{\symb{{$>$}}} \\ -{\symb{@}} &{\symb{!}} &{\symb{*}} \\ 
-{\symb{$\backslash$}} &{\symb{{$=$}{$>$}}} &{\symb{{$+$}{$+$}}} \\ -{\symb{{$+$}}} &{\symb{\_}} &{\symb{\$}} \\ -{\symb{/}} &{\symb{{$-$}}} & \\ -\end{tabular}\\ - -\subsection*{Comments} -Single-line comments begin with {\symb{{$-$}{$-$}}}. \\Multiple-line comments are enclosed with {\symb{\{{$-$}}} and {\symb{{$-$}\}}}. - -\section*{The syntactic structure of GF} -Non-terminals are enclosed between $\langle$ and $\rangle$. -The symbols {\arrow} (production), {\delimit} (union) -and {\emptyP} (empty rule) belong to the BNF notation. -All other symbols are terminals.\\ - -\begin{tabular}{lll} -{\nonterminal{Grammar}} & {\arrow} &{\nonterminal{ListModDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListModDef}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{ModDef}} {\nonterminal{ListModDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ModDef}} & {\arrow} &{\nonterminal{ModDef}} {\terminal{;}} \\ - & {\delimit} &{\terminal{grammar}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\terminal{abstract}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{;}} {\nonterminal{ListConcSpec}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{ComplMod}} {\nonterminal{ModType}} {\terminal{{$=$}}} {\nonterminal{ModBody}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ConcSpec}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ConcExp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListConcSpec}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{ConcSpec}} \\ - & {\delimit} &{\nonterminal{ConcSpec}} {\terminal{;}} {\nonterminal{ListConcSpec}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ConcExp}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListTransfer}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListTransfer}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Transfer}} {\nonterminal{ListTransfer}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} 
-{\nonterminal{Transfer}} & {\arrow} &{\terminal{(}} {\terminal{transfer}} {\terminal{in}} {\nonterminal{Open}} {\terminal{)}} \\ - & {\delimit} &{\terminal{(}} {\terminal{transfer}} {\terminal{out}} {\nonterminal{Open}} {\terminal{)}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ModType}} & {\arrow} &{\terminal{abstract}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{resource}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{interface}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{concrete}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{instance}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{transfer}} {\nonterminal{Ident}} {\terminal{:}} {\nonterminal{Open}} {\terminal{{$-$}{$>$}}} {\nonterminal{Open}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ModBody}} & {\arrow} &{\nonterminal{Extend}} {\nonterminal{Opens}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\ - & {\delimit} &{\nonterminal{ListIdent}} {\terminal{**}} {\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\ - & {\delimit} &{\terminal{reuse}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{union}} {\nonterminal{ListIncluded}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListTopDef}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{TopDef}} {\nonterminal{ListTopDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Extend}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{**}} \\ - & {\delimit} &{\emptyP} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListOpen}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Open}} \\ - & {\delimit} &{\nonterminal{Open}} {\terminal{,}} {\nonterminal{ListOpen}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Opens}} & {\arrow} &{\emptyP} \\ - & {\delimit} 
&{\terminal{open}} {\nonterminal{ListOpen}} {\terminal{in}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Open}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{)}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{)}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ComplMod}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{incomplete}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{QualOpen}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{incomplete}} \\ - & {\delimit} &{\terminal{interface}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListIncluded}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Included}} \\ - & {\delimit} &{\nonterminal{Included}} {\terminal{,}} {\nonterminal{ListIncluded}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Included}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{[}} {\nonterminal{ListIdent}} {\terminal{]}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Def}} & {\arrow} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Name}} {\nonterminal{ListPatt}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{TopDef}} & {\arrow} &{\terminal{cat}} {\nonterminal{ListCatDef}} \\ - & {\delimit} &{\terminal{fun}} {\nonterminal{ListFunDef}} \\ - & {\delimit} &{\terminal{data}} {\nonterminal{ListFunDef}} \\ - & {\delimit} &{\terminal{def}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{data}} {\nonterminal{ListDataDef}} \\ - & {\delimit} &{\terminal{transfer}} 
{\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{param}} {\nonterminal{ListParDef}} \\ - & {\delimit} &{\terminal{oper}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{lincat}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{lindef}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{lin}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{printname}} {\terminal{cat}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{printname}} {\terminal{fun}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{flags}} {\nonterminal{ListFlagDef}} \\ - & {\delimit} &{\terminal{printname}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{lintype}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{pattern}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{package}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} {\terminal{;}} \\ - & {\delimit} &{\terminal{var}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{tokenizer}} {\nonterminal{Ident}} {\terminal{;}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{CatDef}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} {\terminal{\{}} {\nonterminal{Integer}} {\terminal{\}}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{FunDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{DataDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListDataConstr}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{DataConstr}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} 
-{\nonterminal{ListDataConstr}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{DataConstr}} \\ - & {\delimit} &{\nonterminal{DataConstr}} {\terminal{{$|$}}} {\nonterminal{ListDataConstr}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ParDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListParConstr}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ParConstr}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PrintDef}} & {\arrow} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{FlagDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDef}} & {\arrow} &{\nonterminal{Def}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{Def}} {\terminal{;}} {\nonterminal{ListDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListCatDef}} & {\arrow} &{\nonterminal{CatDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{CatDef}} {\terminal{;}} {\nonterminal{ListCatDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListFunDef}} & {\arrow} &{\nonterminal{FunDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{FunDef}} {\terminal{;}} {\nonterminal{ListFunDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDataDef}} & {\arrow} &{\nonterminal{DataDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{DataDef}} {\terminal{;}} {\nonterminal{ListDataDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListParDef}} & {\arrow} &{\nonterminal{ParDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{ParDef}} {\terminal{;}} {\nonterminal{ListParDef}} \\ -\end{tabular}\\ - 
-\begin{tabular}{lll} -{\nonterminal{ListPrintDef}} & {\arrow} &{\nonterminal{PrintDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{PrintDef}} {\terminal{;}} {\nonterminal{ListPrintDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListFlagDef}} & {\arrow} &{\nonterminal{FlagDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{FlagDef}} {\terminal{;}} {\nonterminal{ListFlagDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListParConstr}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{ParConstr}} \\ - & {\delimit} &{\nonterminal{ParConstr}} {\terminal{{$|$}}} {\nonterminal{ListParConstr}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListIdent}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{,}} {\nonterminal{ListIdent}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Name}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\terminal{]}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListName}} & {\arrow} &{\nonterminal{Name}} \\ - & {\delimit} &{\nonterminal{Name}} {\terminal{,}} {\nonterminal{ListName}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{LocDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListLocDef}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{LocDef}} \\ - & {\delimit} &{\nonterminal{LocDef}} {\terminal{;}} {\nonterminal{ListLocDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp4}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{\%}} \\ - & 
{\delimit} &{\nonterminal{Sort}} \\ - & {\delimit} &{\nonterminal{String}} \\ - & {\delimit} &{\nonterminal{Integer}} \\ - & {\delimit} &{\terminal{?}} \\ - & {\delimit} &{\terminal{[}} {\terminal{]}} \\ - & {\delimit} &{\terminal{data}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{Exps}} {\terminal{]}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{String}} {\terminal{]}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListTupleComp}} {\terminal{{$>$}}} \\ - & {\delimit} &{\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\ - & {\delimit} &{\terminal{{$<$}}} {\nonterminal{Exp}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$>$}}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{Exp}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{LString}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp3}} & {\arrow} &{\nonterminal{Exp3}} {\terminal{.}} {\nonterminal{Label}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\%}} \\ - & {\delimit} &{\nonterminal{Exp4}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp2}} & {\arrow} &{\nonterminal{Exp2}} {\nonterminal{Exp3}} \\ - & {\delimit} &{\terminal{table}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{[}} {\nonterminal{ListExp}} {\terminal{]}} \\ - & {\delimit} &{\terminal{case}} {\nonterminal{Exp}} {\terminal{of}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{variants}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{pre}} {\terminal{\{}} 
{\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListAltern}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{strs}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{@}} {\nonterminal{Exp4}} \\ - & {\delimit} &{\nonterminal{Exp3}} \\ - & {\delimit} &{\terminal{Lin}} {\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp1}} & {\arrow} &{\nonterminal{Exp1}} {\terminal{!}} {\nonterminal{Exp2}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{*}} {\nonterminal{Exp2}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{**}} {\nonterminal{Exp2}} \\ - & {\delimit} &{\nonterminal{Exp2}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp}} & {\arrow} &{\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\terminal{$\backslash$}} {\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Decl}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}{$+$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\terminal{let}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} {\terminal{in}} {\nonterminal{Exp}} \\ - & {\delimit} &{\terminal{let}} {\nonterminal{ListLocDef}} {\terminal{in}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{where}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{fn}} {\terminal{\{}} {\nonterminal{ListEquation}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Exp1}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListExp}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp}} {\terminal{;}} 
{\nonterminal{ListExp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exps}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Exp4}} {\nonterminal{Exps}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Patt1}} & {\arrow} &{\terminal{\_}} \\ - & {\delimit} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Integer}} \\ - & {\delimit} &{\nonterminal{String}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{ListPattAss}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListPattTupleComp}} {\terminal{{$>$}}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{Patt}} {\terminal{)}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Patt}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListPatt}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\nonterminal{ListPatt}} \\ - & {\delimit} &{\nonterminal{Patt1}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PattAss}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Patt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Label}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\$}} {\nonterminal{Integer}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Sort}} & {\arrow} &{\terminal{Type}} \\ - & {\delimit} &{\terminal{PType}} \\ - & {\delimit} &{\terminal{Tok}} \\ - & {\delimit} &{\terminal{Str}} \\ - & {\delimit} &{\terminal{Strs}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPattAss}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{PattAss}} \\ - & {\delimit} &{\nonterminal{PattAss}} {\terminal{;}} {\nonterminal{ListPattAss}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PattAlt}} & {\arrow} &{\nonterminal{Patt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPatt}} 
& {\arrow} &{\nonterminal{Patt1}} \\ - & {\delimit} &{\nonterminal{Patt1}} {\nonterminal{ListPatt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPattAlt}} & {\arrow} &{\nonterminal{PattAlt}} \\ - & {\delimit} &{\nonterminal{PattAlt}} {\terminal{{$|$}}} {\nonterminal{ListPattAlt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Bind}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\_}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListBind}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Bind}} \\ - & {\delimit} &{\nonterminal{Bind}} {\terminal{,}} {\nonterminal{ListBind}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Decl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{Exp2}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{TupleComp}} & {\arrow} &{\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PattTupleComp}} & {\arrow} &{\nonterminal{Patt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListTupleComp}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{TupleComp}} \\ - & {\delimit} &{\nonterminal{TupleComp}} {\terminal{,}} {\nonterminal{ListTupleComp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPattTupleComp}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{PattTupleComp}} \\ - & {\delimit} &{\nonterminal{PattTupleComp}} {\terminal{,}} {\nonterminal{ListPattTupleComp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Case}} & {\arrow} &{\nonterminal{ListPattAlt}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListCase}} & {\arrow} &{\nonterminal{Case}} \\ - & {\delimit} &{\nonterminal{Case}} {\terminal{;}} {\nonterminal{ListCase}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Equation}} & {\arrow} &{\nonterminal{ListPatt}} 
{\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListEquation}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Equation}} \\ - & {\delimit} &{\nonterminal{Equation}} {\terminal{;}} {\nonterminal{ListEquation}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Altern}} & {\arrow} &{\nonterminal{Exp}} {\terminal{/}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListAltern}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Altern}} \\ - & {\delimit} &{\nonterminal{Altern}} {\terminal{;}} {\nonterminal{ListAltern}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{DDecl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{Exp4}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDDecl}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{DDecl}} {\nonterminal{ListDDecl}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{OldGrammar}} & {\arrow} &{\nonterminal{Include}} {\nonterminal{ListTopDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Include}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{include}} {\nonterminal{ListFileName}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{FileName}} & {\arrow} &{\nonterminal{String}} \\ - & {\delimit} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{/}} {\nonterminal{FileName}} \\ - & {\delimit} &{\terminal{.}} {\nonterminal{FileName}} \\ - & {\delimit} &{\terminal{{$-$}}} {\nonterminal{FileName}} \\ - & {\delimit} &{\nonterminal{Ident}} {\nonterminal{FileName}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListFileName}} & {\arrow} &{\nonterminal{FileName}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{FileName}} {\terminal{;}} {\nonterminal{ListFileName}} \\ -\end{tabular}\\ - - - -\end{document} - diff --git a/doc/German.png b/doc/German.png deleted file 
mode 100644 index 7c6303897..000000000 Binary files a/doc/German.png and /dev/null differ diff --git a/doc/Grammar.dot b/doc/Grammar.dot deleted file mode 100644 index cb2998eb3..000000000 --- a/doc/Grammar.dot +++ /dev/null @@ -1,75 +0,0 @@ -digraph { - -size = "12,8" ; - -Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"]; - -Lang -> Grammar [style = "solid"]; -Lang -> Lexicon [style = "solid"]; - -Grammar [style = "solid", shape = "ellipse", URL = "Lang.gf"]; - - -Grammar -> Noun [style = "solid"]; -Grammar -> Verb [style = "solid"]; -Grammar -> Adjective [style = "solid"]; -Grammar -> Adverb [style = "solid"]; -Grammar -> Numeral [style = "solid"]; -Grammar -> Sentence [style = "solid"]; -Grammar -> Question [style = "solid"]; -Grammar -> Relative [style = "solid"]; -Grammar -> Conjunction [style = "solid"]; -Grammar -> Phrase [style = "solid"]; -Grammar -> Text [style = "solid"]; -Grammar -> Idiom [style = "solid"]; -Grammar -> Structural [style = "solid"]; - - -Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"]; -Noun -> Cat [style = "solid"]; - -Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"]; -Verb -> Cat [style = "solid"]; - -Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"]; -Adjective -> Cat [style = "solid"]; - -Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"]; -Adverb -> Cat [style = "solid"]; - -Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"]; -Numeral -> Cat [style = "solid"]; - -Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"]; -Sentence -> Cat [style = "solid"]; - -Question [style = "solid", shape = "ellipse", URL = "Question.gf"]; -Question -> Cat [style = "solid"]; - -Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"]; -Relative -> Cat [style = "solid"]; - -Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"]; -Conjunction -> Cat [style = "solid"]; - -Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; 
-Phrase -> Cat [style = "solid"]; - -Text [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; -Text -> Cat [style = "solid"]; - -Idiom [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; -Idiom -> Cat [style = "solid"]; - -Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"]; -Structural -> Cat [style = "solid"]; - -Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"]; -Lexicon -> Cat [style = "solid"]; - -Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"]; -Cat -> Common [style = "solid"]; - -Common [style = "solid", shape = "ellipse", URL = "Tense.gf"]; - -} diff --git a/doc/Grammar.png b/doc/Grammar.png deleted file mode 100644 index ada2847d7..000000000 Binary files a/doc/Grammar.png and /dev/null differ diff --git a/doc/Resource-HOWTO.html b/doc/Resource-HOWTO.html deleted file mode 100644 index ce2c15137..000000000 --- a/doc/Resource-HOWTO.html +++ /dev/null @@ -1,967 +0,0 @@ - - - - -Resource grammar writing HOWTO - -

Resource grammar writing HOWTO

- -Author: Aarne Ranta <aarne (at) cs.chalmers.se>
-Last update: Mon Sep 22 14:28:01 2008 -
- -

-
-

- - -

-
-

-

-History -

-

-September 2008: updated for Version 1.5. -

-

-October 2007: updated for Version 1.2. -

-

-January 2006: first version. -

-

-The purpose of this document is to tell how to implement the GF -resource grammar API for a new language. We will not cover how -to use the resource grammar, nor how to change the API. But we -will give some hints how to extend the API. -

-

-A manual for using the resource grammar is found in -

-

-www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html. -

-

-A tutorial on GF, also introducing the idea of resource grammars, is found in -

-

-www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html. -

-

-This document concerns the API v. 1.5, while the current stable release is 1.4. -You can find the code for the stable release in -

-

-www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/ -

-

-and the next release in -

-

-www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/ -

-

-It is recommended to build new grammars to match the next release. -

- -

The resource grammar structure

-

-The library is divided into a bunch of modules, whose dependencies -are given in the following figure. -

-

- -

-

-Modules of different kinds are distinguished as follows: -

-
    -
  • solid contours: module seen by end users -
  • dashed contours: internal module -
  • ellipse: abstract/concrete pair of modules -
  • rectangle: resource or instance -
  • diamond: interface -
- -

-Put in another way: -

-
    -
  • solid rectangles and diamonds: user-accessible library API -
  • solid ellipses: user-accessible top-level grammar for parsing and linearization -
  • dashed contours: not visible to users -
- -

-The dashed ellipses form the main parts of the implementation, on which the resource -grammar programmer has to work. She also has to work on the Paradigms -module. The rest of the modules can be produced mechanically from corresponding -modules for other languages, by just changing the language codes appearing in -their module headers. -

-

-The module structure is rather flat: most modules are direct -parents of Grammar. The idea -is that the implementors can concentrate on one linguistic aspect at a time, or -also distribute the work among several authors. The module Cat -defines the "glue" that ties the aspects together - a type system -to which all the other modules conform, so that e.g. NP means -the same thing in those modules that use NPs and those that -construct them. -

- -

Library API modules

-

-For the user of the library, these modules are the most important ones. -In a typical application, it is enough to open Paradigms and Syntax. -The module Try combines these two, making it possible to experiment -with combinations of syntactic and lexical constructors by using the -cc command in the GF shell. Here are short explanations of each API module: -

-
    -
  • Try: the whole resource library for a language (Paradigms, Syntax, - Irreg, and Extra); - produced mechanically as a collection of modules -
  • Syntax: language-independent categories, syntax functions, and structural words; - produced mechanically as a collection of modules -
  • Constructors: language-independent syntax functions and structural words; - produced mechanically via functor instantiation -
  • Paradigms: language-dependent morphological paradigms -
- - -

Phrase category modules

-

-The immediate parents of Grammar will be called phrase category modules, -since each of them concentrates on a particular phrase category (nouns, verbs, -adjectives, sentences,...). A phrase category module tells -how to construct phrases in that category. You will find out that -all functions in any of these modules have the same value type (or maybe -one of a small number of different types). Thus we have -

-
    -
  • Noun: construction of nouns and noun phrases -
  • Adjective: construction of adjectival phrases -
  • Verb: construction of verb phrases -
  • Adverb: construction of adverbial phrases -
  • Numeral: construction of cardinal and ordinal numerals -
  • Sentence: construction of sentences and imperatives -
  • Question: construction of questions -
  • Relative: construction of relative clauses -
  • Conjunction: coordination of phrases -
  • Phrase: construction of the major units of text and speech -
  • Text: construction of texts as sequences of phrases -
  • Idiom: idiomatic expressions such as existentials -
- - -

Infrastructure modules

-

-Expressions of each phrase category are constructed in the corresponding -phrase category module. But their use takes mostly place in other modules. -For instance, noun phrases, which are constructed in Noun, are -used as arguments of functions of almost all other phrase category modules. -How can we build all these modules independently of each other? -

-

-As usual in typeful programming, the only thing you need to know -about an object you use is its type. When writing a linearization rule -for a GF abstract syntax function, the only thing you need to know is -the linearization types of its value and argument categories. To achieve -the division of the resource grammar to several parallel phrase category modules, -what we need is an underlying definition of the linearization types. This -definition is given as the implementation of -

-
    -
  • Cat: syntactic categories of the resource grammar -
- -

-Any resource grammar implementation has first to agree on how to implement -Cat. Luckily enough, even this can be done incrementally: you -can skip the lincat definition of a category and use the default -{s : Str} until you need to change it to something else. In -English, for instance, many categories do have this linearization type. -

- -

Lexical modules

-

-What is lexical and what is syntactic is not as clearcut in GF as in -some other grammar formalisms. Logically, lexical means atom, i.e. a -fun with no arguments. Linguistically, one may add to this -that the lin consists of only one token (or of a table whose values -are single tokens). Even in the restricted lexicon included in the resource -API, the latter rule is sometimes violated in some languages. For instance, -Structural.both7and_DConj is an atom, but its linearization is -two words e.g. both - and. -

-

-Another characterization of lexical is that lexical units can be added -almost ad libitum, and they cannot be defined in terms of already -given rules. The lexical modules of the resource API are thus more like -samples than complete lists. There are two such modules: -

-
    -
  • Structural: structural words (determiners, conjunctions,...) -
  • Lexicon: basic everyday content words (nouns, verbs,...) -
- -

-The module Structural aims for completeness, and is likely to -be extended in future releases of the resource. The module Lexicon -gives a "random" list of words, which enables testing the syntax. -It also provides a check list for morphology, since those words are likely to include -most morphological patterns of the language. -

-

-In the case of Lexicon it may come out clearer than anywhere else -in the API that it is impossible to give exact translation equivalents in -different languages on the level of a resource grammar. This is no problem, -since application grammars can use the resource in different ways for -different languages. -

- -

Language-dependent syntax modules

-

-In addition to the common API, there is room for language-dependent extensions -of the resource. The top level of each language looks as follows (with German -as example): -

-
-    abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
-
-

-where ExtraGerAbs is a collection of syntactic structures specific to German, -and IrregGerAbs is a dictionary of irregular words of German -(at the moment, just verbs). Each of these language-specific grammars has -the potential to grow into a full-scale grammar of the language. These grammars -can also be used as libraries, but the possibility of using functors is lost. -

-

-To give a better overview of language-specific structures, -modules like ExtraGerAbs -are built from a language-independent module ExtraAbs -by restricted inheritance: -

-
-    abstract ExtraGerAbs = Extra [f,g,...]
-
-

-Thus any category and function in Extra may be shared by a subset of all -languages. One can see this set-up as a matrix, which tells -what Extra structures -are implemented in what languages. For the common API in Grammar, the matrix -is filled with 1's (everything is implemented in every language). -

-

-In a minimal resource grammar implementation, the language-dependent -extensions are just empty modules, but it is good to provide them for -the sake of uniformity. -

- -

The present-tense fragment

-

-Some lines in the resource library are suffixed with the comment -

-
-    --# notpresent
-
-

-which is used by a preprocessor to exclude those lines from -a reduced version of the full resource. This present-tense-only -version is useful for applications in most technical text, since -they reduce the grammar size and compilation time. It can also -be useful to exclude those lines in a first version of resource -implementation. To compile a grammar with present-tense-only, use -

-
-    make Present
-
-

-with resource/Makefile. -

- -

Phases of the work

- -

Putting up a directory

-

-Unless you are writing an instance of a parametrized implementation -(Romance or Scandinavian), which will be covered later, the -simplest way is to follow roughly the following procedure. Assume you -are building a grammar for the German language. Here are the first steps, -which we actually followed ourselves when building the German implementation -of resource v. 1.0 at Ubuntu linux. We have slightly modified them to -match resource v. 1.5 and GF v. 3.0. -

-
    -
  1. Create a sister directory for GF/lib/resource/english, named - german. -
    -         cd GF/lib/resource/
    -         mkdir german
    -         cd german
    -
    -

    -
  2. Check out the [ISO 639 3-letter language code - http://www.w3.org/WAI/ER/IG/ert/iso639.htm] - for German: both Ger and Deu are given, and we pick Ger. - (We use the 3-letter codes rather than the more common 2-letter codes, - since they will suffice for many more languages!) -

    -
  3. Copy the *Eng.gf files from english to german, - and rename them: -
    -         cp ../english/*Eng.gf .
    -         rename 's/Eng/Ger/' *Eng.gf
    -
    - If you don't have the rename command, you can use a bash script with mv. -
- -
    -
  1. Change the Eng module references to Ger references - in all files: -
    -         sed -i 's/English/German/g' *Ger.gf
    -         sed -i 's/Eng/Ger/g' *Ger.gf
    -
    - The first line prevents changing the word English, which appears - here and there in comments, to Gerlish. The sed command syntax - may vary depending on your operating system. -

    -
  2. This may of course change unwanted occurrences of the - string Eng - verify this by -
    -         grep Ger *.gf
    -
    - But you will have to make lots of manual changes in all files anyway! -

    -
  3. Comment out the contents of these files: -
    -         sed -i 's/^/--/' *Ger.gf
    -
    - This will give you a set of templates out of which the grammar - will grow as you uncomment and modify the files rule by rule. -

    -
  4. In all .gf files, uncomment the module headers and brackets, - leaving the module bodies commented. Unfortunately, there is no - simple way to do this automatically (or to avoid commenting these - lines in the previous step) - but uncommenting the first - and the last lines will actually do the job for many of the files. -

    -
  5. Uncomment the contents of the main grammar file: -
    -         sed -i 's/^--//' LangGer.gf
    -
    -

    -
  6. Now you can open the grammar LangGer in GF: -
    -         gf LangGer.gf
    -
    - You will get lots of warnings on missing rules, but the grammar will compile. -

    -
  7. At all the following steps you will now have a valid, but incomplete - GF grammar. The GF command -
    -         pg -missing
    -
    - tells you what exactly is missing. -
- -

-Here is the module structure of LangGer. It has been simplified by leaving out -the majority of the phrase category modules. Each of them has the same dependencies -as VerbGer, whose complete dependencies are shown as an example. -

-

- -

- -

Direction of work

-

-The real work starts now. There are many ways to proceed, the most obvious ones being -

-
    -
  • Top-down: start from the module Phrase and go down to Sentence, then - Verb, Noun, and in the end Lexicon. In this way, you are all the time - building complete phrases, and add them with more content as you proceed. - This approach is not recommended. It is impossible to test the rules if - you have no words to apply the constructions to. -

    -
  • Bottom-up: set as your first goal to implement Lexicon. To this end, you - need to write ParadigmsGer, which in turn needs parts of - MorphoGer and ResGer. - This approach is not recommended. You can get stuck to details of - morphology such as irregular words, and you don't have enough grasp about - the type system to decide what forms to cover in morphology. -
- -

-The practical working direction is thus a saw-like motion between the morphological -and top-level modules. Here is a possible course of the work that gives enough -test data and enough general view at any point: -

-
    -
  1. Define Cat.N and the required parameter types in ResGer. As we define -
    -    lincat N  = {s : Number => Case => Str ; g : Gender} ;
    -
    -we need the parameter types Number, Case, and Gender. The definition -of Number in common/ParamX -works for German, so we -use it and just define Case and Gender in ResGer. -

    -
  2. Define some cases of mkN in ParadigmsGer. In this way you can -already implement a huge amount of nouns correctly in LexiconGer. Actually -just adding the worst-case instance of mkN (the one taking the most -arguments) should suffice for every noun - but, -since it is tedious to use, you -might proceed to the next step before returning to morphology and defining the -real work horse, mkN taking two forms and a gender. -

    -
  3. While doing this, you may want to test the resource independently. Do this by - starting the GF shell in the resource directory, by the commands -
    -    > i -retain german/ParadigmsGer
    -    > cc -table mkN "Kirche"
    -
    -

    -
  4. Proceed to determiners and pronouns in -NounGer (DetCN UsePron DetQuant NumSg DefArt IndefArt UseN) and -StructuralGer (i_Pron this_Quant). You also need some categories and -parameter types. At this point, it is maybe not possible to find out the final -linearization types of CN, NP, Det, and Quant, but at least you should -be able to correctly inflect noun phrases such as every airplane: -
    -    > i german/LangGer.gf
    -    > l -table DetCN every_Det (UseN airplane_N)
    -  
    -    Nom: jeder Flugzeug
    -    Acc: jeden Flugzeug
    -    Dat: jedem Flugzeug
    -    Gen: jedes Flugzeugs
    -
    -

    -
  5. Proceed to verbs: define CatGer.V, ResGer.VForm, and -ParadigmsGer.mkV. You may choose to exclude notpresent -cases at this point. But anyway, you will be able to inflect a good -number of verbs in Lexicon, such as -live_V (mkV "leben"). -

    -
  6. Now you can soon form your first sentences: define VP and -Cl in CatGer, VerbGer.UseV, and SentenceGer.PredVP. -Even if you have excluded the tenses, you will be able to produce -
    -    > i -preproc=./mkPresent german/LangGer.gf
    -    > l -table PredVP (UsePron i_Pron) (UseV live_V)
    -  
    -    Pres Simul Pos Main: ich lebe
    -    Pres Simul Pos Inv:  lebe ich
    -    Pres Simul Pos Sub:  ich lebe
    -    Pres Simul Neg Main: ich lebe nicht
    -    Pres Simul Neg Inv:  lebe ich nicht
    -    Pres Simul Neg Sub:  ich nicht lebe
    -
    -You should also be able to parse: -
    -    > p -cat=Cl "ich lebe"
    -    PredVP (UsePron i_Pron) (UseV live_V)
    -
    -

    -
  7. Transitive verbs -(CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a) -are a natural next step, so that you can -produce ich liebe dich ("I love you"). -

    -
  8. Adjectives (CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA) -will force you to think about strong and weak declensions, so that you can -correctly inflect mein neuer Wagen, dieser neue Wagen -("my new car, this new car"). -

    -
  9. Once you have implemented the set -(Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP), -you have overcome most of the difficulties. You know roughly what parameters -and dependencies there are in your language, and you can now proceed very -much in the order you please. -
- - -

The develop-test cycle

-

-The following develop-test cycle will -be applied most of the time, both in the first steps described above -and in later steps where you are more on your own. -

-
    -
  1. Select a phrase category module, e.g. NounGer, and uncomment some - linearization rules (for instance, DetCN, as above). -

    -
  2. Write down some German examples of this rule, for instance translations - of "the dog", "the house", "the big house", etc. Write these in all their - different forms (two numbers and four cases). -

    -
  3. Think about the categories involved (CN, NP, N, Det) and the - variations they have. Encode this in the lincats of CatGer. - You may have to define some new parameter types in ResGer. -

    -
  4. To be able to test the construction, - define some words you need to instantiate it - in LexiconGer. You will also need some regular inflection patterns - in ParadigmsGer. -

    -
  5. Test by parsing, linearization, - and random generation. In particular, linearization to a table should - be used so that you see all forms produced; the treebank option - preserves the tree -
    -      > gr -cat=NP -number=20 | l -table -treebank
    -
    -

    -
  6. Save some tree-linearization pairs for later regression testing. You can save - a gold standard treebank and use the Unix diff command to compare later - linearizations produced from the same list of trees. If you save the trees - in a file trees, you can do as follows: -
    -      > rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
    -
    -

    -
  7. A file with trees testing all resource functions is included in the resource, - entitled resource/exx-resource.gft. A treebank can be created from this by - the Unix command -
    -    % runghc Make.hs test langs=Ger
    -
    -
- -

-You are likely to run this cycle a few times for each linearization rule -you implement, and some hundreds of times altogether. There are roughly -70 cats and -600 funs in Lang at the moment (170 of the funs are outside the two -lexicon modules). -

- -

Auxiliary modules

-

-These auxiliary resource modules will be written by you. -

-
    -
  • ResGer: parameter types and auxiliary operations -(a resource for the resource grammar!) -
  • ParadigmsGer: complete inflection engine and most important regular paradigms -
  • MorphoGer: auxiliaries for ParadigmsGer and StructuralGer. This need -not be separate from ResGer. -
- -

-These modules are language-independent and provided by the existing resource -package. -

-
    -
  • ParamX: parameter types used in many languages -
  • CommonX: implementation of language-uniform categories - such as Text and Phr, as well as of - the logical tense, anteriority, and polarity parameters -
  • Coordination: operations to deal with lists and coordination -
  • Prelude: general-purpose operations on strings, records, - truth values, etc. -
  • Predef: general-purpose operations with hard-coded definitions -
- -

-An important decision is what rules to implement in terms of operations in -ResGer. The golden rule of functional programming says: -

-
    -
  • Whenever you find yourself programming by copy and paste, write a function instead! -
- -

-This rule suggests that an operation should be created if it is to be -used at least twice. At the same time, a sound principle of vicinity says: -

-
    -
  • It should not require too much browsing to understand what a piece of code does. -
- -

-From these two principles, we have derived the following practice: -

-
    -
  • If an operation is needed in two different modules, - it should be created as an oper in ResGer. An example is mkClause, - used in Sentence, Question, and Relative. -
  • If an operation is needed twice in the same module, but never - outside, it should be created in the same module. Many examples are - found in Numerals. -
  • If an operation is needed twice in the same judgement, but never - outside, it should be created by a let definition. -
  • If an operation is only needed once, it should not be created as an oper, - but rather inlined. However, a let definition may well be in place just - to make the code readable. - Most functions in phrase category modules - are implemented in this way. -
- -

-This discipline is very different from the one followed in early -versions of the library (up to 0.9). We then valued the principle of -abstraction more than vicinity, creating layers of abstraction for -almost everything. This led in practice to the duplication of almost -all code on the lin and oper levels, and made the code -hard to understand and maintain. -

- -

Morphology and lexicon

-

-The paradigms needed to implement -LexiconGer are defined in -ParadigmsGer. -This module provides high-level ways to define the linearization of -lexical items, of categories N, A, V and their complement-taking -variants. -

-

-For ease of use, the Paradigms modules follow a certain -naming convention. Thus they provide, for each lexical category, such as N, -overloaded functions, such as mkN, with the following cases: -

-
    -
  • the worst-case construction of N. Its type signature - has the form -
    -         mkN : Str -> ... -> Str -> P -> ... -> Q -> N
    -
    - with as many string and parameter arguments as can ever be needed to - construct an N. -
  • the most regular cases, with just one string argument: -
    -         mkN : Str -> N
    -
    -
  • A language-dependent (small) set of functions to handle mild irregularities - and common exceptions. -
- -

-For the complement-taking variants, such as V2, we provide -

-
    -
  • a case that takes a V and all necessary arguments, such - as case and preposition: -
    -         mkV2 : V -> Case -> Str -> V2 ;
    -
    -
  • a case that takes a Str and produces a transitive verb with the direct - object case: -
    -         mkV2 : Str -> V2 ;
    -
    -
  • A language-dependent (small) set of functions to handle common special cases, - such as transitive verbs that are not regular: -
    -         mkV2 : V -> V2 ;
    -
    -
- -

-The golden rule for the design of paradigms is that -

-
    -
  • The user of the library will only need function applications with constants and strings, never any records or tables. -
- -

-The discipline of data abstraction moreover requires that the user of the resource -is not given access to parameter constructors, but only to constants that denote -them. This gives the resource grammarian the freedom to change the underlying -data representation if needed. It means that the ParadigmsGer module has -to define constants for those parameter types and constructors that -the application grammarian may need to use, e.g. -

-
-    oper 
-      Case : Type ;
-      nominative, accusative, genitive, dative : Case ;
-
-

-These constants are defined in terms of parameter types and constructors -in ResGer and MorphoGer, which modules are not -visible to the application grammarian. -

- -

Lock fields

-

-An important difference between MorphoGer and -ParadigmsGer is that the former uses "raw" record types -for word classes, whereas the latter uses category symbols defined in -CatGer. When these category symbols are used to denote -record types in a resource module, such as ParadigmsGer, -a lock field is added to the record, so that categories -with the same implementation are not confused with each other. -(This is inspired by the newtype discipline in Haskell.) -For instance, the lincats of adverbs and conjunctions are the same -in CommonX (and therefore in CatGer, which inherits it): -

-
-    lincat Adv  = {s : Str} ;
-    lincat Conj = {s : Str} ;
-
-

-But when these category symbols are used to denote their linearization -types in a resource module, these definitions are translated to -

-
-    oper Adv  : Type = {s : Str  ; lock_Adv  : {}} ;
-    oper Conj : Type = {s : Str  ; lock_Conj : {}} ;
-
-

-In this way, the user of a resource grammar cannot confuse adverbs with -conjunctions. In other words, the lock fields force the type checker -to function as grammaticality checker. -

-

-When the resource grammar is opened in an application grammar, the -lock fields are never seen (except possibly in type error messages), -and the application grammarian should never write them herself. If she -has to do this, it is a sign that the resource grammar is incomplete, and -the proper way to proceed is to fix the resource grammar. -

-

-The resource grammarian has to provide the dummy lock field values -in her hidden definitions of constants in Paradigms. For instance, -

-
-    mkAdv : Str -> Adv ;
-    -- mkAdv s = {s = s ; lock_Adv = <>} ;
-
-

- -

Lexicon construction

-

-The lexicon belonging to LangGer consists of two modules: -

-
    -
  • StructuralGer, structural words, built by using both - ParadigmsGer and MorphoGer. -
  • LexiconGer, content words, built by using ParadigmsGer only. -
- -

-The reason why MorphoGer has to be used in StructuralGer -is that ParadigmsGer does not contain constructors for closed -word classes such as pronouns and determiners. The reason why we -recommend ParadigmsGer for building LexiconGer is that -the coverage of the paradigms gets thereby tested and that the -use of the paradigms in LexiconGer gives a good set of examples for -those who want to build new lexica. -

- -

Lexicon extension

- -

The irregularity lexicon

-

-It is useful in most languages to provide a separate module of irregular -verbs and other words which are difficult for a lexicographer -to handle. There are usually a limited number of such words - a -few hundred perhaps. Building such a lexicon separately also -makes it less important to cover everything by the -worst-case variants of the paradigms mkV etc. -

- -

Lexicon extraction from a word list

-

-You can often find resources such as lists of -irregular verbs on the internet. For instance, the -Irregular German Verb page, -previously found at -http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html, -gives a list of verbs in the -traditional tabular format, which begins as follows: -

-
-    backen (du bäckst, er bäckt)                   backte [buk]              gebacken
-    befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
-    beginnen                                       begann (begönne; begänne) begonnen
-    beißen                                         biß                       gebissen
-
-

-All you have to do is to write a suitable verb paradigm -

-
-    irregV : (x1,_,_,_,_,x6 : Str) -> V ;
-
-

-and a Perl or Python or Haskell script that transforms -the table to -

-
-    backen_V   = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
-    befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
-
-

-

-When using ready-made word lists, you should think about -copyright issues. All resource grammar material should -be provided under the GNU Lesser General Public License (LGPL). -

- -

Lexicon extraction from raw text data

-

-This is a cheap technique to build a lexicon of thousands -of words, if text data is available in digital format. -See the Extract Homepage -homepage for details. -

- -

Bootstrapping with smart paradigms

-

-This is another cheap technique, where you need as input a list of words with -part-of-speech marking. You initialize the lexicon by using the one-argument -mkN etc paradigms, and add forms to those words that do not come out right. -This procedure is described in the paper -

-

-A. Ranta. -How predictable is Finnish morphology? An experiment on lexicon construction. -In J. Nivre, M. Dahllöf and B. Megyesi (eds), -Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein, -University of Uppsala, -2008. -Available from the series homepage -

- -

Extending the resource grammar API

-

-Sooner or later it will happen that the resource grammar API -does not suffice for all applications. A common reason is -that it does not include idiomatic expressions in a given language. -The solution then is in the first place to build language-specific -extension modules, like ExtraGer. -

- -

Using parametrized modules

- -

Writing an instance of parametrized resource grammar implementation

-

-Above we have looked at how a resource implementation is built by -the copy and paste method (from English to German), that is, formally -speaking, from scratch. A more elegant solution available for -families of languages such as Romance and Scandinavian is to -use parametrized modules. The advantages are -

-
    -
  • theoretical: linguistic generalizations and insights -
  • practical: maintainability improves with fewer components -
- -

-Here is a set of -slides -on the topic. -

- -

Parametrizing a resource grammar implementation

-

-This is the most demanding form of resource grammar writing. -We do not recommend the method of parametrizing from the -beginning: it is easier to have one language first implemented -in the conventional way and then add another language of the -same family by parametrization. This means that the copy and -paste method is still used, but at this time the differences -are put into an interface module. -

- -

Character encoding and transliterations

-

-This section is relevant for languages using a non-ASCII character set. -

- -

Coding conventions in GF

-

-From version 3.0, GF follows a simple encoding convention: -

-
    -
  • GF source files may follow any encoding, such as isolatin-1 or UTF-8; - the default is isolatin-1, and UTF-8 must be indicated by the judgement -
    -      flags coding = utf8 ;
    -
    - in each source module. -
  • for internal processing, all characters are converted to 16-bit unicode, - as the first step of grammar compilation guided by the coding flag -
  • as the last step of compilation, all characters are converted to UTF-8 -
  • thus, GF object files (gfo) and the Portable Grammar Format (pgf) - are in UTF-8 -
- -

-Most current resource grammars use isolatin-1 in the source, but this does -not affect their use in parallel with grammars written in other encodings. -In fact, a grammar can be put up from modules using different codings. -

-

-Warning. While string literals may contain any characters, identifiers -must be isolatin-1 letters (or digits, underscores, or dashes). This has to -do with the restrictions of the lexer tool that is used. -

- -

Transliterations

-

-While UTF-8 is well supported by most web browsers, its use in terminals and -text editors may cause disappointment. Many grammarians therefore prefer to -use ASCII transliterations. GF 3.0beta2 provides the following built-in -transliterations: -

-
    -
  • Arabic -
  • Devanagari (Hindi) -
  • Thai -
- -

-New transliterations can be defined in the GF source file -GF/Text/Transliterations.hs. -This file also gives instructions on how new ones are added. -

- - - - diff --git a/doc/Resource-HOWTO.txt b/doc/Resource-HOWTO.txt deleted file mode 100644 index 8e50974a7..000000000 --- a/doc/Resource-HOWTO.txt +++ /dev/null @@ -1,827 +0,0 @@ -Resource grammar writing HOWTO -Author: Aarne Ranta -Last update: %%date(%c) - -% NOTE: this is a txt2tags file. -% Create an html file from this file using: -% txt2tags --toc -thtml Resource-HOWTO.txt - -%!target:html - -**History** - -September 2008: updated for Version 1.5. - -October 2007: updated for Version 1.2. - -January 2006: first version. - - -The purpose of this document is to tell how to implement the GF -resource grammar API for a new language. We will //not// cover how -to use the resource grammar, nor how to change the API. But we -will give some hints how to extend the API. - -A manual for using the resource grammar is found in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html`` ../lib/resource/doc/synopsis.html]. - -A tutorial on GF, also introducing the idea of resource grammars, is found in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html`` ./gf-tutorial.html]. - -This document concerns the API v. 1.5, while the current stable release is 1.4. -You can find the code for the stable release in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/`` ../lib/resource] - -and the next release in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/`` ../next-lib/src] - -It is recommended to build new grammars to match the next release. - - - - -==The resource grammar structure== - -The library is divided into a bunch of modules, whose dependencies -are given in the following figure. 
- -[Syntax.png] - -Modules of different kinds are distinguished as follows: -- solid contours: module seen by end users -- dashed contours: internal module -- ellipse: abstract/concrete pair of modules -- rectangle: resource or instance -- diamond: interface - - -Put in another way: -- solid rectangles and diamonds: user-accessible library API -- solid ellipses: user-accessible top-level grammar for parsing and linearization -- dashed contours: not visible to users - - -The dashed ellipses form the main parts of the implementation, on which the resource -grammar programmer has to work with. She also has to work on the ``Paradigms`` -module. The rest of the modules can be produced mechanically from corresponding -modules for other languages, by just changing the language codes appearing in -their module headers. - -The module structure is rather flat: most modules are direct -parents of ``Grammar``. The idea -is that the implementors can concentrate on one linguistic aspect at a time, or -also distribute the work among several authors. The module ``Cat`` -defines the "glue" that ties the aspects together - a type system -to which all the other modules conform, so that e.g. ``NP`` means -the same thing in those modules that use ``NP``s and those that -constructs them. - - -===Library API modules=== - -For the user of the library, these modules are the most important ones. -In a typical application, it is enough to open ``Paradigms`` and ``Syntax``. -The module ``Try`` combines these two, making it possible to experiment -with combinations of syntactic and lexical constructors by using the -``cc`` command in the GF shell. 
Here are short explanations of each API module: -- ``Try``: the whole resource library for a language (``Paradigms``, ``Syntax``, - ``Irreg``, and ``Extra``); - produced mechanically as a collection of modules -- ``Syntax``: language-independent categories, syntax functions, and structural words; - produced mechanically as a collection of modules -- ``Constructors``: language-independent syntax functions and structural words; - produced mechanically via functor instantiation -- ``Paradigms``: language-dependent morphological paradigms - - - - - -===Phrase category modules=== - -The immediate parents of ``Grammar`` will be called **phrase category modules**, -since each of them concentrates on a particular phrase category (nouns, verbs, -adjectives, sentences,...). A phrase category module tells -//how to construct phrases in that category//. You will find out that -all functions in any of these modules have the same value type (or maybe -one of a small number of different types). Thus we have -- ``Noun``: construction of nouns and noun phrases -- ``Adjective``: construction of adjectival phrases -- ``Verb``: construction of verb phrases -- ``Adverb``: construction of adverbial phrases -- ``Numeral``: construction of cardinal and ordinal numerals -- ``Sentence``: construction of sentences and imperatives -- ``Question``: construction of questions -- ``Relative``: construction of relative clauses -- ``Conjunction``: coordination of phrases -- ``Phrase``: construction of the major units of text and speech -- ``Text``: construction of texts as sequences of phrases -- ``Idiom``: idiomatic expressions such as existentials - - - - -===Infrastructure modules=== - -Expressions of each phrase category are constructed in the corresponding -phrase category module. But their //use// takes mostly place in other modules. -For instance, noun phrases, which are constructed in ``Noun``, are -used as arguments of functions of almost all other phrase category modules. 
-How can we build all these modules independently of each other? - -As usual in typeful programming, the //only// thing you need to know -about an object you use is its type. When writing a linearization rule -for a GF abstract syntax function, the only thing you need to know is -the linearization types of its value and argument categories. To achieve -the division of the resource grammar to several parallel phrase category modules, -what we need is an underlying definition of the linearization types. This -definition is given as the implementation of -- ``Cat``: syntactic categories of the resource grammar - - -Any resource grammar implementation has first to agree on how to implement -``Cat``. Luckily enough, even this can be done incrementally: you -can skip the ``lincat`` definition of a category and use the default -``{s : Str}`` until you need to change it to something else. In -English, for instance, many categories do have this linearization type. - - - -===Lexical modules=== - -What is lexical and what is syntactic is not as clearcut in GF as in -some other grammar formalisms. Logically, lexical means atom, i.e. a -``fun`` with no arguments. Linguistically, one may add to this -that the ``lin`` consists of only one token (or of a table whose values -are single tokens). Even in the restricted lexicon included in the resource -API, the latter rule is sometimes violated in some languages. For instance, -``Structural.both7and_DConj`` is an atom, but its linearization is -two words e.g. //both - and//. - -Another characterization of lexical is that lexical units can be added -almost //ad libitum//, and they cannot be defined in terms of already -given rules. The lexical modules of the resource API are thus more like -samples than complete lists. There are two such modules: -- ``Structural``: structural words (determiners, conjunctions,...) -- ``Lexicon``: basic everyday content words (nouns, verbs,...) 
- - -The module ``Structural`` aims for completeness, and is likely to -be extended in future releases of the resource. The module ``Lexicon`` -gives a "random" list of words, which enables testing the syntax. -It also provides a check list for morphology, since those words are likely to include -most morphological patterns of the language. - -In the case of ``Lexicon`` it may come out clearer than anywhere else -in the API that it is impossible to give exact translation equivalents in -different languages on the level of a resource grammar. This is no problem, -since application grammars can use the resource in different ways for -different languages. - - -==Language-dependent syntax modules== - -In addition to the common API, there is room for language-dependent extensions -of the resource. The top level of each languages looks as follows (with German -as example): -``` - abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs -``` -where ``ExtraGerAbs`` is a collection of syntactic structures specific to German, -and ``IrregGerAbs`` is a dictionary of irregular words of German -(at the moment, just verbs). Each of these language-specific grammars has -the potential to grow into a full-scale grammar of the language. These grammar -can also be used as libraries, but the possibility of using functors is lost. - -To give a better overview of language-specific structures, -modules like ``ExtraGerAbs`` -are built from a language-independent module ``ExtraAbs`` -by restricted inheritance: -``` - abstract ExtraGerAbs = Extra [f,g,...] -``` -Thus any category and function in ``Extra`` may be shared by a subset of all -languages. One can see this set-up as a matrix, which tells -what ``Extra`` structures -are implemented in what languages. For the common API in ``Grammar``, the matrix -is filled with 1's (everything is implemented in every language). 
- -In a minimal resource grammar implementation, the language-dependent -extensions are just empty modules, but it is good to provide them for -the sake of uniformity. - - - -===The present-tense fragment=== - -Some lines in the resource library are suffixed with the comment -``` - --# notpresent -``` -which is used by a preprocessor to exclude those lines from -a reduced version of the full resource. This present-tense-only -version is useful for applications in most technical text, since -they reduce the grammar size and compilation time. It can also -be useful to exclude those lines in a first version of resource -implementation. To compile a grammar with present-tense-only, use -``` - make Present -``` -with ``resource/Makefile``. - - - -==Phases of the work== - -===Putting up a directory=== - -Unless you are writing an instance of a parametrized implementation -(Romance or Scandinavian), which will be covered later, the -simplest way is to follow roughly the following procedure. Assume you -are building a grammar for the German language. Here are the first steps, -which we actually followed ourselves when building the German implementation -of resource v. 1.0 at Ubuntu linux. We have slightly modified them to -match resource v. 1.5 and GF v. 3.0. - -+ Create a sister directory for ``GF/lib/resource/english``, named - ``german``. -``` - cd GF/lib/resource/ - mkdir german - cd german -``` - -+ Check out the [ISO 639 3-letter language code - http://www.w3.org/WAI/ER/IG/ert/iso639.htm] - for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``. - (We use the 3-letter codes rather than the more common 2-letter codes, - since they will suffice for many more languages!) - -+ Copy the ``*Eng.gf`` files from ``english`` ``german``, - and rename them: -``` - cp ../english/*Eng.gf . - rename 's/Eng/Ger/' *Eng.gf -``` - If you don't have the ``rename`` command, you can use a bash script with ``mv``. 
- - -+ Change the ``Eng`` module references to ``Ger`` references - in all files: -``` - sed -i 's/English/German/g' *Ger.gf - sed -i 's/Eng/Ger/g' *Ger.gf -``` - The first line prevents changing the word ``English``, which appears - here and there in comments, to ``Gerlish``. The ``sed`` command syntax - may vary depending on your operating system. - -+ This may of course change unwanted occurrences of the - string ``Eng`` - verify this by -``` - grep Ger *.gf -``` - But you will have to make lots of manual changes in all files anyway! - -+ Comment out the contents of these files: -``` - sed -i 's/^/--/' *Ger.gf -``` - This will give you a set of templates out of which the grammar - will grow as you uncomment and modify the files rule by rule. - -+ In all ``.gf`` files, uncomment the module headers and brackets, - leaving the module bodies commented. Unfortunately, there is no - simple way to do this automatically (or to avoid commenting these - lines in the previous step) - but uncommenting the first - and the last lines will actually do the job for many of the files. - -+ Uncomment the contents of the main grammar file: -``` - sed -i 's/^--//' LangGer.gf -``` - -+ Now you can open the grammar ``LangGer`` in GF: -``` - gf LangGer.gf -``` - You will get lots of warnings on missing rules, but the grammar will compile. - -+ At all the following steps you will now have a valid, but incomplete - GF grammar. The GF command -``` - pg -missing -``` - tells you what exactly is missing. - - -Here is the module structure of ``LangGer``. It has been simplified by leaving out -the majority of the phrase category modules. Each of them has the same dependencies -as ``VerbGer``, whose complete dependencies are shown as an example. - -[German.png] - - -===Direction of work=== - -The real work starts now. 
There are many ways to proceed, the most obvious ones being -- Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then - ``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time - building complete phrases, and add them with more content as you proceed. - **This approach is not recommended**. It is impossible to test the rules if - you have no words to apply the constructions to. - -- Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you - need to write ``ParadigmsGer``, which in turn needs parts of - ``MorphoGer`` and ``ResGer``. - **This approach is not recommended**. You can get stuck to details of - morphology such as irregular words, and you don't have enough grasp about - the type system to decide what forms to cover in morphology. - - -The practical working direction is thus a saw-like motion between the morphological -and top-level modules. Here is a possible course of the work that gives enough -test data and enough general view at any point: -+ Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define -``` - lincat N = {s : Number => Case => Str ; g : Gender} ; -``` -we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition -of ``Number`` in [``common/ParamX`` ../lib/resource/common/ParamX.gf] -works for German, so we -use it and just define ``Case`` and ``Gender`` in ``ResGer``. - -+ Define some cases of ``mkN`` in ``ParadigmsGer``. In this way you can -already implement a huge amount of nouns correctly in ``LexiconGer``. Actually -just adding the worst-case instance of ``mkN`` (the one taking the most -arguments) should suffice for every noun - but, -since it is tedious to use, you -might proceed to the next step before returning to morphology and defining the -real work horse, ``mkN`` taking two forms and a gender. - -+ While doing this, you may want to test the resource independently. 
Do this by - starting the GF shell in the ``resource`` directory, by the commands -``` - > i -retain german/ParadigmsGer - > cc -table mkN "Kirche" -``` - -+ Proceed to determiners and pronouns in -``NounGer`` (``DetCN UsePron DetQuant NumSg DefArt IndefArt UseN``) and -``StructuralGer`` (``i_Pron this_Quant``). You also need some categories and -parameter types. At this point, it is maybe not possible to find out the final -linearization types of ``CN``, ``NP``, ``Det``, and ``Quant``, but at least you should -be able to correctly inflect noun phrases such as //every airplane//: -``` - > i german/LangGer.gf - > l -table DetCN every_Det (UseN airplane_N) - - Nom: jeder Flugzeug - Acc: jeden Flugzeug - Dat: jedem Flugzeug - Gen: jedes Flugzeugs -``` - -+ Proceed to verbs: define ``CatGer.V``, ``ResGer.VForm``, and -``ParadigmsGer.mkV``. You may choose to exclude ``notpresent`` -cases at this point. But anyway, you will be able to inflect a good -number of verbs in ``Lexicon``, such as -``live_V`` (``mkV "leben"``). - -+ Now you can soon form your first sentences: define ``VP`` and -``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``. -Even if you have excluded the tenses, you will be able to produce -``` - > i -preproc=./mkPresent german/LangGer.gf - > l -table PredVP (UsePron i_Pron) (UseV live_V) - - Pres Simul Pos Main: ich lebe - Pres Simul Pos Inv: lebe ich - Pres Simul Pos Sub: ich lebe - Pres Simul Neg Main: ich lebe nicht - Pres Simul Neg Inv: lebe ich nicht - Pres Simul Neg Sub: ich nicht lebe -``` -You should also be able to parse: -``` - > p -cat=Cl "ich lebe" - PredVP (UsePron i_Pron) (UseV live_V) -``` - -+ Transitive verbs -(``CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a``) -are a natural next step, so that you can -produce ``ich liebe dich`` ("I love you"). 
- -+ Adjectives (``CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA``) -will force you to think about strong and weak declensions, so that you can -correctly inflect //mein neuer Wagen, dieser neue Wagen// -("my new car, this new car"). - -+ Once you have implemented the set -(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP), -you have overcome most of difficulties. You know roughly what parameters -and dependences there are in your language, and you can now proceed very -much in the order you please. - - - -===The develop-test cycle=== - -The following develop-test cycle will -be applied most of the time, both in the first steps described above -and in later steps where you are more on your own. - -+ Select a phrase category module, e.g. ``NounGer``, and uncomment some - linearization rules (for instance, ``DetCN``, as above). - -+ Write down some German examples of this rule, for instance translations - of "the dog", "the house", "the big house", etc. Write these in all their - different forms (two numbers and four cases). - -+ Think about the categories involved (``CN, NP, N, Det``) and the - variations they have. Encode this in the lincats of ``CatGer``. - You may have to define some new parameter types in ``ResGer``. - -+ To be able to test the construction, - define some words you need to instantiate it - in ``LexiconGer``. You will also need some regular inflection patterns - in``ParadigmsGer``. - -+ Test by parsing, linearization, - and random generation. In particular, linearization to a table should - be used so that you see all forms produced; the ``treebank`` option - preserves the tree -``` - > gr -cat=NP -number=20 | l -table -treebank -``` - -+ Save some tree-linearization pairs for later regression testing. You can save - a gold standard treebank and use the Unix ``diff`` command to compare later - linearizations produced from the same list of trees. 
If you save the trees - in a file ``trees``, you can do as follows: -``` - > rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank -``` - -+ A file with trees testing all resource functions is included in the resource, - entitled ``resource/exx-resource.gft``. A treebank can be created from this by - the Unix command -``` - % runghc Make.hs test langs=Ger -``` - - - -You are likely to run this cycle a few times for each linearization rule -you implement, and some hundreds of times altogether. There are roughly -70 ``cat``s and -600 ``funs`` in ``Lang`` at the moment; 170 of the ``funs`` are outside the two -lexicon modules). - - -===Auxiliary modules=== - -These auxuliary ``resource`` modules will be written by you. - -- ``ResGer``: parameter types and auxiliary operations -(a resource for the resource grammar!) -- ``ParadigmsGer``: complete inflection engine and most important regular paradigms -- ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need -not be separate from ``ResGer``. - - -These modules are language-independent and provided by the existing resource -package. - -- ``ParamX``: parameter types used in many languages -- ``CommonX``: implementation of language-uniform categories - such as $Text$ and $Phr$, as well as of - the logical tense, anteriority, and polarity parameters -- ``Coordination``: operations to deal with lists and coordination -- ``Prelude``: general-purpose operations on strings, records, - truth values, etc. -- ``Predef``: general-purpose operations with hard-coded definitions - - -An important decision is what rules to implement in terms of operations in -``ResGer``. The **golden rule of functional programming** says: -- //Whenever you find yourself programming by copy and paste, write a function instead!//. - - -This rule suggests that an operation should be created if it is to be -used at least twice. 
At the same time, a sound principle of **vicinity** says: -- //It should not require too much browsing to understand what a piece of code does.// - - -From these two principles, we have derived the following practice: -- If an operation is needed //in two different modules//, - it should be created in as an ``oper`` in ``ResGer``. An example is ``mkClause``, - used in ``Sentence``, ``Question``, and ``Relative``- -- If an operation is needed //twice in the same module//, but never - outside, it should be created in the same module. Many examples are - found in ``Numerals``. -- If an operation is needed //twice in the same judgement//, but never - outside, it should be created by a ``let`` definition. -- If an operation is only needed once, it should not be created as an ``oper``, - but rather inlined. However, a ``let`` definition may well be in place just - to make the readable. - Most functions in phrase category modules - are implemented in this way. - - -This discipline is very different from the one followed in early -versions of the library (up to 0.9). We then valued the principle of -abstraction more than vicinity, creating layers of abstraction for -almost everything. This led in practice to the duplication of almost -all code on the ``lin`` and ``oper`` levels, and made the code -hard to understand and maintain. - - - -===Morphology and lexicon=== - -The paradigms needed to implement -``LexiconGer`` are defined in -``ParadigmsGer``. -This module provides high-level ways to define the linearization of -lexical items, of categories ``N, A, V`` and their complement-taking -variants. - -For ease of use, the ``Paradigms`` modules follow a certain -naming convention. Thus they for each lexical category, such as ``N``, -the overloaded functions, such as ``mkN``, with the following cases: - -- the worst-case construction of ``N``. Its type signature - has the form -``` - mkN : Str -> ... -> Str -> P -> ... 
-> Q -> N -``` - with as many string and parameter arguments as can ever be needed to - construct an ``N``. -- the most regular cases, with just one string argument: -``` - mkN : Str -> N -``` -- A language-dependent (small) set of functions to handle mild irregularities - and common exceptions. - - -For the complement-taking variants, such as ``V2``, we provide -- a case that takes a ``V`` and all necessary arguments, such - as case and preposition: -``` - mkV2 : V -> Case -> Str -> V2 ; -``` -- a case that takes a ``Str`` and produces a transitive verb with the direct - object case: -``` - mkV2 : Str -> V2 ; -``` -- A language-dependent (small) set of functions to handle common special cases, - such as transitive verbs that are not regular: -``` - mkV2 : V -> V2 ; -``` - - -The golden rule for the design of paradigms is that -- //The user of the library will only need function applications with constants and strings, never any records or tables.// - - -The discipline of data abstraction moreover requires that the user of the resource -is not given access to parameter constructors, but only to constants that denote -them. This gives the resource grammarian the freedom to change the underlying -data representation if needed. It means that the ``ParadigmsGer`` module has -to define constants for those parameter types and constructors that -the application grammarian may need to use, e.g. -``` - oper - Case : Type ; - nominative, accusative, genitive, dative : Case ; -``` -These constants are defined in terms of parameter types and constructors -in ``ResGer`` and ``MorphoGer``, which modules are not -visible to the application grammarian. - - -===Lock fields=== - -An important difference between ``MorphoGer`` and -``ParadigmsGer`` is that the former uses "raw" record types -for word classes, whereas the latter used category symbols defined in -``CatGer``. 
When these category symbols are used to denote -record types in a resource module, such as ``ParadigmsGer``, -a **lock field** is added to the record, so that categories -with the same implementation are not confused with each other. -(This is inspired by the ``newtype`` discipline in Haskell.) -For instance, the lincats of adverbs and conjunctions are the same -in ``CommonX`` (and therefore in ``CatGer``, which inherits it): -``` - lincat Adv = {s : Str} ; - lincat Conj = {s : Str} ; -``` -But when these category symbols are used to denote their linearization -types in a resource module, these definitions are translated to -``` - oper Adv : Type = {s : Str ; lock_Adv : {}} ; - oper Conj : Type = {s : Str ; lock_Conj : {}} ; -``` -In this way, the user of a resource grammar cannot confuse adverbs with -conjunctions. In other words, the lock fields force the type checker -to function as a grammaticality checker. - -When the resource grammar is ``open``ed in an application grammar, the -lock fields are never seen (except possibly in type error messages), -and the application grammarian should never write them herself. If she -has to do this, it is a sign that the resource grammar is incomplete, and -the proper way to proceed is to fix the resource grammar. - -The resource grammarian has to provide the dummy lock field values -in her hidden definitions of constants in ``Paradigms``. For instance, -``` - mkAdv : Str -> Adv ; - -- mkAdv s = {s = s ; lock_Adv = <>} ; -``` - - -===Lexicon construction=== - -The lexicon belonging to ``LangGer`` consists of two modules: -- ``StructuralGer``, structural words, built by using both - ``ParadigmsGer`` and ``MorphoGer``. -- ``LexiconGer``, content words, built by using ``ParadigmsGer`` only. - - -The reason why ``MorphoGer`` has to be used in ``StructuralGer`` -is that ``ParadigmsGer`` does not contain constructors for closed -word classes such as pronouns and determiners. 
The reason why we -recommend ``ParadigmsGer`` for building ``LexiconGer`` is that -the coverage of the paradigms gets thereby tested and that the -use of the paradigms in ``LexiconGer`` gives a good set of examples for -those who want to build new lexica. - - - - - -==Lexicon extension== - -===The irregularity lexicon=== - -It is useful in most languages to provide a separate module of irregular -verbs and other words which are difficult for a lexicographer -to handle. There are usually a limited number of such words - a -few hundred perhaps. Building such a lexicon separately also -makes it less important to cover //everything// by the -worst-case variants of the paradigms ``mkV`` etc. - - - -===Lexicon extraction from a word list=== - -You can often find resources such as lists of -irregular verbs on the internet. For instance, the -Irregular German Verb page, -previously found at -``http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html``, -gives a list of verbs in the -traditional tabular format, which begins as follows: -``` - backen (du bäckst, er bäckt) backte [buk] gebacken - befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen - beginnen begann (begönne; begänne) begonnen - beißen biß gebissen -``` -All you have to do is to write a suitable verb paradigm -``` - irregV : (x1,_,_,_,_,x6 : Str) -> V ; -``` -and a Perl or Python or Haskell script that transforms -the table to -``` - backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ; - befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ; -``` - -When using ready-made word lists, you should think about -copyright issues. All resource grammar material should -be provided under GNU Lesser General Public License (LGPL). - - - -===Lexicon extraction from raw text data=== - -This is a cheap technique to build a lexicon of thousands -of words, if text data is available in digital format. 
-See the [Extract Homepage http://www.cs.chalmers.se/~markus/extract/] -homepage for details. - - -===Bootstrapping with smart paradigms=== - -This is another cheap technique, where you need as input a list of words with -part-of-speech marking. You initialize the lexicon by using the one-argument -``mkN`` etc paradigms, and add forms to those words that do not come out right. -This procedure is described in the paper - -A. Ranta. -How predictable is Finnish morphology? An experiment on lexicon construction. -In J. Nivre, M. Dahllöf and B. Megyesi (eds), -//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//, -University of Uppsala, -2008. -Available from the [series homepage http://publications.uu.se/abstract.xsql?dbid=8933] - - - - -==Extending the resource grammar API== - -Sooner or later it will happen that the resource grammar API -does not suffice for all applications. A common reason is -that it does not include idiomatic expressions in a given language. -The solution then is in the first place to build language-specific -extension modules, like ``ExtraGer``. - -==Using parametrized modules== - -===Writing an instance of parametrized resource grammar implementation=== - -Above we have looked at how a resource implementation is built by -the copy and paste method (from English to German), that is, formally -speaking, from scratch. A more elegant solution available for -families of languages such as Romance and Scandinavian is to -use parametrized modules. The advantages are -- theoretical: linguistic generalizations and insights -- practical: maintainability improves with fewer components - - -Here is a set of -[slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf] -on the topic. - - -===Parametrizing a resource grammar implementation=== - -This is the most demanding form of resource grammar writing. 
-We do //not// recommend the method of parametrizing from the -beginning: it is easier to have one language first implemented -in the conventional way and then add another language of the -same family by parametrization. This means that the copy and -paste method is still used, but this time the differences -are put into an ``interface`` module. - - -==Character encoding and transliterations== - -This section is relevant for languages using a non-ASCII character set. - -==Coding conventions in GF== - -From version 3.0, GF follows a simple encoding convention: -- GF source files may follow any encoding, such as isolatin-1 or UTF-8; - the default is isolatin-1, and UTF-8 must be indicated by the judgement -``` - flags coding = utf8 ; -``` - in each source module. -- for internal processing, all characters are converted to 16-bit unicode, - as the first step of grammar compilation guided by the ``coding`` flag -- as the last step of compilation, all characters are converted to UTF-8 -- thus, GF object files (``gfo``) and the Portable Grammar Format (``pgf``) - are in UTF-8 - - -Most current resource grammars use isolatin-1 in the source, but this does -not affect their use in parallel with grammars written in other encodings. -In fact, a grammar can be put together from modules using different codings. - -**Warning**. While string literals may contain any characters, identifiers -must be isolatin-1 letters (or digits, underscores, or dashes). This has to -do with the restrictions of the lexer tool that is used. - - -==Transliterations== - -While UTF-8 is well supported by most web browsers, its use in terminals and -text editors may cause disappointment. Many grammarians therefore prefer to -use ASCII transliterations. GF 3.0beta2 provides the following built-in -transliterations: -- Arabic -- Devanagari (Hindi) -- Thai - - -New transliterations can be defined in the GF source file -[``GF/Text/Transliterations.hs`` ../src/GF/Text/Transliterations.hs]. 
-This file also gives instructions on how new ones are added. - - - - - diff --git a/doc/Syntax.png b/doc/Syntax.png deleted file mode 100644 index f36c098f6..000000000 Binary files a/doc/Syntax.png and /dev/null differ diff --git a/doc/TODO b/doc/TODO deleted file mode 100644 index c92f4c8fa..000000000 --- a/doc/TODO +++ /dev/null @@ -1,231 +0,0 @@ - -* Some notes on the syntax of this file, making it possible to use todoo-mode.el: - -- Items start with "* " -- Sub-items start with "- " -- It should be noted somewhere in the item, who has reported the item - Suggestion: Add "[who]" at the beginning of the item title - (then one can use "assign item" in todoo-mode) -- Each item should have a priority - Suggestion: Add "URGENT", "IMPORTANT" or "WISH" at the beginning of - the item title -- Sort the items in priority order - (todoo-mode can move an item up or down) - ----------------------------------------------------------------------- - - -* [peb] URGENT: Error messages for syntax errors - - When a syntax error is reported, it should be noted which file it - is. Otherwise it is impossible to know where the error is - (if one uses the -s flag): - - > i -s Domain/MP3/Domain_MP_Semantics.gf - syntax error at line 33 before ve , Proposition , - - There's no problem with other kinds of errors: - - > i -s Domain/MP3/Domain_MP_Semantics.gf - checking module Godis_Semantics - Happened in linearization of userMove : - product expected instead of { - pl : Str - } - - -* [peb] IMPORTANT: Add the -path of a module to daughter modules - - Then the main module does not have to know where all grandchildren are: - - file A.gf: - abstract A = B ** {...} - - file B.gf: - --# -path=./resource - abstract B = Lang ** {...} - - I.e.: the file A.gf should not need to know that B.gf uses the - resource library. 
- - -* [peb] IMPORTANT: incomplete concrete and interfaces - -- The following works in GF: - - incomplete concrete TestDI of TestA = open (C=TestCI) in { - lincat A = TestCI.A ** {p : Str}; - lin f = TestCI.f ** {p = "f"}; - g = TestCI.g ** {p = "g"}; - } - - > i -src TestDE.gf - -- BUT, if we exchange "TestCI" for "C" we get an error: - - incomplete concrete TestDI of TestA = open (C=TestCI) in { - lincat A = C.A ** {p : Str}; - lin f = C.f ** {p = "f"}; - g = C.g ** {p = "g"}; - } - - > i -src TestDE.gf - compiling TestDE.gf... failed to find C - OCCURRED IN - atomic term C given TestCE TestCI TestCE TestDE - OCCURRED IN - renaming definition of f - OCCURRED IN - renaming module TestDE - -- the other modules: - - abstract TestA = { - cat A; - fun f, g : A; - } - - instance TestBE of TestBI = { - oper hello = "hello"; - bye = "bye"; - } - - interface TestBI = { - oper hello : Str; - bye : Str; - } - - concrete TestCE of TestA = TestCI with (TestBI = TestBE); - - incomplete concrete TestCI of TestA = open TestBI in { - lincat A = {s : Str}; - lin f = {s = hello}; - g = {s = bye}; - } - - concrete TestDE of TestA = TestDI with (TestCI = TestCE); - -* [peb] IMPORTANT: Missing things in the help command - - > h -printer - (the flag -printer=cfgm is missing) - - > h -cat - WARNING: invalid option: cat - - > h -lang - WARNING: invalid option: lang - - > h -language - WARNING: invalid option: language - - > h -parser - WARNING: invalid option: parser - - > h -aslkdjaslkdjss - WARNING: invalid option: aslkdjaslkdjss - Command not found. - (it should note: "option not found") - - > h -optimize - WARNING: invalid option: optimize - - > h -startcat - WARNING: invalid option: startcat - - > h h - h, help: h Command? 
- (it should also mention "h -option") - - -* [peb] IMPORTANT: Set GF_LIB_PATH within GF - - > sf libpath=~/GF/lib - - -* [peb] IMPORTANT: Set the starting category with "sf" - - > sf startcat=X - - -* [peb] IMPORTANT: import-flags - -- There are some inconsistencies when importing grammars: - - 1. when doing "pg -printer=cfg", one must have used "i -conversion=finite", - since "pg" doesn't care about the flags that are set in the grammar file - - 2. when doing "pm -printer=cfgm", one must have set the flag - "conversion=finite" within the grammar file, since "pm" doesn't - care about the flags to the import command - - (I guess it's me (peb) who should fix this, but I don't know where - the different flags reside...) - -- Also, it must be decided in what cases flags can override other flags: - - a) in the grammar file, e.g. "flags conversion=finite;" - b) on the command line, e.g. "> sf conversion=finite" - c) as argument to a command, e.g. "> i -conversion=finite file.gf" - -- A related issue is to decide the scope of flags: - - Some flags are (or should be) local to the module - (e.g. -coding and -path) - Other flags override daughter flags for daughter modules - (e.g. -startcat and -conversion) - -* [bringert] IMPORTANT: get right startcat flag when printing CFGM - GF.CFGM.PrintCFGrammar.prCanonAsCFGM currently only gets the startcat - flag from the top-level concrete module. This might be easier - to fix if the multi grammar printers had access to more than just - the CanonGrammar. - -* [peb] WISH: generalizing incomplete concrete - - I want to be able to open an incomplete concrete module - inside another incomplete concrete. - Then I can instantiate both incompletes at the same time. - -* [peb] WISH: _tmpi, _tmpo - - The files _tmpi and _tmpo are never removed when quitting GF. - Further suggestion: put them in /tmp or similar. - - peb: när man använder "|" till ett systemanrop, t.ex: - pg | ! sort - så skapas filerna _tmpi och _tmpo. Men de tas aldrig bort. 
- - peb: ännu bättre: ta bort filerna efteråt. - - aarne: Sant: när GF quittas (om detta inte sker onormalt). - Eller när kommandot har kört färdigt (om det terminerar). - - peb: Bäst(?): skapa filerna i /tmp eller liknande. - - aarne: Ibland får man skrivrättighetsproblem - och det är - inte kul om man måste ange en tmp-path. Och olika - användare och gf-processer måste ha unika filnamn. - Och vet inte hur det funkar på windows... - - aarne: Ett till alternativ skulle vara att använda handles - utan några tmp-filer alls. Men jag har inte hunnit - ta reda på hur det går till. - - björn: Lite slumpmässiga tankar: - + man kan använda System.Directory.getTemporaryDirectory, så slipper man iaf bry sig om olika plattformsproblem. - + sen kan man använda System.IO.openTempFile för att skapa en temporär fil. Den tas dock inte bort när programmet avslutas, så det får man fixa själv. - + System.Posix.Temp.mkstemp gör nåt liknande, men dokumentationen är dålig. - + biblioteket HsShellScript har lite funktioner för sånt här, se - http://www.volker-wysk.de/hsshellscript/apidoc/HsShellScript.html#16 - - -* [peb] WISH: Hierarchic modules - - Suggestion by peb: - The module A.B.C is located in the file A/B/C.gf - - Main advantage: you no longer need to state "--# -path=..." in - modules - -- How can this be combined with several modules inside one file? diff --git a/doc/categories.png b/doc/categories.png deleted file mode 100644 index afc5873c5..000000000 Binary files a/doc/categories.png and /dev/null differ diff --git a/doc/compiling-gf.txt b/doc/compiling-gf.txt deleted file mode 100644 index 9e438f40f..000000000 --- a/doc/compiling-gf.txt +++ /dev/null @@ -1,750 +0,0 @@ -Compiling GF -Aarne Ranta -Proglog meeting, 1 November 2006 - -% to compile: txt2tags -thtml compiling-gf.txt ; htmls compiling-gf.html - -%!target:html -%!postproc(html): #NEW - -#NEW - -==The compilation task== - -GF is a grammar formalism, i.e. a special purpose programming language -for writing grammars. 
- -Other grammar formalisms: -- BNF, YACC, Happy (grammars for programming languages); -- PATR, HPSG, LFG (grammars for natural languages). - - -The grammar compiler prepares a GF grammar for two computational tasks: -- linearization: take syntax trees to strings -- parsing: take strings to syntax trees - - -The grammar gives a declarative description of these functionalities, -on a high abstraction level that improves grammar writing -productivity. - -For efficiency, the grammar is compiled to lower-level formats. - -Type checking is another essential compilation phase. Its purpose is -twofold, as usual: -- checking the correctness of the grammar -- type-annotating expressions for code generation - - -#NEW - -==Characteristics of GF language== - -Functional language with types, both built-in and user-defined. -``` - Str : Type - - param Number = Sg | Pl - - param AdjForm = ASg Gender | APl - - Noun : Type = {s : Number => Str ; g : Gender} -``` -Pattern matching. -``` - svart_A = table { - ASg _ => "svart" ; - _ => "svarta" - } -``` -Higher-order functions. - -Dependent types. -``` - flip : (a, b, c : Type) -> (a -> b -> c) -> b -> a -> c = - \_,_,_,f,y,x -> f x y ; -``` - - -#NEW - -==The module system of GF== - -Main division: abstract syntax and concrete syntax -``` - abstract Greeting = { - cat Greet ; - fun Hello : Greet ; - } - - concrete GreetingEng of Greeting = { - lincat Greet = {s : Str} ; - lin Hello = {s = "hello"} ; - } - - concrete GreetingIta of Greeting = { - param Politeness = Familiar | Polite ; - lincat Greet = {s : Politeness => Str} ; - lin Hello = {s = table { - Familiar => "ciao" ; - Polite => "buongiorno" - } ; - } -``` -Other features of the module system: -- extension and opening -- parametrized modules (cf. ML: signatures, structures, functors) - - - - -#NEW - -==GF vs. 
Haskell== - -Some things that (standard) Haskell hasn't: -- records and record subtyping -- regular expression patterns -- dependent types -- ML-style modules - - -Some things that GF hasn't: -- infinite (recursive) data types -- recursive functions -- classes, polymorphism - - -#NEW - -==GF vs. most linguistic grammar formalisms== - -GF separates abstract syntax from concrete syntax. - -GF has a module system with separate compilation. - -GF is generation-oriented (as opposed to parsing). - -GF has unidirectional matching (as opposed to unification). - -GF has a static type system (as opposed to a type-free universe). - -"I was - and I still am - firmly convinced that a program composed -out of statically type-checked parts is more likely to faithfully -express a well-thought-out design than a program relying on -weakly-typed interfaces or dynamically-checked interfaces." -(B. Stroustrup, 1994, p. 107) - - - -#NEW - -==The computation model: abstract syntax== - -An abstract syntax defines a free algebra of trees (using -dependent types, recursion, higher-order abstract syntax: -GF includes a complete Logical Framework). -``` - cat C (x_1 : A_1)...(x_n : A_n) - a_1 : A_1 - ... - a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1} - ---------------------------------------------------- - (C a_1 ... a_n) : Type - - - fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A - a_1 : A_1 - ... - a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1} - ---------------------------------------------------- - (f a_1 ... a_n) : A{x_1 : A_1,...,x_n : A_n} - - - A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A - ---------------------------- ---------------------- ------------------------ - (x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := A} -``` -Notice that all syntax trees are in eta-long form. - - -#NEW - -==The computation model: concrete syntax== - -A concrete syntax defines a homomorphism (compositional mapping) -from the abstract syntax to a system of concrete syntax objects. 
-``` - cat C _ - -------------------- - lincat C = C* : Type - - fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A - ----------------------------------------------- - lin f = f* : A_1* -> ... -> A_n* -> A* - - (f a_1 ... a_n)* = f* a_1* ... a_n* -``` -The homomorphism can as such be used as linearization function. - -It is a functional program, but a restricted one, since it works -in the end on finite data structures only. - -But a more efficient program is obtained via compilation to -GFC = Canonical GF: the "machine code" of GF. - -The parsing problem of GFC can be reduced to that of MPCFG (Multiple -Parallel Context Free Grammars), see P. Ljunglöf's thesis (2004). - - - -#NEW - -==The core type system of concrete syntax: basic types== - -``` - param P P : PType - PType : Type --------- --------- - P : PType P : Type - - s : Str t : Str - Str : type "foo" : Str [] : Str ---------------- - s ++ t : Str -``` - - -#NEW - -==The core type system of concrete syntax: functions and tables== - -``` - A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A - ---------------------------- ---------------------- ------------------------ - (x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := A} - - - P : PType A : Type t : P => A p : p - -------------------- ----------------- - P => A : Type t ! p : A - - v_1,...,v_n : A - ---------------------------------------------- P = {C_1,...,C_n} - table {C_1 => v_1 ; ... ; C_n => v_n} : P => A -``` -Pattern matching is treated as an abbreviation for tables. Notice that -``` - case e of {...} == table {...} ! e -``` - - -#NEW - -==The core type system of concrete syntax: records== - -``` - A_1,...,A_n : Type - ------------------------------------ n >= 0 - {r_1 : A_1 ; ... ; r_n : A_n} : Type - - - a_1 : A_1 ... a_n : A_n - ------------------------------------------------------------ - {r_1 = a_1 ; ... ; r_n = a_n} : {r_1 : A_1 ; ... ; r_n : A_n} - - - r : {r_1 : A_1 ; ... 
; r_n : A_n} - ----------------------------------- i = 1,...,n - r.r_i : A_i -``` -Subtyping: if ``r : R ** {r : A}`` then ``r : R`` - - - -#NEW - -==Computation rules== - -``` - (\x -> b) a = b{x := a} - - (table {C_1 => v_1 ; ... ; C_n => v_n} : P => A) ! C_i = v_i - - {r_1 = a_1 ; ... ; r_n = a_n}.r_i = a_i -``` - - - -#NEW - -==Canonical GF== - -Concrete syntax type system: -``` - A_1 : Type ... A_n : Type - Str : Type Int : Type ------------------------- $i : A - [A_1, ..., A_n] : Type - - - a_1 : A_1 ... a_n : A_n t : [A_1, ..., A_n] - --------------------------------- ------------------- i = 1,..,n - [a_1, ..., a_n] : [A_1, ..., A_n] t ! i : A_i -``` -Tuples represent both records and tables. - -There are no functions. - -Linearization: -``` - lin f = f* - - (f a_1 ... a_n)* = f*{$1 = a_1*, ..., $n = a_n*} -``` - - -#NEW - -==The compilation task, again== - -1. From a GF source grammar, derive a canonical GF grammar. - -2. From the canonical GF grammar derive an MPCFG grammar. - -The canonical GF grammar can be used for linearization, with -linear time complexity (w.r.t. the size of the tree). - -The MPCFG grammar can be used for parsing, with (unbounded) -polynomial time complexity (w.r.t. the size of the string). - -For these target formats, we have also built interpreters in -different programming languages (C, C++, Haskell, Java, Prolog). - -Moreover, we generate supplementary formats such as grammars -required by various speech recognition systems. - - -#NEW - -==An overview of compilation phases== - -Legend: -- ellipse node: representation saved in a file -- plain text node: internal representation -- solid arrow or ellipse: essential phase or format -- dashed arrow or ellipse: optional phase or format -- arrow label: the module implementing the phase - - -[gf-compiler.png] - - -#NEW - -==Using the compiler== - -Batch mode (cf. GHC). - -Interactive mode, building the grammar incrementally from -different files, with the possibility of testing them -(cf. 
GHCI). - -The interactive mode was first, built on the model of ALF-2 -(L. Magnusson), and there was no file output of compiled -grammars. - - -#NEW - -==Modules and separate compilation== - -The above diagram shows what happens to each module. -(But not quite, since some of the back-end formats must be -built for sets of modules: GFCC and the parser formats.) - -When the grammar compiler is called, it has a main module as its -argument. It then builds recursively a dependency graph with all -the other modules, and decides which ones must be recompiled. -The behaviour is rather similar to GHC. - -Separate compilation is //extremely important// when developing -big grammars, especially when using grammar libraries. Example: compiling -the GF resource grammar library takes 5 minutes, whereas reading -in the compiled image takes 10 seconds. - - -#NEW - -==Module dependencies and recompilation== - -(For later use, not for the Proglog talk) - -For each module M, there are 3 kinds of files: -- M.gf, source file -- M.gfc, compiled file ("object file") -- M.gfr, type-checked and optimized source file (for resource modules only) - - -The compiler reads gf files and writes gfc files (and gfr files if appropriate) - -The Main module is the one used as argument when calling GF. - -A module M (immediately) depends on the module K, if either -- M is a concrete of K -- M is an instance of K -- M extends K -- M opens K -- M is a completion of K with something -- M is a completion of some module with K instantiated with something - - -A module M (transitively) depends on the module K, if either -- M immediately depends on K -- M depends on some L such that L immediately depends on K - - -Immediate dependence is readable from the module header without parsing -the whole module. - -The compiler reads recursively the headers of all modules that Main depends on. - -These modules are arranged in a dependency graph, which is checked to be acyclic. 
- -To decide whether a module M has to be compiled, do: -+ Get the time stamps t() of M.gf and M.gfc (if a file doesn't exist, its - time is minus infinity). -+ If t(M.gf) > t(M.gfc), M must be compiled. -+ If M depends on K and K must be compiled, then M must be compiled. -+ If M depends on K and t(K.gf) > t(M.gfc), then M must be compiled. - - -Decorate the dependency graph by information on whether the gf or the gfc (and gfr) -format is to be read. - -Topologically sort the decorated graph, and read each file in the chosen format. - -The gfr file is generated for these module types only: -- resource -- instance - - -When reading K.gfc, also K.gfr is read if some M depending on K has to be compiled. -In other cases, it is enough to read K.gfc. - -In an interactive GF session, some modules may be in memory already. -When read to the memory, each module M is given time stamp t(M.m). -The additional rule now is: -- If M.gfc is to be read, and t(M.m) > t(M.gfc), don't read M.gfc. - - - - -#NEW - -==Techniques used== - -The compiler is written in Haskell, with some C foreign function calls -in the interactive version (readline, killing threads). - -BNFC is used for generating both the parsers and printers. -This has helped to make the formats portable. - -"Almost compositional functions" (``composOp``) are used in -many compiler passes, making them easier to write and understand. -A ``grep`` on the sources reveals 40 uses (outside the definition -of ``composOp`` itself). - -The key algorithmic ideas are -- type-driven partial evaluation in GF-to-GFC generation -- common subexpression elimination as back-end optimization -- some ideas in GFC-to-MCFG encoding - - -#NEW - -==Type-driven partial evaluation== - -Each abstract syntax category in GF has a corresponding linearization type: -``` - cat C - lincat C = T -``` -The general form of a GF rule pair is -``` - fun f : C1 -> ... 
-> Cn -> C - lin f = t -``` -with the typing condition following the ``lincat`` definitions -``` - t : T1 -> ... -> Tn -> T -``` -The term ``t`` is in general built by using abstraction methods such -as pattern matching, higher-order functions, local definitions, -and library functions. - -The compilation technique proceeds as follows: -- use eta-expansion on ``t`` to determine the canonical form of the term -``` - \ $C1, ...., $Cn -> (t $C1 .... $Cn) -``` -with unique variables ``$C1 .... $Cn`` for the arguments; repeat this -inside the term for records and tables -- evaluate the resulting term using the computation rules of GF -- what remains is a canonical term with ``$C1 .... $Cn`` the only -variables (the run-time input of the linearization function) - - -#NEW - -==Eta-expanding records and tables== - -For records that are valid via subtyping, eta expansion -eliminates superfluous fields: -``` - {r1 = t1 ; r2 = t2} : {r1 : T1} ----> {r1 = t1} -``` -For tables, the effect is always expansion, since -pattern matching can be used to represent tables -compactly: -``` - table {n => "fish"} : Number => Str ---> - - table { - Sg => "fish" ; - Pl => "fish" - } -``` -This can be helped by back-end optimizations (see below). - - -#NEW - -==Eliminating functions== - -"Everything is finite": parameter types, records, tables; -finite number of string tokens per grammar. - -But "infinite types" such as function types are useful when -writing grammars, to enable abstractions. - -Since function types do not appear in linearization types, -we want functions to be eliminated from linearization terms. - -This is similar to the **subformula property** in logic. -Also the main problem is similar: a function depending on -a run-time variable, -``` - (table {P => f ; Q => g} ! x) a -``` -This is not a redex, but we can make it closer to one by moving -the application inside the table, -``` - table {P => f a ; Q => g a} ! 
x -``` -This transformation is the same as Prawitz's (1965) elimination -of maximal segments in natural deduction: -``` - A B - C -> D C C -> D C - A B --------- --------- - A v B C -> D C -> D A v B D D - --------------------- ===> ------------------------- - C -> D C D - -------------------- - D -``` - - - -#NEW - -==Size effects of partial evaluation== - -Irrelevant table branches are thrown away, which can reduce the size. - -But, since tables are expanded and auxiliary functions are inlined, -the size can grow exponentially. - -How can we keep the first property and eliminate the second? - - -#NEW - -==Parametrization of tables== - -Algorithm: for each branch in a table, consider replacing the -argument by a variable: -``` - table { table { - P => t ; ---> x => t[P->x] ; - Q => u x => u[Q->x] - } } -``` -If the resulting branches are all equal, you can replace the table -by a lambda abstract -``` - \\x => t[P->x] -``` -If each created variable ``x`` is unique in the grammar, computation -with the lambda abstract is efficient. - - - -#NEW - -==Course-of-values tables== - -By maintaining a canonical order of parameters in a type, we can -eliminate the left hand sides of branches. -``` - table { table T [ - P => t ; ---> t ; - Q => u u - } ] -``` -The treatment is similar to ``Enum`` instances in Haskell. - -In the end, all parameter types can be translated to -initial segments of integers. - - -#NEW - -==Common subexpression elimination== - -Algorithm: -+ Go through all terms and subterms in a module, creating - a symbol table mapping terms to the number of occurrences. -+ For each subterm appearing at least twice, create a fresh - constant defined as that subterm. -+ Go through all rules (incl. rules for the new constants), - replacing largest possible subterms with such new constants. - - -This algorithm, in a way, creates the strongest possible abstractions. - -In general, the new constants have open terms as definitions. 
-But since all variables (and constants) are unique, they can -be computed by simple replacement. - - - -#NEW - -==Size effects of optimizations== - -Example: the German resource grammar -``LangGer`` - -|| optimization | lines | characters | size % | blow-up | -| none | 5394 | 3208435 | 100 | 25 | -| all | 5394 | 750277 | 23 | 6 | -| none_subs | 5772 | 1290866 | 40 | 10 | -| all_subs | 5644 | 414119 | 13 | 3 | -| gfcc | 3279 | 190004 | 6 | 1.5 | -| gf source | 3976 | 121939 | 4 | 1 | - - -Optimization "all" means parametrization + course-of-values. - -The source code size is an estimate, since it includes -potentially irrelevant library modules, and comments. - -The GFCC format is not reusable in separate compilation. - - - -#NEW - -==The shared prefix optimization== - -This is currently performed in GFCC only. - -The idea works for languages that have a rich morphology -based on suffixes. Then we can replace a course of values -with a pair of a prefix and a suffix set: -``` - ["apa", "apan", "apor", "aporna"] ---> - ("ap" + ["a", "an", "or", "orna"]) -``` -The real gain comes via common subexpression elimination: -``` - _34 = ["a", "an", "or", "orna"] - apa = ("ap" + _34) - blomma = ("blomm" + _34) - flicka = ("flick" + _34) -``` -Notice that it now matters a lot how grammars are written. -For instance, if German verbs are treated as a one-dimensional -table, -``` - ["lieben", "liebe", "liebst", ...., "geliebt", "geliebter",...] -``` -no shared prefix optimization is possible. A better form is -separate tables for non-"ge" and "ge" forms: -``` - [["lieben", "liebe", "liebst", ....], ["geliebt", "geliebter",...]] -``` - - -#NEW - -==Reuse of grammars as libraries== - -The idea of resource grammars: take care of all aspects of -surface grammaticality (inflection, agreement, word order). 
- -Reuse in application grammar: via translations -``` - cat C ---> oper C : Type = T - lincat C = T - - fun f : A ---> oper f : A* = t - lin f = t -``` -The user only needs to know the type signatures (abstract syntax). - -However, this does not quite guarantee grammaticality, because -different categories can have the same lincat: -``` - lincat Conj = {s : Str} - lincat Adv = {s : Str} -``` -Thus someone may by accident use "and" as an adverb! - - -#NEW - -==Forcing the type checker to act as a grammar checker== - -We just have to make linearization types unique for each category. - -The technique is reminiscent of Haskell's ``newtype`` but uses -records instead: we add **lock fields** e.g. -``` - lincat Conj = {s : Str ; lock_Conj : {}} - lincat Adv = {s : Str ; lock_Adv : {}} -``` -Thanks to record subtyping, the translation is simple: -``` - fun f : C1 -> ... -> Cn -> C - lin f = t - - ---> - - oper f : C1* -> ... -> Cn* -> C* = - \x1,...,xn -> (t x1 ... xn) ** {lock_C = {}} -``` - -#NEW - -==Things to do== - -Better compression of gfc file format. - -Type checking of dependent-type pattern matching in abstract syntax. 
- -Compilation-related modules that need rewriting -- ``ReadFiles``: clarify the logic of dependencies -- ``Compile``: clarify the logic of what to do with each module -- ``Compute``: make the evaluation more efficient -- ``Parsing/*``, ``OldParsing/*``, ``Conversion/*``: reduce the number - of parser formats and algorithms diff --git a/doc/eu-langs.dot b/doc/eu-langs.dot deleted file mode 100644 index 115ce0040..000000000 --- a/doc/eu-langs.dot +++ /dev/null @@ -1,79 +0,0 @@ -graph{ - -size = "7,7" ; - -overlap = scale ; - -"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ; - -"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ; -"1" -- "Abs" [style = "solid"]; - -"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ; -"2" -- "Abs" [style = "solid"]; - -"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ; -"3" -- "Abs" [style = "solid"]; - -"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ; -"4" -- "Abs" [style = "solid"]; - -"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ; -"5" -- "Abs" [style = "solid"]; - -"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ; -"6" -- "Abs" [style = "solid"]; - -"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ; -"7" -- "Abs" [style = "solid"]; - -"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ; -"8" -- "Abs" [style = "solid"]; - -"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ; -"9" -- "Abs" [style = "solid"]; - -"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ; -"10" -- "Abs" [style = "solid"]; - -"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ; -"11" -- "Abs" [style = "solid"]; - -"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "12" [style = "solid"]; - -"13" [label = 
"Irish", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "13" [style = "solid"]; - -"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "14" [style = "solid"]; - -"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "15" [style = "solid"]; - -"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "16" [style = "solid"]; - -"17" [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "17" [style = "solid"]; - -"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "18" [style = "solid"]; - -"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "19" [style = "solid"]; - -"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "20" [style = "solid"]; - -"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "21" [style = "solid"]; - -"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "22" [style = "solid"]; - -"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "23" [style = "solid"]; - - -} diff --git a/doc/eu-langs.png b/doc/eu-langs.png deleted file mode 100644 index 8c46a19db..000000000 Binary files a/doc/eu-langs.png and /dev/null differ diff --git a/doc/food-js.png b/doc/food-js.png deleted file mode 100644 index fe579b1a9..000000000 Binary files a/doc/food-js.png and /dev/null differ diff --git a/doc/food-magnet.png b/doc/food-magnet.png deleted file mode 100644 index 8b137875d..000000000 Binary files a/doc/food-magnet.png and /dev/null differ diff --git a/doc/food-translet.png b/doc/food-translet.png deleted file mode 100644 index dd622a4bf..000000000 Binary files a/doc/food-translet.png and /dev/null differ diff --git a/doc/food1.png b/doc/food1.png deleted file mode 100644 index 767069dab..000000000 Binary 
files a/doc/food1.png and /dev/null differ diff --git a/doc/food2.png b/doc/food2.png deleted file mode 100644 index b36a01b22..000000000 Binary files a/doc/food2.png and /dev/null differ diff --git a/doc/foodmarket.png b/doc/foodmarket.png deleted file mode 100644 index 6b0e3fbd7..000000000 Binary files a/doc/foodmarket.png and /dev/null differ diff --git a/doc/gf-compiler.dot b/doc/gf-compiler.dot deleted file mode 100644 index f8ce1aaae..000000000 --- a/doc/gf-compiler.dot +++ /dev/null @@ -1,88 +0,0 @@ -digraph { - - gfe [label = "file.gfe", style = "dashed", shape = "ellipse"]; - gfe -> gf1 [label = " MkConcrete", style = "dashed"]; - -gf1 [label = "file.gf", style = "solid", shape = "ellipse"]; -gf1 -> gf2 [label = " LexGF", style = "solid"]; - -gf2 [label = "token list", style = "solid", shape = "plaintext"]; -gf2 -> gf3 [label = " ParGF", style = "solid"]; - -gf3 [label = "source tree", style = "solid", shape = "plaintext"]; -gf3 -> gf4 [label = " SourceToGrammar", style = "solid"]; - - cf [label = "file.cf", style = "dashed", shape = "ellipse"]; - cf -> gf4 [label = " CF.PPrCF", style = "dashed"]; - - ebnf [label = "file.ebnf", style = "dashed", shape = "ellipse"]; - ebnf -> gf4 [label = " CF.EBNF", style = "dashed"]; - - -gf4 [label = "GF tree", style = "solid", shape = "plaintext"]; -gf4 -> gf5 [label = " Extend", style = "solid"]; - -gf5 [label = "inheritance-linked GF tree", style = "solid", shape = "plaintext"]; -gf5 -> gf6 [label = " Rename", style = "solid"]; - -gf6 [label = "name-resolved GF tree", style = "solid", shape = "plaintext"]; -gf6 -> gf7 [label = " CheckGrammar", style = "solid"]; - -gf7 [label = "type-annotated GF tree", style = "solid", shape = "plaintext"]; -gf7 -> gf8 [label = " Optimize", style = "solid"]; - -gf8 [label = "optimized GF tree", style = "solid", shape = "plaintext"]; -gf8 -> gf9 [label = " GrammarToCanon", style = "solid"]; - -gf9 [label = "GFC tree", style = "solid", shape = "plaintext"]; -gf9 -> gfc [label = " 
BackOpt", style = "solid"]; - -gfc [label = "optimized GFC tree", style = "solid", shape = "box"]; -gfc -> gf11 [label = " PrintGFC", style = "solid"]; - -gf11 [label = "file.gfc", style = "solid", shape = "ellipse"]; - - - gfcc [label = "file.gfcc", style = "solid", shape = "ellipse"]; - gfc -> gfcc [label = " CanonToGFCC", style = "solid"]; - - mcfg [label = "file.gfcm", style = "dashed", shape = "ellipse"]; - gfc -> mcfg [label = " PrintGFC", style = "dashed"]; - - bnf [label = "file.cf", style = "dashed", shape = "ellipse"]; - gfc -> bnf [label = " CF.PrLBNF", style = "dashed"]; - - happy [label = "file.y (Happy)", style = "dashed", shape = "ellipse"]; - bnf -> happy [label = " bnfc", style = "dashed"]; - - bison [label = "file.y (Bison)", style = "dashed", shape = "ellipse"]; - bnf -> bison [label = " bnfc", style = "dashed"]; - - cup [label = "parser.java (CUP)", style = "dashed", shape = "ellipse"]; - bnf -> cup [label = " bnfc", style = "dashed"]; - - xml [label = "file.dtd (XML)", style = "dashed", shape = "ellipse"]; - bnf -> xml [label = " bnfc", style = "dashed"]; - - cfg [label = "CFG tree", style = "solid", shape = "plaintext"]; - gfc -> cfg [label = " Conversions.GFC", style = "dashed"]; - - cfgm [label = "file.cfgm", style = "dashed", shape = "ellipse"]; - cfg -> cfgm [label = " Conversions.GFC", style = "dashed"]; - - srg [label = "Non-LR CFG", style = "solid", shape = "plaintext"]; - cfg -> srg [label = " Speech.SRG", style = "dashed"]; - - gsl [label = "file.gsl", style = "dashed", shape = "ellipse"]; - srg -> gsl [label = " Speech.PrGSL", style = "dashed"]; - - jsgf [label = "file.jsgf", style = "dashed", shape = "ellipse"]; - srg -> jsgf [label = " Speech.PrJSGF", style = "dashed"]; - - fa [label = "DFA", style = "solid", shape = "plaintext"]; - cfg -> fa [label = " Speech.CFGToFiniteState", style = "dashed"]; - - slf [label = "file.slf", style = "dashed", shape = "ellipse"]; - fa -> slf [label = " Speech.PrSLF", style = "dashed"]; - -} diff 
--git a/doc/gf-compiler.png b/doc/gf-compiler.png deleted file mode 100644 index 6949c37b5..000000000 Binary files a/doc/gf-compiler.png and /dev/null differ diff --git a/doc/gf-formalism.html b/doc/gf-formalism.html deleted file mode 100644 index 52d9256aa..000000000 --- a/doc/gf-formalism.html +++ /dev/null @@ -1,350 +0,0 @@ - - - - -A Birds-Eye View of GF as a Grammar Formalism - -

A Birds-Eye View of GF as a Grammar Formalism

- -Author: Aarne Ranta
-Last update: Thu Feb 2 14:16:01 2006 -
- -

-
-

- - -

-
-

-

- -

-

-Abstract. This document gives a general description of the -Grammatical Framework (GF), with comparisons to other grammar -formalisms such as CG, ACG, HPSG, and LFG. -

-

- -

- -

GF in a few words

-

-Grammatical Framework (GF) is a grammar formalism -based on constructive type theory. -

-

-GF makes a distinction between abstract syntax and concrete syntax. -

-

-The abstract syntax part of GF is a logical framework, with -dependent types and higher-order functions. -

-

-The concrete syntax is a system of records containing strings and features. -

-

-A GF grammar defines a reversible homomorphism from an abstract syntax to a -concrete syntax. -

-

-A multilingual GF grammar is a set of concrete syntaxes associated with -one abstract syntax. -

-

-GF grammars are written in a high-level functional programming language, -which is compiled into a core language (GFC). -

-

-GF grammars can be used as resources, i.e. as libraries for writing -new grammars; these are compiled and optimized by the method of -grammar composition. -

-

-GF has a module system that supports grammar engineering and separate -compilation. -

-

- -

- -

History of GF

-

-1988. Intuitionistic Categorial Grammar; type theory as abstract syntax, -playing the role of Montague's analysis trees. Grammars implemented in Prolog. -

-

-1994. Type-Theoretical Grammar. Abstract syntax organized as a system of -combinators. Grammars implemented in ALF. -

-

-1996. Multilingual Type-Theoretical Grammar. Rules for generating six -languages from the same abstract syntax. Grammars implemented in ALF, ML, and -Haskell. -

-

-1998. The first implementation of GF as a language of its own. -

-

-2000. New version of GF: high-level functional source language, records used -for concrete syntax. -

-

-2003. The module system. -

-

-2004. Ljunglöf's thesis Expressivity and Complexity of GF. -

-

- -

- -

Some key ingredients of GF in other grammar formalisms

-
    -
  • [GF ]: Grammatical Framework -
  • [CG ]: categorial grammar -
  • [ACG ]: abstract categorial grammar -
  • [HPSG ]: head-driven phrase structure grammar -
  • [LFG ]: lexical functional grammar -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/GFACGLFGHPSGCG
abstract vs concrete syntaxXX?--
type theoryXX--X
records and featuresX-XX-
- -

-

- -

- -

Examples of descriptions in each formalism

-

-To be written... -

-

- -

- -

Lambda terms and records

-

-In CS, abstract syntax is trees and concrete syntax is strings. -This works more or less for programming languages. -

-

-In CG, all syntax is lambda terms. -

-

-In Montague grammar, abstract syntax is lambda terms and -concrete syntax is trees. Abstract syntax as lambda terms -can be considered well-established. -

-

-In PATR and HPSG, concrete syntax is records. This can be considered -well-established for natural languages. -

-

-In ACG, both are lambda terms. This is more general than GF, -but reversibility requires linearity restriction, which can be -unnatural for grammar writing. -

-

-In GF, linearization from lambda terms to records is reversible, -and grammar writing is not restricted to linear terms. -

-

-Grammar composition in ACG is just function composition. In GF, -it is more restricted... -

-

- -

- -

The structure of GF formalisms

-

-The following diagram (to be drawn properly!) describes the -levels. -

-
-         |   programming language design
-         V
-    GF source language
-         |
-         |   type-directed partial evaluation
-         V
-    GFC assembly language
-         |
-         |   Ljunglöf's translation
-         V
-    MCFG parser
-
-

-The last two phases are nontrivial mathematical properties. -

-

-In most grammar formalisms, grammarians have to work on the GFC -(or MCFG) level. -

-

-Maybe they use macros - they are therefore like macro assemblers. But there -are no separately compiled library modules, no type checking, etc. -

-

- -

- -

The expressivity of GF

-

-Parsing complexity is the same as MCFG: polynomial, with -unrestricted exponent depending on grammar. -This is between TAG and HPSG. -

-

-If semantic well-formedness (type theory) is taken into account, -then arbitrary logic can be expressed. The well-formedness of -abstract syntax is decidable, but the well-formedness of a -concrete-syntax string can require an arbitrary proof construction -and is therefore undecidable. -

-

-Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF -has the goal of assigning intended trees for strings. This is -generalized to shared trees for different languages. -

-

-The high-level language strives after the properties of -writability and readability (programming language notions). -

-

- -

- -

Grammars and parsing

-

-In many projects, a grammar is just seen as a declarative parsing program. -

-

-For GF, a grammar is primarily the definition of a language. -

-

-Detaching grammars from parsers is a good idea, giving -

-
    -
  • more efficient and robust parsing (statistical etc) -
  • cleaner grammars -
- -

-Separating abstract from concrete syntax is a prerequisite for this: -we want parsers to return abstract syntax objects, and these must exist -independently of parse trees. -

-

-A possible radical approach to parsing: -use a grammar to generate a treebank and machine-learn -a statistical parser from this. -

-

-Comparison: Steedman in CCG has done something like this. -

-

- -

- -

Grammars as software libraries

-

-Reuse for different purposes. -

-

-Grammar composition. -

-

- -

- -

Multilinguality

-

-In application grammars, the AS is a semantic -model, and a CS covers domain terminology and idioms. -

-

-This can give publication-quality translation on -limited domains (e.g. the WebALT project). -

-

-Resource grammars with grammar composition lead to -compile-time transfer. -

-

-When is run-time transfer necessary? -

-

-Cf. CLE (Core Language Engine). -

-

- -

- -

Parametrized modules

-

-This notion comes from the ML language in the 1980's. -

-

-It can be used for sharing even more code between languages -than their AS. -

-

-Especially, for related languages (Scandinavian, Romance). -

-

-Cf. grammar porting in CLE: what they do with untyped -macro packages GF does with typable interfaces. -

- - - - diff --git a/doc/gf-formalism.txt b/doc/gf-formalism.txt deleted file mode 100644 index 3b6963d11..000000000 --- a/doc/gf-formalism.txt +++ /dev/null @@ -1,279 +0,0 @@ -A Birds-Eye View of GF as a Grammar Formalism -Author: Aarne Ranta -Last update: %%date(%c) - -% NOTE: this is a txt2tags file. -% Create an html file from this file using: -% txt2tags -thtml --toc gf-formalism.txt - -%!target:html - -%!postproc(html): #NEW - -[Logos/gf0.png] - -//Abstract. This document gives a general description of the// -//Grammatical Framework (GF), with comparisons to other grammar// -//formalisms such as CG, ACG, HPSG, and LFG.// - - -#NEW - -==Logical Frameworks and Grammar Formalisms== - -Logic - formalization of mathematics (mathematical language?) - -Linguistics - formalization of natural language - -Since math lang is a subset, we can expect similarities. - -But in natural language we have -- masses of empirical data -- no right of reform - - - -#NEW - -==High-level programming== - -We have to write a lot of program code when formalizing language. - -We need a language with proper abstractions. - -Cf. Paul Graham on Prolog: very high-level, but wrong abstractions. - -Typed functional languages work well in maths. - -We have developed one for linguistics -- some extra constructs, e.g. inflection tables -- constraint of reversibility (nontrivial math problem) - - -Writing a grammar of e.g. French clitics should not be a topic -on which one can write a paper - it should be easy to render in code -the known facts about languages! - - - -#NEW - -==GF in a few words== - -Grammatical Framework (GF) is a grammar formalism -based on **constructive type theory**. - -GF makes a distinction between **abstract syntax** and **concrete syntax**. - -The abstract syntax part of GF is a **logical framework**, with -dependent types and higher-order functions. - -The concrete syntax is a system of **records** containing strings and features. 
- -A GF grammar defines a **reversible homomorphism** from an abstract syntax to a -concrete syntax. - -A **multilingual GF grammar** is a set of concrete syntaxes associated with -one abstract syntax. - -GF grammars are written in a high-level **functional programming language**, -which is compiled into a **core language** (GFC). - -GF grammars can be used as **resources**, i.e. as libraries for writing -new grammars; these are compiled and optimized by the method of -**grammar composition**. - -GF has a **module system** that supports grammar engineering and separate -compilation. - - -#NEW - -==History of GF== - -1988. Intuitionistic Categorial Grammar; type theory as abstract syntax, -playing the role of Montague's analysis trees. Grammars implemented in Prolog. - -1994. Type-Theoretical Grammar. Abstract syntax organized as a system of -combinators. Grammars implemented in ALF. - -1996. Multilingual Type-Theoretical Grammar. Rules for generating six -languages from the same abstract syntax. Grammars implemented in ALF, ML, and -Haskell. - -1998. The first implementation of GF as a language of its own. - -2000. New version of GF: high-level functional source language, records used -for concrete syntax. - -2003. The module system. - -2004. Ljunglöf's thesis //Expressivity and Complexity of GF//. - - - -#NEW - -==Some key ingredients of GF in other grammar formalisms== - -- [GF ]: Grammatical Framework -- [CG ]: categorial grammar -- [ACG ]: abstract categorial grammar -- [HPSG ]: head-driven phrase structure grammar -- [LFG ]: lexical functional grammar - - -| / | GF | ACG | LFG | HPSG | CG | -| abstract vs concrete syntax | X | X | ? | - | - | -| type theory | X | X | - | - | X | -| records and features | X | - | X | X | - | - - -#NEW - -==Examples of descriptions in each formalism== - -To be written... - - -#NEW - -==Lambda terms and records== - -In CS, abstract syntax is trees and concrete syntax is strings. -This works more or less for programming languages. 
- -In CG, all syntax is lambda terms. - -In Montague grammar, abstract syntax is lambda terms and -concrete syntax is trees. Abstract syntax as lambda terms -can be considered well-established. - -In PATR and HPSG, concrete syntax it records. This can be considered -well-established for natural languages. - -In ACG, both are lambda terms. This is more general than GF, -but reversibility requires linearity restriction, which can be -unnatural for grammar writing. - -In GF, linearization from lambda terms to records is reversible, -and grammar writing is not restricted to linear terms. - -Grammar composition in ACG is just function composition. In GF, -it is more restricted... - - -#NEW - -==The structure of GF formalisms== - -The following diagram (to be drawn properly!) describes the -levels. -``` - | programming language design - V - GF source language - | - | type-directed partial evaluation - V - GFC assembly language - | - | Ljunglöf's translation - V - MCFG parser -``` -The last two phases are nontrivial mathematica properties. - -In most grammar formalisms, grammarians have to work on the GFC -(or MCFG) level. - -Maybe they use macros - they are therefore like macro assemblers. But there -are no separately compiled library modules, no type checking, etc. - - -#NEW - -==The expressivity of GF== - -Parsing complexity is the same as MCFG: polynomial, with -unrestricted exponent depending on grammar. -This is between TAG and HPSG. - -If semantic well-formedness (type theory) is taken into account, -then arbitrary logic can be expressed. The well-formedness of -abstract syntax is decidable, but the well-formedness of a -concrete-syntax string can require an arbitrary proof construction -and is therefore undecidable. - -Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF -has the goal of assigning intended trees for strings. This is -generalized to shared trees for different languages. 
- -The high-level language strives after the properties of -writability and readability (programming language notions). - - -#NEW - -==Grammars and parsing== - -In many projects, a grammar is just seen as a **declarative parsing program**. - -For GF, a grammar is primarily the **definition of a language**. - -Detaching grammars from parsers is a good idea, giving -- more efficient and robust parsing (statistical etc) -- cleaner grammars - - -Separating abstract from concrete syntax is a prerequisite for this: -we want parsers to return abstract syntax objects, and these must exist -independently of parse trees. - -A possible radical approach to parsing: -use a grammar to generate a treebank and machine-learn -a statistical parser from this. - -Comparison: Steedman in CCG has done something like this. - - -#NEW - -==Grammars as software libraries== - -Reuse for different purposes. - -Grammar composition. - - -#NEW - -==Multilinguality== - -In **application grammars**, the AS is a semantic -model, and a CS covers domain terminology and idioms. - -This can give publication-quality translation on -limited domains (e.g. the WebALT project). - -Resource grammars with grammar composition lead to -**compile-time transfer**. - -When is **run-time transfer** necessary? - -Cf. CLE (Core Language Engine). - - -#NEW - -==Parametrized modules== - -This notion comes from the ML language in the 1980's. - -It can be used for sharing even more code between languages -than their AS. - -Especially, for related languages (Scandinavian, Romance). - -Cf. grammar porting in CLE: what they do with untyped -macro packages GF does with typable interfaces. diff --git a/doc/gf-ideas.html b/doc/gf-ideas.html deleted file mode 100644 index 8119740fa..000000000 --- a/doc/gf-ideas.html +++ /dev/null @@ -1,311 +0,0 @@ - - - - - -GF Project Ideas - - -

-

- -
-

- -

-

GF Project Ideas

- -Resource Grammars, Web Applications, etc
-contact: Aarne Ranta (aarne at chalmers dot se) -
- -

-
-

- - -

-
-

- -

Resource Grammar Implementations

-

-GF Resource Grammar Library is an open-source computational grammar resource -that currently covers 12 languages. -The Library is a collaborative effort to which programmers from many countries -have contributed. The next goal is to extend the library -to all of the 23 official EU languages. Also other languages -are welcome all the time. The following diagram shows the current status of the -library. Each of the red and yellow ones is a potential project. -

-

-

- -
-

-

-red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu -

-

-The linguistic coverage of the library includes the inflectional morphology -and basic syntax of each language. It can be used in GF applications -and also ported to other formats. It can also be used for building other -linguistic resources, such as morphological lexica and parsers. -The library is licensed under LGPL. -

- -

Tasks

-

-Writing a grammar for a language is usually easier if other languages -from the same family already have grammars. The colours have the same -meaning as in the diagram above; in addition, we use boldface for the -red, still unimplemented languages and italics for the -orange languages in progress. Thus, in particular, each of the languages -coloured red below is a possible programming project. -

-

-Baltic: -

-
    -
  • Latvian -
  • Lithuanian -
- -

-Celtic: -

-
    -
  • Irish -
- -

-Fenno-Ugric: -

-
    -
  • Estonian -
  • Finnish -
  • Hungarian -
- -

-Germanic: -

-
    -
  • Danish -
  • Dutch -
  • English -
  • German -
  • Norwegian -
  • Swedish -
- -

-Hellenic: -

-
    -
  • Greek -
- -

-Indo-Iranian: -

-
    -
  • Hindi -
  • Urdu -
- -

-Romance: -

-
    -
  • Catalan -
  • French -
  • Italian -
  • Portuguese -
  • Romanian -
  • Spanish -
- -

-Semitic: -

-
    -
  • Arabic -
  • Maltese -
- -

-Slavonic: -

-
    -
  • Bulgarian -
  • Czech -
  • Polish -
  • Russian -
  • Slovak -
  • Slovenian -
- -

-Tai: -

-
    -
  • Thai -
- -

-Turkic: -

-
    -
  • Turkish -
- - -

Who is qualified

-

-Writing a resource grammar implementation requires good general programming -skills, and a good explicit knowledge of the grammar of the target language. -A typical participant could be -

-
    -
  • native or fluent speaker of the target language -
  • interested in languages on the theoretical level, and preferably familiar - with many languages (to be able to think about them on an abstract level) -
  • familiar with functional programming languages such as ML or Haskell - (GF itself is a language similar to these) -
  • on Master's or PhD level in linguistics, computer science, or mathematics -
- -

-But it is the quality of the assignment that is assessed, not any formal -requirements. The "typical participant" was described to give an idea of -who is likely to succeed in this. -

- -

The Summer School

-

-A Summer School on resource grammars and applications will -be organized at the campus of Chalmers University of Technology in Gothenburg, -Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in -a resource grammar project; the participants are assumed to learn GF before -the Summer School, but how far they have come in their projects may vary. -

-

-More information on the Summer School web page: -

-

-http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html -

- -

Other project ideas

- -

GF interpreter in Java

-

-The idea is to write a run-time system for GF grammars in Java. This enables -the use of embedded grammars in Java applications. This project is -a refresh of earlier work, -now using the new run-time format PGF and addressing a new parsing algorithm. -

-

-Requirements: Java, Haskell, basics of compilers and parsing algorithms. -

- -

GF interpreter in C#

-

-The idea is to write a run-time system for GF grammars in C#. This enables -the use of embedded grammars in C# applications. This project is -similar to earlier work -on Java, now addressing C# and using the new run-time format PGF. -

-

-Requirements: C#, Haskell, basics of compilers and parsing algorithms. -

- -

GF localization library

-

-This is an idea for a software localization library using GF grammars. -The library should replace strings by grammar rules, which can be conceived -as very smart templates always guaranteeing grammatically correct output. -The library should be based on the -GF Resource Grammar Library, providing infrastructure -currently for 12 languages. -

-

-Requirements: GF, some natural languages, some localization platform -

- -

Multilingual grammar applications for mobile phones

-

-GF grammars can be compiled into programs that can be run on different -platforms, such as web browsers and mobile phones. An example is a -numeral translator running on both these platforms. -

-

-The proposed project is rather open: find some cool applications of -the technology that are useful or entertaining for mobile phone users. A -part of the project is to investigate implementation issues such as making -the best use of the phone's resources. Possible applications have -something to do with translation; one suggestion is an sms editor/translator. -

-

-Requirements: GF, JavaScript, some phone application development tools -

- -

Multilingual grammar applications for the web

-

-This project is rather open: find some cool applications of -the technology that are useful or entertaining on the web. Examples include -

-
    -
  • translators: see demo -
  • multilingual wikis: see demo -
  • fridge magnets: see demo -
- -

-Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI -

- -

GMail gadget for GF

-

-It is possible to add custom gadgets to GMail. If you are going to write -e-mail in a foreign language then you probably will need help from -a dictionary or you may want to check something in the grammar. GF provides -all resources that you may need but you have to think about how to -design a gadget that fits well in the GMail environment and what -functionality from GF you want to expose. -

-

-Requirements: GF, Google Web Toolkit -

- -

Dissemination and intellectual property

-

-All code suggested here will be released under the LGPL just like -the current resource grammars and run-time GF libraries, -with the copyright held by respective authors. -

-

-As a rule, the code will be distributed via the GF web site. -

- - - - diff --git a/doc/gf-ideas.txt b/doc/gf-ideas.txt deleted file mode 100644 index 3f62196b9..000000000 --- a/doc/gf-ideas.txt +++ /dev/null @@ -1,231 +0,0 @@ -GF Project Ideas -Resource Grammars, Web Applications, etc -contact: Aarne Ranta (aarne at chalmers dot se) - -%!Encoding : iso-8859-1 - -%!target:html -%!postproc(html): #BECE
-%!postproc(html): #ENCE
-%!postproc(html): #GRAY -%!postproc(html): #EGRAY -%!postproc(html): #RED -%!postproc(html): #YELLOW -%!postproc(html): #ERED -%!postproc(html): #EYELLOW - -#BECE -[Logos/gf0.png] -#ENCE - - -==Resource Grammar Implementations== - -GF Resource Grammar Library is an open-source computational grammar resource -that currently covers 12 languages. -The Library is a collaborative effort to which programmers from many countries -have contributed. The next goal is to extend the library -to all of the 23 official EU languages. Also other languages -are welcome all the time. The following diagram show the current status of the -library. Each of the red and yellow ones are a potential project. - -#BECE -[school-langs.png] -#ENCE - - -//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu// - -The linguistic coverage of the library includes the inflectional morphology -and basic syntax of each language. It can be used in GF applications -and also ported to other formats. It can also be used for building other -linguistic resources, such as morphological lexica and parsers. -The library is licensed under LGPL. - - -===Tasks=== - -Writing a grammar for a language is usually easier if other languages -from the same family already have grammars. The colours have the same -meaning as in the diagram above; in addition, we use boldface for the -red, still unimplemented languages and italics for the -orange languages in progress. Thus, in particular, each of the languages -coloured red below are possible programming projects. 
- -Baltic: -- #RED Latvian #ERED -- #RED Lithuanian #ERED - - -Celtic: -- #RED Irish #ERED - - -Fenno-Ugric: -- #RED Estonian #ERED -- #GRAY Finnish #EGRAY -- #RED Hungarian #ERED - - -Germanic: -- #GRAY Danish #EGRAY -- #RED Dutch #ERED -- #GRAY English #EGRAY -- #GRAY German #EGRAY -- #GRAY Norwegian #EGRAY -- #GRAY Swedish #EGRAY - - -Hellenic: -- #RED Greek #ERED - - -Indo-Iranian: -- #YELLOW Hindi #EYELLOW -- #YELLOW Urdu #EYELLOW - - -Romance: -- #GRAY Catalan #EGRAY -- #GRAY French #EGRAY -- #GRAY Italian #EGRAY -- #RED Portuguese #ERED -- #YELLOW Romanian #EYELLOW -- #GRAY Spanish #EGRAY - - -Semitic: -- #YELLOW Arabic #EYELLOW -- #RED Maltese #ERED - - -Slavonic: -- #GRAY Bulgarian #EGRAY -- #RED Czech #ERED -- #YELLOW Polish #EYELLOW -- #GRAY Russian #EGRAY -- #RED Slovak #ERED -- #RED Slovenian #ERED - - -Tai: -- #YELLOW Thai #EYELLOW - - -Turkic: -- #YELLOW Turkish #EYELLOW - - -===Who is qualified=== - -Writing a resource grammar implementation requires good general programming -skills, and a good explicit knowledge of the grammar of the target language. -A typical participant could be -- native or fluent speaker of the target language -- interested in languages on the theoretical level, and preferably familiar - with many languages (to be able to think about them on an abstract level) -- familiar with functional programming languages such as ML or Haskell - (GF itself is a language similar to these) -- on Master's or PhD level in linguistics, computer science, or mathematics - - -But it is the quality of the assignment that is assessed, not any formal -requirements. The "typical participant" was described to give an idea of -who is likely to succeed in this. - - -===The Summer School=== - -A Summer School on resource grammars and applications will -be organized at the campus of Chalmers University of Technology in Gothenburg, -Sweden, on 17-28 August 2009. 
It can be seen as a natural checkpoint in -a resource grammar project; the participants are assumed to learn GF before -the Summer School, but how far they have come in their projects may vary. - -More information on the Summer School web page: - -[``http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html`` http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html] - - -==Other project ideas== - -===GF interpreter in Java=== - -The idea is to write a run-time system for GF grammars in Java. This enables -the use of **embedded grammars** in Java applications. This project is -a fresh-up of [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html], -now using the new run-time format PGF and addressing a new parsing algorithm. - -Requirements: Java, Haskell, basics of compilers and parsing algorithms. - - -===GF interpreter in C#=== - -The idea is to write a run-time system for GF grammars in C#. This enables -the use of **embedded grammars** in C# applications. This project is -similar to [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html] -on Java, now addressing C# and using the new run-time format PGF. - -Requirements: C#, Haskell, basics of compilers and parsing algorithms. - - -===GF localization library=== - -This is an idea for a software localization library using GF grammars. -The library should replace strings by grammar rules, which can be conceived -as very smart templates always guaranteeing grammatically correct output. -The library should be based on the -[GF Resource Grammar Library http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html], providing infrastructure -currently for 12 languages. 
- -Requirements: GF, some natural languages, some localization platform - - -===Multilingual grammar applications for mobile phones=== - -GF grammars can be compiled into programs that can be run on different -platforms, such as web browsers and mobile phones. An example is a -[numeral translator http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html] running on both these platforms. - -The proposed project is rather open: find some cool applications of -the technology that are useful or entertaining for mobile phone users. A -part of the project is to investigate implementation issues such as making -the best use of the phone's resources. Possible applications have -something to do with translation; one suggestion is an sms editor/translator. - -Requirements: GF, JavaScript, some phone application development tools - - -===Multilingual grammar applications for the web=== - -This project is rather open: find some cool applications of -the technology that are useful or entertaining on the web. Examples include -- translators: see [demo http://129.16.250.57:41296/translate] -- multilingual wikis: see [demo http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi] -- fridge magnets: see [demo http://129.16.250.57:41296/fridge] - - -Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI - - -===GMail gadget for GF=== - -It is possible to add custom gadgets to GMail. If you are going to write -e-mail in a foreign language then you probably will need help from -dictonary or you may want to check something in the grammar. GF provides -all resources that you may need but you have to think about how to -design gadget that fits well in the GMail environment and what -functionality from GF you want to expose. 
- -Requirements: GF, Google Web Toolkit - - - -==Dissemination and intellectual property== - -All code suggested here will be released under the LGPL just like -the current resource grammars and run-time GF libraries, -with the copyright held by respective authors. - -As a rule, the code will be distributed via the GF web site. - diff --git a/doc/gf-people.html b/doc/gf-people.html index 690084d3c..bc09412d0 100644 --- a/doc/gf-people.html +++ b/doc/gf-people.html @@ -13,12 +13,13 @@ -Most of the code is by -Krasimir Angelov, -Björn Bringert, +The current developers and maintainers are +Krasimir Angelov, +Thomas Hallgren, and -Aarne Ranta. Bug reports should be -posted via the GF bug tracker. +Aarne Ranta. Bug reports should be -posted via the +GF bug tracker.

@@ -27,19 +28,23 @@ Also the following people have contributed code to some of the versions:

-Håkan Burden (Chalmers) +Grégoire Détrez (University of Gothenburg) +
+Ramona Enache (University of Gothenburg) +
+Björn Bringert (University of Gothenburg) +
+Håkan Burden (University of Gothenburg)
Hans-Joachim Daniels (Karlsruhe)
Markus Forsberg (Chalmers)
-Thomas Hallgren (Chalmers) -
-Kristofer Johannisson (Chalmers) +Kristofer Johannisson (University of Gothenburg)
-Janna Khegai (Chalmers) +Janna Khegai (Chalmers)
-Peter Ljunglöf (Chalmers) +Peter Ljunglöf (University of Gothenburg)
Petri Mäenpää (Nokia) diff --git a/doc/gf-quickstart.html b/doc/gf-quickstart.html index 7a6971953..cd508d474 100644 --- a/doc/gf-quickstart.html +++ b/doc/gf-quickstart.html @@ -9,7 +9,7 @@

Aarne Ranta

-3 September, 2007 +22 December 2010 (3 September, 2007)

@@ -20,7 +20,7 @@ Aarne Ranta This Quick Start shows two examples of how GF can be used. We assume that you have downloaded and installed GF, so that the command gf works for you. See download and install -instructions here. +instructions here. @@ -61,39 +61,11 @@ and start GF again with the same command. Now you can even translate this bread is very Italian. To lear more on GF commands and -grammar development, go to the -New Grammarian's Tutorial. +grammar development, go to the one of the tutorials: +

- -

Multilingual authoring

- -This demo also requires the GUI package, which makes the command -jgf work for you. -
    -
  1. Download the file Letter.gfcm. -
  2. Start the GF editor by the command -
    -  gfeditor Letter.gfcm
    -
    -
  3. When the editor window is open, select "Letter" from the "New" menu. -
  4. Push the button "Random" in the lower end of the window. -
  5. Move the pointer to some place in the text, e.g. to the first word (in any - of the languages), and click. The first word should now be highlighted and - a number of alternatives appear in the lower window part (a similar situation - is shown in the picture below). -
  6. Double-click at some of the alternatives marked "ch ..." and observe how - the text changes in each of the languages. -
-See the Editor User Manual -for more information on how to use the -editor. To change the grammars, you should not edit Letter.gfcm, -which is low-level code generated by the GF grammar compiler. Instead, you -can edit the files in examples/letter in the GF grammar package, -and compile by using the script mkLetter.gfs in the same package. - -

- - - diff --git a/doc/gf-refman.html b/doc/gf-refman.html index 104f644c7..188a063a8 100644 --- a/doc/gf-refman.html +++ b/doc/gf-refman.html @@ -106,7 +106,7 @@ This document is not an introduction to GF; such introduction can be found in the GF tutorial available on line on the GF web page,

-digitalgrammars.com/gf +grammaticalframework.org

This manual covers only the language, not the GF compiler or diff --git a/doc/gf-statistics.txt b/doc/gf-statistics.txt deleted file mode 100644 index 499ad7d09..000000000 --- a/doc/gf-statistics.txt +++ /dev/null @@ -1,289 +0,0 @@ -(Adapted from KeY statistics by Vladimir Klebanov) - -This is GF right now: - -Total Physical Source Lines of Code (SLOC) = 42,467 - -Development Effort Estimate, Person-Years (Person-Months) = 10.24 (122.932) - (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05)) - -Schedule Estimate, Years (Months) = 1.30 (15.56) - (Basic COCOMO model, Months = 2.5 * (person-months**0.38)) - -Estimated Average Number of Developers (Effort/Schedule) = 7.90 - -Total Estimated Cost to Develop = $ 1,383,870 - (average salary = $56,286/year, overhead = 2.40). - -SLOCCount, Copyright (C) 2001-2004 David A. Wheeler - - - ------------ basis of counting: Haskell code + BNFC code - generated Happy parsers - --- GF/src% wc -l *.hs GF/*.hs GF/*/*.hs GF/*/*/*.hs GF/*/*.cf JavaGUI/*.java --- date Fri Jun 3 10:00:31 CEST 2005 - - 104 GF.hs - 402 GF/API.hs - 98 GF/GFModes.hs - 379 GF/Shell.hs - 4 GF/Today.hs - 43 GF/API/BatchTranslate.hs - 145 GF/API/GrammarToHaskell.hs - 77 GF/API/IOGrammar.hs - 25 GF/API/MyParser.hs - 177 GF/Canon/AbsGFC.hs - 37 GF/Canon/ByLine.hs - 192 GF/Canon/CanonToGrammar.hs - 293 GF/Canon/CMacros.hs - 79 GF/Canon/GetGFC.hs - 86 GF/Canon/GFC.hs - 291 GF/Canon/LexGFC.hs - 201 GF/Canon/Look.hs - 235 GF/Canon/MkGFC.hs - 46 GF/Canon/PrExp.hs - 352 GF/Canon/PrintGFC.hs - 147 GF/Canon/Share.hs - 207 GF/Canon/SkelGFC.hs - 46 GF/Canon/TestGFC.hs - 49 GF/Canon/Unlex.hs - 202 GF/CF/CanonToCF.hs - 213 GF/CF/CF.hs - 217 GF/CF/CFIdent.hs - 62 GF/CF/CFtoGrammar.hs - 47 GF/CF/CFtoSRG.hs - 206 GF/CF/ChartParser.hs - 191 GF/CF/EBNF.hs - 45 GF/CFGM/AbsCFG.hs - 312 GF/CFGM/LexCFG.hs - 157 GF/CFGM/PrintCFG.hs - 109 GF/CFGM/PrintCFGrammar.hs - 85 GF/CF/PPrCF.hs - 150 GF/CF/PrLBNF.hs - 106 GF/CF/Profile.hs - 141 GF/Compile/BackOpt.hs - 763 
GF/Compile/CheckGrammar.hs - 337 GF/Compile/Compile.hs - 136 GF/Compile/Extend.hs - 124 GF/Compile/GetGrammar.hs - 282 GF/Compile/GrammarToCanon.hs - 93 GF/Compile/MkConcrete.hs - 128 GF/Compile/MkResource.hs - 83 GF/Compile/MkUnion.hs - 146 GF/Compile/ModDeps.hs - 294 GF/Compile/NewRename.hs - 227 GF/Compile/Optimize.hs - 76 GF/Compile/PGrammar.hs - 84 GF/Compile/PrOld.hs - 119 GF/Compile/Rebuild.hs - 63 GF/Compile/RemoveLiT.hs - 274 GF/Compile/Rename.hs - 535 GF/Compile/ShellState.hs - 135 GF/Compile/Update.hs - 129 GF/Conversion/GFC.hs - 149 GF/Conversion/GFCtoSimple.hs - 53 GF/Conversion/MCFGtoCFG.hs - 46 GF/Conversion/RemoveEpsilon.hs - 102 GF/Conversion/RemoveErasing.hs - 82 GF/Conversion/RemoveSingletons.hs - 137 GF/Conversion/SimpleToFinite.hs - 26 GF/Conversion/SimpleToMCFG.hs - 230 GF/Conversion/Types.hs - 143 GF/Data/Assoc.hs - 118 GF/Data/BacktrackM.hs - 20 GF/Data/ErrM.hs - 119 GF/Data/GeneralDeduction.hs - 30 GF/Data/Glue.hs - 67 GF/Data/IncrementalDeduction.hs - 61 GF/Data/Map.hs - 662 GF/Data/Operations.hs - 127 GF/Data/OrdMap2.hs - 120 GF/Data/OrdSet.hs - 193 GF/Data/Parsers.hs - 64 GF/Data/RedBlack.hs - 150 GF/Data/RedBlackSet.hs - 19 GF/Data/SharedString.hs - 127 GF/Data/SortedList.hs - 134 GF/Data/Str.hs - 120 GF/Data/Trie2.hs - 129 GF/Data/Trie.hs - 71 GF/Data/Utilities.hs - 243 GF/Data/Zipper.hs - 78 GF/Embed/EmbedAPI.hs - 113 GF/Embed/EmbedCustom.hs - 137 GF/Embed/EmbedParsing.hs - 50 GF/Formalism/CFG.hs - 51 GF/Formalism/GCFG.hs - 58 GF/Formalism/MCFG.hs - 246 GF/Formalism/SimpleGFC.hs - 349 GF/Formalism/Utilities.hs - 30 GF/Fudgets/ArchEdit.hs - 134 GF/Fudgets/CommandF.hs - 51 GF/Fudgets/EventF.hs - 59 GF/Fudgets/FudgetOps.hs - 37 GF/Fudgets/UnicodeF.hs - 86 GF/Grammar/AbsCompute.hs - 38 GF/Grammar/Abstract.hs - 149 GF/Grammar/AppPredefined.hs - 312 GF/Grammar/Compute.hs - 215 GF/Grammar/Grammar.hs - 46 GF/Grammar/Lockfield.hs - 189 GF/Grammar/LookAbs.hs - 182 GF/Grammar/Lookup.hs - 745 GF/Grammar/Macros.hs - 340 GF/Grammar/MMacros.hs - 115 
GF/Grammar/PatternMatch.hs - 279 GF/Grammar/PrGrammar.hs - 121 GF/Grammar/Refresh.hs - 44 GF/Grammar/ReservedWords.hs - 251 GF/Grammar/TC.hs - 301 GF/Grammar/TypeCheck.hs - 96 GF/Grammar/Unify.hs - 101 GF/Grammar/Values.hs - 89 GF/Infra/CheckM.hs - 43 GF/Infra/Comments.hs - 152 GF/Infra/Ident.hs - 390 GF/Infra/Modules.hs - 358 GF/Infra/Option.hs - 179 GF/Infra/Print.hs - 331 GF/Infra/ReadFiles.hs - 337 GF/Infra/UseIO.hs - 153 GF/OldParsing/CFGrammar.hs - 283 GF/OldParsing/ConvertFiniteGFC.hs - 121 GF/OldParsing/ConvertFiniteSimple.hs - 34 GF/OldParsing/ConvertGFCtoMCFG.hs - 122 GF/OldParsing/ConvertGFCtoSimple.hs - 44 GF/OldParsing/ConvertGrammar.hs - 52 GF/OldParsing/ConvertMCFGtoCFG.hs - 30 GF/OldParsing/ConvertSimpleToMCFG.hs - 43 GF/OldParsing/GCFG.hs - 86 GF/OldParsing/GeneralChart.hs - 148 GF/OldParsing/GrammarTypes.hs - 50 GF/OldParsing/IncrementalChart.hs - 206 GF/OldParsing/MCFGrammar.hs - 43 GF/OldParsing/ParseCFG.hs - 82 GF/OldParsing/ParseCF.hs - 177 GF/OldParsing/ParseGFC.hs - 37 GF/OldParsing/ParseMCFG.hs - 161 GF/OldParsing/SimpleGFC.hs - 188 GF/OldParsing/Utilities.hs - 51 GF/Parsing/CFG.hs - 66 GF/Parsing/CF.hs - 151 GF/Parsing/GFC.hs - 64 GF/Parsing/MCFG.hs - 83 GF/Printing/PrintParser.hs - 127 GF/Printing/PrintSimplifiedTerm.hs - 190 GF/Shell/CommandL.hs - 556 GF/Shell/Commands.hs - 524 GF/Shell/HelpFile.hs - 79 GF/Shell/JGF.hs - 171 GF/Shell/PShell.hs - 221 GF/Shell/ShellCommands.hs - 66 GF/Shell/SubShell.hs - 87 GF/Shell/TeachYourself.hs - 296 GF/Source/AbsGF.hs - 229 GF/Source/GrammarToSource.hs - 312 GF/Source/LexGF.hs - 528 GF/Source/PrintGF.hs - 353 GF/Source/SkelGF.hs - 657 GF/Source/SourceToGrammar.hs - 58 GF/Source/TestGF.hs - 72 GF/Speech/PrGSL.hs - 65 GF/Speech/PrJSGF.hs - 128 GF/Speech/SRG.hs - 103 GF/Speech/TransformCFG.hs - 30 GF/System/ArchEdit.hs - 90 GF/System/Arch.hs - 27 GF/System/NoReadline.hs - 27 GF/System/Readline.hs - 73 GF/System/Tracing.hs - 25 GF/System/UseReadline.hs - 63 GF/Text/Arabic.hs - 97 GF/Text/Devanagari.hs - 
72 GF/Text/Ethiopic.hs - 99 GF/Text/ExtendedArabic.hs - 37 GF/Text/ExtraDiacritics.hs - 172 GF/Text/Greek.hs - 53 GF/Text/Hebrew.hs - 95 GF/Text/Hiragana.hs - 69 GF/Text/LatinASupplement.hs - 47 GF/Text/OCSCyrillic.hs - 45 GF/Text/Russian.hs - 77 GF/Text/Tamil.hs - 125 GF/Text/Text.hs - 69 GF/Text/Unicode.hs - 47 GF/Text/UTF8.hs - 56 GF/Translate/GFT.hs - 427 GF/UseGrammar/Custom.hs - 435 GF/UseGrammar/Editing.hs - 180 GF/UseGrammar/Generate.hs - 71 GF/UseGrammar/GetTree.hs - 143 GF/UseGrammar/Information.hs - 228 GF/UseGrammar/Linear.hs - 130 GF/UseGrammar/Morphology.hs - 70 GF/UseGrammar/Paraphrases.hs - 157 GF/UseGrammar/Parsing.hs - 66 GF/UseGrammar/Randomized.hs - 170 GF/UseGrammar/Session.hs - 186 GF/UseGrammar/Tokenize.hs - 43 GF/UseGrammar/Transfer.hs - 122 GF/Visualization/NewVisualizationGrammar.hs - 123 GF/Visualization/VisualizeGrammar.hs - 63 GF/Conversion/SimpleToMCFG/Coercions.hs - 256 GF/Conversion/SimpleToMCFG/Nondet.hs - 129 GF/Conversion/SimpleToMCFG/Strict.hs - 71 GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs - 281 GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs - 277 GF/OldParsing/ConvertGFCtoMCFG/Old.hs - 189 GF/OldParsing/ConvertGFCtoMCFG/Strict.hs - 70 GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs - 245 GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs - 277 GF/OldParsing/ConvertSimpleToMCFG/Old.hs - 139 GF/OldParsing/ConvertSimpleToMCFG/Strict.hs - 83 GF/OldParsing/ParseCFG/General.hs - 142 GF/OldParsing/ParseCFG/Incremental.hs - 156 GF/OldParsing/ParseMCFG/Basic.hs - 103 GF/Parsing/CFG/General.hs - 150 GF/Parsing/CFG/Incremental.hs - 98 GF/Parsing/CFG/PInfo.hs - 226 GF/Parsing/MCFG/Active2.hs - 304 GF/Parsing/MCFG/Active.hs - 144 GF/Parsing/MCFG/Incremental2.hs - 163 GF/Parsing/MCFG/Incremental.hs - 128 GF/Parsing/MCFG/Naive.hs - 163 GF/Parsing/MCFG/PInfo.hs - 194 GF/Parsing/MCFG/Range.hs - 183 GF/Parsing/MCFG/ViaCFG.hs - 167 GF/Canon/GFC.cf - 36 GF/CFGM/CFG.cf - 321 GF/Source/GF.cf - 272 JavaGUI/DynamicTree2.java - 272 JavaGUI/DynamicTree.java - 2357 
JavaGUI/GFEditor2.java - 1420 JavaGUI/GFEditor.java - 30 JavaGUI/GrammarFilter.java - 13 JavaGUI/LinPosition.java - 18 JavaGUI/MarkedArea.java - 1552 JavaGUI/Numerals.java - 22 JavaGUI/Utils.java - 5956 total - 48713 total - -- 2131 GF/Canon/ParGFC.hs - 3336 GF/Source/ParGF.hs - 779 GF/CFGM/ParCFG.hs - - 42467 total - --------- - -sloccount sloc = - let - ksloc = sloc / 1000 - effort = 2.4 * (ksloc ** 1.05) - schedule = 2.5 * (effort ** 0.38) - develops = effort / schedule - cost = 56286 * (effort/12) * 2.4 - in - [sloc,ksloc,effort,effort/12,schedule,schedule/12,develops,cost] diff --git a/doc/gf-summerschool.txt b/doc/gf-summerschool.txt deleted file mode 100644 index 0acf9177d..000000000 --- a/doc/gf-summerschool.txt +++ /dev/null @@ -1,533 +0,0 @@ -GF Resource Grammar Summer School -Gothenburg, 17-28 August 2009 -Aarne Ranta (aarne at chalmers.se) - -%!Encoding : iso-8859-1 - -%!target:html -%!postproc(html): #BECE

-%!postproc(html): #ENCE
-%!postproc(html): #GRAY -%!postproc(html): #EGRAY -%!postproc(html): #RED -%!postproc(html): #YELLOW -%!postproc(html): #ERED - -#BECE -[school-langs.png] -#ENCE - - -//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu// - - -==News== - -An on-line course //GF for Resource Grammar Writers// will start on -Monday 20 April at 15.30 CEST. The slides and recordings of the five -45-minute lectures will be made available via this web page. If requested, -the course may be repeated in the beginning of the summer school. - - -==Executive summary== - -GF Resource Grammar Library is an open-source computational grammar resource -that currently covers 12 languages. -The Summer School is a part of a collaborative effort to extend the library -to all of the 23 official EU languages. Also other languages -chosen by the participants are welcome. - -The missing EU languages are: -Czech, Dutch, Estonian, Greek, Hungarian, Irish, Latvian, Lithuanian, -Maltese, Portuguese, Slovak, and Slovenian. There is also more work to -be done on Polish and Romanian. - -The linguistic coverage of the library includes the inflectional morphology -and basic syntax of each language. It can be used in GF applications -and also ported to other formats. It can also be used for building other -linguistic resources, such as morphological lexica and parsers. -The library is licensed under LGPL. - -In the summer school, each language will be implemented by one or two students -working together. A morphology implementation will be credited -as a Chalmers course worth 7.5 ETCS points; adding a syntax implementation -will be worth more. The estimated total work load is 1-2 months for the -morphology, and 3-6 months for the whole grammar. - -Participation in the course is free. Registration is done via the courses's -Google group, [``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]. 
The registration deadline is 15 June 2009. - -Some travel grants will be available. They are distributed on the basis of a -GF programming contest in April and May. - -The summer school will be held on 17-28 August 2009, at the campus of -Chalmers University of Technology in Gothenburg, Sweden. - - -[align6.png] - -//Word alignment produced by GF from the resource grammar in Bulgarian, English, Italian, German, Finnish, French, and Swedish.// - -==Introduction== - -Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this -document. There is a growing need of linguistic resources for these -languages, to help in tasks such as translation and information retrieval. -These resources should be **portable** and **freely accessible**. -Languages marked in red in the diagram are of particular interest for -the summer school, since they are those on which the effort will be concentrated. - -GF (Grammatical Framework, -[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf]) -is a **functional programming language** designed for writing natural -language grammars. It provides an efficient platform for this task, due to -its modern characteristics: -- It is a functional programming language, similar to Haskell and ML. -- It has a static type system and type checker. -- It has a powerful module system supporting separate compilation - and data abstraction. -- It has an optimizing compiler to **Portable Grammar Format** (PGF). -- PGF can be further compiled to other formats, such as JavaScript and - speech recognition language models. -- GF has a **resource grammar library** giving access to the morphology and - basic syntax of 12 languages. - - -In addition to "ordinary" grammars for single languages, GF -supports **multilingual grammars**. A multilingual GF grammar consists of an -**abstract syntax** and a set of **concrete syntaxes**. -An abstract syntax is system of **trees**, serving as a semantic -model or an ontology. 
A concrete syntax is a mapping from abstract syntax -trees to strings of a particular language. - -These mappings defined in concrete syntax are **reversible**: they -can be used both for **generating** strings from trees, and for -**parsing** strings into trees. Combinations of generation and -parsing can be used for **translation**, where the abstract -syntax works as an **interlingua**. Thus GF has been used as a -framework for building translation systems in several areas -of application and large sets of languages. - - - -==The GF resource grammar library== - -The GF resource grammar library is a set of grammars usable as libraries when -building translation systems and other applications. -The library currently covers -the 9 languages coloured in green in the diagram above; in addition, -Catalan, Norwegian, and Russian are covered, and there is ongoing work on -Arabic, Hindi/Urdu, Polish, Romanian, and Thai. - -The purpose of the resource grammar library is to define the "low-level" structure -of a language: inflection, word order, agreement. This structure belongs to what -linguists call morphology and syntax. It can be very complex and requires -a lot of knowledge. Yet, when translating from one language to -another, knowing morphology and syntax is but a part of what is needed. -The translator (whether human -or machine) must understand the meaning of what is translated, and must also know -the idiomatic way to express the meaning in the target language. This knowledge -can be very domain-dependent and requires in general an expert in the field to -reach high quality: a mathematician in the field of mathematics, a meteorologist -in the field of weather reports, etc. - -The problem is to find a person who is an expert in both the domain of translation -and in the low-level linguistic details. It is the rareness of this combination -that has made it difficult to build interlingua-based translation systems. 
-The GF resource grammar library has the mission of helping in this task. -It encapsulates the low-level linguistics in program modules -accessed through easy-to-use interfaces. -Experts on different domains can build translation systems by using the library, -without knowing low-level linguistics. The idea is much the same as when a -programmer builds a graphical user interface (GUI) from high-level elements such as -buttons and menus, without having to care about pixels or geometrical forms. - - -===Missing EU languages, by the family=== - -Writing a grammar for a language is usually easier if other languages -from the same family already have grammars. The colours have the same -meaning as in the diagram above. - -Baltic: -#RED Latvian #ERED -#RED Lithuanian #ERED - -Celtic: -#RED Irish #ERED - -Fenno-Ugric: -#RED Estonian #ERED -#GRAY Finnish #EGRAY -#RED Hungarian #ERED - -Germanic: -#GRAY Danish #EGRAY -#RED Dutch #ERED -#GRAY English #EGRAY -#GRAY German #EGRAY -#GRAY Swedish #EGRAY - -Hellenic: -#RED Greek #ERED - -Romance: -#GRAY French #EGRAY -#GRAY Italian #EGRAY -#RED Portuguese #ERED -#YELLOW Romanian #ERED -#GRAY Spanish #EGRAY - -Semitic: -#RED Maltese #ERED - -Slavonic: -#GRAY Bulgarian #EGRAY -#RED Czech #ERED -#YELLOW Polish #ERED -#RED Slovak #ERED -#RED Slovenian #ERED - - - - - - -===Applications of the library=== - -In addition to translation, the library is also useful in **localization**, -that is, porting a piece of software to new languages. 
-The GF resource grammar library has been used in three major projects that need -interlingua-based translation or localization of systems to new languages: -- in KeY, - [``http://www.key-project.org/`` http://www.key-project.org/], - for writing formal and informal software specifications (3 languages) -- in WebALT, - [``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html], - for translating mathematical exercises to 7 languages -- in TALK [``http://www.talk-project.org`` http://www.talk-project.org], - where the library was used for localizing spoken dialogue systems - to six languages - - -The library is also a generic **linguistic resource**, -which can be used for tasks -such as language teaching and information retrieval. The liberal license (LGPL) -makes it usable for anyone and for any task. GF also has tools supporting the -use of grammars in programs written in other -programming languages: C, C++, Haskell, -Java, JavaScript, and Prolog. In connection with the TALK project, -support has also been -developed for translating GF grammars to language models used in speech -recognition (GSL/Nuance, HTK/ATK, SRGS, JSGF). - - - -===The structure of the library=== - -The library has the following main parts: -- **Inflection paradigms**, covering the inflection of each language. -- **Core Syntax**, covering a large set of syntax rule that - can be implemented for all languages involved. -- **Common Test Lexicon**, giving ca. 500 common words that can be used for - testing the library. -- **Language-Specific Syntax Extensions**, covering syntax rules that are - not implementable for all languages. -- **Language-Specific Lexica**, word lists for each language, with - accurate morphological and syntactic information. - - -The goal of the summer school is to implement, for each language, at least -the first three components. The latter three are more open-ended in character. 
- - -==The summer school== - -The goal of the summer school is to extend the GF resource grammar library -to covering all 23 EU languages, which means we need 15 new languages. -We also welcome other languages than these 23, -if there are interested participants. - -The amount of work and skill is between a Master's thesis and a PhD thesis. -The Russian implementation was made by Janna Khegai as a part of her -PhD thesis; the thesis contains other material, too. -The Arabic implementation was started by Ali El Dada in his Master's thesis, -but the thesis does not cover the whole API. The realistic amount of work is -somewhere between 3 and 8 person months, -but this is very much language-dependent. -Dutch, for instance, can profit from previous implementations of German and -Scandinavian languages, and will probably require less work. -Latvian and Lithuanian are the first languages of the Baltic family and -will probably require more work. - -In any case, the proposed allocation of work power is 2 participants per -language. They will do 1 months' worth of home work, followed -by 2 weeks of summer school, followed by 4 months work at home. -Who are these participants? - - -===Selecting participants=== - -Persons interested to participate in the Summer School should sign up in -the **Google Group** of the course, - -[``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/] - -The registration deadline is 15 June 2009. - -Notice: you can sign up in the Google -group even if you are not planning to attend the summer school, but are -just interested in the topic. There will be a separate registration to the -school itself later. - -The participants are recommended to learn GF in advance, by self-study from the -[tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html]. -This should take a couple of weeks. An **on-line course** will be -arranged on 20-29 April to help in getting started with GF. 
- -At the end of the on-line course, a **programming assignment** will be published. -This assignment will test skills required in resource grammar programming. -Work on the assignment will take a couple of weeks. -Those who are interested in getting a travel grant will submit -their sample resource grammar fragment -to the Summer School Committee by 12 May. -The Committee then decides who is given a travel grant of up to 1000 EUR. - -Notice: you can participate in the summer school without following the on-line -course or participating in the contest. These things are required only if you -want a travel grant. If requested by enough many participants, the lectures of -the on-line course will be repeated in the beginning of the summer school. - -The summer school itself is devoted for working on resource grammars. -In addition to grammar writing itself, testing and evaluation is -performed. One way to do this is via adding new languages -to resource grammar applications - in particular, to the WebALT mathematical -exercise translator. - -The resource grammars are expected to be completed by December 2009. They will -be published at GF website and licensed under LGPL. - -The participants are encouraged to contact each other and even work in groups. - - - -===Who is qualified=== - -Writing a resource grammar implementation requires good general programming -skills, and a good explicit knowledge of the grammar of the target language. -A typical participant could be -- native or fluent speaker of the target language -- interested in languages on the theoretical level, and preferably familiar - with many languages (to be able to think about them on an abstract level) -- familiar with functional programming languages such as ML or Haskell - (GF itself is a language similar to these) -- on Master's or PhD level in linguistics, computer science, or mathematics - - -But it is the quality of the assignment that is assessed, not any formal -requirements. 
The "typical participant" was described to give an idea of -who is likely to succeed in this. - - -===Costs=== - -The summer school is free of charge. - -Some travel grants are given, on the basis of a programming contest, -to cover travel and accommodation costs up to 1000 EUR -per person. - -The number of grants will be decided during Spring 2009, and the grand -holders will be notified before the beginning of June. - -Special terms will apply to students in -[GSLT http://www.gslt.hum.gu.se/] and -[NGSLT http://ngslt.org/]. - - - - - -===Teachers=== - -A list of teachers will be published here later. Some of the local teachers -probably involved are the following: -- Krasimir Angelov -- Robin Cooper -- Hkan Burden -- Markus Forsberg -- Harald Hammarstrm -- Peter Ljunglf -- Aarne Ranta - - -More teachers are welcome! If you are interested, please contact us so that -we can discuss your involvement and travel arrangements. - -In addition to teachers, we will look for consultants who can help to assess -the results for each language. Please contact us! - - - -===The Summer School Committee=== - -This committee consists of a number of teachers and informants, -who will select the participants. It will be selected by April 2009. - - -===Time and Place=== - -The summer school will -be organized at the campus of Chalmers University of Technology in Gothenburg, -Sweden, on 17-28 August 2009. - -Time schedule: -- February: announcement of summer school -- 20-29 April: on-line course -- 12 May: submission deadline for assignment work -- 31 May: review of assignments, notifications of acceptance -- 15 June: **registration deadline** -- 17-28 August: Summer School -- September-December: homework on resource grammars -- December: release of the extended Resource Grammar Library - - -===Dissemination and intellectual property=== - -The new resource grammars will be released under the LGPL just like -the current resource grammars, -with the copyright held by respective authors. 
- -The grammars will be distributed via the GF web site. - - - -==Why I should participate== - -Seven reasons: -+ participation in a pioneering language technology work in an - enthusiastic atmosphere -+ work and fun with people from all over Europe and the world -+ job opportunities and business ideas -+ credits: the school project will be established as a course at Chalmers worth - 7.5 or 15 ETCS points per person, depending on the work accompliched; also - extensions to Master's thesis will be considered (special credit arrangements - for [GSLT http://www.gslt.hum.gu.se/] and [NGSLT http://ngslt.org/]) -+ merits: the resulting grammar can easily lead to a published paper (see below) -+ contribution to the multilingual and multicultural development of Europe and the - world -+ free trip and stay in Gothenburg (for travel grant students) - - -==More information== - -[Course Google Group http://groups.google.com/group/gf-resource-school-2009/] - -[GF web page http://digitalgrammars.com/gf/] - -[GF tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html] - -[GF resource synopsis http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html] - -[Resource-HOWTO document http://digitalgrammars.com/gf/doc/Resource-HOWTO.html] - - -===Contact=== - -Hkan Burden: burden at chalmers se - -Aarne Ranta: aarne at chalmers se - - - -===Selected publications from earlier resource grammar projects=== - -K. Angelov. -Type-Theoretical Bulgarian Grammar. -In B. Nordstrm and A. Ranta (eds), -//Advances in Natural Language Processing (GoTAL 2008)//, -LNCS/LNAI 5221, Springer, -2008. - -B. Bringert. -//Programming Language Techniques for Natural Language Applications//. -Phd thesis, Computer Science, University of Gothenburg, -2008. - -A. El Dada and A. Ranta. -Implementing an Open Source Arabic Resource Grammar in GF. -In M. Mughazy (ed), -//Perspectives on Arabic Linguistics XX. 
Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26// -John Benjamins Publishing Company. -2007. - -A. El Dada. -Implementation of the Arabic Numerals and their Syntax in GF. -Computational Approaches to Semitic Languages: Common Issues and Resources, - ACL-2007 Workshop, -June 28, 2007, Prague. -2007. - -H. Hammarstrm and A. Ranta. -Cardinal Numerals Revisited in GF. -//Workshop on Numerals in the World's Languages//. -Dept. of Linguistics Max Planck Institute for Evolutionary Anthropology, Leipzig, -2004. - -M. Humayoun, H. Hammarstrm, and A. Ranta. -Urdu Morphology, Orthography and Lexicon Extraction. -//CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages//, -July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University. -2007. - -K. Johannisson. -//Formal and Informal Software Specifications.// -Phd thesis, Computer Science, University of Gothenburg, -2005. - -J. Khegai. -GF parallel resource grammars and Russian. -In proceedings of ACL2006 - (The joint conference of the International Committee on Computational - Linguistics and the Association for Computational Linguistics) (pp. 475-482), - Sydney, Australia, July 2006. - -J. Khegai. -//Language engineering in Grammatical Framework (GF)//. -Phd thesis, Computer Science, Chalmers University of Technology, -2006. - -W. Ng'ang'a. -Multilingual content development for eLearning in Africa. -eLearning Africa: 1st Pan-African Conference on ICT for Development, - Education and Training. 24-26 May 2006, Addis Ababa, Ethiopia. -2006. - -N. Perera and A. Ranta. -Dialogue System Localization with the GF Resource Grammar Library. -//SPEECHGRAM 2007: ACL Workshop on Grammar-Based Approaches to Spoken Language Processing//, -June 29, 2007, Prague. -2007. - -A. Ranta. -Modular Grammar Engineering in GF. -//Research on Language and Computation//, -5:133-158, 2007. - -A. Ranta. -How predictable is Finnish morphology? 
An experiment on lexicon construction. -In J. Nivre, M. Dahllöf and B. Megyesi (eds), -//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//, -University of Uppsala, -2008. - -A. Ranta. Grammars as Software Libraries. -To appear in -Y. Bertot, G. Huet, J-J. Lévy, and G. Plotkin (eds.), -//From Semantics to Computer Science//, -Cambridge University Press, Cambridge, 2009. - -A. Ranta and K. Angelov. -Implementing Controlled Languages in GF. -To appear in the proceedings of //CNL 2009//. - diff --git a/doc/gf-tutorial.html b/doc/gf-tutorial.html deleted file mode 100644 index 230152005..000000000 --- a/doc/gf-tutorial.html +++ /dev/null @@ -1,5857 +0,0 @@ - - - - - -Grammatical Framework Tutorial - -

Grammatical Framework Tutorial

- -Aarne Ranta
-Version 3.1.2, November 2008 -
- -

-
-

- - -

-
-

-

- -

- -

Overview

-

-This is a hands-on introduction to grammar writing in GF. -

-

-Main ingredients of GF: -

-
    -
  • linguistics -
  • functional programming -
- -

-Prerequisites: -

-
    -
  • some previous experience from some programming language -
  • the basics of using computers, e.g. the use of - text editors and the management of files. -
  • knowledge of Unix commands is useful but not necessary -
  • knowledge of many natural languages may add fun to experience -
- -

- -

- -

Outline

-

-Lesson 1: a multilingual "Hello World" grammar. English, Finnish, Italian. -

-

-Lesson 2: a larger grammar for the domain of food. English and Italian. -

-

-Lesson 3: parameters - morphology and agreement. -

-

-Lesson 4: using the resource grammar library. -

-

-Lesson 5: semantics - dependent types, variable bindings, -and semantic definitions. -

-

-Lesson 6: implementing formal languages. -

-

-Lesson 7: embedded grammar applications. -

-

- -

- -

Slides

-

-You can chop this tutorial into a set of slides by the command -

-
-    htmls gf-tutorial.html
-
-

-where the program htmls is distributed with GF (see below), in -

-

- GF/src/tools/Htmls.hs -

-

-The slides will appear as a set of files beginning with 01-gf-tutorial.htmls. -

-

-Internal links will not work in the slide format, except for those in the -upper left corner of each slide, and the links behind the "Contents" link. -

-

- -

- -

Lesson 1: Getting Started with GF

-

- -

-

-Goals: -

-
    -
  • install and run GF -
  • write the first GF grammar: a "Hello World" grammar in three languages -
  • use GF for translation and multilingual generation -
- -

- -

- -

What GF is

-

-We use the term GF for three different things: -

-
    -
  • a system (computer program) used for working with grammars -
  • a programming language in which grammars can be written -
  • a theory about grammars and languages -
- -

-The GF system is an implementation -of the GF programming language, which in turn is built on the ideas of the -GF theory. -

-

-The focus of this tutorial is on using the GF programming language. -

-

-At the same time, we learn the way of thinking in the GF theory. -

-

-We make the grammars run on a computer by -using the GF system. -

-

- -

- -

GF grammars and language processing tasks

-

-A GF program is called a grammar. -

-

-A grammar defines a language. -

-

-From this definition, language processing components can be derived: -

-
    -
  • parsing: to analyse the language -
  • linearization: to generate the language -
  • translation: to analyse one language and generate another -
- -

-In general, a GF grammar is multilingual: -

-
    -
  • many languages in one grammar -
  • translations between them -
- -

- -

- -

Getting the GF system

-

-Open-source free software, downloaded via the GF Homepage: -

-

-digitalgrammars.com/gf -

-

-There you find -

-
    -
  • binaries for Linux, Mac OS X, and Windows -
  • source code and documentation -
  • grammar libraries and examples -
- -

-Many examples in this tutorial are -online. -

-

-Normally you don't have to compile GF yourself. -But if you do want to compile GF from source, follow the -instructions in the Developers Guide. -

-

- -

- -

Running the GF system

-

-Type gf in the Unix (or Cygwin) shell: -

-
-    % gf
-
-

-You will see GF's welcome message and the prompt >. -The command -

-
-    > help
-
-

-will give you a list of available commands. -

-

-As a common convention, we will use -

-
    -
  • % as a prompt that marks system commands -
  • > as a prompt that marks GF commands -
- -

-Thus you should not type these prompts, but only the characters that -follow them. -

-

- -

- -

A "Hello World" grammar

-

-Like most programming language tutorials, we start with a -program that prints "Hello World" on the terminal. -

-

-Extra features: -

-
    -
  • Multilinguality: the message is printed in many languages. -
  • Reversibility: in addition to printing, you can parse the - message and translate it to other languages. -
- -

- -

- -

The program: abstract syntax and concrete syntaxes

-

-A GF program, in general, is a multilingual grammar. Its main parts -are -

-
    -
  • an abstract syntax -
  • one or more concrete syntaxes -
- -

-The abstract syntax defines what meanings -can be expressed in the grammar -

-
    -
  • Greetings, where we greet a Recipient, which can be - World or Mum or Friends -
- -

- -

-

-GF code for the abstract syntax: -

-
-    -- a "Hello World" grammar
-    abstract Hello = {
-  
-      flags startcat = Greeting ;
-  
-      cat Greeting ; Recipient ;
-  
-      fun 
-        Hello : Recipient -> Greeting ;
-        World, Mum, Friends : Recipient ;
-    }
-
-

-The code has the following parts: -

-
    -
  • a comment (optional), saying what the module is doing -
  • a module header indicating that it is an abstract syntax - module named Hello -
  • a module body in braces, consisting of -
      -
    • a startcat flag declaration stating that Greeting is the - default start category for parsing and generation -
    • category declarations introducing two categories, i.e. types of meanings -
    • function declarations introducing three meaning-building functions -
    -
- -

- -

-

-English concrete syntax (mapping from meanings to strings): -

-
-    concrete HelloEng of Hello = {
-  
-      lincat Greeting, Recipient = {s : Str} ;
-  
-      lin 
-        Hello recip = {s = "hello" ++ recip.s} ;
-        World = {s = "world"} ;
-        Mum = {s = "mum"} ;
-        Friends = {s = "friends"} ;
-    }
-
-

-The major parts of this code are: -

-
    -
  • a module header indicating that it is a concrete syntax of the abstract syntax - Hello, itself named HelloEng -
  • a module body in curly brackets, consisting of -
      -
    • linearization type definitions stating that - Greeting and Recipient are records with a string s -
    • linearization definitions telling what records are assigned to - each of the meanings defined in the abstract syntax -
    -
- -

-Notice the concatenation ++ and the record projection .. -

-

- -

-

-Finnish and Italian concrete syntaxes: -

-
-    concrete HelloFin of Hello = {
-      lincat Greeting, Recipient = {s : Str} ;
-      lin 
-        Hello recip = {s = "terve" ++ recip.s} ;
-        World = {s = "maailma"} ;
-        Mum = {s = "äiti"} ;
-        Friends = {s = "ystävät"} ;
-    }
-  
-    concrete HelloIta of Hello = {
-      lincat Greeting, Recipient = {s : Str} ;
-      lin 
-        Hello recip = {s = "ciao" ++ recip.s} ;
-        World = {s = "mondo"} ;
-        Mum = {s = "mamma"} ;
-        Friends = {s = "amici"} ;
-    }
-
-

-

- -

- -

Using grammars in the GF system

-

-In order to compile the grammar in GF, -we create four files, one for each module, named Modulename.gf: -

-
-    Hello.gf  HelloEng.gf  HelloFin.gf  HelloIta.gf
-
-

-The first GF command: import a grammar. -

-
-    > import HelloEng.gf
-
-

-All commands also have short names; here: -

-
-    > i HelloEng.gf
-
-

-The GF system will compile your grammar -into an internal representation and show the CPU time that was consumed, followed -by a new prompt: -

-
-    > i HelloEng.gf
-    - compiling Hello.gf...   wrote file Hello.gfo 8 msec
-    - compiling HelloEng.gf...   wrote file HelloEng.gfo 12 msec
-  
-    12 msec
-    >
-
-

-

- -

-

-You can use GF for parsing (parse = p) -

-
-    > parse "hello world"
-    Hello World
-
-

-Parsing takes a string into an abstract syntax tree. -

-

-The notation for trees is that of function application: -

-
-    function argument1 ... argumentn
-
-

-Parentheses are only needed for grouping. -

-

-Parsing something that is not in the grammar will fail: -

-
-    > parse "hello dad"
-    Unknown words: dad
-  
-    > parse "world hello"
-    no tree found
-
-

-

- -

-

-You can also use GF for linearization (linearize = l). -It takes trees into strings: -

-
-    > linearize Hello World
-    hello world
-
-

-Translation: pipe parsing to linearization: -

-
-    > import HelloEng.gf
-    > import HelloIta.gf
-  
-    > parse -lang=HelloEng "hello mum" | linearize -lang=HelloIta
-    ciao mamma
-
-

-Default of the language flag (-lang): the last-imported concrete syntax. -

-

-Multilingual generation: -

-
-    > parse -lang=HelloEng "hello friends" | linearize
-    terve ystävät
-    ciao amici
-    hello friends
-
-

-Linearization is by default to all available languages. -

-

- -

- -

Exercises on the Hello World grammar

-
    -
  1. Test the parsing and translation examples shown above, as well as -some other examples, in different combinations of languages. -

    -
  2. Extend the grammar Hello.gf and some of the -concrete syntaxes by five new recipients and one new greeting -form. -

    -
  3. Add a concrete syntax for some other -languages you might know. -

    -
  4. Add a pair of greetings that are expressed in one and -the same way in -one language and in two different ways in another. -For instance, good morning -and good afternoon in English are both expressed -as buongiorno in Italian. -Test what happens when you translate buongiorno to English in GF. -

    -
  5. Inject errors in the Hello grammars, for example, leave out -some line, omit a variable in a lin rule, or change the name -in one occurrence -of a variable. Inspect the error messages generated by GF. -
- -

- -

- -

Using grammars from outside GF

-

-You can use the gf program in a Unix pipe. -

-
    -
  • echo a GF command -
  • pipe it into GF with grammar names as arguments -
- -
-    % echo "l Hello World" | gf HelloEng.gf HelloFin.gf HelloIta.gf
-
-

-You can also write a script, a file containing the lines -

-
-    import HelloEng.gf
-    import HelloFin.gf
-    import HelloIta.gf
-    linearize Hello World
-
-

-

- -

- -

GF scripts

-

-If we name this script hello.gfs, we can do -

-
-    $ gf --run <hello.gfs
-  
-    ciao mondo
-    terve maailma
-    hello world
-
-

-The option --run removes prompts, CPU time, and other messages. -

-

-See Lesson 7, for stand-alone programs that don't need the GF system to run. -

-

-Exercise. (For Unix hackers.) Write a GF application that reads -an English string from the standard input and writes an Italian -translation to the output. -

-

- -

- -

What else can be done with the grammar

-

-Some more functions that will be covered: -

-
    -
  • morphological analysis: find out the possible inflection forms of words -
  • morphological synthesis: generate all inflection forms of words -
  • random generation: generate random expressions -
  • corpus generation: generate all expressions -
  • treebank generation: generate a list of trees with their linearizations -
  • teaching quizzes: train morphology and translation -
  • multilingual authoring: create a document in many languages simultaneously -
  • speech input: optimize a speech recognition system for a grammar -
- -

- -

- -

Embedded grammar applications

-

-Application programs, using techniques from Lesson 7: -

-
    -
  • compile grammars to new formats, such as speech recognition grammars -
  • embed grammars in Java and Haskell programs -
  • build applications using compilation and embedding: -
      -
    • voice commands -
    • spoken language translators -
    • dialogue systems -
    • user interfaces -
    • localization: render the messages printed by a program - in different languages -
    -
- -

- -

- -

Lesson 2: Designing a grammar for complex phrases

-

- -

-

-Goals: -

-
    -
  • build a larger grammar: phrases about food in English and Italian -
  • learn to write reusable library functions ("operations") -
  • learn the basics of GF's module system -
- -

- -

- -

The abstract syntax Food

-

-Phrases usable for speaking about food: -

-
    -
  • the start category is Phrase -
  • a Phrase can be built by assigning a Quality to an Item - (e.g. this cheese is Italian) -
  • an Item is built from a Kind by prefixing this or that - (e.g. this wine) -
  • a Kind is either atomic (e.g. cheese), or formed by - qualifying a given Kind with a Quality (e.g. Italian cheese) -
  • a Quality is either atomic (e.g. Italian), - or built by modifying a given Quality with the word very (e.g. very warm) -
- -

-Abstract syntax: -

-
-    abstract Food = {
-  
-      flags startcat = Phrase ;
-  
-      cat
-        Phrase ; Item ; Kind ; Quality ;
-  
-      fun
-        Is : Item -> Quality -> Phrase ;
-        This, That : Kind -> Item ;
-        QKind : Quality -> Kind -> Kind ;
-        Wine, Cheese, Fish : Kind ;
-        Very : Quality -> Quality ;
-        Fresh, Warm, Italian, Expensive, Delicious, Boring : Quality ;
-    }
-
-

-Example Phrase -

-
-    Is (This (QKind Delicious (QKind Italian Wine))) (Very (Very Expensive))
-    this delicious Italian wine is very very expensive
-
-

-

- -

- -

The concrete syntax FoodEng

-
-    concrete FoodEng of Food = {
-  
-      lincat
-        Phrase, Item, Kind, Quality = {s : Str} ;
-  
-      lin
-        Is item quality = {s = item.s ++ "is" ++ quality.s} ;
-        This kind = {s = "this" ++ kind.s} ;
-        That kind = {s = "that" ++ kind.s} ;
-        QKind quality kind = {s = quality.s ++ kind.s} ;
-        Wine = {s = "wine"} ;
-        Cheese = {s = "cheese"} ;
-        Fish = {s = "fish"} ;
-        Very quality = {s = "very" ++ quality.s} ;
-        Fresh = {s = "fresh"} ;
-        Warm = {s = "warm"} ;
-        Italian = {s = "Italian"} ;
-        Expensive = {s = "expensive"} ;
-        Delicious = {s = "delicious"} ;
-        Boring = {s = "boring"} ;
-    }  
-
-

-

- -

-

-Test the grammar for parsing: -

-
-    > import FoodEng.gf
-    > parse "this delicious wine is very very Italian"
-    Is (This (QKind Delicious Wine)) (Very (Very Italian))
-
-

-Parse in other categories setting the cat flag: -

-
-    p -cat=Kind "very Italian wine"
-    QKind (Very Italian) Wine
-
-

-

- -

- -

Exercises on the Food grammar

-
    -
  1. Extend the Food grammar by ten new food kinds and -qualities, and run the parser with new kinds of examples. -

    -
  2. Add a rule that enables question phrases of the form -is this cheese Italian. -

    -
  3. Enable the optional prefixing of -phrases with the words "excuse me but". Do this in such a way that -the prefix can occur at most once. -
- -

- -

- -

Commands for testing grammars

- -

Generating trees and strings

-

-Random generation (generate_random = gr): -build a random tree in accordance with an abstract syntax: -

-
-    > generate_random
-    Is (This (QKind Italian Fish)) Fresh
-
-

-By using a pipe, random generation can be fed into linearization: -

-
-    > generate_random | linearize
-    this Italian fish is fresh
-
-

-Use the number flag to generate several trees: -

-
-    > gr -number=4 | l
-    that wine is boring
-    that fresh cheese is fresh
-    that cheese is very boring
-    this cheese is Italian
-
-

-

- -

-

-To generate all phrases that a grammar can produce, -use generate_trees = gt. -

-
-    > generate_trees | l
-    that cheese is very Italian
-    that cheese is very boring
-    that cheese is very delicious
-    ...
-    this wine is fresh
-    this wine is warm
-
-

-The default depth is 3; the depth can be -set by using the depth flag: -

-
-    > generate_trees -depth=2 | l
-
-

-What options a command has can be seen by the help = h command: -

-
-    > help gr
-    > help gt
-
-

-

- -

- -

Exercises on generation

-
    -
  1. If the command gt generated all -trees in your grammar, it would never terminate. Why? -

    -
  2. Measure how many trees the grammar gives with depths 4 and 5, -respectively. Hint. You can -use the Unix word count command wc to count lines. -
- -

- -

- -

More on pipes: tracing

-

-Put the tracing option -tr to each command whose output you -want to see: -

-
-    > gr -tr | l -tr | p
-  
-    Is (This Cheese) Boring
-    this cheese is boring
-    Is (This Cheese) Boring  
-
-

-Useful for test purposes: the pipe above can show -if a grammar is ambiguous, i.e. -contains strings that can be parsed in more than one way. -

-

-Exercise. Extend the Food grammar so that it produces ambiguous -strings, and try out the ambiguity test. -

-

- -

- -

Writing and reading files

-

-To save the output into a file, pipe it to the write_file = wf command, -

-
-    > gr -number=10 | linearize | write_file -file=exx.tmp
-
-

-To read a file to GF, use the read_file = rf command, -

-
-    > read_file -file=exx.tmp -lines | parse
-
-

-The flag -lines tells GF to read each line of the file separately. -

-

-Files with examples can be used for regression testing -of grammars - the most systematic way to do this is by -treebanks; see here. -

-

- -

- -

Visualizing trees

-

-Parentheses give a linear representation of trees, -useful for the computer. -

-

-The human eye may prefer to see a visualization: visualize_tree = vt: -

-
-    > parse "this delicious cheese is very Italian" | visualize_tree
-
-

-The tree is generated in a PostScript (.ps) file. The -view option is used for -telling what command to use to view the file. Its default is "gv", which works -on most Linux installations. On a Mac, one would probably write -

-
-    > parse "this delicious cheese is very Italian" | visualize_tree -view="open"
-
-

-

- -

-

-This command uses the program Graphviz, which you -might not have, but which is freely available on the web. -

-

-You can save the temporary file _grph.dot, -which the command vt produces. -

-

-Then you can process this file with the dot -program (from the Graphviz package). -

-
-    % dot -Tpng _grph.dot > mytree.png
-
-

-

- -

- -

System commands

-

-You can give a system command without leaving GF: -! followed by a Unix command, -

-
-    > ! dot -Tpng _grph.dot > mytree.png
-    > ! open mytree.png
-
-

-A system command may also receive its argument from -a GF pipe. It then has the name sp = system_pipe: -

-
-    > generate_trees -depth=4 | sp -command="wc -l"
-
-

-This command example returns the number of generated trees. -

-

-Exercise. -Measure how many trees the grammar FoodEng gives with depths 4 and 5, -respectively. Use the Unix word count command wc to count lines, and -a system pipe from a GF command into a Unix command. -

-

- -

- -

An Italian concrete syntax

-

- -

-

-Just (?) replace English words with their dictionary equivalents: -

-
-    concrete FoodIta of Food = {
-  
-      lincat
-        Phrase, Item, Kind, Quality = {s : Str} ;
-  
-      lin
-        Is item quality = {s = item.s ++ "è" ++ quality.s} ;
-        This kind = {s = "questo" ++ kind.s} ;
-        That kind = {s = "quel" ++ kind.s} ;
-        QKind quality kind = {s = kind.s ++ quality.s} ;
-        Wine = {s = "vino"} ;
-        Cheese = {s = "formaggio"} ;
-        Fish = {s = "pesce"} ;
-        Very quality = {s = "molto" ++ quality.s} ;
-        Fresh = {s = "fresco"} ;
-        Warm = {s = "caldo"} ;
-        Italian = {s = "italiano"} ;
-        Expensive = {s = "caro"} ;
-        Delicious = {s = "delizioso"} ;
-        Boring = {s = "noioso"} ;
-    }
-
-

-

- -

-

-Not just replacing words: -

-

-The order of a quality and the kind it modifies is changed in -

-
-      QKind quality kind = {s = kind.s ++ quality.s} ;
-
-

-Thus Italian says vino italiano for Italian wine. -

-

-(Some Italian adjectives -are put before the noun. This distinction can be controlled by parameters, -which are introduced in Lesson 3.) -

-

- -

- -

Exercises on multilinguality

-
    -
  1. Write a concrete syntax of Food for some other language. -You will probably end up with grammatically incorrect -linearizations - but don't -worry about this yet. -

    -
  2. If you have written Food for German, Swedish, or some -other language, test with random or exhaustive generation what constructs -come out incorrect, and prepare a list of those ones that cannot be helped -with the currently available fragment of GF. You can return to your list -after having worked out Lesson 3. -
- -

- -

- -

Free variation

-

-Semantically indistinguishable ways of expressing a thing. -

-

-The variants construct of GF expresses free variation. For example, -

-
-    lin Delicious = {s = "delicious" | "exquisit" | "tasty"} ;
-
-

-By default, the linearize command -shows only the first variant from such lists; to see them -all, use the option -all: -

-
-    > p "this exquisit wine is delicious" | l -all
-    this delicious wine is delicious
-    this delicious wine is exquisit
-    ...
-
-

-

- -

-

-An equivalent notation for variants is -

-
-    lin Delicious = {s = variants {"delicious" ; "exquisit" ; "tasty"}} ;
-
-

-This notation also allows the limiting case: an empty variant list, -

-
-    variants {}
-
-

-It can be used e.g. if a word lacks a certain inflection form. -

-

-Free variation works for all types in concrete syntax; all terms in -a variant list must be of the same type. -

-

- -

- -

More applications of multilingual grammars

- -

Multilingual treebanks

-

- -

-

-Multilingual treebank: a set of trees with their -linearizations in different languages: -

-
-    > gr -number=2 | l -treebank
-  
-    Is (That Cheese) (Very Boring)
-    quel formaggio è molto noioso
-    that cheese is very boring
-  
-    Is (That Cheese) Fresh
-    quel formaggio è fresco
-    that cheese is fresh
-
-

-

- -

- -

Translation quiz

-

-translation_quiz = tq: -generate random sentences, display them in one language, and check the user's -answer given in another language. -

-
-    > translation_quiz -from=FoodEng -to=FoodIta
-  
-    Welcome to GF Translation Quiz.
-    The quiz is over when you have done at least 10 examples
-    with at least 75 % success.
-    You can interrupt the quiz by entering a line consisting of a dot ('.').
-  
-    this fish is warm
-    questo pesce è caldo
-    > Yes.
-    Score 1/1
-  
-    this cheese is Italian
-    questo formaggio è noioso
-    > No, not questo formaggio è noioso, but
-    questo formaggio è italiano
-  
-    Score 1/2
-    this fish is expensive
-
-

-

- -

- -

Context-free grammars and GF

- -

The "cf" grammar format

-

-The grammar FoodEng can be written in a BNF format as follows: -

-
-    Is.        Phrase  ::= Item "is" Quality ;
-    That.      Item    ::= "that" Kind ;
-    This.      Item    ::= "this" Kind ;
-    QKind.     Kind    ::= Quality Kind ;
-    Cheese.    Kind    ::= "cheese" ;
-    Fish.      Kind    ::= "fish" ;
-    Wine.      Kind    ::= "wine" ;
-    Italian.   Quality ::= "Italian" ;
-    Boring.    Quality ::= "boring" ;
-    Delicious. Quality ::= "delicious" ;
-    Expensive. Quality ::= "expensive" ;
-    Fresh.     Quality ::= "fresh" ;
-    Very.      Quality ::= "very" Quality ;
-    Warm.      Quality ::= "warm" ;
-
-

-GF can convert BNF grammars into GF. -BNF files are recognized by the file name suffix .cf (for context-free): -

-
-    > import food.cf
-
-

-The compiler creates separate abstract and concrete modules internally. -

-

- -

- -

Restrictions of context-free grammars

-

-Separating concrete and abstract syntax allows -three deviations from context-free grammar: -

-
    -
  • permutation: changing the order of constituents -
  • suppression: omitting constituents -
  • reduplication: repeating constituents -
- -

-Exercise. Define the non-context-free -copy language {x x | x <- (a|b)*} in GF. -

-

- -

- -

Modules and files

-

-GF uses suffixes to recognize different file formats: -

-
    -
  • Source files: Modulename.gf -
  • Target files: Modulename.gfo -
- -

-Importing generates target from source: -

-
-    > i FoodEng.gf
-    - compiling Food.gf...   wrote file Food.gfo 16 msec
-    - compiling FoodEng.gf...   wrote file FoodEng.gfo 20 msec
-
-

-The .gfo format (="GF Object") is precompiled GF, which is -faster to load than source GF (.gf). -

-

-When reading a module, GF decides whether -to use an existing .gfo file or to generate -a new one, by looking at modification times. -

-

- -

-

-Exercise. What happens when you import FoodEng.gf for -a second time? Try this in different situations: -

-
    -
  • Right after importing it the first time (the modules are kept in - the memory of GF and need no reloading). -
  • After issuing the command empty (e), which clears the memory - of GF. -
  • After making a small change in FoodEng.gf, be it only an added space. -
  • After making a change in Food.gf. -
- -

- -

- -

Using operations and resource modules

- -

Operation definitions

-

-The golden rule of functional programming: -

-

-Whenever you find yourself programming by copy-and-paste, write a function instead. -

-

-Functions in concrete syntax are defined using the keyword oper (for -operation), distinct from fun for the sake of clarity. -

-

-Example: -

-
-    oper ss : Str -> {s : Str} = \x -> {s = x} ;
-
-

-The operation can be applied to an argument, and GF will -compute the value: -

-
-    ss "boy" ===> {s = "boy"}
-
-

-The symbol ===> will be used for computation. -

-

- -

-

-Notice the lambda abstraction form -

-
    -
  • \x -> t -
- -

-This is read: -

-
    -
  • function with variable x and function body t -
- -

-For lambda abstraction with multiple arguments, we have the shorthand -

-
-    \x,y -> t   ===  \x -> \y -> t
-
-

-Linearization rules actually use syntactic -sugar for abstraction: -

-
-    lin f x = t   ===  lin f = \x -> t
-
-

-

- -

- -

The resource module type

-

-The resource module type is used to package -oper definitions into reusable resources. -

-
-    resource StringOper = {
-      oper
-        SS : Type = {s : Str} ;
-        ss : Str -> SS = \x -> {s = x} ;
-        cc : SS -> SS -> SS = \x,y -> ss (x.s ++ y.s) ;
-        prefix : Str -> SS -> SS = \p,x -> ss (p ++ x.s) ;
-    }
-
-

-

- -

- -

Opening a resource

-

-Any number of resource modules can be -opened in a concrete syntax. -

-
-    concrete FoodEng of Food = open StringOper in {
-  
-      lincat
-        Phrase, Item, Kind, Quality = SS ;
-  
-      lin
-        Is item quality = cc item (prefix "is" quality) ;
-        This k = prefix "this" k ;
-        That k = prefix "that" k ;
-        QKind k q = cc k q ;
-        Wine = ss "wine" ;
-        Cheese = ss "cheese" ;
-        Fish = ss "fish" ;
-        Very = prefix "very" ;
-        Fresh = ss "fresh" ;
-        Warm = ss "warm" ;
-        Italian = ss "Italian" ;
-        Expensive = ss "expensive" ;
-        Delicious = ss "delicious" ;
-        Boring = ss "boring" ;
-    }
-
-

-

- -

- -

Partial application

-

- -

-

-The rule -

-
-    lin This k = prefix "this" k ;
-
-

-can be written more concisely -

-
-    lin This = prefix "this" ;
-
-

-Part of the art in functional programming: -decide the order of arguments in a function, -so that partial application can be used as much as possible. -

-

-For instance, prefix is typically applied to -linearization variables with constant strings. Hence we -put the Str argument before the SS argument. -

-

-Exercise. Define an operation infix analogous to prefix, -such that it allows you to write -

-
-    lin Is = infix "is" ;
-
-

-

- -

- -

Testing resource modules

-

-Import with the flag -retain, -

-
-    > import -retain StringOper.gf
-
-

-Compute the value with compute_concrete = cc, -

-
-    > compute_concrete prefix "in" (ss "addition")
-    {s : Str = "in" ++ "addition"}
-
-

-

- -

- -

Grammar architecture

-

- -

- -

Extending a grammar

-

-A new module can extend an old one: -

-
-    abstract Morefood = Food ** {
-      cat
-        Question ;
-      fun
-        QIs : Item -> Quality -> Question ;
-        Pizza : Kind ;      
-    }
-
-

-Parallel to the abstract syntax, extensions can -be built for concrete syntaxes: -

-
-    concrete MorefoodEng of Morefood = FoodEng ** {
-      lincat
-        Question = {s : Str} ;
-      lin
-        QIs item quality = {s = "is" ++ item.s ++ quality.s} ;
-        Pizza = {s = "pizza"} ;
-    }
-
-

-The effect of extension: all of the contents of the extended -and extending modules are put together. -

-

-In other words: the new module inherits the contents of the old module. -

-

- -

-

-Simultaneous extension and opening: -

-
-    concrete MorefoodIta of Morefood = FoodIta ** open StringOper in {
-      lincat
-        Question = SS ;
-      lin
-        QIs item quality = ss (item.s ++ "è" ++ quality.s) ;
-        Pizza = ss "pizza" ;
-    }
-
-

-Resource modules can extend other resource modules - thus it is -possible to build resource hierarchies. -

-

- -

- -

Multiple inheritance

-

-Extend several grammars at the same time: -

-
-    abstract Foodmarket = Food, Fruit, Mushroom ** {
-      fun 
-        FruitKind    : Fruit    -> Kind ;
-        MushroomKind : Mushroom -> Kind ;
-      }
-
-

-where -

-
-    abstract Fruit = {
-      cat Fruit ;
-      fun Apple, Peach : Fruit ;
-    }
-  
-    abstract Mushroom = {
-      cat Mushroom ;
-      fun Cep, Agaric : Mushroom ;
-    }
-
-

-

-Exercise. Refactor Food by taking apart Wine into a special -Drink module. -

-

- -

- -

Lesson 3: Grammars with parameters

-

- -

-

-Goals: -

-
    -
  • implement sophisticated linguistic structures: -
      -
    • morphology: the inflection of words -
    • agreement: rules for selecting word forms in syntactic combinations -
    -
- -
    -
  • Cover all GF constructs for concrete syntax -
- -

-It is possible to skip this chapter and go directly -to the next, since the use of the GF Resource Grammar library -makes it unnecessary to use parameters: they -could be left to library implementors. -

-

- -

- -

The problem: words have to be inflected

-

-Plural forms are needed in things like -

-these Italian wines are delicious -
-This requires two things: -

-
    -
  • the inflection of nouns and verbs in singular and plural -
  • the agreement of the verb to subject: - the verb must have the same number as the subject -
- -

-Different languages have different types of inflection and agreement. -

-
    -
  • Italian has also gender (masculine vs. feminine). -
- -

-In a multilingual grammar, -we want to ignore such distinctions in abstract syntax. -

-

-Exercise. Make a list of the possible forms that nouns, -adjectives, and verbs can have in some languages that you know. -

-

- -

- -

Parameters and tables

-

-We define the parameter type of number in English by -a new form of judgement: -

-
-    param Number = Sg | Pl ;
-
-

-This judgement defines the parameter type Number by listing -its two constructors, Sg and Pl -(singular and plural). -

-

-We give Kind a linearization type that has a table depending on number: -

-
-    lincat Kind = {s : Number => Str} ;
-
-

-The table type Number => Str is similar to a function type -(Number -> Str). -

-

-Difference: the argument must be a parameter type. Then -the argument-value pairs can be listed in a finite table. -

-

- -

-

-Here is a table: -

-
-    lin Cheese = {
-      s = table {
-        Sg => "cheese" ;
-        Pl => "cheeses"
-      }
-    } ;
-
-

-The table has branches, with a pattern on the -left of the arrow => and a value on the right. -

-

-The application of a table is done by the selection operator !. -

-

-It is computed by pattern matching: return -the value from the first branch whose pattern matches the -argument. For instance, -

-
-     table {Sg => "cheese" ; Pl => "cheeses"} ! Pl 
-     ===> "cheeses"
-
-

-

- -

-

-Case expressions are syntactic sugar: -

-
-    case e of {...} ===  table {...} ! e
-
-

-Since they are familiar to Haskell and ML programmers, they can come in handy -when writing GF programs. -

-

- -

-

-Constructors can take arguments from other parameter types. -

-

-Example: forms of English verbs (except be): -

-
-    param VerbForm = VPresent Number | VPast | VPastPart | VPresPart ;
-
-

-Fact expressed: only present tense has number variation. -

-

-Example table: the forms of the verb drink: -

-
-    table {
-      VPresent Sg => "drinks" ;
-      VPresent Pl => "drink" ;
-      VPast       => "drank" ;
-      VPastPart   => "drunk" ;
-      VPresPart   => "drinking"
-      }
-
-

-

-Exercise. In an earlier exercise (previous section), -you made a list of the possible -forms that nouns, adjectives, and verbs can have in some languages that -you know. Now take some of the results and implement them by -using parameter type definitions and tables. Write them into a resource -module, which you can test by using the command compute_concrete. -

-

- -

- -

Inflection tables and paradigms

-

-A morphological paradigm is a formula telling how a class of -words is inflected. -

-

-From the GF point of view, a paradigm is a function that takes -a lemma (also known as a dictionary form, or a citation form) and -returns an inflection table. -

-

-The following operation defines the regular noun paradigm of English: -

-
-    oper regNoun : Str -> {s : Number => Str} = \dog -> {
-      s = table {
-        Sg => dog ;
-        Pl => dog + "s"
-        }
-      } ;
-
-

-The gluing operator + glues strings to one token: -

-
-    (regNoun "cheese").s ! Pl  ===> "cheese" + "s"  ===>  "cheeses"
-
-

-

- -

-

-A more complex example: regular verbs, -

-
-    oper regVerb : Str -> {s : VerbForm => Str} = \talk -> {
-      s = table {
-        VPresent Sg => talk + "s" ;
-        VPresent Pl => talk ;
-        VPresPart   => talk + "ing" ;
-        _           => talk + "ed"
-        }
-      } ;
-
-

-The catch-all case for the past tense and the past participle -uses a wild card pattern _. -

-

- -

- -

Exercises on morphology

-
    -
  1. Identify cases in which the regNoun paradigm does not -apply in English, and implement some alternative paradigms. -

    -
  2. Implement some regular paradigms for other languages you have -considered in earlier exercises. -
- -

- -

- -

Using parameters in concrete syntax

-

-Purpose: a more radical -variation between languages -than just the use of different words and word orders. -

-

-We add to the grammar Food two rules for forming plural items: -

-
-    fun These, Those : Kind -> Item ;
-
-

-We also add a noun which in Italian has the feminine gender: -

-
-    fun Pizza : Kind ;
-
-

-This will force us to deal with gender. -

-

- -

- -

Agreement

-

-In English, the phrase-forming rule -

-
-    fun Is : Item -> Quality -> Phrase ;
-
-

-is affected by the number because of subject-verb agreement: -the verb of a sentence must be inflected in the number of the subject, -

-
-    Is (This Pizza) Warm   ===>  "this pizza is warm"
-    Is (These Pizza) Warm  ===>  "these pizzas are warm"
-
-

-It is the copula (the verb be) that is affected: -

-
-    oper copula : Number -> Str = \n -> 
-      case n of {
-        Sg => "is" ;
-        Pl => "are"
-        } ;
-
-

-The subject Item must have such a number to provide to the copula: -

-
-    lincat Item = {s : Str ; n : Number} ;
-
-

-Now we can write -

-
-    lin Is item qual = {s = item.s ++ copula item.n ++ qual.s} ;
-
-

-

- -

- -

Determiners

-

-How does an Item subject receive its number? The rules -

-
-    fun This, These : Kind -> Item ;
-
-

-add determiners, either this or these, which -require different forms of the noun: this pizza vs. -these pizzas. -

-

-Thus Kind must have both singular and plural forms: -

-
-    lincat Kind = {s : Number => Str} ;
-
-

-We can write -

-
-    lin This kind = {
-      s = "this" ++ kind.s ! Sg ; 
-      n = Sg
-    } ; 
-  
-    lin These kind = {
-      s = "these" ++ kind.s ! Pl ; 
-      n = Pl
-    } ; 
-
-

-

- -

-

-To avoid copy-and-paste, we can factor out the pattern of determination, -

-
-    oper det : 
-      Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = 
-        \n,det,kind -> {
-        s = det ++ kind.s ! n ; 
-        n = n
-      } ; 
-
-

-Now we can write -

-
-    lin This  = det Sg "this" ;
-    lin These = det Pl "these" ;
-
-

-In a more lexicalized grammar, determiners would be a category: -

-
-    lincat Det = {s : Str ; n : Number} ;
-    fun Det : Det -> Kind -> Item ;
-    lin Det det kind = {
-        s = det.s ++ kind.s ! det.n ; 
-        n = det.n
-      } ; 
-
-

-

- -

- -

Parametric vs. inherent features

-

-Kinds have number as a parametric feature: both singular and plural -can be formed, -

-
-    lincat Kind = {s : Number => Str} ;
-
-

-Items have number as an inherent feature: they are inherently either -singular or plural, -

-
-    lincat Item = {s : Str ; n : Number} ;
-
-

-Italian Kind will have parametric number and inherent gender: -

-
-    lincat Kind = {s : Number => Str ; g : Gender} ;
-
-

-

- -

-

-Questions to ask when designing parameters: -

-
    -
  • existence: what forms are possible to build by morphological and - other means? -
  • need: what features are expected via agreement or government? -
- -

-Dictionaries give good advice: -

-uomo, pl. uomini, n.m. "man" -
-tells that uomo is a masculine noun with the plural form uomini. -Hence, parametric number and an inherent gender. -

-

-For words, inherent features are usually given as lexical information. -

-

-For combinations, they are inherited from some part of the construction -(typically the one called the head). Italian modification: -

-
-    lin QKind qual kind = 
-      let gen = kind.g in {
-        s = table {n => kind.s ! n ++ qual.s ! gen ! n} ;
-        g = gen
-        } ;
-
-

-Notice -

-
    -
  • local definition (let expression) -
  • variable pattern n -
- -

- -

- -

An English concrete syntax for Foods with parameters

-

-We use some string operations from the library Prelude. -

-
-     concrete FoodsEng of Foods = open Prelude in {
-  
-    lincat
-      S, Quality = SS ; 
-      Kind = {s : Number => Str} ; 
-      Item = {s : Str ; n : Number} ; 
-  
-    lin
-      Is item quality = ss (item.s ++ copula item.n ++ quality.s) ;
-      This  = det Sg "this" ;
-      That  = det Sg "that" ;
-      These = det Pl "these" ;
-      Those = det Pl "those" ;
-      QKind quality kind = {s = table {n => quality.s ++ kind.s ! n}} ;
-      Wine = regNoun "wine" ;
-      Cheese = regNoun "cheese" ;
-      Fish = noun "fish" "fish" ;
-      Pizza = regNoun "pizza" ;
-      Very = prefixSS "very" ;
-      Fresh = ss "fresh" ;
-      Warm = ss "warm" ;
-      Italian = ss "Italian" ;
-      Expensive = ss "expensive" ;
-      Delicious = ss "delicious" ;
-      Boring = ss "boring" ;
-
-

-

- -

-
-    param
-      Number = Sg | Pl ;
-  
-    oper
-      det : Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = 
-        \n,d,cn -> {
-          s = d ++ cn.s ! n ;
-          n = n
-        } ;
-      noun : Str -> Str -> {s : Number => Str} = 
-        \man,men -> {s = table {
-          Sg => man ;
-          Pl => men 
-          }
-        } ;
-      regNoun : Str -> {s : Number => Str} = 
-        \car -> noun car (car + "s") ;
-      copula : Number -> Str = 
-        \n -> case n of {
-          Sg => "is" ;
-          Pl => "are"
-          } ;
-    }    
-
-

-

- -

- -

More on inflection paradigms

-

- -

-

-Let us extend the English noun paradigms so that we can -deal with all nouns, not just the regular ones. The goal is to -provide a morphology module that makes it easy to -add words to a lexicon. -

-

- -

- -

Worst-case functions

-

-We perform data abstraction from the type -of nouns by writing a worst-case function: -

-
-    oper Noun : Type = {s : Number => Str} ;
-  
-    oper mkNoun : Str -> Str -> Noun = \x,y -> {
-      s = table {
-        Sg => x ;
-        Pl => y
-        }
-      } ;
-  
-    oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ;
-
-

-Then we can define -

-
-    lincat N = Noun ;
-    lin Mouse = mkNoun "mouse" "mice" ;
-    lin House = regNoun "house" ;
-
-

-where the underlying types are not seen. -

-

- -

-

-We are free to change the underlying definitions, e.g. -add case (nominative or genitive) to noun inflection: -

-
-    param Case = Nom | Gen ;
-  
-    oper Noun : Type = {s : Number => Case => Str} ;
-
-

-Now we have to redefine the worst-case function -

-
-    oper mkNoun : Str -> Str -> Noun = \x,y -> {
-      s = table {
-        Sg => table {
-          Nom => x ;
-          Gen => x + "'s"
-          } ;
-        Pl => table {
-          Nom => y ;
-          Gen => y + case last y of {
-            "s" => "'" ;
-            _   => "'s"
-          }
-        }
-      } ;
-
-

-But up from this level, we can retain the old definitions -

-
-    lin Mouse = mkNoun "mouse" "mice" ;
-    oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ;
-
-

-

- -

-

-In the last definition of mkNoun, we used a case expression -on the last character of the plural, as well as the Prelude -operation -

-
-    last : Str -> Str ;
-
-

-returning the string consisting of the last character. -

-

-The case expression uses pattern matching over strings, which -is supported in GF, alongside pattern matching over -parameters. -

-

- -

- -

Smart paradigms

-

-The regular dog-dogs paradigm has -predictable variations: -

-
    -
  • nouns ending with a y: fly-flies, except if - a vowel precedes the y: boy-boys -
  • nouns ending with s, ch, and a number of - other endings: bus-buses, leech-leeches -
- -

-We could provide alternative paradigms: -

-
-    noun_y : Str -> Noun = \fly -> mkNoun fly (init fly + "ies") ;  
-    noun_s : Str -> Noun = \bus -> mkNoun bus (bus + "es") ;
-
-

-(The Prelude function init drops the last character of a token.) -

-

-Drawbacks: -

-
    -
  • it can be difficult to select the correct paradigm -
  • it can be difficult to remember the names of the different paradigms -
- -

- -

-

-Better solution: a smart paradigm: -

-
-    regNoun : Str -> Noun = \w -> 
-      let 
-        ws : Str = case w of {
-          _ + ("a" | "e" | "i" | "o") + "o" => w + "s" ;  -- bamboo
-          _ + ("s" | "x" | "sh" | "o")      => w + "es" ; -- bus, hero
-          _ + "z"                           => w + "zes" ;-- quiz 
-          _ + ("a" | "e" | "o" | "u") + "y" => w + "s" ;  -- boy
-          x + "y"                           => x + "ies" ;-- fly
-          _                                 => w + "s"    -- car
-          } 
-      in 
-      mkNoun w ws
-
-

-GF has regular expression patterns: -

-
    -
  • disjunctive patterns P | Q -
  • concatenation patterns P + Q -
- -

-The patterns are ordered in such a way that, for instance, -the suffix "oo" prevents bamboo from matching the suffix -"o". -

-

- -

- -

Exercises on regular patterns

-
    -
  1. The same rules that form plural nouns in English also -apply in the formation of third-person singular verbs. -Write a regular verb paradigm that uses this idea, but first -rewrite regNoun so that the analysis needed to build s-forms -is factored out as a separate oper, which is shared with -regVerb. -

    -
  2. Extend the verb paradigms to cover all verb forms -in English, with special care taken of variations with the suffix -ed (e.g. try-tried, use-used). -

    -
  3. Implement the German Umlaut operation on word stems. -The operation changes the vowel of the stressed stem syllable as follows: -a to ä, au to äu, o to ö, and u to ü. You -can assume that the operation only takes syllables as arguments. Test the -operation to see whether it correctly changes Arzt to Ärzt, -Baum to Bäum, Topf to Töpf, and Kuh to Küh. -
- -

- -

- -

Function types with variables

-

-In Lesson 5, dependent function types need a notation -that binds a variable to the argument type, as in -

-
-    switchOff : (k : Kind) -> Action k
-
-

-Function types without variables are actually a shorthand: -

-
-    PredVP : NP -> VP -> S
-
-

-means -

-
-    PredVP : (x : NP) -> (y : VP) -> S
-
-

-or any other naming of the variables. -

-

- -

-

-Sometimes variables shorten the code, since they can share a type: -

-
-    octuple : (x,y,z,u,v,w,s,t : Str) -> Str
-
-

-If a bound variable is not used, it can be replaced by a wildcard: -

-
-    octuple : (_,_,_,_,_,_,_,_ : Str) -> Str
-
-

-A good practice is to indicate the number of arguments: -

-
-    octuple : (x1,_,_,_,_,_,_,x8 : Str) -> Str
-
-

-For inflection paradigms, it is handy to use heuristic variable names, -looking like the expected forms: -

-
-    mkNoun : (mouse,mice : Str) -> Noun
-
-

-

- -

- -

Separating operation types and definitions

-

-In libraries, it is useful to group type signatures separately from -definitions. It is possible to divide an oper judgement, -

-
-    oper regNoun : Str -> Noun ;
-    oper regNoun s = mkNoun s (s + "s") ;
-
-

-and put the parts in different places. -

-

-With the interface and instance module types -(see here): the parts can even be put to different files. -

-

- -

- -

Overloading of operations

-

-Overloading: different functions can be given the same name, as e.g. in C++. -

-

-The compiler performs overload resolution, which works as long as the -functions have different types. -

-

-In GF, the functions must be grouped together in overload groups. -

-

-Example: different ways to define nouns in English: -

-
-    oper mkN : overload {
-      mkN : (dog : Str) -> Noun ;         -- regular nouns
-      mkN : (mouse,mice : Str) -> Noun ;  -- irregular nouns
-    }
-
-

-Cf. dictionaries: if the -word is regular, just one form is needed. If it is irregular, -more forms are given. -

-

-The definition can be given separately, or at the same time, as the types: -

-
-    oper mkN = overload {
-      mkN : (dog : Str) -> Noun = regNoun ;
-      mkN : (mouse,mice : Str) -> Noun = mkNoun ;
-    }
-
-

-Exercise. Design a system of English verb paradigms presented by -an overload group. -

-

- -

- -

Morphological analysis and morphology quiz

-

-The command morpho_analyse = ma -can be used to read a text and return for each word its analyses -(in the current grammar): -

-
-    > read_file bible.txt | morpho_analyse
-
-

-The command morpho_quiz = mq generates inflection exercises. -

-
-    % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfc
-  
-    > morpho_quiz -cat=V
-  
-    Welcome to GF Morphology Quiz.
-    ...
-  
-    réapparaître : VFin VCondit  Pl  P2
-    réapparaitriez
-    > No, not réapparaitriez, but
-    réapparaîtriez
-    Score 0/1
-
-

-To create a list for later use, use the command morpho_list = ml -

-
-    > morpho_list -number=25 -cat=V | write_file exx.txt
-
-

-

- -

- -

The Italian Foods grammar

-

- -

-

-Parameters include not only number but also gender. -

-
-  concrete FoodsIta of Foods = open Prelude in {
-  
-    param
-      Number = Sg | Pl ;
-      Gender = Masc | Fem ;
-
-

-Qualities are inflected for gender and number, whereas kinds -have a parametric number and an inherent gender. -Items have an inherent number and gender. -

-
-    lincat
-      Phr = SS ; 
-      Quality = {s : Gender => Number => Str} ; 
-      Kind = {s : Number => Str ; g : Gender} ; 
-      Item = {s : Str ; g : Gender ; n : Number} ; 
-
-

-

- -

-

-A Quality is an adjective, with one form for each gender-number combination. -

-
-    oper
-      adjective : (_,_,_,_ : Str) -> {s : Gender => Number => Str} = 
-        \nero,nera,neri,nere -> {
-          s = table {
-            Masc => table {
-              Sg => nero ;
-              Pl => neri
-              } ; 
-            Fem => table {
-              Sg => nera ;
-              Pl => nere
-              }
-            }
-        } ;
-
-

-Regular adjectives work by adding endings to the stem. -

-
-      regAdj : Str -> {s : Gender => Number => Str} = \nero ->
-        let ner = init nero 
-        in adjective nero (ner + "a") (ner + "i") (ner + "e") ;
-
-

-

- -

-

-For noun inflection, we are happy to give the two forms and the gender -explicitly: -

-
-      noun : Str -> Str -> Gender -> {s : Number => Str ; g : Gender} = 
-        \vino,vini,g -> {
-          s = table {
-            Sg => vino ;
-            Pl => vini
-            } ;
-          g = g
-        } ;
-
-

-We need only number variation for the copula. -

-
-      copula : Number -> Str = 
-        \n -> case n of {
-          Sg => "è" ;
-          Pl => "sono"
-          } ;
-
-

-

- -

-

-Determination is more complex than in English, because of gender: -

-
-      det : Number -> Str -> Str -> {s : Number => Str ; g : Gender} -> 
-          {s : Str ; g : Gender ; n : Number} = 
-        \n,m,f,cn -> {
-          s = case cn.g of {Masc => m ; Fem => f} ++ cn.s ! n ;
-          g = cn.g ;
-          n = n
-        } ;
-
-

-

- -

-

-The complete set of linearization rules: -

-
-    lin
-      Is item quality = 
-        ss (item.s ++ copula item.n ++ quality.s ! item.g ! item.n) ;
-      This  = det Sg "questo" "questa" ;
-      That  = det Sg "quel"   "quella" ;
-      These = det Pl "questi" "queste" ;
-      Those = det Pl "quei"   "quelle" ;
-      QKind quality kind = {
-        s = \\n => kind.s ! n ++ quality.s ! kind.g ! n ;
-        g = kind.g
-        } ;
-      Wine = noun "vino" "vini" Masc ;
-      Cheese = noun "formaggio" "formaggi" Masc ;
-      Fish = noun "pesce" "pesci" Masc ;
-      Pizza = noun "pizza" "pizze" Fem ;
-      Very qual = {s = \\g,n => "molto" ++ qual.s ! g ! n} ;
-      Fresh = adjective "fresco" "fresca" "freschi" "fresche" ;
-      Warm = regAdj "caldo" ;
-      Italian = regAdj "italiano" ;
-      Expensive = regAdj "caro" ;
-      Delicious = regAdj "delizioso" ;
-      Boring = regAdj "noioso" ;
-    }
-
-

-

- -

- -

Exercises on using parameters

-
    -
  1. Experiment with multilingual generation and translation in the -Foods grammars. -

    -
  2. Add items, qualities, and determiners to the grammar, -and try to get their inflection and inherent features right. -

    -
  3. Write a concrete syntax of Food for a language of your choice, -now aiming for complete grammatical correctness by the use of parameters. -

    -
  4. Measure the size of the context-free grammar corresponding to -FoodsIta. You can do this by printing the grammar in the context-free format -(print_grammar -printer=bnf) and counting the lines. -
- -

- -

- -

Discontinuous constituents

-

-A linearization record may contain more strings than one, and those -strings can be put apart in linearization. -

-

-Example: English particle -verbs, (switch off). The object can appear between: -

-

-he switched it off -

-

-The verb switch off is called a -discontinuous constituent. -

-

-We can define transitive verbs and their combinations as follows: -

-
-    lincat TV = {s : Number => Str ; part : Str} ;
-  
-    fun AppTV : Item -> TV -> Item -> Phrase ;
-  
-    lin AppTV subj tv obj = 
-      {s = subj.s ++ tv.s ! subj.n ++ obj.s ++ tv.part} ;
-
-

-

-Exercise. Define the language a^n b^n c^n in GF, i.e. -any number of a's followed by the same number of b's and -the same number of c's. This language is not context-free, -but can be defined in GF by using discontinuous constituents. -

-

- -

- -

Strings at compile time vs. run time

-

-Tokens are created in the following ways: -

-
    -
  • quoted string: "foo" -
  • gluing : t + s -
  • predefined operations init, tail, tk, dp -
  • pattern matching over strings -
- -

-Since tokens must be known at compile time, -the above operations may not be applied to run-time variables -(i.e. variables that stand for function arguments in linearization rules). -

-

-Hence it is not legal to write -

-
-    cat Noun ;
-    fun Plural : Noun -> Noun ;
-    lin Plural n = {s = n.s + "s"} ;
-
-

-because n is a run-time variable. Also -

-
-    lin Plural n = {s = (regNoun n).s ! Pl} ; 
-
-

-is incorrect with regNoun as defined here, because the run-time -variable is eventually sent to string pattern matching and gluing. -

-

- -

-

-How to write tokens together without a space? -

-
-    lin Question p = {s = p + "?"} ;
-
-

-is incorrect. -

-

-The way to go is to use an unlexer that creates correct spacing -after linearization. -

-

-Correspondingly, a lexer that e.g. analyses "warm?" into -two tokens is needed before parsing. -This topic will be covered here. -

-

- -

- -

Supplementary constructs for concrete syntax

-

Record extension and subtyping

-

-The symbol ** is used for both record types and record objects. -

-
-    lincat TV = Verb ** {c : Case} ;
-  
-    lin Follow = regVerb "folgen" ** {c = Dative} ; 
-
-

-TV becomes a subtype of Verb. -

-

-If T is a subtype of R, an object of T can be used whenever -an object of R is required. -

-

-Covariance: a function returning a record T as value can -also be used to return a value of a supertype R. -

-

-Contravariance: a function taking an R as argument -can also be applied to any object of a subtype T. -

-

- -

-

Tuples and product types

-

-Product types and tuples are syntactic sugar for record types and records: -

-
-    T1 * ... * Tn   ===   {p1 : T1 ; ... ; pn : Tn}
-    <t1, ...,  tn>  ===   {p1 = t1 ; ... ; pn = tn}
-
-

-Thus the labels p1, p2,... are hard-coded. -

-

- -

-

Prefix-dependent choices

-

-English indefinite article: -

-
-    oper artIndef : Str = 
-      pre {"a" ; "an" / strs {"a" ; "e" ; "i" ; "o"}} ;
-
-

-Thus -

-
-    artIndef ++ "cheese"  --->  "a" ++ "cheese"
-    artIndef ++ "apple"   --->  "an" ++ "apple"
-
-

-

- -

- -

Lesson 4: Using the resource grammar library

-

- -

-

-Goals: -

-
    -
  • navigate in the GF resource grammar library and use it in applications -
  • get acquainted with basic linguistic categories -
  • write functors to achieve maximal sharing of code in multilingual grammars -
- -

- -

- -

The coverage of the library

-

-The current 12 resource languages are -

-
    -
  • Bulgarian -
  • Catalan -
  • Danish -
  • English -
  • Finnish -
  • French -
  • German -
  • Italian -
  • Norwegian -
  • Russian -
  • Spanish -
  • Swedish -
- -

-The first three letters (Eng etc) are used in grammar module names -(ISO 639 standard). -

-

- -

- -

The structure of the library

-

- -

-

-Semantic grammars (up to now in this tutorial): -a grammar defines a system of meanings (abstract syntax) and -tells how they are expressed (concrete syntax). -

-

-Resource grammars (as usual in linguistic tradition): -a grammar specifies the grammatically correct combinations of words, -whatever their meanings are. -

-

-With resource grammars, we can achieve a -wider coverage than with semantic grammars. -

-

- -

- -

Lexical vs. phrasal rules

-

-A resource grammar has two kinds of categories and two kinds of rules: -

-
    -
  • lexical: -
      -
    • lexical categories, to classify words -
    • lexical rules, to define words and their properties -

      -
    -
  • phrasal (combinatorial, syntactic): -
      -
    • phrasal categories, to classify phrases of arbitrary size -
    • phrasal rules, to combine phrases into larger phrases -
    -
- -

-GF makes no formal distinction between these two kinds. -

-

-But it is a good discipline to follow. -

-

- -

- -

Lexical categories

-

-Two kinds of lexical categories: -

-
    -
  • closed: -
      -
    • a finite number of words -
    • seldom extended in the history of language -
    • structural words / function words, e.g. -
      -      Conj ;     -- conjunction           e.g. "and"
      -      QuantSg ;  -- singular quantifier   e.g. "this"
      -      QuantPl ;  -- plural quantifier     e.g. "these"
      -
      -

      -
    -
  • open: -
      -
    • new words are added all the time -
    • content words, e.g. -
      -      N ;        -- noun         e.g. "pizza"
      -      A ;        -- adjective    e.g. "good"
      -      V ;        -- verb         e.g. "sleep"
      -
      -
    -
- -

- -

- -

Lexical rules

-

-Closed classes: module Syntax. In the Foods grammar, we need -

-
-    this_QuantSg, that_QuantSg : QuantSg ; 
-    these_QuantPl, those_QuantPl : QuantPl ; 
-    very_AdA  : AdA ;
-
-

-Naming convention: word followed by the category (so we can -distinguish the quantifier that from the conjunction that). -

-

-Open classes have no objects in Syntax. Words are -built as they are needed in applications: if we have -

-
-    fun Wine : Kind ;
-
-

-we will define -

-
-    lin Wine = mkN "wine" ;
-
-

-where we use mkN from ParadigmsEng: -

-

- -

- -

Resource lexicon

-

-Alternative concrete syntax for -

-
-    fun Wine : Kind ;
-
-

-is to provide a resource lexicon, which contains definitions such as -

-
-    oper wine_N : N = mkN "wine" ;
-
-

-so that we can write -

-
-    lin Wine = wine_N ;
-
-

-Advantages: -

-
    -
  • we accumulate a reusable lexicon -
  • we can use a functor (see here) to speed up multilingual grammar implementation -
- -

- -

- -

Phrasal categories

-

-In Foods, we need just four phrasal categories: -

-
-    Cl ;   -- clause             e.g. "this pizza is good"
-    NP ;   -- noun phrase        e.g. "this pizza"
-    CN ;   -- common noun        e.g. "warm pizza"
-    AP ;   -- adjectival phrase  e.g. "very warm"
-
-

-Clauses are similar to sentences (S), but without a -fixed tense and mood; see here for how they relate. -

-

-Common nouns are made into noun phrases by adding determiners. -

-

- -

- -

Syntactic combinations

-

-We need the following combinations: -

-
-    mkCl : NP -> AP -> Cl ;      -- e.g. "this pizza is very warm"
-    mkNP : QuantSg -> CN -> NP ; -- e.g. "this pizza" 
-    mkNP : QuantPl -> CN -> NP ; -- e.g. "these pizzas"
-    mkCN : AP -> CN -> CN ;      -- e.g. "warm pizza"
-    mkAP : AdA -> AP -> AP ;     -- e.g. "very warm" 
-
-

-We also need lexical insertion, to form phrases from single words: -

-
-    mkCN : N -> CN ;
-    mkAP : A -> AP ;
-
-

-Naming convention: to construct a C, use a function mkC. -

-

-Heavy overloading: the current library -(version 1.2) has 23 operations named mkNP! -

-

- -

- -

Example syntactic combination

-

-The sentence -

-these very warm pizzas are Italian -
-can be built as follows: -

-
-    mkCl 
-      (mkNP these_QuantPl 
-         (mkCN (mkAP very_AdA (mkAP warm_A)) (mkCN pizza_N)))
-      (mkAP italian_A) 
-
-

-The task now: to define the concrete syntax of Foods so that -this syntactic tree gives the value of linearizing the semantic tree -

-
-    Is (These (QKind (Very Warm) Pizza)) Italian
-
-

-

- -

- -

The resource API

-

-Language-specific and language-independent parts - roughly, -

-
    -
  • the syntax API SyntaxL has the same types and - functions for all languages L -
  • the morphology API ParadigmsL has partly - different types and functions - for different languages L -
- -

-Full API documentation on-line: the resource synopsis, -

-

-digitalgrammars.com/gf/lib/resource/doc/synopsis.html -

-

- -

- -

A miniature resource API: categories

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CategoryExplanationExample
Clclause (sentence), with all tensesshe looks at this
APadjectival phrasevery warm
CNcommon noun (without determiner)red house
NPnoun phrase (subject or object)the red house
AdAadjective-modifying adverb,very
QuantSgsingular quantifierthis
QuantPlplural quantifierthese
Aone-place adjectivewarm
Ncommon nounhouse
- -

- -

- -

A miniature resource API: rules

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FunctionTypeExample
mkClNP -> AP -> ClJohn is very old
mkNPQuantSg -> CN -> NPthis old man
mkNPQuantPl -> CN -> NPthese old men
mkCNN -> CNhouse
mkCNAP -> CN -> CNvery big blue house
mkAPA -> APold
mkAPAdA -> AP -> APvery very old
- -

- -

- -

A miniature resource API: structural words

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FunctionTypeIn English
this_QuantSgQuantSgthis
that_QuantSgQuantSgthat
these_QuantPlQuantPlthese
those_QuantPlQuantPlthose
very_AdAAdAvery
- -

- -

- -

A miniature resource API: paradigms

-

-From ParadigmsEng: -

- - - - - - - - - - - - - - - - - -
FunctionType
mkN(dog : Str) -> N
mkN(man,men : Str) -> N
mkA(cold : Str) -> A
- -

-From ParadigmsIta: -

- - - - - - - - - - - - - -
FunctionType
mkN(vino : Str) -> N
mkA(caro : Str) -> A
- -

- -

- -

A miniature resource API: more paradigms

-

-From ParadigmsGer: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FunctionType
GenderType
masculineGender
feminineGender
neuterGender
mkN(Stufe : Str) -> N
mkN(Bild,Bilder : Str) -> Gender -> N
mkA(klein : Str) -> A
mkA(gut,besser,beste : Str) -> A
- -

-From ParadigmsFin: -

- - - - - - - - - - - - - -
FunctionType
mkN(talo : Str) -> N
mkA(hieno : Str) -> A
- -

- -

- -

Exercises

-

-1. Try out the morphological paradigms in different languages. Do -as follows: -

-
-    > i -path=alltenses -retain alltenses/ParadigmsGer.gfo
-    > cc -table mkN "Farbe"
-    > cc -table mkA "gut" "besser" "beste"
-
-

-

- -

- -

Example: English

-

- -

-

-We assume the abstract syntax Foods from Lesson 3. -

-

-We don't need to think about inflection and agreement, but just pick -functions from the resource grammar library. -

-

-We need a path with -

-
    -
  • the current directory . -
  • the directory ../foods, in which Foods.gf resides. -
  • the library directory present, which is relative to the - environment variable GF_LIB_PATH -
- -

-Thus the beginning of the module is -

-
-    --# -path=.:../foods:present
-  
-    concrete FoodsEng of Foods = open SyntaxEng,ParadigmsEng in {
-
-

-

- -

- -

English example: linearization types and combination rules

-

-As linearization types, we use clauses for Phrase, noun phrases -for Item, common nouns for Kind, and adjectival phrases for Quality. -

-
-    lincat
-      Phrase = Cl ; 
-      Item = NP ;
-      Kind = CN ;
-      Quality = AP ;
-
-

-Now the combination rules we need almost write themselves automatically: -

-
-    lin
-      Is item quality = mkCl item quality ;
-      This kind = mkNP this_QuantSg kind ;
-      That kind = mkNP that_QuantSg kind ;
-      These kind = mkNP these_QuantPl kind ;
-      Those kind = mkNP those_QuantPl kind ;
-      QKind quality kind = mkCN quality kind ;
-      Very quality = mkAP very_AdA quality ;
-
-

-

- -

- -

English example: lexical rules

-

-We use resource paradigms and lexical insertion rules. -

-

-The two-place noun paradigm is needed only once, for -fish - everything else is regular. -

-
-      Wine = mkCN (mkN "wine") ;
-      Pizza = mkCN (mkN "pizza") ;
-      Cheese = mkCN (mkN "cheese") ;
-      Fish = mkCN (mkN "fish" "fish") ;
-      Fresh = mkAP (mkA "fresh") ;
-      Warm = mkAP (mkA "warm") ;
-      Italian = mkAP (mkA "Italian") ;
-      Expensive = mkAP (mkA "expensive") ;
-      Delicious = mkAP (mkA "delicious") ;
-      Boring = mkAP (mkA "boring") ;
-    }
-
-

-

- -

- -

English example: exercises

-

-1. Compile the grammar FoodsEng and generate -and parse some sentences. -

-

-2. Write a concrete syntax of Foods for Italian -or some other language included in the resource library. You can -compare the results with the hand-written -grammars presented earlier in this tutorial. -

-

- -

- -

Functor implementation of multilingual grammars

-

- -

- -

New language by copy and paste

-

-If you write a concrete syntax of Foods for some other -language, much of the code will look exactly the same -as for English. This is because -

-
    -
  • the Syntax API is the same for all languages (because - all languages in the resource package do implement the same - syntactic structures) -
  • languages tend to use the syntactic structures in similar ways -
- -

-But lexical rules are more language-dependent. -

-

-Thus, to port a grammar to a new language, you -

-
    -
  1. copy the concrete syntax of a given language -
  2. change the words (strings and inflection paradigms) -
- -

-Can we avoid this programming by copy-and-paste? -

-

- -

- -

Functors: functions on the module level

-

-Functors are familiar from the functional programming languages ML and OCaml, -and are also known as parametrized modules. -

-

-In GF, a functor is a module that opens one or more interfaces. -

-

-An interface is a module similar to a resource, but it only -contains the types of opers, not (necessarily) their definitions. -

-

-Syntax for functors: add the keyword incomplete. We will use the header -

-
-    incomplete concrete FoodsI of Foods = open Syntax, LexFoods in
-
-

-where -

-
-    interface Syntax    -- the resource grammar interface
-    interface LexFoods  -- the domain lexicon interface
-
-

-When we moreover have -

-
-    instance SyntaxGer of Syntax     -- the German resource grammar
-    instance LexFoodsGer of LexFoods -- the German domain lexicon
-
-

-we can write a functor instantiation, -

-
-    concrete FoodsGer of Foods = FoodsI with 
-      (Syntax = SyntaxGer),
-      (LexFoods = LexFoodsGer) ;
-
-

-

- -

- -

Code for the Foods functor

-
-    --# -path=.:../foods
-  
-    incomplete concrete FoodsI of Foods = open Syntax, LexFoods in {
-    lincat
-      Phrase = Cl ; 
-      Item = NP ;
-      Kind = CN ;
-      Quality = AP ;
-    lin
-      Is item quality = mkCl item quality ;
-      This kind = mkNP this_QuantSg kind ;
-      That kind = mkNP that_QuantSg kind ;
-      These kind = mkNP these_QuantPl kind ;
-      Those kind = mkNP those_QuantPl kind ;
-      QKind quality kind = mkCN quality kind ;
-      Very quality = mkAP very_AdA quality ;
-  
-      Wine = mkCN wine_N ;
-      Pizza = mkCN pizza_N ;
-      Cheese = mkCN cheese_N ;
-      Fish = mkCN fish_N ;
-      Fresh = mkAP fresh_A ;
-      Warm = mkAP warm_A ;
-      Italian = mkAP italian_A ;
-      Expensive = mkAP expensive_A ;
-      Delicious = mkAP delicious_A ;
-      Boring = mkAP boring_A ;
-    }
-
-

-

- -

- -

Code for the LexFoods interface

-

- -

-
-    interface LexFoods = open Syntax in {
-    oper
-      wine_N : N ;
-      pizza_N : N ;
-      cheese_N : N ;
-      fish_N : N ;
-      fresh_A : A ;
-      warm_A : A ;
-      italian_A : A ;
-      expensive_A : A ;
-      delicious_A : A ;
-      boring_A : A ;
-    }
-
-

-

- -

- -

Code for a German instance of the lexicon

-
-    instance LexFoodsGer of LexFoods = open SyntaxGer, ParadigmsGer in {
-    oper
-      wine_N = mkN "Wein" ;
-      pizza_N = mkN "Pizza" "Pizzen" feminine ;
-      cheese_N = mkN "Käse" "Käsen" masculine ;
-      fish_N = mkN "Fisch" ;
-      fresh_A = mkA "frisch" ;
-      warm_A = mkA "warm" "wärmer" "wärmste" ;
-      italian_A = mkA "italienisch" ;
-      expensive_A = mkA "teuer" ;
-      delicious_A = mkA "köstlich" ;
-      boring_A = mkA "langweilig" ;
-    }
-
-

-

- -

- -

Code for a German functor instantiation

-
-    --# -path=.:../foods:present
-  
-    concrete FoodsGer of Foods = FoodsI with 
-      (Syntax = SyntaxGer),
-      (LexFoods = LexFoodsGer) ;
-
-

-

- -

- -

Adding languages to a functor implementation

-

-Just two modules are needed: -

-
    -
  • a domain lexicon instance -
  • a functor instantiation -
- -

-The functor instantiation is completely mechanical to write. -

-

-The domain lexicon instance requires some knowledge of the words of the -language: -

-
    -
  • what words are used for which concepts -
  • how the words are inflected -
  • features such as genders -
- -

- -

- -

Example: adding Finnish

-

-Lexicon instance -

-
-    instance LexFoodsFin of LexFoods = open SyntaxFin, ParadigmsFin in {
-    oper
-      wine_N = mkN "viini" ;
-      pizza_N = mkN "pizza" ;
-      cheese_N = mkN "juusto" ;
-      fish_N = mkN "kala" ;
-      fresh_A = mkA "tuore" ;
-      warm_A = mkA "lämmin" ;
-      italian_A = mkA "italialainen" ;
-      expensive_A = mkA "kallis" ;
-      delicious_A = mkA "herkullinen" ;
-      boring_A = mkA "tylsä" ;
-    }
-
-

-Functor instantiation -

-
-    --# -path=.:../foods:present
-  
-    concrete FoodsFin of Foods = FoodsI with 
-      (Syntax = SyntaxFin),
-      (LexFoods = LexFoodsFin) ;
-
-

-

- -

- -

A design pattern

-

-This can be seen as a design pattern for multilingual grammars: -

-
-                        concrete DomainL*
-  
-      instance LexDomainL                 instance SyntaxL*
-     
-                   incomplete concrete DomainI
-                   /           |              \               
-     interface LexDomain   abstract Domain    interface Syntax*
-
-

-Modules marked with * are either given in the library, or trivial. -

-

-Of the hand-written modules, only LexDomainL is language-dependent. -

-

- -

- -

Functors: exercises

-

-1. Compile and test FoodsGer. -

-

-2. Refactor FoodsEng into a functor instantiation. -

-

-3. Instantiate the functor FoodsI to some language of -your choice. -

-

-4. Design a small grammar that can be used for controlling -an MP3 player. The grammar should be able to recognize commands such -as play this song, with the following variations: -

-
    -
  • verbs: play, remove -
  • objects: song, artist -
  • determiners: this, the previous -
  • verbs without arguments: stop, pause -
- -

-The implementation goes in the following phases: -

-
    -
  1. abstract syntax -
  2. (optional:) prototype string-based concrete syntax -
  3. functor over resource syntax and lexicon interface -
  4. lexicon instance for the first language -
  5. functor instantiation for the first language -
  6. lexicon instance for the second language -
  7. functor instantiation for the second language -
  8. ... -
- -

- -

- -

Restricted inheritance

- -

A problem with functors

-

-Problem: a functor only works when all languages use the resource Syntax -in the same way. -

-

-Example (contrived): assume that English has -no word for Pizza, but has to use the paraphrase Italian pie. -This is no longer a noun N, but a complex phrase -in the category CN. -

-

-Possible solution: change the interface LexFoods with -

-
-    oper pizza_CN : CN ;
-
-

-Problem with this solution: -

-
    -
  • we may end up changing the interface and the functor with each new language -
  • we must every time also change the instances for the old languages to maintain - type correctness -
- -

- -

- -

Restricted inheritance: include or exclude

-

-A module may inherit just a selection of names. -

-

-Example: the FoodMarket example from the section on module architecture: -

-
-    abstract Foodmarket = Food, Fruit [Peach], Mushroom - [Agaric]
-
-

-Here, from Fruit we include Peach only, and from Mushroom -we exclude Agaric. -

-

-A concrete syntax of Foodmarket must make the analogous restrictions. -

-

- -

- -

The functor problem solved

-

-The English instantiation inherits the functor -implementation except for the constant Pizza. This constant -is defined in the body instead: -

-
-    --# -path=.:../foods:present
-  
-    concrete FoodsEng of Foods = FoodsI - [Pizza] with 
-      (Syntax = SyntaxEng),
-      (LexFoods = LexFoodsEng) ** 
-        open SyntaxEng, ParadigmsEng in {
-  
-      lin Pizza = mkCN (mkA "Italian") (mkN "pie") ;
-    }
-
-

-

- -

- -

Grammar reuse

-

-Abstract syntax modules can be used as interfaces, -and concrete syntaxes as their instances. -

-

-The following correspondences are then applied: -

-
-    cat C         <--->  oper C : Type
-  
-    fun f : A     <--->  oper f : A
-  
-    lincat C = T  <--->  oper C : Type = T
-  
-    lin f = t     <--->  oper f : A = t
-
-

-

- -

- -

Library exercises

-

-1. Find resource grammar terms for the following -English phrases (in the category Phr). You can first try to -build the terms manually. -

-

-every man loves a woman -

-

-this grammar speaks more than ten languages -

-

-which languages aren't in the grammar -

-

-which languages did you want to speak -

-

-Then translate the phrases to other languages. -

-

- -

- -

Tenses

-

- -

-

-In Foods grammars, we have used the path -

-
-    --# -path=.:../foods:present
-
-

-The library subdirectory present is a restricted version -of the resource, with only present tense of verbs and sentences. -

-

-By just changing the path, we get all tenses: -

-
-    --# -path=.:../foods:alltenses
-
-

-Now we can see all the tenses of phrases, by using the -all flag -in linearization: -

-
-    > gr | l -all
-    This wine is delicious
-    Is this wine delicious
-    This wine isn't delicious
-    Isn't this wine delicious
-    This wine is not delicious
-    Is this wine not delicious
-    This wine has been delicious
-    Has this wine been delicious
-    This wine hasn't been delicious
-    Hasn't this wine been delicious
-    This wine has not been delicious
-    Has this wine not been delicious
-    This wine was delicious
-    Was this wine delicious
-    This wine wasn't delicious
-    Wasn't this wine delicious
-    This wine was not delicious
-    Was this wine not delicious
-    This wine had been delicious
-    Had this wine been delicious
-    This wine hadn't been delicious
-    Hadn't this wine been delicious
-    This wine had not been delicious
-    Had this wine not been delicious
-    This wine will be delicious
-    Will this wine be delicious
-    This wine won't be delicious
-    Won't this wine be delicious
-    This wine will not be delicious
-    Will this wine not be delicious
-    This wine will have been delicious
-    Will this wine have been delicious
-    This wine won't have been delicious
-    Won't this wine have been delicious
-    This wine will not have been delicious
-    Will this wine not have been delicious
-    This wine would be delicious
-    Would this wine be delicious
-    This wine wouldn't be delicious
-    Wouldn't this wine be delicious
-    This wine would not be delicious
-    Would this wine not be delicious
-    This wine would have been delicious
-    Would this wine have been delicious
-    This wine wouldn't have been delicious
-    Wouldn't this wine have been delicious
-    This wine would not have been delicious
-    Would this wine not have been delicious
-
-

-We also see -

-
    -
  • polarity (positive vs. negative) -
  • word order (direct vs. inverted) -
  • variation between contracted and full negation -
- -

-The list is even longer in languages that have more -tenses and moods, e.g. the Romance languages. -

-

- -

- -

Lesson 5: Refining semantics in abstract syntax

-

- -

-

-Goals: -

-
    -
  • include semantic conditions in grammars, by using -
      -
    • dependent types -
    • higher order abstract syntax -
    • proof objects -
    • semantic definitions -

      -These concepts are inherited from type theory (more precisely: -constructive type theory, or Martin-Löf type theory). -

      -Type theory is the basis of logical frameworks. -

      -GF = logical framework + concrete syntax. -
    -
- -

- -

- -

Dependent types

-

- -

-

-Problem: to express conditions of semantic well-formedness. -

-

-Example: a voice command system for a "smart house" wants to -eliminate meaningless commands. -

-

-Thus we want to restrict particular actions to -particular devices - we can dim a light, but we cannot -dim a fan. -

-

-The following example is borrowed from the -Regulus Book (Rayner & al. 2006). -

-

-A simple example is a "smart house" system, which -defines voice commands for household appliances. -

-

- -

- -

A dependent type system

-

-Ontology: -

-
    -
  • there are commands and device kinds -
  • for each kind of device, there are devices and actions -
  • a command concerns an action of some kind on a device of the same kind -
- -

-Abstract syntax formalizing this: -

-
-    cat
-      Command ;
-      Kind ; 
-      Device Kind ; -- argument type Kind 
-      Action Kind ; 
-    fun 
-      CAction : (k : Kind) -> Action k -> Device k -> Command ;
-
-

-Device and Action are both dependent types. -

-

- -

- -

Examples of devices and actions

-

-Assume the kinds light and fan, -

-
-    light, fan : Kind ;
-    dim : Action light ;
-
-

-Given a kind, k, you can form the device the k. -

-
-    DKindOne  : (k : Kind) -> Device k ;  -- the light
-
-

-Now we can form the syntax tree -

-
-    CAction light dim (DKindOne light)
-
-

-but we cannot form the trees -

-
-    CAction light dim (DKindOne fan)
-    CAction fan   dim (DKindOne light)
-    CAction fan   dim (DKindOne fan)
-
-

-

- -

- -

Linearization and parsing with dependent types

-

-Concrete syntax does not know if a category is a dependent type. -

-
-    lincat Action = {s : Str} ;
-    lin CAction _ act dev = {s = act.s ++ dev.s} ; 
-
-

-Notice that the Kind argument is suppressed in linearization. -

-

-Parsing with dependent types is performed in two phases: -

-
    -
  1. context-free parsing -
  2. filtering through type checker -
- -

-By just doing the first phase, the kind argument is not found: -

-
-    > parse "dim the light"
-    CAction ? dim (DKindOne light)
-
-

-Moreover, type-incorrect commands are not rejected: -

-
-    > parse "dim the fan"
-    CAction ? dim (DKindOne fan)
-
-

-The term ? is a metavariable, returned by the parser -for any subtree that is suppressed by a linearization rule. -These are the same kind of metavariables as were used here -to mark incomplete parts of trees in the syntax editor. -

-

- -

- -

Solving metavariables

-

-Use the command put_tree = pt with the option -typecheck: -

-
-    > parse "dim the light" | put_tree -typecheck
-    CAction light dim (DKindOne light)
-
-

-The typecheck process may fail, in which case an error message -is shown and no tree is returned: -

-
-    > parse "dim the fan" | put_tree -typecheck
-  
-    Error in tree UCommand (CAction ? 0 dim (DKindOne fan)) :
-      (? 0 <> fan) (? 0 <> light)
-
-

-

- -

- -

Polymorphism

-

- -

-

-Sometimes an action can be performed on all kinds of devices. -

-

-This is represented as a function that takes a Kind as an argument -and produces an Action for that Kind: -

-
-    fun switchOn, switchOff : (k : Kind) -> Action k ;
-
-

-Functions of this kind are called polymorphic. -

-

-We can use this kind of polymorphism in concrete syntax as well, -to express Haskell-type library functions: -

-
-    oper const :(a,b : Type) -> a -> b -> a =
-      \_,_,c,_ -> c ;
-  
-    oper flip : (a,b,c : Type) -> (a -> b ->c) -> b -> a -> c =
-      \_,_,_,f,x,y -> f y x ;
-
-

-

- -

- -

Dependent types: exercises

-

-1. Write an abstract syntax module with above contents -and an appropriate English concrete syntax. Try to parse the commands -dim the light and dim the fan, with and without solve filtering. -

-

-2. Perform random and exhaustive generation, with and without -solve filtering. -

-

-3. Add some device kinds and actions to the grammar. -

-

- -

- -

Proof objects

-

-Curry-Howard isomorphism = propositions as types principle: -a proposition is a type of proofs (= proof objects). -

-

-Example: define the less than proposition for natural numbers, -

-
-    cat Nat ; 
-    fun Zero : Nat ;
-    fun Succ : Nat -> Nat ;
-
-

-Define inductively what it means for a number x to be less than -a number y: -

-
    -
  • Zero is less than Succ y for any y. -
  • If x is less than y, then Succ x is less than Succ y. -
- -

-Expressing these axioms in type theory -with a dependent type Less x y and two functions constructing -its objects: -

-
-    cat Less Nat Nat ; 
-    fun lessZ : (y : Nat) -> Less Zero (Succ y) ;
-    fun lessS : (x,y : Nat) -> Less x y -> Less (Succ x) (Succ y) ;
-
-

-Example: the fact that 2 is less than 4 has the proof object -

-
-    lessS (Succ Zero) (Succ (Succ (Succ Zero)))
-          (lessS Zero (Succ (Succ Zero)) (lessZ (Succ Zero)))
-     : Less (Succ (Succ Zero)) (Succ (Succ (Succ (Succ Zero))))
-
-

-

- -

- -

Proof-carrying documents

-

-Idea: to be semantically well-formed, the abstract syntax of a document -must contain a proof of some property, -although the proof is not shown in the concrete document. -

-

-Example: documents describing flight connections: -

-

-To fly from Gothenburg to Prague, first take LH3043 to Frankfurt, then OK0537 to Prague. -

-

-The well-formedness of this text is partly expressible by dependent typing: -

-
-    cat
-      City ;
-      Flight City City ;
-    fun
-      Gothenburg, Frankfurt, Prague : City ;
-      LH3043 : Flight Gothenburg Frankfurt ;
-      OK0537 : Flight Frankfurt Prague ;
-
-

-To extend the conditions to flight connections, we introduce a category -of proofs that a change is possible: -

-
-    cat IsPossible (x,y,z : City)(Flight x y)(Flight y z) ;
-
-

-A legal connection is formed by the function -

-
-    fun Connect : (x,y,z : City) -> 
-      (u : Flight x y) -> (v : Flight y z) -> 
-        IsPossible x y z u v -> Flight x z ;
-
-

-

- -

- -

Restricted polymorphism

-

-Above, all Actions were either -

-
    -
  • monomorphic: defined for one Kind -
  • polymorphic: defined for all Kinds -
- -

-To make this scale up for new Kinds, we can refine this to -restricted polymorphism: defined for Kinds of a certain class -

-

-The notion of class uses the Curry-Howard isomorphism as follows: -

-
    -
  • a class is a predicate of Kinds --- i.e. a type depending on Kinds -
  • a Kind is in a class if there is a proof object of this type -
- -

- -

- -

Example: classes for switching and dimming

-

-We modify the smart house grammar: -

-
-  cat
-    Switchable Kind ;
-    Dimmable   Kind ;
-  fun
-    switchable_light : Switchable light ;
-    switchable_fan   : Switchable fan ;
-    dimmable_light   : Dimmable light ;
-  
-    switchOn : (k : Kind) -> Switchable k -> Action k ;
-    dim      : (k : Kind) -> Dimmable k -> Action k ;
-
-

-Classes for new actions can be added incrementally. -

-

- -

- -

Variable bindings

-

- -

-

-Mathematical notation and programming languages have -expressions that bind variables. -

-

-Example: universal quantifier formula -

-
-    (All x)B(x)
-
-

-The variable x has a binding (All x), and -occurs bound in the body B(x). -

-

-Examples from informal mathematical language: -

-
-    for all x, x is equal to x
-  
-    the function that for any numbers x and y returns the maximum of x+y
-    and x*y
-  
-    Let x be a natural number. Assume that x is even. Then x + 3 is odd.
-
-

-

- -

- -

Higher-order abstract syntax

-

-Abstract syntax can use functions as arguments: -

-
-    cat Ind ; Prop ;
-    fun All : (Ind -> Prop) -> Prop
-
-

-where Ind is the type of individuals and Prop, -the type of propositions. -

-

-Let us add an equality predicate -

-
-    fun Eq : Ind -> Ind -> Prop
-
-

-Now we can form the tree -

-
-    All (\x -> Eq x x)
-
-

-which we want to relate to the ordinary notation -

-
-    (All x)(x = x)
-
-

-In higher-order abstract syntax (HOAS), all variable bindings are -expressed using higher-order syntactic constructors. -

-

- -

- -

Higher-order abstract syntax: linearization

-

-HOAS has proved to be useful in the semantics and computer implementation of -variable-binding expressions. -

-

-How do we relate HOAS to the concrete syntax? -

-

-In GF, we write -

-
-    fun All : (Ind -> Prop) -> Prop
-    lin All B = {s = "(" ++ "All" ++ B.$0 ++ ")" ++ B.s}
-
-

-General rule: if an argument type of a fun function is -a function type A -> C, the linearization type of -this argument is the linearization type of C -together with a new field $0 : Str. -

-

-The argument B thus has the linearization type -

-
-    {s : Str ; $0 : Str},
-
-

-If there are more bindings, we add $1, $2, etc. -

-

- -

- -

Eta expansion

-

-To make sense of linearization, syntax trees must be -eta-expanded: for any function of type -

-
-    A -> B
-
-

-an eta-expanded syntax tree has the form -

-
-    \x -> b
-
-

-where b : B under the assumption x : A. -

-

-Given the linearization rule -

-
-    lin Eq a b = {s = "(" ++ a.s ++ "=" ++ b.s ++ ")"}
-
-

-the linearization of the tree -

-
-    \x -> Eq x x
-
-

-is the record -

-
-    {$0 = "x", s = ["( x = x )"]}
-
-

-Then we can compute the linearization of the formula, -

-
-    All (\x -> Eq x x)  --> {s = ["( All x ) ( x = x )"]}.
-
-

-The linearization of the variable x is, -"automagically", the string "x". -

-

- -

- -

Parsing variable bindings

-

-GF can treat any one-word string as a variable symbol. -

-
-    > p -cat=Prop "( All x ) ( x = x )"
-    All (\x -> Eq x x)
-
-

-Variables must be bound if they are used: -

-
-    > p -cat=Prop "( All x ) ( x = y )"
-    no tree found
-
-

-

- -

- -

Exercises on variable bindings

-

-1. Write an abstract syntax of the whole -predicate calculus, with the -connectives "and", "or", "implies", and "not", and the -quantifiers "exists" and "for all". Use higher-order functions -to guarantee that unbound variables do not occur. -

-

-2. Write a concrete syntax for your favourite -notation of predicate calculus. Use LaTeX as target language -if you want nice output. You can also try producing boolean -expressions of some programming language. Use as many parentheses as you need to -guarantee non-ambiguity. -

-

- -

- -

Semantic definitions

-

- -

-

-The fun judgements of GF are declarations of functions, giving their types. -

-

-Can we compute fun functions? -

-

-Mostly we are not interested, since functions are seen as constructors, -i.e. data forms - as usual with -

-
-    fun Zero : Nat ;
-    fun Succ : Nat -> Nat ;
-
-

-But it is also possible to give semantic definitions to functions. -The key word is def: -

-
-    fun one : Nat ;
-    def one = Succ Zero ;
-  
-    fun twice : Nat -> Nat ;
-    def twice x = plus x x ;
-  
-    fun plus : Nat -> Nat -> Nat ;
-    def 
-      plus x Zero = x ;
-      plus x (Succ y) = Succ (plus x y) ;
-
-

-

- -

- -

Computing a tree

-

-Computation: follow a chain of definitions until no definition -can be applied, -

-
-    plus one one -->
-    plus (Succ Zero) (Succ Zero) -->
-    Succ (plus (Succ Zero) Zero) -->
-    Succ (Succ Zero)
-
-

-Computation in GF is performed with the put_term command and the -compute transformation, e.g. -

-
-    > parse -tr "1 + 1" | put_term -transform=compute -tr | l
-    plus one one
-    Succ (Succ Zero)
-    s(s(0))
-
-

-

- -

- -

Definitional equality

-

-Two trees are definitionally equal if they compute into the same tree. -

-

-Definitional equality does not guarantee sameness of linearization: -

-
-    plus one one     ===> 1 + 1
-    Succ (Succ Zero) ===> s(s(0))
-
-

-The main use of this concept is in type checking: sameness of types. -

-

-Thus e.g. the following types are equal -

-
-    Less Zero one
-    Less Zero (Succ Zero)
-
-

-so that an object of one also is an object of the other. -

-

- -

- -

Judgement forms for constructors

-

-The judgement form data tells that a category has -certain functions as constructors: -

-
-    data Nat = Succ | Zero ;
-
-

-The type signatures of constructors are given separately, -

-
-    fun Zero : Nat ;
-    fun Succ : Nat -> Nat ;
-
-

-There is also a shorthand: -

-
-    data Succ : Nat -> Nat ;    ===   fun Succ : Nat -> Nat ;
-                                      data Nat = Succ ;
-
-

-Notice: in def definitions, identifier patterns not -marked as data will be treated as variables. -

-

- -

- -

Exercises on semantic definitions

-

-1. Implement an interpreter of a small functional programming -language with natural numbers, lists, pairs, lambdas, etc. Use higher-order -abstract syntax with semantic definitions. As concrete syntax, use -your favourite programming language. -

-

-2. There is no termination checking for def definitions. -Construct an example that makes type checking loop. -Type checking can be invoked with put_term -transform=solve. -

-

- -

- -

Lesson 6: Grammars of formal languages

-

- -

-

-Goals: -

-
    -
  • write grammars for formal languages (mathematical notation, programming languages) -
  • interface between formal and natural languages -
  • implement a compiler by using GF -
- -

- -

- -

Arithmetic expressions

-

-We construct a calculator with addition, subtraction, multiplication, and -division of integers. -

-
-    abstract Calculator = {
-  
-    cat Exp ;
-  
-    fun
-      EPlus, EMinus, ETimes, EDiv : Exp -> Exp -> Exp ;
-      EInt : Int -> Exp ;
-    }
-
-

-The category Int is a built-in category of -integers. Its syntax trees are integer literals, i.e. -sequences of digits: -

-
-    5457455814608954681 : Int
-
-

-These are the only objects of type Int: -grammars are not allowed to declare functions with Int as value type. -

-

- -

- -

Concrete syntax: a simple approach

-

-We begin with a -concrete syntax that always uses parentheses around binary -operator applications: -

-
-    concrete CalculatorP of Calculator = {
-  
-    lincat 
-      Exp = SS ;
-    lin
-      EPlus  = infix "+" ;
-      EMinus = infix "-" ;
-      ETimes = infix "*" ;
-      EDiv   = infix "/" ;
-      EInt i = i ;
-  
-    oper
-      infix : Str -> SS -> SS -> SS = \f,x,y -> 
-        ss ("(" ++ x.s ++ f ++ y.s ++ ")") ;
-    }
-
-

-Now we have -

-
-    > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4))
-    ( 2 + ( 3 * 4 ) )
-
-

-First problems: -

-
    -
  • to get rid of superfluous spaces and -
  • to recognize integer literals in the parser -
- -

- -

- -

Lexing and unlexing

-

- -

-

-The input of parsing in GF is not just a string, but a list of -tokens, returned by a lexer. -

-

-The default lexer in GF returns chunks separated by spaces: -

-
-    "(12 + (3 * 4))"  ===>  "(12", "+", "(3", "*", "4))"
-
-

-The proper way would be -

-
-    "(", "12", "+", "(", "3", "*", "4", ")", ")"
-
-

-Moreover, the tokens "12", "3", and "4" should be recognized as -integer literals - they cannot be found in the grammar. -

-

- -

-

-Lexers are invoked by flags to the command put_string = ps. -

-
-    > put_string -lexcode "(2 + (3 * 4))"
-    ( 2 + ( 3 * 4 ) )
-
-

-This can be piped into a parser, as usual: -

-
-    > ps -lexcode "(2 + (3 * 4))" | parse
-    EPlus (EInt 2) (ETimes (EInt 3) (EInt 4))
-
-

-In linearization, we use a corresponding unlexer: -

-
-    > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) | ps -unlexcode
-    (2 + (3 * 4))
-
-

-

- -

- -

Most common lexers and unlexers

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lexerunlexerdescription
charsuncharseach character is a token
lexcodeunlexcodeprogram code conventions (uses Haskell's lex)
lexmixedunlexmixedlike text, but between $ signs like code
lextextunlextextwith conventions on punctuation and capitals
wordsunwords(default) tokens separated by space characters
- -

- -

- -

Precedence and fixity

-

-Arithmetic expressions should be unambiguous. If we write -

-
-    2 + 3 * 4
-
-

-it should be parsed as one, but not both, of -

-
-    EPlus (EInt 2) (ETimes (EInt 3) (EInt 4))
-    ETimes (EPlus (EInt 2) (EInt 3)) (EInt 4)
-
-

-We choose the former tree, because -multiplication has higher precedence than addition. -

-

-To express the latter tree, we have to use parentheses: -

-
-    (2 + 3) * 4
-
-

-The usual precedence rules: -

-
    -
  • Integer constants and expressions in parentheses have the highest precedence. -
  • Multiplication and division have equal precedence, lower than the highest - but higher than addition and subtraction, which are again equal. -
  • All the four binary operations are left-associative: - 1 + 2 + 3 means the same as (1 + 2) + 3. -
- -

- -

- -

Precedence as a parameter

-

-Precedence can be made into an inherent feature of expressions: -

-
-    oper
-      Prec : PType = Ints 2 ;
-      TermPrec : Type = {s : Str ; p : Prec} ;
-  
-      mkPrec : Prec -> Str -> TermPrec = \p,s -> {s = s ; p = p} ;
-  
-    lincat 
-      Exp = TermPrec ;
-
-

-Notice Ints 2: a parameter type, whose values are the integers -0,1,2. -

-

-Using precedence levels: compare the inherent precedence of an -expression with the expected precedence. -

-
    -
  • if the inherent precedence is lower than the expected precedence, - use parentheses -
  • otherwise, no parentheses are needed -
- -

-This idea is encoded in the operation -

-
-    oper usePrec : TermPrec -> Prec -> Str = \x,p ->
-      case lessPrec x.p p of {
-        True  => "(" ++ x.s ++ ")" ;
-        False => x.s
-      } ;
-
-

-(We use lessPrec from lib/prelude/Formal.) -

-

- -

- -

Fixities

-

-We can define left-associative infix expressions: -

-
-    infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
-      mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ;
-
-

-Constant-like expressions (the highest level): -

-
-    constant : Str -> TermPrec = mkPrec 2 ;
-
-

-All these operations can be found in lib/prelude/Formal, -which has 5 levels. -

-

-Now we can write the whole concrete syntax of Calculator compactly: -

-
-    concrete CalculatorC of Calculator = open Formal, Prelude in {
-  
-    flags lexer = codelit ; unlexer = code ; startcat = Exp ;
-  
-    lincat Exp = TermPrec ;
-  
-    lin
-      EPlus  = infixl 0 "+" ;
-      EMinus = infixl 0 "-" ;
-      ETimes = infixl 1 "*" ;
-      EDiv   = infixl 1 "/" ;
-      EInt i = constant i.s ;
-    }
-
-

-

- -

- -

Exercises on precedence

-

-1. Define non-associative and right-associative infix operations -analogous to infixl. -

-

-2. Add a constructor that puts parentheses around expressions -to raise their precedence, but that is eliminated by a def definition. -Test parsing with and without a pipe to pt -transform=compute. -

-

- -

- -

Code generation as linearization

-

-Translate arithmetic (infix) to JVM (postfix): -

-
-    2 + 3 * 4
-  
-      ===>
-  
-    iconst 2 ; iconst 3 ; iconst 4 ; imul ; iadd
-
-

-Just give linearization rules for JVM: -

-
-    lin
-      EPlus  = postfix "iadd" ;
-      EMinus = postfix "isub" ;
-      ETimes = postfix "imul" ;
-      EDiv   = postfix "idiv" ;
-      EInt i = ss ("iconst" ++ i.s) ;
-    oper
-      postfix : Str -> SS -> SS -> SS = \op,x,y -> 
-        ss (x.s ++ ";" ++ y.s ++ ";" ++ op) ;
-
-

-

- -

- -

Programs with variables

-

-A straight code programming language, with -initializations and assignments: -

-
-    int x = 2 + 3 ;  
-    int y = x + 1 ; 
-    x = x + 9 * y ;
-
-

-We define programs by the following constructors: -

-
-    fun
-      PEmpty : Prog ;
-      PInit  : Exp -> (Var -> Prog) -> Prog ;
-      PAss   : Var -> Exp  -> Prog  -> Prog ;
-
-

-PInit uses higher-order abstract syntax for making the -initialized variable available in the continuation of the program. -

-

-The abstract syntax tree for the above code is -

-
-    PInit (EPlus (EInt 2) (EInt 3)) (\x -> 
-      PInit (EPlus (EVar x) (EInt 1)) (\y -> 
-        PAss x (EPlus (EVar x) (ETimes (EInt 9) (EVar y))) 
-          PEmpty))
-
-

-No uninitialized variables are allowed - there are no constructors for Var! -But we do have the rule -

-
-    fun EVar : Var -> Exp ;
-
-

-The rest of the grammar is just the same as for arithmetic expressions -here. The best way to implement it is perhaps by writing a -module that extends the expression module. The most natural start category -of the extension is Prog. -

-

- -

- -

Exercises on code generation

-

-1. Define a C-like concrete syntax of the straight-code language. -

-

-2. Extend the straight-code language to expressions of type float. -To guarantee type safety, you can define a category Typ of types, and -make Exp and Var dependent on Typ. Basic floating point expressions -can be formed from literals of the built-in GF type Float. The arithmetic -operations should be made polymorphic (as here). -

-

-3. Extend JVM generation to the straight-code language, using -two more instructions -

-
    -
  • iload x, which loads the value of the variable x -
  • istore x which stores a value to the variable x -
- -

-Thus the code for the example in the previous section is -

-
-    iconst 2 ; iconst 3 ; iadd ; istore x ;
-    iload x ; iconst 1 ; iadd ; istore y ;
-    iload x ; iconst 9 ; iload y ; imul ; iadd ; istore x ;
-
-

-

-4. If you did the exercise of adding floating point numbers to -the language, you can now cash out the main advantage of type checking -for code generation: selecting type-correct JVM instructions. The floating -point instructions are precisely the same as the integer ones, except that -the prefix is f instead of i, and that fconst takes floating -point literals as arguments. -

-

- -

- -

Lesson 7: Embedded grammars

-

- -

-

-Goals: -

-
    -
  • use grammars as parts of programs written in Haskell and JavaScript -
  • implement stand-alone question-answering systems and translators based on - GF grammars -
  • generate language models for speech recognition from GF grammars -
- -

- -

- -

Functionalities of an embedded grammar format

-

-GF grammars can be used as parts of programs written in other programming -languages, to be called host languages. -This facility is based on several components: -

-
    -
  • PGF: a portable format for multilingual GF grammars -
  • a PGF interpreter written in the host language -
  • a library in the host language that enables calling the interpreter -
  • a way to manipulate abstract syntax trees in the host language -
- -

- -

- -

The portable grammar format

-

-The portable format is called PGF, "Portable Grammar Format". -

-

-This format is produced by the GF batch compiler gfc, -executable from the operating system shell: -

-
-    % gfc --make SOURCE.gf
-
-

-PGF is the recommended format in -which final grammar products are distributed, because it -is stripped of superfluous information and can be started and applied -faster than sets of separate modules. -

-

-Application programmers never have any need to read or modify PGF files. -

-

-PGF thus plays the same role as machine code in -general-purpose programming (or bytecode in Java). -

-

- -

- -

Haskell: the PGF module

-

-The Haskell API contains (among other things) the following types and functions: -

-
-    readPGF   :: FilePath -> IO PGF
-  
-    linearize :: PGF -> Language -> Tree -> String
-    parse     :: PGF -> Language -> Category -> String -> [Tree]
-  
-    linearizeAll     :: PGF -> Tree -> [String]
-    linearizeAllLang :: PGF -> Tree -> [(Language,String)]
-  
-    parseAll     :: PGF -> Category -> String -> [[Tree]]
-    parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])]
-  
-    languages    :: PGF -> [Language]
-    categories   :: PGF -> [Category]
-    startCat     :: PGF -> Category
-
-

-This is the only module that needs to be imported in the Haskell application. -It is available as a part of the GF distribution, in the file -src/PGF.hs. -

-

- -

- -

First application: a translator

-

-Let us first build a stand-alone translator, which can translate -in any multilingual grammar between any languages in the grammar. -

-
-  module Main where
-  
-  import PGF
-  import System (getArgs)
-  
-  main :: IO () 
-  main = do
-    file:_ <- getArgs
-    gr     <- readPGF file
-    interact (translate gr)
-  
-  translate :: PGF -> String -> String
-  translate gr s = case parseAllLang gr (startCat gr) s of
-    (lg,t:_):_ -> unlines [linearize gr l t | l <- languages gr, l /= lg]
-    _ -> "NO PARSE"
-
-

-To run the translator, first compile it by -

-
-    % ghc --make -o trans Translator.hs 
-
-

-For this, you need the Haskell compiler GHC. -

-

- -

- -

Producing PGF for the translator

-

-Then produce a PGF file. For instance, the Food grammar set can be -compiled as follows: -

-
-    % gfc --make FoodEng.gf FoodIta.gf
-
-

-This produces the file Food.pgf (its name comes from the abstract syntax). -

-

-The Haskell library function interact makes the trans program work -like a Unix filter, which reads from standard input and writes to standard -output. Therefore it can be a part of a pipe and read and write files. -The simplest way to translate is to echo input to the program: -

-
-    % echo "this wine is delicious" | ./trans Food.pgf
-    questo vino è delizioso
-
-

-The result is given in all languages except the input language. -

-

- -

- -

A translator loop

-

-To avoid starting the translator over and over again: -change interact in the main function to loop, defined as -follows: -

-
-  loop :: (String -> String) -> IO ()
-  loop trans = do 
-    s <- getLine
-    if s == "quit" then putStrLn "bye" else do  
-      putStrLn $ trans s
-      loop trans
-
-

-The loop keeps on translating line by line until the input line -is quit. -

-

- -

- -

A question-answer system

-

- -

-

-The next application is also a translator, but it adds a -transfer component - a function that transforms syntax trees. -

-

-The transfer function we use is one that computes a question into an answer. -

-

-The program accepts simple questions about arithmetic and answers -"yes" or "no" in the language in which the question was asked: -

-
-    Is 123 prime?
-    No.
-    77 est impair ?
-    Oui.
-
-

-We change the pure translator by giving -the translate function the transfer as an extra argument: -

-
-    translate :: (Tree -> Tree) -> PGF -> String -> String
-
-

-Ordinary translation is a special case where -transfer is the identity function (id in Haskell). -

-

-To reply in the same language as the question: -

-
-    translate tr gr s = case parseAllLang gr (startCat gr) s of
-      (lg,t:_):_ -> linearize gr lg (tr t)
-      _ -> "NO PARSE"
-
-

-

- -

- -

Abstract syntax of the query system

-

-Input: abstract syntax judgements -

-
-  abstract Query = {
-  
-    flags startcat=Question ;
-  
-    cat 
-      Answer ; Question ; Object ;
-  
-    fun 
-      Even   : Object -> Question ;
-      Odd    : Object -> Question ;
-      Prime  : Object -> Question ;
-      Number : Int -> Object ;
-  
-      Yes : Answer ;
-      No  : Answer ;
-  }
-
-

-

- -

- -

Exporting GF datatypes to Haskell

-

-To make it easy to define a transfer function, we export the -abstract syntax to a system of Haskell datatypes: -

-
-    % gfc --output-format=haskell Query.pgf
-
-

-It is also possible to produce the Haskell file together with the PGF file, by -

-
-    % gfc --make --output-format=haskell QueryEng.gf
-
-

-The result is a file named Query.hs, containing a -module named Query. -

-

- -

-

-Output: Haskell definitions -

-
-  module Query where
-  import PGF
-  
-  data GAnswer =
-     GYes 
-   | GNo 
-  
-  data GObject = GNumber GInt 
-  
-  data GQuestion =
-     GPrime GObject 
-   | GOdd GObject 
-   | GEven GObject 
-  
-  newtype GInt = GInt Integer
-
-

-All type and constructor names are prefixed with a G to prevent clashes. -

-

-The Haskell module name is the same as the abstract syntax name. -

-

- -

- -

The question-answer function

-

-Haskell's type checker guarantees that the functions are well-typed also with -respect to GF. -

-
-  answer :: GQuestion -> GAnswer
-  answer p = case p of
-    GOdd x   -> test odd x
-    GEven x  -> test even x
-    GPrime x -> test prime x
-  
-  value :: GObject -> Int
-  value e = case e of
-    GNumber (GInt i) -> fromInteger i
-  
-  test :: (Int -> Bool) -> GObject -> GAnswer
-  test f x = if f (value x) then GYes else GNo
-
-

-

- -

- -

Converting between Haskell and GF trees

-

-The generated Haskell module also contains -

-
-  class Gf a where 
-    gf :: a -> Tree
-    fg :: Tree -> a
-  
-  instance Gf GQuestion where
-    gf (GEven x1) = DTr [] (AC (CId "Even")) [gf x1]
-    gf (GOdd x1) = DTr [] (AC (CId "Odd")) [gf x1]
-    gf (GPrime x1) = DTr [] (AC (CId "Prime")) [gf x1]
-    fg t =
-      case t of
-        DTr [] (AC (CId "Even")) [x1] -> GEven (fg x1)
-        DTr [] (AC (CId "Odd")) [x1] -> GOdd (fg x1)
-        DTr [] (AC (CId "Prime")) [x1] -> GPrime (fg x1)
-        _ -> error ("no Question " ++ show t)
-
-

-For the programmer, it is enough to know: -

-
    -
  • all GF names are in Haskell prefixed with G -
  • gf translates from Haskell objects to GF trees -
  • fg translates from GF trees to Haskell objects -
- -

- -

- -

Putting it all together: the transfer definition

-
-  module TransferDef where
-  
-  import PGF (Tree)
-  import Query   -- generated from GF
-  
-  transfer :: Tree -> Tree
-  transfer = gf . answer . fg
-  
-  answer :: GQuestion -> GAnswer
-  answer p = case p of
-    GOdd x   -> test odd x
-    GEven x  -> test even x
-    GPrime x -> test prime x
-  
-  value :: GObject -> Int
-  value e = case e of
-    GNumber (GInt i) -> fromInteger i
-  
-  test :: (Int -> Bool) -> GObject -> GAnswer
-  test f x = if f (value x) then GYes else GNo
-  
-  prime :: Int -> Bool
-  prime x = elem x primes where
-    primes = sieve [2 .. x]
-    sieve (p:xs) = p : sieve [ n | n <- xs, n `mod` p > 0 ]
-    sieve [] = []
-
-

-

- -

- -

Putting it all together: the Main module

-

-Here is the complete code in the Haskell file TransferLoop.hs. -

-
-  module Main where
-  
-  import PGF
-  import TransferDef (transfer)
-  
-  main :: IO () 
-  main = do
-    gr <- readPGF "Query.pgf"
-    loop (translate transfer gr)
-  
-  loop :: (String -> String) -> IO ()
-  loop trans = do 
-    s <- getLine
-    if s == "quit" then putStrLn "bye" else do  
-      putStrLn $ trans s
-      loop trans
-  
-  translate :: (Tree -> Tree) -> PGF -> String -> String
-  translate tr gr s = case parseAllLang gr (startCat gr) s of
-    (lg,t:_):_ -> linearize gr lg (tr t)
-    _ -> "NO PARSE"
-
-

-

- -

- -

Putting it all together: the Makefile

-

-To automate the production of the system, we write a Makefile as follows: -

-
-  all:
-          gfc --make --output-format=haskell QueryEng
-          ghc --make -o ./math TransferLoop.hs
-          strip math
-
-

-(The empty segments starting the command lines in a Makefile must be tabs.) -Now we can compile the whole system by just typing -

-
-    make
-
-

-Then you can run it by typing -

-
-    ./math
-
-

-Just to summarize, the source of the application consists of the following files: -

-
-    Makefile         -- a makefile
-    Query.gf         -- abstract syntax
-    Query???.gf      -- concrete syntaxes
-    TransferDef.hs   -- definition of question-to-answer function
-    TransferLoop.hs  -- Haskell Main module
-
-

-

- -

- -

Web server applications

-

-PGF files can be used in web servers, for which there is a Haskell library included -in src/server/. How to build a server for tasks like translators is explained -in the README file in that directory. -

-

-One of the servers that can be readily built with the library (without any -programming required) is fridge poetry magnets. It is an application that -uses an incremental parser to suggest grammatically correct next words. Here -is an example of its application to the Foods grammars. -

-

- -

-

- -

- -

JavaScript applications

-

-JavaScript is a programming language that has interpreters built into most -web browsers. It is therefore usable for client side web programs, which can even -be run without access to the internet. The following figure shows a JavaScript -program compiled from GF grammars as run on an iPhone. -

-

- -

-

- -

- -

Compiling to JavaScript

-

-JavaScript is one of the output formats of the GF batch compiler. Thus the following -command generates a JavaScript file from two Food grammars. -

-
-    % gfc --make --output-format=js FoodEng.gf FoodIta.gf
-
-

-The name of the generated file is Food.js, derived from the top-most abstract -syntax name. This file contains the multilingual grammar as a JavaScript object. -

-

- -

- -

Using the JavaScript grammar

-

-To perform parsing and linearization, the run-time library -gflib.js is used. It is included in GF/lib/javascript/, together with -some other JavaScript and HTML files; these files can be used -as templates for building applications. -

-

-An example of usage is -translator.html, -which is in fact initialized with -a pointer to the Food grammar, so that it provides translation between the English -and Italian grammars: -

-

- -

-

-The grammar must have the name grammar.js. The abstract syntax and start -category names in translator.html must match the ones in the grammar. -With these changes, the translator works for any multilingual GF grammar. -

-

- -

- -

Language models for speech recognition

-

-The standard way of using GF in speech recognition is by building -grammar-based language models. -

-

-GF supports several formats, including -GSL, the format used in the Nuance speech recognizer. -

-

-GSL is produced from GF by running gfc with the flag ---output-format=gsl. -

-

-Example: GSL generated from FoodsEng.gf. -

-
-    % gfc --make --output-format=gsl FoodsEng.gf
-    % more FoodsEng.gsl
-  
-    ;GSL2.0
-    ; Nuance speech recognition grammar for FoodsEng
-    ; Generated by GF
-  
-    .MAIN Phrase_cat
-  
-    Item_1 [("that" Kind_1) ("this" Kind_1)]
-    Item_2 [("these" Kind_2) ("those" Kind_2)]
-    Item_cat [Item_1 Item_2]
-    Kind_1 ["cheese" "fish" "pizza" (Quality_1 Kind_1)
-            "wine"]
-    Kind_2 ["cheeses" "fish" "pizzas"
-            (Quality_1 Kind_2) "wines"]
-    Kind_cat [Kind_1 Kind_2]
-    Phrase_1 [(Item_1 "is" Quality_1)
-              (Item_2 "are" Quality_1)]
-    Phrase_cat Phrase_1
-    
-    Quality_1 ["boring" "delicious" "expensive"
-               "fresh" "italian" ("very" Quality_1) "warm"]
-    Quality_cat Quality_1
-
-

-

- -

- -

More speech recognition grammar formats

-

-Other formats available via the --output-format flag include: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FormatDescription
gslNuance GSL speech recognition grammar
jsgfJava Speech Grammar Format (JSGF)
jsgf_sisr_oldJSGF with semantic tags in SISR WD 20030401 format
srgs_abnfSRGS ABNF format
srgs_xmlSRGS XML format
srgs_xml_probSRGS XML format, with weights
slffinite automaton in the HTK SLF format
slf_subfinite automaton with sub-automata in HTK SLF
- -

-All currently available formats can be seen with gfc --help. -

- - - - diff --git a/doc/gf-tutorial.txt b/doc/gf-tutorial.txt deleted file mode 100644 index 8e8b8172a..000000000 --- a/doc/gf-tutorial.txt +++ /dev/null @@ -1,5022 +0,0 @@ -Grammatical Framework Tutorial -Aarne Ranta -Version 3.1.2, November 2008 - - -% NOTE: this is a txt2tags file. -% Create a tex file from this file using: -% txt2tags --toc -ttex gf-tutorial.txt - -%!target:html -%!encoding: iso-8859-1 - -%!postproc(tex) : "\\subsection\*" "\\newslide" -%!preproc(tex): "#NEW" "" -%!postproc(html): #NEW - - - -%!postproc(html): #OVERVIEW

Overview

- -%%!postproc(tex): "section\*" "section" - -%!postproc(tex): "\\documentclass{article}" "" - -%!postproc(tex): "subsection\*" "section" -%!postproc(tex): "section\*" "chapter" - -%!postproc(tex): "textbf{Exercise}" "exercise" -%!postproc(tex): "textbf" "keywrd" - -%!postproc(html): #BCEN
-%!postproc(html): #ECEN
- -%!postproc(tex): #BCEN "begin{center}" -%!postproc(tex): #ECEN "end{center}" - -%!postproc(tex): #BEQU "bequ" -%!postproc(tex): #ENQU "enqu" -%!postproc(html): #BEQU "
" -%!postproc(html): #ENQU "
" - -%!preproc(html): #EDITORPNG [quick-editor.png] -%!preproc(tex): #EDITORPNG [10lang-small.png] - -%!preproc(html): #LOGOPNG [Logos/gf0.png] -%!preproc(tex): #LOGOPNG [Logos/gf0.png] - - -%!postproc(tex): #PARTone "part{Tutorial}" -%!postproc(tex): #PARTtwo "part{Applications of Grammars}" -%!postproc(tex): #PARTfour "part{Advanced Grammar Writing}" -%!postproc(tex): #PARTthree "part{Reference Manual}" - -%%!postproc(tex): #PARTbnf "include{DocGF}" -%!postproc(tex): #PARTquickref "chapter{Quick Reference}" -%!postproc(tex): #twocolumn ""%twocolumn" -%!postproc(tex): #newpage "newpage" -%!postproc(tex): #smallsize "small" -%!postproc(tex): #normalsize "normalsize" -%!postproc(tex): #startappendix "appendix" - - -%!postproc(tex): #indexYACC "index{YACC}" - -%!postproc(tex): #MYTREE "input{mytree}" -%!preproc(html): #MYTREE [mytree.png] -%!postproc(tex): #FOODMARKET "input{foodmarket}" -%!preproc(html): #FOODMARKET [foodmarket.png] -%!postproc(tex): #CATEGORIES "input{categories}" -%!preproc(html): #CATEGORIES [categories.png] - -%!postproc(tex): #Syntaxpic "input{Syntax}" -%!postproc(tex): #Germanpic "input{German}" - -%!postproc(tex): #REFERENCES "input{references}" - -%!postproc(tex): #FORMULAone "input{FORMULAone}" - -%!postproc(tex): #SETLENGTHS "input{SETLENGTHS}" - -%!postproc(tex): #PRINTINDEX "printindex" - -%!postproc(tex): #Lchaptwo "label{chaptwo}" -%!postproc(tex): #Rchaptwo "chref{chaptwo}" -%!postproc(html): #Lchaptwo -%!postproc(html): #Rchaptwo Lesson 1 - -%!postproc(tex): #Lchapthree "label{chapthree}" -%!postproc(tex): #Rchapthree "chref{chapthree}" -%!postproc(html): #Lchapthree -%!postproc(html): #Rchapthree Lesson 2 - -%!postproc(tex): #Lchapfour "label{chapfour}" -%!postproc(tex): #Rchapfour "chref{chapfour}" -%!postproc(html): #Lchapfour -%!postproc(html): #Rchapfour Lesson 3 - -%!postproc(tex): #Lchapfive "label{chapfive}" -%!postproc(tex): #Rchapfive "chref{chapfive}" -%!postproc(html): #Lchapfive -%!postproc(html): #Rchapfive Lesson 4 - 
-%!postproc(tex): #Lchapsix "label{chapsix}" -%!postproc(tex): #Rchapsix "chref{chapsix}" -%!postproc(html): #Lchapsix -%!postproc(html): #Rchapsix Lesson 5 - -%!postproc(tex): #Lchapseven "label{chapseven}" -%!postproc(tex): #Rchapseven "chref{chapseven}" -%!postproc(html): #Lchapseven -%!postproc(html): #Rchapseven Lesson 6 - -%!postproc(tex): #Lchapeight "label{chapeight}" -%!postproc(tex): #Rchapeight "chref{chapeight}" -%!postproc(html): #Lchapeight -%!postproc(html): #Rchapeight Lesson 7 - - -%2.7.2 -%!postproc(tex): #Lsecjment "label{secjment}" -%!postproc(tex): #Rsecjment "sref{secjment}" -%!postproc(html): #Lsecjment -%!postproc(html): #Rsecjment here - -%3.4 -%!postproc(tex): #Lsecanitalian "label{secanitalian}" -%!postproc(tex): #Rsecanitalian "sref{secanitalian}" -%!postproc(html): #Lsecanitalian -%!postproc(html): #Rsecanitalian here - -%3.6.1 -%!postproc(tex): #Lsectreebank "label{sectreebank}" -%!postproc(tex): #Rsectreebank "sref{sectreebank}" -%!postproc(html): #Lsectreebank -%!postproc(html): #Rsectreebank here - - -%3.6.4 -%!postproc(tex): #Lsecediting "label{secediting}" -%!postproc(tex): #Rsecediting "sref{secediting}" -%!postproc(html): #Lsecediting -%!postproc(html): #Rsecediting here - - -%3.9.5 -%!postproc(tex): #Lsecpartapp "label{secpartapp}" -%!postproc(tex): #Rsecpartapp "sref{secpartapp}" -%!postproc(html): #Lsecpartapp -%!postproc(html): #Rsecpartapp here - -%3.10 -%!postproc(tex): #Lsecarchitecture "label{secarchitecture}" -%!postproc(tex): #Rsecarchitecture "sref{secarchitecture}" -%!postproc(html): #Lsecarchitecture -%!postproc(html): #Rsecarchitecture here - -%4.6 -%!postproc(tex): #Lsecinflection "label{secinflection}" -%!postproc(tex): #Rsecinflection "sref{secinflection}" -%!postproc(html): #Lsecinflection -%!postproc(html): #Rsecinflection here - -%4.7 -%!postproc(tex): #Lsecitalian "label{secitalian}" -%!postproc(tex): #Rsecitalian "sref{secitalian}" -%!postproc(html): #Lsecitalian -%!postproc(html): #Rsecitalian here - 
-%4.10.2 -%!postproc(tex): #Lsecmatching "label{secmatching}" -%!postproc(tex): #Rsecmatching "sref{secmatching}" -%!postproc(html): #Lsecmatching -%!postproc(html): #Rsecmatching here - -%5.2 -%!postproc(tex): #Lseclexical "label{seclexical}" -%!postproc(tex): #Rseclexical "sref{seclexical}" -%!postproc(html): #Lseclexical -%!postproc(html): #Rseclexical here - -%5.4 -%!postproc(tex): #Lsecenglish "label{secenglish}" -%!postproc(tex): #Rsecenglish "sref{secenglish}" -%!postproc(html): #Lsecenglish -%!postproc(html): #Rsecenglish here - -%5.5 -%!postproc(tex): #Lsecfunctor "label{secfunctor}" -%!postproc(tex): #Rsecfunctor "sref{secfunctor}" -%!postproc(html): #Lsecfunctor -%!postproc(html): #Rsecfunctor here - -%5.6 -%!postproc(tex): #Lsecinterface "label{secinterface}" -%!postproc(tex): #Rsecinterface "sref{secinterface}" -%!postproc(html): #Lsecinterface -%!postproc(html): #Rsecinterface here - -%5.11 -%!postproc(tex): #Lsecbrowsing "label{secbrowsing}" -%!postproc(tex): #Rsecbrowsing "sref{secbrowsing}" -%!postproc(html): #Lsecbrowsing -%!postproc(html): #Rsecbrowsing here - -%5.12 -%!postproc(tex): #Lsecextended "label{secextended}" -%!postproc(tex): #Rsecextended "sref{secextended}" -%!postproc(html): #Lsecextended -%!postproc(html): #Rsecextended here - -%5.13 -%!postproc(tex): #Lsectense "label{sectense}" -%!postproc(tex): #Rsectense "sref{sectense}" -%!postproc(html): #Lsectense -%!postproc(html): #Rsectense here - -%5.14.2 -%!postproc(tex): #Lseclock "label{seclock}" -%!postproc(tex): #Rseclock "sref{seclock}" -%!postproc(html): #Lseclock -%!postproc(html): #Rseclock here - -%6.2 -%!postproc(tex): #Lsecsmarthouse "label{secsmarthouse}" -%!postproc(tex): #Rsecsmarthouse "sref{secsmarthouse}" -%!postproc(html): #Lsecsmarthouse -%!postproc(html): #Rsecsmarthouse here - -%6.3 -%!postproc(tex): #Lsecpolymorphic "label{secpolymorphic}" -%!postproc(tex): #Rsecpolymorphic "sref{secpolymorphic}" -%!postproc(html): #Lsecpolymorphic -%!postproc(html): 
#Rsecpolymorphic here - -%6.7 -%!postproc(tex): #Lsecbinding "label{secbinding}" -%!postproc(tex): #Rsecbinding "sref{secbinding}" -%!postproc(html): #Lsecbinding -%!postproc(html): #Rsecbinding here - - -%6.8 -%!postproc(tex): #Lsecdefdef "label{secdefdef}" -%!postproc(tex): #Rsecdefdef "sref{secdefdef}" -%!postproc(html): #Lsecdefdef -%!postproc(html): #Rsecdefdef here - -%7.2 -%!postproc(tex): #Lseclexing "label{seclexing}" -%!postproc(tex): #Rseclexing "sref{seclexing}" -%!postproc(html): #Lseclexing -%!postproc(html): #Rseclexing here - -%7.3 -%!postproc(tex): #Lsecprecedence "label{secprecedence}" -%!postproc(tex): #Rsecprecedence "sref{secprecedence}" -%!postproc(html): #Lsecprecedence -%!postproc(html): #Rsecprecedence here - -%8.3.4 -%!postproc(tex): #Lsecmathprogram "label{secmathprogram}" -%!postproc(tex): #Rsecmathprogram "sref{secmathprogram}" -%!postproc(html): #Lsecmathprogram -%!postproc(html): #Rsecmathprogram here - - - - - - -%!postproc(tex): #APPENDIX "appendix" -%!postproc(tex): #CHAPTER "chapter{The GF Programming Language}" -%!postproc(tex): #TOC "tableofcontents" - -%!postproc(tex): #BECE "begin{center}" -%!postproc(tex): #ENCE "end{center}" -%!postproc(tex): "subsection\*" "section" -%!postproc(tex): "section\*" "chapter" -%%!postproc(tex): "paragraph\{\}\\bf" "subsubsection" - -%!postproc(tex): #PARTbnf "input{RefDocGF.tex}" -%!preproc(html): #PARTbnf %!include: RefDocGF.txt - -%!postproc(tex): "textbf" "keywrd" -%!postproc(tex): #PRINTINDEX "printindex" -%!preproc(html): #PRINTINDEX "" - -%!postproc(tex): #sugar "sugar" -%!postproc(tex): #comput "computes" - -%!postproc(tex): #Aone "subscr{A}{1}" -%!postproc(tex): #Aen "subscr{A}{n}" -%!postproc(tex): #Aii "subscr{A}{i}" -%!postproc(tex): #aone "subscr{a}{1}" -%!postproc(tex): #anone "subscr{a}{n-1}" -%!postproc(tex): #aen "subscr{a}{n}" -%!postproc(tex): #Bone "subscr{B}{1}" -%!postproc(tex): #Bem "subscr{B}{m}" -%!postproc(tex): #Ben "subscr{B}{n}" -%!postproc(tex): #Cone "subscr{C}{1}" 
-%!postproc(tex): #Cen "subscr{C}{n}" -%!postproc(tex): #Cii "subscr{C}{i}" -%!postproc(tex): #fone "subscr{f}{1}" -%!postproc(tex): #fen "subscr{f}{n}" -%!postproc(tex): #Gone "subscr{G}{1}" -%!postproc(tex): #Gen "subscr{G}{n}" -%!postproc(tex): #pone "subscr{p}{1}" -%!postproc(tex): #pem "subscr{p}{m}" -%!postproc(tex): #pen "subscr{p}{n}" -%!postproc(tex): #pii "subscr{p}{i}" -%!postproc(tex): #rii "subscr{r}{i}" -%!postproc(tex): #rone "subscr{r}{1}" -%!postproc(tex): #ren "subscr{r}{n}" -%!postproc(tex): #sii "subscr{s}{i}" -%!postproc(tex): #sone "subscr{s}{1}" -%!postproc(tex): #sen "subscr{s}{n}" -%!postproc(tex): #Tone "subscr{T}{1}" -%!postproc(tex): #Ten "subscr{T}{n}" -%!postproc(tex): #tone "subscr{t}{1}" -%!postproc(tex): #tem "subscr{t}{m}" -%!postproc(tex): #ten "subscr{t}{n}" -%!postproc(tex): #tii "subscr{t}{i}" -%!postproc(tex): #Vone "subscr{V}{1}" -%!postproc(tex): #Ven "subscr{V}{n}" -%!postproc(tex): #Vii "subscr{V}{i}" -%!postproc(tex): #xnone "subscr{x}{n-1}" -%!postproc(tex): #xone "subscr{x}{1}" -%!postproc(tex): #xen "subscr{x}{n}" -%!postproc(tex): #xii "subscr{x}{i}" -%!postproc(tex): #yone "subscr{y}{1}" -%!postproc(tex): #yem "subscr{y}{m}" -%!postproc(tex): #zone "subscr{z}{1}" -%!postproc(tex): #zem "subscr{z}{m}" - -%%% undo the effect for these links in the synopsis -%!postproc(tex): "subscr\{G\}\{n\}der" "#Gender" -%!postproc(tex): "subscr\{T\}\{n\}se" "#Tense" - - -%%!target:html -%!postproc(html): #APPENDIX "" -%!postproc(html): #CHAPTER "" -%!postproc(html): #TOC "" - -%!postproc(html): #BECE "
" -%!postproc(html): #ENCE "
" -%%!postproc(html): "subsection\*" "section" -%%!postproc(html): "section\*" "chapter" -%%!preproc(html): #PARTbnf "[BNF Grammar of GF RefDocGF.html]" - -%!postproc(html): #sugar "===" -%!postproc(html): #comput "==>" - -%!postproc(html): #Aone "A1" -%!postproc(html): #Aen "An" -%!postproc(html): #Aii "Ai" -%!postproc(html): #aone "a1" -%!postproc(html): #anone "an-1" -%!postproc(html): #aen "an" -%!postproc(html): #Bone "B1" -%!postproc(html): #Bem "Bm" -%!postproc(html): #Ben "Bn" -%!postproc(html): #Cone "C1" -%!postproc(html): #Cen "Cn" -%!postproc(html): #Cii "Ci" -%!postproc(html): #fone "f1" -%!postproc(html): #fen "fn" -%!postproc(html): #Gone "G1" -%!postproc(html): #Gen "Gn" -%!postproc(html): #pone "p1" -%!postproc(html): #pem "pm" -%!postproc(html): #pen "pn" -%!postproc(html): #pii "pi" -%!postproc(html): #rii "ri" -%!postproc(html): #rone "r1" -%!postproc(html): #ren "rn" -%!postproc(html): #sii "si" -%!postproc(html): #sone "s1" -%!postproc(html): #sen "sn" -%!postproc(html): #Tone "T1" -%!postproc(html): #Ten "Tn" -%!postproc(html): #tone "t1" -%!postproc(html): #tem "tm" -%!postproc(html): #ten "tn" -%!postproc(html): #tii "ti" -%!postproc(html): #Vone "V1" -%!postproc(html): #Ven "Vn" -%!postproc(html): #Vii "Vi" -%!postproc(html): #xnone "xn-1" -%!postproc(html): #xone "x1" -%!postproc(html): #xen "xn" -%!postproc(html): #xii "xi" -%!postproc(html): #yone "y1" -%!postproc(html): #yem "ym" -%!postproc(html): #zone "z1" -%!postproc(html): #zem "zm" - - -%!postproc(tex): #Lpatternmatching "label{patternmatching}" -%!postproc(tex): #Rpatternmatching "sref{patternmatching}" -%!postproc(html): #Lpatternmatching -%!postproc(html): #Rpatternmatching here - -%!postproc(tex): #Lcatjudgements "label{catjudgements}" -%!postproc(tex): #Rcatjudgements "sref{catjudgements}" -%!postproc(html): #Lcatjudgements -%!postproc(html): #Rcatjudgements here - -%!postproc(tex): #Lcnctypes "label{cnctypes}" -%!postproc(tex): #Rcnctypes "sref{cnctypes}" -%!postproc(html): 
#Lcnctypes -%!postproc(html): #Rcnctypes here - -%!postproc(tex): #Lcompleteness "label{completeness}" -%!postproc(tex): #Rcompleteness "sref{completeness}" -%!postproc(html): #Lcompleteness -%!postproc(html): #Rcompleteness here - -%!postproc(tex): #Lcontexts "label{contexts}" -%!postproc(tex): #Rcontexts "sref{contexts}" -%!postproc(html): #Lcontexts -%!postproc(html): #Rcontexts here - -%!postproc(tex): #Lconversions "label{conversions}" -%!postproc(tex): #Rconversions "sref{conversions}" -%!postproc(html): #Lconversions -%!postproc(html): #Rconversions here - -%!postproc(tex): #Lexpressions "label{expressions}" -%!postproc(tex): #Rexpressions "sref{expressions}" -%!postproc(html): #Lexpressions -%!postproc(html): #Rexpressions here - -%!postproc(tex): #Lflagvalues "label{flagvalues}" -%!postproc(tex): #Rflagvalues "sref{flagvalues}" -%!postproc(html): #Lflagvalues -%!postproc(html): #Rflagvalues here - -%!postproc(tex): #Lfunctionelimination "label{functionelimination}" -%!postproc(tex): #Rfunctionelimination "sref{functionelimination}" -%!postproc(html): #Lfunctionelimination -%!postproc(html): #Rfunctionelimination here - -%!postproc(tex): #Lfunctiontype "label{functiontype}" -%!postproc(tex): #Rfunctiontype "sref{functiontype}" -%!postproc(html): #Lfunctiontype -%!postproc(html): #Rfunctiontype here - -%!postproc(tex): #Lgluing "label{gluing}" -%!postproc(tex): #Rgluing "sref{gluing}" -%!postproc(html): #Lgluing -%!postproc(html): #Rgluing here - -%!postproc(tex): #LHOAS "label{HOAS}" -%!postproc(tex): #RHOAS "sref{HOAS}" -%!postproc(html): #LHOAS -%!postproc(html): #RHOAS here - -%!postproc(tex): #Lidentifiers "label{identifiers}" -%!postproc(tex): #Ridentifiers "sref{identifiers}" -%!postproc(html): #Lidentifiers -%!postproc(html): #Ridentifiers here - -%!postproc(tex): #Ljudgementforms "label{judgementforms}" -%!postproc(tex): #Rjudgementforms "sref{judgementforms}" -%!postproc(html): #Ljudgementforms -%!postproc(html): #Rjudgementforms here - 
-%!postproc(tex): #Llindefjudgements "label{lindefjudgements}" -%!postproc(tex): #Rlindefjudgements "sref{lindefjudgements}" -%!postproc(html): #Llindefjudgements -%!postproc(html): #Rlindefjudgements here - -%!postproc(tex): #Llinexpansion "label{linexpansion}" -%!postproc(tex): #Rlinexpansion "sref{linexpansion}" -%!postproc(html): #Llinexpansion -%!postproc(html): #Rlinexpansion here - -%!postproc(tex): #Loldgf "label{oldgf}" -%!postproc(tex): #Roldgf "sref{oldgf}" -%!postproc(html): #Loldgf -%!postproc(html): #Roldgf here - -%!postproc(tex): #Lopenabstract "label{openabstract}" -%!postproc(tex): #Ropenabstract "sref{openabstract}" -%!postproc(html): #Lopenabstract -%!postproc(html): #Ropenabstract here - -%!postproc(tex): #Loverloading "label{overloading}" -%!postproc(tex): #Roverloading "sref{overloading}" -%!postproc(html): #Loverloading -%!postproc(html): #Roverloading here - -%!postproc(tex): #Lparamjudgements "label{paramjudgements}" -%!postproc(tex): #Rparamjudgements "sref{paramjudgements}" -%!postproc(html): #Lparamjudgements -%!postproc(html): #Rparamjudgements here - -%!postproc(tex): #Lparamtypes "label{paramtypes}" -%!postproc(tex): #Rparamtypes "sref{paramtypes}" -%!postproc(html): #Lparamtypes -%!postproc(html): #Rparamtypes here - -%!postproc(tex): #Lparamvalues "label{paramvalues}" -%!postproc(tex): #Rparamvalues "sref{paramvalues}" -%!postproc(html): #Lparamvalues -%!postproc(html): #Rparamvalues here - -%!postproc(tex): #Lpredefabs "label{predefabs}" -%!postproc(tex): #Rpredefabs "sref{predefabs}" -%!postproc(html): #Lpredefabs -%!postproc(html): #Rpredefabs here - -%!postproc(tex): #Lpredefcnc "label{predefcnc}" -%!postproc(tex): #Rpredefcnc "sref{predefcnc}" -%!postproc(html): #Lpredefcnc -%!postproc(html): #Rpredefcnc here - -%!postproc(tex): #Lqualifiednames "label{qualifiednames}" -%!postproc(tex): #Rqualifiednames "sref{qualifiednames}" -%!postproc(html): #Lqualifiednames -%!postproc(html): #Rqualifiednames here - -%!postproc(tex): 
#Lrenaming "label{renaming}" -%!postproc(tex): #Rrenaming "sref{renaming}" -%!postproc(html): #Lrenaming -%!postproc(html): #Rrenaming here - -%!postproc(tex): #Lrestrictedinheritance "label{restrictedinheritance}" -%!postproc(tex): #Rrestrictedinheritance "sref{restrictedinheritance}" -%!postproc(html): #Lrestrictedinheritance -%!postproc(html): #Rrestrictedinheritance here - -%!postproc(tex): #Lreuse "label{reuse}" -%!postproc(tex): #Rreuse "sref{reuse}" -%!postproc(html): #Lreuse -%!postproc(html): #Rreuse here - -%!postproc(tex): #Lruntimevariables "label{runtimevariables}" -%!postproc(tex): #Rruntimevariables "sref{runtimevariables}" -%!postproc(html): #Lruntimevariables -%!postproc(html): #Rruntimevariables here - -%!postproc(tex): #Lstrtype "label{strtype}" -%!postproc(tex): #Rstrtype "sref{strtype}" -%!postproc(html): #Lstrtype -%!postproc(html): #Rstrtype here - -%!postproc(tex): #Lsubtyping "label{subtyping}" -%!postproc(tex): #Rsubtyping "sref{subtyping}" -%!postproc(html): #Lsubtyping -%!postproc(html): #Rsubtyping here - -%!postproc(tex): #Lsyntaxtrees "label{syntaxtrees}" -%!postproc(tex): #Rsyntaxtrees "sref{syntaxtrees}" -%!postproc(html): #Lsyntaxtrees -%!postproc(html): #Rsyntaxtrees here - -%!postproc(tex): #Ltables "label{tables}" -%!postproc(tex): #Rtables "sref{tables}" -%!postproc(html): #Ltables -%!postproc(html): #Rtables here - -%!postproc(tex): #Lvariablebinding "label{variablebinding}" -%!postproc(tex): #Rvariablebinding "sref{variablebinding}" -%!postproc(html): #Lvariablebinding -%!postproc(html): #Rvariablebinding here - -%% last, to avoid overriding with subsection* -> section -%!postproc(tex): #PREFACE "subsection*{Preface}" -%!postproc(tex): #OVERVIEW "subsection*{Overview}" -%!postproc(html): #OVERVIEW

Overview

- - - - -#NEW - -=Overview= - -This is a hands-on introduction to grammar writing in GF. - -Main ingredients of GF: -- linguistics -- functional programming - - -Prerequisites: -- some previous experience from some programming language -- the basics of using computers, e.g. the use of - text editors and the management of files. -- knowledge of Unix commands is useful but not necessary -- knowledge of many natural languages may add fun to experience - - - - -#NEW - -==Outline== - -#Rchaptwo: a multilingual "Hello World" grammar. English, Finnish, Italian. - -#Rchapthree: a larger grammar for the domain of food. English and Italian. - -#Rchapfour: parameters - morphology and agreement. - -#Rchapfive: using the resource grammar library. - -#Rchapsix: semantics - **dependent types**, **variable bindings**, -and **semantic definitions**. - -#Rchapseven: implementing formal languages. - -#Rchapeight: embedded grammar applications. - - - -#NEW - -==Slides== - -You can chop this tutorial into a set of slides by the command -``` - htmls gf-tutorial.html -``` -where the program ``htmls`` is distributed with GF (see below), in - - [``GF/src/tools/Htmls.hs`` http://digitalgrammars.com/gf/src/tools/Htmls.hs] - -The slides will appear as a set of files beginning with ``01-gf-tutorial.htmls``. - -Internal links will not work in the slide format, except for those in the -upper left corner of each slide, and the links behind the "Contents" link. 
- - - -#NEW - -=Lesson 1: Getting Started with GF= - - -#Lchaptwo - -Goals: -- install and run GF -- write the first GF grammar: a "Hello World" grammar in three languages -- use GF for translation and multilingual generation - - -#NEW - -==What GF is== - -We use the term GF for three different things: -- a **system** (computer program) used for working with grammars -- a **programming language** in which grammars can be written -- a **theory** about grammars and languages - - -The GF system is an implementation -of the GF programming language, which in turn is built on the ideas of the -GF theory. - -The focus of this tutorial is on using the GF programming language. - -At the same time, we learn the way of thinking in the GF theory. - -We make the grammars run on a computer by -using the GF system. - - -#NEW - -==GF grammars and language processing tasks== - -A GF program is called a **grammar**. - -A grammar defines a language. - -From this definition, language processing components can be derived: -- **parsing**: to analyse the language -- **linearization**: to generate the language -- **translation**: to analyse one language and generate another - - -In general, a GF grammar is **multilingual**: -- many languages in one grammar -- translations between them - - - - - - - -#NEW - -==Getting the GF system== - -Open-source free software, downloaded via the GF Homepage: - -[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf/] - -There you find -- binaries for Linux, Mac OS X, and Windows -- source code and documentation -- grammar libraries and examples - - -Many examples in this tutorial are -[online http://digitalgrammars.com/gf/examples/tutorial]. - -Normally you don't have to compile GF yourself. -But, if you do want to compile GF from source follow the -instructions in the [Developers Guide gf-developers.html]. 
- - -#NEW - -==Running the GF system== - -Type ``gf`` in the Unix (or Cygwin) shell: -``` - % gf -``` -You will see GF's welcome message and the prompt ``>``. -The command -``` - > help -``` -will give you a list of available commands. - -As a common convention, we will use -- ``%`` as a prompt that marks system commands -- ``>`` as a prompt that marks GF commands - - -Thus you should not type these prompts, but only the characters that -follow them. - - -#NEW - -==A "Hello World" grammar== - -Like most programming language tutorials, we start with a -program that prints "Hello World" on the terminal. - -Extra features: -- **Multilinguality**: the message is printed in many languages. -- **Reversibility**: in addition to printing, you can **parse** the - message and **translate** it to other languages. - - -#NEW - -===The program: abstract syntax and concrete syntaxes=== - -A GF program, in general, is a **multilingual grammar**. Its main parts -are -- an **abstract syntax** -- one or more **concrete syntaxes** - - -The abstract syntax defines what **meanings** -can be expressed in the grammar -- //Greetings//, where we greet a //Recipient//, which can be - //World// or //Mum// or //Friends// - - - -#NEW - -GF code for the abstract syntax: -``` - -- a "Hello World" grammar - abstract Hello = { - - flags startcat = Greeting ; - - cat Greeting ; Recipient ; - - fun - Hello : Recipient -> Greeting ; - World, Mum, Friends : Recipient ; - } -``` -The code has the following parts: -- a **comment** (optional), saying what the module is doing -- a **module header** indicating that it is an abstract syntax - module named ``Hello`` -- a **module body** in braces, consisting of - - a **startcat flag declaration** stating that ``Greeting`` is the - default start category for parsing and generation - - **category declarations** introducing two categories, i.e. 
types of meanings - - **function declarations** introducing three meaning-building functions - - -#NEW - -English concrete syntax (mapping from meanings to strings): -``` - concrete HelloEng of Hello = { - - lincat Greeting, Recipient = {s : Str} ; - - lin - Hello recip = {s = "hello" ++ recip.s} ; - World = {s = "world"} ; - Mum = {s = "mum"} ; - Friends = {s = "friends"} ; - } -``` -The major parts of this code are: -- a module header indicating that it is a concrete syntax of the abstract syntax - ``Hello``, itself named ``HelloEng`` -- a module body in curly brackets, consisting of - - **linearization type definitions** stating that - ``Greeting`` and ``Recipient`` are **records** with a **string** ``s`` - - **linearization definitions** telling what records are assigned to - each of the meanings defined in the abstract syntax - - -Notice the concatenation ``++`` and the record projection ``.``. - - -#NEW - -Finnish and Italian concrete syntaxes: -``` - concrete HelloFin of Hello = { - lincat Greeting, Recipient = {s : Str} ; - lin - Hello recip = {s = "terve" ++ recip.s} ; - World = {s = "maailma"} ; - Mum = {s = "äiti"} ; - Friends = {s = "ystävät"} ; - } - - concrete HelloIta of Hello = { - lincat Greeting, Recipient = {s : Str} ; - lin - Hello recip = {s = "ciao" ++ recip.s} ; - World = {s = "mondo"} ; - Mum = {s = "mamma"} ; - Friends = {s = "amici"} ; - } -``` - - -#NEW - -===Using grammars in the GF system=== - -In order to compile the grammar in GF, -we create four files, one for each module, named //Modulename//``.gf``: -``` - Hello.gf HelloEng.gf HelloFin.gf HelloIta.gf -``` -The first GF command: **import** a grammar. -``` - > import HelloEng.gf -``` -All commands also have short names; here: -``` - > i HelloEng.gf -``` -The GF system will **compile** your grammar -into an internal representation and show the CPU time that was consumed, followed -by a new prompt: -``` - > i HelloEng.gf - - compiling Hello.gf... 
wrote file Hello.gfo 8 msec - - compiling HelloEng.gf... wrote file HelloEng.gfo 12 msec - - 12 msec - > -``` - -#NEW - -You can use GF for **parsing** (``parse`` = ``p``) -``` - > parse "hello world" - Hello World -``` -Parsing takes a **string** into an **abstract syntax tree**. - -The notation for trees is that of **function application**: -``` - function argument1 ... argumentn -``` -Parentheses are only needed for grouping. - -Parsing something that is not in the grammar will fail: -``` - > parse "hello dad" - Unknown words: dad - - > parse "world hello" - no tree found -``` - -#NEW - -You can also use GF for **linearization** (``linearize = l``). -It takes trees into strings: -``` - > linearize Hello World - hello world -``` -**Translation**: **pipe** parsing to linearization: -``` - > import HelloEng.gf - > import HelloIta.gf - - > parse -lang=HelloEng "hello mum" | linearize -lang=HelloIta - ciao mamma -``` -Default of the language flag (``-lang``): the last-imported concrete syntax. - -**Multilingual generation**: -``` - > parse -lang=HelloEng "hello friends" | linearize - terve ystävät - ciao amici - hello friends -``` -Linearization is by default to all available languages. - -#NEW - -===Exercises on the Hello World grammar=== - -+ Test the parsing and translation examples shown above, as well as -some other examples, in different combinations of languages. - -+ Extend the grammar ``Hello.gf`` and some of the -concrete syntaxes by five new recipients and one new greeting -form. - -+ Add a concrete syntax for some other -languages you might know. - -+ Add a pair of greetings that are expressed in one and -the same way in -one language and in two different ways in another. -For instance, //good morning// -and //good afternoon// in English are both expressed -as //buongiorno// in Italian. -Test what happens when you translate //buongiorno// to English in GF. 
- -+ Inject errors in the ``Hello`` grammars, for example, leave out -some line, omit a variable in a ``lin`` rule, or change the name -in one occurrence -of a variable. Inspect the error messages generated by GF. - - -#NEW - -==Using grammars from outside GF== - -You can use the ``gf`` program in a Unix pipe. -- echo a GF command -- pipe it into GF with grammar names as arguments - - -``` - % echo "l Hello World" | gf HelloEng.gf HelloFin.gf HelloIta.gf -``` -You can also write a **script**, a file containing the lines -``` - import HelloEng.gf - import HelloFin.gf - import HelloIta.gf - linearize Hello World -``` - - -#NEW - -==GF scripts== - -If we name this script ``hello.gfs``, we can do -``` - $ gf --run Quality -> Phrase ; - This, That : Kind -> Item ; - QKind : Quality -> Kind -> Kind ; - Wine, Cheese, Fish : Kind ; - Very : Quality -> Quality ; - Fresh, Warm, Italian, Expensive, Delicious, Boring : Quality ; - } -``` -Example ``Phrase`` -``` - Is (This (QKind Delicious (QKind Italian Wine))) (Very (Very Expensive)) - this delicious Italian wine is very very expensive -``` - - -#NEW - -==The concrete syntax FoodEng== - -``` - concrete FoodEng of Food = { - - lincat - Phrase, Item, Kind, Quality = {s : Str} ; - - lin - Is item quality = {s = item.s ++ "is" ++ quality.s} ; - This kind = {s = "this" ++ kind.s} ; - That kind = {s = "that" ++ kind.s} ; - QKind quality kind = {s = quality.s ++ kind.s} ; - Wine = {s = "wine"} ; - Cheese = {s = "cheese"} ; - Fish = {s = "fish"} ; - Very quality = {s = "very" ++ quality.s} ; - Fresh = {s = "fresh"} ; - Warm = {s = "warm"} ; - Italian = {s = "Italian"} ; - Expensive = {s = "expensive"} ; - Delicious = {s = "delicious"} ; - Boring = {s = "boring"} ; - } -``` - -#NEW - -Test the grammar for parsing: -``` - > import FoodEng.gf - > parse "this delicious wine is very very Italian" - Is (This (QKind Delicious Wine)) (Very (Very Italian)) -``` -Parse in other categories setting the ``cat`` flag: -``` - p -cat=Kind "very 
Italian wine" - QKind (Very Italian) Wine -``` - - -#NEW - -===Exercises on the Food grammar=== - -+ Extend the ``Food`` grammar by ten new food kinds and -qualities, and run the parser with new kinds of examples. - -+ Add a rule that enables question phrases of the form -//is this cheese Italian//. - -+ Enable the optional prefixing of -phrases with the words "excuse me but". Do this in such a way that -the prefix can occur at most once. - - - -#NEW - -==Commands for testing grammars== - -===Generating trees and strings=== - -Random generation (``generate_random = gr``): -build a random tree in accordance with an abstract syntax: -``` - > generate_random - Is (This (QKind Italian Fish)) Fresh -``` -By using a pipe, random generation can be fed into linearization: -``` - > generate_random | linearize - this Italian fish is fresh -``` -Use the ``number`` flag to generate several trees: -``` - > gr -number=4 | l - that wine is boring - that fresh cheese is fresh - that cheese is very boring - this cheese is Italian -``` - -#NEW - -To generate //all// phrases that a grammar can produce, -use ``generate_trees = gt``. -``` - > generate_trees | l - that cheese is very Italian - that cheese is very boring - that cheese is very delicious - ... - this wine is fresh - this wine is warm -``` -The default **depth** is 3; the depth can be -set by using the ``depth`` flag: -``` - > generate_trees -depth=2 | l -``` -What options a command has can be seen by the ``help = h`` command: -``` - > help gr - > help gt -``` - - -#NEW - -===Exercises on generation=== - -+ If the command ``gt`` generated all -trees in your grammar, it would never terminate. Why? - -+ Measure how many trees the grammar gives with depths 4 and 5, -respectively. **Hint**. You can -use the Unix **word count** command ``wc`` to count lines. 
- - - -#NEW - -===More on pipes: tracing=== - -Add the **tracing** option ``-tr`` to each command whose output you -want to see: -``` - > gr -tr | l -tr | p - - Is (This Cheese) Boring - this cheese is boring - Is (This Cheese) Boring -``` -Useful for test purposes: the pipe above can show -if a grammar is **ambiguous**, i.e. -contains strings that can be parsed in more than one way. - -**Exercise**. Extend the ``Food`` grammar so that it produces ambiguous -strings, and try out the ambiguity test. - - -#NEW - -===Writing and reading files=== - -To save the outputs into a file, pipe them to the ``write_file = wf`` command, -``` - > gr -number=10 | linearize | write_file -file=exx.tmp -``` -To read a file into GF, use the ``read_file = rf`` command, -``` - > read_file -file=exx.tmp -lines | parse -``` -The flag ``-lines`` tells GF to read each line of the file separately. - -Files with examples can be used for **regression testing** -of grammars - the most systematic way to do this is by -**treebanks**; see #Rsectreebank. - - -#NEW - -===Visualizing trees=== - -Parentheses give a linear representation of trees, -useful for the computer. - -The human eye may prefer to see a visualization: ``visualize_tree = vt``: -``` - > parse "this delicious cheese is very Italian" | visualize_tree -``` -The tree is generated in a postscript (``.ps``) file. The ``-view`` option is used for -telling what command to use to view the file. Its default is ``"gv"``, which works -on most Linux installations. On a Mac, one would probably write -``` - > parse "this delicious cheese is very Italian" | visualize_tree -view="open" -``` - - - -#MYTREE - -This command uses the program [Graphviz http://www.graphviz.org/], which you -might not have, but which is freely available on the web. - -You can save the temporary file ``_grph.dot``, -which the command ``vt`` produces. - -Then you can process this file with the ``dot`` -program (from the Graphviz package). 
-``` - % dot -Tpng _grph.dot > mytree.png -``` - - -#NEW - -===System commands=== - -You can give a **system command** without leaving GF: -``!`` followed by a Unix command, -``` - > ! dot -Tpng _grph.dot > mytree.png - > ! open mytree.png -``` -A system command may also receive its argument from -a GF pipe. It then has the name ``sp`` = ``system_pipe``: -``` - > generate_trees -depth=4 | sp -command="wc -l" -``` -This example command returns the number of generated trees. - - -**Exercise**. -Measure how many trees the grammar ``FoodEng`` gives with depths 4 and 5, -respectively. Use the Unix **word count** command ``wc`` to count lines, and -a system pipe from a GF command into a Unix command. - - - - - -#NEW - -==An Italian concrete syntax== - -#Lsecanitalian - -Just (?) replace English words with their dictionary equivalents: -``` - concrete FoodIta of Food = { - - lincat - Phrase, Item, Kind, Quality = {s : Str} ; - - lin - Is item quality = {s = item.s ++ "è" ++ quality.s} ; - This kind = {s = "questo" ++ kind.s} ; - That kind = {s = "quel" ++ kind.s} ; - QKind quality kind = {s = kind.s ++ quality.s} ; - Wine = {s = "vino"} ; - Cheese = {s = "formaggio"} ; - Fish = {s = "pesce"} ; - Very quality = {s = "molto" ++ quality.s} ; - Fresh = {s = "fresco"} ; - Warm = {s = "caldo"} ; - Italian = {s = "italiano"} ; - Expensive = {s = "caro"} ; - Delicious = {s = "delizioso"} ; - Boring = {s = "noioso"} ; - } -``` - - -#NEW - -Not just replacing words: - -The order of a quality and the kind it modifies is changed in -``` - QKind quality kind = {s = kind.s ++ quality.s} ; -``` -Thus Italian says ``vino italiano`` for ``Italian wine``. - -(Some Italian adjectives -are put before the noun. This distinction can be controlled by parameters, -which are introduced in #Rchapfour.) - -#NEW - -===Exercises on multilinguality=== - -+ Write a concrete syntax of ``Food`` for some other language. 
-You will probably end up with grammatically incorrect -linearizations - but don't -worry about this yet. - -+ If you have written ``Food`` for German, Swedish, or some -other language, test with random or exhaustive generation what constructs -come out incorrect, and prepare a list of those that cannot be helped -with the currently available fragment of GF. You can return to your list -after having worked out #Rchapfour. - - - - -#NEW - -==Free variation== - -Semantically indistinguishable ways of expressing a thing. - -The **variants** construct of GF expresses free variation. For example, -``` - lin Delicious = {s = "delicious" | "exquisite" | "tasty"} ; -``` -By default, the ``linearize`` command -shows only the first variant from such lists; to see them -all, use the option ``-all``: -``` - > p "this exquisite wine is delicious" | l -all - this delicious wine is delicious - this delicious wine is exquisite - ... -``` - -#NEW - -An equivalent notation for variants is -``` - lin Delicious = {s = variants {"delicious" ; "exquisite" ; "tasty"}} ; -``` -This notation also allows the limiting case: an empty variant list, -``` - variants {} -``` -It can be used e.g. if a word lacks a certain inflection form. - -Free variation works for all types in concrete syntax; all terms in -a variant list must be of the same type. - - -#NEW - -==More applications of multilingual grammars== - -===Multilingual treebanks=== - -#Lsectreebank - -**Multilingual treebank**: a set of trees with their -linearizations in different languages: -``` - > gr -number=2 | l -treebank - - Is (That Cheese) (Very Boring) - quel formaggio è molto noioso - that cheese is very boring - - Is (That Cheese) Fresh - quel formaggio è fresco - that cheese is fresh -``` - - - -#NEW - -===Translation quiz=== - -``translation_quiz = tq``: -generate random sentences, display them in one language, and check the user's -answer given in another language. 
-``` - > translation_quiz -from=FoodEng -to=FoodIta - - Welcome to GF Translation Quiz. - The quiz is over when you have done at least 10 examples - with at least 75 % success. - You can interrupt the quiz by entering a line consisting of a dot ('.'). - - this fish is warm - questo pesce caldo - > Yes. - Score 1/1 - - this cheese is Italian - questo formaggio noioso - > No, not questo formaggio noioso, but - questo formaggio italiano - - Score 1/2 - this fish is expensive -``` - - - -#NEW - -==Context-free grammars and GF== - -===The "cf" grammar format=== - -The grammar ``FoodEng`` can be written in a BNF format as follows: -``` - Is. Phrase ::= Item "is" Quality ; - That. Item ::= "that" Kind ; - This. Item ::= "this" Kind ; - QKind. Kind ::= Quality Kind ; - Cheese. Kind ::= "cheese" ; - Fish. Kind ::= "fish" ; - Wine. Kind ::= "wine" ; - Italian. Quality ::= "Italian" ; - Boring. Quality ::= "boring" ; - Delicious. Quality ::= "delicious" ; - Expensive. Quality ::= "expensive" ; - Fresh. Quality ::= "fresh" ; - Very. Quality ::= "very" Quality ; - Warm. Quality ::= "warm" ; -``` -GF can convert BNF grammars into GF. -BNF files are recognized by the file name suffix ``.cf`` (for **context-free**): -``` - > import food.cf -``` -The compiler creates separate abstract and concrete modules internally. - - -#NEW - -===Restrictions of context-free grammars=== - -Separating concrete and abstract syntax allows -three deviations from context-free grammar: -- **permutation**: changing the order of constituents -- **suppression**: omitting constituents -- **reduplication**: repeating constituents - - -**Exercise**. Define the non-context-free -copy language ``{x x | x <- (a|b)*}`` in GF. - - - -#NEW - -%--! -==Modules and files== - -GF uses suffixes to recognize different file formats: -- Source files: //Modulename//``.gf`` -- Target files: //Modulename//``.gfo`` - - -Importing generates target from source: -``` - > i FoodEng.gf - - compiling Food.gf... 
wrote file Food.gfo 16 msec - - compiling FoodEng.gf... wrote file FoodEng.gfo 20 msec -``` -The ``.gfo`` format (="GF Object") is precompiled GF, which is -faster to load than source GF (``.gf``). - -When reading a module, GF decides whether -to use an existing ``.gfo`` file or to generate -a new one, by looking at modification times. - - -#NEW - -**Exercise**. What happens when you import ``FoodEng.gf`` for -a second time? Try this in different situations: -- Right after importing it the first time (the modules are kept in - the memory of GF and need no reloading). -- After issuing the command ``empty`` (``e``), which clears the memory - of GF. -- After making a small change in ``FoodEng.gf``, be it only an added space. -- After making a change in ``Food.gf``. - - - -#NEW - -==Using operations and resource modules== - -===Operation definitions=== - -The golden rule of functional programming: - -//Whenever you find yourself programming by copy-and-paste, write a function instead.// - -Functions in concrete syntax are defined using the keyword ``oper`` (for -**operation**), distinct from ``fun`` for the sake of clarity. - -Example: -``` - oper ss : Str -> {s : Str} = \x -> {s = x} ; -``` -The operation can be **applied** to an argument, and GF will -**compute** the value: -``` - ss "boy" ===> {s = "boy"} -``` -The symbol ``===>`` will be used for computation. - - -#NEW - -Notice the **lambda abstraction** form -- ``\``//x// ``->`` //t// - - -This is read: -- function with variable //x// and **function body** //t// - - -For lambda abstraction with multiple arguments, we have the shorthand -``` - \x,y -> t === \x -> \y -> t -``` -Linearization rules actually use syntactic -sugar for abstraction: -``` - lin f x = t === lin f = \x -> t -``` - - - -#NEW - -%--! -===The ``resource`` module type=== - -The ``resource`` module type is used to package -``oper`` definitions into reusable resources. 
-``` - resource StringOper = { - oper - SS : Type = {s : Str} ; - ss : Str -> SS = \x -> {s = x} ; - cc : SS -> SS -> SS = \x,y -> ss (x.s ++ y.s) ; - prefix : Str -> SS -> SS = \p,x -> ss (p ++ x.s) ; - } -``` - - -#NEW - -%--! -===Opening a resource=== - -Any number of ``resource`` modules can be -**open**ed in a ``concrete`` syntax. -``` - concrete FoodEng of Food = open StringOper in { - - lincat - S, Item, Kind, Quality = SS ; - - lin - Is item quality = cc item (prefix "is" quality) ; - This k = prefix "this" k ; - That k = prefix "that" k ; - QKind k q = cc k q ; - Wine = ss "wine" ; - Cheese = ss "cheese" ; - Fish = ss "fish" ; - Very = prefix "very" ; - Fresh = ss "fresh" ; - Warm = ss "warm" ; - Italian = ss "Italian" ; - Expensive = ss "expensive" ; - Delicious = ss "delicious" ; - Boring = ss "boring" ; - } -``` - - -#NEW - -%--! -===Partial application=== - -#Lsecpartapp - -The rule -``` - lin This k = prefix "this" k ; -``` -can be written more concisely -``` - lin This = prefix "this" ; -``` -Part of the art in functional programming: -decide the order of arguments in a function, -so that partial application can be used as much as possible. - -For instance, ``prefix`` is typically applied to -linearization variables with constant strings. Hence we -put the ``Str`` argument before the ``SS`` argument. - - -**Exercise**. 
Define an operation ``infix`` analogous to ``prefix``, -such that it allows you to write -``` - lin Is = infix "is" ; -``` - - -#NEW - -===Testing resource modules=== - -Import with the flag ``-retain``, -``` - > import -retain StringOper.gf -``` -Compute the value with ``compute_concrete = cc``, -``` - > compute_concrete prefix "in" (ss "addition") - {s : Str = "in" ++ "addition"} -``` - - -#NEW - -==Grammar architecture== - -#Lsecarchitecture - -===Extending a grammar=== - -A new module can **extend** an old one: -``` - abstract Morefood = Food ** { - cat - Question ; - fun - QIs : Item -> Quality -> Question ; - Pizza : Kind ; - } -``` -Parallel to the abstract syntax, extensions can -be built for concrete syntaxes: -``` - concrete MorefoodEng of Morefood = FoodEng ** { - lincat - Question = {s : Str} ; - lin - QIs item quality = {s = "is" ++ item.s ++ quality.s} ; - Pizza = {s = "pizza"} ; - } -``` -The effect of extension: all of the contents of the extended -and extending modules are put together. - -In other words: the new module **inherits** the contents of the old module. - -#NEW - -Simultaneous extension and opening: -``` - concrete MorefoodIta of Morefood = FoodIta ** open StringOper in { - lincat - Question = SS ; - lin - QIs item quality = ss (item.s ++ "" ++ quality.s) ; - Pizza = ss "pizza" ; - } -``` -Resource modules can extend other resource modules - thus it is -possible to build resource hierarchies. - - - -#NEW - -===Multiple inheritance=== - -Extend several grammars at the same time: -``` - abstract Foodmarket = Food, Fruit, Mushroom ** { - fun - FruitKind : Fruit -> Kind ; - MushroomKind : Mushroom -> Kind ; - } -``` -where -``` - abstract Fruit = { - cat Fruit ; - fun Apple, Peach : Fruit ; - } - - abstract Mushroom = { - cat Mushroom ; - fun Cep, Agaric : Mushroom ; - } -``` - -**Exercise**. Refactor ``Food`` by taking apart ``Wine`` into a special -``Drink`` module. 
- - - -#NEW - -=Lesson 3: Grammars with parameters= - -#Lchapfour - -Goals: -- implement sophisticated linguistic structures: - - morphology: the inflection of words - - agreement: rules for selecting word forms in syntactic combinations - - -- Cover all GF constructs for concrete syntax - - -It is possible to skip this chapter and go directly -to the next, since the use of the GF Resource Grammar library -makes it unnecessary to use parameters: they -could be left to library implementors. - - -#NEW - -==The problem: words have to be inflected== - -Plural forms are needed in things like -#BEQU -//these Italian wines are delicious// -#ENQU -This requires two things: -- the **inflection** of nouns and verbs in singular and plural -- the **agreement** of the verb with the subject: - the verb must have the same number as the subject - - -Different languages have different types of inflection and agreement. -- Italian has also gender (masculine vs. feminine). - - - -In a multilingual grammar, -we want to ignore such distinctions in abstract syntax. - -**Exercise**. Make a list of the possible forms that nouns, -adjectives, and verbs can have in some languages that you know. - - -#NEW - -==Parameters and tables== - -We define the **parameter type** of number in English by -a new form of judgement: -``` - param Number = Sg | Pl ; -``` -This judgement defines the parameter type ``Number`` by listing -its two **constructors**, ``Sg`` and ``Pl`` -(singular and plural). - -We give ``Kind`` a linearization type that has a **table** depending on number: -``` - lincat Kind = {s : Number => Str} ; -``` -The **table type** ``Number => Str`` is similar to a function type -(``Number -> Str``). - -Difference: the argument must be a parameter type. Then -the argument-value pairs can be listed in a finite table. 
- -#NEW - -Here is a table: -``` - lin Cheese = { - s = table { - Sg => "cheese" ; - Pl => "cheeses" - } - } ; -``` -The table has **branches**, with a **pattern** on the -left of the arrow ``=>`` and a **value** on the right. - -The application of a table is done by the **selection** operator ``!``. - -It is computed by **pattern matching**: return -the value from the first branch whose pattern matches the -argument. For instance, -``` - table {Sg => "cheese" ; Pl => "cheeses"} ! Pl - ===> "cheeses" -``` - -#NEW - -**Case expressions** are syntactic sugar: -``` - case e of {...} === table {...} ! e -``` -Since they are familiar to Haskell and ML programmers, they can come in handy -when writing GF programs. - - -#NEW - -Constructors can take arguments from other parameter types. - -Example: forms of English verbs (except //be//): -``` - param VerbForm = VPresent Number | VPast | VPastPart | VPresPart ; -``` -Fact expressed: only the present tense has number variation. - -Example table: the forms of the verb //drink//: -``` - table { - VPresent Sg => "drinks" ; - VPresent Pl => "drink" ; - VPast => "drank" ; - VPastPart => "drunk" ; - VPresPart => "drinking" - } -``` - - -**Exercise**. In an earlier exercise (previous section), -you made a list of the possible -forms that nouns, adjectives, and verbs can have in some languages that -you know. Now take some of the results and implement them by -using parameter type definitions and tables. Write them into a ``resource`` -module, which you can test by using the command ``compute_concrete``. - - - -#NEW - -==Inflection tables and paradigms== - -A morphological **paradigm** is a formula telling how a class of -words is inflected. - -From the GF point of view, a paradigm is a function that takes -a **lemma** (also known as a **dictionary form**, or a **citation form**) and -returns an inflection table. 
- -The following operation defines the regular noun paradigm of English: -``` - oper regNoun : Str -> {s : Number => Str} = \dog -> { - s = table { - Sg => dog ; - Pl => dog + "s" - } - } ; -``` -The **gluing** operator ``+`` glues strings into one **token**: -``` - (regNoun "cheese").s ! Pl ===> "cheese" + "s" ===> "cheeses" -``` - - -#NEW - -A more complex example: regular verbs, -``` - oper regVerb : Str -> {s : VerbForm => Str} = \talk -> { - s = table { - VPresent Sg => talk + "s" ; - VPresent Pl => talk ; - VPresPart => talk + "ing" ; - _ => talk + "ed" - } - } ; -``` -The catch-all case for the past tense and the past participle -uses a **wild card** pattern ``_``. - - -#NEW - -===Exercises on morphology=== - -+ Identify cases in which the ``regNoun`` paradigm does not -apply in English, and implement some alternative paradigms. - -+ Implement some regular paradigms for other languages you have -considered in earlier exercises. - - - -#NEW - -==Using parameters in concrete syntax== - -Purpose: a more radical -variation between languages -than just the use of different words and word orders. - -We add to the grammar ``Food`` two rules for forming plural items: -``` - fun These, Those : Kind -> Item ; -``` -We also add a noun which in Italian has the feminine gender: -``` - fun Pizza : Kind ; -``` -This will force us to deal with gender. - - -#NEW - -%--! 
-===Agreement=== - -In English, the phrase-forming rule -``` - fun Is : Item -> Quality -> Phrase ; -``` -is affected by the number because of **subject-verb agreement**: -the verb of a sentence must be inflected in the number of the subject, -``` - Is (This Pizza) Warm ===> "this pizza is warm" - Is (These Pizza) Warm ===> "these pizzas are warm" -``` -It is the **copula** (the verb //be//) that is affected: -``` - oper copula : Number -> Str = \n -> - case n of { - Sg => "is" ; - Pl => "are" - } ; -``` -The **subject** ``Item`` must have such a number to provide to the copula: -``` - lincat Item = {s : Str ; n : Number} ; -``` -Now we can write -``` - lin Is item qual = {s = item.s ++ copula item.n ++ qual.s} ; -``` - - - -#NEW - -===Determiners=== - -How does an ``Item`` subject receive its number? The rules -``` - fun This, These : Kind -> Item ; -``` -add **determiners**, either //this// or //these//, which -require different forms of the noun: //this pizza// vs. -//these pizzas//. - -Thus ``Kind`` must have both singular and plural forms: -``` - lincat Kind = {s : Number => Str} ; -``` -We can write -``` - lin This kind = { - s = "this" ++ kind.s ! Sg ; - n = Sg - } ; - - lin These kind = { - s = "these" ++ kind.s ! Pl ; - n = Pl - } ; -``` - - -#NEW - -To avoid copy-and-paste, we can factor out the pattern of determination, -``` - oper det : - Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = - \n,det,kind -> { - s = det ++ kind.s ! n ; - n = n - } ; -``` -Now we can write -``` - lin This = det Sg "this" ; - lin These = det Pl "these" ; -``` -In a more **lexicalized** grammar, determiners would be a category: -``` - lincat Det = {s : Str ; n : Number} ; - fun Det : Det -> Kind -> Item ; - lin Det det kind = { - s = det.s ++ kind.s ! det.n ; - n = det.n - } ; -``` - - -#NEW - -===Parametric vs. 
inherent features=== - -``Kind``s have number as a **parametric feature**: both singular and plural -can be formed, -``` - lincat Kind = {s : Number => Str} ; -``` -``Item``s have number as an **inherent feature**: they are inherently either -singular or plural, -``` - lincat Item = {s : Str ; n : Number} ; -``` -Italian ``Kind`` will have parametric number and inherent gender: -``` - lincat Kind = {s : Number => Str ; g : Gender} ; -``` - - -#NEW - -Questions to ask when designing parameters: -- existence: what forms are possible to build by morphological and - other means? -- need: what features are expected via agreement or government? - - -Dictionaries give good advice: -#BEQU -**uomo**, pl. //uomini//, n.m. "man" -#ENQU -tells that //uomo// is a masculine noun with the plural form //uomini//. -Hence, parametric number and an inherent gender. - -For words, inherent features are usually given as lexical information. - -For combinations, they are //inherited// from some part of the construction -(typically the one called the **head**). Italian modification: -``` - lin QKind qual kind = - let gen = kind.g in { - s = table {n => kind.s ! n ++ qual.s ! gen ! n} ; - g = gen - } ; -``` -Notice -- **local definition** (``let`` expression) -- **variable pattern** ``n`` - - - -#NEW - -==An English concrete syntax for Foods with parameters== - -Some string operations from the library ``Prelude`` are used. -``` - concrete FoodsEng of Foods = open Prelude in { - - lincat - S, Quality = SS ; - Kind = {s : Number => Str} ; - Item = {s : Str ; n : Number} ; - - lin - Is item quality = ss (item.s ++ copula item.n ++ quality.s) ; - This = det Sg "this" ; - That = det Sg "that" ; - These = det Pl "these" ; - Those = det Pl "those" ; - QKind quality kind = {s = table {n => quality.s ++ kind.s ! 
n}} ; - Wine = regNoun "wine" ; - Cheese = regNoun "cheese" ; - Fish = noun "fish" "fish" ; - Pizza = regNoun "pizza" ; - Very = prefixSS "very" ; - Fresh = ss "fresh" ; - Warm = ss "warm" ; - Italian = ss "Italian" ; - Expensive = ss "expensive" ; - Delicious = ss "delicious" ; - Boring = ss "boring" ; -``` - -#NEW - -``` - param - Number = Sg | Pl ; - - oper - det : Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = - \n,d,cn -> { - s = d ++ cn.s ! n ; - n = n - } ; - noun : Str -> Str -> {s : Number => Str} = - \man,men -> {s = table { - Sg => man ; - Pl => men - } - } ; - regNoun : Str -> {s : Number => Str} = - \car -> noun car (car + "s") ; - copula : Number -> Str = - \n -> case n of { - Sg => "is" ; - Pl => "are" - } ; - } -``` - -#NEW - -==More on inflection paradigms== - -#Lsecinflection - -Let us extend the English noun paradigms so that we can -deal with all nouns, not just the regular ones. The goal is to -provide a morphology module that makes it easy to -add words to a lexicon. - - -#NEW - -===Worst-case functions=== - -We perform **data abstraction** from the type -of nouns by writing a **worst-case function**: -``` - oper Noun : Type = {s : Number => Str} ; - - oper mkNoun : Str -> Str -> Noun = \x,y -> { - s = table { - Sg => x ; - Pl => y - } - } ; - - oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ; -``` -Then we can define -``` - lincat N = Noun ; - lin Mouse = mkNoun "mouse" "mice" ; - lin House = regNoun "house" ; -``` -where the underlying types are not seen. - -#NEW - -We are free to change the underlying definitions, e.g. 
-add **case** (nominative or genitive) to noun inflection: -``` - param Case = Nom | Gen ; - - oper Noun : Type = {s : Number => Case => Str} ; -``` -Now we have to redefine the worst-case function -``` - oper mkNoun : Str -> Str -> Noun = \x,y -> { - s = table { - Sg => table { - Nom => x ; - Gen => x + "'s" - } ; - Pl => table { - Nom => y ; - Gen => y + case last y of { - "s" => "'" ; - _ => "'s" - } - } - } - } ; -``` -But up from this level, we can retain the old definitions -``` - lin Mouse = mkNoun "mouse" "mice" ; - oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ; -``` - - - -#NEW - -In the last definition of ``mkNoun``, we used a case expression -on the last character of the plural, as well as the ``Prelude`` -operation -``` - last : Str -> Str ; -``` -returning the string consisting of the last character. - -The case expression uses **pattern matching over strings**, which -is supported in GF, along with pattern matching over -parameters. - - - -#NEW - -===Smart paradigms=== - -The regular //dog//-//dogs// paradigm has -predictable variations: -- nouns ending with a //y//: //fly//-//flies//, except if - a vowel precedes the //y//: //boy//-//boys// -- nouns ending with //s//, //ch//, and a number of - other endings: //bus//-//buses//, //leech//-//leeches// - - -We could provide alternative paradigms: -``` - noun_y : Str -> Noun = \fly -> mkNoun fly (init fly + "ies") ; - noun_s : Str -> Noun = \bus -> mkNoun bus (bus + "es") ; -``` -(The Prelude function ``init`` drops the last character of a token.) 
- -Drawbacks: -- it can be difficult to select the correct paradigm -- it can be difficult to remember the names of the different paradigms - - -#NEW - -Better solution: a **smart paradigm**: -``` - regNoun : Str -> Noun = \w -> - let - ws : Str = case w of { - _ + ("a" | "e" | "i" | "o") + "o" => w + "s" ; -- bamboo - _ + ("s" | "x" | "sh" | "o") => w + "es" ; -- bus, hero - _ + "z" => w + "zes" ;-- quiz - _ + ("a" | "e" | "o" | "u") + "y" => w + "s" ; -- boy - x + "y" => x + "ies" ;-- fly - _ => w + "s" -- car - } - in - mkNoun w ws -``` -GF has **regular expression patterns**: -- **disjunctive patterns** //P// ``|`` //Q// -- **concatenation patterns** //P// ``+`` //Q// - - -The patterns are ordered in such a way that, for instance, -the suffix ``"oo"`` prevents //bamboo// from matching the suffix -``"o"``. - - -#NEW - -===Exercises on regular patterns=== - -+ The same rules that form plural nouns in English also -apply in the formation of third-person singular verbs. -Write a regular verb paradigm that uses this idea, but first -rewrite ``regNoun`` so that the analysis needed to build //s//-forms -is factored out as a separate ``oper``, which is shared with -``regVerb``. - -+ Extend the verb paradigms to cover all verb forms -in English, with special care taken of variations with the suffix -//ed// (e.g. //try//-//tried//, //use//-//used//). - -+ Implement the German **Umlaut** operation on word stems. -The operation changes the vowel of the stressed stem syllable as follows: -//a// to //ä//, //au// to //äu//, //o// to //ö//, and //u// to //ü//. You -can assume that the operation only takes syllables as arguments. Test the -operation to see whether it correctly changes //Arzt// to //Ärzt//, -//Baum// to //Bäum//, //Topf// to //Töpf//, and //Kuh// to //Küh//. 
- - - -#NEW - -===Function types with variables=== - -In #Rchapsix, **dependent function types** need a notation -that binds a variable to the argument type, as in -``` - switchOff : (k : Kind) -> Action k -``` -Function types //without// variables are actually a shorthand: -``` - PredVP : NP -> VP -> S -``` -means -``` - PredVP : (x : NP) -> (y : VP) -> S -``` -or any other naming of the variables. - - -#NEW - -Sometimes variables shorten the code, since they can share a type: -``` - octuple : (x,y,z,u,v,w,s,t : Str) -> Str -``` -If a bound variable is not used, it can be replaced by a wildcard: -``` - octuple : (_,_,_,_,_,_,_,_ : Str) -> Str -``` -A good practice is to indicate the number of arguments: -``` - octuple : (x1,_,_,_,_,_,_,x8 : Str) -> Str -``` -For inflection paradigms, it is handy to use heuristic variable names, -looking like the expected forms: -``` - mkNoun : (mouse,mice : Str) -> Noun -``` - - -#NEW - -===Separating operation types and definitions=== - -In libraries, it is useful to group type signatures separately from -definitions. It is possible to divide an ``oper`` judgement, -``` - oper regNoun : Str -> Noun ; - oper regNoun s = mkNoun s (s + "s") ; -``` -and put the parts in different places. - -With the ``interface`` and ``instance`` module types -(see #Rsecinterface), the parts can even be put in different files. - - -#NEW - -===Overloading of operations=== - -**Overloading**: different functions can be given the same name, as e.g. in C++. - -The compiler performs **overload resolution**, which works as long as the -functions have different types. - -In GF, the functions must be grouped together in ``overload`` groups. - -Example: different ways to define nouns in English: -``` - oper mkN : overload { - mkN : (dog : Str) -> Noun ; -- regular nouns - mkN : (mouse,mice : Str) -> Noun ; -- irregular nouns - } -``` -Cf. dictionaries: if the -word is regular, just one form is needed. If it is irregular, -more forms are given. 
- -The definition can be given separately, or at the same time, as the types: -``` - oper mkN = overload { - mkN : (dog : Str) -> Noun = regNoun ; - mkN : (mouse,mice : Str) -> Noun = mkNoun ; - } -``` -**Exercise**. Design a system of English verb paradigms presented by -an overload group. - - -#NEW - -===Morphological analysis and morphology quiz=== - -The command ``morpho_analyse = ma`` -can be used to read a text and return for each word its analyses -(in the current grammar): -``` - > read_file bible.txt | morpho_analyse -``` -The command ``morpho_quiz = mq`` generates inflection exercises. -``` - % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfc - - > morpho_quiz -cat=V - - Welcome to GF Morphology Quiz. - ... - - réapparaître : VFin VCondit Pl P2 - réapparaitriez - > No, not réapparaitriez, but - réapparaîtriez - Score 0/1 -``` -To create a list for later use, use the command ``morpho_list = ml`` -``` - > morpho_list -number=25 -cat=V | write_file exx.txt -``` - - - - -#NEW - -==The Italian Foods grammar== - -#Lsecitalian - -Parameters include not only number but also gender. -``` -concrete FoodsIta of Foods = open Prelude in { - - param - Number = Sg | Pl ; - Gender = Masc | Fem ; -``` -Qualities are inflected for gender and number, whereas kinds -have a parametric number and an inherent gender. -Items have an inherent number and gender. -``` - lincat - Phr = SS ; - Quality = {s : Gender => Number => Str} ; - Kind = {s : Number => Str ; g : Gender} ; - Item = {s : Str ; g : Gender ; n : Number} ; -``` - -#NEW - -A Quality is an adjective, with one form for each gender-number combination. -``` - oper - adjective : (_,_,_,_ : Str) -> {s : Gender => Number => Str} = - \nero,nera,neri,nere -> { - s = table { - Masc => table { - Sg => nero ; - Pl => neri - } ; - Fem => table { - Sg => nera ; - Pl => nere - } - } - } ; -``` -Regular adjectives work by adding endings to the stem. 
-``` - regAdj : Str -> {s : Gender => Number => Str} = \nero -> - let ner = init nero - in adjective nero (ner + "a") (ner + "i") (ner + "e") ; -``` - -#NEW - -For noun inflection, we are happy to give the two forms and the gender -explicitly: -``` - noun : Str -> Str -> Gender -> {s : Number => Str ; g : Gender} = - \vino,vini,g -> { - s = table { - Sg => vino ; - Pl => vini - } ; - g = g - } ; -``` -We need only number variation for the copula. -``` - copula : Number -> Str = - \n -> case n of { - Sg => "è" ; - Pl => "sono" - } ; -``` - -#NEW - -Determination is more complex than in English, because of gender: -``` - det : Number -> Str -> Str -> {s : Number => Str ; g : Gender} -> - {s : Str ; g : Gender ; n : Number} = - \n,m,f,cn -> { - s = case cn.g of {Masc => m ; Fem => f} ++ cn.s ! n ; - g = cn.g ; - n = n - } ; -``` - - -#NEW - -The complete set of linearization rules: -``` - lin - Is item quality = - ss (item.s ++ copula item.n ++ quality.s ! item.g ! item.n) ; - This = det Sg "questo" "questa" ; - That = det Sg "quel" "quella" ; - These = det Pl "questi" "queste" ; - Those = det Pl "quei" "quelle" ; - QKind quality kind = { - s = \\n => kind.s ! n ++ quality.s ! kind.g ! n ; - g = kind.g - } ; - Wine = noun "vino" "vini" Masc ; - Cheese = noun "formaggio" "formaggi" Masc ; - Fish = noun "pesce" "pesci" Masc ; - Pizza = noun "pizza" "pizze" Fem ; - Very qual = {s = \\g,n => "molto" ++ qual.s ! g ! n} ; - Fresh = adjective "fresco" "fresca" "freschi" "fresche" ; - Warm = regAdj "caldo" ; - Italian = regAdj "italiano" ; - Expensive = regAdj "caro" ; - Delicious = regAdj "delizioso" ; - Boring = regAdj "noioso" ; - } -``` - - -#NEW - -===Exercises on using parameters=== - -+ Experiment with multilingual generation and translation in the -``Foods`` grammars. - -+ Add items, qualities, and determiners to the grammar, -and try to get their inflection and inherent features right. 
- -+ Write a concrete syntax of ``Food`` for a language of your choice, -now aiming for complete grammatical correctness by the use of parameters. - -+ Measure the size of the context-free grammar corresponding to -``FoodsIta``. You can do this by printing the grammar in the context-free format -(``print_grammar -printer=bnf``) and counting the lines. - - - - -#NEW - -==Discontinuous constituents== - -A linearization record may contain more strings than one, and those -strings can be put apart in linearization. - -Example: English particle -verbs, (//switch off//). The object can appear between: - -//he switched it off// - -The verb //switch off// is called a -**discontinuous constituent**. - -We can define transitive verbs and their combinations as follows: -``` - lincat TV = {s : Number => Str ; part : Str} ; - - fun AppTV : Item -> TV -> Item -> Phrase ; - - lin AppTV subj tv obj = - {s = subj.s ++ tv.s ! subj.n ++ obj.s ++ tv.part} ; -``` - -**Exercise**. Define the language ``a^n b^n c^n`` in GF, i.e. -any number of //a//'s followed by the same number of //b//'s and -the same number of //c//'s. This language is not context-free, -but can be defined in GF by using discontinuous constituents. - - -#NEW - -==Strings at compile time vs. run time== - -Tokens are created in the following ways: -- quoted string: ``"foo"`` -- gluing : ``t + s`` -- predefined operations ``init, tail, tk, dp`` -- pattern matching over strings - - -Since //tokens must be known at compile time//, -the above operations may not be applied to **run-time variables** -(i.e. variables that stand for function arguments in linearization rules). - -Hence it is not legal to write -``` - cat Noun ; - fun Plural : Noun -> Noun ; - lin Plural n = {s = n.s + "s"} ; -``` -because ``n`` is a run-time variable. Also -``` - lin Plural n = {s = (regNoun n).s ! 
Pl} ; -``` -is incorrect with ``regNoun`` as defined in #Rsecinflection, because the run-time -variable is eventually sent to string pattern matching and gluing. - - -#NEW - -How to write tokens together without a space? -``` - lin Question p = {s = p + "?"} ; -``` -is incorrect. - -The way to go is to use an **unlexer** that creates correct spacing -after linearization. - -Correspondingly, a **lexer** that e.g. analyses ``"warm?"`` into -two tokens is needed before parsing. -This topic will be covered in #Rseclexing. - - - - - - -#NEW - -===Supplementary constructs for concrete syntax=== - -====Record extension and subtyping==== - -The symbol ``**`` is used for both record types and record objects. -``` - lincat TV = Verb ** {c : Case} ; - - lin Follow = regVerb "folgen" ** {c = Dative} ; -``` -``TV`` becomes a **subtype** of ``Verb``. - -If //T// is a subtype of //R//, an object of //T// can be used whenever -an object of //R// is required. - -**Covariance**: a function returning a record //T// as value can -also be used to return a value of a supertype //R//. - -**Contravariance**: a function taking an //R// as argument -can also be applied to any object of a subtype //T//. - - -#NEW - -====Tuples and product types==== - -Product types and tuples are syntactic sugar for record types and records: -``` - T1 * ... * Tn === {p1 : T1 ; ... ; pn : Tn} - <t1, ..., tn> === {p1 = t1 ; ... ; pn = tn} -``` -Thus the labels ``p1, p2,...`` are hard-coded. 
- - -#NEW - -====Prefix-dependent choices==== - -English indefinite article: -``` - oper artIndef : Str = - pre {"a" ; "an" / strs {"a" ; "e" ; "i" ; "o"}} ; -``` -Thus -``` - artIndef ++ "cheese" ---> "a" ++ "cheese" - artIndef ++ "apple" ---> "an" ++ "apple" -``` - - - - - - - - -#NEW - -=Lesson 4: Using the resource grammar library= - -#Lchapfive - -Goals: -- navigate in the GF resource grammar library and use it in applications -- get acquainted with basic linguistic categories -- write functors to achieve maximal sharing of code in multilingual grammars - - -#NEW - -==The coverage of the library== - -The current 12 resource languages are -- ``Bul``garian -- ``Cat``alan -- ``Dan``ish -- ``Eng``lish -- ``Fin``nish -- ``Fre``nch -- ``Ger``man -- ``Ita``lian -- ``Nor``wegian -- ``Rus``sian -- ``Spa``nish -- ``Swe``dish - - -The first three letters (``Eng`` etc) are used in grammar module names -(ISO 639 standard). - - -#NEW - -==The structure of the library== - -#Lseclexical - -Semantic grammars (up to now in this tutorial): -a grammar defines a system of meanings (abstract syntax) and -tells how they are expressed (concrete syntax). - -Resource grammars (as usual in linguistic tradition): -a grammar specifies the **grammatically correct combinations of words**, -whatever their meanings are. - -With resource grammars, we can achieve a -wider coverage than with semantic grammars. - -#NEW - -===Lexical vs. phrasal rules=== - -A resource grammar has two kinds of categories and two kinds of rules: -- lexical: - - lexical categories, to classify words - - lexical rules, to define words and their properties - -- phrasal (combinatorial, syntactic): - - phrasal categories, to classify phrases of arbitrary size - - phrasal rules, to combine phrases into larger phrases - - -GF makes no formal distinction between these two kinds. - -But it is a good discipline to follow. 
- - -#NEW - -===Lexical categories=== - -Two kinds of lexical categories: -- **closed**: - - a finite number of words - - seldom extended in the history of language - - structural words / function words, e.g. -``` - Conj ; -- conjunction e.g. "and" - QuantSg ; -- singular quantifier e.g. "this" - QuantPl ; -- plural quantifier e.g. "these" -``` - -- **open**: - - new words are added all the time - - content words, e.g. -``` - N ; -- noun e.g. "pizza" - A ; -- adjective e.g. "good" - V ; -- verb e.g. "sleep" -``` - - -#NEW - -===Lexical rules=== - -Closed classes: module ``Syntax``. In the ``Foods`` grammar, we need -``` - this_QuantSg, that_QuantSg : QuantSg ; - these_QuantPl, those_QuantPl : QuantPl ; - very_AdA : AdA ; -``` -Naming convention: word followed by the category (so we can -distinguish the quantifier //that// from the conjunction //that//). - -Open classes have no objects in ``Syntax``. Words are -built as they are needed in applications: if we have -``` - fun Wine : Kind ; -``` -we will define -``` - lin Wine = mkN "wine" ; -``` -where we use ``mkN`` from ``ParadigmsEng``: - - - -#NEW - -===Resource lexicon=== - -Alternative concrete syntax for -``` - fun Wine : Kind ; -``` -is to provide a **resource lexicon**, which contains definitions such as -``` - oper wine_N : N = mkN "wine" ; -``` -so that we can write -``` - lin Wine = wine_N ; -``` -Advantages: -- we accumulate a reusable lexicon -- we can use a #Rsecfunctor to speed up multilingual grammar implementation - - -#NEW - -===Phrasal categories=== - -In ``Foods``, we need just four phrasal categories: -``` - Cl ; -- clause e.g. "this pizza is good" - NP ; -- noun phrase e.g. "this pizza" - CN ; -- common noun e.g. "warm pizza" - AP ; -- adjectival phrase e.g. "very warm" -``` -Clauses are similar to sentences (``S``), but without a -fixed tense and mood; see #Rsecextended for how they relate. - -Common nouns are made into noun phrases by adding determiners. 
- - -#NEW - -===Syntactic combinations=== - -We need the following combinations: -``` - mkCl : NP -> AP -> Cl ; -- e.g. "this pizza is very warm" - mkNP : QuantSg -> CN -> NP ; -- e.g. "this pizza" - mkNP : QuantPl -> CN -> NP ; -- e.g. "these pizzas" - mkCN : AP -> CN -> CN ; -- e.g. "warm pizza" - mkAP : AdA -> AP -> AP ; -- e.g. "very warm" -``` -We also need **lexical insertion**, to form phrases from single words: -``` - mkCN : N -> CN ; - mkAP : A -> AP ; -``` -Naming convention: to construct a //C//, use a function ``mk``//C//. - -Heavy overloading: the current library -(version 1.2) has 23 operations named ``mkNP``! - - -#NEW - -===Example syntactic combination=== - -The sentence -#BEQU -//these very warm pizzas are Italian// -#ENQU -can be built as follows: -``` - mkCl - (mkNP these_QuantPl - (mkCN (mkAP very_AdA (mkAP warm_A)) (mkCN pizza_N))) - (mkAP italian_A) -``` -The task now: to define the concrete syntax of ``Foods`` so that -this syntactic tree gives the value of linearizing the semantic tree -``` - Is (These (QKind (Very Warm) Pizza)) Italian -``` - - - -#NEW - -==The resource API== - -Language-specific and language-independent parts - roughly, -- the syntax API ``Syntax``//L// has the same types and - functions for all languages //L// -- the morphology API ``Paradigms``//L// has partly - different types and functions - for different languages //L// - - -Full API documentation on-line: the **resource synopsis**, - -[``digitalgrammars.com/gf/lib/resource/doc/synopsis.html`` http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html] - - -#NEW - -===A miniature resource API: categories=== - -|| Category | Explanation | Example || -| ``Cl`` | clause (sentence), with all tenses | //she looks at this// | -| ``AP`` | adjectival phrase | //very warm// | -| ``CN`` | common noun (without determiner) | //red house// | -| ``NP`` | noun phrase (subject or object) | //the red house// | -| ``AdA`` | adjective-modifying adverb, | //very// | -| ``QuantSg`` | 
singular quantifier | //this// | -| ``QuantPl`` | plural quantifier | //these// | -| ``A`` | one-place adjective | //warm// | -| ``N`` | common noun | //house// | - - -#NEW - -===A miniature resource API: rules=== - -|| Function | Type | Example || -| ``mkCl`` | ``NP -> AP -> Cl`` | //John is very old// | -| ``mkNP`` | ``QuantSg -> CN -> NP`` | //this old man// | -| ``mkNP`` | ``QuantPl -> CN -> NP`` | //these old men// | -| ``mkCN`` | ``N -> CN`` | //house// | -| ``mkCN`` | ``AP -> CN -> CN`` | //very big blue house// | -| ``mkAP`` | ``A -> AP`` | //old// | -| ``mkAP`` | ``AdA -> AP -> AP`` | //very very old// | - -#NEW - -===A miniature resource API: structural words=== - -|| Function | Type | In English || -| ``this_QuantSg`` | ``QuantSg`` | //this// | -| ``that_QuantSg`` | ``QuantSg`` | //that// | -| ``these_QuantPl`` | ``QuantPl`` | //these// | -| ``those_QuantPl`` | ``QuantPl`` | //those// | -| ``very_AdA`` | ``AdA`` | //very// | - - -#NEW - -===A miniature resource API: paradigms=== - -From ``ParadigmsEng``: - -|| Function | Type || -| ``mkN`` | ``(dog : Str) -> N`` | -| ``mkN`` | ``(man,men : Str) -> N`` | -| ``mkA`` | ``(cold : Str) -> A`` | - -From ``ParadigmsIta``: - -|| Function | Type || -| ``mkN`` | ``(vino : Str) -> N`` | -| ``mkA`` | ``(caro : Str) -> A`` | - - -#NEW - -===A miniature resource API: more paradigms=== - -From ``ParadigmsGer``: - -|| Function | Type || -| ``Gender`` | ``Type`` | -| ``masculine`` | ``Gender`` | -| ``feminine`` | ``Gender`` | -| ``neuter`` | ``Gender`` | -| ``mkN`` | ``(Stufe : Str) -> N`` | -| ``mkN`` | ``(Bild,Bilder : Str) -> Gender -> N`` | -| ``mkA`` | ``(klein : Str) -> A`` | -| ``mkA`` | ``(gut,besser,beste : Str) -> A`` | - -From ``ParadigmsFin``: - -|| Function | Type || -| ``mkN`` | ``(talo : Str) -> N`` | -| ``mkA`` | ``(hieno : Str) -> A`` | - - - -#NEW - -===Exercises=== - -1. Try out the morphological paradigms in different languages. 
Do -as follows: -``` - > i -path=alltenses -retain alltenses/ParadigmsGer.gfo - > cc -table mkN "Farbe" - > cc -table mkA "gut" "besser" "beste" -``` - - -#NEW - -==Example: English== - -#Lsecenglish - -We assume the abstract syntax ``Foods`` from #Rchapfour. - -We don't need to think about inflection and agreement, but just pick -functions from the resource grammar library. - -We need a path with -- the current directory ``.`` -- the directory ``../foods``, in which ``Foods.gf`` resides. -- the library directory ``present``, which is relative to the - environment variable ``GF_LIB_PATH`` - - -Thus the beginning of the module is -``` - --# -path=.:../foods:present - - concrete FoodsEng of Foods = open SyntaxEng,ParadigmsEng in { -``` - - -#NEW - -===English example: linearization types and combination rules=== - -As linearization types, we use clauses for ``Phrase``, noun phrases -for ``Item``, common nouns for ``Kind``, and adjectival phrases for ``Quality``. -``` - lincat - Phrase = Cl ; - Item = NP ; - Kind = CN ; - Quality = AP ; -``` -Now the combination rules we need almost write themselves automatically: -``` - lin - Is item quality = mkCl item quality ; - This kind = mkNP this_QuantSg kind ; - That kind = mkNP that_QuantSg kind ; - These kind = mkNP these_QuantPl kind ; - Those kind = mkNP those_QuantPl kind ; - QKind quality kind = mkCN quality kind ; - Very quality = mkAP very_AdA quality ; -``` - - -#NEW - -===English example: lexical rules=== - -We use resource paradigms and lexical insertion rules. - -The two-place noun paradigm is needed only once, for -//fish// - everything else is regular. 
-``` - Wine = mkCN (mkN "wine") ; - Pizza = mkCN (mkN "pizza") ; - Cheese = mkCN (mkN "cheese") ; - Fish = mkCN (mkN "fish" "fish") ; - Fresh = mkAP (mkA "fresh") ; - Warm = mkAP (mkA "warm") ; - Italian = mkAP (mkA "Italian") ; - Expensive = mkAP (mkA "expensive") ; - Delicious = mkAP (mkA "delicious") ; - Boring = mkAP (mkA "boring") ; - } -``` - - -#NEW - -===English example: exercises=== - -1. Compile the grammar ``FoodsEng`` and generate -and parse some sentences. - -2. Write a concrete syntax of ``Foods`` for Italian -or some other language included in the resource library. You can -compare the results with the hand-written -grammars presented earlier in this tutorial. - - - -#NEW - -==Functor implementation of multilingual grammars== - -#Lsecfunctor - -===New language by copy and paste=== - -If you write a concrete syntax of ``Foods`` for some other -language, much of the code will look exactly the same -as for English. This is because -- the ``Syntax`` API is the same for all languages (because - all languages in the resource package do implement the same - syntactic structures) -- languages tend to use the syntactic structures in similar ways - - -But lexical rules are more language-dependent. - -Thus, to port a grammar to a new language, you -+ copy the concrete syntax of a given language -+ change the words (strings and inflection paradigms) - - -Can we avoid this programming by copy-and-paste? - - - -#NEW - -===Functors: functions on the module level=== - -**Functors** familiar from the functional programming languages ML and OCaml, -also known as **parametrized modules**. - -In GF, a functor is a module that ``open``s one or more **interfaces**. - -An ``interface`` is a module similar to a ``resource``, but it only -contains the //types// of ``oper``s, not (necessarily) their definitions. - -Syntax for functors: add the keyword ``incomplete``. 
We will use the header -``` - incomplete concrete FoodsI of Foods = open Syntax, LexFoods in -``` -where -``` - interface Syntax -- the resource grammar interface - interface LexFoods -- the domain lexicon interface -``` -When we moreover have -``` - instance SyntaxEng of Syntax -- the English resource grammar - instance LexFoodsEng of LexFoods -- the English domain lexicon -``` -we can write a **functor instantiation**, -``` - concrete FoodsGer of Foods = FoodsI with - (Syntax = SyntaxGer), - (LexFoods = LexFoodsGer) ; -``` - -#NEW - -===Code for the Foods functor=== - -``` - --# -path=.:../foods - - incomplete concrete FoodsI of Foods = open Syntax, LexFoods in { - lincat - Phrase = Cl ; - Item = NP ; - Kind = CN ; - Quality = AP ; - lin - Is item quality = mkCl item quality ; - This kind = mkNP this_QuantSg kind ; - That kind = mkNP that_QuantSg kind ; - These kind = mkNP these_QuantPl kind ; - Those kind = mkNP those_QuantPl kind ; - QKind quality kind = mkCN quality kind ; - Very quality = mkAP very_AdA quality ; - - Wine = mkCN wine_N ; - Pizza = mkCN pizza_N ; - Cheese = mkCN cheese_N ; - Fish = mkCN fish_N ; - Fresh = mkAP fresh_A ; - Warm = mkAP warm_A ; - Italian = mkAP italian_A ; - Expensive = mkAP expensive_A ; - Delicious = mkAP delicious_A ; - Boring = mkAP boring_A ; - } -``` - - -#NEW - -===Code for the LexFoods interface=== - -#Lsecinterface - -``` - interface LexFoods = open Syntax in { - oper - wine_N : N ; - pizza_N : N ; - cheese_N : N ; - fish_N : N ; - fresh_A : A ; - warm_A : A ; - italian_A : A ; - expensive_A : A ; - delicious_A : A ; - boring_A : A ; - } -``` - -#NEW - -===Code for a German instance of the lexicon=== - -``` - instance LexFoodsGer of LexFoods = open SyntaxGer, ParadigmsGer in { - oper - wine_N = mkN "Wein" ; - pizza_N = mkN "Pizza" "Pizzen" feminine ; - cheese_N = mkN "Käse" "Käsen" masculine ; - fish_N = mkN "Fisch" ; - fresh_A = mkA "frisch" ; - warm_A = mkA "warm" "wärmer" "wärmste" ; - italian_A = mkA "italienisch" ; - 
expensive_A = mkA "teuer" ; - delicious_A = mkA "köstlich" ; - boring_A = mkA "langweilig" ; - } -``` - - -#NEW - -===Code for a German functor instantiation=== - -``` - --# -path=.:../foods:present - - concrete FoodsGer of Foods = FoodsI with - (Syntax = SyntaxGer), - (LexFoods = LexFoodsGer) ; -``` - - - -#NEW - -===Adding languages to a functor implementation=== - -Just two modules are needed: -- a domain lexicon instance -- a functor instantiation - - -The functor instantiation is completely mechanical to write. - -The domain lexicon instance requires some knowledge of the words of the -language: -- what words are used for which concepts -- how the words are inflected -- features such as genders - - -#NEW - -===Example: adding Finnish=== - -Lexicon instance -``` - instance LexFoodsFin of LexFoods = open SyntaxFin, ParadigmsFin in { - oper - wine_N = mkN "viini" ; - pizza_N = mkN "pizza" ; - cheese_N = mkN "juusto" ; - fish_N = mkN "kala" ; - fresh_A = mkA "tuore" ; - warm_A = mkA "lämmin" ; - italian_A = mkA "italialainen" ; - expensive_A = mkA "kallis" ; - delicious_A = mkA "herkullinen" ; - boring_A = mkA "tylsä" ; - } -``` -Functor instantiation -``` - --# -path=.:../foods:present - - concrete FoodsFin of Foods = FoodsI with - (Syntax = SyntaxFin), - (LexFoods = LexFoodsFin) ; -``` - - -#NEW - -===A design pattern=== - -This can be seen as a //design pattern// for multilingual grammars: -``` - concrete DomainL* - - instance LexDomainL instance SyntaxL* - - incomplete concrete DomainI - / | \ - interface LexDomain abstract Domain interface Syntax* -``` -Modules marked with ``*`` are either given in the library, or trivial. - -Of the hand-written modules, only ``LexDomainL`` is language-dependent. - - -#NEW - -===Functors: exercises=== - -1. Compile and test ``FoodsGer``. - -2. Refactor ``FoodsEng`` into a functor instantiation. - -3. Instantiate the functor ``FoodsI`` to some language of -your choice. - -4. 
Design a small grammar that can be used for controlling -an MP3 player. The grammar should be able to recognize commands such -as //play this song//, with the following variations: -- verbs: //play//, //remove// -- objects: //song//, //artist// -- determiners: //this//, //the previous// -- verbs without arguments: //stop//, //pause// - - -The implementation goes in the following phases: -+ abstract syntax -+ (optional:) prototype string-based concrete syntax -+ functor over resource syntax and lexicon interface -+ lexicon instance for the first language -+ functor instantiation for the first language -+ lexicon instance for the second language -+ functor instantiation for the second language -+ ... - - - -#NEW - -==Restricted inheritance== - -===A problem with functors=== - -Problem: a functor only works when all languages use the resource ``Syntax`` -in the same way. - -Example (contrived): assume that English has -no word for ``Pizza``, but has to use the paraphrase //Italian pie//. -This is no longer a noun ``N``, but a complex phrase -in the category ``CN``. - -Possible solution: change the interface ``LexFoods`` with -``` - oper pizza_CN : CN ; -``` -Problem with this solution: -- we may end up changing the interface and the functor with each new language -- we must every time also change the instances for the old languages to maintain - type correctness - - -#NEW - -===Restricted inheritance: include or exclude=== - -A module may inherit just a selection of names. - -Example: the ``FoodMarket`` example #Rsecarchitecture: -``` - abstract Foodmarket = Food, Fruit [Peach], Mushroom - [Agaric] -``` -Here, from ``Fruit`` we include ``Peach`` only, and from ``Mushroom`` -we exclude ``Agaric``. - -A concrete syntax of ``Foodmarket`` must make the analogous restrictions. - - -#NEW - -===The functor problem solved=== - -The English instantiation inherits the functor -implementation except for the constant ``Pizza``. 
This constant -is defined in the body instead: -``` - --# -path=.:../foods:present - - concrete FoodsEng of Foods = FoodsI - [Pizza] with - (Syntax = SyntaxEng), - (LexFoods = LexFoodsEng) ** - open SyntaxEng, ParadigmsEng in { - - lin Pizza = mkCN (mkA "Italian") (mkN "pie") ; - } -``` - - -#NEW - -==Grammar reuse== - -Abstract syntax modules can be used as interfaces, -and concrete syntaxes as their instances. - -The following correspondencies are then applied: -``` - cat C <---> oper C : Type - - fun f : A <---> oper f : A - - lincat C = T <---> oper C : Type = T - - lin f = t <---> oper f : A = t -``` - - - - -#NEW - -===Library exercises=== - -1. Find resource grammar terms for the following -English phrases (in the category ``Phr``). You can first try to -build the terms manually. - -//every man loves a woman// - -//this grammar speaks more than ten languages// - -//which languages aren't in the grammar// - -//which languages did you want to speak// - - -Then translate the phrases to other languages. - - -#NEW - -==Tenses== - -#Lsectense - -In ``Foods`` grammars, we have used the path -``` - --# -path=.:../foods -``` -The library subdirectory ``present`` is a restricted version -of the resource, with only present tense of verbs and sentences. 
- -By just changing the path, we get all tenses: -``` - --# -path=.:../foods:alltenses -``` -Now we can see all the tenses of phrases, by using the ``-all`` flag -in linearization: -``` - > gr | l -all - This wine is delicious - Is this wine delicious - This wine isn't delicious - Isn't this wine delicious - This wine is not delicious - Is this wine not delicious - This wine has been delicious - Has this wine been delicious - This wine hasn't been delicious - Hasn't this wine been delicious - This wine has not been delicious - Has this wine not been delicious - This wine was delicious - Was this wine delicious - This wine wasn't delicious - Wasn't this wine delicious - This wine was not delicious - Was this wine not delicious - This wine had been delicious - Had this wine been delicious - This wine hadn't been delicious - Hadn't this wine been delicious - This wine had not been delicious - Had this wine not been delicious - This wine will be delicious - Will this wine be delicious - This wine won't be delicious - Won't this wine be delicious - This wine will not be delicious - Will this wine not be delicious - This wine will have been delicious - Will this wine have been delicious - This wine won't have been delicious - Won't this wine have been delicious - This wine will not have been delicious - Will this wine not have been delicious - This wine would be delicious - Would this wine be delicious - This wine wouldn't be delicious - Wouldn't this wine be delicious - This wine would not be delicious - Would this wine not be delicious - This wine would have been delicious - Would this wine have been delicious - This wine wouldn't have been delicious - Wouldn't this wine have been delicious - This wine would not have been delicious - Would this wine not have been delicious -``` -We also see -- polarity (positive vs. negative) -- word order (direct vs. 
inverted) -- variation between contracted and full negation - - -The list is even longer in languages that have more -tenses and moods, e.g. the Romance languages. - - - -#NEW - -=Lesson 5: Refining semantics in abstract syntax= - -#Lchapsix - -Goals: -- include semantic conditions in grammars, by using - - **dependent types** - - **higher order abstract syntax** - - proof objects - - semantic definitions - -These concepts are inherited from **type theory** (more precisely: -constructive type theory, or Martin-Löf type theory). - -Type theory is the basis of **logical frameworks**. - -GF = logical framework + concrete syntax. - - -#NEW - -==Dependent types== - -#Lsecsmarthouse - -Problem: to express **conditions of semantic well-formedness**. - -Example: a voice command system for a "smart house" wants to -eliminate meaningless commands. - -Thus we want to restrict particular actions to -particular devices - we can //dim a light//, but we cannot -//dim a fan//. - -The following example is borrowed from the -Regulus Book (Rayner & al. 2006). - -A simple example is a "smart house" system, which -defines voice commands for household appliances. - - -#NEW - -===A dependent type system=== - -Ontology: -- there are commands and device kinds -- for each kind of device, there are devices and actions -- a command concerns an action of some kind on a device of the same kind - - -Abstract syntax formalizing this: -``` - cat - Command ; - Kind ; - Device Kind ; -- argument type Kind - Action Kind ; - fun - CAction : (k : Kind) -> Action k -> Device k -> Command ; -``` -``Device`` and ``Action`` are both dependent types. - - -#NEW - -===Examples of devices and actions=== - -Assume the kinds ``light`` and ``fan``, -``` - light, fan : Kind ; - dim : Action light ; -``` -Given a kind, //k//, you can form the device //the k//. 
-``` - DKindOne : (k : Kind) -> Device k ; -- the light -``` -Now we can form the syntax tree -``` - CAction light dim (DKindOne light) -``` -but we cannot form the trees -``` - CAction light dim (DKindOne fan) - CAction fan dim (DKindOne light) - CAction fan dim (DKindOne fan) -``` - - -#NEW - -===Linearization and parsing with dependent types=== - -Concrete syntax does not know if a category is a dependent type. -``` - lincat Action = {s : Str} ; - lin CAction _ act dev = {s = act.s ++ dev.s} ; -``` -Notice that the ``Kind`` argument is suppressed in linearization. - -Parsing with dependent types is performed in two phases: -+ context-free parsing -+ filtering through type checker - - -By just doing the first phase, the ``kind`` argument is not found: -``` - > parse "dim the light" - CAction ? dim (DKindOne light) -``` -Moreover, type-incorrect commands are not rejected: -``` - > parse "dim the fan" - CAction ? dim (DKindOne fan) -``` -The term ``?`` is a **metavariable**, returned by the parser -for any subtree that is suppressed by a linearization rule. -These are the same kind of metavariables as were used #Rsecediting -to mark incomplete parts of trees in the syntax editor. - - - -#NEW - -===Solving metavariables=== - -Use the command ``put_tree = pt`` with the option ``-typecheck``: -``` - > parse "dim the light" | put_tree -typecheck - CAction light dim (DKindOne light) -``` -The ``typecheck`` process may fail, in which case an error message -is shown and no tree is returned: -``` - > parse "dim the fan" | put_tree -typecheck - - Error in tree UCommand (CAction ? 0 dim (DKindOne fan)) : - (? 0 <> fan) (? 0 <> light) -``` - - - - -#NEW - -==Polymorphism== - -#Lsecpolymorphic - -Sometimes an action can be performed on all kinds of devices. 
- -This is represented as a function that takes a ``Kind`` as an argument -and produce an ``Action`` for that ``Kind``: -``` - fun switchOn, switchOff : (k : Kind) -> Action k ; -``` -Functions of this kind are called **polymorphic**. - -We can use this kind of polymorphism in concrete syntax as well, -to express Haskell-type library functions: -``` - oper const :(a,b : Type) -> a -> b -> a = - \_,_,c,_ -> c ; - - oper flip : (a,b,c : Type) -> (a -> b ->c) -> b -> a -> c = - \_,_,_,f,x,y -> f y x ; -``` - - -#NEW - -===Dependent types: exercises=== - -1. Write an abstract syntax module with above contents -and an appropriate English concrete syntax. Try to parse the commands -//dim the light// and //dim the fan//, with and without ``solve`` filtering. - - -2. Perform random and exhaustive generation, with and without -``solve`` filtering. - -3. Add some device kinds and actions to the grammar. - - - -#NEW - -==Proof objects== - -**Curry-Howard isomorphism** = **propositions as types principle**: -a proposition is a type of proofs (= proof objects). - -Example: define the //less than// proposition for natural numbers, -``` - cat Nat ; - fun Zero : Nat ; - fun Succ : Nat -> Nat ; -``` -Define inductively what it means for a number //x// to be //less than// -a number //y//: -- ``Zero`` is less than ``Succ`` //y// for any //y//. -- If //x// is less than //y//, then ``Succ`` //x// is less than ``Succ`` //y//. 
- - -Expressing these axioms in type theory -with a dependent type ``Less`` //x y// and two functions constructing -its objects: -``` - cat Less Nat Nat ; - fun lessZ : (y : Nat) -> Less Zero (Succ y) ; - fun lessS : (x,y : Nat) -> Less x y -> Less (Succ x) (Succ y) ; -``` -Example: the fact that 2 is less that 4 has the proof object -``` - lessS (Succ Zero) (Succ (Succ (Succ Zero))) - (lessS Zero (Succ (Succ Zero)) (lessZ (Succ Zero))) - : Less (Succ (Succ Zero)) (Succ (Succ (Succ (Succ Zero)))) -``` - - - -#NEW - -===Proof-carrying documents=== - -Idea: to be semantically well-formed, the abstract syntax of a document -must contain a proof of some property, -although the proof is not shown in the concrete document. - -Example: documents describing flight connections: - -//To fly from Gothenburg to Prague, first take LH3043 to Frankfurt, then OK0537 to Prague.// - -The well-formedness of this text is partly expressible by dependent typing: -``` - cat - City ; - Flight City City ; - fun - Gothenburg, Frankfurt, Prague : City ; - LH3043 : Flight Gothenburg Frankfurt ; - OK0537 : Flight Frankfurt Prague ; -``` -To extend the conditions to flight connections, we introduce a category -of proofs that a change is possible: -``` - cat IsPossible (x,y,z : City)(Flight x y)(Flight y z) ; -``` -A legal connection is formed by the function -``` - fun Connect : (x,y,z : City) -> - (u : Flight x y) -> (v : Flight y z) -> - IsPossible x y z u v -> Flight x z ; -``` - - -#NEW - -==Restricted polymorphism== - -Above, all Actions were either of -- **monomorphic**: defined for one Kind -- **polymorphic**: defined for all Kinds - - -To make this scale up for new Kinds, we can refine this to -**restricted polymorphism**: defined for Kinds of a certain **class** - - -The notion of class uses the Curry-Howard isomorphism as follows: -- a class is a **predicate** of Kinds --- i.e. 
a type depending of Kinds -- a Kind is in a class if there is a proof object of this type - - -#NEW - -===Example: classes for switching and dimming=== - -We modify the smart house grammar: -``` -cat - Switchable Kind ; - Dimmable Kind ; -fun - switchable_light : Switchable light ; - switchable_fan : Switchable fan ; - dimmable_light : Dimmable light ; - - switchOn : (k : Kind) -> Switchable k -> Action k ; - dim : (k : Kind) -> Dimmable k -> Action k ; -``` -Classes for new actions can be added incrementally. - - - -#NEW - -==Variable bindings== - -#Lsecbinding - -Mathematical notation and programming languages have -expressions that **bind** variables. - -Example: universal quantifier formula -``` - (All x)B(x) -``` -The variable ``x`` has a **binding** ``(All x)``, and -occurs **bound** in the **body** ``B(x)``. - -Examples from informal mathematical language: -``` - for all x, x is equal to x - - the function that for any numbers x and y returns the maximum of x+y - and x*y - - Let x be a natural number. Assume that x is even. Then x + 3 is odd. -``` - - - -#NEW - -===Higher-order abstract syntax=== - -Abstract syntax can use functions as arguments: -``` - cat Ind ; Prop ; - fun All : (Ind -> Prop) -> Prop -``` -where ``Ind`` is the type of individuals and ``Prop``, -the type of propositions. - -Let us add an equality predicate -``` - fun Eq : Ind -> Ind -> Prop -``` -Now we can form the tree -``` - All (\x -> Eq x x) -``` -which we want to relate to the ordinary notation -``` - (All x)(x = x) -``` -In **higher-order abstract syntax** (HOAS), all variable bindings are -expressed using higher-order syntactic constructors. - - -#NEW - -===Higher-order abstract syntax: linearization=== - -HOAS has proved to be useful in the semantics and computer implementation of -variable-binding expressions. - -How do we relate HOAS to the concrete syntax? 
- -In GF, we write -``` - fun All : (Ind -> Prop) -> Prop - lin All B = {s = "(" ++ "All" ++ B.$0 ++ ")" ++ B.s} -``` -General rule: if an argument type of a ``fun`` function is -a function type ``A -> C``, the linearization type of -this argument is the linearization type of ``C`` -together with a new field ``$0 : Str``. - -The argument ``B`` thus has the linearization type -``` - {s : Str ; $0 : Str}, -``` -If there are more bindings, we add ``$1``, ``$2``, etc. - - -#NEW - -===Eta expansion=== - -To make sense of linearization, syntax trees must be -**eta-expanded**: for any function of type -``` - A -> B -``` -an eta-expanded syntax tree has the form -``` - \x -> b -``` -where ``b : B`` under the assumption ``x : A``. - -Given the linearization rule -``` - lin Eq a b = {s = "(" ++ a.s ++ "=" ++ b.s ++ ")"} -``` -the linearization of the tree -``` - \x -> Eq x x -``` -is the record -``` - {$0 = "x", s = ["( x = x )"]} -``` -Then we can compute the linearization of the formula, -``` - All (\x -> Eq x x) --> {s = "[( All x ) ( x = x )]"}. -``` -The linearization of the variable ``x`` is, -"automagically", the string ``"x"``. - - - -#NEW - -===Parsing variable bindings=== - -GF can treat any one-word string as a variable symbol. -``` - > p -cat=Prop "( All x ) ( x = x )" - All (\x -> Eq x x) -``` -Variables must be bound if they are used: -``` - > p -cat=Prop "( All x ) ( x = y )" - no tree found -``` - - - - -#NEW - -===Exercises on variable bindings=== - -1. Write an abstract syntax of the whole -**predicate calculus**, with the -**connectives** "and", "or", "implies", and "not", and the -**quantifiers** "exists" and "for all". Use higher-order functions -to guarantee that unbounded variables do not occur. - -2. Write a concrete syntax for your favourite -notation of predicate calculus. Use Latex as target language -if you want nice output. You can also try producing boolean -expressions of some programming language. 
Use as many parentheses as you need to -guarantee non-ambiguity. - - -#NEW - -==Semantic definitions== - -#Lsecdefdef - -The ``fun`` judgements of GF are declarations of functions, giving their types. - -Can we **compute** ``fun`` functions? - -Mostly we are not interested, since functions are seen as constructors, -i.e. data forms - as usual with -``` - fun Zero : Nat ; - fun Succ : Nat -> Nat ; -``` -But it is also possible to give **semantic definitions** to functions. -The key word is ``def``: -``` - fun one : Nat ; - def one = Succ Zero ; - - fun twice : Nat -> Nat ; - def twice x = plus x x ; - - fun plus : Nat -> Nat -> Nat ; - def - plus x Zero = x ; - plus x (Succ y) = Succ (plus x y) ; -``` - -#NEW - -===Computing a tree=== - -Computation: follow a chain of definitions until no definition -can be applied, -``` - plus one one --> - plus (Succ Zero) (Succ Zero) --> - Succ (plus (Succ Zero) Zero) --> - Succ (Succ Zero) -``` -Computation in GF is performed with the ``put_term`` command and the -``compute`` transformation, e.g. -``` - > parse -tr "1 + 1" | put_term -transform=compute -tr | l - plus one one - Succ (Succ Zero) - s(s(0)) -``` - - -#NEW - -===Definitional equality=== - -Two trees are definitionally equal if they compute into the same tree. - -Definitional equality does not guarantee sameness of linearization: -``` - plus one one ===> 1 + 1 - Succ (Succ Zero) ===> s(s(0)) -``` -The main use of this concept is in type checking: sameness of types. - -Thus e.g. the following types are equal -``` - Less Zero one - Less Zero (Succ Zero) -``` -so that an object of one also is an object of the other. 
- - - -#NEW - -===Judgement forms for constructors=== - -The judgement form ``data`` tells that a category has -certain functions as constructors: -``` - data Nat = Succ | Zero ; -``` -The type signatures of constructors are given separately, -``` - fun Zero : Nat ; - fun Succ : Nat -> Nat ; -``` -There is also a shorthand: -``` - data Succ : Nat -> Nat ; === fun Succ : Nat -> Nat ; - data Nat = Succ ; -``` -Notice: in ``def`` definitions, identifier patterns not -marked as ``data`` will be treated as variables. - - -#NEW - -===Exercises on semantic definitions=== - -1. Implement an interpreter of a small functional programming -language with natural numbers, lists, pairs, lambdas, etc. Use higher-order -abstract syntax with semantic definitions. As concrete syntax, use -your favourite programming language. - -2. There is no termination checking for ``def`` definitions. -Construct an example that makes type checking loop. -Type checking can be invoked with ``put_term -transform=solve``. - - - -#NEW - -=Lesson 6: Grammars of formal languages= - - -#Lchapseven - -Goals: -- write grammars for formal languages (mathematical notation, programming languages) -- interface between formal and natural languages -- implement a compiler by using GF - - -#NEW - -===Arithmetic expressions=== - -We construct a calculator with addition, subtraction, multiplication, and -division of integers. -``` - abstract Calculator = { - - cat Exp ; - - fun - EPlus, EMinus, ETimes, EDiv : Exp -> Exp -> Exp ; - EInt : Int -> Exp ; - } -``` -The category ``Int`` is a built-in category of -integers. Its syntax trees are **integer literals**, i.e. -sequences of digits: -``` - 5457455814608954681 : Int -``` -These are the only objects of type ``Int``: -grammars are not allowed to declare functions with ``Int`` as value type. 
- - -#NEW - -===Concrete syntax: a simple approach=== - -We begin with a -concrete syntax that always uses parentheses around binary -operator applications: -``` - concrete CalculatorP of Calculator = { - - lincat - Exp = SS ; - lin - EPlus = infix "+" ; - EMinus = infix "-" ; - ETimes = infix "*" ; - EDiv = infix "/" ; - EInt i = i ; - - oper - infix : Str -> SS -> SS -> SS = \f,x,y -> - ss ("(" ++ x.s ++ f ++ y.s ++ ")") ; - } -``` -Now we have -``` - > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) - ( 2 + ( 3 * 4 ) ) -``` -First problems: -- to get rid of superfluous spaces and -- to recognize integer literals in the parser - - -#NEW - -==Lexing and unlexing== - -#Lseclexing - -The input of parsing in GF is not just a string, but a list of -**tokens**, returned by a **lexer**. - -The default lexer in GF returns chunks separated by spaces: -``` - "(12 + (3 * 4))" ===> "(12", "+", "(3", "*", "4))" -``` -The proper way would be -``` - "(", "12", "+", "(", "3", "*", "4", ")", ")" -``` -Moreover, the tokens ``"12"``, ``"3"``, and ``"4"`` should be recognized as -integer literals - they cannot be found in the grammar. - - -#NEW - -Lexers are invoked by flags to the command ``put_string = ps``. 
-``` - > put_string -lexcode "(2 + (3 * 4))" - ( 2 + ( 3 * 4 ) ) -``` -This can be piped into a parser, as usual: -``` - > ps -lexcode "(2 + (3 * 4))" | parse - EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) -``` -In linearization, we use a corresponding **unlexer**: -``` - > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) | ps -unlexcode - (2 + (3 * 4)) -``` - - -#NEW - -===Most common lexers and unlexers=== - - || lexer | unlexer | description || - | ``chars`` | ``unchars`` | each character is a token - | ``lexcode`` | ``unlexcode`` | program code conventions (uses Haskell's lex) - | ``lexmixed`` | ``unlexmixed`` | like text, but between $ signs like code - | ``lextext`` | ``unlextext`` | with conventions on punctuation and capitals - | ``words`` | ``unwords`` | (default) tokens separated by space characters - -%TODO: also on alphabet encodings - although somewhere else - - -#NEW - -==Precedence and fixity== - -Arithmetic expressions should be unambiguous. If we write -``` - 2 + 3 * 4 -``` -it should be parsed as one, but not both, of -``` - EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) - ETimes (EPlus (EInt 2) (EInt 3)) (EInt 4) -``` -We choose the former tree, because -multiplication has **higher precedence** than addition. - -To express the latter tree, we have to use parentheses: -``` - (2 + 3) * 4 -``` -The usual precedence rules: -- Integer constants and expressions in parentheses have the highest precedence. -- Multiplication and division have equal precedence, lower than the highest - but higher than addition and subtraction, which are again equal. -- All the four binary operations are **left-associative**: - ``1 + 2 + 3`` means the same as ``(1 + 2) + 3``. 
- - - -#NEW - -===Precedence as a parameter=== - -Precedence can be made into an inherent feature of expressions: -``` - oper - Prec : PType = Ints 2 ; - TermPrec : Type = {s : Str ; p : Prec} ; - - mkPrec : Prec -> Str -> TermPrec = \p,s -> {s = s ; p = p} ; - - lincat - Exp = TermPrec ; -``` -Notice ``Ints 2``: a parameter type, whose values are the integers -``0,1,2``. - -Using precedence levels: compare the inherent precedence of an -expression with the expected precedence. -- if the inherent precedence is lower than the expected precedence, - use parentheses -- otherwise, no parentheses are needed - - -This idea is encoded in the operation -``` - oper usePrec : TermPrec -> Prec -> Str = \x,p -> - case lessPrec x.p p of { - True => "(" ++ x.s ++ ")" ; - False => x.s - } ; -``` -(We use ``lessPrec`` from ``lib/prelude/Formal``.) - - - -#NEW - -===Fixities=== - -We can define left-associative infix expressions: -``` - infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y -> - mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; -``` -Constant-like expressions (the highest level): -``` - constant : Str -> TermPrec = mkPrec 2 ; -``` -All these operations can be found in ``lib/prelude/Formal``, -which has 5 levels. - -Now we can write the whole concrete syntax of ``Calculator`` compactly: -``` - concrete CalculatorC of Calculator = open Formal, Prelude in { - - flags lexer = codelit ; unlexer = code ; startcat = Exp ; - - lincat Exp = TermPrec ; - - lin - EPlus = infixl 0 "+" ; - EMinus = infixl 0 "-" ; - ETimes = infixl 1 "*" ; - EDiv = infixl 1 "/" ; - EInt i = constant i.s ; - } -``` - - -#NEW - -===Exercises on precedence=== - -1. Define non-associative and right-associative infix operations -analogous to ``infixl``. - -2. Add a constructor that puts parentheses around expressions -to raise their precedence, but that is eliminated by a ``def`` definition. -Test parsing with and without a pipe to ``pt -transform=compute``. 
- - - -#NEW - -==Code generation as linearization== - -Translate arithmetic (infix) to JVM (postfix): -``` - 2 + 3 * 4 - - ===> - - iconst 2 ; iconst 3 ; iconst 4 ; imul ; iadd -``` -Just give linearization rules for JVM: -``` - lin - EPlus = postfix "iadd" ; - EMinus = postfix "isub" ; - ETimes = postfix "imul" ; - EDiv = postfix "idiv" ; - EInt i = ss ("iconst" ++ i.s) ; - oper - postfix : Str -> SS -> SS -> SS = \op,x,y -> - ss (x.s ++ ";" ++ y.s ++ ";" ++ op) ; -``` - - -#NEW - -===Programs with variables=== - -A **straight code** programming language, with -**initializations** and **assignments**: -``` - int x = 2 + 3 ; - int y = x + 1 ; - x = x + 9 * y ; -``` -We define programs by the following constructors: -``` - fun - PEmpty : Prog ; - PInit : Exp -> (Var -> Prog) -> Prog ; - PAss : Var -> Exp -> Prog -> Prog ; -``` -``PInit`` uses higher-order abstract syntax for making the -initialized variable available in the **continuation** of the program. - -The abstract syntax tree for the above code is -``` - PInit (EPlus (EInt 2) (EInt 3)) (\x -> - PInit (EPlus (EVar x) (EInt 1)) (\y -> - PAss x (EPlus (EVar x) (ETimes (EInt 9) (EVar y))) - PEmpty)) -``` -No uninitialized variables are allowed - there are no constructors for ``Var``! -But we do have the rule -``` - fun EVar : Var -> Exp ; -``` -The rest of the grammar is just the same as for arithmetic expressions -#Rsecprecedence. The best way to implement it is perhaps by writing a -module that extends the expression module. The most natural start category -of the extension is ``Prog``. - - -#NEW - -===Exercises on code generation=== - -1. Define a C-like concrete syntax of the straight-code language. - -2. Extend the straight-code language to expressions of type ``float``. -To guarantee type safety, you can define a category ``Typ`` of types, and -make ``Exp`` and ``Var`` dependent on ``Typ``. Basic floating point expressions -can be formed from literals of the built-in GF type ``Float``. 
The arithmetic -operations should be made polymorphic (as #Rsecpolymorphic). - -3. Extend JVM generation to the straight-code language, using -two more instructions -- ``iload`` //x//, which loads the value of the variable //x// -- ``istore`` //x// which stores a value to the variable //x// - - -Thus the code for the example in the previous section is -``` - iconst 2 ; iconst 3 ; iadd ; istore x ; - iload x ; iconst 1 ; iadd ; istore y ; - iload x ; iconst 9 ; iload y ; imul ; iadd ; istore x ; -``` - -4. If you made the exercise of adding floating point numbers to -the language, you can now cash out the main advantage of type checking -for code generation: selecting type-correct JVM instructions. The floating -point instructions are precisely the same as the integer one, except that -the prefix is ``f`` instead of ``i``, and that ``fconst`` takes floating -point literals as arguments. - - - -#NEW - -=Lesson 7: Embedded grammars= - -#Lchapeight - -Goals: -- use grammars as parts of programs written in Haskell and JavaScript -- implement stand-alone question-answering systems and translators based on - GF grammars -- generate language models for speech recognition from GF grammars - - - -#NEW - -==Functionalities of an embedded grammar format== - -GF grammars can be used as parts of programs written in other programming -languages, to be called **host languages**. -This facility is based on several components: -- PGF: a portable format for multilingual GF grammars -- a PGF interpreter written in the host language -- a library in the host language that enables calling the interpreter -- a way to manipulate abstract syntax trees in the host language - - - - -#NEW - -==The portable grammar format== - -The portable format is called PGF, "Portable Grammar Format". 
- -This format is produced by the GF batch compiler ``gfc``, -executable from the operative system shell: -``` - % gfc --make SOURCE.gf -``` -PGF is the recommended format in -which final grammar products are distributed, because they -are stripped from superfluous information and can be started and applied -faster than sets of separate modules. - -Application programmers have never any need to read or modify PGF files. - -PGF thus plays the same role as machine code in -general-purpose programming (or bytecode in Java). - - -#NEW - -===Haskell: the EmbedAPI module=== - -The Haskell API contains (among other things) the following types and functions: -``` - readPGF :: FilePath -> IO PGF - - linearize :: PGF -> Language -> Tree -> String - parse :: PGF -> Language -> Category -> String -> [Tree] - - linearizeAll :: PGF -> Tree -> [String] - linearizeAllLang :: PGF -> Tree -> [(Language,String)] - - parseAll :: PGF -> Category -> String -> [[Tree]] - parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])] - - languages :: PGF -> [Language] - categories :: PGF -> [Category] - startCat :: PGF -> Category -``` -This is the only module that needs to be imported in the Haskell application. -It is available as a part of the GF distribution, in the file -``src/PGF.hs``. - - - -#NEW - -===First application: a translator=== - -Let us first build a stand-alone translator, which can translate -in any multilingual grammar between any languages in the grammar. -``` -module Main where - -import PGF -import System (getArgs) - -main :: IO () -main = do - file:_ <- getArgs - gr <- readPGF file - interact (translate gr) - -translate :: PGF -> String -> String -translate gr s = case parseAllLang gr (startCat gr) s of - (lg,t:_):_ -> unlines [linearize gr l t | l <- languages gr, l /= lg] - _ -> "NO PARSE" -``` -To run the translator, first compile it by -``` - % ghc --make -o trans Translator.hs -``` -For this, you need the Haskell compiler [GHC http://www.haskell.org/ghc]. 
- - -#NEW - -===Producing PGF for the translator=== - -Then produce a PGF file. For instance, the ``Food`` grammar set can be -compiled as follows: -``` - % gfc --make FoodEng.gf FoodIta.gf -``` -This produces the file ``Food.pgf`` (its name comes from the abstract syntax). - -The Haskell library function ``interact`` makes the ``trans`` program work -like a Unix filter, which reads from standard input and writes to standard -output. Therefore it can be a part of a pipe and read and write files. -The simplest way to translate is to ``echo`` input to the program: -``` - % echo "this wine is delicious" | ./trans Food.pgf - questo vino è delizioso -``` -The result is given in all languages except the input language. - -%TODO convert the output to UTF8 - - -#NEW - -===A translator loop=== - -To avoid starting the translator over and over again: -change ``interact`` in the main function to ``loop``, defined as -follows: -``` -loop :: (String -> String) -> IO () -loop trans = do - s <- getLine - if s == "quit" then putStrLn "bye" else do - putStrLn $ trans s - loop trans -``` -The loop keeps on translating line by line until the input line -is ``quit``. - - - -#NEW - -===A question-answer system=== - -#Lsecmathprogram - -The next application is also a translator, but it adds a -**transfer** component - a function that transforms syntax trees. - -The transfer function we use is one that computes a question into an answer. - -The program accepts simple questions about arithmetic and answers -"yes" or "no" in the language in which the question was made: -``` - Is 123 prime? - No. - 77 est impair ? - Oui. -``` -We change the pure translator by giving -the ``translate`` function the transfer as an extra argument: -``` - translate :: (Tree -> Tree) -> PGF -> String -> String -``` -Ordinary translation is a special case where -transfer is the identity function (``id`` in Haskell). 
- -To reply in the //same// language as the question: -``` - translate tr gr s = case parseAllLang gr (startCat gr) s of - (lg,t:_):_ -> linearize gr lg (tr t) - _ -> "NO PARSE" -``` - - -#NEW - -===Abstract syntax of the query system=== - -Input: abstract syntax judgements -``` -abstract Query = { - - flags startcat=Question ; - - cat - Answer ; Question ; Object ; - - fun - Even : Object -> Question ; - Odd : Object -> Question ; - Prime : Object -> Question ; - Number : Int -> Object ; - - Yes : Answer ; - No : Answer ; -} -``` - - -#NEW - -===Exporting GF datatypes to Haskell=== - -To make it easy to define a transfer function, we export the -abstract syntax to a system of Haskell datatypes: -``` - % gfc --output-format=haskell Query.pgf -``` -It is also possible to produce the Haskell file together with PGF, by -``` - % gfc --make --output-format=haskell QueryEng.gf -``` -The result is a file named ``Query.hs``, containing a -module named ``Query``. - - -#NEW - -Output: Haskell definitions -``` -module Query where -import PGF - -data GAnswer = - GYes - | GNo - -data GObject = GNumber GInt - -data GQuestion = - GPrime GObject - | GOdd GObject - | GEven GObject - -newtype GInt = GInt Integer -``` -All type and constructor names are prefixed with a ``G`` to prevent clashes. - -The Haskell module name is the same as the abstract syntax name. - - -#NEW - -===The question-answer function=== - -Haskell's type checker guarantees that the functions are well-typed also with -respect to GF. 
-``` -answer :: GQuestion -> GAnswer -answer p = case p of - GOdd x -> test odd x - GEven x -> test even x - GPrime x -> test prime x - -value :: GObject -> Int -value e = case e of - GNumber (GInt i) -> fromInteger i - -test :: (Int -> Bool) -> GObject -> GAnswer -test f x = if f (value x) then GYes else GNo -``` - - -#NEW - -===Converting between Haskell and GF trees=== - -The generated Haskell module also contains -``` -class Gf a where - gf :: a -> Tree - fg :: Tree -> a - -instance Gf GQuestion where - gf (GEven x1) = DTr [] (AC (CId "Even")) [gf x1] - gf (GOdd x1) = DTr [] (AC (CId "Odd")) [gf x1] - gf (GPrime x1) = DTr [] (AC (CId "Prime")) [gf x1] - fg t = - case t of - DTr [] (AC (CId "Even")) [x1] -> GEven (fg x1) - DTr [] (AC (CId "Odd")) [x1] -> GOdd (fg x1) - DTr [] (AC (CId "Prime")) [x1] -> GPrime (fg x1) - _ -> error ("no Question " ++ show t) -``` -For the programmer, it is enougo to know: -- all GF names are in Haskell prefixed with ``G`` -- ``gf`` translates from Haskell objects to GF trees -- ``fg`` translates from GF trees to Haskell objects - - - -#NEW - -===Putting it all together: the transfer definition=== - -``` -module TransferDef where - -import PGF (Tree) -import Query -- generated from GF - -transfer :: Tree -> Tree -transfer = gf . answer . fg - -answer :: GQuestion -> GAnswer -answer p = case p of - GOdd x -> test odd x - GEven x -> test even x - GPrime x -> test prime x - -value :: GObject -> Int -value e = case e of - GNumber (GInt i) -> fromInteger i - -test :: (Int -> Bool) -> GObject -> GAnswer -test f x = if f (value x) then GYes else GNo - -prime :: Int -> Bool -prime x = elem x primes where - primes = sieve [2 .. x] - sieve (p:xs) = p : sieve [ n | n <- xs, n `mod` p > 0 ] - sieve [] = [] -``` - - -#NEW - -===Putting it all together: the Main module=== - -Here is the complete code in the Haskell file ``TransferLoop.hs``. 
-``` -module Main where - -import PGF -import TransferDef (transfer) - -main :: IO () -main = do - gr <- readPGF "Query.pgf" - loop (translate transfer gr) - -loop :: (String -> String) -> IO () -loop trans = do - s <- getLine - if s == "quit" then putStrLn "bye" else do - putStrLn $ trans s - loop trans - -translate :: (Tree -> Tree) -> PGF -> String -> String -translate tr gr s = case parseAllLang gr (startCat gr) s of - (lg,t:_):_ -> linearize gr lg (tr t) - _ -> "NO PARSE" -``` - - - -#NEW - -===Putting it all together: the Makefile=== - -To automate the production of the system, we write a ``Makefile`` as follows: -``` -all: - gfc --make --output-format=haskell QueryEng - ghc --make -o ./math TransferLoop.hs - strip math -``` -(The empty segments starting the command lines in a Makefile must be tabs.) -Now we can compile the whole system by just typing -``` - make -``` -Then you can run it by typing -``` - ./math -``` -Just to summarize, the source of the application consists of the following files: -``` - Makefile -- a makefile - Math.gf -- abstract syntax - Math???.gf -- concrete syntaxes - TransferDef.hs -- definition of question-to-answer function - TransferLoop.hs -- Haskell Main module -``` - -#NEW - -==Web server applications== - -PGF files can be used in web servers, for which there is a Haskell library included -in ``src/server/``. How to build a server for tasks like translators is explained -in the [``README`` ../src/server/README] file in that directory. - -One of the servers that can be readily built with the library (without any -programming required) is **fridge poetry magnets**. It is an application that -uses an incremental parser to suggest grammatically correct next words. Here -is an example of its application to the ``Foods`` grammars. - -[food-magnet.png] - - -#NEW - -==JavaScript applications== - -JavaScript is a programming language that has interpreters built in in most -web browsers. 
It is therefore usable for client side web programs, which can even -be run without access to the internet. The following figure shows a JavaScript -program compiled from GF grammars as run on an iPhone. - -[iphone.jpg] - - -#NEW - -===Compiling to JavaScript=== - -JavaScript is one of the output formats of the GF batch compiler. Thus the following -command generates a JavaScript file from two ``Food`` grammars. -``` - % gfc --make --output-format=js FoodEng.gf FoodIta.gf -``` -The name of the generated file is ``Food.js``, derived from the top-most abstract -syntax name. This file contains the multilingual grammar as a JavaScript object. - - -#NEW - -===Using the JavaScript grammar=== - -To perform parsing and linearization, the run-time library -``gflib.js`` is used. It is included in ``GF/lib/javascript/``, together with -some other JavaScript and HTML files; these files can be used -as templates for building applications. - -An example of usage is -[``translator.html`` ../lib/javascript/translator.html], -which is in fact initialized with -a pointer to the Food grammar, so that it provides translation between the English -and Italian grammars: - -[food-js.png] - -The grammar must have the name ``grammar.js``. The abstract syntax and start -category names in ``translator.html`` must match the ones in the grammar. -With these changes, the translator works for any multilingual grammar. - - - - - -#NEW - -==Language models for speech recognition== - -The standard way of using GF in speech recognition is by building -**grammar-based language models**. - -GF supports several formats, including -GSL, the formatused in the [Nuance speech recognizer www.nuance.com]. - -GSL is produced from GF by running ``gfc`` with the flag -``--output-format=gsl``. - -Example: GSL generated from ``FoodsEng.gf``. 
-``` - % gfc --make --output-format=gsl FoodsEng.gf - % more FoodsEng.gsl - - ;GSL2.0 - ; Nuance speech recognition grammar for FoodsEng - ; Generated by GF - - .MAIN Phrase_cat - - Item_1 [("that" Kind_1) ("this" Kind_1)] - Item_2 [("these" Kind_2) ("those" Kind_2)] - Item_cat [Item_1 Item_2] - Kind_1 ["cheese" "fish" "pizza" (Quality_1 Kind_1) - "wine"] - Kind_2 ["cheeses" "fish" "pizzas" - (Quality_1 Kind_2) "wines"] - Kind_cat [Kind_1 Kind_2] - Phrase_1 [(Item_1 "is" Quality_1) - (Item_2 "are" Quality_1)] - Phrase_cat Phrase_1 - - Quality_1 ["boring" "delicious" "expensive" - "fresh" "italian" ("very" Quality_1) "warm"] - Quality_cat Quality_1 -``` - - -#NEW - -===More speech recognition grammar formats=== - -Other formats available via the ``--output-format`` flag include: - - || Format | Description || - | ``gsl`` | Nuance GSL speech recognition grammar - | ``jsgf`` | Java Speech Grammar Format (JSGF) - | ``jsgf_sisr_old`` | JSGF with semantic tags in SISR WD 20030401 format - | ``srgs_abnf`` | SRGS ABNF format - | ``srgs_xml`` | SRGS XML format - | ``srgs_xml_prob`` | SRGS XML format, with weights - | ``slf`` | finite automaton in the HTK SLF format - | ``slf_sub`` | finite automaton with sub-automata in HTK SLF - -All currently available formats can be seen with ``gfc --help``. - - diff --git a/doc/gf3-release.html b/doc/gf3-release.html deleted file mode 100644 index 75557c94a..000000000 --- a/doc/gf3-release.html +++ /dev/null @@ -1,73 +0,0 @@ - - - - -GF 3.0 - -

GF 3.0

- -Krasimir Angelov, Bjrn Bringert, and Aarne Ranta
-Beta release, 27 June 2008 -
- -

-GF Version 3.0 is a major revision of GF. The source language is a superset of the -language in 2.9, which means backward compatibility. But the target languages, the -compiler implementation, and the functionalities (e.g. the shell) have undergone -radical changes. -

-

New features

-

-Here is a summary of the main novelties visible to the user: -

-
    -
  • Size: the source code and the executable binary size have gone - down to about the half of 2.9. -
  • Portability: the new back end format PGF (Portable Grammar Format) is - much simpler than the old GFC format, and therefore easier to port to new - platforms. -
  • Multilingual web page support: as an example of portability, GF 3.0 provides a - compiler from PGF to JavaScript. There are also JavaScript libraries for creating - translators and syntax editors as client-side web applications. -
  • Incremental parsing: there is a possibility of word completion when - input strings are sent to the parser. -
  • Application programmer's interfaces: both source-GF and PGF formats, - the shell, and the compiler are accessible via high-level APIs. -
  • Resource library version 1.4: more coverage, more languages; some of - the new GF language features are exploited. -
  • Uniform character encoding: UTF8 in generated files, user-definable in - source files -
- -

Non-supported features

-

-There are some features of GF 2.9 that will not work in the 3.0 beta release. -

-
    -
  • Java Editor GUI: we now see the JavaScript editor as the main form of - syntax editing. -
  • Pre-module multi-file grammar format: the grammar format of GF before version 2.0 - is still not yet supported. -
  • Context-free and EBNF input grammar formats. -
  • Probabilistic GF grammars. -
  • Some output formats: LBNF. -
  • Some GF shell commands: while the main ones will be supported with their familiar - syntax and options, some old commands have not been included. The GF shell - command help -changes gives the actual list. -
- -

-Users who want to have these features are welcome to contact us, -and even more welcome to contribute code that restores them! -

-

GF language extensions

-

-Operations for defining patterns. -

-

-Inheritance of overload groups. -

- - - - diff --git a/doc/gf3-release.txt b/doc/gf3-release.txt deleted file mode 100644 index 631752c90..000000000 --- a/doc/gf3-release.txt +++ /dev/null @@ -1,58 +0,0 @@ -GF 3.0 -Krasimir Angelov, Bjrn Bringert, and Aarne Ranta -Beta release, 27 June 2008 - - -GF Version 3.0 is a major revision of GF. The source language is a superset of the -language in 2.9, which means backward compatibility. But the target languages, the -compiler implementation, and the functionalities (e.g. the shell) have undergone -radical changes. - - -==New features== - -Here is a summary of the main novelties visible to the user: -- **Size**: the source code and the executable binary size have gone - down to about the half of 2.9. -- **Portability**: the new back end format PGF (Portable Grammar Format) is - much simpler than the old GFC format, and therefore easier to port to new - platforms. -- **Multilingual web page support**: as an example of portability, GF 3.0 provides a - compiler from PGF to JavaScript. There are also JavaScript libraries for creating - translators and syntax editors as client-side web applications. -- **Incremental parsing**: there is a possibility of word completion when - input strings are sent to the parser. -- **Application programmer's interfaces**: both source-GF and PGF formats, - the shell, and the compiler are accessible via high-level APIs. -- **Resource library version 1.4**: more coverage, more languages; some of - the new GF language features are exploited. -- **Uniform character encoding**: UTF8 in generated files, user-definable in - source files - - -==Non-supported features== - -There are some features of GF 2.9 that will //not// work in the 3.0 beta release. -- Java Editor GUI: we now see the JavaScript editor as the main form of - syntax editing. -- Pre-module multi-file grammar format: the grammar format of GF before version 2.0 - is still not yet supported. -- Context-free and EBNF input grammar formats. -- Probabilistic GF grammars. 
-- Some output formats: LBNF. -- Some GF shell commands: while the main ones will be supported with their familiar - syntax and options, some old commands have not been included. The GF shell - command ``help -changes`` gives the actual list. - - -Users who want to have these features are welcome to contact us, -and even more welcome to contribute code that restores them! - - -==GF language extensions== - -Operations for defining patterns. - -Inheritance of overload groups. - - diff --git a/doc/index.html b/doc/index.html index e4aa842ff..f6bbf7f1a 100644 --- a/doc/index.html +++ b/doc/index.html @@ -13,28 +13,20 @@

Grammatical Framework Documents

-Top-3 documents: -Tutorial +Top-5 documents: -| - -ReferenceManual - -| - -LibrarySynopsis +Quick start instruction. +Old Tutorial, application-oriented. -

Tutorials

+New Tutorial, linguistics-oriented. -Quick start instruction. +ReferenceManual. -

+LibrarySynopsis. -GF Tutorial, -Now up-to-date for GF version 2.9. Covers all of GF. @@ -49,144 +41,13 @@ in a summary format. GF Reference Manual. A full-scale reference manual of the GF language. -

- - -User Manual explaining the GF user interfaces and command language (slightly -outdated). - -

- -Editor User Manual -on editing in the Java interface. - -

- -Chart of GF grammar compiler phases. - - - -

Grammar library documentation

- -Resource Grammar Tutorial Chapter. - -

- -Resource Grammar Synopsis -for library users. With APIs and use examples. - -

- - -Resource Grammar HOWTO -for library authors. - - - - -

Embedding GF grammars in computer programs

- -Embedded Grammar Tutorial Chapter. - -

- - -Embedded GF Interpreter manual for using GF grammars in Java programs. - -

- - -Embedded GF API for using GF grammars in Haskell programs. - -

- - -MCFG/GF library for Prolog, -for using GF grammars in Prolog programs. - - - -

Theoretical studies

- - -Grammatical Framework: A Type-Theoretical -Grammar Formalism (ps.gz). Theoretical paper on GF by A. Ranta. A later -version appeared -in The Journal of Functional Programming, vol. 14:2. 2004, pp. 145-189. -The standard reference on GF. - -

- - -Expressivity and Complexity of the Grammatical Framework, -PhD Thesis by -Peter Ljunglf. - - - -

Introductory talks

- - -GF in 25 Minutes - overview for computer science audience. - -

- - - -Slides on GF theory and implementation given -at INRIA Rocquencourt in December 2003. - -

- - -Slides on example-based grammar writing and a short introduction -to GF grammars. - -

- - -Course notes on Natural Language Technology, includes -slides on using GF. - - - -

Examples and applications

- - -Formal and Informal Software Specifications, -PhD Thesis by -Kristofer Johannisson. - - -

- - -Embedded grammars, -Master's thesis by -Bjrn Bringert - -

- -Demo film -of a multimodal dialogue system built with embedded grammars. - - -

- - -GFCC (pdf): -report on a compiler from a fragment of C to JVM, written in GF. -

More

+

Publications

-Bibliography: -more publications on GF, as well as background literature. +Bibliography: more publications on GF, as well as background literature. diff --git a/doc/iphone.jpg b/doc/iphone.jpg deleted file mode 100644 index d9e138b88..000000000 Binary files a/doc/iphone.jpg and /dev/null differ diff --git a/doc/mytree.png b/doc/mytree.png deleted file mode 100644 index fafcc8772..000000000 Binary files a/doc/mytree.png and /dev/null differ diff --git a/doc/school-langs.dot b/doc/school-langs.dot deleted file mode 100644 index 88e0a9c96..000000000 --- a/doc/school-langs.dot +++ /dev/null @@ -1,106 +0,0 @@ -graph{ - -size = "8,8" ; - -overlap = scale ; - -"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ; - -"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ; -"1" -- "Abs" [style = "solid"]; - -"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ; -"2" -- "Abs" [style = "solid"]; - -"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ; -"3" -- "Abs" [style = "solid"]; - -"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ; -"4" -- "Abs" [style = "solid"]; - -"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ; -"5" -- "Abs" [style = "solid"]; - -"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ; -"6" -- "Abs" [style = "solid"]; - -"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ; -"7" -- "Abs" [style = "solid"]; - -"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ; -"8" -- "Abs" [style = "solid"]; - -"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ; -"9" -- "Abs" [style = "solid"]; - -"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ; -"10" -- "Abs" [style = "solid"]; - -"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ; -"11" -- "Abs" [style = "solid"]; - 
-"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "12" [style = "solid"]; - -"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "13" [style = "solid"]; - -"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "14" [style = "solid"]; - -"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "15" [style = "solid"]; - -"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "16" [style = "solid"]; - -"17" [label = "Polish", style = "solid", shape = "ellipse", color = "orange"] ; -"Abs" -- "17" [style = "solid"]; - -"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "18" [style = "solid"]; - -"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "19" [style = "solid"]; - -"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "20" [style = "solid"]; - -"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "orange"] ; -"Abs" -- "21" [style = "solid"]; - -"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "22" [style = "solid"]; - -"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "23" [style = "solid"]; - -"24" [label = "Catalan", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "24" [style = "solid"]; - -"25" [label = "Norwegian", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "25" [style = "solid"]; - -"26" [label = "Russian", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "26" [style = "solid"]; - -"27" [label = "Interlingua", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "27" [style = "solid"]; - -"28" [label = "Latin", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "28" [style = "solid"]; -"29" [label 
= "Turkish", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "29" [style = "solid"]; -"30" [label = "Hindi", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "30" [style = "solid"]; -"31" [label = "Thai", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "31" [style = "solid"]; -"32" [label = "Urdu", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "32" [style = "solid"]; -"33" [label = "Telugu", style = "dotted", shape = "ellipse", color = "red"] ; -"Abs" -- "33" [style = "solid"]; -"34" [label = "Arabic", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "34" [style = "solid"]; - - -} diff --git a/doc/school-langs.png b/doc/school-langs.png deleted file mode 100644 index 7230e0bff..000000000 Binary files a/doc/school-langs.png and /dev/null differ diff --git a/doc/summer-align.png b/doc/summer-align.png deleted file mode 100644 index 796754408..000000000 Binary files a/doc/summer-align.png and /dev/null differ diff --git a/doc/summer-langs.png b/doc/summer-langs.png deleted file mode 100644 index 729af722a..000000000 Binary files a/doc/summer-langs.png and /dev/null differ diff --git a/doc/tutorial/10lang-small.png b/doc/tutorial/10lang-small.png new file mode 100644 index 000000000..49a3d0a98 Binary files /dev/null and b/doc/tutorial/10lang-small.png differ diff --git a/doc/tutorial/categories.png b/doc/tutorial/categories.png new file mode 100644 index 000000000..afc5873c5 Binary files /dev/null and b/doc/tutorial/categories.png differ diff --git a/doc/tutorial/food-js.png b/doc/tutorial/food-js.png new file mode 100644 index 000000000..fe579b1a9 Binary files /dev/null and b/doc/tutorial/food-js.png differ diff --git a/doc/tutorial/food-magnet.png b/doc/tutorial/food-magnet.png new file mode 100644 index 000000000..8b137875d Binary files /dev/null and b/doc/tutorial/food-magnet.png differ diff --git a/doc/tutorial/foodmarket.png b/doc/tutorial/foodmarket.png new file mode 
100644 index 000000000..6b0e3fbd7 Binary files /dev/null and b/doc/tutorial/foodmarket.png differ diff --git a/doc/tutorial/gf-tutorial.html b/doc/tutorial/gf-tutorial.html new file mode 100644 index 000000000..46b17b96b --- /dev/null +++ b/doc/tutorial/gf-tutorial.html @@ -0,0 +1,5442 @@ + + + + + +Grammatical Framework Tutorial + +

Grammatical Framework Tutorial

+ +Aarne Ranta
+December 2010 (November 2008) +
+ +

+ +

+

Overview

+

+This is a hands-on introduction to grammar writing in GF. +

+

+Main ingredients of GF: +

+
    +
  • linguistics +
  • functional programming +
+ +

+Prerequisites: +

+
    +
  • some previous experience from some programming language +
  • the basics of using computers, e.g. the use of + text editors and the management of files. +
  • knowledge of Unix commands is useful but not necessary +
  • knowledge of many natural languages may add fun to the experience +
+ +

+ +

+

Outline

+

+Lesson 1: a multilingual "Hello World" grammar. English, Finnish, Italian. +

+

+Lesson 2: a larger grammar for the domain of food. English and Italian. +

+

+Lesson 3: parameters - morphology and agreement. +

+

+Lesson 4: using the resource grammar library. +

+

+Lesson 5: semantics - dependent types, variable bindings, +and semantic definitions. +

+

+Lesson 6: implementing formal languages. +

+

+Lesson 7: embedded grammar applications. +

+

+ +

+

Slides

+

+You can chop this tutorial into a set of slides by the command +

+
+    htmls gf-tutorial.html
+
+

+where the program htmls is distributed with GF (see below), in +

+

+ GF/src/tools/Htmls.hs +

+

+The slides will appear as a set of files beginning with 01-gf-tutorial.htmls. +

+

+Internal links will not work in the slide format, except for those in the +upper left corner of each slide, and the links behind the "Contents" link. +

+

+ +

+

Lesson 1: Getting Started with GF

+

+ +

+

+Goals: +

+
    +
  • install and run GF +
  • write the first GF grammar: a "Hello World" grammar in three languages +
  • use GF for translation and multilingual generation +
+ +

+ +

+

What GF is

+

+We use the term GF for three different things: +

+
    +
  • a system (computer program) used for working with grammars +
  • a programming language in which grammars can be written +
  • a theory about grammars and languages +
+ +

+The GF system is an implementation +of the GF programming language, which in turn is built on the ideas of the +GF theory. +

+

+The focus of this tutorial is on using the GF programming language. +

+

+At the same time, we learn the way of thinking in the GF theory. +

+

+We make the grammars run on a computer by +using the GF system. +

+

+ +

+

GF grammars and language processing tasks

+

+A GF program is called a grammar. +

+

+A grammar defines a language. +

+

+From this definition, language processing components can be derived: +

+
    +
  • parsing: to analyse the language +
  • linearization: to generate the language +
  • translation: to analyse one language and generate another +
+ +

+In general, a GF grammar is multilingual: +

+
    +
  • many languages in one grammar +
  • translations between them +
+ +

+ +

+

Getting the GF system

+

+Open-source free software, downloaded via the GF Homepage: +

+

+grammaticalframework.org +

+

+There you find +

+
    +
  • binaries for Linux, Mac OS X, and Windows +
  • source code and documentation +
  • grammar libraries and examples +
+ +

+Many examples in this tutorial are +online. +

+

+Normally you don't have to compile GF yourself. +But if you do want to compile GF from source, follow the +instructions in the Developers Guide. +

+

+ +

+

Running the GF system

+

+Type gf in the Unix (or Cygwin) shell: +

+
+    % gf
+
+

+You will see GF's welcome message and the prompt >. +The command +

+
+    > help
+
+

+will give you a list of available commands. +

+

+As a common convention, we will use +

+
    +
  • % as a prompt that marks system commands +
  • > as a prompt that marks GF commands +
+ +

+Thus you should not type these prompts, but only the characters that +follow them. +

+

+ +

+

A "Hello World" grammar

+

+Like most programming language tutorials, we start with a +program that prints "Hello World" on the terminal. +

+

+Extra features: +

+
    +
  • Multilinguality: the message is printed in many languages. +
  • Reversibility: in addition to printing, you can parse the + message and translate it to other languages. +
+ +

+ +

+

The program: abstract syntax and concrete syntaxes

+

+A GF program, in general, is a multilingual grammar. Its main parts +are +

+
    +
  • an abstract syntax +
  • one or more concrete syntaxes +
+ +

+The abstract syntax defines what meanings +can be expressed in the grammar +

+
    +
  • Greetings, where we greet a Recipient, which can be + World or Mum or Friends +
+ +

+ +

+

+GF code for the abstract syntax: +

+
+    -- a "Hello World" grammar
+    abstract Hello = {
+  
+      flags startcat = Greeting ;
+  
+      cat Greeting ; Recipient ;
+  
+      fun 
+        Hello : Recipient -> Greeting ;
+        World, Mum, Friends : Recipient ;
+    }
+
+

+The code has the following parts: +

+
    +
  • a comment (optional), saying what the module is doing +
  • a module header indicating that it is an abstract syntax + module named Hello +
  • a module body in braces, consisting of +
      +
    • a startcat flag declaration stating that Greeting is the + default start category for parsing and generation +
    • category declarations introducing two categories, i.e. types of meanings +
    • function declarations introducing four meaning-building functions +
    +
+ +

+ +

+

+English concrete syntax (mapping from meanings to strings): +

+
+    concrete HelloEng of Hello = {
+  
+      lincat Greeting, Recipient = {s : Str} ;
+  
+      lin 
+        Hello recip = {s = "hello" ++ recip.s} ;
+        World = {s = "world"} ;
+        Mum = {s = "mum"} ;
+        Friends = {s = "friends"} ;
+    }
+
+

+The major parts of this code are: +

+
    +
  • a module header indicating that it is a concrete syntax of the abstract syntax + Hello, itself named HelloEng +
  • a module body in curly brackets, consisting of +
      +
    • linearization type definitions stating that + Greeting and Recipient are records with a string s +
    • linearization definitions telling what records are assigned to + each of the meanings defined in the abstract syntax +
    +
+ +

+Notice the concatenation ++ and the record projection .. +

+

+ +

+

+Finnish and Italian concrete syntaxes: +

+
+    concrete HelloFin of Hello = {
+      lincat Greeting, Recipient = {s : Str} ;
+      lin 
+        Hello recip = {s = "terve" ++ recip.s} ;
+        World = {s = "maailma"} ;
+        Mum = {s = "äiti"} ;
+        Friends = {s = "ystävät"} ;
+    }
+  
+    concrete HelloIta of Hello = {
+      lincat Greeting, Recipient = {s : Str} ;
+      lin 
+        Hello recip = {s = "ciao" ++ recip.s} ;
+        World = {s = "mondo"} ;
+        Mum = {s = "mamma"} ;
+        Friends = {s = "amici"} ;
+    }
+
+

+

+ +

+

Using grammars in the GF system

+

+In order to compile the grammar in GF, +we create four files, one for each module, named Modulename.gf: +

+
+    Hello.gf  HelloEng.gf  HelloFin.gf  HelloIta.gf
+
+

+The first GF command: import a grammar. +

+
+    > import HelloEng.gf
+
+

+All commands also have short names; here: +

+
+    > i HelloEng.gf
+
+

+The GF system will compile your grammar +into an internal representation and show the CPU time that was consumed, followed +by a new prompt: +

+
+    > i HelloEng.gf
+    - compiling Hello.gf...   wrote file Hello.gfo 8 msec
+    - compiling HelloEng.gf...   wrote file HelloEng.gfo 12 msec
+  
+    12 msec
+    >
+
+

+

+ +

+

+You can use GF for parsing (parse = p) +

+
+    > parse "hello world"
+    Hello World
+
+

+Parsing takes a string into an abstract syntax tree. +

+

+The notation for trees is that of function application: +

+
+    function argument1 ... argumentn
+
+

+Parentheses are only needed for grouping. +

+

+Parsing something that is not in the grammar will fail: +

+
+    > parse "hello dad"
+    Unknown words: dad
+  
+    > parse "world hello"
+    no tree found
+
+

+

+ +

+

+You can also use GF for linearization (linearize = l). +It takes trees into strings: +

+
+    > linearize Hello World
+    hello world
+
+

+Translation: pipe parsing to linearization: +

+
+    > import HelloEng.gf
+    > import HelloIta.gf
+  
+    > parse -lang=HelloEng "hello mum" | linearize -lang=HelloIta
+    ciao mamma
+
+

+Default of the language flag (-lang): the last-imported concrete syntax. +

+

+Multilingual generation: +

+
+    > parse -lang=HelloEng "hello friends" | linearize
+    terve ystävät
+    ciao amici
+    hello friends
+
+

+Linearization is by default to all available languages. +

+

+ +

+

Exercises on the Hello World grammar

+
    +
  1. Test the parsing and translation examples shown above, as well as +some other examples, in different combinations of languages. +

    +
  2. Extend the grammar Hello.gf and some of the +concrete syntaxes by five new recipients and one new greeting +form. +

    +
  3. Add a concrete syntax for some other +languages you might know. +

    +
  4. Add a pair of greetings that are expressed in one and +the same way in +one language and in two different ways in another. +For instance, good morning +and good afternoon in English are both expressed +as buongiorno in Italian. +Test what happens when you translate buongiorno to English in GF. +

    +
  5. Inject errors in the Hello grammars, for example, leave out +some line, omit a variable in a lin rule, or change the name +in one occurrence +of a variable. Inspect the error messages generated by GF. +
+ +

+ +

+

Using grammars from outside GF

+

+You can use the gf program in a Unix pipe. +

+
    +
  • echo a GF command +
  • pipe it into GF with grammar names as arguments +
+ +
+    % echo "l Hello World" | gf HelloEng.gf HelloFin.gf HelloIta.gf
+
+

+You can also write a script, a file containing the lines +

+
+    import HelloEng.gf
+    import HelloFin.gf
+    import HelloIta.gf
+    linearize Hello World
+
+

+

+ +

+

GF scripts

+

+If we name this script hello.gfs, we can do +

+
+    % gf --run <hello.gfs
+  
+    ciao mondo
+    terve maailma
+    hello world
+
+

+The option --run removes prompts, CPU time, and other messages. +

+

+See Lesson 7 for stand-alone programs that don't need the GF system to run. +

+

+Exercise. (For Unix hackers.) Write a GF application that reads +an English string from the standard input and writes an Italian +translation to the output. +

+

+ +

+

What else can be done with the grammar

+

+Some more functions that will be covered: +

+
    +
  • morphological analysis: find out the possible inflection forms of words +
  • morphological synthesis: generate all inflection forms of words +
  • random generation: generate random expressions +
  • corpus generation: generate all expressions +
  • treebank generation: generate a list of trees with their linearizations +
  • teaching quizzes: train morphology and translation +
  • multilingual authoring: create a document in many languages simultaneously +
  • speech input: optimize a speech recognition system for a grammar +
+ +

+ +

+

Embedded grammar applications

+

+Application programs, using techniques from Lesson 7: +

+
    +
  • compile grammars to new formats, such as speech recognition grammars +
  • embed grammars in Java and Haskell programs +
  • build applications using compilation and embedding: +
      +
    • voice commands +
    • spoken language translators +
    • dialogue systems +
    • user interfaces +
    • localization: render the messages printed by a program + in different languages +
    +
+ +

+ +

+

Lesson 2: Designing a grammar for complex phrases

+

+ +

+

+Goals: +

+
    +
  • build a larger grammar: phrases about food in English and Italian +
  • learn to write reusable library functions ("operations") +
  • learn the basics of GF's module system +
+ +

+ +

+

The abstract syntax Food

+

+Phrases usable for speaking about food: +

+
    +
  • the start category is Phrase +
  • a Phrase can be built by assigning a Quality to an Item + (e.g. this cheese is Italian) +
  • an Item is built from a Kind by prefixing this or that + (e.g. this wine) +
  • a Kind is either atomic (e.g. cheese), or formed by + qualifying a given Kind with a Quality (e.g. Italian cheese) +
  • a Quality is either atomic (e.g. Italian), + or built by modifying a given Quality with the word very (e.g. very warm) +
+ +

+Abstract syntax: +

+
+    abstract Food = {
+  
+      flags startcat = Phrase ;
+  
+      cat
+        Phrase ; Item ; Kind ; Quality ;
+  
+      fun
+        Is : Item -> Quality -> Phrase ;
+        This, That : Kind -> Item ;
+        QKind : Quality -> Kind -> Kind ;
+        Wine, Cheese, Fish : Kind ;
+        Very : Quality -> Quality ;
+        Fresh, Warm, Italian, Expensive, Delicious, Boring : Quality ;
+    }
+
+

+Example Phrase +

+
+    Is (This (QKind Delicious (QKind Italian Wine))) (Very (Very Expensive))
+    this delicious Italian wine is very very expensive
+
+

+

+ +

+

The concrete syntax FoodEng

+
+    concrete FoodEng of Food = {
+  
+      lincat
+        Phrase, Item, Kind, Quality = {s : Str} ;
+  
+      lin
+        Is item quality = {s = item.s ++ "is" ++ quality.s} ;
+        This kind = {s = "this" ++ kind.s} ;
+        That kind = {s = "that" ++ kind.s} ;
+        QKind quality kind = {s = quality.s ++ kind.s} ;
+        Wine = {s = "wine"} ;
+        Cheese = {s = "cheese"} ;
+        Fish = {s = "fish"} ;
+        Very quality = {s = "very" ++ quality.s} ;
+        Fresh = {s = "fresh"} ;
+        Warm = {s = "warm"} ;
+        Italian = {s = "Italian"} ;
+        Expensive = {s = "expensive"} ;
+        Delicious = {s = "delicious"} ;
+        Boring = {s = "boring"} ;
+    }  
+
+

+

+ +

+

+Test the grammar for parsing: +

+
+    > import FoodEng.gf
+    > parse "this delicious wine is very very Italian"
+    Is (This (QKind Delicious Wine)) (Very (Very Italian))
+
+

+Parse in other categories setting the cat flag: +

+
+    p -cat=Kind "very Italian wine"
+    QKind (Very Italian) Wine
+
+

+

+ +

+

Exercises on the Food grammar

+
    +
  1. Extend the Food grammar by ten new food kinds and +qualities, and run the parser with new kinds of examples. +

    +
  2. Add a rule that enables question phrases of the form +is this cheese Italian. +

    +
  3. Enable the optional prefixing of +phrases with the words "excuse me but". Do this in such a way that +the prefix can occur at most once. +
+ +

+ +

+

Commands for testing grammars

+

Generating trees and strings

+

+Random generation (generate_random = gr): +build a random tree in accordance with an abstract syntax: +

+
+    > generate_random
+    Is (This (QKind Italian Fish)) Fresh
+
+

+By using a pipe, random generation can be fed into linearization: +

+
+    > generate_random | linearize
+    this Italian fish is fresh
+
+

+Use the number flag to generate several trees: +

+
+    > gr -number=4 | l
+    that wine is boring
+    that fresh cheese is fresh
+    that cheese is very boring
+    this cheese is Italian
+
+

+

+ +

+

+To generate all phrases that a grammar can produce, +use generate_trees = gt. +

+
+    > generate_trees | l
+    that cheese is very Italian
+    that cheese is very boring
+    that cheese is very delicious
+    ...
+    this wine is fresh
+    this wine is warm
+
+

+The default depth is 3; the depth can be +set by using the depth flag: +

+
+    > generate_trees -depth=2 | l
+
+

+What options a command has can be seen by the help = h command: +

+
+    > help gr
+    > help gt
+
+

+

+ +

+

Exercises on generation

+
    +
  1. If the command gt generated all +trees in your grammar, it would never terminate. Why? +

    +
  2. Measure how many trees the grammar gives with depths 4 and 5, +respectively. Hint. You can +use the Unix word count command wc to count lines. +
+ +

+ +

+

More on pipes: tracing

+

+Put the tracing option -tr to each command whose output you +want to see: +

+
+    > gr -tr | l -tr | p
+  
+    Is (This Cheese) Boring
+    this cheese is boring
+    Is (This Cheese) Boring  
+
+

+Useful for test purposes: the pipe above can show +if a grammar is ambiguous, i.e. +contains strings that can be parsed in more than one way. +

+

+Exercise. Extend the Food grammar so that it produces ambiguous +strings, and try out the ambiguity test. +

+

+ +

+

Writing and reading files

+

+To save the output into a file, pipe it to the write_file = wf command, +

+
+    > gr -number=10 | linearize | write_file -file=exx.tmp
+
+

+To read a file to GF, use the read_file = rf command, +

+
+    > read_file -file=exx.tmp -lines | parse
+
+

+The flag -lines tells GF to read each line of the file separately. +

+

+Files with examples can be used for regression testing +of grammars - the most systematic way to do this is by +treebanks; see here. +

+

+ +

+

Visualizing trees

+

+Parentheses give a linear representation of trees, +useful for the computer. +

+

+The human eye may prefer to see a visualization: visualize_tree = vt: +

+
+    > parse "this delicious cheese is very Italian" | visualize_tree
+
+

+The tree is generated in a PostScript (.ps) file. The -view option is used for +telling what command to use to view the file. Its default is "gv", which works +on most Linux installations. On a Mac, one would probably write +

+
+    > parse "this delicious cheese is very Italian" | visualize_tree -view="open"
+
+

+

+ +

+

+This command uses the program Graphviz, which you +might not have, but which is freely available on the web. +

+

+You can save the temporary file _grph.dot, +which the command vt produces. +

+

+Then you can process this file with the dot +program (from the Graphviz package). +

+
+    % dot -Tpng _grph.dot > mytree.png
+
+

+

+ +

+

System commands

+

+You can give a system command without leaving GF: +! followed by a Unix command, +

+
+    > ! dot -Tpng _grph.dot > mytree.png
+    > ! open mytree.png
+
+

+A system command may also receive its argument from +a GF pipe. It then has the name sp = system_pipe: +

+
+    > generate_trees -depth=4 | sp -command="wc -l"
+
+

+This command example returns the number of generated trees. +

+

+Exercise. +Measure how many trees the grammar FoodEng gives with depths 4 and 5, +respectively. Use the Unix word count command wc to count lines, and +a system pipe from a GF command into a Unix command. +

+

+ +

+

An Italian concrete syntax

+

+ +

+

+Just (?) replace English words with their dictionary equivalents: +

+
+    concrete FoodIta of Food = {
+  
+      lincat
+        Phrase, Item, Kind, Quality = {s : Str} ;
+  
+      lin
+        Is item quality = {s = item.s ++ "è" ++ quality.s} ;
+        This kind = {s = "questo" ++ kind.s} ;
+        That kind = {s = "quel" ++ kind.s} ;
+        QKind quality kind = {s = kind.s ++ quality.s} ;
+        Wine = {s = "vino"} ;
+        Cheese = {s = "formaggio"} ;
+        Fish = {s = "pesce"} ;
+        Very quality = {s = "molto" ++ quality.s} ;
+        Fresh = {s = "fresco"} ;
+        Warm = {s = "caldo"} ;
+        Italian = {s = "italiano"} ;
+        Expensive = {s = "caro"} ;
+        Delicious = {s = "delizioso"} ;
+        Boring = {s = "noioso"} ;
+    }
+
+

+

+ +

+

+Not just replacing words: +

+

+The order of a quality and the kind it modifies is changed in +

+
+      QKind quality kind = {s = kind.s ++ quality.s} ;
+
+

+Thus Italian says vino italiano for Italian wine. +

+

+(Some Italian adjectives +are put before the noun. This distinction can be controlled by parameters, +which are introduced in Lesson 3.) +

+

+ +

+

Exercises on multilinguality

+
    +
  1. Write a concrete syntax of Food for some other language. +You will probably end up with grammatically incorrect +linearizations - but don't +worry about this yet. +

    +
  2. If you have written Food for German, Swedish, or some +other language, test with random or exhaustive generation what constructs +come out incorrect, and prepare a list of those ones that cannot be helped +with the currently available fragment of GF. You can return to your list +after having worked out Lesson 3. +
+ +

+ +

+

Free variation

+

+Semantically indistinguishable ways of expressing a thing. +

+

+The variants construct of GF expresses free variation. For example, +

+
+    lin Delicious = {s = "delicious" | "exquisit" | "tasty"} ;
+
+

+By default, the linearize command +shows only the first variant from such lists; to see them +all, use the option -all: +

+
+    > p "this exquisit wine is delicious" | l -all
+    this delicious wine is delicious
+    this delicious wine is exquisit
+    ...
+
+

+

+ +

+

+An equivalent notation for variants is +

+
+    lin Delicious = {s = variants {"delicious" ; "exquisit" ; "tasty"}} ;
+
+

+This notation also allows the limiting case: an empty variant list, +

+
+    variants {}
+
+

+It can be used e.g. if a word lacks a certain inflection form. +

+

+Free variation works for all types in concrete syntax; all terms in +a variant list must be of the same type. +

+

+ +

+

More applications of multilingual grammars

+

Multilingual treebanks

+

+ +

+

+Multilingual treebank: a set of trees with their +linearizations in different languages: +

+
+    > gr -number=2 | l -treebank
+  
+    Is (That Cheese) (Very Boring)
+    quel formaggio è molto noioso
+    that cheese is very boring
+  
+    Is (That Cheese) Fresh
+    quel formaggio è fresco
+    that cheese is fresh
+
+

+

+ +

+

Translation quiz

+

+translation_quiz = tq: +generate random sentences, display them in one language, and check the user's +answer given in another language. +

+
+    > translation_quiz -from=FoodEng -to=FoodIta
+  
+    Welcome to GF Translation Quiz.
+    The quiz is over when you have done at least 10 examples
+    with at least 75 % success.
+    You can interrupt the quiz by entering a line consisting of a dot ('.').
+  
+    this fish is warm
+    questo pesce è caldo
+    > Yes.
+    Score 1/1
+  
+    this cheese is Italian
+    questo formaggio è noioso
+    > No, not questo formaggio è noioso, but
+    questo formaggio è italiano
+  
+    Score 1/2
+    this fish is expensive
+
+

+

+ +

+

Context-free grammars and GF

+

The "cf" grammar format

+

+The grammar FoodEng can be written in a BNF format as follows: +

+
+    Is.        Phrase  ::= Item "is" Quality ;
+    That.      Item    ::= "that" Kind ;
+    This.      Item    ::= "this" Kind ;
+    QKind.     Kind    ::= Quality Kind ;
+    Cheese.    Kind    ::= "cheese" ;
+    Fish.      Kind    ::= "fish" ;
+    Wine.      Kind    ::= "wine" ;
+    Italian.   Quality ::= "Italian" ;
+    Boring.    Quality ::= "boring" ;
+    Delicious. Quality ::= "delicious" ;
+    Expensive. Quality ::= "expensive" ;
+    Fresh.     Quality ::= "fresh" ;
+    Very.      Quality ::= "very" Quality ;
+    Warm.      Quality ::= "warm" ;
+
+

+GF can convert BNF grammars into GF. +BNF files are recognized by the file name suffix .cf (for context-free): +

+
+    > import food.cf
+
+

+The compiler creates separate abstract and concrete modules internally. +

+

+ +

+

Restrictions of context-free grammars

+

+Separating concrete and abstract syntax allows +three deviations from context-free grammar: +

+
    +
  • permutation: changing the order of constituents +
  • suppression: omitting constituents +
  • reduplication: repeating constituents +
+ +

+Exercise. Define the non-context-free +copy language {x x | x <- (a|b)*} in GF. +

+

+ +

+

Modules and files

+

+GF uses suffixes to recognize different file formats: +

+
    +
  • Source files: Modulename.gf +
  • Target files: Modulename.gfo +
+ +

+Importing generates target from source: +

+
+    > i FoodEng.gf
+    - compiling Food.gf...   wrote file Food.gfo 16 msec
+    - compiling FoodEng.gf...   wrote file FoodEng.gfo 20 msec
+
+

+The .gfo format (="GF Object") is precompiled GF, which is +faster to load than source GF (.gf). +

+

+When reading a module, GF decides whether +to use an existing .gfo file or to generate +a new one, by looking at modification times. +

+

+ +

+

+Exercise. What happens when you import FoodEng.gf for +a second time? Try this in different situations: +

+
    +
  • Right after importing it the first time (the modules are kept in + the memory of GF and need no reloading). +
  • After issuing the command empty (e), which clears the memory + of GF. +
  • After making a small change in FoodEng.gf, be it only an added space. +
  • After making a change in Food.gf. +
+ +

+ +

+

Using operations and resource modules

+

Operation definitions

+

+The golden rule of functional programming: +

+

+Whenever you find yourself programming by copy-and-paste, write a function instead. +

+

+Functions in concrete syntax are defined using the keyword oper (for +operation), distinct from fun for the sake of clarity. +

+

+Example: +

+
+    oper ss : Str -> {s : Str} = \x -> {s = x} ;
+
+

+The operation can be applied to an argument, and GF will +compute the value: +

+
+    ss "boy" ===> {s = "boy"}
+
+

+The symbol ===> will be used for computation. +

+

+ +

+

+Notice the lambda abstraction form +

+
    +
  • \x -> t +
+ +

+This is read: +

+
    +
  • function with variable x and function body t +
+ +

+For lambda abstraction with multiple arguments, we have the shorthand +

+
+    \x,y -> t   ===  \x -> \y -> t
+
+

+Linearization rules actually use syntactic +sugar for abstraction: +

+
+    lin f x = t   ===  lin f = \x -> t
+
+

+

+ +

+

The ``resource`` module type

+

+The resource module type is used to package +oper definitions into reusable resources. +

+
+    resource StringOper = {
+      oper
+        SS : Type = {s : Str} ;
+        ss : Str -> SS = \x -> {s = x} ;
+        cc : SS -> SS -> SS = \x,y -> ss (x.s ++ y.s) ;
+        prefix : Str -> SS -> SS = \p,x -> ss (p ++ x.s) ;
+    }
+
+

+

+ +

+

Opening a resource

+

+Any number of resource modules can be +opened in a concrete syntax. +

+
+    concrete FoodEng of Food = open StringOper in {
+  
+      lincat
+        Phrase, Item, Kind, Quality = SS ;
+  
+      lin
+        Is item quality = cc item (prefix "is" quality) ;
+        This k = prefix "this" k ;
+        That k = prefix "that" k ;
+        QKind k q = cc k q ;
+        Wine = ss "wine" ;
+        Cheese = ss "cheese" ;
+        Fish = ss "fish" ;
+        Very = prefix "very" ;
+        Fresh = ss "fresh" ;
+        Warm = ss "warm" ;
+        Italian = ss "Italian" ;
+        Expensive = ss "expensive" ;
+        Delicious = ss "delicious" ;
+        Boring = ss "boring" ;
+    }
+
+

+

+ +

+

Partial application

+

+ +

+

+The rule +

+
+    lin This k = prefix "this" k ;
+
+

+can be written more concisely +

+
+    lin This = prefix "this" ;
+
+

+Part of the art in functional programming: +decide the order of arguments in a function, +so that partial application can be used as much as possible. +

+

+For instance, prefix is typically applied to +linearization variables with constant strings. Hence we +put the Str argument before the SS argument. +

+

+Exercise. Define an operation infix analogous to prefix, +such that it allows you to write +

+
+    lin Is = infix "is" ;
+
+

+

+ +

+

Testing resource modules

+

+Import with the flag -retain, +

+
+    > import -retain StringOper.gf
+
+

+Compute the value with compute_concrete = cc, +

+
+    > compute_concrete prefix "in" (ss "addition")
+    {s : Str = "in" ++ "addition"}
+
+

+

+ +

+

Grammar architecture

+

+ +

+

Extending a grammar

+

+A new module can extend an old one: +

+
+    abstract Morefood = Food ** {
+      cat
+        Question ;
+      fun
+        QIs : Item -> Quality -> Question ;
+        Pizza : Kind ;      
+    }
+
+

+Parallel to the abstract syntax, extensions can +be built for concrete syntaxes: +

+
+    concrete MorefoodEng of Morefood = FoodEng ** {
+      lincat
+        Question = {s : Str} ;
+      lin
+        QIs item quality = {s = "is" ++ item.s ++ quality.s} ;
+        Pizza = {s = "pizza"} ;
+    }
+
+

+The effect of extension: all of the contents of the extended +and extending modules are put together. +

+

+In other words: the new module inherits the contents of the old module. +

+

+ +

+

+Simultaneous extension and opening: +

+
+    concrete MorefoodIta of Morefood = FoodIta ** open StringOper in {
+      lincat
+        Question = SS ;
+      lin
+        QIs item quality = ss (item.s ++ "è" ++ quality.s) ;
+        Pizza = ss "pizza" ;
+    }
+
+

+Resource modules can extend other resource modules - thus it is +possible to build resource hierarchies. +

+

+ +

+

Multiple inheritance

+

+Extend several grammars at the same time: +

+
+    abstract Foodmarket = Food, Fruit, Mushroom ** {
+      fun 
+        FruitKind    : Fruit    -> Kind ;
+        MushroomKind : Mushroom -> Kind ;
+      }
+
+

+where +

+
+    abstract Fruit = {
+      cat Fruit ;
+      fun Apple, Peach : Fruit ;
+    }
+  
+    abstract Mushroom = {
+      cat Mushroom ;
+      fun Cep, Agaric : Mushroom ;
+    }
+
+

+

+Exercise. Refactor Food by taking apart Wine into a special +Drink module. +

+

+ +

+

Lesson 3: Grammars with parameters

+

+ +

+

+Goals: +

+
    +
  • implement sophisticated linguistic structures: +
      +
    • morphology: the inflection of words +
    • agreement: rules for selecting word forms in syntactic combinations +
    +
+ +
    +
  • Cover all GF constructs for concrete syntax +
+ +

+It is possible to skip this chapter and go directly +to the next, since the use of the GF Resource Grammar library +makes it unnecessary to use parameters: they +could be left to library implementors. +

+

+ +

+

The problem: words have to be inflected

+

+Plural forms are needed in things like +

+these Italian wines are delicious +
+This requires two things: +

+
    +
  • the inflection of nouns and verbs in singular and plural +
  • the agreement of the verb to subject: + the verb must have the same number as the subject +
+ +

+Different languages have different types of inflection and agreement. +

+
    +
  • Italian has also gender (masculine vs. feminine). +
+ +

+In a multilingual grammar, +we want to ignore such distinctions in abstract syntax. +

+

+Exercise. Make a list of the possible forms that nouns, +adjectives, and verbs can have in some languages that you know. +

+

+ +

+

Parameters and tables

+

+We define the parameter type of number in English by +a new form of judgement: +

+
+    param Number = Sg | Pl ;
+
+

+This judgement defines the parameter type Number by listing +its two constructors, Sg and Pl +(singular and plural). +

+

+We give Kind a linearization type that has a table depending on number: +

+
+    lincat Kind = {s : Number => Str} ;
+
+

+The table type Number => Str is similar to a function type +(Number -> Str). +

+

+Difference: the argument must be a parameter type. Then +the argument-value pairs can be listed in a finite table. +

+

+ +

+

+Here is a table: +

+
+    lin Cheese = {
+      s = table {
+        Sg => "cheese" ;
+        Pl => "cheeses"
+      }
+    } ;
+
+

+The table has branches, with a pattern on the +left of the arrow => and a value on the right. +

+

+The application of a table is done by the selection operator !. +

+

+It is computed by pattern matching: return +the value from the first branch whose pattern matches the +argument. For instance, +

+
+     table {Sg => "cheese" ; Pl => "cheeses"} ! Pl 
+     ===> "cheeses"
+
+

+

+ +

+

+Case expressions are syntactic sugar: +

+
+    case e of {...} ===  table {...} ! e
+
+

+Since they are familiar to Haskell and ML programmers, they can come in handy +when writing GF programs. +

+

+ +

+

+Constructors can take arguments from other parameter types. +

+

+Example: forms of English verbs (except be): +

+
+    param VerbForm = VPresent Number | VPast | VPastPart | VPresPart ;
+
+

+Fact expressed: only present tense has number variation. +

+

+Example table: the forms of the verb drink: +

+
+    table {
+      VPresent Sg => "drinks" ;
+      VPresent Pl => "drink" ;
+      VPast       => "drank" ;
+      VPastPart   => "drunk" ;
+      VPresPart   => "drinking"
+      }
+
+

+

+Exercise. In an earlier exercise (previous section), +you made a list of the possible +forms that nouns, adjectives, and verbs can have in some languages that +you know. Now take some of the results and implement them by +using parameter type definitions and tables. Write them into a resource +module, which you can test by using the command compute_concrete. +

+

+ +

+

Inflection tables and paradigms

+

+A morphological paradigm is a formula telling how a class of +words is inflected. +

+

+From the GF point of view, a paradigm is a function that takes +a lemma (also known as a dictionary form, or a citation form) and +returns an inflection table. +

+

+The following operation defines the regular noun paradigm of English: +

+
+    oper regNoun : Str -> {s : Number => Str} = \dog -> {
+      s = table {
+        Sg => dog ;
+        Pl => dog + "s"
+        }
+      } ;
+
+

+The gluing operator + glues strings to one token: +

+
+    (regNoun "cheese").s ! Pl  ===> "cheese" + "s"  ===>  "cheeses"
+
+

+

+ +

+

+A more complex example: regular verbs, +

+
+    oper regVerb : Str -> {s : VerbForm => Str} = \talk -> {
+      s = table {
+        VPresent Sg => talk + "s" ;
+        VPresent Pl => talk ;
+        VPresPart   => talk + "ing" ;
+        _           => talk + "ed"
+        }
+      } ;
+
+

+The catch-all case for the past tense and the past participle +uses a wild card pattern _. +

+

+ +

+

Exercises on morphology

+
    +
  1. Identify cases in which the regNoun paradigm does not +apply in English, and implement some alternative paradigms. +

    +
  2. Implement some regular paradigms for other languages you have +considered in earlier exercises. +
+ +

+ +

+

Using parameters in concrete syntax

+

+Purpose: a more radical +variation between languages +than just the use of different words and word orders. +

+

+We add to the grammar Food two rules for forming plural items: +

+
+    fun These, Those : Kind -> Item ;
+
+

+We also add a noun which in Italian has the feminine gender: +

+
+    fun Pizza : Kind ;
+
+

+This will force us to deal with gender. +

+

+ +

+

Agreement

+

+In English, the phrase-forming rule +

+
+    fun Is : Item -> Quality -> Phrase ;
+
+

+is affected by the number because of subject-verb agreement: +the verb of a sentence must be inflected in the number of the subject, +

+
+    Is (This Pizza) Warm   ===>  "this pizza is warm"
+    Is (These Pizza) Warm  ===>  "these pizzas are warm"
+
+

+It is the copula (the verb be) that is affected: +

+
+    oper copula : Number -> Str = \n -> 
+      case n of {
+        Sg => "is" ;
+        Pl => "are"
+        } ;
+
+

+The subject Item must have such a number to provide to the copula: +

+
+    lincat Item = {s : Str ; n : Number} ;
+
+

+Now we can write +

+
+    lin Is item qual = {s = item.s ++ copula item.n ++ qual.s} ;
+
+

+

+ +

+

Determiners

+

+How does an Item subject receive its number? The rules +

+
+    fun This, These : Kind -> Item ;
+
+

+add determiners, either this or these, which +require different forms of the noun: this pizza vs. +these pizzas. +

+

+Thus Kind must have both singular and plural forms: +

+
+    lincat Kind = {s : Number => Str} ;
+
+

+We can write +

+
+    lin This kind = {
+      s = "this" ++ kind.s ! Sg ; 
+      n = Sg
+    } ; 
+  
+    lin These kind = {
+      s = "these" ++ kind.s ! Pl ; 
+      n = Pl
+    } ; 
+
+

+

+ +

+

+To avoid copy-and-paste, we can factor out the pattern of determination, +

+
+    oper det : 
+      Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = 
+        \n,det,kind -> {
+        s = det ++ kind.s ! n ; 
+        n = n
+      } ; 
+
+

+Now we can write +

+
+    lin This  = det Sg "this" ;
+    lin These = det Pl "these" ;
+
+

+In a more lexicalized grammar, determiners would be a category: +

+
+    lincat Det = {s : Str ; n : Number} ;
+    fun Det : Det -> Kind -> Item ;
+    lin Det det kind = {
+        s = det.s ++ kind.s ! det.n ; 
+        n = det.n
+      } ; 
+
+

+

+ +

+

Parametric vs. inherent features

+

+Kinds have number as a parametric feature: both singular and plural +can be formed, +

+
+    lincat Kind = {s : Number => Str} ;
+
+

+Items have number as an inherent feature: they are inherently either +singular or plural, +

+
+    lincat Item = {s : Str ; n : Number} ;
+
+

+Italian Kind will have parametric number and inherent gender: +

+
+    lincat Kind = {s : Number => Str ; g : Gender} ;
+
+

+

+ +

+

+Questions to ask when designing parameters: +

+
    +
  • existence: what forms are possible to build by morphological and + other means? +
  • need: what features are expected via agreement or government? +
+ +

+Dictionaries give good advice: +

+uomo, pl. uomini, n.m. "man" +
+tells that uomo is a masculine noun with the plural form uomini. +Hence, parametric number and an inherent gender. +

+

+For words, inherent features are usually given as lexical information. +

+

+For combinations, they are inherited from some part of the construction +(typically the one called the head). Italian modification: +

+
+    lin QKind qual kind = 
+      let gen = kind.g in {
+        s = table {n => kind.s ! n ++ qual.s ! gen ! n} ;
+        g = gen
+        } ;
+
+

+Notice +

+
    +
  • local definition (let expression) +
  • variable pattern n +
+ +

+ +

+

An English concrete syntax for Foods with parameters

+

+We use some string operations from the library Prelude. +

+
+     concrete FoodsEng of Foods = open Prelude in {
+  
+    lincat
+      S, Quality = SS ; 
+      Kind = {s : Number => Str} ; 
+      Item = {s : Str ; n : Number} ; 
+  
+    lin
+      Is item quality = ss (item.s ++ copula item.n ++ quality.s) ;
+      This  = det Sg "this" ;
+      That  = det Sg "that" ;
+      These = det Pl "these" ;
+      Those = det Pl "those" ;
+      QKind quality kind = {s = table {n => quality.s ++ kind.s ! n}} ;
+      Wine = regNoun "wine" ;
+      Cheese = regNoun "cheese" ;
+      Fish = noun "fish" "fish" ;
+      Pizza = regNoun "pizza" ;
+      Very = prefixSS "very" ;
+      Fresh = ss "fresh" ;
+      Warm = ss "warm" ;
+      Italian = ss "Italian" ;
+      Expensive = ss "expensive" ;
+      Delicious = ss "delicious" ;
+      Boring = ss "boring" ;
+
+

+

+ +

+
+    param
+      Number = Sg | Pl ;
+  
+    oper
+      det : Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = 
+        \n,d,cn -> {
+          s = d ++ cn.s ! n ;
+          n = n
+        } ;
+      noun : Str -> Str -> {s : Number => Str} = 
+        \man,men -> {s = table {
+          Sg => man ;
+          Pl => men 
+          }
+        } ;
+      regNoun : Str -> {s : Number => Str} = 
+        \car -> noun car (car + "s") ;
+      copula : Number -> Str = 
+        \n -> case n of {
+          Sg => "is" ;
+          Pl => "are"
+          } ;
+    }    
+
+

+

+ +

+

More on inflection paradigms

+

+ +

+

+Let us extend the English noun paradigms so that we can +deal with all nouns, not just the regular ones. The goal is to +provide a morphology module that makes it easy to +add words to a lexicon. +

+

+ +

+

Worst-case functions

+

+We perform data abstraction from the type +of nouns by writing a worst-case function: +

+
+    oper Noun : Type = {s : Number => Str} ;
+  
+    oper mkNoun : Str -> Str -> Noun = \x,y -> {
+      s = table {
+        Sg => x ;
+        Pl => y
+        }
+      } ;
+  
+    oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ;
+
+

+Then we can define +

+
+    lincat N = Noun ;
+    lin Mouse = mkNoun "mouse" "mice" ;
+    lin House = regNoun "house" ;
+
+

+where the underlying types are not seen. +

+

+ +

+

+We are free to change the underlying definitions, e.g. +add case (nominative or genitive) to noun inflection: +

+
+    param Case = Nom | Gen ;
+  
+    oper Noun : Type = {s : Number => Case => Str} ;
+
+

+Now we have to redefine the worst-case function +

+
+    oper mkNoun : Str -> Str -> Noun = \x,y -> {
+      s = table {
+        Sg => table {
+          Nom => x ;
+          Gen => x + "'s"
+          } ;
+        Pl => table {
+          Nom => y ;
+          Gen => y + case last y of {
+            "s" => "'" ;
+            _   => "'s"
+            }
+          }
+        }
+      } ;
+
+

+But up from this level, we can retain the old definitions +

+
+    lin Mouse = mkNoun "mouse" "mice" ;
+    oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ;
+
+

+

+ +

+

+In the last definition of mkNoun, we used a case expression +on the last character of the plural, as well as the Prelude +operation +

+
+    last : Str -> Str ;
+
+

+returning the string consisting of the last character. +

+

+The case expression uses pattern matching over strings, which +is supported in GF, alongside pattern matching over +parameters. +

+

+ +

+

Smart paradigms

+

+The regular dog-dogs paradigm has +predictable variations: +

+
    +
  • nouns ending with a y: fly-flies, except if + a vowel precedes the y: boy-boys +
  • nouns ending with s, ch, and a number of + other endings: bus-buses, leech-leeches +
+ +

+We could provide alternative paradigms: +

+
+    noun_y : Str -> Noun = \fly -> mkNoun fly (init fly + "ies") ;  
+    noun_s : Str -> Noun = \bus -> mkNoun bus (bus + "es") ;
+
+

+(The Prelude function init drops the last character of a token.) +

+

+Drawbacks: +

+
    +
  • it can be difficult to select the correct paradigm +
  • it can be difficult to remember the names of the different paradigms +
+ +

+ +

+

+Better solution: a smart paradigm: +

+
+    regNoun : Str -> Noun = \w -> 
+      let 
+        ws : Str = case w of {
+          _ + ("a" | "e" | "i" | "o") + "o" => w + "s" ;  -- bamboo
+          _ + ("s" | "x" | "sh" | "o")      => w + "es" ; -- bus, hero
+          _ + "z"                           => w + "zes" ;-- quiz 
+          _ + ("a" | "e" | "o" | "u") + "y" => w + "s" ;  -- boy
+          x + "y"                           => x + "ies" ;-- fly
+          _                                 => w + "s"    -- car
+          } 
+      in 
+      mkNoun w ws
+
+

+GF has regular expression patterns: +

+
    +
  • disjunctive patterns P | Q +
  • concatenation patterns P + Q +
+ +

+The patterns are ordered in such a way that, for instance, +the suffix "oo" prevents bamboo from matching the suffix +"o". +

+

+ +

+

Exercises on regular patterns

+
    +
  1. The same rules that form plural nouns in English also +apply in the formation of third-person singular verbs. +Write a regular verb paradigm that uses this idea, but first +rewrite regNoun so that the analysis needed to build s-forms +is factored out as a separate oper, which is shared with +regVerb. +

    +
  2. Extend the verb paradigms to cover all verb forms +in English, with special care taken of variations with the suffix +ed (e.g. try-tried, use-used). +

    +
  3. Implement the German Umlaut operation on word stems. +The operation changes the vowel of the stressed stem syllable as follows: +a to ä, au to äu, o to ö, and u to ü. You +can assume that the operation only takes syllables as arguments. Test the +operation to see whether it correctly changes Arzt to Ärzt, +Baum to Bäum, Topf to Töpf, and Kuh to Küh. +
+ +

+ +

+

Function types with variables

+

+In Lesson 5, dependent function types need a notation +that binds a variable to the argument type, as in +

+
+    switchOff : (k : Kind) -> Action k
+
+

+Function types without variables are actually a shorthand: +

+
+    PredVP : NP -> VP -> S
+
+

+means +

+
+    PredVP : (x : NP) -> (y : VP) -> S
+
+

+or any other naming of the variables. +

+

+ +

+

+Sometimes variables shorten the code, since they can share a type: +

+
+    octuple : (x,y,z,u,v,w,s,t : Str) -> Str
+
+

+If a bound variable is not used, it can be replaced by a wildcard: +

+
+    octuple : (_,_,_,_,_,_,_,_ : Str) -> Str
+
+

+A good practice is to indicate the number of arguments: +

+
+    octuple : (x1,_,_,_,_,_,_,x8 : Str) -> Str
+
+

+For inflection paradigms, it is handy to use heuristic variable names, +looking like the expected forms: +

+
+    mkNoun : (mouse,mice : Str) -> Noun
+
+

+

+ +

+

Separating operation types and definitions

+

+In libraries, it is useful to group type signatures separately from +definitions. It is possible to divide an oper judgement, +

+
+    oper regNoun : Str -> Noun ;
+    oper regNoun s = mkNoun s (s + "s") ;
+
+

+and put the parts in different places. +

+

+With the interface and instance module types +(see here): the parts can even be put to different files. +

+

+ +

+

Overloading of operations

+

+Overloading: different functions can be given the same name, as e.g. in C++. +

+

+The compiler performs overload resolution, which works as long as the +functions have different types. +

+

+In GF, the functions must be grouped together in overload groups. +

+

+Example: different ways to define nouns in English: +

+
+    oper mkN : overload {
+      mkN : (dog : Str) -> Noun ;         -- regular nouns
+      mkN : (mouse,mice : Str) -> Noun ;  -- irregular nouns
+    }
+
+

+Cf. dictionaries: if the +word is regular, just one form is needed. If it is irregular, +more forms are given. +

+

+The definition can be given separately, or at the same time, as the types: +

+
+    oper mkN = overload {
+      mkN : (dog : Str) -> Noun = regNoun ;
+      mkN : (mouse,mice : Str) -> Noun = mkNoun ;
+    }
+
+

+Exercise. Design a system of English verb paradigms presented by +an overload group. +

+

+ +

+

Morphological analysis and morphology quiz

+

+The command morpho_analyse = ma +can be used to read a text and return for each word its analyses +(in the current grammar): +

+
+    > read_file bible.txt | morpho_analyse
+
+

+The command morpho_quiz = mq generates inflection exercises. +

+
+    % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo
+  
+    > morpho_quiz -cat=V
+  
+    Welcome to GF Morphology Quiz.
+    ...
+  
+    réapparaître : VFin VCondit  Pl  P2
+    réapparaitriez
+    > No, not réapparaitriez, but
+    réapparaîtriez
+    Score 0/1
+
+

+To create a list for later use, use the command morpho_list = ml +

+
+    > morpho_list -number=25 -cat=V | write_file exx.txt
+
+

+

+ +

+

The Italian Foods grammar

+

+ +

+

+Parameters include not only number but also gender. +

+
+  concrete FoodsIta of Foods = open Prelude in {
+  
+    param
+      Number = Sg | Pl ;
+      Gender = Masc | Fem ;
+
+

+Qualities are inflected for gender and number, whereas kinds +have a parametric number and an inherent gender. +Items have an inherent number and gender. +

+
+    lincat
+      Phr = SS ; 
+      Quality = {s : Gender => Number => Str} ; 
+      Kind = {s : Number => Str ; g : Gender} ; 
+      Item = {s : Str ; g : Gender ; n : Number} ; 
+
+

+

+ +

+

+A Quality is an adjective, with one form for each gender-number combination. +

+
+    oper
+      adjective : (_,_,_,_ : Str) -> {s : Gender => Number => Str} = 
+        \nero,nera,neri,nere -> {
+          s = table {
+            Masc => table {
+              Sg => nero ;
+              Pl => neri
+              } ; 
+            Fem => table {
+              Sg => nera ;
+              Pl => nere
+              }
+            }
+        } ;
+
+

+Regular adjectives work by adding endings to the stem. +

+
+      regAdj : Str -> {s : Gender => Number => Str} = \nero ->
+        let ner = init nero 
+        in adjective nero (ner + "a") (ner + "i") (ner + "e") ;
+
+

+

+ +

+

+For noun inflection, we are happy to give the two forms and the gender +explicitly: +

+
+      noun : Str -> Str -> Gender -> {s : Number => Str ; g : Gender} = 
+        \vino,vini,g -> {
+          s = table {
+            Sg => vino ;
+            Pl => vini
+            } ;
+          g = g
+        } ;
+
+

+We need only number variation for the copula. +

+
+      copula : Number -> Str = 
+        \n -> case n of {
+          Sg => "è" ;
+          Pl => "sono"
+          } ;
+
+

+

+ +

+

+Determination is more complex than in English, because of gender: +

+
+      det : Number -> Str -> Str -> {s : Number => Str ; g : Gender} -> 
+          {s : Str ; g : Gender ; n : Number} = 
+        \n,m,f,cn -> {
+          s = case cn.g of {Masc => m ; Fem => f} ++ cn.s ! n ;
+          g = cn.g ;
+          n = n
+        } ;
+
+

+

+ +

+

+The complete set of linearization rules: +

+
+    lin
+      Is item quality = 
+        ss (item.s ++ copula item.n ++ quality.s ! item.g ! item.n) ;
+      This  = det Sg "questo" "questa" ;
+      That  = det Sg "quel"   "quella" ;
+      These = det Pl "questi" "queste" ;
+      Those = det Pl "quei"   "quelle" ;
+      QKind quality kind = {
+        s = \\n => kind.s ! n ++ quality.s ! kind.g ! n ;
+        g = kind.g
+        } ;
+      Wine = noun "vino" "vini" Masc ;
+      Cheese = noun "formaggio" "formaggi" Masc ;
+      Fish = noun "pesce" "pesci" Masc ;
+      Pizza = noun "pizza" "pizze" Fem ;
+      Very qual = {s = \\g,n => "molto" ++ qual.s ! g ! n} ;
+      Fresh = adjective "fresco" "fresca" "freschi" "fresche" ;
+      Warm = regAdj "caldo" ;
+      Italian = regAdj "italiano" ;
+      Expensive = regAdj "caro" ;
+      Delicious = regAdj "delizioso" ;
+      Boring = regAdj "noioso" ;
+    }
+
+

+

+ +

+

Exercises on using parameters

+
    +
  1. Experiment with multilingual generation and translation in the +Foods grammars. +

    +
  2. Add items, qualities, and determiners to the grammar, +and try to get their inflection and inherent features right. +

    +
  3. Write a concrete syntax of Food for a language of your choice, +now aiming for complete grammatical correctness by the use of parameters. +

    +
  4. Measure the size of the context-free grammar corresponding to +FoodsIta. You can do this by printing the grammar in the context-free format +(print_grammar -printer=bnf) and counting the lines. +
+ +

+ +

+

Discontinuous constituents

+

+A linearization record may contain more strings than one, and those +strings can be put apart in linearization. +

+

+Example: English particle +verbs, (switch off). The object can appear between: +

+

+he switched it off +

+

+The verb switch off is called a +discontinuous constituent. +

+

+We can define transitive verbs and their combinations as follows: +

+
+    lincat TV = {s : Number => Str ; part : Str} ;
+  
+    fun AppTV : Item -> TV -> Item -> Phrase ;
+  
+    lin AppTV subj tv obj = 
+      {s = subj.s ++ tv.s ! subj.n ++ obj.s ++ tv.part} ;
+
+

+

+Exercise. Define the language a^n b^n c^n in GF, i.e. +any number of a's followed by the same number of b's and +the same number of c's. This language is not context-free, +but can be defined in GF by using discontinuous constituents. +

+

+ +

+

Strings at compile time vs. run time

+

+Tokens are created in the following ways: +

+
    +
  • quoted string: "foo" +
  • gluing : t + s +
  • predefined operations init, tail, tk, dp +
  • pattern matching over strings +
+ +

+Since tokens must be known at compile time, +the above operations may not be applied to run-time variables +(i.e. variables that stand for function arguments in linearization rules). +

+

+Hence it is not legal to write +

+
+    cat Noun ;
+    fun Plural : Noun -> Noun ;
+    lin Plural n = {s = n.s + "s"} ;
+
+

+because n is a run-time variable. Also +

+
+    lin Plural n = {s = (regNoun n).s ! Pl} ; 
+
+

+is incorrect with regNoun as defined here, because the run-time +variable is eventually sent to string pattern matching and gluing. +

+

+ +

+

+How to write tokens together without a space? +

+
+    lin Question p = {s = p + "?"} ;
+
+

+is incorrect. +

+

+The way to go is to use an unlexer that creates correct spacing +after linearization. +

+

+Correspondingly, a lexer that e.g. analyses "warm?" into +two tokens is needed before parsing. +This topic will be covered in here. +

+

+ +

+

Supplementary constructs for concrete syntax

+

Record extension and subtyping

+

+The symbol ** is used for both record types and record objects. +

+
+    lincat TV = Verb ** {c : Case} ;
+  
+    lin Follow = regVerb "folgen" ** {c = Dative} ; 
+
+

+TV becomes a subtype of Verb. +

+

+If T is a subtype of R, an object of T can be used whenever +an object of R is required. +

+

+Covariance: a function returning a record T as value can +also be used to return a value of a supertype R. +

+

+Contravariance: a function taking an R as argument +can also be applied to any object of a subtype T. +

+

+ +

+

Tuples and product types

+

+Product types and tuples are syntactic sugar for record types and records: +

+
+    T1 * ... * Tn   ===   {p1 : T1 ; ... ; pn : Tn}
+    <t1, ...,  tn>  ===   {p1 = t1 ; ... ; pn = tn}
+
+

+Thus the labels p1, p2,... are hard-coded. +

+

+ +

+

Prefix-dependent choices

+

+English indefinite article: +

+
+    oper artIndef : Str = 
+      pre {"a" ; "an" / strs {"a" ; "e" ; "i" ; "o"}} ;
+
+

+Thus +

+
+    artIndef ++ "cheese"  --->  "a" ++ "cheese"
+    artIndef ++ "apple"   --->  "an" ++ "apple"
+
+

+

+ +

+

Lesson 4: Using the resource grammar library

+

+ +

+

+Goals: +

+
    +
  • navigate in the GF resource grammar library and use it in applications +
  • get acquainted with basic linguistic categories +
  • write functors to achieve maximal sharing of code in multilingual grammars +
+ +

+ +

+

The coverage of the library

+

+The current 12 resource languages are +

+
    +
  • Bulgarian +
  • Catalan +
  • Danish +
  • English +
  • Finnish +
  • French +
  • German +
  • Italian +
  • Norwegian +
  • Russian +
  • Spanish +
  • Swedish +
+ +

+The first three letters (Eng etc) are used in grammar module names +(ISO 639 standard). +

+

+ +

+

The structure of the library

+

+ +

+

+Semantic grammars (up to now in this tutorial): +a grammar defines a system of meanings (abstract syntax) and +tells how they are expressed (concrete syntax). +

+

+Resource grammars (as usual in linguistic tradition): +a grammar specifies the grammatically correct combinations of words, +whatever their meanings are. +

+

+With resource grammars, we can achieve a +wider coverage than with semantic grammars. +

+

+ +

+

Lexical vs. phrasal rules

+

+A resource grammar has two kinds of categories and two kinds of rules: +

+
    +
  • lexical: +
      +
    • lexical categories, to classify words +
    • lexical rules, to define words and their properties +

      +
    +
  • phrasal (combinatorial, syntactic): +
      +
    • phrasal categories, to classify phrases of arbitrary size +
    • phrasal rules, to combine phrases into larger phrases +
    +
+ +

+GF makes no formal distinction between these two kinds. +

+

+But it is a good discipline to follow. +

+

+ +

+

Lexical categories

+

+Two kinds of lexical categories: +

+
    +
  • closed: +
      +
    • a finite number of words +
    • seldom extended in the history of language +
    • structural words / function words, e.g. +
      +      Conj ;     -- conjunction           e.g. "and"
      +      QuantSg ;  -- singular quantifier   e.g. "this"
      +      QuantPl ;  -- plural quantifier     e.g. "these"
      +
      +

      +
    +
  • open: +
      +
    • new words are added all the time +
    • content words, e.g. +
      +      N ;        -- noun         e.g. "pizza"
      +      A ;        -- adjective    e.g. "good"
      +      V ;        -- verb         e.g. "sleep"
      +
      +
    +
+ +

+ +

+

Lexical rules

+

+Closed classes: module Syntax. In the Foods grammar, we need +

+
+    this_QuantSg, that_QuantSg : QuantSg ; 
+    these_QuantPl, those_QuantPl : QuantPl ; 
+    very_AdA  : AdA ;
+
+

+Naming convention: word followed by the category (so we can +distinguish the quantifier that from the conjunction that). +

+

+Open classes have no objects in Syntax. Words are +built as they are needed in applications: if we have +

+
+    fun Wine : Kind ;
+
+

+we will define +

+
+    lin Wine = mkN "wine" ;
+
+

+where we use mkN from ParadigmsEng: +

+

+ +

+

Resource lexicon

+

+Alternative concrete syntax for +

+
+    fun Wine : Kind ;
+
+

+is to provide a resource lexicon, which contains definitions such as +

+
+    oper wine_N : N = mkN "wine" ;
+
+

+so that we can write +

+
+    lin Wine = wine_N ;
+
+

+Advantages: +

+
    +
  • we accumulate a reusable lexicon +
  • we can use a functor to speed up multilingual grammar implementation +
+ +

+ +

+

Phrasal categories

+

+In Foods, we need just four phrasal categories: +

+
+    Cl ;   -- clause             e.g. "this pizza is good"
+    NP ;   -- noun phrase        e.g. "this pizza"
+    CN ;   -- common noun        e.g. "warm pizza"
+    AP ;   -- adjectival phrase  e.g. "very warm"
+
+

+Clauses are similar to sentences (S), but without a +fixed tense and mood; see here for how they relate. +

+

+Common nouns are made into noun phrases by adding determiners. +

+

+ +

+

Syntactic combinations

+

+We need the following combinations: +

+
+    mkCl : NP -> AP -> Cl ;      -- e.g. "this pizza is very warm"
+    mkNP : QuantSg -> CN -> NP ; -- e.g. "this pizza" 
+    mkNP : QuantPl -> CN -> NP ; -- e.g. "these pizzas"
+    mkCN : AP -> CN -> CN ;      -- e.g. "warm pizza"
+    mkAP : AdA -> AP -> AP ;     -- e.g. "very warm" 
+
+

+We also need lexical insertion, to form phrases from single words: +

+
+    mkCN : N -> CN ;
+    mkAP : A -> AP ;
+
+

+Naming convention: to construct a C, use a function mkC. +

+

+Heavy overloading: the current library +(version 1.2) has 23 operations named mkNP! +

+

+ +

+

Example syntactic combination

+

+The sentence +

+these very warm pizzas are Italian +
+can be built as follows: +

+
+    mkCl 
+      (mkNP these_QuantPl 
+         (mkCN (mkAP very_AdA (mkAP warm_A)) (mkCN pizza_N)))
+      (mkAP italian_A) 
+
+

+The task now: to define the concrete syntax of Foods so that +this syntactic tree gives the value of linearizing the semantic tree +

+
+    Is (These (QKind (Very Warm) Pizza)) Italian
+
+

+

+ +

+

The resource API

+

+Language-specific and language-independent parts - roughly, +

+
    +
  • the syntax API SyntaxL has the same types and + functions for all languages L +
  • the morphology API ParadigmsL has partly + different types and functions + for different languages L +
+ +

+Full API documentation on-line: the resource synopsis, +

+

+grammaticalframework.org/lib/resource/doc/synopsis.html +

+

+ +

+

A miniature resource API: categories

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CategoryExplanationExample
Clclause (sentence), with all tensesshe looks at this
APadjectival phrasevery warm
CNcommon noun (without determiner)red house
NPnoun phrase (subject or object)the red house
AdAadjective-modifying adverb,very
QuantSgsingular quantifierthis
QuantPlplural quantifierthese
Aone-place adjectivewarm
Ncommon nounhouse
+ +

+ +

+

A miniature resource API: rules

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionTypeExample
mkClNP -> AP -> ClJohn is very old
mkNPQuantSg -> CN -> NPthis old man
mkNPQuantPl -> CN -> NPthese old men
mkCNN -> CNhouse
mkCNAP -> CN -> CNvery big blue house
mkAPA -> APold
mkAPAdA -> AP -> APvery very old
+ +

+ +

+

A miniature resource API: structural words

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionTypeIn English
this_QuantSgQuantSgthis
that_QuantSgQuantSgthat
these_QuantPlQuantPlthese
those_QuantPlQuantPlthose
very_AdAAdAvery
+ +

+ +

+

A miniature resource API: paradigms

+

+From ParadigmsEng: +

+ + + + + + + + + + + + + + + + + +
FunctionType
mkN(dog : Str) -> N
mkN(man,men : Str) -> N
mkA(cold : Str) -> A
+ +

+From ParadigmsIta: +

+ + + + + + + + + + + + + +
FunctionType
mkN(vino : Str) -> N
mkA(caro : Str) -> A
+ +

+ +

+

A miniature resource API: more paradigms

+

+From ParadigmsGer: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionType
GenderType
masculineGender
feminineGender
neuterGender
mkN(Stufe : Str) -> N
mkN(Bild,Bilder : Str) -> Gender -> N
mkA(klein : Str) -> A
mkA(gut,besser,beste : Str) -> A
+ +

+From ParadigmsFin: +

+ + + + + + + + + + + + + +
FunctionType
mkN(talo : Str) -> N
mkA(hieno : Str) -> A
+ +

+ +

+

Exercises

+

+1. Try out the morphological paradigms in different languages. Do +as follows: +

+
+    > i -path=alltenses -retain alltenses/ParadigmsGer.gfo
+    > cc -table mkN "Farbe"
+    > cc -table mkA "gut" "besser" "beste"
+
+

+

+ +

+

Example: English

+

+ +

+

+We assume the abstract syntax Foods from Lesson 3. +

+

+We don't need to think about inflection and agreement, but just pick +functions from the resource grammar library. +

+

+We need a path with +

+
    +
  • the current directory . +
  • the directory ../foods, in which Foods.gf resides. +
  • the library directory present, which is relative to the + environment variable GF_LIB_PATH +
+ +

+Thus the beginning of the module is +

+
+    --# -path=.:../foods:present
+  
+    concrete FoodsEng of Foods = open SyntaxEng,ParadigmsEng in {
+
+

+

+ +

+

English example: linearization types and combination rules

+

+As linearization types, we use clauses for Phrase, noun phrases +for Item, common nouns for Kind, and adjectival phrases for Quality. +

+
+    lincat
+      Phrase = Cl ; 
+      Item = NP ;
+      Kind = CN ;
+      Quality = AP ;
+
+

+Now the combination rules we need almost write themselves automatically: +

+
+    lin
+      Is item quality = mkCl item quality ;
+      This kind = mkNP this_QuantSg kind ;
+      That kind = mkNP that_QuantSg kind ;
+      These kind = mkNP these_QuantPl kind ;
+      Those kind = mkNP those_QuantPl kind ;
+      QKind quality kind = mkCN quality kind ;
+      Very quality = mkAP very_AdA quality ;
+
+

+

+ +

+

English example: lexical rules

+

+We use resource paradigms and lexical insertion rules. +

+

+The two-place noun paradigm is needed only once, for +fish - everything else is regular. +

+
+      Wine = mkCN (mkN "wine") ;
+      Pizza = mkCN (mkN "pizza") ;
+      Cheese = mkCN (mkN "cheese") ;
+      Fish = mkCN (mkN "fish" "fish") ;
+      Fresh = mkAP (mkA "fresh") ;
+      Warm = mkAP (mkA "warm") ;
+      Italian = mkAP (mkA "Italian") ;
+      Expensive = mkAP (mkA "expensive") ;
+      Delicious = mkAP (mkA "delicious") ;
+      Boring = mkAP (mkA "boring") ;
+    }
+
+

+

+ +

+

English example: exercises

+

+1. Compile the grammar FoodsEng and generate +and parse some sentences. +

+

+2. Write a concrete syntax of Foods for Italian +or some other language included in the resource library. You can +compare the results with the hand-written +grammars presented earlier in this tutorial. +

+

+ +

+

Functor implementation of multilingual grammars

+

+ +

+

New language by copy and paste

+

+If you write a concrete syntax of Foods for some other +language, much of the code will look exactly the same +as for English. This is because +

+
    +
  • the Syntax API is the same for all languages (because + all languages in the resource package do implement the same + syntactic structures) +
  • languages tend to use the syntactic structures in similar ways +
+ +

+But lexical rules are more language-dependent. +

+

+Thus, to port a grammar to a new language, you +

+
    +
  1. copy the concrete syntax of a given language +
  2. change the words (strings and inflection paradigms) +
+ +

+Can we avoid this programming by copy-and-paste? +

+

+ +

+

Functors: functions on the module level

+

+Functors familiar from the functional programming languages ML and OCaml, +also known as parametrized modules. +

+

+In GF, a functor is a module that opens one or more interfaces. +

+

+An interface is a module similar to a resource, but it only +contains the types of opers, not (necessarily) their definitions. +

+

+Syntax for functors: add the keyword incomplete. We will use the header +

+
+    incomplete concrete FoodsI of Foods = open Syntax, LexFoods in
+
+

+where +

+
+    interface Syntax    -- the resource grammar interface
+    interface LexFoods  -- the domain lexicon interface
+
+

+When we moreover have +

+
+    instance SyntaxGer of Syntax     -- the German resource grammar
+    instance LexFoodsGer of LexFoods -- the German domain lexicon
+
+

+we can write a functor instantiation, +

+
+    concrete FoodsGer of Foods = FoodsI with 
+      (Syntax = SyntaxGer),
+      (LexFoods = LexFoodsGer) ;
+
+

+

+ +

+

Code for the Foods functor

+
+    --# -path=.:../foods
+  
+    incomplete concrete FoodsI of Foods = open Syntax, LexFoods in {
+    lincat
+      Phrase = Cl ; 
+      Item = NP ;
+      Kind = CN ;
+      Quality = AP ;
+    lin
+      Is item quality = mkCl item quality ;
+      This kind = mkNP this_QuantSg kind ;
+      That kind = mkNP that_QuantSg kind ;
+      These kind = mkNP these_QuantPl kind ;
+      Those kind = mkNP those_QuantPl kind ;
+      QKind quality kind = mkCN quality kind ;
+      Very quality = mkAP very_AdA quality ;
+  
+      Wine = mkCN wine_N ;
+      Pizza = mkCN pizza_N ;
+      Cheese = mkCN cheese_N ;
+      Fish = mkCN fish_N ;
+      Fresh = mkAP fresh_A ;
+      Warm = mkAP warm_A ;
+      Italian = mkAP italian_A ;
+      Expensive = mkAP expensive_A ;
+      Delicious = mkAP delicious_A ;
+      Boring = mkAP boring_A ;
+    }
+
+

+

+ +

+

Code for the LexFoods interface

+

+ +

+
+    interface LexFoods = open Syntax in {
+    oper
+      wine_N : N ;
+      pizza_N : N ;
+      cheese_N : N ;
+      fish_N : N ;
+      fresh_A : A ;
+      warm_A : A ;
+      italian_A : A ;
+      expensive_A : A ;
+      delicious_A : A ;
+      boring_A : A ;
+    }
+
+

+

+ +

+

Code for a German instance of the lexicon

+
+    instance LexFoodsGer of LexFoods = open SyntaxGer, ParadigmsGer in {
+    oper
+      wine_N = mkN "Wein" ;
+      pizza_N = mkN "Pizza" "Pizzen" feminine ;
+      cheese_N = mkN "Käse" "Käsen" masculine ;
+      fish_N = mkN "Fisch" ;
+      fresh_A = mkA "frisch" ;
+      warm_A = mkA "warm" "wärmer" "wärmste" ;
+      italian_A = mkA "italienisch" ;
+      expensive_A = mkA "teuer" ;
+      delicious_A = mkA "köstlich" ;
+      boring_A = mkA "langweilig" ;
+    }
+
+

+

+ +

+

Code for a German functor instantiation

+
+    --# -path=.:../foods:present
+  
+    concrete FoodsGer of Foods = FoodsI with 
+      (Syntax = SyntaxGer),
+      (LexFoods = LexFoodsGer) ;
+
+

+

+ +

+

Adding languages to a functor implementation

+

+Just two modules are needed: +

+
    +
  • a domain lexicon instance +
  • a functor instantiation +
+ +

+The functor instantiation is completely mechanical to write. +

+

+The domain lexicon instance requires some knowledge of the words of the +language: +

+
    +
  • what words are used for which concepts +
  • how the words are inflected +
  • features such as genders +
+ +

+ +

+

Example: adding Finnish

+

+Lexicon instance +

+
+    instance LexFoodsFin of LexFoods = open SyntaxFin, ParadigmsFin in {
+    oper
+      wine_N = mkN "viini" ;
+      pizza_N = mkN "pizza" ;
+      cheese_N = mkN "juusto" ;
+      fish_N = mkN "kala" ;
+      fresh_A = mkA "tuore" ;
+      warm_A = mkA "lämmin" ;
+      italian_A = mkA "italialainen" ;
+      expensive_A = mkA "kallis" ;
+      delicious_A = mkA "herkullinen" ;
+      boring_A = mkA "tylsä" ;
+    }
+
+

+Functor instantiation +

+
+    --# -path=.:../foods:present
+  
+    concrete FoodsFin of Foods = FoodsI with 
+      (Syntax = SyntaxFin),
+      (LexFoods = LexFoodsFin) ;
+
+

+

+ +

+

A design pattern

+

+This can be seen as a design pattern for multilingual grammars: +

+
+                        concrete DomainL*
+  
+      instance LexDomainL                 instance SyntaxL*
+     
+                   incomplete concrete DomainI
+                   /           |              \               
+     interface LexDomain   abstract Domain    interface Syntax*
+
+

+Modules marked with * are either given in the library, or trivial. +

+

+Of the hand-written modules, only LexDomainL is language-dependent. +

+

+ +

+

Functors: exercises

+

+1. Compile and test FoodsGer. +

+

+2. Refactor FoodsEng into a functor instantiation. +

+

+3. Instantiate the functor FoodsI to some language of +your choice. +

+

+4. Design a small grammar that can be used for controlling +an MP3 player. The grammar should be able to recognize commands such +as play this song, with the following variations: +

+
    +
  • verbs: play, remove +
  • objects: song, artist +
  • determiners: this, the previous +
  • verbs without arguments: stop, pause +
+ +

+The implementation goes in the following phases: +

+
    +
  1. abstract syntax +
  2. (optional:) prototype string-based concrete syntax +
  3. functor over resource syntax and lexicon interface +
  4. lexicon instance for the first language +
  5. functor instantiation for the first language +
  6. lexicon instance for the second language +
  7. functor instantiation for the second language +
  8. ... +
+ +

+ +

+

Restricted inheritance

+

A problem with functors

+

+Problem: a functor only works when all languages use the resource Syntax +in the same way. +

+

+Example (contrived): assume that English has +no word for Pizza, but has to use the paraphrase Italian pie. +This is no longer a noun N, but a complex phrase +in the category CN. +

+

+Possible solution: change the interface LexFoods with +

+
+    oper pizza_CN : CN ;
+
+

+Problem with this solution: +

+
    +
  • we may end up changing the interface and the functor with each new language +
  • we must every time also change the instances for the old languages to maintain + type correctness +
+ +

+ +

+

Restricted inheritance: include or exclude

+

+A module may inherit just a selection of names. +

+

+Example: the FoodMarket example (from the section on grammar architecture): +

+
+    abstract Foodmarket = Food, Fruit [Peach], Mushroom - [Agaric]
+
+

+Here, from Fruit we include Peach only, and from Mushroom +we exclude Agaric. +

+

+A concrete syntax of Foodmarket must make the analogous restrictions. +

+

+ +

+

The functor problem solved

+

+The English instantiation inherits the functor +implementation except for the constant Pizza. This constant +is defined in the body instead: +

+
+    --# -path=.:../foods:present
+  
+    concrete FoodsEng of Foods = FoodsI - [Pizza] with 
+      (Syntax = SyntaxEng),
+      (LexFoods = LexFoodsEng) ** 
+        open SyntaxEng, ParadigmsEng in {
+  
+      lin Pizza = mkCN (mkA "Italian") (mkN "pie") ;
+    }
+
+

+

+ +

+

Grammar reuse

+

+Abstract syntax modules can be used as interfaces, +and concrete syntaxes as their instances. +

+

+The following correspondences are then applied: +

+
+    cat C         <--->  oper C : Type
+  
+    fun f : A     <--->  oper f : A
+  
+    lincat C = T  <--->  oper C : Type = T
+  
+    lin f = t     <--->  oper f : A = t
+
+

+

+ +

+

Library exercises

+

+1. Find resource grammar terms for the following +English phrases (in the category Phr). You can first try to +build the terms manually. +

+

+every man loves a woman +

+

+this grammar speaks more than ten languages +

+

+which languages aren't in the grammar +

+

+which languages did you want to speak +

+

+Then translate the phrases to other languages. +

+

+ +

+

Tenses

+

+ +

+

+In Foods grammars, we have used the path +

+
+    --# -path=.:../foods:present
+
+

+The library subdirectory present is a restricted version +of the resource, with only present tense of verbs and sentences. +

+

+By just changing the path, we get all tenses: +

+
+    --# -path=.:../foods:alltenses
+
+

+Now we can see all the tenses of phrases, by using the -all flag +in linearization: +

+
+    > gr | l -all
+    This wine is delicious
+    Is this wine delicious
+    This wine isn't delicious
+    Isn't this wine delicious
+    This wine is not delicious
+    Is this wine not delicious
+    This wine has been delicious
+    Has this wine been delicious
+    This wine hasn't been delicious
+    Hasn't this wine been delicious
+    This wine has not been delicious
+    Has this wine not been delicious
+    This wine was delicious
+    Was this wine delicious
+    This wine wasn't delicious
+    Wasn't this wine delicious
+    This wine was not delicious
+    Was this wine not delicious
+    This wine had been delicious
+    Had this wine been delicious
+    This wine hadn't been delicious
+    Hadn't this wine been delicious
+    This wine had not been delicious
+    Had this wine not been delicious
+    This wine will be delicious
+    Will this wine be delicious
+    This wine won't be delicious
+    Won't this wine be delicious
+    This wine will not be delicious
+    Will this wine not be delicious
+    This wine will have been delicious
+    Will this wine have been delicious
+    This wine won't have been delicious
+    Won't this wine have been delicious
+    This wine will not have been delicious
+    Will this wine not have been delicious
+    This wine would be delicious
+    Would this wine be delicious
+    This wine wouldn't be delicious
+    Wouldn't this wine be delicious
+    This wine would not be delicious
+    Would this wine not be delicious
+    This wine would have been delicious
+    Would this wine have been delicious
+    This wine wouldn't have been delicious
+    Wouldn't this wine have been delicious
+    This wine would not have been delicious
+    Would this wine not have been delicious
+
+

+We also see +

+
    +
  • polarity (positive vs. negative) +
  • word order (direct vs. inverted) +
  • variation between contracted and full negation +
+ +

+The list is even longer in languages that have more +tenses and moods, e.g. the Romance languages. +

+

+ +

+

Lesson 5: Refining semantics in abstract syntax

+

+ +

+

+Goals: +

+
    +
  • include semantic conditions in grammars, by using +
      +
    • dependent types +
    • higher order abstract syntax +
    • proof objects +
    • semantic definitions +

      +These concepts are inherited from type theory (more precisely: +constructive type theory, or Martin-Löf type theory). +

      +Type theory is the basis of logical frameworks. +

      +GF = logical framework + concrete syntax. +
    +
+ +

+ +

+

Dependent types

+

+ +

+

+Problem: to express conditions of semantic well-formedness. +

+

+Example: a voice command system for a "smart house" wants to +eliminate meaningless commands. +

+

+Thus we want to restrict particular actions to +particular devices - we can dim a light, but we cannot +dim a fan. +

+

+The following example is borrowed from the +Regulus Book (Rayner & al. 2006). +

+

+A simple example is a "smart house" system, which +defines voice commands for household appliances. +

+

+ +

+

A dependent type system

+

+Ontology: +

+
    +
  • there are commands and device kinds +
  • for each kind of device, there are devices and actions +
  • a command concerns an action of some kind on a device of the same kind +
+ +

+Abstract syntax formalizing this: +

+
+    cat
+      Command ;
+      Kind ; 
+      Device Kind ; -- argument type Kind 
+      Action Kind ; 
+    fun 
+      CAction : (k : Kind) -> Action k -> Device k -> Command ;
+
+

+Device and Action are both dependent types. +

+

+ +

+

Examples of devices and actions

+

+Assume the kinds light and fan, +

+
+    light, fan : Kind ;
+    dim : Action light ;
+
+

+Given a kind, k, you can form the device the k. +

+
+    DKindOne  : (k : Kind) -> Device k ;  -- the light
+
+

+Now we can form the syntax tree +

+
+    CAction light dim (DKindOne light)
+
+

+but we cannot form the trees +

+
+    CAction light dim (DKindOne fan)
+    CAction fan   dim (DKindOne light)
+    CAction fan   dim (DKindOne fan)
+
+

+

+ +

+

Linearization and parsing with dependent types

+

+Concrete syntax does not know if a category is a dependent type. +

+
+    lincat Action = {s : Str} ;
+    lin CAction _ act dev = {s = act.s ++ dev.s} ; 
+
+

+Notice that the Kind argument is suppressed in linearization. +

+

+Parsing with dependent types is performed in two phases: +

+
    +
  1. context-free parsing +
  2. filtering through type checker +
+ +

+By just doing the first phase, the kind argument is not found: +

+
+    > parse "dim the light"
+    CAction ? dim (DKindOne light)
+
+

+Moreover, type-incorrect commands are not rejected: +

+
+    > parse "dim the fan"
+    CAction ? dim (DKindOne fan)
+
+

+The term ? is a metavariable, returned by the parser +for any subtree that is suppressed by a linearization rule. +These are the same kind of metavariables as were used here +to mark incomplete parts of trees in the syntax editor. +

+

+ +

+

Solving metavariables

+

+Use the command put_tree = pt with the option -typecheck: +

+
+    > parse "dim the light" | put_tree -typecheck
+    CAction light dim (DKindOne light)
+
+

+The typecheck process may fail, in which case an error message +is shown and no tree is returned: +

+
+    > parse "dim the fan" | put_tree -typecheck
+  
+    Error in tree UCommand (CAction ? 0 dim (DKindOne fan)) :
+      (? 0 <> fan) (? 0 <> light)
+
+

+

+ +

+

Polymorphism

+

+ +

+

+Sometimes an action can be performed on all kinds of devices. +

+

+This is represented as a function that takes a Kind as an argument +and produces an Action for that Kind: +

+
+    fun switchOn, switchOff : (k : Kind) -> Action k ;
+
+

+Functions of this kind are called polymorphic. +

+

+We can use this kind of polymorphism in concrete syntax as well, +to express Haskell-type library functions: +

+
+    oper const :(a,b : Type) -> a -> b -> a =
+      \_,_,c,_ -> c ;
+  
+    oper flip : (a,b,c : Type) -> (a -> b ->c) -> b -> a -> c =
+      \_,_,_,f,x,y -> f y x ;
+
+

+

+ +

+

Dependent types: exercises

+

+1. Write an abstract syntax module with the above contents +and an appropriate English concrete syntax. Try to parse the commands +dim the light and dim the fan, with and without solve filtering. +

+

+2. Perform random and exhaustive generation, with and without +solve filtering. +

+

+3. Add some device kinds and actions to the grammar. +

+

+ +

+

Proof objects

+

+Curry-Howard isomorphism = propositions as types principle: +a proposition is a type of proofs (= proof objects). +

+

+Example: define the less than proposition for natural numbers, +

+
+    cat Nat ; 
+    fun Zero : Nat ;
+    fun Succ : Nat -> Nat ;
+
+

+Define inductively what it means for a number x to be less than +a number y: +

+
    +
  • Zero is less than Succ y for any y. +
  • If x is less than y, then Succ x is less than Succ y. +
+ +

+Expressing these axioms in type theory +with a dependent type Less x y and two functions constructing +its objects: +

+
+    cat Less Nat Nat ; 
+    fun lessZ : (y : Nat) -> Less Zero (Succ y) ;
+    fun lessS : (x,y : Nat) -> Less x y -> Less (Succ x) (Succ y) ;
+
+

+Example: the fact that 2 is less than 4 has the proof object +

+
+    lessS (Succ Zero) (Succ (Succ (Succ Zero)))
+          (lessS Zero (Succ (Succ Zero)) (lessZ (Succ Zero)))
+     : Less (Succ (Succ Zero)) (Succ (Succ (Succ (Succ Zero))))
+
+

+

+ +

+

Proof-carrying documents

+

+Idea: to be semantically well-formed, the abstract syntax of a document +must contain a proof of some property, +although the proof is not shown in the concrete document. +

+

+Example: documents describing flight connections: +

+

+To fly from Gothenburg to Prague, first take LH3043 to Frankfurt, then OK0537 to Prague. +

+

+The well-formedness of this text is partly expressible by dependent typing: +

+
+    cat
+      City ;
+      Flight City City ;
+    fun
+      Gothenburg, Frankfurt, Prague : City ;
+      LH3043 : Flight Gothenburg Frankfurt ;
+      OK0537 : Flight Frankfurt Prague ;
+
+

+To extend the conditions to flight connections, we introduce a category +of proofs that a change is possible: +

+
+    cat IsPossible (x,y,z : City)(Flight x y)(Flight y z) ;
+
+

+A legal connection is formed by the function +

+
+    fun Connect : (x,y,z : City) -> 
+      (u : Flight x y) -> (v : Flight y z) -> 
+        IsPossible x y z u v -> Flight x z ;
+
+

+

+ +

+

Restricted polymorphism

+

+Above, all Actions were either of +

+
    +
  • monomorphic: defined for one Kind +
  • polymorphic: defined for all Kinds +
+ +

+To make this scale up for new Kinds, we can refine this to +restricted polymorphism: defined for Kinds of a certain class +

+

+The notion of class uses the Curry-Howard isomorphism as follows: +

+
    +
  • a class is a predicate of Kinds --- i.e. a type depending on Kinds +
  • a Kind is in a class if there is a proof object of this type +
+ +

+ +

+

Example: classes for switching and dimming

+

+We modify the smart house grammar: +

+
+  cat
+    Switchable Kind ;
+    Dimmable   Kind ;
+  fun
+    switchable_light : Switchable light ;
+    switchable_fan   : Switchable fan ;
+    dimmable_light   : Dimmable light ;
+  
+    switchOn : (k : Kind) -> Switchable k -> Action k ;
+    dim      : (k : Kind) -> Dimmable k -> Action k ;
+
+

+Classes for new actions can be added incrementally. +

+

+ +

+

Variable bindings

+

+ +

+

+Mathematical notation and programming languages have +expressions that bind variables. +

+

+Example: universal quantifier formula +

+
+    (All x)B(x)
+
+

+The variable x has a binding (All x), and +occurs bound in the body B(x). +

+

+Examples from informal mathematical language: +

+
+    for all x, x is equal to x
+  
+    the function that for any numbers x and y returns the maximum of x+y
+    and x*y
+  
+    Let x be a natural number. Assume that x is even. Then x + 3 is odd.
+
+

+

+ +

+

Higher-order abstract syntax

+

+Abstract syntax can use functions as arguments: +

+
+    cat Ind ; Prop ;
+    fun All : (Ind -> Prop) -> Prop
+
+

+where Ind is the type of individuals and Prop, +the type of propositions. +

+

+Let us add an equality predicate +

+
+    fun Eq : Ind -> Ind -> Prop
+
+

+Now we can form the tree +

+
+    All (\x -> Eq x x)
+
+

+which we want to relate to the ordinary notation +

+
+    (All x)(x = x)
+
+

+In higher-order abstract syntax (HOAS), all variable bindings are +expressed using higher-order syntactic constructors. +

+

+ +

+

Higher-order abstract syntax: linearization

+

+HOAS has proved to be useful in the semantics and computer implementation of +variable-binding expressions. +

+

+How do we relate HOAS to the concrete syntax? +

+

+In GF, we write +

+
+    fun All : (Ind -> Prop) -> Prop
+    lin All B = {s = "(" ++ "All" ++ B.$0 ++ ")" ++ B.s}
+
+

+General rule: if an argument type of a fun function is +a function type A -> C, the linearization type of +this argument is the linearization type of C +together with a new field $0 : Str. +

+

+The argument B thus has the linearization type +

+
+    {s : Str ; $0 : Str},
+
+

+If there are more bindings, we add $1, $2, etc. +

+

+ +

+

Eta expansion

+

+To make sense of linearization, syntax trees must be +eta-expanded: for any function of type +

+
+    A -> B
+
+

+an eta-expanded syntax tree has the form +

+
+    \x -> b
+
+

+where b : B under the assumption x : A. +

+

+Given the linearization rule +

+
+    lin Eq a b = {s = "(" ++ a.s ++ "=" ++ b.s ++ ")"}
+
+

+the linearization of the tree +

+
+    \x -> Eq x x
+
+

+is the record +

+
+    {$0 = "x", s = ["( x = x )"]}
+
+

+Then we can compute the linearization of the formula, +

+
+    All (\x -> Eq x x)  --> {s = ["( All x ) ( x = x )"]}.
+
+

+The linearization of the variable x is, +"automagically", the string "x". +

+

+ +

+

Parsing variable bindings

+

+GF can treat any one-word string as a variable symbol. +

+
+    > p -cat=Prop "( All x ) ( x = x )"
+    All (\x -> Eq x x)
+
+

+Variables must be bound if they are used: +

+
+    > p -cat=Prop "( All x ) ( x = y )"
+    no tree found
+
+

+

+ +

+

Exercises on variable bindings

+

+1. Write an abstract syntax of the whole +predicate calculus, with the +connectives "and", "or", "implies", and "not", and the +quantifiers "exists" and "for all". Use higher-order functions +to guarantee that unbound variables do not occur. +

+

+2. Write a concrete syntax for your favourite +notation of predicate calculus. Use Latex as target language +if you want nice output. You can also try producing boolean +expressions of some programming language. Use as many parentheses as you need to +guarantee non-ambiguity. +

+

+ +

+

Semantic definitions

+

+ +

+

+The fun judgements of GF are declarations of functions, giving their types. +

+

+Can we compute fun functions? +

+

+Mostly we are not interested, since functions are seen as constructors, +i.e. data forms - as usual with +

+
+    fun Zero : Nat ;
+    fun Succ : Nat -> Nat ;
+
+

+But it is also possible to give semantic definitions to functions. +The key word is def: +

+
+    fun one : Nat ;
+    def one = Succ Zero ;
+  
+    fun twice : Nat -> Nat ;
+    def twice x = plus x x ;
+  
+    fun plus : Nat -> Nat -> Nat ;
+    def 
+      plus x Zero = x ;
+      plus x (Succ y) = Succ (plus x y) ;
+
+

+

+ +

+

Computing a tree

+

+Computation: follow a chain of definitions until no definition +can be applied, +

+
+    plus one one -->
+    plus (Succ Zero) (Succ Zero) -->
+    Succ (plus (Succ Zero) Zero) -->
+    Succ (Succ Zero)
+
+

+Computation in GF is performed with the put_term command and the +compute transformation, e.g. +

+
+    > parse -tr "1 + 1" | put_term -transform=compute -tr | l
+    plus one one
+    Succ (Succ Zero)
+    s(s(0))
+
+

+

+ +

+

Definitional equality

+

+Two trees are definitionally equal if they compute into the same tree. +

+

+Definitional equality does not guarantee sameness of linearization: +

+
+    plus one one     ===> 1 + 1
+    Succ (Succ Zero) ===> s(s(0))
+
+

+The main use of this concept is in type checking: sameness of types. +

+

+Thus e.g. the following types are equal +

+
+    Less Zero one
+    Less Zero (Succ Zero)
+
+

+so that an object of one also is an object of the other. +

+

+ +

+

Judgement forms for constructors

+

+The judgement form data tells that a category has +certain functions as constructors: +

+
+    data Nat = Succ | Zero ;
+
+

+The type signatures of constructors are given separately, +

+
+    fun Zero : Nat ;
+    fun Succ : Nat -> Nat ;
+
+

+There is also a shorthand: +

+
+    data Succ : Nat -> Nat ;    ===   fun Succ : Nat -> Nat ;
+                                      data Nat = Succ ;
+
+

+Notice: in def definitions, identifier patterns not +marked as data will be treated as variables. +

+

+ +

+

Exercises on semantic definitions

+

+1. Implement an interpreter of a small functional programming +language with natural numbers, lists, pairs, lambdas, etc. Use higher-order +abstract syntax with semantic definitions. As concrete syntax, use +your favourite programming language. +

+

+2. There is no termination checking for def definitions. +Construct an example that makes type checking loop. +Type checking can be invoked with put_term -transform=solve. +

+

+ +

+

Lesson 6: Grammars of formal languages

+

+ +

+

+Goals: +

+
    +
  • write grammars for formal languages (mathematical notation, programming languages) +
  • interface between formal and natural languages +
  • implement a compiler by using GF +
+ +

+ +

+

Arithmetic expressions

+

+We construct a calculator with addition, subtraction, multiplication, and +division of integers. +

+
+    abstract Calculator = {
+  
+    cat Exp ;
+  
+    fun
+      EPlus, EMinus, ETimes, EDiv : Exp -> Exp -> Exp ;
+      EInt : Int -> Exp ;
+    }
+
+

+The category Int is a built-in category of +integers. Its syntax trees are integer literals, i.e. +sequences of digits: +

+
+    5457455814608954681 : Int
+
+

+These are the only objects of type Int: +grammars are not allowed to declare functions with Int as value type. +

+

+ +

+

Concrete syntax: a simple approach

+

+We begin with a +concrete syntax that always uses parentheses around binary +operator applications: +

+
+    concrete CalculatorP of Calculator = {
+  
+    lincat 
+      Exp = SS ;
+    lin
+      EPlus  = infix "+" ;
+      EMinus = infix "-" ;
+      ETimes = infix "*" ;
+      EDiv   = infix "/" ;
+      EInt i = i ;
+  
+    oper
+      infix : Str -> SS -> SS -> SS = \f,x,y -> 
+        ss ("(" ++ x.s ++ f ++ y.s ++ ")") ;
+    }
+
+

+Now we have +

+
+    > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4))
+    ( 2 + ( 3 * 4 ) )
+
+

+First problems: +

+
    +
  • to get rid of superfluous spaces and +
  • to recognize integer literals in the parser +
+ +

+ +

+

Lexing and unlexing

+

+ +

+

+The input of parsing in GF is not just a string, but a list of +tokens, returned by a lexer. +

+

+The default lexer in GF returns chunks separated by spaces: +

+
+    "(12 + (3 * 4))"  ===>  "(12", "+", "(3", "*", "4))"
+
+

+The proper way would be +

+
+    "(", "12", "+", "(", "3", "*", "4", ")", ")"
+
+

+Moreover, the tokens "12", "3", and "4" should be recognized as +integer literals - they cannot be found in the grammar. +

+

+ +

+

+Lexers are invoked by flags to the command put_string = ps. +

+
+    > put_string -lexcode "(2 + (3 * 4))"
+    ( 2 + ( 3 * 4 ) )
+
+

+This can be piped into a parser, as usual: +

+
+    > ps -lexcode "(2 + (3 * 4))" | parse
+    EPlus (EInt 2) (ETimes (EInt 3) (EInt 4))
+
+

+In linearization, we use a corresponding unlexer: +

+
+    > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) | ps -unlexcode
+    (2 + (3 * 4))
+
+

+

+ +

+

Most common lexers and unlexers

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
lexerunlexerdescription
charsuncharseach character is a token
lexcodeunlexcodeprogram code conventions (uses Haskell's lex)
lexmixedunlexmixedlike text, but between $ signs like code
lextextunlextextwith conventions on punctuation and capitals
wordsunwords(default) tokens separated by space characters
+ +

+ +

+

Precedence and fixity

+

+Arithmetic expressions should be unambiguous. If we write +

+
+    2 + 3 * 4
+
+

+it should be parsed as one, but not both, of +

+
+    EPlus (EInt 2) (ETimes (EInt 3) (EInt 4))
+    ETimes (EPlus (EInt 2) (EInt 3)) (EInt 4)
+
+

+We choose the former tree, because +multiplication has higher precedence than addition. +

+

+To express the latter tree, we have to use parentheses: +

+
+    (2 + 3) * 4
+
+

+The usual precedence rules: +

+
    +
  • Integer constants and expressions in parentheses have the highest precedence. +
  • Multiplication and division have equal precedence, lower than the highest + but higher than addition and subtraction, which are again equal. +
  • All the four binary operations are left-associative: + 1 + 2 + 3 means the same as (1 + 2) + 3. +
+ +

+ +

+

Precedence as a parameter

+

+Precedence can be made into an inherent feature of expressions: +

+
+    oper
+      Prec : PType = Ints 2 ;
+      TermPrec : Type = {s : Str ; p : Prec} ;
+  
+      mkPrec : Prec -> Str -> TermPrec = \p,s -> {s = s ; p = p} ;
+  
+    lincat 
+      Exp = TermPrec ;
+
+

+Notice Ints 2: a parameter type, whose values are the integers +0,1,2. +

+

+Using precedence levels: compare the inherent precedence of an +expression with the expected precedence. +

+
    +
  • if the inherent precedence is lower than the expected precedence, + use parentheses +
  • otherwise, no parentheses are needed +
+ +

+This idea is encoded in the operation +

+
+    oper usePrec : TermPrec -> Prec -> Str = \x,p ->
+      case lessPrec x.p p of {
+        True  => "(" ++ x.s ++ ")" ;
+        False => x.s
+      } ;
+
+

+(We use lessPrec from lib/prelude/Formal.) +

+

+ +

+

Fixities

+

+We can define left-associative infix expressions: +

+
+    infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
+      mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ;
+
+

+Constant-like expressions (the highest level): +

+
+    constant : Str -> TermPrec = mkPrec 2 ;
+
+

+All these operations can be found in lib/prelude/Formal, +which has 5 levels. +

+

+Now we can write the whole concrete syntax of Calculator compactly: +

+
+    concrete CalculatorC of Calculator = open Formal, Prelude in {
+  
+    flags lexer = codelit ; unlexer = code ; startcat = Exp ;
+  
+    lincat Exp = TermPrec ;
+  
+    lin
+      EPlus  = infixl 0 "+" ;
+      EMinus = infixl 0 "-" ;
+      ETimes = infixl 1 "*" ;
+      EDiv   = infixl 1 "/" ;
+      EInt i = constant i.s ;
+    }
+
+

+

+ +

+

Exercises on precedence

+

+1. Define non-associative and right-associative infix operations +analogous to infixl. +

+

+2. Add a constructor that puts parentheses around expressions +to raise their precedence, but that is eliminated by a def definition. +Test parsing with and without a pipe to pt -transform=compute. +

+

+ +

+

Code generation as linearization

+

+Translate arithmetic (infix) to JVM (postfix): +

+
+    2 + 3 * 4
+  
+      ===>
+  
+    iconst 2 ; iconst 3 ; iconst 4 ; imul ; iadd
+
+

+Just give linearization rules for JVM: +

+
+    lin
+      EPlus  = postfix "iadd" ;
+      EMinus = postfix "isub" ;
+      ETimes = postfix "imul" ;
+      EDiv   = postfix "idiv" ;
+      EInt i = ss ("iconst" ++ i.s) ;
+    oper
+      postfix : Str -> SS -> SS -> SS = \op,x,y -> 
+        ss (x.s ++ ";" ++ y.s ++ ";" ++ op) ;
+
+

+

+ +

+

Programs with variables

+

+A straight code programming language, with +initializations and assignments: +

+
+    int x = 2 + 3 ;  
+    int y = x + 1 ; 
+    x = x + 9 * y ;
+
+

+We define programs by the following constructors: +

+
+    fun
+      PEmpty : Prog ;
+      PInit  : Exp -> (Var -> Prog) -> Prog ;
+      PAss   : Var -> Exp  -> Prog  -> Prog ;
+
+

+PInit uses higher-order abstract syntax for making the +initialized variable available in the continuation of the program. +

+

+The abstract syntax tree for the above code is +

+
+    PInit (EPlus (EInt 2) (EInt 3)) (\x -> 
+      PInit (EPlus (EVar x) (EInt 1)) (\y -> 
+        PAss x (EPlus (EVar x) (ETimes (EInt 9) (EVar y))) 
+          PEmpty))
+
+

+No uninitialized variables are allowed - there are no constructors for Var! +But we do have the rule +

+
+    fun EVar : Var -> Exp ;
+
+

+The rest of the grammar is just the same as for arithmetic expressions +here. The best way to implement it is perhaps by writing a +module that extends the expression module. The most natural start category +of the extension is Prog. +

+

+ +

+

Exercises on code generation

+

+1. Define a C-like concrete syntax of the straight-code language. +

+

+2. Extend the straight-code language to expressions of type float. +To guarantee type safety, you can define a category Typ of types, and +make Exp and Var dependent on Typ. Basic floating point expressions +can be formed from literals of the built-in GF type Float. The arithmetic +operations should be made polymorphic (as here). +

+

+3. Extend JVM generation to the straight-code language, using +two more instructions +

+
    +
  • iload x, which loads the value of the variable x +
  • istore x which stores a value to the variable x +
+ +

+Thus the code for the example in the previous section is +

+
+    iconst 2 ; iconst 3 ; iadd ; istore x ;
+    iload x ; iconst 1 ; iadd ; istore y ;
+    iload x ; iconst 9 ; iload y ; imul ; iadd ; istore x ;
+
+

+

+4. If you made the exercise of adding floating point numbers to +the language, you can now cash out the main advantage of type checking +for code generation: selecting type-correct JVM instructions. The floating +point instructions are precisely the same as the integer ones, except that +the prefix is f instead of i, and that fconst takes floating +point literals as arguments. +

+

+ +

+

Lesson 7: Embedded grammars

+

+ +

+

+Goals: +

+
    +
  • use grammars as parts of programs written in Haskell and JavaScript +
  • implement stand-alone question-answering systems and translators based on + GF grammars +
  • generate language models for speech recognition from GF grammars +
+ +

+ +

+

Functionalities of an embedded grammar format

+

+GF grammars can be used as parts of programs written in other programming +languages, to be called host languages. +This facility is based on several components: +

+
    +
  • PGF: a portable format for multilingual GF grammars +
  • a PGF interpreter written in the host language +
  • a library in the host language that enables calling the interpreter +
  • a way to manipulate abstract syntax trees in the host language +
+ +

+ +

+

The portable grammar format

+

+The portable format is called PGF, "Portable Grammar Format". +

+

+This format is produced by the GF batch compiler gf, +executable from the operating system shell: +

+
+    % gf --make SOURCE.gf
+
+

+PGF is the recommended format in +which final grammar products are distributed, because they +are stripped of superfluous information and can be started and applied +faster than sets of separate modules. +

+

+Application programmers never have any need to read or modify PGF files. +

+

+PGF thus plays the same role as machine code in +general-purpose programming (or bytecode in Java). +

+

+ +

+

Haskell: the EmbedAPI module

+

+The Haskell API contains (among other things) the following types and functions: +

+
+    readPGF   :: FilePath -> IO PGF
+  
+    linearize :: PGF -> Language -> Tree -> String
+    parse     :: PGF -> Language -> Category -> String -> [Tree]
+  
+    linearizeAll     :: PGF -> Tree -> [String]
+    linearizeAllLang :: PGF -> Tree -> [(Language,String)]
+  
+    parseAll     :: PGF -> Category -> String -> [[Tree]]
+    parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])]
+  
+    languages    :: PGF -> [Language]
+    categories   :: PGF -> [Category]
+    startCat     :: PGF -> Category
+
+

+This is the only module that needs to be imported in the Haskell application. +It is available as a part of the GF distribution, in the file +src/PGF.hs. +

+

+ +

+

First application: a translator

+

+Let us first build a stand-alone translator, which can translate +in any multilingual grammar between any languages in the grammar. +

+
+  module Main where
+  
+  import PGF
+  import System (getArgs)
+  
+  main :: IO () 
+  main = do
+    file:_ <- getArgs
+    gr     <- readPGF file
+    interact (translate gr)
+  
+  translate :: PGF -> String -> String
+  translate gr s = case parseAllLang gr (startCat gr) s of
+    (lg,t:_):_ -> unlines [linearize gr l t | l <- languages gr, l /= lg]
+    _ -> "NO PARSE"
+
+

+To run the translator, first compile it by +

+
+    % ghc --make -o trans Translator.hs 
+
+

+For this, you need the Haskell compiler GHC. +

+

+ +

+

Producing PGF for the translator

+

+Then produce a PGF file. For instance, the Food grammar set can be +compiled as follows: +

+
+    % gf --make FoodEng.gf FoodIta.gf
+
+

+This produces the file Food.pgf (its name comes from the abstract syntax). +

+

+The Haskell library function interact makes the trans program work +like a Unix filter, which reads from standard input and writes to standard +output. Therefore it can be a part of a pipe and read and write files. +The simplest way to translate is to echo input to the program: +

+
+    % echo "this wine is delicious" | ./trans Food.pgf
+    questo vino è delizioso
+
+

+The result is given in all languages except the input language. +

+

+ +

+

A translator loop

+

+To avoid starting the translator over and over again: +change interact in the main function to loop, defined as +follows: +

+
+  loop :: (String -> String) -> IO ()
+  loop trans = do 
+    s <- getLine
+    if s == "quit" then putStrLn "bye" else do  
+      putStrLn $ trans s
+      loop trans
+
+

+The loop keeps on translating line by line until the input line +is quit. +

+

+ +

+

A question-answer system

+

+ +

+

+The next application is also a translator, but it adds a +transfer component - a function that transforms syntax trees. +

+

+The transfer function we use is one that computes a question into an answer. +

+

+The program accepts simple questions about arithmetic and answers +"yes" or "no" in the language in which the question was made: +

+
+    Is 123 prime?
+    No.
+    77 est impair ?
+    Oui.
+
+

+We change the pure translator by giving +the translate function the transfer as an extra argument: +

+
+    translate :: (Tree -> Tree) -> PGF -> String -> String
+
+

+Ordinary translation is a special case where +transfer is the identity function (id in Haskell). +

+

+To reply in the same language as the question: +

+
+    translate tr gr s = case parseAllLang gr (startCat gr) s of
+      (lg,t:_):_ -> linearize gr lg (tr t)
+      _ -> "NO PARSE"
+
+

+

+ +

+

Abstract syntax of the query system

+

+Input: abstract syntax judgements +

+
+  abstract Query = {
+  
+    flags startcat=Question ;
+  
+    cat 
+      Answer ; Question ; Object ;
+  
+    fun 
+      Even   : Object -> Question ;
+      Odd    : Object -> Question ;
+      Prime  : Object -> Question ;
+      Number : Int -> Object ;
+  
+      Yes : Answer ;
+      No  : Answer ;
+  }
+
+

+

+ +

+

Exporting GF datatypes to Haskell

+

+To make it easy to define a transfer function, we export the +abstract syntax to a system of Haskell datatypes: +

+
+    % gf --output-format=haskell Query.pgf
+
+

+It is also possible to produce the Haskell file together with PGF, by +

+
+    % gf --make --output-format=haskell QueryEng.gf
+
+

+The result is a file named Query.hs, containing a +module named Query. +

+

+ +

+

+Output: Haskell definitions +

+
+  module Query where
+  import PGF
+  
+  data GAnswer =
+     GYes 
+   | GNo 
+  
+  data GObject = GNumber GInt 
+  
+  data GQuestion =
+     GPrime GObject 
+   | GOdd GObject 
+   | GEven GObject 
+  
+  newtype GInt = GInt Integer
+
+

+All type and constructor names are prefixed with a G to prevent clashes. +

+

+The Haskell module name is the same as the abstract syntax name. +

+

+ +

+

The question-answer function

+

+Haskell's type checker guarantees that the functions are well-typed also with +respect to GF. +

+
+  answer :: GQuestion -> GAnswer
+  answer p = case p of
+    GOdd x   -> test odd x
+    GEven x  -> test even x
+    GPrime x -> test prime x
+  
+  value :: GObject -> Int
+  value e = case e of
+    GNumber (GInt i) -> fromInteger i
+  
+  test :: (Int -> Bool) -> GObject -> GAnswer
+  test f x = if f (value x) then GYes else GNo
+
+

+

+ +

+

Converting between Haskell and GF trees

+

+The generated Haskell module also contains +

+
+  class Gf a where 
+    gf :: a -> Tree
+    fg :: Tree -> a
+  
+  instance Gf GQuestion where
+    gf (GEven x1) = DTr [] (AC (CId "Even")) [gf x1]
+    gf (GOdd x1) = DTr [] (AC (CId "Odd")) [gf x1]
+    gf (GPrime x1) = DTr [] (AC (CId "Prime")) [gf x1]
+    fg t =
+      case t of
+        DTr [] (AC (CId "Even")) [x1] -> GEven (fg x1)
+        DTr [] (AC (CId "Odd")) [x1] -> GOdd (fg x1)
+        DTr [] (AC (CId "Prime")) [x1] -> GPrime (fg x1)
+        _ -> error ("no Question " ++ show t)
+
+

+For the programmer, it is enough to know: +

+
    +
  • all GF names are in Haskell prefixed with G +
  • gf translates from Haskell objects to GF trees +
  • fg translates from GF trees to Haskell objects +
+ +

+ +

+

Putting it all together: the transfer definition

+
+  module TransferDef where
+  
+  import PGF (Tree)
+  import Query   -- generated from GF
+  
+  transfer :: Tree -> Tree
+  transfer = gf . answer . fg
+  
+  answer :: GQuestion -> GAnswer
+  answer p = case p of
+    GOdd x   -> test odd x
+    GEven x  -> test even x
+    GPrime x -> test prime x
+  
+  value :: GObject -> Int
+  value e = case e of
+    GNumber (GInt i) -> fromInteger i
+  
+  test :: (Int -> Bool) -> GObject -> GAnswer
+  test f x = if f (value x) then GYes else GNo
+  
+  prime :: Int -> Bool
+  prime x = elem x primes where
+    primes = sieve [2 .. x]
+    sieve (p:xs) = p : sieve [ n | n <- xs, n `mod` p > 0 ]
+    sieve [] = []
+
+

+

+ +

+

Putting it all together: the Main module

+

+Here is the complete code in the Haskell file TransferLoop.hs. +

+
+  module Main where
+  
+  import PGF
+  import TransferDef (transfer)
+  
+  main :: IO () 
+  main = do
+    gr <- readPGF "Query.pgf"
+    loop (translate transfer gr)
+  
+  loop :: (String -> String) -> IO ()
+  loop trans = do 
+    s <- getLine
+    if s == "quit" then putStrLn "bye" else do  
+      putStrLn $ trans s
+      loop trans
+  
+  translate :: (Tree -> Tree) -> PGF -> String -> String
+  translate tr gr s = case parseAllLang gr (startCat gr) s of
+    (lg,t:_):_ -> linearize gr lg (tr t)
+    _ -> "NO PARSE"
+
+

+

+ +

+

Putting it all together: the Makefile

+

+To automate the production of the system, we write a Makefile as follows: +

+
+  all:
+          gf --make --output-format=haskell QueryEng
+          ghc --make -o ./math TransferLoop.hs
+          strip math
+
+

+(The empty segments starting the command lines in a Makefile must be tabs.) +Now we can compile the whole system by just typing +

+
+    make
+
+

+Then you can run it by typing +

+
+    ./math
+
+

+Just to summarize, the source of the application consists of the following files: +

+
+    Makefile         -- a makefile
+    Query.gf         -- abstract syntax
+    Query???.gf      -- concrete syntaxes
+    TransferDef.hs   -- definition of question-to-answer function
+    TransferLoop.hs  -- Haskell Main module
+
+

+

+ +

+

Web server applications

+

+PGF files can be used in web servers, for which there is a Haskell library included +in src/server/. How to build a server for tasks like translators is explained +in the README file in that directory. +

+

+One of the servers that can be readily built with the library (without any +programming required) is fridge poetry magnets. It is an application that +uses an incremental parser to suggest grammatically correct next words. Here +is an example of its application to the Foods grammars. +

+

+ +

+

+ +

+

JavaScript applications

+

+JavaScript is a programming language that has interpreters built into most +web browsers. It is therefore usable for client side web programs, which can even +be run without access to the internet. The following figure shows a JavaScript +program compiled from GF grammars as run on an iPhone. +

+

+ +

+

+ +

+

Compiling to JavaScript

+

+JavaScript is one of the output formats of the GF batch compiler. Thus the following +command generates a JavaScript file from two Food grammars. +

+
+    % gf --make --output-format=js FoodEng.gf FoodIta.gf
+
+

+The name of the generated file is Food.js, derived from the top-most abstract +syntax name. This file contains the multilingual grammar as a JavaScript object. +

+

+ +

+

Using the JavaScript grammar

+

+To perform parsing and linearization, the run-time library +gflib.js is used. It is included in GF/lib/javascript/, together with +some other JavaScript and HTML files; these files can be used +as templates for building applications. +

+

+An example of usage is +translator.html, +which is in fact initialized with +a pointer to the Food grammar, so that it provides translation between the English +and Italian grammars: +

+

+ +

+

+The grammar must have the name grammar.js. The abstract syntax and start +category names in translator.html must match the ones in the grammar. +With these changes, the translator works for any multilingual grammar. +

+

+ +

+

Language models for speech recognition

+

+The standard way of using GF in speech recognition is by building +grammar-based language models. +

+

+GF supports several formats, including +GSL, the format used in the Nuance speech recognizer. +

+

+GSL is produced from GF by running gf with the flag +--output-format=gsl. +

+

+Example: GSL generated from FoodsEng.gf. +

+
+    % gf --make --output-format=gsl FoodsEng.gf
+    % more FoodsEng.gsl
+  
+    ;GSL2.0
+    ; Nuance speech recognition grammar for FoodsEng
+    ; Generated by GF
+  
+    .MAIN Phrase_cat
+  
+    Item_1 [("that" Kind_1) ("this" Kind_1)]
+    Item_2 [("these" Kind_2) ("those" Kind_2)]
+    Item_cat [Item_1 Item_2]
+    Kind_1 ["cheese" "fish" "pizza" (Quality_1 Kind_1)
+            "wine"]
+    Kind_2 ["cheeses" "fish" "pizzas"
+            (Quality_1 Kind_2) "wines"]
+    Kind_cat [Kind_1 Kind_2]
+    Phrase_1 [(Item_1 "is" Quality_1)
+              (Item_2 "are" Quality_1)]
+    Phrase_cat Phrase_1
+    
+    Quality_1 ["boring" "delicious" "expensive"
+               "fresh" "italian" ("very" Quality_1) "warm"]
+    Quality_cat Quality_1
+
+

+

+ +

+

More speech recognition grammar formats

+

+Other formats available via the --output-format flag include: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FormatDescription
gslNuance GSL speech recognition grammar
jsgfJava Speech Grammar Format (JSGF)
jsgf_sisr_oldJSGF with semantic tags in SISR WD 20030401 format
srgs_abnfSRGS ABNF format
srgs_xmlSRGS XML format
srgs_xml_probSRGS XML format, with weights
slffinite automaton in the HTK SLF format
slf_subfinite automaton with sub-automata in HTK SLF
+ +

+All currently available formats can be seen with gf --help. +

+ + + + diff --git a/doc/tutorial/gf-tutorial.txt b/doc/tutorial/gf-tutorial.txt new file mode 100644 index 000000000..8ae053a99 --- /dev/null +++ b/doc/tutorial/gf-tutorial.txt @@ -0,0 +1,5022 @@ +Grammatical Framework Tutorial +Aarne Ranta +December 2010 (November 2008) + + +% NOTE: this is a txt2tags file. +% Create a tex file from this file using: +% txt2tags --toc -ttex gf-tutorial.txt + +%!target:html +%!encoding: iso-8859-1 + +%!postproc(tex) : "\\subsection\*" "\\newslide" +%!preproc(tex): "#NEW" "" +%!postproc(html): #NEW + + + +%!postproc(html): #OVERVIEW

Overview

+ +%%!postproc(tex): "section\*" "section" + +%!postproc(tex): "\\documentclass{article}" "" + +%!postproc(tex): "subsection\*" "section" +%!postproc(tex): "section\*" "chapter" + +%!postproc(tex): "textbf{Exercise}" "exercise" +%!postproc(tex): "textbf" "keywrd" + +%!postproc(html): #BCEN
+%!postproc(html): #ECEN
+ +%!postproc(tex): #BCEN "begin{center}" +%!postproc(tex): #ECEN "end{center}" + +%!postproc(tex): #BEQU "bequ" +%!postproc(tex): #ENQU "enqu" +%!postproc(html): #BEQU "
" +%!postproc(html): #ENQU "
" + +%!preproc(html): #EDITORPNG [quick-editor.png] +%!preproc(tex): #EDITORPNG [10lang-small.png] + +%!preproc(html): #LOGOPNG [Logos/gf0.png] +%!preproc(tex): #LOGOPNG [Logos/gf0.png] + + +%!postproc(tex): #PARTone "part{Tutorial}" +%!postproc(tex): #PARTtwo "part{Applications of Grammars}" +%!postproc(tex): #PARTfour "part{Advanced Grammar Writing}" +%!postproc(tex): #PARTthree "part{Reference Manual}" + +%%!postproc(tex): #PARTbnf "include{DocGF}" +%!postproc(tex): #PARTquickref "chapter{Quick Reference}" +%!postproc(tex): #twocolumn ""%twocolumn" +%!postproc(tex): #newpage "newpage" +%!postproc(tex): #smallsize "small" +%!postproc(tex): #normalsize "normalsize" +%!postproc(tex): #startappendix "appendix" + + +%!postproc(tex): #indexYACC "index{YACC}" + +%!postproc(tex): #MYTREE "input{mytree}" +%!preproc(html): #MYTREE [mytree.png] +%!postproc(tex): #FOODMARKET "input{foodmarket}" +%!preproc(html): #FOODMARKET [foodmarket.png] +%!postproc(tex): #CATEGORIES "input{categories}" +%!preproc(html): #CATEGORIES [categories.png] + +%!postproc(tex): #Syntaxpic "input{Syntax}" +%!postproc(tex): #Germanpic "input{German}" + +%!postproc(tex): #REFERENCES "input{references}" + +%!postproc(tex): #FORMULAone "input{FORMULAone}" + +%!postproc(tex): #SETLENGTHS "input{SETLENGTHS}" + +%!postproc(tex): #PRINTINDEX "printindex" + +%!postproc(tex): #Lchaptwo "label{chaptwo}" +%!postproc(tex): #Rchaptwo "chref{chaptwo}" +%!postproc(html): #Lchaptwo +%!postproc(html): #Rchaptwo Lesson 1 + +%!postproc(tex): #Lchapthree "label{chapthree}" +%!postproc(tex): #Rchapthree "chref{chapthree}" +%!postproc(html): #Lchapthree +%!postproc(html): #Rchapthree Lesson 2 + +%!postproc(tex): #Lchapfour "label{chapfour}" +%!postproc(tex): #Rchapfour "chref{chapfour}" +%!postproc(html): #Lchapfour +%!postproc(html): #Rchapfour Lesson 3 + +%!postproc(tex): #Lchapfive "label{chapfive}" +%!postproc(tex): #Rchapfive "chref{chapfive}" +%!postproc(html): #Lchapfive +%!postproc(html): #Rchapfive Lesson 4 + 
+%!postproc(tex): #Lchapsix "label{chapsix}" +%!postproc(tex): #Rchapsix "chref{chapsix}" +%!postproc(html): #Lchapsix +%!postproc(html): #Rchapsix Lesson 5 + +%!postproc(tex): #Lchapseven "label{chapseven}" +%!postproc(tex): #Rchapseven "chref{chapseven}" +%!postproc(html): #Lchapseven +%!postproc(html): #Rchapseven Lesson 6 + +%!postproc(tex): #Lchapeight "label{chapeight}" +%!postproc(tex): #Rchapeight "chref{chapeight}" +%!postproc(html): #Lchapeight +%!postproc(html): #Rchapeight Lesson 7 + + +%2.7.2 +%!postproc(tex): #Lsecjment "label{secjment}" +%!postproc(tex): #Rsecjment "sref{secjment}" +%!postproc(html): #Lsecjment +%!postproc(html): #Rsecjment here + +%3.4 +%!postproc(tex): #Lsecanitalian "label{secanitalian}" +%!postproc(tex): #Rsecanitalian "sref{secanitalian}" +%!postproc(html): #Lsecanitalian +%!postproc(html): #Rsecanitalian here + +%3.6.1 +%!postproc(tex): #Lsectreebank "label{sectreebank}" +%!postproc(tex): #Rsectreebank "sref{sectreebank}" +%!postproc(html): #Lsectreebank +%!postproc(html): #Rsectreebank here + + +%3.6.4 +%!postproc(tex): #Lsecediting "label{secediting}" +%!postproc(tex): #Rsecediting "sref{secediting}" +%!postproc(html): #Lsecediting +%!postproc(html): #Rsecediting here + + +%3.9.5 +%!postproc(tex): #Lsecpartapp "label{secpartapp}" +%!postproc(tex): #Rsecpartapp "sref{secpartapp}" +%!postproc(html): #Lsecpartapp +%!postproc(html): #Rsecpartapp here + +%3.10 +%!postproc(tex): #Lsecarchitecture "label{secarchitecture}" +%!postproc(tex): #Rsecarchitecture "sref{secarchitecture}" +%!postproc(html): #Lsecarchitecture +%!postproc(html): #Rsecarchitecture here + +%4.6 +%!postproc(tex): #Lsecinflection "label{secinflection}" +%!postproc(tex): #Rsecinflection "sref{secinflection}" +%!postproc(html): #Lsecinflection +%!postproc(html): #Rsecinflection here + +%4.7 +%!postproc(tex): #Lsecitalian "label{secitalian}" +%!postproc(tex): #Rsecitalian "sref{secitalian}" +%!postproc(html): #Lsecitalian +%!postproc(html): #Rsecitalian here + 
+%4.10.2 +%!postproc(tex): #Lsecmatching "label{secmatching}" +%!postproc(tex): #Rsecmatching "sref{secmatching}" +%!postproc(html): #Lsecmatching +%!postproc(html): #Rsecmatching here + +%5.2 +%!postproc(tex): #Lseclexical "label{seclexical}" +%!postproc(tex): #Rseclexical "sref{seclexical}" +%!postproc(html): #Lseclexical +%!postproc(html): #Rseclexical here + +%5.4 +%!postproc(tex): #Lsecenglish "label{secenglish}" +%!postproc(tex): #Rsecenglish "sref{secenglish}" +%!postproc(html): #Lsecenglish +%!postproc(html): #Rsecenglish here + +%5.5 +%!postproc(tex): #Lsecfunctor "label{secfunctor}" +%!postproc(tex): #Rsecfunctor "sref{secfunctor}" +%!postproc(html): #Lsecfunctor +%!postproc(html): #Rsecfunctor here + +%5.6 +%!postproc(tex): #Lsecinterface "label{secinterface}" +%!postproc(tex): #Rsecinterface "sref{secinterface}" +%!postproc(html): #Lsecinterface +%!postproc(html): #Rsecinterface here + +%5.11 +%!postproc(tex): #Lsecbrowsing "label{secbrowsing}" +%!postproc(tex): #Rsecbrowsing "sref{secbrowsing}" +%!postproc(html): #Lsecbrowsing +%!postproc(html): #Rsecbrowsing here + +%5.12 +%!postproc(tex): #Lsecextended "label{secextended}" +%!postproc(tex): #Rsecextended "sref{secextended}" +%!postproc(html): #Lsecextended +%!postproc(html): #Rsecextended here + +%5.13 +%!postproc(tex): #Lsectense "label{sectense}" +%!postproc(tex): #Rsectense "sref{sectense}" +%!postproc(html): #Lsectense +%!postproc(html): #Rsectense here + +%5.14.2 +%!postproc(tex): #Lseclock "label{seclock}" +%!postproc(tex): #Rseclock "sref{seclock}" +%!postproc(html): #Lseclock +%!postproc(html): #Rseclock here + +%6.2 +%!postproc(tex): #Lsecsmarthouse "label{secsmarthouse}" +%!postproc(tex): #Rsecsmarthouse "sref{secsmarthouse}" +%!postproc(html): #Lsecsmarthouse +%!postproc(html): #Rsecsmarthouse here + +%6.3 +%!postproc(tex): #Lsecpolymorphic "label{secpolymorphic}" +%!postproc(tex): #Rsecpolymorphic "sref{secpolymorphic}" +%!postproc(html): #Lsecpolymorphic +%!postproc(html): 
#Rsecpolymorphic here + +%6.7 +%!postproc(tex): #Lsecbinding "label{secbinding}" +%!postproc(tex): #Rsecbinding "sref{secbinding}" +%!postproc(html): #Lsecbinding +%!postproc(html): #Rsecbinding here + + +%6.8 +%!postproc(tex): #Lsecdefdef "label{secdefdef}" +%!postproc(tex): #Rsecdefdef "sref{secdefdef}" +%!postproc(html): #Lsecdefdef +%!postproc(html): #Rsecdefdef here + +%7.2 +%!postproc(tex): #Lseclexing "label{seclexing}" +%!postproc(tex): #Rseclexing "sref{seclexing}" +%!postproc(html): #Lseclexing +%!postproc(html): #Rseclexing here + +%7.3 +%!postproc(tex): #Lsecprecedence "label{secprecedence}" +%!postproc(tex): #Rsecprecedence "sref{secprecedence}" +%!postproc(html): #Lsecprecedence +%!postproc(html): #Rsecprecedence here + +%8.3.4 +%!postproc(tex): #Lsecmathprogram "label{secmathprogram}" +%!postproc(tex): #Rsecmathprogram "sref{secmathprogram}" +%!postproc(html): #Lsecmathprogram +%!postproc(html): #Rsecmathprogram here + + + + + + +%!postproc(tex): #APPENDIX "appendix" +%!postproc(tex): #CHAPTER "chapter{The GF Programming Language}" +%!postproc(tex): #TOC "tableofcontents" + +%!postproc(tex): #BECE "begin{center}" +%!postproc(tex): #ENCE "end{center}" +%!postproc(tex): "subsection\*" "section" +%!postproc(tex): "section\*" "chapter" +%%!postproc(tex): "paragraph\{\}\\bf" "subsubsection" + +%!postproc(tex): #PARTbnf "input{RefDocGF.tex}" +%!preproc(html): #PARTbnf %!include: RefDocGF.txt + +%!postproc(tex): "textbf" "keywrd" +%!postproc(tex): #PRINTINDEX "printindex" +%!preproc(html): #PRINTINDEX "" + +%!postproc(tex): #sugar "sugar" +%!postproc(tex): #comput "computes" + +%!postproc(tex): #Aone "subscr{A}{1}" +%!postproc(tex): #Aen "subscr{A}{n}" +%!postproc(tex): #Aii "subscr{A}{i}" +%!postproc(tex): #aone "subscr{a}{1}" +%!postproc(tex): #anone "subscr{a}{n-1}" +%!postproc(tex): #aen "subscr{a}{n}" +%!postproc(tex): #Bone "subscr{B}{1}" +%!postproc(tex): #Bem "subscr{B}{m}" +%!postproc(tex): #Ben "subscr{B}{n}" +%!postproc(tex): #Cone "subscr{C}{1}" 
+%!postproc(tex): #Cen "subscr{C}{n}" +%!postproc(tex): #Cii "subscr{C}{i}" +%!postproc(tex): #fone "subscr{f}{1}" +%!postproc(tex): #fen "subscr{f}{n}" +%!postproc(tex): #Gone "subscr{G}{1}" +%!postproc(tex): #Gen "subscr{G}{n}" +%!postproc(tex): #pone "subscr{p}{1}" +%!postproc(tex): #pem "subscr{p}{m}" +%!postproc(tex): #pen "subscr{p}{n}" +%!postproc(tex): #pii "subscr{p}{i}" +%!postproc(tex): #rii "subscr{r}{i}" +%!postproc(tex): #rone "subscr{r}{1}" +%!postproc(tex): #ren "subscr{r}{n}" +%!postproc(tex): #sii "subscr{s}{i}" +%!postproc(tex): #sone "subscr{s}{1}" +%!postproc(tex): #sen "subscr{s}{n}" +%!postproc(tex): #Tone "subscr{T}{1}" +%!postproc(tex): #Ten "subscr{T}{n}" +%!postproc(tex): #tone "subscr{t}{1}" +%!postproc(tex): #tem "subscr{t}{m}" +%!postproc(tex): #ten "subscr{t}{n}" +%!postproc(tex): #tii "subscr{t}{i}" +%!postproc(tex): #Vone "subscr{V}{1}" +%!postproc(tex): #Ven "subscr{V}{n}" +%!postproc(tex): #Vii "subscr{V}{i}" +%!postproc(tex): #xnone "subscr{x}{n-1}" +%!postproc(tex): #xone "subscr{x}{1}" +%!postproc(tex): #xen "subscr{x}{n}" +%!postproc(tex): #xii "subscr{x}{i}" +%!postproc(tex): #yone "subscr{y}{1}" +%!postproc(tex): #yem "subscr{y}{m}" +%!postproc(tex): #zone "subscr{z}{1}" +%!postproc(tex): #zem "subscr{z}{m}" + +%%% undo the effect for these links in the synopsis +%!postproc(tex): "subscr\{G\}\{n\}der" "#Gender" +%!postproc(tex): "subscr\{T\}\{n\}se" "#Tense" + + +%%!target:html +%!postproc(html): #APPENDIX "" +%!postproc(html): #CHAPTER "" +%!postproc(html): #TOC "" + +%!postproc(html): #BECE "
" +%!postproc(html): #ENCE "
" +%%!postproc(html): "subsection\*" "section" +%%!postproc(html): "section\*" "chapter" +%%!preproc(html): #PARTbnf "[BNF Grammar of GF RefDocGF.html]" + +%!postproc(html): #sugar "===" +%!postproc(html): #comput "==>" + +%!postproc(html): #Aone "A1" +%!postproc(html): #Aen "An" +%!postproc(html): #Aii "Ai" +%!postproc(html): #aone "a1" +%!postproc(html): #anone "an-1" +%!postproc(html): #aen "an" +%!postproc(html): #Bone "B1" +%!postproc(html): #Bem "Bm" +%!postproc(html): #Ben "Bn" +%!postproc(html): #Cone "C1" +%!postproc(html): #Cen "Cn" +%!postproc(html): #Cii "Ci" +%!postproc(html): #fone "f1" +%!postproc(html): #fen "fn" +%!postproc(html): #Gone "G1" +%!postproc(html): #Gen "Gn" +%!postproc(html): #pone "p1" +%!postproc(html): #pem "pm" +%!postproc(html): #pen "pn" +%!postproc(html): #pii "pi" +%!postproc(html): #rii "ri" +%!postproc(html): #rone "r1" +%!postproc(html): #ren "rn" +%!postproc(html): #sii "si" +%!postproc(html): #sone "s1" +%!postproc(html): #sen "sn" +%!postproc(html): #Tone "T1" +%!postproc(html): #Ten "Tn" +%!postproc(html): #tone "t1" +%!postproc(html): #tem "tm" +%!postproc(html): #ten "tn" +%!postproc(html): #tii "ti" +%!postproc(html): #Vone "V1" +%!postproc(html): #Ven "Vn" +%!postproc(html): #Vii "Vi" +%!postproc(html): #xnone "xn-1" +%!postproc(html): #xone "x1" +%!postproc(html): #xen "xn" +%!postproc(html): #xii "xi" +%!postproc(html): #yone "y1" +%!postproc(html): #yem "ym" +%!postproc(html): #zone "z1" +%!postproc(html): #zem "zm" + + +%!postproc(tex): #Lpatternmatching "label{patternmatching}" +%!postproc(tex): #Rpatternmatching "sref{patternmatching}" +%!postproc(html): #Lpatternmatching +%!postproc(html): #Rpatternmatching here + +%!postproc(tex): #Lcatjudgements "label{catjudgements}" +%!postproc(tex): #Rcatjudgements "sref{catjudgements}" +%!postproc(html): #Lcatjudgements +%!postproc(html): #Rcatjudgements here + +%!postproc(tex): #Lcnctypes "label{cnctypes}" +%!postproc(tex): #Rcnctypes "sref{cnctypes}" +%!postproc(html): 
#Lcnctypes +%!postproc(html): #Rcnctypes here + +%!postproc(tex): #Lcompleteness "label{completeness}" +%!postproc(tex): #Rcompleteness "sref{completeness}" +%!postproc(html): #Lcompleteness +%!postproc(html): #Rcompleteness here + +%!postproc(tex): #Lcontexts "label{contexts}" +%!postproc(tex): #Rcontexts "sref{contexts}" +%!postproc(html): #Lcontexts +%!postproc(html): #Rcontexts here + +%!postproc(tex): #Lconversions "label{conversions}" +%!postproc(tex): #Rconversions "sref{conversions}" +%!postproc(html): #Lconversions +%!postproc(html): #Rconversions here + +%!postproc(tex): #Lexpressions "label{expressions}" +%!postproc(tex): #Rexpressions "sref{expressions}" +%!postproc(html): #Lexpressions +%!postproc(html): #Rexpressions here + +%!postproc(tex): #Lflagvalues "label{flagvalues}" +%!postproc(tex): #Rflagvalues "sref{flagvalues}" +%!postproc(html): #Lflagvalues +%!postproc(html): #Rflagvalues here + +%!postproc(tex): #Lfunctionelimination "label{functionelimination}" +%!postproc(tex): #Rfunctionelimination "sref{functionelimination}" +%!postproc(html): #Lfunctionelimination +%!postproc(html): #Rfunctionelimination here + +%!postproc(tex): #Lfunctiontype "label{functiontype}" +%!postproc(tex): #Rfunctiontype "sref{functiontype}" +%!postproc(html): #Lfunctiontype +%!postproc(html): #Rfunctiontype here + +%!postproc(tex): #Lgluing "label{gluing}" +%!postproc(tex): #Rgluing "sref{gluing}" +%!postproc(html): #Lgluing +%!postproc(html): #Rgluing here + +%!postproc(tex): #LHOAS "label{HOAS}" +%!postproc(tex): #RHOAS "sref{HOAS}" +%!postproc(html): #LHOAS +%!postproc(html): #RHOAS here + +%!postproc(tex): #Lidentifiers "label{identifiers}" +%!postproc(tex): #Ridentifiers "sref{identifiers}" +%!postproc(html): #Lidentifiers +%!postproc(html): #Ridentifiers here + +%!postproc(tex): #Ljudgementforms "label{judgementforms}" +%!postproc(tex): #Rjudgementforms "sref{judgementforms}" +%!postproc(html): #Ljudgementforms +%!postproc(html): #Rjudgementforms here + 
+%!postproc(tex): #Llindefjudgements "label{lindefjudgements}" +%!postproc(tex): #Rlindefjudgements "sref{lindefjudgements}" +%!postproc(html): #Llindefjudgements +%!postproc(html): #Rlindefjudgements here + +%!postproc(tex): #Llinexpansion "label{linexpansion}" +%!postproc(tex): #Rlinexpansion "sref{linexpansion}" +%!postproc(html): #Llinexpansion +%!postproc(html): #Rlinexpansion here + +%!postproc(tex): #Loldgf "label{oldgf}" +%!postproc(tex): #Roldgf "sref{oldgf}" +%!postproc(html): #Loldgf +%!postproc(html): #Roldgf here + +%!postproc(tex): #Lopenabstract "label{openabstract}" +%!postproc(tex): #Ropenabstract "sref{openabstract}" +%!postproc(html): #Lopenabstract +%!postproc(html): #Ropenabstract here + +%!postproc(tex): #Loverloading "label{overloading}" +%!postproc(tex): #Roverloading "sref{overloading}" +%!postproc(html): #Loverloading +%!postproc(html): #Roverloading here + +%!postproc(tex): #Lparamjudgements "label{paramjudgements}" +%!postproc(tex): #Rparamjudgements "sref{paramjudgements}" +%!postproc(html): #Lparamjudgements +%!postproc(html): #Rparamjudgements here + +%!postproc(tex): #Lparamtypes "label{paramtypes}" +%!postproc(tex): #Rparamtypes "sref{paramtypes}" +%!postproc(html): #Lparamtypes +%!postproc(html): #Rparamtypes here + +%!postproc(tex): #Lparamvalues "label{paramvalues}" +%!postproc(tex): #Rparamvalues "sref{paramvalues}" +%!postproc(html): #Lparamvalues +%!postproc(html): #Rparamvalues here + +%!postproc(tex): #Lpredefabs "label{predefabs}" +%!postproc(tex): #Rpredefabs "sref{predefabs}" +%!postproc(html): #Lpredefabs +%!postproc(html): #Rpredefabs here + +%!postproc(tex): #Lpredefcnc "label{predefcnc}" +%!postproc(tex): #Rpredefcnc "sref{predefcnc}" +%!postproc(html): #Lpredefcnc +%!postproc(html): #Rpredefcnc here + +%!postproc(tex): #Lqualifiednames "label{qualifiednames}" +%!postproc(tex): #Rqualifiednames "sref{qualifiednames}" +%!postproc(html): #Lqualifiednames +%!postproc(html): #Rqualifiednames here + +%!postproc(tex): 
#Lrenaming "label{renaming}" +%!postproc(tex): #Rrenaming "sref{renaming}" +%!postproc(html): #Lrenaming +%!postproc(html): #Rrenaming here + +%!postproc(tex): #Lrestrictedinheritance "label{restrictedinheritance}" +%!postproc(tex): #Rrestrictedinheritance "sref{restrictedinheritance}" +%!postproc(html): #Lrestrictedinheritance +%!postproc(html): #Rrestrictedinheritance here + +%!postproc(tex): #Lreuse "label{reuse}" +%!postproc(tex): #Rreuse "sref{reuse}" +%!postproc(html): #Lreuse +%!postproc(html): #Rreuse here + +%!postproc(tex): #Lruntimevariables "label{runtimevariables}" +%!postproc(tex): #Rruntimevariables "sref{runtimevariables}" +%!postproc(html): #Lruntimevariables +%!postproc(html): #Rruntimevariables here + +%!postproc(tex): #Lstrtype "label{strtype}" +%!postproc(tex): #Rstrtype "sref{strtype}" +%!postproc(html): #Lstrtype +%!postproc(html): #Rstrtype here + +%!postproc(tex): #Lsubtyping "label{subtyping}" +%!postproc(tex): #Rsubtyping "sref{subtyping}" +%!postproc(html): #Lsubtyping +%!postproc(html): #Rsubtyping here + +%!postproc(tex): #Lsyntaxtrees "label{syntaxtrees}" +%!postproc(tex): #Rsyntaxtrees "sref{syntaxtrees}" +%!postproc(html): #Lsyntaxtrees +%!postproc(html): #Rsyntaxtrees here + +%!postproc(tex): #Ltables "label{tables}" +%!postproc(tex): #Rtables "sref{tables}" +%!postproc(html): #Ltables +%!postproc(html): #Rtables here + +%!postproc(tex): #Lvariablebinding "label{variablebinding}" +%!postproc(tex): #Rvariablebinding "sref{variablebinding}" +%!postproc(html): #Lvariablebinding +%!postproc(html): #Rvariablebinding here + +%% last, to avoid overriding with subsection* -> section +%!postproc(tex): #PREFACE "subsection*{Preface}" +%!postproc(tex): #OVERVIEW "subsection*{Overview}" +%!postproc(html): #OVERVIEW

Overview

+ + + + +#NEW + +=Overview= + +This is a hands-on introduction to grammar writing in GF. + +Main ingredients of GF: +- linguistics +- functional programming + + +Prerequisites: +- some previous experience with some programming language +- the basics of using computers, e.g. the use of + text editors and the management of files. +- knowledge of Unix commands is useful but not necessary +- knowledge of many natural languages may add fun to the experience + + + + +#NEW + +==Outline== + +#Rchaptwo: a multilingual "Hello World" grammar. English, Finnish, Italian. + +#Rchapthree: a larger grammar for the domain of food. English and Italian. + +#Rchapfour: parameters - morphology and agreement. + +#Rchapfive: using the resource grammar library. + +#Rchapsix: semantics - **dependent types**, **variable bindings**, +and **semantic definitions**. + +#Rchapseven: implementing formal languages. + +#Rchapeight: embedded grammar applications. + + + +#NEW + +==Slides== + +You can chop this tutorial into a set of slides by the command +``` + htmls gf-tutorial.html +``` +where the program ``htmls`` is distributed with GF (see below), in + + [``GF/src/tools/Htmls.hs`` http://grammaticalframework.org/src/tools/Htmls.hs] + +The slides will appear as a set of files beginning with ``01-gf-tutorial.htmls``. + +Internal links will not work in the slide format, except for those in the +upper left corner of each slide, and the links behind the "Contents" link. 
+ + + +#NEW + +=Lesson 1: Getting Started with GF= + + +#Lchaptwo + +Goals: +- install and run GF +- write the first GF grammar: a "Hello World" grammar in three languages +- use GF for translation and multilingual generation + + +#NEW + +==What GF is== + +We use the term GF for three different things: +- a **system** (computer program) used for working with grammars +- a **programming language** in which grammars can be written +- a **theory** about grammars and languages + + +The GF system is an implementation +of the GF programming language, which in turn is built on the ideas of the +GF theory. + +The focus of this tutorial is on using the GF programming language. + +At the same time, we learn the way of thinking in the GF theory. + +We make the grammars run on a computer by +using the GF system. + + +#NEW + +==GF grammars and language processing tasks== + +A GF program is called a **grammar**. + +A grammar defines a language. + +From this definition, language processing components can be derived: +- **parsing**: to analyse the language +- **linearization**: to generate the language +- **translation**: to analyse one language and generate another + + +In general, a GF grammar is **multilingual**: +- many languages in one grammar +- translations between them + + + + + + + +#NEW + +==Getting the GF system== + +Open-source free software, downloaded via the GF Homepage: + +[``grammaticalframework.org`` http://grammaticalframework.org/] + +There you find +- binaries for Linux, Mac OS X, and Windows +- source code and documentation +- grammar libraries and examples + + +Many examples in this tutorial are +[online http://grammaticalframework.org/examples/tutorial]. + +Normally you don't have to compile GF yourself. +But, if you do want to compile GF from source follow the +instructions in the [Developers Guide ../gf-developers.html]. 
+ + +#NEW + +==Running the GF system== + +Type ``gf`` in the Unix (or Cygwin) shell: +``` + % gf +``` +You will see GF's welcome message and the prompt ``>``. +The command +``` + > help +``` +will give you a list of available commands. + +As a common convention, we will use +- ``%`` as a prompt that marks system commands +- ``>`` as a prompt that marks GF commands + + +Thus you should not type these prompts, but only the characters that +follow them. + + +#NEW + +==A "Hello World" grammar== + +Like most programming language tutorials, we start with a +program that prints "Hello World" on the terminal. + +Extra features: +- **Multilinguality**: the message is printed in many languages. +- **Reversibility**: in addition to printing, you can **parse** the + message and **translate** it to other languages. + + +#NEW + +===The program: abstract syntax and concrete syntaxes=== + +A GF program, in general, is a **multilingual grammar**. Its main parts +are +- an **abstract syntax** +- one or more **concrete syntaxes** + + +The abstract syntax defines what **meanings** +can be expressed in the grammar +- //Greetings//, where we greet a //Recipient//, which can be + //World// or //Mum// or //Friends// + + + +#NEW + +GF code for the abstract syntax: +``` + -- a "Hello World" grammar + abstract Hello = { + + flags startcat = Greeting ; + + cat Greeting ; Recipient ; + + fun + Hello : Recipient -> Greeting ; + World, Mum, Friends : Recipient ; + } +``` +The code has the following parts: +- a **comment** (optional), saying what the module is doing +- a **module header** indicating that it is an abstract syntax + module named ``Hello`` +- a **module body** in braces, consisting of + - a **startcat flag declaration** stating that ``Greeting`` is the + default start category for parsing and generation + - **category declarations** introducing two categories, i.e. 
types of meanings + - **function declarations** introducing four meaning-building functions + + +#NEW + +English concrete syntax (mapping from meanings to strings): +``` + concrete HelloEng of Hello = { + + lincat Greeting, Recipient = {s : Str} ; + + lin + Hello recip = {s = "hello" ++ recip.s} ; + World = {s = "world"} ; + Mum = {s = "mum"} ; + Friends = {s = "friends"} ; + } +``` +The major parts of this code are: +- a module header indicating that it is a concrete syntax of the abstract syntax + ``Hello``, itself named ``HelloEng`` +- a module body in curly brackets, consisting of + - **linearization type definitions** stating that + ``Greeting`` and ``Recipient`` are **records** with a **string** ``s`` + - **linearization definitions** telling what records are assigned to + each of the meanings defined in the abstract syntax + + +Notice the concatenation ``++`` and the record projection ``.``. + + +#NEW + +Finnish and Italian concrete syntaxes: +``` + concrete HelloFin of Hello = { + lincat Greeting, Recipient = {s : Str} ; + lin + Hello recip = {s = "terve" ++ recip.s} ; + World = {s = "maailma"} ; + Mum = {s = "äiti"} ; + Friends = {s = "ystävät"} ; + } + + concrete HelloIta of Hello = { + lincat Greeting, Recipient = {s : Str} ; + lin + Hello recip = {s = "ciao" ++ recip.s} ; + World = {s = "mondo"} ; + Mum = {s = "mamma"} ; + Friends = {s = "amici"} ; + } +``` + + +#NEW + +===Using grammars in the GF system=== + +In order to compile the grammar in GF, +we create four files, one for each module, named //Modulename//``.gf``: +``` + Hello.gf HelloEng.gf HelloFin.gf HelloIta.gf +``` +The first GF command: **import** a grammar. +``` + > import HelloEng.gf +``` +All commands also have short names; here: +``` + > i HelloEng.gf +``` +The GF system will **compile** your grammar +into an internal representation and show the CPU time that was consumed, followed +by a new prompt: +``` + > i HelloEng.gf + - compiling Hello.gf... 
wrote file Hello.gfo 8 msec + - compiling HelloEng.gf... wrote file HelloEng.gfo 12 msec + + 12 msec + > +``` + +#NEW + +You can use GF for **parsing** (``parse`` = ``p``) +``` + > parse "hello world" + Hello World +``` +Parsing takes a **string** into an **abstract syntax tree**. + +The notation for trees is that of **function application**: +``` + function argument1 ... argumentn +``` +Parentheses are only needed for grouping. + +Parsing something that is not in the grammar will fail: +``` + > parse "hello dad" + Unknown words: dad + + > parse "world hello" + no tree found +``` + +#NEW + +You can also use GF for **linearization** (``linearize = l``). +It takes trees into strings: +``` + > linearize Hello World + hello world +``` +**Translation**: **pipe** parsing to linearization: +``` + > import HelloEng.gf + > import HelloIta.gf + + > parse -lang=HelloEng "hello mum" | linearize -lang=HelloIta + ciao mamma +``` +Default of the language flag (``-lang``): the last-imported concrete syntax. + +**Multilingual generation**: +``` + > parse -lang=HelloEng "hello friends" | linearize + terve ystävät + ciao amici + hello friends +``` +Linearization is by default to all available languages. + +#NEW + +===Exercises on the Hello World grammar=== + ++ Test the parsing and translation examples shown above, as well as +some other examples, in different combinations of languages. + ++ Extend the grammar ``Hello.gf`` and some of the +concrete syntaxes by five new recipients and one new greeting +form. + ++ Add concrete syntaxes for some other +languages you might know. + ++ Add a pair of greetings that are expressed in one and +the same way in +one language and in two different ways in another. +For instance, //good morning// +and //good afternoon// in English are both expressed +as //buongiorno// in Italian. +Test what happens when you translate //buongiorno// to English in GF. 
+ ++ Inject errors in the ``Hello`` grammars, for example, leave out +some line, omit a variable in a ``lin`` rule, or change the name +in one occurrence +of a variable. Inspect the error messages generated by GF. + + +#NEW + +==Using grammars from outside GF== + +You can use the ``gf`` program in a Unix pipe. +- echo a GF command +- pipe it into GF with grammar names as arguments + + +``` + % echo "l Hello World" | gf HelloEng.gf HelloFin.gf HelloIta.gf +``` +You can also write a **script**, a file containing the lines +``` + import HelloEng.gf + import HelloFin.gf + import HelloIta.gf + linearize Hello World +``` + + +#NEW + +==GF scripts== + +If we name this script ``hello.gfs``, we can do +``` + $ gf --run Quality -> Phrase ; + This, That : Kind -> Item ; + QKind : Quality -> Kind -> Kind ; + Wine, Cheese, Fish : Kind ; + Very : Quality -> Quality ; + Fresh, Warm, Italian, Expensive, Delicious, Boring : Quality ; + } +``` +Example ``Phrase`` +``` + Is (This (QKind Delicious (QKind Italian Wine))) (Very (Very Expensive)) + this delicious Italian wine is very very expensive +``` + + +#NEW + +==The concrete syntax FoodEng== + +``` + concrete FoodEng of Food = { + + lincat + Phrase, Item, Kind, Quality = {s : Str} ; + + lin + Is item quality = {s = item.s ++ "is" ++ quality.s} ; + This kind = {s = "this" ++ kind.s} ; + That kind = {s = "that" ++ kind.s} ; + QKind quality kind = {s = quality.s ++ kind.s} ; + Wine = {s = "wine"} ; + Cheese = {s = "cheese"} ; + Fish = {s = "fish"} ; + Very quality = {s = "very" ++ quality.s} ; + Fresh = {s = "fresh"} ; + Warm = {s = "warm"} ; + Italian = {s = "Italian"} ; + Expensive = {s = "expensive"} ; + Delicious = {s = "delicious"} ; + Boring = {s = "boring"} ; + } +``` + +#NEW + +Test the grammar for parsing: +``` + > import FoodEng.gf + > parse "this delicious wine is very very Italian" + Is (This (QKind Delicious Wine)) (Very (Very Italian)) +``` +Parse in other categories setting the ``cat`` flag: +``` + p -cat=Kind "very 
Italian wine" + QKind (Very Italian) Wine +``` + + +#NEW + +===Exercises on the Food grammar=== + ++ Extend the ``Food`` grammar by ten new food kinds and +qualities, and run the parser with new kinds of examples. + ++ Add a rule that enables question phrases of the form +//is this cheese Italian//. + ++ Enable the optional prefixing of +phrases with the words "excuse me but". Do this in such a way that +the prefix can occur at most once. + + + +#NEW + +==Commands for testing grammars== + +===Generating trees and strings=== + +Random generation (``generate_random = gr``): build +a random tree in accordance with an abstract syntax: +``` + > generate_random + Is (This (QKind Italian Fish)) Fresh +``` +By using a pipe, random generation can be fed into linearization: +``` + > generate_random | linearize + this Italian fish is fresh +``` +Use the ``number`` flag to generate several trees: +``` + > gr -number=4 | l + that wine is boring + that fresh cheese is fresh + that cheese is very boring + this cheese is Italian +``` + +#NEW + +To generate //all// phrases that a grammar can produce, +use ``generate_trees = gt``. +``` + > generate_trees | l + that cheese is very Italian + that cheese is very boring + that cheese is very delicious + ... + this wine is fresh + this wine is warm +``` +The default **depth** is 3; the depth can be +set by using the ``depth`` flag: +``` + > generate_trees -depth=2 | l +``` +What options a command has can be seen by the ``help = h`` command: +``` + > help gr + > help gt +``` + + +#NEW + +===Exercises on generation=== + ++ If the command ``gt`` generated all +trees in your grammar, it would never terminate. Why? + ++ Measure how many trees the grammar gives with depths 4 and 5, +respectively. **Hint**. You can +use the Unix **word count** command ``wc`` to count lines. 
+ + + +#NEW + +===More on pipes: tracing=== + +Add the **tracing** option ``-tr`` to each command whose output you +want to see: +``` + > gr -tr | l -tr | p + + Is (This Cheese) Boring + this cheese is boring + Is (This Cheese) Boring +``` +Useful for test purposes: the pipe above can show +if a grammar is **ambiguous**, i.e. +contains strings that can be parsed in more than one way. + +**Exercise**. Extend the ``Food`` grammar so that it produces ambiguous +strings, and try out the ambiguity test. + + +#NEW + +===Writing and reading files=== + +To save the output into a file, pipe it to the ``write_file = wf`` command, +``` + > gr -number=10 | linearize | write_file -file=exx.tmp +``` +To read a file into GF, use the ``read_file = rf`` command, +``` + > read_file -file=exx.tmp -lines | parse +``` +The flag ``-lines`` tells GF to read each line of the file separately. + +Files with examples can be used for **regression testing** +of grammars - the most systematic way to do this is by +**treebanks**; see #Rsectreebank. + + +#NEW + +===Visualizing trees=== + +Parentheses give a linear representation of trees, +useful for the computer. + +The human eye may prefer to see a visualization: ``visualize_tree = vt``: +``` + > parse "this delicious cheese is very Italian" | visualize_tree +``` +The tree is generated in a PostScript (``.ps``) file. The ``-view`` option is used for +telling what command to use to view the file. Its default is ``"gv"``, which works +on most Linux installations. On a Mac, one would probably write +``` + > parse "this delicious cheese is very Italian" | visualize_tree -view="open" +``` + + + +#MYTREE + +This command uses the program [Graphviz http://www.graphviz.org/], which you +might not have, but which is freely available on the web. + +You can save the temporary file ``_grph.dot``, +which the command ``vt`` produces. + +Then you can process this file with the ``dot`` +program (from the Graphviz package). 
+``` + % dot -Tpng _grph.dot > mytree.png +``` + + +#NEW + +===System commands=== + +You can give a **system command** without leaving GF: +``!`` followed by a Unix command, +``` + > ! dot -Tpng _grph.dot > mytree.png + > ! open mytree.png +``` +A system command may also receive its argument from +a GF pipe. It then has the name ``sp`` = ``system_pipe``: +``` + > generate_trees -depth=4 | sp -command="wc -l" +``` +This command example returns the number of generated trees. + + +**Exercise**. +Measure how many trees the grammar ``FoodEng`` gives with depths 4 and 5, +respectively. Use the Unix **word count** command ``wc`` to count lines, and +a system pipe from a GF command into a Unix command. + + + + + +#NEW + +==An Italian concrete syntax== + +#Lsecanitalian + +Just (?) replace English words with their dictionary equivalents: +``` + concrete FoodIta of Food = { + + lincat + Phrase, Item, Kind, Quality = {s : Str} ; + + lin + Is item quality = {s = item.s ++ "è" ++ quality.s} ; + This kind = {s = "questo" ++ kind.s} ; + That kind = {s = "quel" ++ kind.s} ; + QKind quality kind = {s = kind.s ++ quality.s} ; + Wine = {s = "vino"} ; + Cheese = {s = "formaggio"} ; + Fish = {s = "pesce"} ; + Very quality = {s = "molto" ++ quality.s} ; + Fresh = {s = "fresco"} ; + Warm = {s = "caldo"} ; + Italian = {s = "italiano"} ; + Expensive = {s = "caro"} ; + Delicious = {s = "delizioso"} ; + Boring = {s = "noioso"} ; + } +``` + + +#NEW + +Not just replacing words: + +The order of a quality and the kind it modifies is changed in +``` + QKind quality kind = {s = kind.s ++ quality.s} ; +``` +Thus Italian says ``vino italiano`` for ``Italian wine``. + +(Some Italian adjectives +are put before the noun. This distinction can be controlled by parameters, +which are introduced in #Rchapfour.) + +#NEW + +===Exercises on multilinguality=== + ++ Write a concrete syntax of ``Food`` for some other language. 
+You will probably end up with grammatically incorrect +linearizations - but don't +worry about this yet. + ++ If you have written ``Food`` for German, Swedish, or some +other language, test with random or exhaustive generation what constructs +come out incorrect, and prepare a list of those that cannot be helped +with the currently available fragment of GF. You can return to your list +after having worked out #Rchapfour. + + + + +#NEW + +==Free variation== + +Semantically indistinguishable ways of expressing a thing. + +The **variants** construct of GF expresses free variation. For example, +``` + lin Delicious = {s = "delicious" | "exquisite" | "tasty"} ; +``` +By default, the ``linearize`` command +shows only the first variant from such lists; to see them +all, use the option ``-all``: +``` + > p "this exquisite wine is delicious" | l -all + this delicious wine is delicious + this delicious wine is exquisite + ... +``` + +#NEW + +An equivalent notation for variants is +``` + lin Delicious = {s = variants {"delicious" ; "exquisite" ; "tasty"}} ; +``` +This notation also allows the limiting case: an empty variant list, +``` + variants {} +``` +It can be used e.g. if a word lacks a certain inflection form. + +Free variation works for all types in concrete syntax; all terms in +a variant list must be of the same type. + + +#NEW + +==More applications of multilingual grammars== + +===Multilingual treebanks=== + +#Lsectreebank + +**Multilingual treebank**: a set of trees with their +linearizations in different languages: +``` + > gr -number=2 | l -treebank + + Is (That Cheese) (Very Boring) + quel formaggio è molto noioso + that cheese is very boring + + Is (That Cheese) Fresh + quel formaggio è fresco + that cheese is fresh +``` + + + +#NEW + +===Translation quiz=== + +``translation_quiz = tq``: +generate random sentences, display them in one language, and check the user's +answer given in another language. 
+``` + > translation_quiz -from=FoodEng -to=FoodIta + + Welcome to GF Translation Quiz. + The quiz is over when you have done at least 10 examples + with at least 75 % success. + You can interrupt the quiz by entering a line consisting of a dot ('.'). + + this fish is warm + questo pesce caldo + > Yes. + Score 1/1 + + this cheese is Italian + questo formaggio noioso + > No, not questo formaggio noioso, but + questo formaggio italiano + + Score 1/2 + this fish is expensive +``` + + + +#NEW + +==Context-free grammars and GF== + +===The "cf" grammar format=== + +The grammar ``FoodEng`` can be written in a BNF format as follows: +``` + Is. Phrase ::= Item "is" Quality ; + That. Item ::= "that" Kind ; + This. Item ::= "this" Kind ; + QKind. Kind ::= Quality Kind ; + Cheese. Kind ::= "cheese" ; + Fish. Kind ::= "fish" ; + Wine. Kind ::= "wine" ; + Italian. Quality ::= "Italian" ; + Boring. Quality ::= "boring" ; + Delicious. Quality ::= "delicious" ; + Expensive. Quality ::= "expensive" ; + Fresh. Quality ::= "fresh" ; + Very. Quality ::= "very" Quality ; + Warm. Quality ::= "warm" ; +``` +GF can convert BNF grammars into GF. +BNF files are recognized by the file name suffix ``.cf`` (for **context-free**): +``` + > import food.cf +``` +The compiler creates separate abstract and concrete modules internally. + + +#NEW + +===Restrictions of context-free grammars=== + +Separating concrete and abstract syntax allows +three deviations from context-free grammar: +- **permutation**: changing the order of constituents +- **suppression**: omitting constituents +- **reduplication**: repeating constituents + + +**Exercise**. Define the non-context-free +copy language ``{x x | x <- (a|b)*}`` in GF. + + + +#NEW + +%--! +==Modules and files== + +GF uses suffixes to recognize different file formats: +- Source files: //Modulename//``.gf`` +- Target files: //Modulename//``.gfo`` + + +Importing generates target from source: +``` + > i FoodEng.gf + - compiling Food.gf... 
wrote file Food.gfo 16 msec + - compiling FoodEng.gf... wrote file FoodEng.gfo 20 msec +``` +The ``.gfo`` format (="GF Object") is precompiled GF, which is +faster to load than source GF (``.gf``). + +When reading a module, GF decides whether +to use an existing ``.gfo`` file or to generate +a new one, by looking at modification times. + + +#NEW + +**Exercise**. What happens when you import ``FoodEng.gf`` for +a second time? Try this in different situations: +- Right after importing it the first time (the modules are kept in + the memory of GF and need no reloading). +- After issuing the command ``empty`` (``e``), which clears the memory + of GF. +- After making a small change in ``FoodEng.gf``, be it only an added space. +- After making a change in ``Food.gf``. + + + +#NEW + +==Using operations and resource modules== + +===Operation definitions=== + +The golden rule of functional programming: + +//Whenever you find yourself programming by copy-and-paste, write a function instead.// + +Functions in concrete syntax are defined using the keyword ``oper`` (for +**operation**), distinct from ``fun`` for the sake of clarity. + +Example: +``` + oper ss : Str -> {s : Str} = \x -> {s = x} ; +``` +The operation can be **applied** to an argument, and GF will +**compute** the value: +``` + ss "boy" ===> {s = "boy"} +``` +The symbol ``===>`` will be used for computation. + + +#NEW + +Notice the **lambda abstraction** form +- ``\``//x// ``->`` //t// + + +This is read: +- function with variable //x// and **function body** //t// + + +For lambda abstraction with multiple arguments, we have the shorthand +``` + \x,y -> t === \x -> \y -> t +``` +Linearization rules actually use syntactic +sugar for abstraction: +``` + lin f x = t === lin f = \x -> t +``` + + + +#NEW + +%--! +===The ``resource`` module type=== + +The ``resource`` module type is used to package +``oper`` definitions into reusable resources. 
+``` + resource StringOper = { + oper + SS : Type = {s : Str} ; + ss : Str -> SS = \x -> {s = x} ; + cc : SS -> SS -> SS = \x,y -> ss (x.s ++ y.s) ; + prefix : Str -> SS -> SS = \p,x -> ss (p ++ x.s) ; + } +``` + + +#NEW + +%--! +===Opening a resource=== + +Any number of ``resource`` modules can be +**open**ed in a ``concrete`` syntax. +``` + concrete FoodEng of Food = open StringOper in { + + lincat + S, Item, Kind, Quality = SS ; + + lin + Is item quality = cc item (prefix "is" quality) ; + This k = prefix "this" k ; + That k = prefix "that" k ; + QKind k q = cc k q ; + Wine = ss "wine" ; + Cheese = ss "cheese" ; + Fish = ss "fish" ; + Very = prefix "very" ; + Fresh = ss "fresh" ; + Warm = ss "warm" ; + Italian = ss "Italian" ; + Expensive = ss "expensive" ; + Delicious = ss "delicious" ; + Boring = ss "boring" ; + } +``` + + +#NEW + +%--! +===Partial application=== + +#Lsecpartapp + +The rule +``` + lin This k = prefix "this" k ; +``` +can be written more concisely +``` + lin This = prefix "this" ; +``` +Part of the art in functional programming: +decide the order of arguments in a function, +so that partial application can be used as much as possible. + +For instance, ``prefix`` is typically applied to +linearization variables with constant strings. Hence we +put the ``Str`` argument before the ``SS`` argument. + + +**Exercise**. 
Define an operation ``infix`` analogous to ``prefix``, +such that it allows you to write +``` + lin Is = infix "is" ; +``` + + +#NEW + +===Testing resource modules=== + +Import with the flag ``-retain``, +``` + > import -retain StringOper.gf +``` +Compute the value with ``compute_concrete = cc``, +``` + > compute_concrete prefix "in" (ss "addition") + {s : Str = "in" ++ "addition"} +``` + + +#NEW + +==Grammar architecture== + +#Lsecarchitecture + +===Extending a grammar=== + +A new module can **extend** an old one: +``` + abstract Morefood = Food ** { + cat + Question ; + fun + QIs : Item -> Quality -> Question ; + Pizza : Kind ; + } +``` +Parallel to the abstract syntax, extensions can +be built for concrete syntaxes: +``` + concrete MorefoodEng of Morefood = FoodEng ** { + lincat + Question = {s : Str} ; + lin + QIs item quality = {s = "is" ++ item.s ++ quality.s} ; + Pizza = {s = "pizza"} ; + } +``` +The effect of extension: all of the contents of the extended +and extending modules are put together. + +In other words: the new module **inherits** the contents of the old module. + +#NEW + +Simultaneous extension and opening: +``` + concrete MorefoodIta of Morefood = FoodIta ** open StringOper in { + lincat + Question = SS ; + lin + QIs item quality = ss (item.s ++ "" ++ quality.s) ; + Pizza = ss "pizza" ; + } +``` +Resource modules can extend other resource modules - thus it is +possible to build resource hierarchies. + + + +#NEW + +===Multiple inheritance=== + +Extend several grammars at the same time: +``` + abstract Foodmarket = Food, Fruit, Mushroom ** { + fun + FruitKind : Fruit -> Kind ; + MushroomKind : Mushroom -> Kind ; + } +``` +where +``` + abstract Fruit = { + cat Fruit ; + fun Apple, Peach : Fruit ; + } + + abstract Mushroom = { + cat Mushroom ; + fun Cep, Agaric : Mushroom ; + } +``` + +**Exercise**. Refactor ``Food`` by taking apart ``Wine`` into a special +``Drink`` module. 
+ + + +#NEW + +=Lesson 3: Grammars with parameters= + +#Lchapfour + +Goals: +- implement sophisticated linguistic structures: + - morphology: the inflection of words + - agreement: rules for selecting word forms in syntactic combinations + + +- Cover all GF constructs for concrete syntax + + +It is possible to skip this chapter and go directly +to the next, since the use of the GF Resource Grammar library +makes it unnecessary to use parameters: they +could be left to library implementors. + + +#NEW + +==The problem: words have to be inflected== + +Plural forms are needed in things like +#BEQU +//these Italian wines are delicious// +#ENQU +This requires two things: +- the **inflection** of nouns and verbs in singular and plural +- the **agreement** of the verb with the subject: + the verb must have the same number as the subject + + +Different languages have different types of inflection and agreement. +- Italian also has gender (masculine vs. feminine). + + + +In a multilingual grammar, +we want to ignore such distinctions in abstract syntax. + +**Exercise**. Make a list of the possible forms that nouns, +adjectives, and verbs can have in some languages that you know. + + +#NEW + +==Parameters and tables== + +We define the **parameter type** of number in English by +a new form of judgement: +``` + param Number = Sg | Pl ; +``` +This judgement defines the parameter type ``Number`` by listing +its two **constructors**, ``Sg`` and ``Pl`` +(singular and plural). + +We give ``Kind`` a linearization type that has a **table** depending on number: +``` + lincat Kind = {s : Number => Str} ; +``` +The **table type** ``Number => Str`` is similar to a function type +(``Number -> Str``). + +Difference: the argument must be a parameter type. Then +the argument-value pairs can be listed in a finite table. 
+ +#NEW + +Here is a table: +``` + lin Cheese = { + s = table { + Sg => "cheese" ; + Pl => "cheeses" + } + } ; +``` +The table has **branches**, with a **pattern** on the +left of the arrow ``=>`` and a **value** on the right. + +The application of a table is done by the **selection** operator ``!``. + +It is computed by **pattern matching**: return +the value from the first branch whose pattern matches the +argument. For instance, +``` + table {Sg => "cheese" ; Pl => "cheeses"} ! Pl + ===> "cheeses" +``` + +#NEW + +**Case expressions** are syntactic sugar: +``` + case e of {...} === table {...} ! e +``` +Since they are familiar to Haskell and ML programmers, they can come in handy +when writing GF programs. + + +#NEW + +Constructors can take arguments from other parameter types. + +Example: forms of English verbs (except //be//): +``` + param VerbForm = VPresent Number | VPast | VPastPart | VPresPart ; +``` +Fact expressed: only present tense has number variation. + +Example table: the forms of the verb //drink//: +``` + table { + VPresent Sg => "drinks" ; + VPresent Pl => "drink" ; + VPast => "drank" ; + VPastPart => "drunk" ; + VPresPart => "drinking" + } +``` + + +**Exercise**. In an earlier exercise (previous section), +you made a list of the possible +forms that nouns, adjectives, and verbs can have in some languages that +you know. Now take some of the results and implement them by +using parameter type definitions and tables. Write them into a ``resource`` +module, which you can test by using the command ``compute_concrete``. + + + +#NEW + +==Inflection tables and paradigms== + +A morphological **paradigm** is a formula telling how a class of +words is inflected. + +From the GF point of view, a paradigm is a function that takes +a **lemma** (also known as a **dictionary form**, or a **citation form**) and +returns an inflection table. 
+ +The following operation defines the regular noun paradigm of English: +``` + oper regNoun : Str -> {s : Number => Str} = \dog -> { + s = table { + Sg => dog ; + Pl => dog + "s" + } + } ; +``` +The **gluing** operator ``+`` glues strings into one **token**: +``` + (regNoun "cheese").s ! Pl ===> "cheese" + "s" ===> "cheeses" +``` + + +#NEW + +A more complex example: regular verbs, +``` + oper regVerb : Str -> {s : VerbForm => Str} = \talk -> { + s = table { + VPresent Sg => talk + "s" ; + VPresent Pl => talk ; + VPresPart => talk + "ing" ; + _ => talk + "ed" + } + } ; +``` +The catch-all case for the past tense and the past participle +uses a **wild card** pattern ``_``. + + +#NEW + +===Exercises on morphology=== + ++ Identify cases in which the ``regNoun`` paradigm does not +apply in English, and implement some alternative paradigms. + ++ Implement some regular paradigms for other languages you have +considered in earlier exercises. + + + +#NEW + +==Using parameters in concrete syntax== + +Purpose: a more radical +variation between languages +than just the use of different words and word orders. + +We add to the grammar ``Food`` two rules for forming plural items: +``` + fun These, Those : Kind -> Item ; +``` +We also add a noun which in Italian has the feminine gender: +``` + fun Pizza : Kind ; +``` +This will force us to deal with gender. + + +#NEW + +%--! 
+===Agreement=== + +In English, the phrase-forming rule +``` + fun Is : Item -> Quality -> Phrase ; +``` +is affected by the number because of **subject-verb agreement**: +the verb of a sentence must be inflected in the number of the subject, +``` + Is (This Pizza) Warm ===> "this pizza is warm" + Is (These Pizza) Warm ===> "these pizzas are warm" +``` +It is the **copula** (the verb //be//) that is affected: +``` + oper copula : Number -> Str = \n -> + case n of { + Sg => "is" ; + Pl => "are" + } ; +``` +The **subject** ``Item`` must have such a number to provide to the copula: +``` + lincat Item = {s : Str ; n : Number} ; +``` +Now we can write +``` + lin Is item qual = {s = item.s ++ copula item.n ++ qual.s} ; +``` + + + +#NEW + +===Determiners=== + +How does an ``Item`` subject receive its number? The rules +``` + fun This, These : Kind -> Item ; +``` +add **determiners**, either //this// or //these//, which +require different forms of the noun: //this pizza// vs. +//these pizzas//. + +Thus ``Kind`` must have both singular and plural forms: +``` + lincat Kind = {s : Number => Str} ; +``` +We can write +``` + lin This kind = { + s = "this" ++ kind.s ! Sg ; + n = Sg + } ; + + lin These kind = { + s = "these" ++ kind.s ! Pl ; + n = Pl + } ; +``` + + +#NEW + +To avoid copy-and-paste, we can factor out the pattern of determination, +``` + oper det : + Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = + \n,det,kind -> { + s = det ++ kind.s ! n ; + n = n + } ; +``` +Now we can write +``` + lin This = det Sg "this" ; + lin These = det Pl "these" ; +``` +In a more **lexicalized** grammar, determiners would be a category: +``` + lincat Det = {s : Str ; n : Number} ; + fun Det : Det -> Kind -> Item ; + lin Det det kind = { + s = det.s ++ kind.s ! det.n ; + n = det.n + } ; +``` + + +#NEW + +===Parametric vs. 
inherent features=== + +``Kind``s have number as a **parametric feature**: both singular and plural +can be formed, +``` + lincat Kind = {s : Number => Str} ; +``` +``Item``s have number as an **inherent feature**: they are inherently either +singular or plural, +``` + lincat Item = {s : Str ; n : Number} ; +``` +Italian ``Kind`` will have parametric number and inherent gender: +``` + lincat Kind = {s : Number => Str ; g : Gender} ; +``` + + +#NEW + +Questions to ask when designing parameters: +- existence: what forms are possible to build by morphological and + other means? +- need: what features are expected via agreement or government? + + +Dictionaries give good advice: +#BEQU +**uomo**, pl. //uomini//, n.m. "man" +#ENQU +tells that //uomo// is a masculine noun with the plural form //uomini//. +Hence, parametric number and an inherent gender. + +For words, inherent features are usually given as lexical information. + +For combinations, they are //inherited// from some part of the construction +(typically the one called the **head**). Italian modification: +``` + lin QKind qual kind = + let gen = kind.g in { + s = table {n => kind.s ! n ++ qual.s ! gen ! n} ; + g = gen + } ; +``` +Notice +- **local definition** (``let`` expression) +- **variable pattern** ``n`` + + + +#NEW + +==An English concrete syntax for Foods with parameters== + +Some string operations from the library ``Prelude`` are used. +``` + concrete FoodsEng of Foods = open Prelude in { + + lincat + S, Quality = SS ; + Kind = {s : Number => Str} ; + Item = {s : Str ; n : Number} ; + + lin + Is item quality = ss (item.s ++ copula item.n ++ quality.s) ; + This = det Sg "this" ; + That = det Sg "that" ; + These = det Pl "these" ; + Those = det Pl "those" ; + QKind quality kind = {s = table {n => quality.s ++ kind.s ! 
n}} ; + Wine = regNoun "wine" ; + Cheese = regNoun "cheese" ; + Fish = noun "fish" "fish" ; + Pizza = regNoun "pizza" ; + Very = prefixSS "very" ; + Fresh = ss "fresh" ; + Warm = ss "warm" ; + Italian = ss "Italian" ; + Expensive = ss "expensive" ; + Delicious = ss "delicious" ; + Boring = ss "boring" ; +``` + +#NEW + +``` + param + Number = Sg | Pl ; + + oper + det : Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = + \n,d,cn -> { + s = d ++ cn.s ! n ; + n = n + } ; + noun : Str -> Str -> {s : Number => Str} = + \man,men -> {s = table { + Sg => man ; + Pl => men + } + } ; + regNoun : Str -> {s : Number => Str} = + \car -> noun car (car + "s") ; + copula : Number -> Str = + \n -> case n of { + Sg => "is" ; + Pl => "are" + } ; + } +``` + +#NEW + +==More on inflection paradigms== + +#Lsecinflection + +Let us extend the English noun paradigms so that we can +deal with all nouns, not just the regular ones. The goal is to +provide a morphology module that makes it easy to +add words to a lexicon. + + +#NEW + +===Worst-case functions=== + +We perform **data abstraction** from the type +of nouns by writing a **worst-case function**: +``` + oper Noun : Type = {s : Number => Str} ; + + oper mkNoun : Str -> Str -> Noun = \x,y -> { + s = table { + Sg => x ; + Pl => y + } + } ; + + oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ; +``` +Then we can define +``` + lincat N = Noun ; + lin Mouse = mkNoun "mouse" "mice" ; + lin House = regNoun "house" ; +``` +where the underlying types are not seen. + +#NEW + +We are free to change the underlying definitions, e.g. 
+add **case** (nominative or genitive) to noun inflection: +``` + param Case = Nom | Gen ; + + oper Noun : Type = {s : Number => Case => Str} ; +``` +Now we have to redefine the worst-case function +``` + oper mkNoun : Str -> Str -> Noun = \x,y -> { + s = table { + Sg => table { + Nom => x ; + Gen => x + "'s" + } ; + Pl => table { + Nom => y ; + Gen => y + case last y of { + "s" => "'" ; + _ => "'s" + } + } + } } ; +``` +But up from this level, we can retain the old definitions +``` + lin Mouse = mkNoun "mouse" "mice" ; + oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ; +``` + + + +#NEW + +In the last definition of ``mkNoun``, we used a case expression +on the last character of the plural, as well as the ``Prelude`` +operation +``` + last : Str -> Str ; +``` +returning the string consisting of the last character. + +The case expression uses **pattern matching over strings**, which +is supported in GF, along with pattern matching over +parameters. + + + +#NEW + +===Smart paradigms=== + +The regular //dog//-//dogs// paradigm has +predictable variations: +- nouns ending with a //y//: //fly//-//flies//, except if + a vowel precedes the //y//: //boy//-//boys// +- nouns ending with //s//, //ch//, and a number of + other endings: //bus//-//buses//, //leech//-//leeches// + + +We could provide alternative paradigms: +``` + noun_y : Str -> Noun = \fly -> mkNoun fly (init fly + "ies") ; + noun_s : Str -> Noun = \bus -> mkNoun bus (bus + "es") ; +``` +(The Prelude function ``init`` drops the last character of a token.) 
+ +Drawbacks: +- it can be difficult to select the correct paradigm +- it can be difficult to remember the names of the different paradigms + + +#NEW + +Better solution: a **smart paradigm**: +``` + regNoun : Str -> Noun = \w -> + let + ws : Str = case w of { + _ + ("a" | "e" | "i" | "o") + "o" => w + "s" ; -- bamboo + _ + ("s" | "x" | "sh" | "o") => w + "es" ; -- bus, hero + _ + "z" => w + "zes" ;-- quiz + _ + ("a" | "e" | "o" | "u") + "y" => w + "s" ; -- boy + x + "y" => x + "ies" ;-- fly + _ => w + "s" -- car + } + in + mkNoun w ws +``` +GF has **regular expression patterns**: +- **disjunctive patterns** //P// ``|`` //Q// +- **concatenation patterns** //P// ``+`` //Q// + + +The patterns are ordered in such a way that, for instance, +the suffix ``"oo"`` prevents //bamboo// from matching the suffix +``"o"``. + + +#NEW + +===Exercises on regular patterns=== + ++ The same rules that form plural nouns in English also +apply in the formation of third-person singular verbs. +Write a regular verb paradigm that uses this idea, but first +rewrite ``regNoun`` so that the analysis needed to build //s//-forms +is factored out as a separate ``oper``, which is shared with +``regVerb``. + ++ Extend the verb paradigms to cover all verb forms +in English, with special care taken of variations with the suffix +//ed// (e.g. //try//-//tried//, //use//-//used//). + ++ Implement the German **Umlaut** operation on word stems. +The operation changes the vowel of the stressed stem syllable as follows: +//a// to //ä//, //au// to //äu//, //o// to //ö//, and //u// to //ü//. You +can assume that the operation only takes syllables as arguments. Test the +operation to see whether it correctly changes //Arzt// to //Ärzt//, +//Baum// to //Bäum//, //Topf// to //Töpf//, and //Kuh// to //Küh//. 
+ + + +#NEW + +===Function types with variables=== + +In #Rchapsix, **dependent function types** need a notation +that binds a variable to the argument type, as in +``` + switchOff : (k : Kind) -> Action k +``` +Function types //without// variables are actually a shorthand: +``` + PredVP : NP -> VP -> S +``` +means +``` + PredVP : (x : NP) -> (y : VP) -> S +``` +or any other naming of the variables. + + +#NEW + +Sometimes variables shorten the code, since they can share a type: +``` + octuple : (x,y,z,u,v,w,s,t : Str) -> Str +``` +If a bound variable is not used, it can be replaced by a wildcard: +``` + octuple : (_,_,_,_,_,_,_,_ : Str) -> Str +``` +A good practice is to indicate the number of arguments: +``` + octuple : (x1,_,_,_,_,_,_,x8 : Str) -> Str +``` +For inflection paradigms, it is handy to use heuristic variable names, +looking like the expected forms: +``` + mkNoun : (mouse,mice : Str) -> Noun +``` + + +#NEW + +===Separating operation types and definitions=== + +In libraries, it is useful to group type signatures separately from +definitions. It is possible to divide an ``oper`` judgement, +``` + oper regNoun : Str -> Noun ; + oper regNoun s = mkNoun s (s + "s") ; +``` +and put the parts in different places. + +With the ``interface`` and ``instance`` module types +(see #Rsecinterface), the parts can even be put to different files. + + +#NEW + +===Overloading of operations=== + +**Overloading**: different functions can be given the same name, as e.g. in C++. + +The compiler performs **overload resolution**, which works as long as the +functions have different types. + +In GF, the functions must be grouped together in ``overload`` groups. + +Example: different ways to define nouns in English: +``` + oper mkN : overload { + mkN : (dog : Str) -> Noun ; -- regular nouns + mkN : (mouse,mice : Str) -> Noun ; -- irregular nouns + } +``` +Cf. dictionaries: if the +word is regular, just one form is needed. If it is irregular, +more forms are given. 
+ +The definition can be given separately, or at the same time, as the types: +``` + oper mkN = overload { + mkN : (dog : Str) -> Noun = regNoun ; + mkN : (mouse,mice : Str) -> Noun = mkNoun ; + } +``` +**Exercise**. Design a system of English verb paradigms presented by +an overload group. + + +#NEW + +===Morphological analysis and morphology quiz=== + +The command ``morpho_analyse = ma`` +can be used to read a text and return for each word its analyses +(in the current grammar): +``` + > read_file bible.txt | morpho_analyse +``` +The command ``morpho_quiz = mq`` generates inflection exercises. +``` + % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo + + > morpho_quiz -cat=V + + Welcome to GF Morphology Quiz. + ... + + réapparaître : VFin VCondit Pl P2 + réapparaitriez + > No, not réapparaitriez, but + réapparaîtriez + Score 0/1 +``` +To create a list for later use, use the command ``morpho_list = ml`` +``` + > morpho_list -number=25 -cat=V | write_file exx.txt +``` + + + + +#NEW + +==The Italian Foods grammar== + +#Lsecitalian + +Parameters include not only number but also gender. +``` +concrete FoodsIta of Foods = open Prelude in { + + param + Number = Sg | Pl ; + Gender = Masc | Fem ; +``` +Qualities are inflected for gender and number, whereas kinds +have a parametric number and an inherent gender. +Items have an inherent number and gender. +``` + lincat + Phr = SS ; + Quality = {s : Gender => Number => Str} ; + Kind = {s : Number => Str ; g : Gender} ; + Item = {s : Str ; g : Gender ; n : Number} ; +``` + +#NEW + +A Quality is an adjective, with one form for each gender-number combination. +``` + oper + adjective : (_,_,_,_ : Str) -> {s : Gender => Number => Str} = + \nero,nera,neri,nere -> { + s = table { + Masc => table { + Sg => nero ; + Pl => neri + } ; + Fem => table { + Sg => nera ; + Pl => nere + } + } + } ; +``` +Regular adjectives work by adding endings to the stem. 
+``` + regAdj : Str -> {s : Gender => Number => Str} = \nero -> + let ner = init nero + in adjective nero (ner + "a") (ner + "i") (ner + "e") ; +``` + +#NEW + +For noun inflection, we are happy to give the two forms and the gender +explicitly: +``` + noun : Str -> Str -> Gender -> {s : Number => Str ; g : Gender} = + \vino,vini,g -> { + s = table { + Sg => vino ; + Pl => vini + } ; + g = g + } ; +``` +We need only number variation for the copula. +``` + copula : Number -> Str = + \n -> case n of { + Sg => "è" ; + Pl => "sono" + } ; +``` + +#NEW + +Determination is more complex than in English, because of gender: +``` + det : Number -> Str -> Str -> {s : Number => Str ; g : Gender} -> + {s : Str ; g : Gender ; n : Number} = + \n,m,f,cn -> { + s = case cn.g of {Masc => m ; Fem => f} ++ cn.s ! n ; + g = cn.g ; + n = n + } ; +``` + + +#NEW + +The complete set of linearization rules: +``` + lin + Is item quality = + ss (item.s ++ copula item.n ++ quality.s ! item.g ! item.n) ; + This = det Sg "questo" "questa" ; + That = det Sg "quel" "quella" ; + These = det Pl "questi" "queste" ; + Those = det Pl "quei" "quelle" ; + QKind quality kind = { + s = \\n => kind.s ! n ++ quality.s ! kind.g ! n ; + g = kind.g + } ; + Wine = noun "vino" "vini" Masc ; + Cheese = noun "formaggio" "formaggi" Masc ; + Fish = noun "pesce" "pesci" Masc ; + Pizza = noun "pizza" "pizze" Fem ; + Very qual = {s = \\g,n => "molto" ++ qual.s ! g ! n} ; + Fresh = adjective "fresco" "fresca" "freschi" "fresche" ; + Warm = regAdj "caldo" ; + Italian = regAdj "italiano" ; + Expensive = regAdj "caro" ; + Delicious = regAdj "delizioso" ; + Boring = regAdj "noioso" ; + } +``` + + +#NEW + +===Exercises on using parameters=== + ++ Experiment with multilingual generation and translation in the +``Foods`` grammars. + ++ Add items, qualities, and determiners to the grammar, +and try to get their inflection and inherent features right. 
+ ++ Write a concrete syntax of ``Food`` for a language of your choice, +now aiming for complete grammatical correctness by the use of parameters. + ++ Measure the size of the context-free grammar corresponding to +``FoodsIta``. You can do this by printing the grammar in the context-free format +(``print_grammar -printer=bnf``) and counting the lines. + + + + +#NEW + +==Discontinuous constituents== + +A linearization record may contain more strings than one, and those +strings can be put apart in linearization. + +Example: English particle +verbs (//switch off//). The object can appear between: + +//he switched it off// + +The verb //switch off// is called a +**discontinuous constituent**. + +We can define transitive verbs and their combinations as follows: +``` + lincat TV = {s : Number => Str ; part : Str} ; + + fun AppTV : Item -> TV -> Item -> Phrase ; + + lin AppTV subj tv obj = + {s = subj.s ++ tv.s ! subj.n ++ obj.s ++ tv.part} ; +``` + +**Exercise**. Define the language ``a^n b^n c^n`` in GF, i.e. +any number of //a//'s followed by the same number of //b//'s and +the same number of //c//'s. This language is not context-free, +but can be defined in GF by using discontinuous constituents. + + +#NEW + +==Strings at compile time vs. run time== + +Tokens are created in the following ways: +- quoted string: ``"foo"`` +- gluing : ``t + s`` +- predefined operations ``init, tail, tk, dp`` +- pattern matching over strings + + +Since //tokens must be known at compile time//, +the above operations may not be applied to **run-time variables** +(i.e. variables that stand for function arguments in linearization rules). + +Hence it is not legal to write +``` + cat Noun ; + fun Plural : Noun -> Noun ; + lin Plural n = {s = n.s + "s"} ; +``` +because ``n`` is a run-time variable. Also +``` + lin Plural n = {s = (regNoun n).s ! 
Pl} ; +``` +is incorrect with ``regNoun`` as defined in #Rsecinflection, because the run-time +variable is eventually sent to string pattern matching and gluing. + + +#NEW + +How to write tokens together without a space? +``` + lin Question p = {s = p + "?"} ; +``` +is incorrect. + +The way to go is to use an **unlexer** that creates correct spacing +after linearization. + +Correspondingly, a **lexer** that e.g. analyses ``"warm?"`` into +two tokens is needed before parsing. +This topic will be covered in #Rseclexing. + + + + + + +#NEW + +===Supplementary constructs for concrete syntax=== + +====Record extension and subtyping==== + +The symbol ``**`` is used for both record types and record objects. +``` + lincat TV = Verb ** {c : Case} ; + + lin Follow = regVerb "folgen" ** {c = Dative} ; +``` +``TV`` becomes a **subtype** of ``Verb``. + +If //T// is a subtype of //R//, an object of //T// can be used whenever +an object of //R// is required. + +**Covariance**: a function returning a record //T// as value can +also be used to return a value of a supertype //R//. + +**Contravariance**: a function taking an //R// as argument +can also be applied to any object of a subtype //T//. + + +#NEW + +====Tuples and product types==== + +Product types and tuples are syntactic sugar for record types and records: +``` + T1 * ... * Tn === {p1 : T1 ; ... ; pn : Tn} + <t1, ..., tn> === {p1 = t1 ; ... ; pn = tn} +``` +Thus the labels ``p1, p2,...`` are hard-coded. 
+ + +#NEW + +====Prefix-dependent choices==== + +English indefinite article: +``` + oper artIndef : Str = + pre {"a" ; "an" / strs {"a" ; "e" ; "i" ; "o"}} ; +``` +Thus +``` + artIndef ++ "cheese" ---> "a" ++ "cheese" + artIndef ++ "apple" ---> "an" ++ "apple" +``` + + + + + + + + +#NEW + +=Lesson 4: Using the resource grammar library= + +#Lchapfive + +Goals: +- navigate in the GF resource grammar library and use it in applications +- get acquainted with basic linguistic categories +- write functors to achieve maximal sharing of code in multilingual grammars + + +#NEW + +==The coverage of the library== + +The current 12 resource languages are +- ``Bul``garian +- ``Cat``alan +- ``Dan``ish +- ``Eng``lish +- ``Fin``nish +- ``Fre``nch +- ``Ger``man +- ``Ita``lian +- ``Nor``wegian +- ``Rus``sian +- ``Spa``nish +- ``Swe``dish + + +The first three letters (``Eng`` etc) are used in grammar module names +(ISO 639 standard). + + +#NEW + +==The structure of the library== + +#Lseclexical + +Semantic grammars (up to now in this tutorial): +a grammar defines a system of meanings (abstract syntax) and +tells how they are expressed (concrete syntax). + +Resource grammars (as usual in linguistic tradition): +a grammar specifies the **grammatically correct combinations of words**, +whatever their meanings are. + +With resource grammars, we can achieve a +wider coverage than with semantic grammars. + +#NEW + +===Lexical vs. phrasal rules=== + +A resource grammar has two kinds of categories and two kinds of rules: +- lexical: + - lexical categories, to classify words + - lexical rules, to define words and their properties + +- phrasal (combinatorial, syntactic): + - phrasal categories, to classify phrases of arbitrary size + - phrasal rules, to combine phrases into larger phrases + + +GF makes no formal distinction between these two kinds. + +But it is a good discipline to follow. 
+ + +#NEW + +===Lexical categories=== + +Two kinds of lexical categories: +- **closed**: + - a finite number of words + - seldom extended in the history of language + - structural words / function words, e.g. +``` + Conj ; -- conjunction e.g. "and" + QuantSg ; -- singular quantifier e.g. "this" + QuantPl ; -- plural quantifier e.g. "these" +``` + +- **open**: + - new words are added all the time + - content words, e.g. +``` + N ; -- noun e.g. "pizza" + A ; -- adjective e.g. "good" + V ; -- verb e.g. "sleep" +``` + + +#NEW + +===Lexical rules=== + +Closed classes: module ``Syntax``. In the ``Foods`` grammar, we need +``` + this_QuantSg, that_QuantSg : QuantSg ; + these_QuantPl, those_QuantPl : QuantPl ; + very_AdA : AdA ; +``` +Naming convention: word followed by the category (so we can +distinguish the quantifier //that// from the conjunction //that//). + +Open classes have no objects in ``Syntax``. Words are +built as they are needed in applications: if we have +``` + fun Wine : Kind ; +``` +we will define +``` + lin Wine = mkN "wine" ; +``` +where we use ``mkN`` from ``ParadigmsEng``: + + + +#NEW + +===Resource lexicon=== + +Alternative concrete syntax for +``` + fun Wine : Kind ; +``` +is to provide a **resource lexicon**, which contains definitions such as +``` + oper wine_N : N = mkN "wine" ; +``` +so that we can write +``` + lin Wine = wine_N ; +``` +Advantages: +- we accumulate a reusable lexicon +- we can use a #Rsecfunctor to speed up multilingual grammar implementation + + +#NEW + +===Phrasal categories=== + +In ``Foods``, we need just four phrasal categories: +``` + Cl ; -- clause e.g. "this pizza is good" + NP ; -- noun phrase e.g. "this pizza" + CN ; -- common noun e.g. "warm pizza" + AP ; -- adjectival phrase e.g. "very warm" +``` +Clauses are similar to sentences (``S``), but without a +fixed tense and mood; see #Rsecextended for how they relate. + +Common nouns are made into noun phrases by adding determiners. 
+ + +#NEW + +===Syntactic combinations=== + +We need the following combinations: +``` + mkCl : NP -> AP -> Cl ; -- e.g. "this pizza is very warm" + mkNP : QuantSg -> CN -> NP ; -- e.g. "this pizza" + mkNP : QuantPl -> CN -> NP ; -- e.g. "these pizzas" + mkCN : AP -> CN -> CN ; -- e.g. "warm pizza" + mkAP : AdA -> AP -> AP ; -- e.g. "very warm" +``` +We also need **lexical insertion**, to form phrases from single words: +``` + mkCN : N -> CN ; + mkAP : A -> AP ; +``` +Naming convention: to construct a //C//, use a function ``mk``//C//. + +Heavy overloading: the current library +(version 1.2) has 23 operations named ``mkNP``! + + +#NEW + +===Example syntactic combination=== + +The sentence +#BEQU +//these very warm pizzas are Italian// +#ENQU +can be built as follows: +``` + mkCl + (mkNP these_QuantPl + (mkCN (mkAP very_AdA (mkAP warm_A)) (mkCN pizza_N))) + (mkAP italian_A) +``` +The task now: to define the concrete syntax of ``Foods`` so that +this syntactic tree gives the value of linearizing the semantic tree +``` + Is (These (QKind (Very Warm) Pizza)) Italian +``` + + + +#NEW + +==The resource API== + +Language-specific and language-independent parts - roughly, +- the syntax API ``Syntax``//L// has the same types and + functions for all languages //L// +- the morphology API ``Paradigms``//L// has partly + different types and functions + for different languages //L// + + +Full API documentation on-line: the **resource synopsis**, + +[``grammaticalframework.org/lib/resource/doc/synopsis.html`` http://grammaticalframework.org/lib/doc/synopsis.html] + + +#NEW + +===A miniature resource API: categories=== + +|| Category | Explanation | Example || +| ``Cl`` | clause (sentence), with all tenses | //she looks at this// | +| ``AP`` | adjectival phrase | //very warm// | +| ``CN`` | common noun (without determiner) | //red house// | +| ``NP`` | noun phrase (subject or object) | //the red house// | +| ``AdA`` | adjective-modifying adverb, | //very// | +| ``QuantSg`` | 
singular quantifier | //this// | +| ``QuantPl`` | plural quantifier | //these// | +| ``A`` | one-place adjective | //warm// | +| ``N`` | common noun | //house// | + + +#NEW + +===A miniature resource API: rules=== + +|| Function | Type | Example || +| ``mkCl`` | ``NP -> AP -> Cl`` | //John is very old// | +| ``mkNP`` | ``QuantSg -> CN -> NP`` | //this old man// | +| ``mkNP`` | ``QuantPl -> CN -> NP`` | //these old men// | +| ``mkCN`` | ``N -> CN`` | //house// | +| ``mkCN`` | ``AP -> CN -> CN`` | //very big blue house// | +| ``mkAP`` | ``A -> AP`` | //old// | +| ``mkAP`` | ``AdA -> AP -> AP`` | //very very old// | + +#NEW + +===A miniature resource API: structural words=== + +|| Function | Type | In English || +| ``this_QuantSg`` | ``QuantSg`` | //this// | +| ``that_QuantSg`` | ``QuantSg`` | //that// | +| ``these_QuantPl`` | ``QuantPl`` | //these// | +| ``those_QuantPl`` | ``QuantPl`` | //those// | +| ``very_AdA`` | ``AdA`` | //very// | + + +#NEW + +===A miniature resource API: paradigms=== + +From ``ParadigmsEng``: + +|| Function | Type || +| ``mkN`` | ``(dog : Str) -> N`` | +| ``mkN`` | ``(man,men : Str) -> N`` | +| ``mkA`` | ``(cold : Str) -> A`` | + +From ``ParadigmsIta``: + +|| Function | Type || +| ``mkN`` | ``(vino : Str) -> N`` | +| ``mkA`` | ``(caro : Str) -> A`` | + + +#NEW + +===A miniature resource API: more paradigms=== + +From ``ParadigmsGer``: + +|| Function | Type || +| ``Gender`` | ``Type`` | +| ``masculine`` | ``Gender`` | +| ``feminine`` | ``Gender`` | +| ``neuter`` | ``Gender`` | +| ``mkN`` | ``(Stufe : Str) -> N`` | +| ``mkN`` | ``(Bild,Bilder : Str) -> Gender -> N`` | +| ``mkA`` | ``(klein : Str) -> A`` | +| ``mkA`` | ``(gut,besser,beste : Str) -> A`` | + +From ``ParadigmsFin``: + +|| Function | Type || +| ``mkN`` | ``(talo : Str) -> N`` | +| ``mkA`` | ``(hieno : Str) -> A`` | + + + +#NEW + +===Exercises=== + +1. Try out the morphological paradigms in different languages. 
Do +as follows: +``` + > i -path=alltenses -retain alltenses/ParadigmsGer.gfo + > cc -table mkN "Farbe" + > cc -table mkA "gut" "besser" "beste" +``` + + +#NEW + +==Example: English== + +#Lsecenglish + +We assume the abstract syntax ``Foods`` from #Rchapfour. + +We don't need to think about inflection and agreement, but just pick +functions from the resource grammar library. + +We need a path with +- the current directory ``.`` +- the directory ``../foods``, in which ``Foods.gf`` resides. +- the library directory ``present``, which is relative to the + environment variable ``GF_LIB_PATH`` + + +Thus the beginning of the module is +``` + --# -path=.:../foods:present + + concrete FoodsEng of Foods = open SyntaxEng,ParadigmsEng in { +``` + + +#NEW + +===English example: linearization types and combination rules=== + +As linearization types, we use clauses for ``Phrase``, noun phrases +for ``Item``, common nouns for ``Kind``, and adjectival phrases for ``Quality``. +``` + lincat + Phrase = Cl ; + Item = NP ; + Kind = CN ; + Quality = AP ; +``` +Now the combination rules we need almost write themselves automatically: +``` + lin + Is item quality = mkCl item quality ; + This kind = mkNP this_QuantSg kind ; + That kind = mkNP that_QuantSg kind ; + These kind = mkNP these_QuantPl kind ; + Those kind = mkNP those_QuantPl kind ; + QKind quality kind = mkCN quality kind ; + Very quality = mkAP very_AdA quality ; +``` + + +#NEW + +===English example: lexical rules=== + +We use resource paradigms and lexical insertion rules. + +The two-place noun paradigm is needed only once, for +//fish// - everything else is regular. 
+``` + Wine = mkCN (mkN "wine") ; + Pizza = mkCN (mkN "pizza") ; + Cheese = mkCN (mkN "cheese") ; + Fish = mkCN (mkN "fish" "fish") ; + Fresh = mkAP (mkA "fresh") ; + Warm = mkAP (mkA "warm") ; + Italian = mkAP (mkA "Italian") ; + Expensive = mkAP (mkA "expensive") ; + Delicious = mkAP (mkA "delicious") ; + Boring = mkAP (mkA "boring") ; + } +``` + + +#NEW + +===English example: exercises=== + +1. Compile the grammar ``FoodsEng`` and generate +and parse some sentences. + +2. Write a concrete syntax of ``Foods`` for Italian +or some other language included in the resource library. You can +compare the results with the hand-written +grammars presented earlier in this tutorial. + + + +#NEW + +==Functor implementation of multilingual grammars== + +#Lsecfunctor + +===New language by copy and paste=== + +If you write a concrete syntax of ``Foods`` for some other +language, much of the code will look exactly the same +as for English. This is because +- the ``Syntax`` API is the same for all languages (because + all languages in the resource package do implement the same + syntactic structures) +- languages tend to use the syntactic structures in similar ways + + +But lexical rules are more language-dependent. + +Thus, to port a grammar to a new language, you ++ copy the concrete syntax of a given language ++ change the words (strings and inflection paradigms) + + +Can we avoid this programming by copy-and-paste? + + + +#NEW + +===Functors: functions on the module level=== + +**Functors** familiar from the functional programming languages ML and OCaml, +also known as **parametrized modules**. + +In GF, a functor is a module that ``open``s one or more **interfaces**. + +An ``interface`` is a module similar to a ``resource``, but it only +contains the //types// of ``oper``s, not (necessarily) their definitions. + +Syntax for functors: add the keyword ``incomplete``. 
We will use the header +``` + incomplete concrete FoodsI of Foods = open Syntax, LexFoods in +``` +where +``` + interface Syntax -- the resource grammar interface + interface LexFoods -- the domain lexicon interface +``` +When we moreover have +``` + instance SyntaxEng of Syntax -- the English resource grammar + instance LexFoodsEng of LexFoods -- the English domain lexicon +``` +we can write a **functor instantiation**, +``` + concrete FoodsGer of Foods = FoodsI with + (Syntax = SyntaxGer), + (LexFoods = LexFoodsGer) ; +``` + +#NEW + +===Code for the Foods functor=== + +``` + --# -path=.:../foods + + incomplete concrete FoodsI of Foods = open Syntax, LexFoods in { + lincat + Phrase = Cl ; + Item = NP ; + Kind = CN ; + Quality = AP ; + lin + Is item quality = mkCl item quality ; + This kind = mkNP this_QuantSg kind ; + That kind = mkNP that_QuantSg kind ; + These kind = mkNP these_QuantPl kind ; + Those kind = mkNP those_QuantPl kind ; + QKind quality kind = mkCN quality kind ; + Very quality = mkAP very_AdA quality ; + + Wine = mkCN wine_N ; + Pizza = mkCN pizza_N ; + Cheese = mkCN cheese_N ; + Fish = mkCN fish_N ; + Fresh = mkAP fresh_A ; + Warm = mkAP warm_A ; + Italian = mkAP italian_A ; + Expensive = mkAP expensive_A ; + Delicious = mkAP delicious_A ; + Boring = mkAP boring_A ; + } +``` + + +#NEW + +===Code for the LexFoods interface=== + +#Lsecinterface + +``` + interface LexFoods = open Syntax in { + oper + wine_N : N ; + pizza_N : N ; + cheese_N : N ; + fish_N : N ; + fresh_A : A ; + warm_A : A ; + italian_A : A ; + expensive_A : A ; + delicious_A : A ; + boring_A : A ; + } +``` + +#NEW + +===Code for a German instance of the lexicon=== + +``` + instance LexFoodsGer of LexFoods = open SyntaxGer, ParadigmsGer in { + oper + wine_N = mkN "Wein" ; + pizza_N = mkN "Pizza" "Pizzen" feminine ; + cheese_N = mkN "Käse" "Käsen" masculine ; + fish_N = mkN "Fisch" ; + fresh_A = mkA "frisch" ; + warm_A = mkA "warm" "wärmer" "wärmste" ; + italian_A = mkA "italienisch" ; + 
expensive_A = mkA "teuer" ; + delicious_A = mkA "köstlich" ; + boring_A = mkA "langweilig" ; + } +``` + + +#NEW + +===Code for a German functor instantiation=== + +``` + --# -path=.:../foods:present + + concrete FoodsGer of Foods = FoodsI with + (Syntax = SyntaxGer), + (LexFoods = LexFoodsGer) ; +``` + + + +#NEW + +===Adding languages to a functor implementation=== + +Just two modules are needed: +- a domain lexicon instance +- a functor instantiation + + +The functor instantiation is completely mechanical to write. + +The domain lexicon instance requires some knowledge of the words of the +language: +- what words are used for which concepts +- how the words are inflected +- features such as genders + + +#NEW + +===Example: adding Finnish=== + +Lexicon instance +``` + instance LexFoodsFin of LexFoods = open SyntaxFin, ParadigmsFin in { + oper + wine_N = mkN "viini" ; + pizza_N = mkN "pizza" ; + cheese_N = mkN "juusto" ; + fish_N = mkN "kala" ; + fresh_A = mkA "tuore" ; + warm_A = mkA "lämmin" ; + italian_A = mkA "italialainen" ; + expensive_A = mkA "kallis" ; + delicious_A = mkA "herkullinen" ; + boring_A = mkA "tylsä" ; + } +``` +Functor instantiation +``` + --# -path=.:../foods:present + + concrete FoodsFin of Foods = FoodsI with + (Syntax = SyntaxFin), + (LexFoods = LexFoodsFin) ; +``` + + +#NEW + +===A design pattern=== + +This can be seen as a //design pattern// for multilingual grammars: +``` + concrete DomainL* + + instance LexDomainL instance SyntaxL* + + incomplete concrete DomainI + / | \ + interface LexDomain abstract Domain interface Syntax* +``` +Modules marked with ``*`` are either given in the library, or trivial. + +Of the hand-written modules, only ``LexDomainL`` is language-dependent. + + +#NEW + +===Functors: exercises=== + +1. Compile and test ``FoodsGer``. + +2. Refactor ``FoodsEng`` into a functor instantiation. + +3. Instantiate the functor ``FoodsI`` to some language of +your choice. + +4. 
Design a small grammar that can be used for controlling +an MP3 player. The grammar should be able to recognize commands such +as //play this song//, with the following variations: +- verbs: //play//, //remove// +- objects: //song//, //artist// +- determiners: //this//, //the previous// +- verbs without arguments: //stop//, //pause// + + +The implementation goes in the following phases: ++ abstract syntax ++ (optional:) prototype string-based concrete syntax ++ functor over resource syntax and lexicon interface ++ lexicon instance for the first language ++ functor instantiation for the first language ++ lexicon instance for the second language ++ functor instantiation for the second language ++ ... + + + +#NEW + +==Restricted inheritance== + +===A problem with functors=== + +Problem: a functor only works when all languages use the resource ``Syntax`` +in the same way. + +Example (contrived): assume that English has +no word for ``Pizza``, but has to use the paraphrase //Italian pie//. +This is no longer a noun ``N``, but a complex phrase +in the category ``CN``. + +Possible solution: change the interface ``LexFoods`` with +``` + oper pizza_CN : CN ; +``` +Problem with this solution: +- we may end up changing the interface and the functor with each new language +- we must every time also change the instances for the old languages to maintain + type correctness + + +#NEW + +===Restricted inheritance: include or exclude=== + +A module may inherit just a selection of names. + +Example: the ``FoodMarket`` example #Rsecarchitecture: +``` + abstract Foodmarket = Food, Fruit [Peach], Mushroom - [Agaric] +``` +Here, from ``Fruit`` we include ``Peach`` only, and from ``Mushroom`` +we exclude ``Agaric``. + +A concrete syntax of ``Foodmarket`` must make the analogous restrictions. + + +#NEW + +===The functor problem solved=== + +The English instantiation inherits the functor +implementation except for the constant ``Pizza``. 
This constant +is defined in the body instead: +``` + --# -path=.:../foods:present + + concrete FoodsEng of Foods = FoodsI - [Pizza] with + (Syntax = SyntaxEng), + (LexFoods = LexFoodsEng) ** + open SyntaxEng, ParadigmsEng in { + + lin Pizza = mkCN (mkA "Italian") (mkN "pie") ; + } +``` + + +#NEW + +==Grammar reuse== + +Abstract syntax modules can be used as interfaces, +and concrete syntaxes as their instances. + +The following correspondences are then applied: +``` + cat C <---> oper C : Type + + fun f : A <---> oper f : A + + lincat C = T <---> oper C : Type = T + + lin f = t <---> oper f : A = t +``` + + + + +#NEW + +===Library exercises=== + +1. Find resource grammar terms for the following +English phrases (in the category ``Phr``). You can first try to +build the terms manually. + +//every man loves a woman// + +//this grammar speaks more than ten languages// + +//which languages aren't in the grammar// + +//which languages did you want to speak// + + +Then translate the phrases to other languages. + + +#NEW + +==Tenses== + +#Lsectense + +In ``Foods`` grammars, we have used the path +``` + --# -path=.:../foods:present +``` +The library subdirectory ``present`` is a restricted version +of the resource, with only present tense of verbs and sentences. 
+ +By just changing the path, we get all tenses: +``` + --# -path=.:../foods:alltenses +``` +Now we can see all the tenses of phrases, by using the ``-all`` flag +in linearization: +``` + > gr | l -all + This wine is delicious + Is this wine delicious + This wine isn't delicious + Isn't this wine delicious + This wine is not delicious + Is this wine not delicious + This wine has been delicious + Has this wine been delicious + This wine hasn't been delicious + Hasn't this wine been delicious + This wine has not been delicious + Has this wine not been delicious + This wine was delicious + Was this wine delicious + This wine wasn't delicious + Wasn't this wine delicious + This wine was not delicious + Was this wine not delicious + This wine had been delicious + Had this wine been delicious + This wine hadn't been delicious + Hadn't this wine been delicious + This wine had not been delicious + Had this wine not been delicious + This wine will be delicious + Will this wine be delicious + This wine won't be delicious + Won't this wine be delicious + This wine will not be delicious + Will this wine not be delicious + This wine will have been delicious + Will this wine have been delicious + This wine won't have been delicious + Won't this wine have been delicious + This wine will not have been delicious + Will this wine not have been delicious + This wine would be delicious + Would this wine be delicious + This wine wouldn't be delicious + Wouldn't this wine be delicious + This wine would not be delicious + Would this wine not be delicious + This wine would have been delicious + Would this wine have been delicious + This wine wouldn't have been delicious + Wouldn't this wine have been delicious + This wine would not have been delicious + Would this wine not have been delicious +``` +We also see +- polarity (positive vs. negative) +- word order (direct vs. 
inverted) +- variation between contracted and full negation + + +The list is even longer in languages that have more +tenses and moods, e.g. the Romance languages. + + + +#NEW + +=Lesson 5: Refining semantics in abstract syntax= + +#Lchapsix + +Goals: +- include semantic conditions in grammars, by using + - **dependent types** + - **higher order abstract syntax** + - proof objects + - semantic definitions + +These concepts are inherited from **type theory** (more precisely: +constructive type theory, or Martin-Lf type theory). + +Type theory is the basis **logical frameworks**. + +GF = logical framework + concrete syntax. + + +#NEW + +==Dependent types== + +#Lsecsmarthouse + +Problem: to express **conditions of semantic well-formedness**. + +Example: a voice command system for a "smart house" wants to +eliminate meaningless commands. + +Thus we want to restrict particular actions to +particular devices - we can //dim a light//, but we cannot +//dim a fan//. + +The following example is borrowed from the +Regulus Book (Rayner & al. 2006). + +A simple example is a "smart house" system, which +defines voice commands for household appliances. + + +#NEW + +===A dependent type system=== + +Ontology: +- there are commands and device kinds +- for each kind of device, there are devices and actions +- a command concerns an action of some kind on a device of the same kind + + +Abstract syntax formalizing this: +``` + cat + Command ; + Kind ; + Device Kind ; -- argument type Kind + Action Kind ; + fun + CAction : (k : Kind) -> Action k -> Device k -> Command ; +``` +``Device`` and ``Action`` are both dependent types. + + +#NEW + +===Examples of devices and actions=== + +Assume the kinds ``light`` and ``fan``, +``` + light, fan : Kind ; + dim : Action light ; +``` +Given a kind, //k//, you can form the device //the k//. 
+``` + DKindOne : (k : Kind) -> Device k ; -- the light +``` +Now we can form the syntax tree +``` + CAction light dim (DKindOne light) +``` +but we cannot form the trees +``` + CAction light dim (DKindOne fan) + CAction fan dim (DKindOne light) + CAction fan dim (DKindOne fan) +``` + + +#NEW + +===Linearization and parsing with dependent types=== + +Concrete syntax does not know if a category is a dependent type. +``` + lincat Action = {s : Str} ; + lin CAction _ act dev = {s = act.s ++ dev.s} ; +``` +Notice that the ``Kind`` argument is suppressed in linearization. + +Parsing with dependent types is performed in two phases: ++ context-free parsing ++ filtering through type checker + + +By just doing the first phase, the ``kind`` argument is not found: +``` + > parse "dim the light" + CAction ? dim (DKindOne light) +``` +Moreover, type-incorrect commands are not rejected: +``` + > parse "dim the fan" + CAction ? dim (DKindOne fan) +``` +The term ``?`` is a **metavariable**, returned by the parser +for any subtree that is suppressed by a linearization rule. +These are the same kind of metavariables as were used #Rsecediting +to mark incomplete parts of trees in the syntax editor. + + + +#NEW + +===Solving metavariables=== + +Use the command ``put_tree = pt`` with the option ``-typecheck``: +``` + > parse "dim the light" | put_tree -typecheck + CAction light dim (DKindOne light) +``` +The ``typecheck`` process may fail, in which case an error message +is shown and no tree is returned: +``` + > parse "dim the fan" | put_tree -typecheck + + Error in tree UCommand (CAction ? 0 dim (DKindOne fan)) : + (? 0 <> fan) (? 0 <> light) +``` + + + + +#NEW + +==Polymorphism== + +#Lsecpolymorphic + +Sometimes an action can be performed on all kinds of devices. 
+ +This is represented as a function that takes a ``Kind`` as an argument +and produces an ``Action`` for that ``Kind``: +``` + fun switchOn, switchOff : (k : Kind) -> Action k ; +``` +Functions of this kind are called **polymorphic**. + +We can use this kind of polymorphism in concrete syntax as well, +to express Haskell-type library functions: +``` + oper const : (a,b : Type) -> a -> b -> a = + \_,_,c,_ -> c ; + + oper flip : (a,b,c : Type) -> (a -> b -> c) -> b -> a -> c = + \_,_,_,f,x,y -> f y x ; +``` + + +#NEW + +===Dependent types: exercises=== + +1. Write an abstract syntax module with the above contents +and an appropriate English concrete syntax. Try to parse the commands +//dim the light// and //dim the fan//, with and without ``solve`` filtering. + + +2. Perform random and exhaustive generation, with and without +``solve`` filtering. + +3. Add some device kinds and actions to the grammar. + + + +#NEW + +==Proof objects== + +**Curry-Howard isomorphism** = **propositions as types principle**: +a proposition is a type of proofs (= proof objects). + +Example: define the //less than// proposition for natural numbers, +``` + cat Nat ; + fun Zero : Nat ; + fun Succ : Nat -> Nat ; +``` +Define inductively what it means for a number //x// to be //less than// +a number //y//: +- ``Zero`` is less than ``Succ`` //y// for any //y//. +- If //x// is less than //y//, then ``Succ`` //x// is less than ``Succ`` //y//. 
+ + +Expressing these axioms in type theory +with a dependent type ``Less`` //x y// and two functions constructing +its objects: +``` + cat Less Nat Nat ; + fun lessZ : (y : Nat) -> Less Zero (Succ y) ; + fun lessS : (x,y : Nat) -> Less x y -> Less (Succ x) (Succ y) ; +``` +Example: the fact that 2 is less than 4 has the proof object +``` + lessS (Succ Zero) (Succ (Succ (Succ Zero))) + (lessS Zero (Succ (Succ Zero)) (lessZ (Succ Zero))) + : Less (Succ (Succ Zero)) (Succ (Succ (Succ (Succ Zero)))) +``` + + + +#NEW + +===Proof-carrying documents=== + +Idea: to be semantically well-formed, the abstract syntax of a document +must contain a proof of some property, +although the proof is not shown in the concrete document. + +Example: documents describing flight connections: + +//To fly from Gothenburg to Prague, first take LH3043 to Frankfurt, then OK0537 to Prague.// + +The well-formedness of this text is partly expressible by dependent typing: +``` + cat + City ; + Flight City City ; + fun + Gothenburg, Frankfurt, Prague : City ; + LH3043 : Flight Gothenburg Frankfurt ; + OK0537 : Flight Frankfurt Prague ; +``` +To extend the conditions to flight connections, we introduce a category +of proofs that a change is possible: +``` + cat IsPossible (x,y,z : City)(Flight x y)(Flight y z) ; +``` +A legal connection is formed by the function +``` + fun Connect : (x,y,z : City) -> + (u : Flight x y) -> (v : Flight y z) -> + IsPossible x y z u v -> Flight x z ; +``` + + +#NEW + +==Restricted polymorphism== + +Above, all Actions were either +- **monomorphic**: defined for one Kind +- **polymorphic**: defined for all Kinds + + +To make this scale up for new Kinds, we can refine this to +**restricted polymorphism**: defined for Kinds of a certain **class** + + +The notion of class uses the Curry-Howard isomorphism as follows: +- a class is a **predicate** of Kinds --- i.e. 
a type depending of Kinds +- a Kind is in a class if there is a proof object of this type + + +#NEW + +===Example: classes for switching and dimming=== + +We modify the smart house grammar: +``` +cat + Switchable Kind ; + Dimmable Kind ; +fun + switchable_light : Switchable light ; + switchable_fan : Switchable fan ; + dimmable_light : Dimmable light ; + + switchOn : (k : Kind) -> Switchable k -> Action k ; + dim : (k : Kind) -> Dimmable k -> Action k ; +``` +Classes for new actions can be added incrementally. + + + +#NEW + +==Variable bindings== + +#Lsecbinding + +Mathematical notation and programming languages have +expressions that **bind** variables. + +Example: universal quantifier formula +``` + (All x)B(x) +``` +The variable ``x`` has a **binding** ``(All x)``, and +occurs **bound** in the **body** ``B(x)``. + +Examples from informal mathematical language: +``` + for all x, x is equal to x + + the function that for any numbers x and y returns the maximum of x+y + and x*y + + Let x be a natural number. Assume that x is even. Then x + 3 is odd. +``` + + + +#NEW + +===Higher-order abstract syntax=== + +Abstract syntax can use functions as arguments: +``` + cat Ind ; Prop ; + fun All : (Ind -> Prop) -> Prop +``` +where ``Ind`` is the type of individuals and ``Prop``, +the type of propositions. + +Let us add an equality predicate +``` + fun Eq : Ind -> Ind -> Prop +``` +Now we can form the tree +``` + All (\x -> Eq x x) +``` +which we want to relate to the ordinary notation +``` + (All x)(x = x) +``` +In **higher-order abstract syntax** (HOAS), all variable bindings are +expressed using higher-order syntactic constructors. + + +#NEW + +===Higher-order abstract syntax: linearization=== + +HOAS has proved to be useful in the semantics and computer implementation of +variable-binding expressions. + +How do we relate HOAS to the concrete syntax? 
+ +In GF, we write +``` + fun All : (Ind -> Prop) -> Prop + lin All B = {s = "(" ++ "All" ++ B.$0 ++ ")" ++ B.s} +``` +General rule: if an argument type of a ``fun`` function is +a function type ``A -> C``, the linearization type of +this argument is the linearization type of ``C`` +together with a new field ``$0 : Str``. + +The argument ``B`` thus has the linearization type +``` + {s : Str ; $0 : Str}, +``` +If there are more bindings, we add ``$1``, ``$2``, etc. + + +#NEW + +===Eta expansion=== + +To make sense of linearization, syntax trees must be +**eta-expanded**: for any function of type +``` + A -> B +``` +an eta-expanded syntax tree has the form +``` + \x -> b +``` +where ``b : B`` under the assumption ``x : A``. + +Given the linearization rule +``` + lin Eq a b = {s = "(" ++ a.s ++ "=" ++ b.s ++ ")"} +``` +the linearization of the tree +``` + \x -> Eq x x +``` +is the record +``` + {$0 = "x", s = ["( x = x )"]} +``` +Then we can compute the linearization of the formula, +``` + All (\x -> Eq x x) --> {s = "[( All x ) ( x = x )]"}. +``` +The linearization of the variable ``x`` is, +"automagically", the string ``"x"``. + + + +#NEW + +===Parsing variable bindings=== + +GF can treat any one-word string as a variable symbol. +``` + > p -cat=Prop "( All x ) ( x = x )" + All (\x -> Eq x x) +``` +Variables must be bound if they are used: +``` + > p -cat=Prop "( All x ) ( x = y )" + no tree found +``` + + + + +#NEW + +===Exercises on variable bindings=== + +1. Write an abstract syntax of the whole +**predicate calculus**, with the +**connectives** "and", "or", "implies", and "not", and the +**quantifiers** "exists" and "for all". Use higher-order functions +to guarantee that unbounded variables do not occur. + +2. Write a concrete syntax for your favourite +notation of predicate calculus. Use Latex as target language +if you want nice output. You can also try producing boolean +expressions of some programming language. 
Use as many parentheses as you need to +guarantee non-ambiguity. + + +#NEW + +==Semantic definitions== + +#Lsecdefdef + +The ``fun`` judgements of GF are declarations of functions, giving their types. + +Can we **compute** ``fun`` functions? + +Mostly we are not interested, since functions are seen as constructors, +i.e. data forms - as usual with +``` + fun Zero : Nat ; + fun Succ : Nat -> Nat ; +``` +But it is also possible to give **semantic definitions** to functions. +The key word is ``def``: +``` + fun one : Nat ; + def one = Succ Zero ; + + fun twice : Nat -> Nat ; + def twice x = plus x x ; + + fun plus : Nat -> Nat -> Nat ; + def + plus x Zero = x ; + plus x (Succ y) = Succ (plus x y) ; +``` + +#NEW + +===Computing a tree=== + +Computation: follow a chain of definitions until no definition +can be applied, +``` + plus one one --> + plus (Succ Zero) (Succ Zero) --> + Succ (plus (Succ Zero) Zero) --> + Succ (Succ Zero) +``` +Computation in GF is performed with the ``put_term`` command and the +``compute`` transformation, e.g. +``` + > parse -tr "1 + 1" | put_term -transform=compute -tr | l + plus one one + Succ (Succ Zero) + s(s(0)) +``` + + +#NEW + +===Definitional equality=== + +Two trees are definitionally equal if they compute into the same tree. + +Definitional equality does not guarantee sameness of linearization: +``` + plus one one ===> 1 + 1 + Succ (Succ Zero) ===> s(s(0)) +``` +The main use of this concept is in type checking: sameness of types. + +Thus e.g. the following types are equal +``` + Less Zero one + Less Zero (Succ Zero) +``` +so that an object of one also is an object of the other. 
+ + + +#NEW + +===Judgement forms for constructors=== + +The judgement form ``data`` tells that a category has +certain functions as constructors: +``` + data Nat = Succ | Zero ; +``` +The type signatures of constructors are given separately, +``` + fun Zero : Nat ; + fun Succ : Nat -> Nat ; +``` +There is also a shorthand: +``` + data Succ : Nat -> Nat ; === fun Succ : Nat -> Nat ; + data Nat = Succ ; +``` +Notice: in ``def`` definitions, identifier patterns not +marked as ``data`` will be treated as variables. + + +#NEW + +===Exercises on semantic definitions=== + +1. Implement an interpreter of a small functional programming +language with natural numbers, lists, pairs, lambdas, etc. Use higher-order +abstract syntax with semantic definitions. As concrete syntax, use +your favourite programming language. + +2. There is no termination checking for ``def`` definitions. +Construct an example that makes type checking loop. +Type checking can be invoked with ``put_term -transform=solve``. + + + +#NEW + +=Lesson 6: Grammars of formal languages= + + +#Lchapseven + +Goals: +- write grammars for formal languages (mathematical notation, programming languages) +- interface between formal and natural languages +- implement a compiler by using GF + + +#NEW + +===Arithmetic expressions=== + +We construct a calculator with addition, subtraction, multiplication, and +division of integers. +``` + abstract Calculator = { + + cat Exp ; + + fun + EPlus, EMinus, ETimes, EDiv : Exp -> Exp -> Exp ; + EInt : Int -> Exp ; + } +``` +The category ``Int`` is a built-in category of +integers. Its syntax trees are **integer literals**, i.e. +sequences of digits: +``` + 5457455814608954681 : Int +``` +These are the only objects of type ``Int``: +grammars are not allowed to declare functions with ``Int`` as value type. 
+ + +#NEW + +===Concrete syntax: a simple approach=== + +We begin with a +concrete syntax that always uses parentheses around binary +operator applications: +``` + concrete CalculatorP of Calculator = { + + lincat + Exp = SS ; + lin + EPlus = infix "+" ; + EMinus = infix "-" ; + ETimes = infix "*" ; + EDiv = infix "/" ; + EInt i = i ; + + oper + infix : Str -> SS -> SS -> SS = \f,x,y -> + ss ("(" ++ x.s ++ f ++ y.s ++ ")") ; + } +``` +Now we have +``` + > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) + ( 2 + ( 3 * 4 ) ) +``` +First problems: +- to get rid of superfluous spaces and +- to recognize integer literals in the parser + + +#NEW + +==Lexing and unlexing== + +#Lseclexing + +The input of parsing in GF is not just a string, but a list of +**tokens**, returned by a **lexer**. + +The default lexer in GF returns chunks separated by spaces: +``` + "(12 + (3 * 4))" ===> "(12", "+", "(3". "*". "4))" +``` +The proper way would be +``` + "(", "12", "+", "(", "3", "*", "4", ")", ")" +``` +Moreover, the tokens ``"12"``, ``"3"``, and ``"4"`` should be recognized as +integer literals - they cannot be found in the grammar. + + +#NEW + +Lexers are invoked by flags to the command ``put_string = ps``. 
+``` + > put_string -lexcode "(2 + (3 * 4))" + ( 2 + ( 3 * 4 ) ) +``` +This can be piped into a parser, as usual: +``` + > ps -lexcode "(2 + (3 * 4))" | parse + EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) +``` +In linearization, we use a corresponding **unlexer**: +``` + > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) | ps -unlexcode + (2 + (3 * 4)) +``` + + +#NEW + +===Most common lexers and unlexers=== + + || lexer | unlexer | description || + | ``chars`` | ``unchars`` | each character is a token + | ``lexcode`` | ``unlexcode`` | program code conventions (uses Haskell's lex) + | ``lexmixed`` | ``unlexmixed`` | like text, but between $ signs like code + | ``lextext`` | ``unlextext`` | with conventions on punctuation and capitals + | ``words`` | ``unwords`` | (default) tokens separated by space characters + +%TODO: also on alphabet encodings - although somewhere else + + +#NEW + +==Precedence and fixity== + +Arithmetic expressions should be unambiguous. If we write +``` + 2 + 3 * 4 +``` +it should be parsed as one, but not both, of +``` + EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) + ETimes (EPlus (EInt 2) (EInt 3)) (EInt 4) +``` +We choose the former tree, because +multiplication has **higher precedence** than addition. + +To express the latter tree, we have to use parentheses: +``` + (2 + 3) * 4 +``` +The usual precedence rules: +- Integer constants and expressions in parentheses have the highest precedence. +- Multiplication and division have equal precedence, lower than the highest + but higher than addition and subtraction, which are again equal. +- All the four binary operations are **left-associative**: + ``1 + 2 + 3`` means the same as ``(1 + 2) + 3``. 
+ + + +#NEW + +===Precedence as a parameter=== + +Precedence can be made into an inherent feature of expressions: +``` + oper + Prec : PType = Ints 2 ; + TermPrec : Type = {s : Str ; p : Prec} ; + + mkPrec : Prec -> Str -> TermPrec = \p,s -> {s = s ; p = p} ; + + lincat + Exp = TermPrec ; +``` +Notice ``Ints 2``: a parameter type, whose values are the integers +``0,1,2``. + +Using precedence levels: compare the inherent precedence of an +expression with the expected precedence. +- if the inherent precedence is lower than the expected precedence, + use parentheses +- otherwise, no parentheses are needed + + +This idea is encoded in the operation +``` + oper usePrec : TermPrec -> Prec -> Str = \x,p -> + case lessPrec x.p p of { + True => "(" ++ x.s ++ ")" ; + False => x.s + } ; +``` +(We use ``lessPrec`` from ``lib/prelude/Formal``.) + + + +#NEW + +===Fixities=== + +We can define left-associative infix expressions: +``` + infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y -> + mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; +``` +Constant-like expressions (the highest level): +``` + constant : Str -> TermPrec = mkPrec 2 ; +``` +All these operations can be found in ``lib/prelude/Formal``, +which has 5 levels. + +Now we can write the whole concrete syntax of ``Calculator`` compactly: +``` + concrete CalculatorC of Calculator = open Formal, Prelude in { + + flags lexer = codelit ; unlexer = code ; startcat = Exp ; + + lincat Exp = TermPrec ; + + lin + EPlus = infixl 0 "+" ; + EMinus = infixl 0 "-" ; + ETimes = infixl 1 "*" ; + EDiv = infixl 1 "/" ; + EInt i = constant i.s ; + } +``` + + +#NEW + +===Exercises on precedence=== + +1. Define non-associative and right-associative infix operations +analogous to ``infixl``. + +2. Add a constructor that puts parentheses around expressions +to raise their precedence, but that is eliminated by a ``def`` definition. +Test parsing with and without a pipe to ``pt -transform=compute``. 
+ + + +#NEW + +==Code generation as linearization== + +Translate arithmetic (infix) to JVM (postfix): +``` + 2 + 3 * 4 + + ===> + + iconst 2 ; iconst 3 ; iconst 4 ; imul ; iadd +``` +Just give linearization rules for JVM: +``` + lin + EPlus = postfix "iadd" ; + EMinus = postfix "isub" ; + ETimes = postfix "imul" ; + EDiv = postfix "idiv" ; + EInt i = ss ("iconst" ++ i.s) ; + oper + postfix : Str -> SS -> SS -> SS = \op,x,y -> + ss (x.s ++ ";" ++ y.s ++ ";" ++ op) ; +``` + + +#NEW + +===Programs with variables=== + +A **straight code** programming language, with +**initializations** and **assignments**: +``` + int x = 2 + 3 ; + int y = x + 1 ; + x = x + 9 * y ; +``` +We define programs by the following constructors: +``` + fun + PEmpty : Prog ; + PInit : Exp -> (Var -> Prog) -> Prog ; + PAss : Var -> Exp -> Prog -> Prog ; +``` +``PInit`` uses higher-order abstract syntax for making the +initialized variable available in the **continuation** of the program. + +The abstract syntax tree for the above code is +``` + PInit (EPlus (EInt 2) (EInt 3)) (\x -> + PInit (EPlus (EVar x) (EInt 1)) (\y -> + PAss x (EPlus (EVar x) (ETimes (EInt 9) (EVar y))) + PEmpty)) +``` +No uninitialized variables are allowed - there are no constructors for ``Var``! +But we do have the rule +``` + fun EVar : Var -> Exp ; +``` +The rest of the grammar is just the same as for arithmetic expressions +#Rsecprecedence. The best way to implement it is perhaps by writing a +module that extends the expression module. The most natural start category +of the extension is ``Prog``. + + +#NEW + +===Exercises on code generation=== + +1. Define a C-like concrete syntax of the straight-code language. + +2. Extend the straight-code language to expressions of type ``float``. +To guarantee type safety, you can define a category ``Typ`` of types, and +make ``Exp`` and ``Var`` dependent on ``Typ``. Basic floating point expressions +can be formed from literals of the built-in GF type ``Float``. 
The arithmetic +operations should be made polymorphic (as #Rsecpolymorphic). + +3. Extend JVM generation to the straight-code language, using +two more instructions +- ``iload`` //x//, which loads the value of the variable //x// +- ``istore`` //x// which stores a value to the variable //x// + + +Thus the code for the example in the previous section is +``` + iconst 2 ; iconst 3 ; iadd ; istore x ; + iload x ; iconst 1 ; iadd ; istore y ; + iload x ; iconst 9 ; iload y ; imul ; iadd ; istore x ; +``` + +4. If you made the exercise of adding floating point numbers to +the language, you can now cash out the main advantage of type checking +for code generation: selecting type-correct JVM instructions. The floating +point instructions are precisely the same as the integer one, except that +the prefix is ``f`` instead of ``i``, and that ``fconst`` takes floating +point literals as arguments. + + + +#NEW + +=Lesson 7: Embedded grammars= + +#Lchapeight + +Goals: +- use grammars as parts of programs written in Haskell and JavaScript +- implement stand-alone question-answering systems and translators based on + GF grammars +- generate language models for speech recognition from GF grammars + + + +#NEW + +==Functionalities of an embedded grammar format== + +GF grammars can be used as parts of programs written in other programming +languages, to be called **host languages**. +This facility is based on several components: +- PGF: a portable format for multilingual GF grammars +- a PGF interpreter written in the host language +- a library in the host language that enables calling the interpreter +- a way to manipulate abstract syntax trees in the host language + + + + +#NEW + +==The portable grammar format== + +The portable format is called PGF, "Portable Grammar Format". 
+ +This format is produced by the GF batch compiler ``gf``, +executable from the operative system shell: +``` + % gf --make SOURCE.gf +``` +PGF is the recommended format in +which final grammar products are distributed, because they +are stripped from superfluous information and can be started and applied +faster than sets of separate modules. + +Application programmers have never any need to read or modify PGF files. + +PGF thus plays the same role as machine code in +general-purpose programming (or bytecode in Java). + + +#NEW + +===Haskell: the EmbedAPI module=== + +The Haskell API contains (among other things) the following types and functions: +``` + readPGF :: FilePath -> IO PGF + + linearize :: PGF -> Language -> Tree -> String + parse :: PGF -> Language -> Category -> String -> [Tree] + + linearizeAll :: PGF -> Tree -> [String] + linearizeAllLang :: PGF -> Tree -> [(Language,String)] + + parseAll :: PGF -> Category -> String -> [[Tree]] + parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])] + + languages :: PGF -> [Language] + categories :: PGF -> [Category] + startCat :: PGF -> Category +``` +This is the only module that needs to be imported in the Haskell application. +It is available as a part of the GF distribution, in the file +``src/PGF.hs``. + + + +#NEW + +===First application: a translator=== + +Let us first build a stand-alone translator, which can translate +in any multilingual grammar between any languages in the grammar. +``` +module Main where + +import PGF +import System (getArgs) + +main :: IO () +main = do + file:_ <- getArgs + gr <- readPGF file + interact (translate gr) + +translate :: PGF -> String -> String +translate gr s = case parseAllLang gr (startCat gr) s of + (lg,t:_):_ -> unlines [linearize gr l t | l <- languages gr, l /= lg] + _ -> "NO PARSE" +``` +To run the translator, first compile it by +``` + % ghc --make -o trans Translator.hs +``` +For this, you need the Haskell compiler [GHC http://www.haskell.org/ghc]. 
+ + +#NEW + +===Producing PGF for the translator=== + +Then produce a PGF file. For instance, the ``Food`` grammar set can be +compiled as follows: +``` + % gf --make FoodEng.gf FoodIta.gf +``` +This produces the file ``Food.pgf`` (its name comes from the abstract syntax). + +The Haskell library function ``interact`` makes the ``trans`` program work +like a Unix filter, which reads from standard input and writes to standard +output. Therefore it can be a part of a pipe and read and write files. +The simplest way to translate is to ``echo`` input to the program: +``` + % echo "this wine is delicious" | ./trans Food.pgf + questo vino delizioso +``` +The result is given in all languages except the input language. + +%TODO convert the output to UTF8 + + +#NEW + +===A translator loop=== + +To avoid starting the translator over and over again: +change ``interact`` in the main function to ``loop``, defined as +follows: +``` +loop :: (String -> String) -> IO () +loop trans = do + s <- getLine + if s == "quit" then putStrLn "bye" else do + putStrLn $ trans s + loop trans +``` +The loop keeps on translating line by line until the input line +is ``quit``. + + + +#NEW + +===A question-answer system=== + +#Lsecmathprogram + +The next application is also a translator, but it adds a +**transfer** component - a function that transforms syntax trees. + +The transfer function we use is one that computes a question into an answer. + +The program accepts simple questions about arithmetic and answers +"yes" or "no" in the language in which the question was made: +``` + Is 123 prime? + No. + 77 est impair ? + Oui. +``` +We change the pure translator by giving +the ``translate`` function the transfer as an extra argument: +``` + translate :: (Tree -> Tree) -> PGF -> String -> String +``` +Ordinary translation as a special case where +transfer is the identity function (``id`` in Haskell). 
+ +To reply in the //same// language as the question: +``` + translate tr gr s = case parseAllLang gr (startCat gr) s of + (lg,t:_):_ -> linearize gr lg (tr t) + _ -> "NO PARSE" +``` + + +#NEW + +===Abstract syntax of the query system=== + +Input: abstract syntax judgements +``` +abstract Query = { + + flags startcat=Question ; + + cat + Answer ; Question ; Object ; + + fun + Even : Object -> Question ; + Odd : Object -> Question ; + Prime : Object -> Question ; + Number : Int -> Object ; + + Yes : Answer ; + No : Answer ; +} +``` + + +#NEW + +===Exporting GF datatypes to Haskell=== + +To make it easy to define a transfer function, we export the +abstract syntax to a system of Haskell datatypes: +``` + % gf --output-format=haskell Query.pgf +``` +It is also possible to produce the Haskell file together with PGF, by +``` + % gf --make --output-format=haskell QueryEng.gf +``` +The result is a file named ``Query.hs``, containing a +module named ``Query``. + + +#NEW + +Output: Haskell definitions +``` +module Query where +import PGF + +data GAnswer = + GYes + | GNo + +data GObject = GNumber GInt + +data GQuestion = + GPrime GObject + | GOdd GObject + | GEven GObject + +newtype GInt = GInt Integer +``` +All type and constructor names are prefixed with a ``G`` to prevent clashes. + +The Haskell module name is the same as the abstract syntax name. + + +#NEW + +===The question-answer function=== + +Haskell's type checker guarantees that the functions are well-typed also with +respect to GF. 
+``` +answer :: GQuestion -> GAnswer +answer p = case p of + GOdd x -> test odd x + GEven x -> test even x + GPrime x -> test prime x + +value :: GObject -> Int +value e = case e of + GNumber (GInt i) -> fromInteger i + +test :: (Int -> Bool) -> GObject -> GAnswer +test f x = if f (value x) then GYes else GNo +``` + + +#NEW + +===Converting between Haskell and GF trees=== + +The generated Haskell module also contains +``` +class Gf a where + gf :: a -> Tree + fg :: Tree -> a + +instance Gf GQuestion where + gf (GEven x1) = DTr [] (AC (CId "Even")) [gf x1] + gf (GOdd x1) = DTr [] (AC (CId "Odd")) [gf x1] + gf (GPrime x1) = DTr [] (AC (CId "Prime")) [gf x1] + fg t = + case t of + DTr [] (AC (CId "Even")) [x1] -> GEven (fg x1) + DTr [] (AC (CId "Odd")) [x1] -> GOdd (fg x1) + DTr [] (AC (CId "Prime")) [x1] -> GPrime (fg x1) + _ -> error ("no Question " ++ show t) +``` +For the programmer, it is enough to know: +- all GF names are in Haskell prefixed with ``G`` +- ``gf`` translates from Haskell objects to GF trees +- ``fg`` translates from GF trees to Haskell objects + + + +#NEW + +===Putting it all together: the transfer definition=== + +``` +module TransferDef where + +import PGF (Tree) +import Query -- generated from GF + +transfer :: Tree -> Tree +transfer = gf . answer . fg + +answer :: GQuestion -> GAnswer +answer p = case p of + GOdd x -> test odd x + GEven x -> test even x + GPrime x -> test prime x + +value :: GObject -> Int +value e = case e of + GNumber (GInt i) -> fromInteger i + +test :: (Int -> Bool) -> GObject -> GAnswer +test f x = if f (value x) then GYes else GNo + +prime :: Int -> Bool +prime x = elem x primes where + primes = sieve [2 .. x] + sieve (p:xs) = p : sieve [ n | n <- xs, n `mod` p > 0 ] + sieve [] = [] +``` + + +#NEW + +===Putting it all together: the Main module=== + +Here is the complete code in the Haskell file ``TransferLoop.hs``. 
+``` +module Main where + +import PGF +import TransferDef (transfer) + +main :: IO () +main = do + gr <- readPGF "Query.pgf" + loop (translate transfer gr) + +loop :: (String -> String) -> IO () +loop trans = do + s <- getLine + if s == "quit" then putStrLn "bye" else do + putStrLn $ trans s + loop trans + +translate :: (Tree -> Tree) -> PGF -> String -> String +translate tr gr s = case parseAllLang gr (startCat gr) s of + (lg,t:_):_ -> linearize gr lg (tr t) + _ -> "NO PARSE" +``` + + + +#NEW + +===Putting it all together: the Makefile=== + +To automate the production of the system, we write a ``Makefile`` as follows: +``` +all: + gf --make --output-format=haskell QueryEng + ghc --make -o ./math TransferLoop.hs + strip math +``` +(The empty segments starting the command lines in a Makefile must be tabs.) +Now we can compile the whole system by just typing +``` + make +``` +Then you can run it by typing +``` + ./math +``` +Just to summarize, the source of the application consists of the following files: +``` + Makefile -- a makefile + Math.gf -- abstract syntax + Math???.gf -- concrete syntaxes + TransferDef.hs -- definition of question-to-answer function + TransferLoop.hs -- Haskell Main module +``` + +#NEW + +==Web server applications== + +PGF files can be used in web servers, for which there is a Haskell library included +in ``src/server/``. How to build a server for tasks like translators is explained +in the [``README`` ../src/server/README] file in that directory. + +One of the servers that can be readily built with the library (without any +programming required) is **fridge poetry magnets**. It is an application that +uses an incremental parser to suggest grammatically correct next words. Here +is an example of its application to the ``Foods`` grammars. + +[food-magnet.png] + + +#NEW + +==JavaScript applications== + +JavaScript is a programming language that has interpreters built in in most +web browsers. 
It is therefore usable for client side web programs, which can even +be run without access to the internet. The following figure shows a JavaScript +program compiled from GF grammars as run on an iPhone. + +[iphone.jpg] + + +#NEW + +===Compiling to JavaScript=== + +JavaScript is one of the output formats of the GF batch compiler. Thus the following +command generates a JavaScript file from two ``Food`` grammars. +``` + % gf --make --output-format=js FoodEng.gf FoodIta.gf +``` +The name of the generated file is ``Food.js``, derived from the top-most abstract +syntax name. This file contains the multilingual grammar as a JavaScript object. + + +#NEW + +===Using the JavaScript grammar=== + +To perform parsing and linearization, the run-time library +``gflib.js`` is used. It is included in ``GF/lib/javascript/``, together with +some other JavaScript and HTML files; these files can be used +as templates for building applications. + +An example of usage is +[``translator.html`` http://grammaticalframework.org:41296], +which is in fact initialized with +a pointer to the Food grammar, so that it provides translation between the English +and Italian grammars: + +[food-js.png] + +The grammar must have the name ``grammar.js``. The abstract syntax and start +category names in ``translator.html`` must match the ones in the grammar. +With these changes, the translator works for any multilingual grammar. + + + + + +#NEW + +==Language models for speech recognition== + +The standard way of using GF in speech recognition is by building +**grammar-based language models**. + +GF supports several formats, including +GSL, the format used in the [Nuance speech recognizer www.nuance.com]. + +GSL is produced from GF by running ``gf`` with the flag +``--output-format=gsl``. + +Example: GSL generated from ``FoodsEng.gf``. 
+``` + % gf --make --output-format=gsl FoodsEng.gf + % more FoodsEng.gsl + + ;GSL2.0 + ; Nuance speech recognition grammar for FoodsEng + ; Generated by GF + + .MAIN Phrase_cat + + Item_1 [("that" Kind_1) ("this" Kind_1)] + Item_2 [("these" Kind_2) ("those" Kind_2)] + Item_cat [Item_1 Item_2] + Kind_1 ["cheese" "fish" "pizza" (Quality_1 Kind_1) + "wine"] + Kind_2 ["cheeses" "fish" "pizzas" + (Quality_1 Kind_2) "wines"] + Kind_cat [Kind_1 Kind_2] + Phrase_1 [(Item_1 "is" Quality_1) + (Item_2 "are" Quality_1)] + Phrase_cat Phrase_1 + + Quality_1 ["boring" "delicious" "expensive" + "fresh" "italian" ("very" Quality_1) "warm"] + Quality_cat Quality_1 +``` + + +#NEW + +===More speech recognition grammar formats=== + +Other formats available via the ``--output-format`` flag include: + + || Format | Description || + | ``gsl`` | Nuance GSL speech recognition grammar + | ``jsgf`` | Java Speech Grammar Format (JSGF) + | ``jsgf_sisr_old`` | JSGF with semantic tags in SISR WD 20030401 format + | ``srgs_abnf`` | SRGS ABNF format + | ``srgs_xml`` | SRGS XML format + | ``srgs_xml_prob`` | SRGS XML format, with weights + | ``slf`` | finite automaton in the HTK SLF format + | ``slf_sub`` | finite automaton with sub-automata in HTK SLF + +All currently available formats can be seen with ``gf --help``. + + diff --git a/doc/tutorial/iphone.jpg b/doc/tutorial/iphone.jpg new file mode 100644 index 000000000..d9e138b88 Binary files /dev/null and b/doc/tutorial/iphone.jpg differ diff --git a/doc/tutorial/mytree.png b/doc/tutorial/mytree.png new file mode 100644 index 000000000..fafcc8772 Binary files /dev/null and b/doc/tutorial/mytree.png differ diff --git a/doc/vr.html b/doc/vr.html deleted file mode 100644 index e5dee1885..000000000 --- a/doc/vr.html +++ /dev/null @@ -1,46 +0,0 @@ - - - - -Library-Based Grammar Engineering - -

Library-Based Grammar Engineering

- -VR Project 2006-2008
-
- -

Staff

-

-Lars Borin (co-leader) -

-

-Robin Cooper (co-leader) -

-

-Aarne Ranta (project responsible) -

-

-Sibylle Schupp (co-leader) -

-

Publications

-

-Ali El Dada, MSc Thesis -

-

-Muhammad Humayoun, MSc Thesis -

-

-Janna Khegai, -Language Engineering in GF, PhD Thesis, Chalmers. 2006. -

-

Links

-

-GF -

-

-Functional Morphology -

- - - - diff --git a/doc/vr.txt b/doc/vr.txt deleted file mode 100644 index 9b5045978..000000000 --- a/doc/vr.txt +++ /dev/null @@ -1,32 +0,0 @@ -Library-Based Grammar Engineering -VR Project 2006-2008 - - -=Staff= - -Lars Borin (co-leader) - -Robin Cooper (co-leader) - -Aarne Ranta (project responsible) - -Sibylle Schupp (co-leader) - - - -=Publications= - -Ali El Dada, MSc Thesis - -Muhammad Humayoun, MSc Thesis - -Janna Khegai, -Language Engineering in GF, PhD Thesis, Chalmers. 2006. - - - -=Links= - -[GF http://www.cs.chalmers.se/~aarne/GF/] - -[Functional Morphology http://www.cs.chalmers.se/~markus/FM/] diff --git a/index.html b/index.html index 3a23a66a9..d88e3b6df 100644 --- a/index.html +++ b/index.html @@ -26,11 +26,13 @@ April 2010 | Download | Libraries | Reference -| Tutorial +| Tutorial +| QuickStart +| UserGroup ]

-[ Developers +[ ForDevelopers | People | Publications | QuickRefCard @@ -139,7 +141,7 @@ fifty scientific publications (see GF publica

Programming in GF

-GF is easy to learn by following the tutorial. +GF is easy to learn by following the tutorial. You can write your first translator in 15 minutes.

-- cgit v1.2.3