diff options
| author | John J. Camilleri <john@digitalgrammars.com> | 2018-11-02 14:07:00 +0100 |
|---|---|---|
| committer | John J. Camilleri <john@digitalgrammars.com> | 2018-11-02 14:07:00 +0100 |
| commit | f7dc9a6eaf66bb97aa8a1be6bfc25614c2168811 (patch) | |
| tree | 539d7fc086c049a2b61680a02bfb1d1269eadb2d | |
| parent | 7867c8c8287c4430d6fd5c8aed2e581ca4a9186d (diff) | |
| parent | 777028dcd8b4038d1fd725f8e851f218f78fab14 (diff) | |
Merge branch 'master' into new-website
| -rw-r--r-- | doc/tutorial/gf-tutorial.html | 5871 | ||||
| -rw-r--r-- | doc/tutorial/gf-tutorial.t2t | 1 |
2 files changed, 1 insertions, 5871 deletions
diff --git a/doc/tutorial/gf-tutorial.html b/doc/tutorial/gf-tutorial.html deleted file mode 100644 index 76d6b04a4..000000000 --- a/doc/tutorial/gf-tutorial.html +++ /dev/null @@ -1,5871 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" CONTENT="http://txt2tags.sf.net"> -<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> -<TITLE>Grammatical Framework Tutorial</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> -<P ALIGN="center"><CENTER><H1>Grammatical Framework Tutorial</H1> -<FONT SIZE="4"> -<I>Aarne Ranta</I><BR> -December 2010 for GF 3.2 -</FONT></CENTER> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> - <UL> - <LI><A HREF="#toc1">Overview</A> - <UL> - <LI><A HREF="#toc2">Outline</A> - <LI><A HREF="#toc3">Slides</A> - </UL> - <LI><A HREF="#toc4">Lesson 1: Getting Started with GF</A> - <UL> - <LI><A HREF="#toc5">What GF is</A> - <LI><A HREF="#toc6">GF grammars and language processing tasks</A> - <LI><A HREF="#toc7">Getting the GF system</A> - <LI><A HREF="#toc8">Running the GF system</A> - <LI><A HREF="#toc9">A "Hello World" grammar</A> - <UL> - <LI><A HREF="#toc10">The program: abstract syntax and concrete syntaxes</A> - <LI><A HREF="#toc11">Using grammars in the GF system</A> - <LI><A HREF="#toc12">Exercises on the Hello World grammar</A> - </UL> - <LI><A HREF="#toc13">Using grammars from outside GF</A> - <LI><A HREF="#toc14">GF scripts</A> - <LI><A HREF="#toc15">What else can be done with the grammar</A> - <LI><A HREF="#toc16">Embedded grammar applications</A> - </UL> - <LI><A HREF="#toc17">Lesson 2: Designing a grammar for complex phrases</A> - <UL> - <LI><A HREF="#toc18">The abstract syntax Food</A> - <LI><A HREF="#toc19">The concrete syntax FoodEng</A> - <UL> - <LI><A HREF="#toc20">Exercises on the Food grammar</A> - </UL> - <LI><A HREF="#toc21">Commands for testing grammars</A> - <UL> - <LI><A HREF="#toc22">Generating trees and strings</A> - <LI><A HREF="#toc23">Exercises on generation</A> - <LI><A HREF="#toc24">More on pipes: tracing</A> - <LI><A HREF="#toc25">Writing and reading files</A> - <LI><A HREF="#toc26">Visualizing trees</A> - <LI><A HREF="#toc27">System commands</A> - </UL> - <LI><A HREF="#toc28">An Italian concrete syntax</A> - <UL> - <LI><A HREF="#toc29">Exercises on multilinguality</A> - </UL> - <LI><A HREF="#toc30">Free variation</A> - <LI><A HREF="#toc31">More application of multilingual grammars</A> - <UL> - <LI><A HREF="#toc32">Multilingual treebanks</A> - <LI><A HREF="#toc33">Translation quiz</A> - </UL> - <LI><A HREF="#toc34">Context-free grammars and GF</A> - <UL> - <LI><A HREF="#toc35">The "cf" grammar format</A> - <LI><A HREF="#toc36">Restrictions of context-free grammars</A> - </UL> - <LI><A HREF="#toc37">Modules and files</A> - <LI><A HREF="#toc38">Using operations and resource modules</A> - <UL> - <LI><A HREF="#toc39">Operation definitions</A> - <LI><A HREF="#toc40">The ``resource`` module type</A> - <LI><A HREF="#toc41">Opening a resource</A> - <LI><A HREF="#toc42">Partial application</A> - <LI><A HREF="#toc43">Testing resource modules</A> - </UL> - <LI><A HREF="#toc44">Grammar architecture</A> - <UL> - <LI><A HREF="#toc45">Extending a grammar</A> - <LI><A HREF="#toc46">Multiple inheritance</A> - </UL> - </UL> - <LI><A HREF="#toc47">Lesson 3: Grammars with parameters</A> - <UL> - <LI><A HREF="#toc48">The problem: words have to be inflected</A> - <LI><A HREF="#toc49">Parameters and tables</A> - <LI><A HREF="#toc50">Inflection tables and paradigms</A> - <UL> - <LI><A HREF="#toc51">Exercises on morphology</A> - </UL> - <LI><A HREF="#toc52">Using parameters in concrete syntax</A> - <UL> - <LI><A HREF="#toc53">Agreement</A> - <LI><A HREF="#toc54">Determiners</A> - <LI><A HREF="#toc55">Parametric vs. inherent features</A> - </UL> - <LI><A HREF="#toc56">An English concrete syntax for Foods with parameters</A> - <LI><A HREF="#toc57">More on inflection paradigms</A> - <UL> - <LI><A HREF="#toc58">Worst-case functions</A> - <LI><A HREF="#toc59">Smart paradigms</A> - <LI><A HREF="#toc60">Exercises on regular patterns</A> - <LI><A HREF="#toc61">Function types with variables</A> - <LI><A HREF="#toc62">Separating operation types and definitions</A> - <LI><A HREF="#toc63">Overloading of operations</A> - <LI><A HREF="#toc64">Morphological analysis and morphology quiz</A> - </UL> - <LI><A HREF="#toc65">The Italian Foods grammar</A> - <UL> - <LI><A HREF="#toc66">Exercises on using parameters</A> - </UL> - <LI><A HREF="#toc67">Discontinuous constituents</A> - <LI><A HREF="#toc68">Strings at compile time vs. run time</A> - <UL> - <LI><A HREF="#toc69">Supplementary constructs for concrete syntax</A> - </UL> - </UL> - <LI><A HREF="#toc70">Lesson 4: Using the resource grammar library</A> - <UL> - <LI><A HREF="#toc71">The coverage of the library</A> - <LI><A HREF="#toc72">The structure of the library</A> - <UL> - <LI><A HREF="#toc73">Lexical vs. phrasal rules</A> - <LI><A HREF="#toc74">Lexical categories</A> - <LI><A HREF="#toc75">Lexical rules</A> - <LI><A HREF="#toc76">Resource lexicon</A> - <LI><A HREF="#toc77">Phrasal categories</A> - <LI><A HREF="#toc78">Syntactic combinations</A> - <LI><A HREF="#toc79">Example syntactic combination</A> - </UL> - <LI><A HREF="#toc80">The resource API</A> - <UL> - <LI><A HREF="#toc81">A miniature resource API: categories</A> - <LI><A HREF="#toc82">A miniature resource API: rules</A> - <LI><A HREF="#toc83">A miniature resource API: structural words</A> - <LI><A HREF="#toc84">A miniature resource API: paradigms</A> - <LI><A HREF="#toc85">A miniature resource API: more paradigms</A> - <LI><A HREF="#toc86">Exercises</A> - </UL> - <LI><A HREF="#toc87">Example: English</A> - <UL> - <LI><A HREF="#toc88">English example: linearization types and combination rules</A> - <LI><A HREF="#toc89">English example: lexical rules</A> - <LI><A HREF="#toc90">English example: exercises</A> - </UL> - <LI><A HREF="#toc91">Functor implementation of multilingual grammars</A> - <UL> - <LI><A HREF="#toc92">New language by copy and paste</A> - <LI><A HREF="#toc93">Functors: functions on the module level</A> - <LI><A HREF="#toc94">Code for the Foods functor</A> - <LI><A HREF="#toc95">Code for the LexFoods interface</A> - <LI><A HREF="#toc96">Code for a German instance of the lexicon</A> - <LI><A HREF="#toc97">Code for a German functor instantiation</A> - <LI><A HREF="#toc98">Adding languages to a functor implementation</A> - <LI><A HREF="#toc99">Example: adding Finnish</A> - <LI><A HREF="#toc100">A design pattern</A> - <LI><A HREF="#toc101">Functors: exercises</A> - </UL> - <LI><A HREF="#toc102">Restricted inheritance</A> - <UL> - <LI><A HREF="#toc103">A problem with functors</A> - <LI><A HREF="#toc104">Restricted inheritance: include or exclude</A> - <LI><A HREF="#toc105">The functor problem solved</A> - </UL> - <LI><A HREF="#toc106">Grammar reuse</A> - <UL> - <LI><A HREF="#toc107">Library exercises</A> - </UL> - <LI><A HREF="#toc108">Tenses</A> - </UL> - <LI><A HREF="#toc109">Lesson 5: Refining semantics in abstract syntax</A> - <UL> - <LI><A HREF="#toc110">Dependent types</A> - <UL> - <LI><A HREF="#toc111">A dependent type system</A> - <LI><A HREF="#toc112">Examples of devices and actions</A> - <LI><A HREF="#toc113">Linearization and parsing with dependent types</A> - <LI><A HREF="#toc114">Solving metavariables</A> - </UL> - <LI><A HREF="#toc115">Polymorphism</A> - <UL> - <LI><A HREF="#toc116">Dependent types: exercises</A> - </UL> - <LI><A HREF="#toc117">Proof objects</A> - <UL> - <LI><A HREF="#toc118">Proof-carrying documents</A> - </UL> - <LI><A HREF="#toc119">Restricted polymorphism</A> - <UL> - <LI><A HREF="#toc120">Example: classes for switching and dimming</A> - </UL> - <LI><A HREF="#toc121">Variable bindings</A> - <UL> - <LI><A HREF="#toc122">Higher-order abstract syntax</A> - <LI><A HREF="#toc123">Higher-order abstract syntax: linearization</A> - <LI><A HREF="#toc124">Eta expansion</A> - <LI><A HREF="#toc125">Parsing variable bindings</A> - <LI><A HREF="#toc126">Exercises on variable bindings</A> - </UL> - <LI><A HREF="#toc127">Semantic definitions</A> - <UL> - <LI><A HREF="#toc128">Computing a tree</A> - <LI><A HREF="#toc129">Definitional equality</A> - <LI><A HREF="#toc130">Judgement forms for constructors</A> - <LI><A HREF="#toc131">Exercises on semantic definitions</A> - </UL> - <LI><A HREF="#toc132">Lesson 6: Grammars of formal languages</A> - <UL> - <LI><A HREF="#toc133">Arithmetic expressions</A> - <LI><A HREF="#toc134">Concrete syntax: a simple approach</A> - </UL> - <LI><A HREF="#toc135">Lexing and unlexing</A> - <UL> - <LI><A HREF="#toc136">Most common lexers and unlexers</A> - </UL> - <LI><A HREF="#toc137">Precedence and fixity</A> - <UL> - <LI><A HREF="#toc138">Precedence as a parameter</A> - <LI><A HREF="#toc139">Fixities</A> - <LI><A HREF="#toc140">Exercises on precedence</A> - </UL> - <LI><A HREF="#toc141">Code generation as linearization</A> - <UL> - <LI><A HREF="#toc142">Programs with variables</A> - <LI><A HREF="#toc143">Exercises on code generation</A> - </UL> - </UL> - <LI><A HREF="#toc144">Lesson 7: Embedded grammars</A> - <UL> - <LI><A HREF="#toc145">Functionalities of an embedded grammar format</A> - <LI><A HREF="#toc146">The portable grammar format</A> - <UL> - <LI><A HREF="#toc147">Haskell: the EmbedAPI module</A> - <LI><A HREF="#toc148">First application: a translator</A> - <LI><A HREF="#toc149">Producing PGF for the translator</A> - <LI><A HREF="#toc150">A translator loop</A> - <LI><A HREF="#toc151">A question-answer system</A> - <LI><A HREF="#toc152">Abstract syntax of the query system</A> - <LI><A HREF="#toc153">Exporting GF datatypes to Haskell</A> - <LI><A HREF="#toc154">The question-answer function</A> - <LI><A HREF="#toc155">Converting between Haskell and GF trees</A> - <LI><A HREF="#toc156">Putting it all together: the transfer definition</A> - <LI><A HREF="#toc157">Putting it all together: the Main module</A> - <LI><A HREF="#toc158">Putting it all together: the Makefile</A> - </UL> - <LI><A HREF="#toc159">Web server applications</A> - <LI><A HREF="#toc160">JavaScript applications</A> - <UL> - <LI><A HREF="#toc161">Compiling to JavaScript</A> - <LI><A HREF="#toc162">Using the JavaScript grammar</A> - </UL> - <LI><A HREF="#toc163">Language models for speech recognition</A> - <UL> - <LI><A HREF="#toc164">More speech recognition grammar formats</A> - </UL> - </UL> - </UL> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc1"></A> -<H1>Overview</H1> -<P> -This is a hands-on introduction to grammar writing in GF. -</P> -<P> -Main ingredients of GF: -</P> -<UL> -<LI>linguistics -<LI>functional programming -</UL> - -<P> -Prerequisites: -</P> -<UL> -<LI>some previous experience from some programming language -<LI>the basics of using computers, e.g. the use of - text editors and the management of files. -<LI>knowledge of Unix commands is useful but not necessary -<LI>knowledge of many natural languages may add fun to experience -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc2"></A> -<H2>Outline</H2> -<P> -<a href="#chaptwo">Lesson 1</a>: a multilingual "Hello World" grammar. English, Finnish, Italian. -</P> -<P> -<a href="#chapthree">Lesson 2</a>: a larger grammar for the domain of food. English and Italian. -</P> -<P> -<a href="#chapfour">Lesson 3</a>: parameters - morphology and agreement. -</P> -<P> -<a href="#chapfive">Lesson 4</a>: using the resource grammar library. -</P> -<P> -<a href="#chapsix">Lesson 5</a>: semantics - <B>dependent types</B>, <B>variable bindings</B>, -and <B>semantic definitions</B>. -</P> -<P> -<a href="#chapseven">Lesson 6</a>: implementing formal languages. -</P> -<P> -<a href="#chapeight">Lesson 7</a>: embedded grammar applications. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc3"></A> -<H2>Slides</H2> -<P> -You can chop this tutorial into a set of slides by the command -</P> -<PRE> - htmls gf-tutorial.html -</PRE> -<P> -where the program <CODE>htmls</CODE> is distributed with GF (see below), in -</P> -<P> - <A HREF="http://grammaticalframework.org/src/tools/Htmls.hs"><CODE>GF/src/tools/Htmls.hs</CODE></A> -</P> -<P> -The slides will appear as a set of files beginning with <CODE>01-gf-tutorial.htmls</CODE>. -</P> -<P> -Internal links will not work in the slide format, except for those in the -upper left corner of each slide, and the links behind the "Contents" link. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc4"></A> -<H1>Lesson 1: Getting Started with GF</H1> -<P> -<a name="chaptwo"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>install and run GF -<LI>write the first GF grammar: a "Hello World" grammar in three languages -<LI>use GF for translation and multilingual generation -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc5"></A> -<H2>What GF is</H2> -<P> -We use the term GF for three different things: -</P> -<UL> -<LI>a <B>system</B> (computer program) used for working with grammars -<LI>a <B>programming language</B> in which grammars can be written -<LI>a <B>theory</B> about grammars and languages -</UL> - -<P> -The GF system is an implementation -of the GF programming language, which in turn is built on the ideas of the -GF theory. -</P> -<P> -The focus of this tutorial is on using the GF programming language. -</P> -<P> -At the same time, we learn the way of thinking in the GF theory. -</P> -<P> -We make the grammars run on a computer by -using the GF system. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc6"></A> -<H2>GF grammars and language processing tasks</H2> -<P> -A GF program is called a <B>grammar</B>. -</P> -<P> -A grammar defines a language. -</P> -<P> -From this definition, language processing components can be derived: -</P> -<UL> -<LI><B>parsing</B>: to analyse the language -<LI><B>linearization</B>: to generate the language -<LI><B>translation</B>: to analyse one language and generate another -</UL> - -<P> -In general, a GF grammar is <B>multilingual</B>: -</P> -<UL> -<LI>many languages in one grammar -<LI>translations between them -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc7"></A> -<H2>Getting the GF system</H2> -<P> -Open-source free software, downloaded via the GF Homepage: -</P> -<P> -<A HREF="http://grammaticalframework.org/"><CODE>grammaticalframework.org</CODE></A> -</P> -<P> -There you find -</P> -<UL> -<LI>binaries for Linux, Mac OS X, and Windows -<LI>source code and documentation -<LI>grammar libraries and examples -</UL> - -<P> -Many examples in this tutorial are -<A HREF="http://grammaticalframework.org/examples/tutorial">online</A>. -</P> -<P> -Normally you don't have to compile GF yourself. -But, if you do want to compile GF from source follow the -instructions in the <A HREF="../gf-developers.html">Developers Guide</A>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc8"></A> -<H2>Running the GF system</H2> -<P> -Type <CODE>gf</CODE> in the Unix (or Cygwin) shell: -</P> -<PRE> - % gf -</PRE> -<P> -You will see GF's welcome message and the prompt <CODE>></CODE>. -The command -</P> -<PRE> - > help -</PRE> -<P> -will give you a list of available commands. -</P> -<P> -As a common convention, we will use -</P> -<UL> -<LI><CODE>%</CODE> as a prompt that marks system commands -<LI><CODE>></CODE> as a prompt that marks GF commands -</UL> - -<P> -Thus you should not type these prompts, but only the characters that -follow them. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc9"></A> -<H2>A "Hello World" grammar</H2> -<P> -Like most programming language tutorials, we start with a -program that prints "Hello World" on the terminal. -</P> -<P> -Extra features: -</P> -<UL> -<LI><B>Multilinguality</B>: the message is printed in many languages. -<LI><B>Reversibility</B>: in addition to printing, you can <B>parse</B> the - message and <B>translate</B> it to other languages. -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc10"></A> -<H3>The program: abstract syntax and concrete syntaxes</H3> -<P> -A GF program, in general, is a <B>multilingual grammar</B>. Its main parts -are -</P> -<UL> -<LI>an <B>abstract syntax</B> -<LI>one or more <B>concrete syntaxes</B> -</UL> - -<P> -The abstract syntax defines what <B>meanings</B> -can be expressed in the grammar -</P> -<UL> -<LI><I>Greetings</I>, where we greet a <I>Recipient</I>, which can be - <I>World</I> or <I>Mum</I> or <I>Friends</I> -</UL> - -<P> -<!-- NEW --> -</P> -<P> -GF code for the abstract syntax: -</P> -<PRE> - -- a "Hello World" grammar - abstract Hello = { - - flags startcat = Greeting ; - - cat Greeting ; Recipient ; - - fun - Hello : Recipient -> Greeting ; - World, Mum, Friends : Recipient ; - } -</PRE> -<P> -The code has the following parts: -</P> -<UL> -<LI>a <B>comment</B> (optional), saying what the module is doing -<LI>a <B>module header</B> indicating that it is an abstract syntax - module named <CODE>Hello</CODE> -<LI>a <B>module body</B> in braces, consisting of - <UL> - <LI>a <B>startcat flag declaration</B> stating that <CODE>Greeting</CODE> is the - default start category for parsing and generation - <LI><B>category declarations</B> introducing two categories, i.e. types of meanings - <LI><B>function declarations</B> introducing three meaning-building functions - </UL> -</UL> - -<P> -<!-- NEW --> -</P> -<P> -English concrete syntax (mapping from meanings to strings): -</P> -<PRE> - concrete HelloEng of Hello = { - - lincat Greeting, Recipient = {s : Str} ; - - lin - Hello recip = {s = "hello" ++ recip.s} ; - World = {s = "world"} ; - Mum = {s = "mum"} ; - Friends = {s = "friends"} ; - } -</PRE> -<P> -The major parts of this code are: -</P> -<UL> -<LI>a module header indicating that it is a concrete syntax of the abstract syntax - <CODE>Hello</CODE>, itself named <CODE>HelloEng</CODE> -<LI>a module body in curly brackets, consisting of - <UL> - <LI><B>linearization type definitions</B> stating that - <CODE>Greeting</CODE> and <CODE>Recipient</CODE> are <B>records</B> with a <B>string</B> <CODE>s</CODE> - <LI><B>linearization definitions</B> telling what records are assigned to - each of the meanings defined in the abstract syntax - </UL> -</UL> - -<P> -Notice the concatenation <CODE>++</CODE> and the record projection <CODE>.</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<P> -Finnish and an Italian concrete syntaxes: -</P> -<PRE> - concrete HelloFin of Hello = { - lincat Greeting, Recipient = {s : Str} ; - lin - Hello recip = {s = "terve" ++ recip.s} ; - World = {s = "maailma"} ; - Mum = {s = "äiti"} ; - Friends = {s = "ystävät"} ; - } - - concrete HelloIta of Hello = { - lincat Greeting, Recipient = {s : Str} ; - lin - Hello recip = {s = "ciao" ++ recip.s} ; - World = {s = "mondo"} ; - Mum = {s = "mamma"} ; - Friends = {s = "amici"} ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc11"></A> -<H3>Using grammars in the GF system</H3> -<P> -In order to compile the grammar in GF, -we create four files, one for each module, named <I>Modulename</I><CODE>.gf</CODE>: -</P> -<PRE> - Hello.gf HelloEng.gf HelloFin.gf HelloIta.gf -</PRE> -<P> -The first GF command: <B>import</B> a grammar. -</P> -<PRE> - > import HelloEng.gf -</PRE> -<P> -All commands also have short names; here: -</P> -<PRE> - > i HelloEng.gf -</PRE> -<P> -The GF system will <B>compile</B> your grammar -into an internal representation and show the CPU time was consumed, followed -by a new prompt: -</P> -<PRE> - > i HelloEng.gf - - compiling Hello.gf... wrote file Hello.gfo 8 msec - - compiling HelloEng.gf... wrote file HelloEng.gfo 12 msec - - 12 msec - > -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -You can use GF for <B>parsing</B> (<CODE>parse</CODE> = <CODE>p</CODE>) -</P> -<PRE> - > parse "hello world" - Hello World -</PRE> -<P> -Parsing takes a <B>string</B> into an <B>abstract syntax tree</B>. -</P> -<P> -The notation for trees is that of <B>function application</B>: -</P> -<PRE> - function argument1 ... argumentn -</PRE> -<P> -Parentheses are only needed for grouping. -</P> -<P> -Parsing something that is not in grammar will fail: -</P> -<PRE> - > parse "hello dad" - Unknown words: dad - - > parse "world hello" - no tree found -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -You can also use GF for <B>linearization</B> (<CODE>linearize = l</CODE>). -It takes trees into strings: -</P> -<PRE> - > linearize Hello World - hello world -</PRE> -<P> -<B>Translation</B>: <B>pipe</B> linearization to parsing: -</P> -<PRE> - > import HelloEng.gf - > import HelloIta.gf - - > parse -lang=HelloEng "hello mum" | linearize -lang=HelloIta - ciao mamma -</PRE> -<P> -Default of the language flag (<CODE>-lang</CODE>): the last-imported concrete syntax. -</P> -<P> -<B>Multilingual generation</B>: -</P> -<PRE> - > parse -lang=HelloEng "hello friends" | linearize - terve ystävät - ciao amici - hello friends -</PRE> -<P> -Linearization is by default to all available languages. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc12"></A> -<H3>Exercises on the Hello World grammar</H3> -<OL> -<LI>Test the parsing and translation examples shown above, as well as -some other examples, in different combinations of languages. -<P></P> -<LI>Extend the grammar <CODE>Hello.gf</CODE> and some of the -concrete syntaxes by five new recipients and one new greeting -form. -<P></P> -<LI>Add a concrete syntax for some other -languages you might know. -<P></P> -<LI>Add a pair of greetings that are expressed in one and -the same way in -one language and in two different ways in another. -For instance, <I>good morning</I> -and <I>good afternoon</I> in English are both expressed -as <I>buongiorno</I> in Italian. -Test what happens when you translate <I>buongiorno</I> to English in GF. -<P></P> -<LI>Inject errors in the <CODE>Hello</CODE> grammars, for example, leave out -some line, omit a variable in a <CODE>lin</CODE> rule, or change the name -in one occurrence -of a variable. Inspect the error messages generated by GF. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc13"></A> -<H2>Using grammars from outside GF</H2> -<P> -You can use the <CODE>gf</CODE> program in a Unix pipe. -</P> -<UL> -<LI>echo a GF command -<LI>pipe it into GF with grammar names as arguments -</UL> - -<PRE> - % echo "l Hello World" | gf HelloEng.gf HelloFin.gf HelloIta.gf -</PRE> -<P> -You can also write a <B>script</B>, a file containing the lines -</P> -<PRE> - import HelloEng.gf - import HelloFin.gf - import HelloIta.gf - linearize Hello World -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc14"></A> -<H2>GF scripts</H2> -<P> -If we name this script <CODE>hello.gfs</CODE>, we can do -</P> -<PRE> - $ gf --run <hello.gfs - - ciao mondo - terve maailma - hello world -</PRE> -<P> -The option <CODE>--run</CODE> removes prompts, CPU time, and other messages. -</P> -<P> -See <a href="#chapeight">Lesson 7</a>, for stand-alone programs that don't need the GF system to run. -</P> -<P> -<B>Exercise</B>. (For Unix hackers.) Write a GF application that reads -an English string from the standard input and writes an Italian -translation to the output. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc15"></A> -<H2>What else can be done with the grammar</H2> -<P> -Some more functions that will be covered: -</P> -<UL> -<LI><B>morphological analysis</B>: find out the possible inflection forms of words -<LI><B>morphological synthesis</B>: generate all inflection forms of words -<LI><B>random generation</B>: generate random expressions -<LI><B>corpus generation</B>: generate all expressions -<LI><B>treebank generation</B>: generate a list of trees with their linearizations -<LI><B>teaching quizzes</B>: train morphology and translation -<LI><B>multilingual authoring</B>: create a document in many languages simultaneously -<LI><B>speech input</B>: optimize a speech recognition system for a grammar -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc16"></A> -<H2>Embedded grammar applications</H2> -<P> -Application programs, using techniques from <a href="#chapeight">Lesson 7</a>: -</P> -<UL> -<LI>compile grammars to new formats, such as speech recognition grammars -<LI>embed grammars in Java and Haskell programs -<LI>build applications using compilation and embedding: - <UL> - <LI>voice commands - <LI>spoken language translators - <LI>dialogue systems - <LI>user interfaces - <LI>localization: render the messages printed by a program - in different languages - </UL> -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc17"></A> -<H1>Lesson 2: Designing a grammar for complex phrases</H1> -<P> -<a name="chapthree"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>build a larger grammar: phrases about food in English and Italian -<LI>learn to write reusable library functions ("operations") -<LI>learn the basics of GF's module system -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc18"></A> -<H2>The abstract syntax Food</H2> -<P> -Phrases usable for speaking about food: -</P> -<UL> -<LI>the start category is <CODE>Phrase</CODE> -<LI>a <CODE>Phrase</CODE> can be built by assigning a <CODE>Quality</CODE> to an <CODE>Item</CODE> - (e.g. <I>this cheese is Italian</I>) -<LI>an<CODE>Item</CODE> is build from a <CODE>Kind</CODE> by prefixing <I>this</I> or <I>that</I> - (e.g. <I>this wine</I>) -<LI>a <CODE>Kind</CODE> is either <B>atomic</B> (e.g. <I>cheese</I>), or formed - qualifying a given <CODE>Kind</CODE> with a <CODE>Quality</CODE> (e.g. <I>Italian cheese</I>) -<LI>a <CODE>Quality</CODE> is either atomic (e.g. <I>Italian</I>, - or built by modifying a given <CODE>Quality</CODE> with the word <I>very</I> (e.g. <I>very warm</I>) -</UL> - -<P> -Abstract syntax: -</P> -<PRE> - abstract Food = { - - flags startcat = Phrase ; - - cat - Phrase ; Item ; Kind ; Quality ; - - fun - Is : Item -> Quality -> Phrase ; - This, That : Kind -> Item ; - QKind : Quality -> Kind -> Kind ; - Wine, Cheese, Fish : Kind ; - Very : Quality -> Quality ; - Fresh, Warm, Italian, Expensive, Delicious, Boring : Quality ; - } -</PRE> -<P> -Example <CODE>Phrase</CODE> -</P> -<PRE> - Is (This (QKind Delicious (QKind Italian Wine))) (Very (Very Expensive)) - this delicious Italian wine is very very expensive -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc19"></A> -<H2>The concrete syntax FoodEng</H2> -<PRE> - concrete FoodEng of Food = { - - lincat - Phrase, Item, Kind, Quality = {s : Str} ; - - lin - Is item quality = {s = item.s ++ "is" ++ quality.s} ; - This kind = {s = "this" ++ kind.s} ; - That kind = {s = "that" ++ kind.s} ; - QKind quality kind = {s = quality.s ++ kind.s} ; - Wine = {s = "wine"} ; - Cheese = {s = "cheese"} ; - Fish = {s = "fish"} ; - Very quality = {s = "very" ++ quality.s} ; - Fresh = {s = "fresh"} ; - Warm = {s = "warm"} ; - Italian = {s = "Italian"} ; - Expensive = {s = "expensive"} ; - Delicious = {s = "delicious"} ; - Boring = {s = "boring"} ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -Test the grammar for parsing: -</P> -<PRE> - > import FoodEng.gf - > parse "this delicious wine is very very Italian" - Is (This (QKind Delicious Wine)) (Very (Very Italian)) -</PRE> -<P> -Parse in other categories setting the <CODE>cat</CODE> flag: -</P> -<PRE> - p -cat=Kind "very Italian wine" - QKind (Very Italian) Wine -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc20"></A> -<H3>Exercises on the Food grammar</H3> -<OL> -<LI>Extend the <CODE>Food</CODE> grammar by ten new food kinds and -qualities, and run the parser with new kinds of examples. -<P></P> -<LI>Add a rule that enables question phrases of the form -<I>is this cheese Italian</I>. -<P></P> -<LI>Enable the optional prefixing of -phrases with the words "excuse me but". Do this in such a way that -the prefix can occur at most once. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc21"></A> -<H2>Commands for testing grammars</H2> -<A NAME="toc22"></A> -<H3>Generating trees and strings</H3> -<P> -Random generation (<CODE>generate_random = gr</CODE>): build -build a random tree in accordance with an abstract syntax: -</P> -<PRE> - > generate_random - Is (This (QKind Italian Fish)) Fresh -</PRE> -<P> -By using a pipe, random generation can be fed into linearization: -</P> -<PRE> - > generate_random | linearize - this Italian fish is fresh -</PRE> -<P> -Use the <CODE>number</CODE> flag to generate several trees: -</P> -<PRE> - > gr -number=4 | l - that wine is boring - that fresh cheese is fresh - that cheese is very boring - this cheese is Italian -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -To generate <I>all</I> phrases that a grammar can produce, -use <CODE>generate_trees = gt</CODE>. -</P> -<PRE> - > generate_trees | l - that cheese is very Italian - that cheese is very boring - that cheese is very delicious - ... - this wine is fresh - this wine is warm -</PRE> -<P> -The default <B>depth</B> is 3; the depth can be -set by using the <CODE>depth</CODE> flag: -</P> -<PRE> - > generate_trees -depth=2 | l -</PRE> -<P> -What options a command has can be seen by the <CODE>help = h</CODE> command: -</P> -<PRE> - > help gr - > help gt -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc23"></A> -<H3>Exercises on generation</H3> -<OL> -<LI>If the command <CODE>gt</CODE> generated all -trees in your grammar, it would never terminate. Why? -<P></P> -<LI>Measure how many trees the grammar gives with depths 4 and 5, -respectively. <B>Hint</B>. You can -use the Unix <B>word count</B> command <CODE>wc</CODE> to count lines. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc24"></A> -<H3>More on pipes: tracing</H3> -<P> -Put the <B>tracing</B> option <CODE>-tr</CODE> to each command whose output you -want to see: -</P> -<PRE> - > gr -tr | l -tr | p - - Is (This Cheese) Boring - this cheese is boring - Is (This Cheese) Boring -</PRE> -<P> -Useful for test purposes: the pipe above can show -if a grammar is <B>ambiguous</B>, i.e. -contains strings that can be parsed in more than one way. -</P> -<P> -<B>Exercise</B>. Extend the <CODE>Food</CODE> grammar so that it produces ambiguous -strings, and try out the ambiguity test. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc25"></A> -<H3>Writing and reading files</H3> -<P> -To save the outputs into a file, pipe it to the <CODE>write_file = wf</CODE> command, -</P> -<PRE> - > gr -number=10 | linearize | write_file -file=exx.tmp -</PRE> -<P> -To read a file to GF, use the <CODE>read_file = rf</CODE> command, -</P> -<PRE> - > read_file -file=exx.tmp -lines | parse -</PRE> -<P> -The flag <CODE>-lines</CODE> tells GF to read each line of the file separately. -</P> -<P> -Files with examples can be used for <B>regression testing</B> -of grammars - the most systematic way to do this is by -<B>treebanks</B>; see <a href="#sectreebank">here</a>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc26"></A> -<H3>Visualizing trees</H3> -<P> -Parentheses give a linear representation of trees, -useful for the computer. -</P> -<P> -Human eye may prefer to see a visualization: <CODE>visualize_tree = vt</CODE>: -</P> -<PRE> - > parse "this delicious cheese is very Italian" | visualize_tree -</PRE> -<P> -The tree is generated in postscript (<CODE>.ps</CODE>) file. The <CODE>-view</CODE> option is used for -telling what command to use to view the file. Its default is <CODE>"open"</CODE>, which works -on Mac OS X. On Ubuntu Linux, one can write -</P> -<PRE> - > parse "this delicious cheese is very Italian" | visualize_tree -view="eog" -</PRE> -<P></P> -<P> -<IMG ALIGN="middle" SRC="mytree.png" BORDER="0" ALT=""> -</P> -<P> -This command uses the program <A HREF="http://www.graphviz.org/">Graphviz</A>, which you -might not have, but which are freely available on the web. -</P> -<P> -You can save the temporary file <CODE>_grph.dot</CODE>, -which the command <CODE>vt</CODE> produces. -</P> -<P> -Then you can process this file with the <CODE>dot</CODE> -program (from the Graphviz package). -</P> -<PRE> - % dot -Tpng _grph.dot > mytree.png -</PRE> -<P> -You can also visualize <B>parse trees</B>, which show categories and words instead of -function symbols. The command is <CODE>visualize_parse = vp</CODE>: -</P> -<PRE> - > parse "this delicious cheese is very Italian" | visualize_parse -</PRE> -<P></P> -<P> -<IMG ALIGN="middle" SRC="myparse.png" BORDER="0" ALT=""> -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc27"></A> -<H3>System commands</H3> -<P> -You can give a <B>system command</B> without leaving GF: -<CODE>!</CODE> followed by a Unix command, -</P> -<PRE> - > ! dot -Tpng grphtmp.dot > mytree.png - > ! open mytree.png -</PRE> -<P> -A system command may also receive its argument from -a GF pipes. It then uses the symbol <CODE>?</CODE>: -</P> -<PRE> - > generate_trees -depth=4 | ? wc -l -</PRE> -<P> -This command example returns the number of generated trees. -</P> -<P> -<B>Exercise</B>. -Measure how many trees the grammar <CODE>FoodEng</CODE> gives with depths 4 and 5, -respectively. Use the Unix <B>word count</B> command <CODE>wc</CODE> to count lines, and -a system pipe from a GF command into a Unix command. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc28"></A> -<H2>An Italian concrete syntax</H2> -<P> -<a name="secanitalian"></a> -</P> -<P> -Just (?) replace English words with their dictionary equivalents: -</P> -<PRE> - concrete FoodIta of Food = { - - lincat - Phrase, Item, Kind, Quality = {s : Str} ; - - lin - Is item quality = {s = item.s ++ "è" ++ quality.s} ; - This kind = {s = "questo" ++ kind.s} ; - That kind = {s = "quel" ++ kind.s} ; - QKind quality kind = {s = kind.s ++ quality.s} ; - Wine = {s = "vino"} ; - Cheese = {s = "formaggio"} ; - Fish = {s = "pesce"} ; - Very quality = {s = "molto" ++ quality.s} ; - Fresh = {s = "fresco"} ; - Warm = {s = "caldo"} ; - Italian = {s = "italiano"} ; - Expensive = {s = "caro"} ; - Delicious = {s = "delizioso"} ; - Boring = {s = "noioso"} ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -Not just replacing words: -</P> -<P> -The order of a quality and the kind it modifies is changed in -</P> -<PRE> - QKind quality kind = {s = kind.s ++ quality.s} ; -</PRE> -<P> -Thus Italian says <CODE>vino italiano</CODE> for <CODE>Italian wine</CODE>. -</P> -<P> -(Some Italian adjectives -are put before the noun. This distinction can be controlled by parameters, -which are introduced in <a href="#chapfour">Lesson 3</a>.) -</P> -<P> -Multilingual grammars have yet another visualization option: -<B>word alignment</B>, which shows what words correspond to each other. -Technically, this means words that have the same smallest spanning subtrees -in abstract syntax. The command is <CODE>align_words = aw</CODE>: -</P> -<PRE> - > parse "this delicious cheese is very Italian" | align_words -</PRE> -<P></P> -<P> -<IMG ALIGN="middle" SRC="align2.png" BORDER="0" ALT=""> -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc29"></A> -<H3>Exercises on multilinguality</H3> -<OL> -<LI>Write a concrete syntax of <CODE>Food</CODE> for some other language. -You will probably end up with grammatically incorrect -linearizations - but don't -worry about this yet. -<P></P> -<LI>If you have written <CODE>Food</CODE> for German, Swedish, or some -other language, test with random or exhaustive generation what constructs -come out incorrect, and prepare a list of those ones that cannot be helped -with the currently available fragment of GF. You can return to your list -after having worked out <a href="#chapfour">Lesson 3</a>. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc30"></A> -<H2>Free variation</H2> -<P> -Semantically indistinguishable ways of expressing a thing. -</P> -<P> -The <B>variants</B> construct of GF expresses free variation. For example, -</P> -<PRE> - lin Delicious = {s = "delicious" | "exquisit" | "tasty"} ; -</PRE> -<P> -By default, the <CODE>linearize</CODE> command -shows only the first variant from such lists; to see them -all, use the option <CODE>-all</CODE>: -</P> -<PRE> - > p "this exquisit wine is delicious" | l -all - this delicious wine is delicious - this delicious wine is exquisit - ... -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -An equivalent notation for variants is -</P> -<PRE> - lin Delicious = {s = variants {"delicious" ; "exquisit" ; "tasty"}} ; -</PRE> -<P> -This notation also allows the limiting case: an empty variant list, -</P> -<PRE> - variants {} -</PRE> -<P> -It can be used e.g. if a word lacks a certain inflection form. -</P> -<P> -Free variation works for all types in concrete syntax; all terms in -a variant list must be of the same type. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc31"></A> -<H2>More application of multilingual grammars</H2> -<A NAME="toc32"></A> -<H3>Multilingual treebanks</H3> -<P> -<a name="sectreebank"></a> -</P> -<P> -<B>Multilingual treebank</B>: a set of trees with their -linearizations in different languages: -</P> -<PRE> - > gr -number=2 | l -treebank - - Is (That Cheese) (Very Boring) - quel formaggio è molto noioso - that cheese is very boring - - Is (That Cheese) Fresh - quel formaggio è fresco - that cheese is fresh -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc33"></A> -<H3>Translation quiz</H3> -<P> -<CODE>translation_quiz = tq</CODE>: -generate random sentences, display them in one language, and check the user's -answer given in another language. -</P> -<PRE> - > translation_quiz -from=FoodEng -to=FoodIta - - Welcome to GF Translation Quiz. - The quiz is over when you have done at least 10 examples - with at least 75 % success. - You can interrupt the quiz by entering a line consisting of a dot ('.'). - - this fish is warm - questo pesce è caldo - > Yes. - Score 1/1 - - this cheese is Italian - questo formaggio è noioso - > No, not questo formaggio è noioso, but - questo formaggio è italiano - - Score 1/2 - this fish is expensive -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc34"></A> -<H2>Context-free grammars and GF</H2> -<A NAME="toc35"></A> -<H3>The "cf" grammar format</H3> -<P> -The grammar <CODE>FoodEng</CODE> can be written in a BNF format as follows: -</P> -<PRE> - Is. Phrase ::= Item "is" Quality ; - That. Item ::= "that" Kind ; - This. Item ::= "this" Kind ; - QKind. Kind ::= Quality Kind ; - Cheese. Kind ::= "cheese" ; - Fish. Kind ::= "fish" ; - Wine. Kind ::= "wine" ; - Italian. Quality ::= "Italian" ; - Boring. Quality ::= "boring" ; - Delicious. Quality ::= "delicious" ; - Expensive. Quality ::= "expensive" ; - Fresh. Quality ::= "fresh" ; - Very. Quality ::= "very" Quality ; - Warm. Quality ::= "warm" ; -</PRE> -<P> -GF can convert BNF grammars into GF. -BNF files are recognized by the file name suffix <CODE>.cf</CODE> (for <B>context-free</B>): -</P> -<PRE> - > import food.cf -</PRE> -<P> -The compiler creates separate abstract and concrete modules internally. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc36"></A> -<H3>Restrictions of context-free grammars</H3> -<P> -Separating concrete and abstract syntax allows -three deviations from context-free grammar: -</P> -<UL> -<LI><B>permutation</B>: changing the order of constituents -<LI><B>suppression</B>: omitting constituents -<LI><B>reduplication</B>: repeating constituents -</UL> - -<P> -<B>Exercise</B>. Define the non-context-free -copy language <CODE>{x x | x <- (a|b)*}</CODE> in GF. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc37"></A> -<H2>Modules and files</H2> -<P> -GF uses suffixes to recognize different file formats: -</P> -<UL> -<LI>Source files: <I>Modulename</I><CODE>.gf</CODE> -<LI>Target files: <I>Modulename</I><CODE>.gfo</CODE> -</UL> - -<P> -Importing generates target from source: -</P> -<PRE> - > i FoodEng.gf - - compiling Food.gf... wrote file Food.gfo 16 msec - - compiling FoodEng.gf... wrote file FoodEng.gfo 20 msec -</PRE> -<P> -The <CODE>.gfo</CODE> format (="GF Object") is precompiled GF, which is -faster to load than source GF (<CODE>.gf</CODE>). -</P> -<P> -When reading a module, GF decides whether -to use an existing <CODE>.gfo</CODE> file or to generate -a new one, by looking at modification times. -</P> -<P> -<!-- NEW --> -</P> -<P> -<B>Exercise</B>. What happens when you import <CODE>FoodEng.gf</CODE> for -a second time? Try this in different situations: -</P> -<UL> -<LI>Right after importing it the first time (the modules are kept in - the memory of GF and need no reloading). -<LI>After issuing the command <CODE>empty</CODE> (<CODE>e</CODE>), which clears the memory - of GF. -<LI>After making a small change in <CODE>FoodEng.gf</CODE>, be it only an added space. -<LI>After making a change in <CODE>Food.gf</CODE>. -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc38"></A> -<H2>Using operations and resource modules</H2> -<A NAME="toc39"></A> -<H3>Operation definitions</H3> -<P> -The golden rule of functional programmin: -</P> -<P> -<I>Whenever you find yourself programming by copy-and-paste, write a function instead.</I> -</P> -<P> -Functions in concrete syntax are defined using the keyword <CODE>oper</CODE> (for -<B>operation</B>), distinct from <CODE>fun</CODE> for the sake of clarity. -</P> -<P> -Example: -</P> -<PRE> - oper ss : Str -> {s : Str} = \x -> {s = x} ; -</PRE> -<P> -The operation can be <B>applied</B> to an argument, and GF will -<B>compute</B> the value: -</P> -<PRE> - ss "boy" ===> {s = "boy"} -</PRE> -<P> -The symbol <CODE>===></CODE> will be used for computation. -</P> -<P> -<!-- NEW --> -</P> -<P> -Notice the <B>lambda abstraction</B> form -</P> -<UL> -<LI><CODE>\</CODE><I>x</I> <CODE>-></CODE> <I>t</I> -</UL> - -<P> -This is read: -</P> -<UL> -<LI>function with variable <I>x</I> and <B>function body</B> <I>t</I> -</UL> - -<P> -For lambda abstraction with multiple arguments, we have the shorthand -</P> -<PRE> - \x,y -> t === \x -> \y -> t -</PRE> -<P> -Linearization rules actually use syntactic -sugar for abstraction: -</P> -<PRE> - lin f x = t === lin f = \x -> t -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc40"></A> -<H3>The ``resource`` module type</H3> -<P> -The <CODE>resource</CODE> module type is used to package -<CODE>oper</CODE> definitions into reusable resources. -</P> -<PRE> - resource StringOper = { - oper - SS : Type = {s : Str} ; - ss : Str -> SS = \x -> {s = x} ; - cc : SS -> SS -> SS = \x,y -> ss (x.s ++ y.s) ; - prefix : Str -> SS -> SS = \p,x -> ss (p ++ x.s) ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc41"></A> -<H3>Opening a resource</H3> -<P> -Any number of <CODE>resource</CODE> modules can be -<B>open</B>ed in a <CODE>concrete</CODE> syntax. -</P> -<PRE> - concrete FoodEng of Food = open StringOper in { - - lincat - S, Item, Kind, Quality = SS ; - - lin - Is item quality = cc item (prefix "is" quality) ; - This k = prefix "this" k ; - That k = prefix "that" k ; - QKind k q = cc k q ; - Wine = ss "wine" ; - Cheese = ss "cheese" ; - Fish = ss "fish" ; - Very = prefix "very" ; - Fresh = ss "fresh" ; - Warm = ss "warm" ; - Italian = ss "Italian" ; - Expensive = ss "expensive" ; - Delicious = ss "delicious" ; - Boring = ss "boring" ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc42"></A> -<H3>Partial application</H3> -<P> -<a name="secpartapp"></a> -</P> -<P> -The rule -</P> -<PRE> - lin This k = prefix "this" k ; -</PRE> -<P> -can be written more concisely -</P> -<PRE> - lin This = prefix "this" ; -</PRE> -<P> -Part of the art in functional programming: -decide the order of arguments in a function, -so that partial application can be used as much as possible. -</P> -<P> -For instance, <CODE>prefix</CODE> is typically applied to -linearization variables with constant strings. Hence we -put the <CODE>Str</CODE> argument before the <CODE>SS</CODE> argument. -</P> -<P> -<B>Exercise</B>. Define an operation <CODE>infix</CODE> analogous to <CODE>prefix</CODE>, -such that it allows you to write -</P> -<PRE> - lin Is = infix "is" ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc43"></A> -<H3>Testing resource modules</H3> -<P> -Import with the flag <CODE>-retain</CODE>, -</P> -<PRE> - > import -retain StringOper.gf -</PRE> -<P> -Compute the value with <CODE>compute_concrete = cc</CODE>, -</P> -<PRE> - > compute_concrete prefix "in" (ss "addition") - {s : Str = "in" ++ "addition"} -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc44"></A> -<H2>Grammar architecture</H2> -<P> -<a name="secarchitecture"></a> -</P> -<A NAME="toc45"></A> -<H3>Extending a grammar</H3> -<P> -A new module can <B>extend</B> an old one: -</P> -<PRE> - abstract Morefood = Food ** { - cat - Question ; - fun - QIs : Item -> Quality -> Question ; - Pizza : Kind ; - } -</PRE> -<P> -Parallel to the abstract syntax, extensions can -be built for concrete syntaxes: -</P> -<PRE> - concrete MorefoodEng of Morefood = FoodEng ** { - lincat - Question = {s : Str} ; - lin - QIs item quality = {s = "is" ++ item.s ++ quality.s} ; - Pizza = {s = "pizza"} ; - } -</PRE> -<P> -The effect of extension: all of the contents of the extended -and extending modules are put together. -</P> -<P> -In other words: the new module <B>inherits</B> the contents of the old module. -</P> -<P> -<!-- NEW --> -</P> -<P> -Simultaneous extension and opening: -</P> -<PRE> - concrete MorefoodIta of Morefood = FoodIta ** open StringOper in { - lincat - Question = SS ; - lin - QIs item quality = ss (item.s ++ "è" ++ quality.s) ; - Pizza = ss "pizza" ; - } -</PRE> -<P> -Resource modules can extend other resource modules - thus it is -possible to build resource hierarchies. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc46"></A> -<H3>Multiple inheritance</H3> -<P> -Extend several grammars at the same time: -</P> -<PRE> - abstract Foodmarket = Food, Fruit, Mushroom ** { - fun - FruitKind : Fruit -> Kind ; - MushroomKind : Mushroom -> Kind ; - } -</PRE> -<P> -where -</P> -<PRE> - abstract Fruit = { - cat Fruit ; - fun Apple, Peach : Fruit ; - } - - abstract Mushroom = { - cat Mushroom ; - fun Cep, Agaric : Mushroom ; - } -</PRE> -<P></P> -<P> -<B>Exercise</B>. Refactor <CODE>Food</CODE> by taking apart <CODE>Wine</CODE> into a special -<CODE>Drink</CODE> module. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc47"></A> -<H1>Lesson 3: Grammars with parameters</H1> -<P> -<a name="chapfour"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>implement sophisticated linguistic structures: - <UL> - <LI>morphology: the inflection of words - <LI>agreement: rules for selecting word forms in syntactic combinations - </UL> -</UL> - -<UL> -<LI>Cover all GF constructs for concrete syntax -</UL> - -<P> -It is possible to skip this chapter and go directly -to the next, since the use of the GF Resource Grammar library -makes it unnecessary to use parameters: they -could be left to library implementors. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc48"></A> -<H2>The problem: words have to be inflected</H2> -<P> -Plural forms are needed in things like -<center> -<I>these Italian wines are delicious</I> -</center> -This requires two things: -</P> -<UL> -<LI>the <B>inflection</B> of nouns and verbs in singular and plural -<LI>the <B>agreement</B> of the verb to subject: - the verb must have the same number as the subject -</UL> - -<P> -Different languages have different types of inflection and agreement. -</P> -<UL> -<LI>Italian has also gender (masculine vs. feminine). -</UL> - -<P> -In a multilingual grammar, -we want to ignore such distinctions in abstract syntax. -</P> -<P> -<B>Exercise</B>. Make a list of the possible forms that nouns, -adjectives, and verbs can have in some languages that you know. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc49"></A> -<H2>Parameters and tables</H2> -<P> -We define the <B>parameter type</B> of number in English by -a new form of judgement: -</P> -<PRE> - param Number = Sg | Pl ; -</PRE> -<P> -This judgement defines the parameter type <CODE>Number</CODE> by listing -its two <B>constructors</B>, <CODE>Sg</CODE> and <CODE>Pl</CODE> -(singular and plural). -</P> -<P> -We give <CODE>Kind</CODE> a linearization type that has a <B>table</B> depending on number: -</P> -<PRE> - lincat Kind = {s : Number => Str} ; -</PRE> -<P> -The <B>table type</B> <CODE>Number => Str</CODE> is similar a function type -(<CODE>Number -> Str</CODE>). -</P> -<P> -Difference: the argument must be a parameter type. Then -the argument-value pairs can be listed in a finite table. -</P> -<P> -<!-- NEW --> -</P> -<P> -Here is a table: -</P> -<PRE> - lin Cheese = { - s = table { - Sg => "cheese" ; - Pl => "cheeses" - } - } ; -</PRE> -<P> -The table has <B>branches</B>, with a <B>pattern</B> on the -left of the arrow <CODE>=></CODE> and a <B>value</B> on the right. -</P> -<P> -The application of a table is done by the <B>selection</B> operator <CODE>!</CODE>. -</P> -<P> -It which is computed by <B>pattern matching</B>: return -the value from the first branch whose pattern matches the -argument. For instance, -</P> -<PRE> - table {Sg => "cheese" ; Pl => "cheeses"} ! Pl - ===> "cheeses" -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -<B>Case expressions</B> are syntactic sugar: -</P> -<PRE> - case e of {...} === table {...} ! e -</PRE> -<P> -Since they are familiar to Haskell and ML programmers, they can come out handy -when writing GF programs. -</P> -<P> -<!-- NEW --> -</P> -<P> -Constructors can take arguments from other parameter types. -</P> -<P> -Example: forms of English verbs (except <I>be</I>): -</P> -<PRE> - param VerbForm = VPresent Number | VPast | VPastPart | VPresPart ; -</PRE> -<P> -Fact expressed: only present tense has number variation. -</P> -<P> -Example table: the forms of the verb <I>drink</I>: -</P> -<PRE> - table { - VPresent Sg => "drinks" ; - VPresent Pl => "drink" ; - VPast => "drank" ; - VPastPart => "drunk" ; - VPresPart => "drinking" - } -</PRE> -<P></P> -<P> -<B>Exercise</B>. In an earlier exercise (previous section), -you made a list of the possible -forms that nouns, adjectives, and verbs can have in some languages that -you know. Now take some of the results and implement them by -using parameter type definitions and tables. Write them into a <CODE>resource</CODE> -module, which you can test by using the command <CODE>compute_concrete</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc50"></A> -<H2>Inflection tables and paradigms</H2> -<P> -A morphological <B>paradigm</B> is a formula telling how a class of -words is inflected. -</P> -<P> -From the GF point of view, a paradigm is a function that takes -a <B>lemma</B> (also known as a <B>dictionary form</B>, or a <B>citation form</B>) and -returns an inflection table. -</P> -<P> -The following operation defines the regular noun paradigm of English: -</P> -<PRE> - oper regNoun : Str -> {s : Number => Str} = \dog -> { - s = table { - Sg => dog ; - Pl => dog + "s" - } - } ; -</PRE> -<P> -The <B>gluing</B> operator <CODE>+</CODE> glues strings to one <B>token</B>: -</P> -<PRE> - (regNoun "cheese").s ! Pl ===> "cheese" + "s" ===> "cheeses" -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -A more complex example: regular verbs, -</P> -<PRE> - oper regVerb : Str -> {s : VerbForm => Str} = \talk -> { - s = table { - VPresent Sg => talk + "s" ; - VPresent Pl => talk ; - VPresPart => talk + "ing" ; - _ => talk + "ed" - } - } ; -</PRE> -<P> -The catch-all case for the past tense and the past participle -uses a <B>wild card</B> pattern <CODE>_</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc51"></A> -<H3>Exercises on morphology</H3> -<OL> -<LI>Identify cases in which the <CODE>regNoun</CODE> paradigm does not -apply in English, and implement some alternative paradigms. -<P></P> -<LI>Implement some regular paradigms for other languages you have -considered in earlier exercises. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc52"></A> -<H2>Using parameters in concrete syntax</H2> -<P> -Purpose: a more radical -variation between languages -than just the use of different words and word orders. -</P> -<P> -We add to the grammar <CODE>Food</CODE> two rules for forming plural items: -</P> -<PRE> - fun These, Those : Kind -> Item ; -</PRE> -<P> -We also add a noun which in Italian has the feminine case: -</P> -<PRE> - fun Pizza : Kind ; -</PRE> -<P> -This will force us to deal with gender- -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc53"></A> -<H3>Agreement</H3> -<P> -In English, the phrase-forming rule -</P> -<PRE> - fun Is : Item -> Quality -> Phrase ; -</PRE> -<P> -is affected by the number because of <B>subject-verb agreement</B>: -the verb of a sentence must be inflected in the number of the subject, -</P> -<PRE> - Is (This Pizza) Warm ===> "this pizza is warm" - Is (These Pizza) Warm ===> "these pizzas are warm" -</PRE> -<P> -It is the <B>copula</B> (the verb <I>be</I>) that is affected: -</P> -<PRE> - oper copula : Number -> Str = \n -> - case n of { - Sg => "is" ; - Pl => "are" - } ; -</PRE> -<P> -The <B>subject</B> <CODE>Item</CODE> must have such a number to provide to the copula: -</P> -<PRE> - lincat Item = {s : Str ; n : Number} ; -</PRE> -<P> -Now we can write -</P> -<PRE> - lin Is item qual = {s = item.s ++ copula item.n ++ qual.s} ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc54"></A> -<H3>Determiners</H3> -<P> -How does an <CODE>Item</CODE> subject receive its number? The rules -</P> -<PRE> - fun This, These : Kind -> Item ; -</PRE> -<P> -add <B>determiners</B>, either <I>this</I> or <I>these</I>, which -require different <I>this pizza</I> vs. -<I>these pizzas</I>. -</P> -<P> -Thus <CODE>Kind</CODE> must have both singular and plural forms: -</P> -<PRE> - lincat Kind = {s : Number => Str} ; -</PRE> -<P> -We can write -</P> -<PRE> - lin This kind = { - s = "this" ++ kind.s ! Sg ; - n = Sg - } ; - - lin These kind = { - s = "these" ++ kind.s ! Pl ; - n = Pl - } ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -To avoid copy-and-paste, we can factor out the pattern of determination, -</P> -<PRE> - oper det : - Str -> Number -> {s : Number => Str} -> {s : Str ; n : Number} = - \det,n,kind -> { - s = det ++ kind.s ! n ; - n = n - } ; -</PRE> -<P> -Now we can write -</P> -<PRE> - lin This = det Sg "this" ; - lin These = det Pl "these" ; -</PRE> -<P> -In a more <B>lexicalized</B> grammar, determiners would be a category: -</P> -<PRE> - lincat Det = {s : Str ; n : Number} ; - fun Det : Det -> Kind -> Item ; - lin Det det kind = { - s = det.s ++ kind.s ! det.n ; - n = det.n - } ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc55"></A> -<H3>Parametric vs. inherent features</H3> -<P> -<CODE>Kind</CODE>s have number as a <B>parametric feature</B>: both singular and plural -can be formed, -</P> -<PRE> - lincat Kind = {s : Number => Str} ; -</PRE> -<P> -<CODE>Item</CODE>s have number as an <B>inherent feature</B>: they are inherently either -singular or plural, -</P> -<PRE> - lincat Item = {s : Str ; n : Number} ; -</PRE> -<P> -Italian <CODE>Kind</CODE> will have parametric number and inherent gender: -</P> -<PRE> - lincat Kind = {s : Number => Str ; g : Gender} ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -Questions to ask when designing parameters: -</P> -<UL> -<LI>existence: what forms are possible to build by morphological and - other means? -<LI>need: what features are expected via agreement or government? -</UL> - -<P> -Dictionaries give good advice: -<center> -<B>uomo</B>, pl. <I>uomini</I>, n.m. "man" -</center> -tells that <I>uomo</I> is a masculine noun with the plural form <I>uomini</I>. -Hence, parametric number and an inherent gender. -</P> -<P> -For words, inherent features are usually given as lexical information. -</P> -<P> -For combinations, they are <I>inherited</I> from some part of the construction -(typically the one called the <B>head</B>). Italian modification: -</P> -<PRE> - lin QKind qual kind = - let gen = kind.g in { - s = table {n => kind.s ! n ++ qual.s ! gen ! n} ; - g = gen - } ; -</PRE> -<P> -Notice -</P> -<UL> -<LI><B>local definition</B> (<CODE>let</CODE> expression) -<LI><B>variable pattern</B> <CODE>n</CODE> -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc56"></A> -<H2>An English concrete syntax for Foods with parameters</H2> -<P> -We use some string operations from the library <CODE>Prelude</CODE> are used. -</P> -<PRE> - concrete FoodsEng of Foods = open Prelude in { - - lincat - S, Quality = SS ; - Kind = {s : Number => Str} ; - Item = {s : Str ; n : Number} ; - - lin - Is item quality = ss (item.s ++ copula item.n ++ quality.s) ; - This = det Sg "this" ; - That = det Sg "that" ; - These = det Pl "these" ; - Those = det Pl "those" ; - QKind quality kind = {s = table {n => quality.s ++ kind.s ! n}} ; - Wine = regNoun "wine" ; - Cheese = regNoun "cheese" ; - Fish = noun "fish" "fish" ; - Pizza = regNoun "pizza" ; - Very = prefixSS "very" ; - Fresh = ss "fresh" ; - Warm = ss "warm" ; - Italian = ss "Italian" ; - Expensive = ss "expensive" ; - Delicious = ss "delicious" ; - Boring = ss "boring" ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<PRE> - param - Number = Sg | Pl ; - - oper - det : Number -> Str -> {s : Number => Str} -> {s : Str ; n : Number} = - \n,d,cn -> { - s = d ++ cn.s ! n ; - n = n - } ; - noun : Str -> Str -> {s : Number => Str} = - \man,men -> {s = table { - Sg => man ; - Pl => men - } - } ; - regNoun : Str -> {s : Number => Str} = - \car -> noun car (car + "s") ; - copula : Number -> Str = - \n -> case n of { - Sg => "is" ; - Pl => "are" - } ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc57"></A> -<H2>More on inflection paradigms</H2> -<P> -<a name="secinflection"></a> -</P> -<P> -Let us extend the English noun paradigms so that we can -deal with all nouns, not just the regular ones. The goal is to -provide a morphology module that makes it easy to -add words to a lexicon. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc58"></A> -<H3>Worst-case functions</H3> -<P> -We perform <B>data abstraction</B> from the type -of nouns by writing a a <B>worst-case function</B>: -</P> -<PRE> - oper Noun : Type = {s : Number => Str} ; - - oper mkNoun : Str -> Str -> Noun = \x,y -> { - s = table { - Sg => x ; - Pl => y - } - } ; - - oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ; -</PRE> -<P> -Then we can define -</P> -<PRE> - lincat N = Noun ; - lin Mouse = mkNoun "mouse" "mice" ; - lin House = regNoun "house" ; -</PRE> -<P> -where the underlying types are not seen. -</P> -<P> -<!-- NEW --> -</P> -<P> -We are free to change the undelying definitions, e.g. -add <B>case</B> (nominative or genitive) to noun inflection: -</P> -<PRE> - param Case = Nom | Gen ; - - oper Noun : Type = {s : Number => Case => Str} ; -</PRE> -<P> -Now we have to redefine the worst-case function -</P> -<PRE> - oper mkNoun : Str -> Str -> Noun = \x,y -> { - s = table { - Sg => table { - Nom => x ; - Gen => x + "'s" - } ; - Pl => table { - Nom => y ; - Gen => y + case last y of { - "s" => "'" ; - _ => "'s" - } - } - } ; -</PRE> -<P> -But up from this level, we can retain the old definitions -</P> -<PRE> - lin Mouse = mkNoun "mouse" "mice" ; - oper regNoun : Str -> Noun = \x -> mkNoun x (x + "s") ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -In the last definition of <CODE>mkNoun</CODE>, we used a case expression -on the last character of the plural, as well as the <CODE>Prelude</CODE> -operation -</P> -<PRE> - last : Str -> Str ; -</PRE> -<P> -returning the string consisting of the last character. -</P> -<P> -The case expression uses <B>pattern matching over strings</B>, which -is supported in GF, alongside with pattern matching over -parameters. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc59"></A> -<H3>Smart paradigms</H3> -<P> -The regular <I>dog</I>-<I>dogs</I> paradigm has -predictable variations: -</P> -<UL> -<LI>nouns ending with an <I>y</I>: <I>fly</I>-<I>flies</I>, except if - a vowel precedes the <I>y</I>: <I>boy</I>-<I>boys</I> -<LI>nouns ending with <I>s</I>, <I>ch</I>, and a number of - other endings: <I>bus</I>-<I>buses</I>, <I>leech</I>-<I>leeches</I> -</UL> - -<P> -We could provide alternative paradigms: -</P> -<PRE> - noun_y : Str -> Noun = \fly -> mkNoun fly (init fly + "ies") ; - noun_s : Str -> Noun = \bus -> mkNoun bus (bus + "es") ; -</PRE> -<P> -(The Prelude function <CODE>init</CODE> drops the last character of a token.) -</P> -<P> -Drawbacks: -</P> -<UL> -<LI>it can be difficult to select the correct paradigm -<LI>it can be difficult to remember the names of the different paradigms -</UL> - -<P> -<!-- NEW --> -</P> -<P> -Better solution: a <B>smart paradigm</B>: -</P> -<PRE> - regNoun : Str -> Noun = \w -> - let - ws : Str = case w of { - _ + ("a" | "e" | "i" | "o") + "o" => w + "s" ; -- bamboo - _ + ("s" | "x" | "sh" | "o") => w + "es" ; -- bus, hero - _ + "z" => w + "zes" ;-- quiz - _ + ("a" | "e" | "o" | "u") + "y" => w + "s" ; -- boy - x + "y" => x + "ies" ;-- fly - _ => w + "s" -- car - } - in - mkNoun w ws -</PRE> -<P> -GF has <B>regular expression patterns</B>: -</P> -<UL> -<LI><B>disjunctive patterns</B> <I>P</I> <CODE>|</CODE> <I>Q</I> -<LI><B>concatenation patterns</B> <I>P</I> <CODE>+</CODE> <I>Q</I> -</UL> - -<P> -The patterns are ordered in such a way that, for instance, -the suffix <CODE>"oo"</CODE> prevents <I>bamboo</I> from matching the suffix -<CODE>"o"</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc60"></A> -<H3>Exercises on regular patterns</H3> -<OL> -<LI>The same rules that form plural nouns in English also -apply in the formation of third-person singular verbs. -Write a regular verb paradigm that uses this idea, but first -rewrite <CODE>regNoun</CODE> so that the analysis needed to build <I>s</I>-forms -is factored out as a separate <CODE>oper</CODE>, which is shared with -<CODE>regVerb</CODE>. -<P></P> -<LI>Extend the verb paradigms to cover all verb forms -in English, with special care taken of variations with the suffix -<I>ed</I> (e.g. <I>try</I>-<I>tried</I>, <I>use</I>-<I>used</I>). -<P></P> -<LI>Implement the German <B>Umlaut</B> operation on word stems. -The operation changes the vowel of the stressed stem syllable as follows: -<I>a</I> to <I>ä</I>, <I>au</I> to <I>äu</I>, <I>o</I> to <I>ö</I>, and <I>u</I> to <I>ü</I>. You -can assume that the operation only takes syllables as arguments. Test the -operation to see whether it correctly changes <I>Arzt</I> to <I>Ärzt</I>, -<I>Baum</I> to <I>Bäum</I>, <I>Topf</I> to <I>Töpf</I>, and <I>Kuh</I> to <I>Küh</I>. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc61"></A> -<H3>Function types with variables</H3> -<P> -In <a href="#chapsix">Lesson 5</a>, <B>dependent function types</B> need a notation -that binds a variable to the argument type, as in -</P> -<PRE> - switchOff : (k : Kind) -> Action k -</PRE> -<P> -Function types <I>without</I> variables are actually a shorthand: -</P> -<PRE> - PredVP : NP -> VP -> S -</PRE> -<P> -means -</P> -<PRE> - PredVP : (x : NP) -> (y : VP) -> S -</PRE> -<P> -or any other naming of the variables. -</P> -<P> -<!-- NEW --> -</P> -<P> -Sometimes variables shorten the code, since they can share a type: -</P> -<PRE> - octuple : (x,y,z,u,v,w,s,t : Str) -> Str -</PRE> -<P> -If a bound variable is not used, it can be replaced by a wildcard: -</P> -<PRE> - octuple : (_,_,_,_,_,_,_,_ : Str) -> Str -</PRE> -<P> -A good practice is to indicate the number of arguments: -</P> -<PRE> - octuple : (x1,_,_,_,_,_,_,x8 : Str) -> Str -</PRE> -<P> -For inflection paradigms, it is handy to use heuristic variable names, -looking like the expected forms: -</P> -<PRE> - mkNoun : (mouse,mice : Str) -> Noun -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc62"></A> -<H3>Separating operation types and definitions</H3> -<P> -In librarues, it is useful to group type signatures separately from -definitions. It is possible to divide an <CODE>oper</CODE> judgement, -</P> -<PRE> - oper regNoun : Str -> Noun ; - oper regNoun s = mkNoun s (s + "s") ; -</PRE> -<P> -and put the parts in different places. -</P> -<P> -With the <CODE>interface</CODE> and <CODE>instance</CODE> module types -(see <a href="#secinterface">here</a>): the parts can even be put to different files. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc63"></A> -<H3>Overloading of operations</H3> -<P> -<B>Overloading</B>: different functions can be given the same name, as e.g. in C++. -</P> -<P> -The compiler performs <B>overload resolution</B>, which works as long as the -functions have different types. -</P> -<P> -In GF, the functions must be grouped together in <CODE>overload</CODE> groups. -</P> -<P> -Example: different ways to define nouns in English: -</P> -<PRE> - oper mkN : overload { - mkN : (dog : Str) -> Noun ; -- regular nouns - mkN : (mouse,mice : Str) -> Noun ; -- irregular nouns - } -</PRE> -<P> -Cf. dictionaries: if the -word is regular, just one form is needed. If it is irregular, -more forms are given. -</P> -<P> -The definition can be given separately, or at the same time, as the types: -</P> -<PRE> - oper mkN = overload { - mkN : (dog : Str) -> Noun = regNoun ; - mkN : (mouse,mice : Str) -> Noun = mkNoun ; - } -</PRE> -<P> -<B>Exercise</B>. Design a system of English verb paradigms presented by -an overload group. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc64"></A> -<H3>Morphological analysis and morphology quiz</H3> -<P> -The command <CODE>morpho_analyse = ma</CODE> -can be used to read a text and return for each word its analyses -(in the current grammar): -</P> -<PRE> - > read_file bible.txt | morpho_analyse -</PRE> -<P> -The command <CODE>morpho_quiz = mq</CODE> generates inflection exercises. -</P> -<PRE> - % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo - - > morpho_quiz -cat=V - - Welcome to GF Morphology Quiz. - ... - - réapparaître : VFin VCondit Pl P2 - réapparaitriez - > No, not réapparaitriez, but - réapparaîtriez - Score 0/1 -</PRE> -<P> -To create a list for later use, use the command <CODE>morpho_list = ml</CODE> -</P> -<PRE> - > morpho_list -number=25 -cat=V | write_file exx.txt -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc65"></A> -<H2>The Italian Foods grammar</H2> -<P> -<a name="secitalian"></a> -</P> -<P> -Parameters include not only number but also gender. -</P> -<PRE> - concrete FoodsIta of Foods = open Prelude in { - - param - Number = Sg | Pl ; - Gender = Masc | Fem ; -</PRE> -<P> -Qualities are inflected for gender and number, whereas kinds -have a parametric number and an inherent gender. -Items have an inherent number and gender. -</P> -<PRE> - lincat - Phr = SS ; - Quality = {s : Gender => Number => Str} ; - Kind = {s : Number => Str ; g : Gender} ; - Item = {s : Str ; g : Gender ; n : Number} ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -A Quality is an adjective, with one form for each gender-number combination. -</P> -<PRE> - oper - adjective : (_,_,_,_ : Str) -> {s : Gender => Number => Str} = - \nero,nera,neri,nere -> { - s = table { - Masc => table { - Sg => nero ; - Pl => neri - } ; - Fem => table { - Sg => nera ; - Pl => nere - } - } - } ; -</PRE> -<P> -Regular adjectives work by adding endings to the stem. -</P> -<PRE> - regAdj : Str -> {s : Gender => Number => Str} = \nero -> - let ner = init nero - in adjective nero (ner + "a") (ner + "i") (ner + "e") ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -For noun inflection, we are happy to give the two forms and the gender -explicitly: -</P> -<PRE> - noun : Str -> Str -> Gender -> {s : Number => Str ; g : Gender} = - \vino,vini,g -> { - s = table { - Sg => vino ; - Pl => vini - } ; - g = g - } ; -</PRE> -<P> -We need only number variation for the copula. -</P> -<PRE> - copula : Number -> Str = - \n -> case n of { - Sg => "è" ; - Pl => "sono" - } ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -Determination is more complex than in English, because of gender: -</P> -<PRE> - det : Number -> Str -> Str -> {s : Number => Str ; g : Gender} -> - {s : Str ; g : Gender ; n : Number} = - \n,m,f,cn -> { - s = case cn.g of {Masc => m ; Fem => f} ++ cn.s ! n ; - g = cn.g ; - n = n - } ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<P> -The complete set of linearization rules: -</P> -<PRE> - lin - Is item quality = - ss (item.s ++ copula item.n ++ quality.s ! item.g ! item.n) ; - This = det Sg "questo" "questa" ; - That = det Sg "quel" "quella" ; - These = det Pl "questi" "queste" ; - Those = det Pl "quei" "quelle" ; - QKind quality kind = { - s = \\n => kind.s ! n ++ quality.s ! kind.g ! n ; - g = kind.g - } ; - Wine = noun "vino" "vini" Masc ; - Cheese = noun "formaggio" "formaggi" Masc ; - Fish = noun "pesce" "pesci" Masc ; - Pizza = noun "pizza" "pizze" Fem ; - Very qual = {s = \\g,n => "molto" ++ qual.s ! g ! n} ; - Fresh = adjective "fresco" "fresca" "freschi" "fresche" ; - Warm = regAdj "caldo" ; - Italian = regAdj "italiano" ; - Expensive = regAdj "caro" ; - Delicious = regAdj "delizioso" ; - Boring = regAdj "noioso" ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc66"></A> -<H3>Exercises on using parameters</H3> -<OL> -<LI>Experiment with multilingual generation and translation in the -<CODE>Foods</CODE> grammars. -<P></P> -<LI>Add items, qualities, and determiners to the grammar, -and try to get their inflection and inherent features right. -<P></P> -<LI>Write a concrete syntax of <CODE>Food</CODE> for a language of your choice, -now aiming for complete grammatical correctness by the use of parameters. -<P></P> -<LI>Measure the size of the context-free grammar corresponding to -<CODE>FoodsIta</CODE>. You can do this by printing the grammar in the context-free format -(<CODE>print_grammar -printer=bnf</CODE>) and counting the lines. -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc67"></A> -<H2>Discontinuous constituents</H2> -<P> -A linearization record may contain more strings than one, and those -strings can be put apart in linearization. -</P> -<P> -Example: English particle -verbs, (<I>switch off</I>). The object can appear between: -</P> -<P> -<I>he switched it off</I> -</P> -<P> -The verb <I>switch off</I> is called a -<B>discontinuous constituents</B>. -</P> -<P> -We can define transitive verbs and their combinations as follows: -</P> -<PRE> - lincat TV = {s : Number => Str ; part : Str} ; - - fun AppTV : Item -> TV -> Item -> Phrase ; - - lin AppTV subj tv obj = - {s = subj.s ++ tv.s ! subj.n ++ obj.s ++ tv.part} ; -</PRE> -<P></P> -<P> -<B>Exercise</B>. Define the language <CODE>a^n b^n c^n</CODE> in GF, i.e. -any number of <I>a</I>'s followed by the same number of <I>b</I>'s and -the same number of <I>c</I>'s. This language is not context-free, -but can be defined in GF by using discontinuous constituents. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc68"></A> -<H2>Strings at compile time vs. run time</H2> -<P> -Tokens are created in the following ways: -</P> -<UL> -<LI>quoted string: <CODE>"foo"</CODE> -<LI>gluing : <CODE>t + s</CODE> -<LI>predefined operations <CODE>init, tail, tk, dp</CODE> -<LI>pattern matching over strings -</UL> - -<P> -Since <I>tokens must be known at compile time</I>, -the above operations may not be applied to <B>run-time variables</B> -(i.e. variables that stand for function arguments in linearization rules). -</P> -<P> -Hence it is not legal to write -</P> -<PRE> - cat Noun ; - fun Plural : Noun -> Noun ; - lin Plural n = {s = n.s + "s"} ; -</PRE> -<P> -because <CODE>n</CODE> is a run-time variable. Also -</P> -<PRE> - lin Plural n = {s = (regNoun n).s ! Pl} ; -</PRE> -<P> -is incorrect with <CODE>regNoun</CODE> as defined <a href="#secinflection">here</a>, because the run-time -variable is eventually sent to string pattern matching and gluing. -</P> -<P> -<!-- NEW --> -</P> -<P> -How to write tokens together without a space? -</P> -<PRE> - lin Question p = {s = p + "?"} ; -</PRE> -<P> -is incorrect. -</P> -<P> -The way to go is to use an <B>unlexer</B> that creates correct spacing -after linearization. -</P> -<P> -Correspondingly, a <B>lexer</B> that e.g. analyses <CODE>"warm?"</CODE> into -to tokens is needed before parsing. -This topic will be covered in <a href="#seclexing">here</a>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc69"></A> -<H3>Supplementary constructs for concrete syntax</H3> -<H4>Record extension and subtyping</H4> -<P> -The symbol <CODE>**</CODE> is used for both record types and record objects. -</P> -<PRE> - lincat TV = Verb ** {c : Case} ; - - lin Follow = regVerb "folgen" ** {c = Dative} ; -</PRE> -<P> -<CODE>TV</CODE> becomes a <B>subtype</B> of <CODE>Verb</CODE>. -</P> -<P> -If <I>T</I> is a subtype of <I>R</I>, an object of <I>T</I> can be used whenever -an object of <I>R</I> is required. -</P> -<P> -<B>Covariance</B>: a function returning a record <I>T</I> as value can -also be used to return a value of a supertype <I>R</I>. -</P> -<P> -<B>Contravariance</B>: a function taking an <I>R</I> as argument -can also be applied to any object of a subtype <I>T</I>. -</P> -<P> -<!-- NEW --> -</P> -<H4>Tuples and product types</H4> -<P> -Product types and tuples are syntactic sugar for record types and records: -</P> -<PRE> - T1 * ... * Tn === {p1 : T1 ; ... ; pn : Tn} - <t1, ..., tn> === {p1 = T1 ; ... ; pn = Tn} -</PRE> -<P> -Thus the labels <CODE>p1, p2,...</CODE> are hard-coded. -</P> -<P> -<!-- NEW --> -</P> -<H4>Prefix-dependent choices</H4> -<P> -English indefinite article: -</P> -<PRE> - oper artIndef : Str = - pre {"a" ; "an" / strs {"a" ; "e" ; "i" ; "o"}} ; -</PRE> -<P> -Thus -</P> -<PRE> - artIndef ++ "cheese" ---> "a" ++ "cheese" - artIndef ++ "apple" ---> "an" ++ "apple" -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc70"></A> -<H1>Lesson 4: Using the resource grammar library</H1> -<P> -<a name="chapfive"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>navigate in the GF resource grammar library and use it in applications -<LI>get acquainted with basic linguistic categories -<LI>write functors to achieve maximal sharing of code in multilingual grammars -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc71"></A> -<H2>The coverage of the library</H2> -<P> -The current 16 resource languages (GF version 3.2, December 2010) are -</P> -<UL> -<LI><CODE>Bul</CODE>garian -<LI><CODE>Cat</CODE>alan -<LI><CODE>Dan</CODE>ish -<LI><CODE>Dut</CODE>ch -<LI><CODE>Eng</CODE>lish -<LI><CODE>Fin</CODE>nish -<LI><CODE>Fre</CODE>nch -<LI><CODE>Ger</CODE>man -<LI><CODE>Ita</CODE>lian -<LI><CODE>Nor</CODE>wegian -<LI><CODE>Pol</CODE>ish -<LI><CODE>Ron</CODE>, Romanian -<LI><CODE>Rus</CODE>sian -<LI><CODE>Spa</CODE>nish -<LI><CODE>Swe</CODE>dish -<LI><CODE>Urd</CODE>u -</UL> - -<P> -The first three letters (<CODE>Eng</CODE> etc) are used in grammar module names -(ISO 639-3 standard). -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc72"></A> -<H2>The structure of the library</H2> -<P> -<a name="seclexical"></a> -</P> -<P> -Semantic grammars (up to now in this tutorial): -a grammar defines a system of meanings (abstract syntax) and -tells how they are expressed(concrete syntax). -</P> -<P> -Resource grammars (as usual in linguistic tradition): -a grammar specifies the <B>grammatically correct combinations of words</B>, -whatever their meanings are. -</P> -<P> -With resource grammars, we can achieve a -wider coverage than with semantic grammars. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc73"></A> -<H3>Lexical vs. phrasal rules</H3> -<P> -A resource grammar has two kinds of categories and two kinds of rules: -</P> -<UL> -<LI>lexical: - <UL> - <LI>lexical categories, to classify words - <LI>lexical rules, to define words and their properties - <P></P> - </UL> -<LI>phrasal (combinatorial, syntactic): - <UL> - <LI>phrasal categories, to classify phrases of arbitrary size - <LI>phrasal rules, to combine phrases into larger phrases - </UL> -</UL> - -<P> -GE makes no formal distinction between these two kinds. -</P> -<P> -But it is a good discipline to follow. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc74"></A> -<H3>Lexical categories</H3> -<P> -Two kinds of lexical categories: -</P> -<UL> -<LI><B>closed</B>: - <UL> - <LI>a finite number of words - <LI>seldom extended in the history of language - <LI>structural words / function words, e.g. -<PRE> - Conj ; -- conjunction e.g. "and" - Det ; -- determiner e.g. "this" -</PRE> - <P></P> - </UL> -<LI><B>open</B>: - <UL> - <LI>new words are added all the time - <LI>content words, e.g. -<PRE> - N ; -- noun e.g. "pizza" - A ; -- adjective e.g. "good" - V ; -- verb e.g. "sleep" -</PRE> - </UL> -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc75"></A> -<H3>Lexical rules</H3> -<P> -Closed classes: module <CODE>Syntax</CODE>. In the <CODE>Foods</CODE> grammar, we need -</P> -<PRE> - this_Det, that_Det, these_Det, those_Det : Det ; - very_AdA : AdA ; -</PRE> -<P> -Naming convention: word followed by the category (so we can -distinguish the quantifier <I>that</I> from the conjunction <I>that</I>). -</P> -<P> -Open classes have no objects in <CODE>Syntax</CODE>. Words are -built as they are needed in applications: if we have -</P> -<PRE> - fun Wine : Kind ; -</PRE> -<P> -we will define -</P> -<PRE> - lin Wine = mkN "wine" ; -</PRE> -<P> -where we use <CODE>mkN</CODE> from <CODE>ParadigmsEng</CODE>: -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc76"></A> -<H3>Resource lexicon</H3> -<P> -Alternative concrete syntax for -</P> -<PRE> - fun Wine : Kind ; -</PRE> -<P> -is to provide a <B>resource lexicon</B>, which contains definitions such as -</P> -<PRE> - oper wine_N : N = mkN "wine" ; -</PRE> -<P> -so that we can write -</P> -<PRE> - lin Wine = wine_N ; -</PRE> -<P> -Advantages: -</P> -<UL> -<LI>we accumulate a reusable lexicon -<LI>we can use a <a href="#secfunctor">here</a> to speed up multilingual grammar implementation -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc77"></A> -<H3>Phrasal categories</H3> -<P> -In <CODE>Foods</CODE>, we need just four phrasal categories: -</P> -<PRE> - Cl ; -- clause e.g. "this pizza is good" - NP ; -- noun phrase e.g. "this pizza" - CN ; -- common noun e.g. "warm pizza" - AP ; -- adjectival phrase e.g. "very warm" -</PRE> -<P> -Clauses are similar to sentences (<CODE>S</CODE>), but without a -fixed tense and mood; see <a href="#secextended">here</a> for how they relate. -</P> -<P> -Common nouns are made into noun phrases by adding determiners. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc78"></A> -<H3>Syntactic combinations</H3> -<P> -We need the following combinations: -</P> -<PRE> - mkCl : NP -> AP -> Cl ; -- e.g. "this pizza is very warm" - mkNP : Det -> CN -> NP ; -- e.g. "this pizza" - mkCN : AP -> CN -> CN ; -- e.g. "warm pizza" - mkAP : AdA -> AP -> AP ; -- e.g. "very warm" -</PRE> -<P> -We also need <B>lexical insertion</B>, to form phrases from single words: -</P> -<PRE> - mkCN : N -> NP ; - mkAP : A -> AP ; -</PRE> -<P> -Naming convention: to construct a <I>C</I>, use a function <CODE>mk</CODE><I>C</I>. -</P> -<P> -Heavy overloading: the current library -(version 1.2) has 23 operations named <CODE>mkNP</CODE>! -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc79"></A> -<H3>Example syntactic combination</H3> -<P> -The sentence -<center> -<I>these very warm pizzas are Italian</I> -</center> -can be built as follows: -</P> -<PRE> - mkCl - (mkNP these_Det - (mkCN (mkAP very_AdA (mkAP warm_A)) (mkCN pizza_CN))) - (mkAP italian_AP) -</PRE> -<P> -The task now: to define the concrete syntax of <CODE>Foods</CODE> so that -this syntactic tree gives the value of linearizing the semantic tree -</P> -<PRE> - Is (These (QKind (Very Warm) Pizza)) Italian -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc80"></A> -<H2>The resource API</H2> -<P> -Language-specific and language-independent parts - roughly, -</P> -<UL> -<LI>the syntax API <CODE>Syntax</CODE><I>L</I> has the same types and - functions for all languages <I>L</I> -<LI>the morphology API <CODE>Paradigms</CODE><I>L</I> has partly - different types and functions - for different languages <I>L</I> -</UL> - -<P> -Full API documentation on-line: the <B>resource synopsis</B>, -</P> -<P> -<A HREF="http://grammaticalframework.org/lib/doc/synopsis.html"><CODE>grammaticalframework.org/lib/doc/synopsis.html</CODE></A> -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc81"></A> -<H3>A miniature resource API: categories</H3> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Category</TH> -<TH>Explanation</TH> -<TH COLSPAN="2">Example</TH> -</TR> -<TR> -<TD><CODE>Cl</CODE></TD> -<TD>clause (sentence), with all tenses</TD> -<TD><I>she looks at this</I></TD> -</TR> -<TR> -<TD><CODE>AP</CODE></TD> -<TD>adjectival phrase</TD> -<TD><I>very warm</I></TD> -</TR> -<TR> -<TD><CODE>CN</CODE></TD> -<TD>common noun (without determiner)</TD> -<TD><I>red house</I></TD> -</TR> -<TR> -<TD><CODE>NP</CODE></TD> -<TD>noun phrase (subject or object)</TD> -<TD><I>the red house</I></TD> -</TR> -<TR> -<TD><CODE>AdA</CODE></TD> -<TD>adjective-modifying adverb,</TD> -<TD><I>very</I></TD> -</TR> -<TR> -<TD><CODE>Det</CODE></TD> -<TD>determiner</TD> -<TD><I>these</I></TD> -</TR> -<TR> -<TD><CODE>A</CODE></TD> -<TD>one-place adjective</TD> -<TD><I>warm</I></TD> -</TR> -<TR> -<TD><CODE>N</CODE></TD> -<TD>common noun</TD> -<TD><I>house</I></TD> -</TR> -</TABLE> - -<P> -<!-- NEW --> -</P> -<A NAME="toc82"></A> -<H3>A miniature resource API: rules</H3> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Function</TH> -<TH>Type</TH> -<TH COLSPAN="2">Example</TH> -</TR> -<TR> -<TD><CODE>mkCl</CODE></TD> -<TD><CODE>NP -> AP -> Cl</CODE></TD> -<TD><I>John is very old</I></TD> -</TR> -<TR> -<TD><CODE>mkNP</CODE></TD> -<TD><CODE>Det -> CN -> NP</CODE></TD> -<TD><I>these old man</I></TD> -</TR> -<TR> -<TD><CODE>mkCN</CODE></TD> -<TD><CODE>N -> CN</CODE></TD> -<TD><I>house</I></TD> -</TR> -<TR> -<TD><CODE>mkCN</CODE></TD> -<TD><CODE>AP -> CN -> CN</CODE></TD> -<TD><I>very big blue house</I></TD> -</TR> -<TR> -<TD><CODE>mkAP</CODE></TD> -<TD><CODE>A -> AP</CODE></TD> -<TD><I>old</I></TD> -</TR> -<TR> -<TD><CODE>mkAP</CODE></TD> -<TD><CODE>AdA -> AP -> AP</CODE></TD> -<TD><I>very very old</I></TD> -</TR> -</TABLE> - -<P> -<!-- NEW --> -</P> -<A NAME="toc83"></A> -<H3>A miniature resource API: structural words</H3> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Function</TH> -<TH>Type</TH> -<TH COLSPAN="2">In English</TH> -</TR> -<TR> -<TD><CODE>this_Det</CODE></TD> -<TD><CODE>Det</CODE></TD> -<TD><I>this</I></TD> -</TR> -<TR> -<TD><CODE>that_Det</CODE></TD> -<TD><CODE>Det</CODE></TD> -<TD><I>that</I></TD> -</TR> -<TR> -<TD><CODE>these_Det</CODE></TD> -<TD><CODE>Det</CODE></TD> -<TD><I>this</I></TD> -</TR> -<TR> -<TD><CODE>those_Det</CODE></TD> -<TD><CODE>Det</CODE></TD> -<TD><I>that</I></TD> -</TR> -<TR> -<TD><CODE>very_AdA</CODE></TD> -<TD><CODE>AdA</CODE></TD> -<TD><I>very</I></TD> -</TR> -</TABLE> - -<P> -<!-- NEW --> -</P> -<A NAME="toc84"></A> -<H3>A miniature resource API: paradigms</H3> -<P> -From <CODE>ParadigmsEng</CODE>: -</P> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Function</TH> -<TH COLSPAN="2">Type</TH> -</TR> -<TR> -<TD><CODE>mkN</CODE></TD> -<TD><CODE>(dog : Str) -> N</CODE></TD> -</TR> -<TR> -<TD><CODE>mkN</CODE></TD> -<TD><CODE>(man,men : Str) -> N</CODE></TD> -</TR> -<TR> -<TD><CODE>mkA</CODE></TD> -<TD><CODE>(cold : Str) -> A</CODE></TD> -</TR> -</TABLE> - -<P> -From <CODE>ParadigmsIta</CODE>: -</P> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Function</TH> -<TH COLSPAN="2">Type</TH> -</TR> -<TR> -<TD><CODE>mkN</CODE></TD> -<TD><CODE>(vino : Str) -> N</CODE></TD> -</TR> -<TR> -<TD><CODE>mkA</CODE></TD> -<TD><CODE>(caro : Str) -> A</CODE></TD> -</TR> -</TABLE> - -<P> -<!-- NEW --> -</P> -<A NAME="toc85"></A> -<H3>A miniature resource API: more paradigms</H3> -<P> -From <CODE>ParadigmsGer</CODE>: -</P> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Function</TH> -<TH COLSPAN="2">Type</TH> -</TR> -<TR> -<TD><CODE>Gender</CODE></TD> -<TD><CODE>Type</CODE></TD> -</TR> -<TR> -<TD><CODE>masculine</CODE></TD> -<TD><CODE>Gender</CODE></TD> -</TR> -<TR> -<TD><CODE>feminine</CODE></TD> -<TD><CODE>Gender</CODE></TD> -</TR> -<TR> -<TD><CODE>neuter</CODE></TD> -<TD><CODE>Gender</CODE></TD> -</TR> -<TR> -<TD><CODE>mkN</CODE></TD> -<TD><CODE>(Stufe : Str) -> N</CODE></TD> -</TR> -<TR> -<TD><CODE>mkN</CODE></TD> -<TD><CODE>(Bild,Bilder : Str) -> Gender -> N</CODE></TD> -</TR> -<TR> -<TD><CODE>mkA</CODE></TD> -<TD><CODE>(klein : Str) -> A</CODE></TD> -</TR> -<TR> -<TD><CODE>mkA</CODE></TD> -<TD><CODE>(gut,besser,beste : Str) -> A</CODE></TD> -</TR> -</TABLE> - -<P> -From <CODE>ParadigmsFin</CODE>: -</P> -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TH>Function</TH> -<TH COLSPAN="2">Type</TH> -</TR> -<TR> -<TD><CODE>mkN</CODE></TD> -<TD><CODE>(talo : Str) -> N</CODE></TD> -</TR> -<TR> -<TD><CODE>mkA</CODE></TD> -<TD><CODE>(hieno : Str) -> A</CODE></TD> -</TR> -</TABLE> - -<P> -<!-- NEW --> -</P> -<A NAME="toc86"></A> -<H3>Exercises</H3> -<P> -1. Try out the morphological paradigms in different languages. Do -as follows: -</P> -<PRE> - > i -path=alltenses -retain alltenses/ParadigmsGer.gfo - > cc -table mkN "Farbe" - > cc -table mkA "gut" "besser" "beste" -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc87"></A> -<H2>Example: English</H2> -<P> -<a name="secenglish"></a> -</P> -<P> -We assume the abstract syntax <CODE>Foods</CODE> from <a href="#chapfour">Lesson 3</a>. -</P> -<P> -We don't need to think about inflection and agreement, but just pick -functions from the resource grammar library. -</P> -<P> -We need a path with -</P> -<UL> -<LI>the current directory <CODE>.</CODE> -<LI>the directory <CODE>../foods</CODE>, in which <CODE>Foods.gf</CODE> resides. -<LI>the library directory <CODE>present</CODE>, which is relative to the - environment variable <CODE>GF_LIB_PATH</CODE> -</UL> - -<P> -Thus the beginning of the module is -</P> -<PRE> - --# -path=.:../foods:present - - concrete FoodsEng of Foods = open SyntaxEng,ParadigmsEng in { -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc88"></A> -<H3>English example: linearization types and combination rules</H3> -<P> -As linearization types, we use clauses for <CODE>Phrase</CODE>, noun phrases -for <CODE>Item</CODE>, common nouns for <CODE>Kind</CODE>, and adjectival phrases for <CODE>Quality</CODE>. -</P> -<PRE> - lincat - Phrase = Cl ; - Item = NP ; - Kind = CN ; - Quality = AP ; -</PRE> -<P> -Now the combination rules we need almost write themselves automatically: -</P> -<PRE> - lin - Is item quality = mkCl item quality ; - This kind = mkNP this_Det kind ; - That kind = mkNP that_Det kind ; - These kind = mkNP these_Det kind ; - Those kind = mkNP those_Det kind ; - QKind quality kind = mkCN quality kind ; - Very quality = mkAP very_AdA quality ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc89"></A> -<H3>English example: lexical rules</H3> -<P> -We use resource paradigms and lexical insertion rules. -</P> -<P> -The two-place noun paradigm is needed only once, for -<I>fish</I> - everythins else is regular. -</P> -<PRE> - Wine = mkCN (mkN "wine") ; - Pizza = mkCN (mkN "pizza") ; - Cheese = mkCN (mkN "cheese") ; - Fish = mkCN (mkN "fish" "fish") ; - Fresh = mkAP (mkA "fresh") ; - Warm = mkAP (mkA "warm") ; - Italian = mkAP (mkA "Italian") ; - Expensive = mkAP (mkA "expensive") ; - Delicious = mkAP (mkA "delicious") ; - Boring = mkAP (mkA "boring") ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc90"></A> -<H3>English example: exercises</H3> -<P> -1. Compile the grammar <CODE>FoodsEng</CODE> and generate -and parse some sentences. -</P> -<P> -2. Write a concrete syntax of <CODE>Foods</CODE> for Italian -or some other language included in the resource library. You can -compare the results with the hand-written -grammars presented earlier in this tutorial. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc91"></A> -<H2>Functor implementation of multilingual grammars</H2> -<P> -<a name="secfunctor"></a> -</P> -<A NAME="toc92"></A> -<H3>New language by copy and paste</H3> -<P> -If you write a concrete syntax of <CODE>Foods</CODE> for some other -language, much of the code will look exactly the same -as for English. This is because -</P> -<UL> -<LI>the <CODE>Syntax</CODE> API is the same for all languages (because - all languages in the resource package do implement the same - syntactic structures) -<LI>languages tend to use the syntactic structures in similar ways -</UL> - -<P> -But lexical rules are more language-dependent. -</P> -<P> -Thus, to port a grammar to a new language, you -</P> -<OL> -<LI>copy the concrete syntax of a given language -<LI>change the words (strings and inflection paradigms) -</OL> - -<P> -Can we avoid this programming by copy-and-paste? -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc93"></A> -<H3>Functors: functions on the module level</H3> -<P> -<B>Functors</B> familiar from the functional programming languages ML and OCaml, -also known as <B>parametrized modules</B>. -</P> -<P> -In GF, a functor is a module that <CODE>open</CODE>s one or more <B>interfaces</B>. -</P> -<P> -An <CODE>interface</CODE> is a module similar to a <CODE>resource</CODE>, but it only -contains the <I>types</I> of <CODE>oper</CODE>s, not (necessarily) their definitions. -</P> -<P> -Syntax for functors: add the keyword <CODE>incomplete</CODE>. We will use the header -</P> -<PRE> - incomplete concrete FoodsI of Foods = open Syntax, LexFoods in -</PRE> -<P> -where -</P> -<PRE> - interface Syntax -- the resource grammar interface - interface LexFoods -- the domain lexicon interface -</PRE> -<P> -When we moreover have -</P> -<PRE> - instance SyntaxEng of Syntax -- the English resource grammar - instance LexFoodsEng of LexFoods -- the English domain lexicon -</PRE> -<P> -we can write a <B>functor instantiation</B>, -</P> -<PRE> - concrete FoodsGer of Foods = FoodsI with - (Syntax = SyntaxGer), - (LexFoods = LexFoodsGer) ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc94"></A> -<H3>Code for the Foods functor</H3> -<PRE> - --# -path=.:../foods - - incomplete concrete FoodsI of Foods = open Syntax, LexFoods in { - lincat - Phrase = Cl ; - Item = NP ; - Kind = CN ; - Quality = AP ; - lin - Is item quality = mkCl item quality ; - This kind = mkNP this_Det kind ; - That kind = mkNP that_Det kind ; - These kind = mkNP these_Det kind ; - Those kind = mkNP those_Det kind ; - QKind quality kind = mkCN quality kind ; - Very quality = mkAP very_AdA quality ; - - Wine = mkCN wine_N ; - Pizza = mkCN pizza_N ; - Cheese = mkCN cheese_N ; - Fish = mkCN fish_N ; - Fresh = mkAP fresh_A ; - Warm = mkAP warm_A ; - Italian = mkAP italian_A ; - Expensive = mkAP expensive_A ; - Delicious = mkAP delicious_A ; - Boring = mkAP boring_A ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc95"></A> -<H3>Code for the LexFoods interface</H3> -<P> -<a name="secinterface"></a> -</P> -<PRE> - interface LexFoods = open Syntax in { - oper - wine_N : N ; - pizza_N : N ; - cheese_N : N ; - fish_N : N ; - fresh_A : A ; - warm_A : A ; - italian_A : A ; - expensive_A : A ; - delicious_A : A ; - boring_A : A ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc96"></A> -<H3>Code for a German instance of the lexicon</H3> -<PRE> - instance LexFoodsGer of LexFoods = open SyntaxGer, ParadigmsGer in { - oper - wine_N = mkN "Wein" ; - pizza_N = mkN "Pizza" "Pizzen" feminine ; - cheese_N = mkN "Käse" "Käsen" masculine ; - fish_N = mkN "Fisch" ; - fresh_A = mkA "frisch" ; - warm_A = mkA "warm" "wärmer" "wärmste" ; - italian_A = mkA "italienisch" ; - expensive_A = mkA "teuer" ; - delicious_A = mkA "köstlich" ; - boring_A = mkA "langweilig" ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc97"></A> -<H3>Code for a German functor instantiation</H3> -<PRE> - --# -path=.:../foods:present - - concrete FoodsGer of Foods = FoodsI with - (Syntax = SyntaxGer), - (LexFoods = LexFoodsGer) ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc98"></A> -<H3>Adding languages to a functor implementation</H3> -<P> -Just two modules are needed: -</P> -<UL> -<LI>a domain lexicon instance -<LI>a functor instantiation -</UL> - -<P> -The functor instantiation is completely mechanical to write. -</P> -<P> -The domain lexicon instance requires some knowledge of the words of the -language: -</P> -<UL> -<LI>what words are used for which concepts -<LI>how the words are -<LI>features such as genders -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc99"></A> -<H3>Example: adding Finnish</H3> -<P> -Lexicon instance -</P> -<PRE> - instance LexFoodsFin of LexFoods = open SyntaxFin, ParadigmsFin in { - oper - wine_N = mkN "viini" ; - pizza_N = mkN "pizza" ; - cheese_N = mkN "juusto" ; - fish_N = mkN "kala" ; - fresh_A = mkA "tuore" ; - warm_A = mkA "lämmin" ; - italian_A = mkA "italialainen" ; - expensive_A = mkA "kallis" ; - delicious_A = mkA "herkullinen" ; - boring_A = mkA "tylsä" ; - } -</PRE> -<P> -Functor instantiation -</P> -<PRE> - --# -path=.:../foods:present - - concrete FoodsFin of Foods = FoodsI with - (Syntax = SyntaxFin), - (LexFoods = LexFoodsFin) ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc100"></A> -<H3>A design pattern</H3> -<P> -This can be seen as a <I>design pattern</I> for multilingual grammars: -</P> -<PRE> - concrete DomainL* - - instance LexDomainL instance SyntaxL* - - incomplete concrete DomainI - / | \ - interface LexDomain abstract Domain interface Syntax* -</PRE> -<P> -Modules marked with <CODE>*</CODE> are either given in the library, or trivial. -</P> -<P> -Of the hand-written modules, only <CODE>LexDomainL</CODE> is language-dependent. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc101"></A> -<H3>Functors: exercises</H3> -<P> -1. Compile and test <CODE>FoodsGer</CODE>. -</P> -<P> -2. Refactor <CODE>FoodsEng</CODE> into a functor instantiation. -</P> -<P> -3. Instantiate the functor <CODE>FoodsI</CODE> to some language of -your choice. -</P> -<P> -4. Design a small grammar that can be used for controlling -an MP3 player. The grammar should be able to recognize commands such -as <I>play this song</I>, with the following variations: -</P> -<UL> -<LI>verbs: <I>play</I>, <I>remove</I> -<LI>objects: <I>song</I>, <I>artist</I> -<LI>determiners: <I>this</I>, <I>the previous</I> -<LI>verbs without arguments: <I>stop</I>, <I>pause</I> -</UL> - -<P> -The implementation goes in the following phases: -</P> -<OL> -<LI>abstract syntax -<LI>(optional:) prototype string-based concrete syntax -<LI>functor over resource syntax and lexicon interface -<LI>lexicon instance for the first language -<LI>functor instantiation for the first language -<LI>lexicon instance for the second language -<LI>functor instantiation for the second language -<LI>... -</OL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc102"></A> -<H2>Restricted inheritance</H2> -<A NAME="toc103"></A> -<H3>A problem with functors</H3> -<P> -Problem: a functor only works when all languages use the resource <CODE>Syntax</CODE> -in the same way. -</P> -<P> -Example (contrived): assume that English has -no word for <CODE>Pizza</CODE>, but has to use the paraphrase <I>Italian pie</I>. -This is no longer a noun <CODE>N</CODE>, but a complex phrase -in the category <CODE>CN</CODE>. -</P> -<P> -Possible solution: change interface the <CODE>LexFoods</CODE> with -</P> -<PRE> - oper pizza_CN : CN ; -</PRE> -<P> -Problem with this solution: -</P> -<UL> -<LI>we may end up changing the interface and the function with each new language -<LI>we must every time also change the instances for the old languages to maintain - type correctness -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc104"></A> -<H3>Restricted inheritance: include or exclude</H3> -<P> -A module may inherit just a selection of names. -</P> -<P> -Example: the <CODE>FoodMarket</CODE> example "Rsecarchitecture: -</P> -<PRE> - abstract Foodmarket = Food, Fruit [Peach], Mushroom - [Agaric] -</PRE> -<P> -Here, from <CODE>Fruit</CODE> we include <CODE>Peach</CODE> only, and from <CODE>Mushroom</CODE> -we exclude <CODE>Agaric</CODE>. -</P> -<P> -A concrete syntax of <CODE>Foodmarket</CODE> must make the analogous restrictions. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc105"></A> -<H3>The functor problem solved</H3> -<P> -The English instantiation inherits the functor -implementation except for the constant <CODE>Pizza</CODE>. This constant -is defined in the body instead: -</P> -<PRE> - --# -path=.:../foods:present - - concrete FoodsEng of Foods = FoodsI - [Pizza] with - (Syntax = SyntaxEng), - (LexFoods = LexFoodsEng) ** - open SyntaxEng, ParadigmsEng in { - - lin Pizza = mkCN (mkA "Italian") (mkN "pie") ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc106"></A> -<H2>Grammar reuse</H2> -<P> -Abstract syntax modules can be used as interfaces, -and concrete syntaxes as their instances. -</P> -<P> -The following correspondencies are then applied: -</P> -<PRE> - cat C <---> oper C : Type - - fun f : A <---> oper f : A - - lincat C = T <---> oper C : Type = T - - lin f = t <---> oper f : A = t -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc107"></A> -<H3>Library exercises</H3> -<P> -1. Find resource grammar terms for the following -English phrases (in the category <CODE>Phr</CODE>). You can first try to -build the terms manually. -</P> -<P> -<I>every man loves a woman</I> -</P> -<P> -<I>this grammar speaks more than ten languages</I> -</P> -<P> -<I>which languages aren't in the grammar</I> -</P> -<P> -<I>which languages did you want to speak</I> -</P> -<P> -Then translate the phrases to other languages. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc108"></A> -<H2>Tenses</H2> -<P> -<a name="sectense"></a> -</P> -<P> -In <CODE>Foods</CODE> grammars, we have used the path -</P> -<PRE> - --# -path=.:../foods -</PRE> -<P> -The library subdirectory <CODE>present</CODE> is a restricted version -of the resource, with only present tense of verbs and sentences. -</P> -<P> -By just changing the path, we get all tenses: -</P> -<PRE> - --# -path=.:../foods:alltenses -</PRE> -<P> -Now we can see all the tenses of phrases, by using the <CODE>-all</CODE> flag -in linearization: -</P> -<PRE> - > gr | l -all - This wine is delicious - Is this wine delicious - This wine isn't delicious - Isn't this wine delicious - This wine is not delicious - Is this wine not delicious - This wine has been delicious - Has this wine been delicious - This wine hasn't been delicious - Hasn't this wine been delicious - This wine has not been delicious - Has this wine not been delicious - This wine was delicious - Was this wine delicious - This wine wasn't delicious - Wasn't this wine delicious - This wine was not delicious - Was this wine not delicious - This wine had been delicious - Had this wine been delicious - This wine hadn't been delicious - Hadn't this wine been delicious - This wine had not been delicious - Had this wine not been delicious - This wine will be delicious - Will this wine be delicious - This wine won't be delicious - Won't this wine be delicious - This wine will not be delicious - Will this wine not be delicious - This wine will have been delicious - Will this wine have been delicious - This wine won't have been delicious - Won't this wine have been delicious - This wine will not have been delicious - Will this wine not have been delicious - This wine would be delicious - Would this wine be delicious - This wine wouldn't be delicious - Wouldn't this wine be delicious - This wine would not be delicious - Would this wine not be delicious - This wine would have been delicious - Would this wine have been delicious - This wine wouldn't have been delicious - Wouldn't this wine have been delicious - This wine would not have been delicious - Would this wine not have been delicious -</PRE> -<P> -We also see -</P> -<UL> -<LI>polarity (positive vs. negative) -<LI>word order (direct vs. inverted) -<LI>variation between contracted and full negation -</UL> - -<P> -The list is even longer in languages that have more -tenses and moods, e.g. the Romance languages. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc109"></A> -<H1>Lesson 5: Refining semantics in abstract syntax</H1> -<P> -<a name="chapsix"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>include semantic conditions in grammars, by using - <UL> - <LI><B>dependent types</B> - <LI><B>higher order abstract syntax</B> - <LI>proof objects - <LI>semantic definitions - <P></P> -These concepts are inherited from <B>type theory</B> (more precisely: -constructive type theory, or Martin-Löf type theory). - <P></P> -Type theory is the basis <B>logical frameworks</B>. - <P></P> -GF = logical framework + concrete syntax. - </UL> -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc110"></A> -<H2>Dependent types</H2> -<P> -<a name="secsmarthouse"></a> -</P> -<P> -Problem: to express <B>conditions of semantic well-formedness</B>. -</P> -<P> -Example: a voice command system for a "smart house" wants to -eliminate meaningless commands. -</P> -<P> -Thus we want to restrict particular actions to -particular devices - we can <I>dim a light</I>, but we cannot -<I>dim a fan</I>. -</P> -<P> -The following example is borrowed from the -Regulus Book (Rayner & al. 2006). -</P> -<P> -A simple example is a "smart house" system, which -defines voice commands for household appliances. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc111"></A> -<H3>A dependent type system</H3> -<P> -Ontology: -</P> -<UL> -<LI>there are commands and device kinds -<LI>for each kind of device, there are devices and actions -<LI>a command concerns an action of some kind on a device of the same kind -</UL> - -<P> -Abstract syntax formalizing this: -</P> -<PRE> - cat - Command ; - Kind ; - Device Kind ; -- argument type Kind - Action Kind ; - fun - CAction : (k : Kind) -> Action k -> Device k -> Command ; -</PRE> -<P> -<CODE>Device</CODE> and <CODE>Action</CODE> are both dependent types. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc112"></A> -<H3>Examples of devices and actions</H3> -<P> -Assume the kinds <CODE>light</CODE> and <CODE>fan</CODE>, -</P> -<PRE> - light, fan : Kind ; - dim : Action light ; -</PRE> -<P> -Given a kind, <I>k</I>, you can form the device <I>the k</I>. -</P> -<PRE> - DKindOne : (k : Kind) -> Device k ; -- the light -</PRE> -<P> -Now we can form the syntax tree -</P> -<PRE> - CAction light dim (DKindOne light) -</PRE> -<P> -but we cannot form the trees -</P> -<PRE> - CAction light dim (DKindOne fan) - CAction fan dim (DKindOne light) - CAction fan dim (DKindOne fan) -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc113"></A> -<H3>Linearization and parsing with dependent types</H3> -<P> -Concrete syntax does not know if a category is a dependent type. -</P> -<PRE> - lincat Action = {s : Str} ; - lin CAction _ act dev = {s = act.s ++ dev.s} ; -</PRE> -<P> -Notice that the <CODE>Kind</CODE> argument is suppressed in linearization. -</P> -<P> -Parsing with dependent types is performed in two phases: -</P> -<OL> -<LI>context-free parsing -<LI>filtering through type checker -</OL> - -<P> -By just doing the first phase, the <CODE>kind</CODE> argument is not found: -</P> -<PRE> - > parse "dim the light" - CAction ? dim (DKindOne light) -</PRE> -<P> -Moreover, type-incorrect commands are not rejected: -</P> -<PRE> - > parse "dim the fan" - CAction ? dim (DKindOne fan) -</PRE> -<P> -The term <CODE>?</CODE> is a <B>metavariable</B>, returned by the parser -for any subtree that is suppressed by a linearization rule. -These are the same kind of metavariables as were used <a href="#secediting">here</a> -to mark incomplete parts of trees in the syntax editor. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc114"></A> -<H3>Solving metavariables</H3> -<P> -Use the command <CODE>put_tree = pt</CODE> with the option <CODE>-typecheck</CODE>: -</P> -<PRE> - > parse "dim the light" | put_tree -typecheck - CAction light dim (DKindOne light) -</PRE> -<P> -The <CODE>typecheck</CODE> process may fail, in which case an error message -is shown and no tree is returned: -</P> -<PRE> - > parse "dim the fan" | put_tree -typecheck - - Error in tree UCommand (CAction ? 0 dim (DKindOne fan)) : - (? 0 <> fan) (? 0 <> light) -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc115"></A> -<H2>Polymorphism</H2> -<P> -<a name="secpolymorphic"></a> -</P> -<P> -Sometimes an action can be performed on all kinds of devices. -</P> -<P> -This is represented as a function that takes a <CODE>Kind</CODE> as an argument -and produce an <CODE>Action</CODE> for that <CODE>Kind</CODE>: -</P> -<PRE> - fun switchOn, switchOff : (k : Kind) -> Action k ; -</PRE> -<P> -Functions of this kind are called <B>polymorphic</B>. -</P> -<P> -We can use this kind of polymorphism in concrete syntax as well, -to express Haskell-type library functions: -</P> -<PRE> - oper const :(a,b : Type) -> a -> b -> a = - \_,_,c,_ -> c ; - - oper flip : (a,b,c : Type) -> (a -> b ->c) -> b -> a -> c = - \_,_,_,f,x,y -> f y x ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc116"></A> -<H3>Dependent types: exercises</H3> -<P> -1. Write an abstract syntax module with above contents -and an appropriate English concrete syntax. Try to parse the commands -<I>dim the light</I> and <I>dim the fan</I>, with and without <CODE>solve</CODE> filtering. -</P> -<P> -2. Perform random and exhaustive generation, with and without -<CODE>solve</CODE> filtering. -</P> -<P> -3. Add some device kinds and actions to the grammar. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc117"></A> -<H2>Proof objects</H2> -<P> -<B>Curry-Howard isomorphism</B> = <B>propositions as types principle</B>: -a proposition is a type of proofs (= proof objects). -</P> -<P> -Example: define the <I>less than</I> proposition for natural numbers, -</P> -<PRE> - cat Nat ; - fun Zero : Nat ; - fun Succ : Nat -> Nat ; -</PRE> -<P> -Define inductively what it means for a number <I>x</I> to be <I>less than</I> -a number <I>y</I>: -</P> -<UL> -<LI><CODE>Zero</CODE> is less than <CODE>Succ</CODE> <I>y</I> for any <I>y</I>. -<LI>If <I>x</I> is less than <I>y</I>, then <CODE>Succ</CODE> <I>x</I> is less than <CODE>Succ</CODE> <I>y</I>. -</UL> - -<P> -Expressing these axioms in type theory -with a dependent type <CODE>Less</CODE> <I>x y</I> and two functions constructing -its objects: -</P> -<PRE> - cat Less Nat Nat ; - fun lessZ : (y : Nat) -> Less Zero (Succ y) ; - fun lessS : (x,y : Nat) -> Less x y -> Less (Succ x) (Succ y) ; -</PRE> -<P> -Example: the fact that 2 is less that 4 has the proof object -</P> -<PRE> - lessS (Succ Zero) (Succ (Succ (Succ Zero))) - (lessS Zero (Succ (Succ Zero)) (lessZ (Succ Zero))) - : Less (Succ (Succ Zero)) (Succ (Succ (Succ (Succ Zero)))) -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc118"></A> -<H3>Proof-carrying documents</H3> -<P> -Idea: to be semantically well-formed, the abstract syntax of a document -must contain a proof of some property, -although the proof is not shown in the concrete document. -</P> -<P> -Example: documents describing flight connections: -</P> -<P> -<I>To fly from Gothenburg to Prague, first take LH3043 to Frankfurt, then OK0537 to Prague.</I> -</P> -<P> -The well-formedness of this text is partly expressible by dependent typing: -</P> -<PRE> - cat - City ; - Flight City City ; - fun - Gothenburg, Frankfurt, Prague : City ; - LH3043 : Flight Gothenburg Frankfurt ; - OK0537 : Flight Frankfurt Prague ; -</PRE> -<P> -To extend the conditions to flight connections, we introduce a category -of proofs that a change is possible: -</P> -<PRE> - cat IsPossible (x,y,z : City)(Flight x y)(Flight y z) ; -</PRE> -<P> -A legal connection is formed by the function -</P> -<PRE> - fun Connect : (x,y,z : City) -> - (u : Flight x y) -> (v : Flight y z) -> - IsPossible x y z u v -> Flight x z ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc119"></A> -<H2>Restricted polymorphism</H2> -<P> -Above, all Actions were either of -</P> -<UL> -<LI><B>monomorphic</B>: defined for one Kind -<LI><B>polymorphic</B>: defined for all Kinds -</UL> - -<P> -To make this scale up for new Kinds, we can refine this to -<B>restricted polymorphism</B>: defined for Kinds of a certain <B>class</B> -</P> -<P> -The notion of class uses the Curry-Howard isomorphism as follows: -</P> -<UL> -<LI>a class is a <B>predicate</B> of Kinds --- i.e. a type depending of Kinds -<LI>a Kind is in a class if there is a proof object of this type -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc120"></A> -<H3>Example: classes for switching and dimming</H3> -<P> -We modify the smart house grammar: -</P> -<PRE> - cat - Switchable Kind ; - Dimmable Kind ; - fun - switchable_light : Switchable light ; - switchable_fan : Switchable fan ; - dimmable_light : Dimmable light ; - - switchOn : (k : Kind) -> Switchable k -> Action k ; - dim : (k : Kind) -> Dimmable k -> Action k ; -</PRE> -<P> -Classes for new actions can be added incrementally. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc121"></A> -<H2>Variable bindings</H2> -<P> -<a name="secbinding"></a> -</P> -<P> -Mathematical notation and programming languages have -expressions that <B>bind</B> variables. -</P> -<P> -Example: universal quantifier formula -</P> -<PRE> - (All x)B(x) -</PRE> -<P> -The variable <CODE>x</CODE> has a <B>binding</B> <CODE>(All x)</CODE>, and -occurs <B>bound</B> in the <B>body</B> <CODE>B(x)</CODE>. -</P> -<P> -Examples from informal mathematical language: -</P> -<PRE> - for all x, x is equal to x - - the function that for any numbers x and y returns the maximum of x+y - and x*y - - Let x be a natural number. Assume that x is even. Then x + 3 is odd. -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc122"></A> -<H3>Higher-order abstract syntax</H3> -<P> -Abstract syntax can use functions as arguments: -</P> -<PRE> - cat Ind ; Prop ; - fun All : (Ind -> Prop) -> Prop -</PRE> -<P> -where <CODE>Ind</CODE> is the type of individuals and <CODE>Prop</CODE>, -the type of propositions. -</P> -<P> -Let us add an equality predicate -</P> -<PRE> - fun Eq : Ind -> Ind -> Prop -</PRE> -<P> -Now we can form the tree -</P> -<PRE> - All (\x -> Eq x x) -</PRE> -<P> -which we want to relate to the ordinary notation -</P> -<PRE> - (All x)(x = x) -</PRE> -<P> -In <B>higher-order abstract syntax</B> (HOAS), all variable bindings are -expressed using higher-order syntactic constructors. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc123"></A> -<H3>Higher-order abstract syntax: linearization</H3> -<P> -HOAS has proved to be useful in the semantics and computer implementation of -variable-binding expressions. -</P> -<P> -How do we relate HOAS to the concrete syntax? -</P> -<P> -In GF, we write -</P> -<PRE> - fun All : (Ind -> Prop) -> Prop - lin All B = {s = "(" ++ "All" ++ B.$0 ++ ")" ++ B.s} -</PRE> -<P> -General rule: if an argument type of a <CODE>fun</CODE> function is -a function type <CODE>A -> C</CODE>, the linearization type of -this argument is the linearization type of <CODE>C</CODE> -together with a new field <CODE>$0 : Str</CODE>. -</P> -<P> -The argument <CODE>B</CODE> thus has the linearization type -</P> -<PRE> - {s : Str ; $0 : Str}, -</PRE> -<P> -If there are more bindings, we add <CODE>$1</CODE>, <CODE>$2</CODE>, etc. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc124"></A> -<H3>Eta expansion</H3> -<P> -To make sense of linearization, syntax trees must be -<B>eta-expanded</B>: for any function of type -</P> -<PRE> - A -> B -</PRE> -<P> -an eta-expanded syntax tree has the form -</P> -<PRE> - \x -> b -</PRE> -<P> -where <CODE>b : B</CODE> under the assumption <CODE>x : A</CODE>. -</P> -<P> -Given the linearization rule -</P> -<PRE> - lin Eq a b = {s = "(" ++ a.s ++ "=" ++ b.s ++ ")"} -</PRE> -<P> -the linearization of the tree -</P> -<PRE> - \x -> Eq x x -</PRE> -<P> -is the record -</P> -<PRE> - {$0 = "x", s = ["( x = x )"]} -</PRE> -<P> -Then we can compute the linearization of the formula, -</P> -<PRE> - All (\x -> Eq x x) --> {s = "[( All x ) ( x = x )]"}. -</PRE> -<P> -The linearization of the variable <CODE>x</CODE> is, -"automagically", the string <CODE>"x"</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc125"></A> -<H3>Parsing variable bindings</H3> -<P> -GF can treat any one-word string as a variable symbol. -</P> -<PRE> - > p -cat=Prop "( All x ) ( x = x )" - All (\x -> Eq x x) -</PRE> -<P> -Variables must be bound if they are used: -</P> -<PRE> - > p -cat=Prop "( All x ) ( x = y )" - no tree found -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc126"></A> -<H3>Exercises on variable bindings</H3> -<P> -1. Write an abstract syntax of the whole -<B>predicate calculus</B>, with the -<B>connectives</B> "and", "or", "implies", and "not", and the -<B>quantifiers</B> "exists" and "for all". Use higher-order functions -to guarantee that unbounded variables do not occur. -</P> -<P> -2. Write a concrete syntax for your favourite -notation of predicate calculus. Use Latex as target language -if you want nice output. You can also try producing boolean -expressions of some programming language. Use as many parenthesis as you need to -guarantee non-ambiguity. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc127"></A> -<H2>Semantic definitions</H2> -<P> -<a name="secdefdef"></a> -</P> -<P> -The <CODE>fun</CODE> judgements of GF are declarations of functions, giving their types. -</P> -<P> -Can we <B>compute</B> <CODE>fun</CODE> functions? -</P> -<P> -Mostly we are not interested, since functions are seen as constructors, -i.e. data forms - as usual with -</P> -<PRE> - fun Zero : Nat ; - fun Succ : Nat -> Nat ; -</PRE> -<P> -But it is also possible to give <B>semantic definitions</B> to functions. -The key word is <CODE>def</CODE>: -</P> -<PRE> - fun one : Nat ; - def one = Succ Zero ; - - fun twice : Nat -> Nat ; - def twice x = plus x x ; - - fun plus : Nat -> Nat -> Nat ; - def - plus x Zero = x ; - plus x (Succ y) = Succ (Sum x y) ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc128"></A> -<H3>Computing a tree</H3> -<P> -Computation: follow a chain of definition until no definition -can be applied, -</P> -<PRE> - plus one one --> - plus (Succ Zero) (Succ Zero) --> - Succ (plus (Succ Zero) Zero) --> - Succ (Succ Zero) -</PRE> -<P> -Computation in GF is performed with the <CODE>put_term</CODE> command and the -<CODE>compute</CODE> transformation, e.g. -</P> -<PRE> - > parse -tr "1 + 1" | put_term -transform=compute -tr | l - plus one one - Succ (Succ Zero) - s(s(0)) -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc129"></A> -<H3>Definitional equality</H3> -<P> -Two trees are definitionally equal if they compute into the same tree. -</P> -<P> -Definitional equality does not guarantee sameness of linearization: -</P> -<PRE> - plus one one ===> 1 + 1 - Succ (Succ Zero) ===> s(s(0)) -</PRE> -<P> -The main use of this concept is in type checking: sameness of types. -</P> -<P> -Thus e.g. the following types are equal -</P> -<PRE> - Less Zero one - Less Zero (Succ Zero)) -</PRE> -<P> -so that an object of one also is an object of the other. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc130"></A> -<H3>Judgement forms for constructors</H3> -<P> -The judgement form <CODE>data</CODE> tells that a category has -certain functions as constructors: -</P> -<PRE> - data Nat = Succ | Zero ; -</PRE> -<P> -The type signatures of constructors are given separately, -</P> -<PRE> - fun Zero : Nat ; - fun Succ : Nat -> Nat ; -</PRE> -<P> -There is also a shorthand: -</P> -<PRE> - data Succ : Nat -> Nat ; === fun Succ : Nat -> Nat ; - data Nat = Succ ; -</PRE> -<P> -Notice: in <CODE>def</CODE> definitions, identifier patterns not -marked as <CODE>data</CODE> will be treated as variables. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc131"></A> -<H3>Exercises on semantic definitions</H3> -<P> -1. Implement an interpreter of a small functional programming -language with natural numbers, lists, pairs, lambdas, etc. Use higher-order -abstract syntax with semantic definitions. As concrete syntax, use -your favourite programming language. -</P> -<P> -2. There is no termination checking for <CODE>def</CODE> definitions. -Construct an examples that makes type checking loop. -Type checking can be invoked with <CODE>put_term -transform=solve</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc132"></A> -<H2>Lesson 6: Grammars of formal languages</H2> -<P> -<a name="chapseven"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>write grammars for formal languages (mathematical notation, programming languages) -<LI>interface between formal and natural langauges -<LI>implement a compiler by using GF -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc133"></A> -<H3>Arithmetic expressions</H3> -<P> -We construct a calculator with addition, subtraction, multiplication, and -division of integers. -</P> -<PRE> - abstract Calculator = { - - cat Exp ; - - fun - EPlus, EMinus, ETimes, EDiv : Exp -> Exp -> Exp ; - EInt : Int -> Exp ; - } -</PRE> -<P> -The category <CODE>Int</CODE> is a built-in category of -integers. Its syntax trees <B>integer literals</B>, i.e. -sequences of digits: -</P> -<PRE> - 5457455814608954681 : Int -</PRE> -<P> -These are the only objects of type <CODE>Int</CODE>: -grammars are not allowed to declare functions with <CODE>Int</CODE> as value type. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc134"></A> -<H3>Concrete syntax: a simple approach</H3> -<P> -We begin with a -concrete syntax that always uses parentheses around binary -operator applications: -</P> -<PRE> - concrete CalculatorP of Calculator = { - - lincat - Exp = SS ; - lin - EPlus = infix "+" ; - EMinus = infix "-" ; - ETimes = infix "*" ; - EDiv = infix "/" ; - EInt i = i ; - - oper - infix : Str -> SS -> SS -> SS = \f,x,y -> - ss ("(" ++ x.s ++ f ++ y.s ++ ")") ; - } -</PRE> -<P> -Now we have -</P> -<PRE> - > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) - ( 2 + ( 3 * 4 ) ) -</PRE> -<P> -First problems: -</P> -<UL> -<LI>to get rid of superfluous spaces and -<LI>to recognize integer literals in the parser -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc135"></A> -<H2>Lexing and unlexing</H2> -<P> -<a name="seclexing"></a> -</P> -<P> -The input of parsing in GF is not just a string, but a list of -<B>tokens</B>, returned by a <B>lexer</B>. -</P> -<P> -The default lexer in GF returns chunks separated by spaces: -</P> -<PRE> - "(12 + (3 * 4))" ===> "(12", "+", "(3". "*". "4))" -</PRE> -<P> -The proper way would be -</P> -<PRE> - "(", "12", "+", "(", "3", "*", "4", ")", ")" -</PRE> -<P> -Moreover, the tokens <CODE>"12"</CODE>, <CODE>"3"</CODE>, and <CODE>"4"</CODE> should be recognized as -integer literals - they cannot be found in the grammar. -</P> -<P> -<!-- NEW --> -</P> -<P> -Lexers are invoked by flags to the command <CODE>put_string = ps</CODE>. -</P> -<PRE> - > put_string -lexcode "(2 + (3 * 4))" - ( 2 + ( 3 * 4 ) ) -</PRE> -<P> -This can be piped into a parser, as usual: -</P> -<PRE> - > ps -lexcode "(2 + (3 * 4))" | parse - EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) -</PRE> -<P> -In linearization, we use a corresponding <B>unlexer</B>: -</P> -<PRE> - > linearize EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) | ps -unlexcode - (2 + (3 * 4)) -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc136"></A> -<H3>Most common lexers and unlexers</H3> -<TABLE ALIGN="center" CELLPADDING="4" BORDER="1"> -<TR> -<TH>lexer</TH> -<TH>unlexer</TH> -<TH COLSPAN="2">description</TH> -</TR> -<TR> -<TD><CODE>chars</CODE></TD> -<TD><CODE>unchars</CODE></TD> -<TD>each character is a token</TD> -</TR> -<TR> -<TD><CODE>lexcode</CODE></TD> -<TD><CODE>unlexcode</CODE></TD> -<TD>program code conventions (uses Haskell's lex)</TD> -</TR> -<TR> -<TD><CODE>lexmixed</CODE></TD> -<TD><CODE>unlexmixed</CODE></TD> -<TD>like text, but between $ signs like code</TD> -</TR> -<TR> -<TD><CODE>lextext</CODE></TD> -<TD><CODE>unlextext</CODE></TD> -<TD>with conventions on punctuation and capitals</TD> -</TR> -<TR> -<TD><CODE>words</CODE></TD> -<TD><CODE>unwords</CODE></TD> -<TD>(default) tokens separated by space characters</TD> -</TR> -</TABLE> - -<P> -<!-- NEW --> -</P> -<A NAME="toc137"></A> -<H2>Precedence and fixity</H2> -<P> -Arithmetic expressions should be unambiguous. If we write -</P> -<PRE> - 2 + 3 * 4 -</PRE> -<P> -it should be parsed as one, but not both, of -</P> -<PRE> - EPlus (EInt 2) (ETimes (EInt 3) (EInt 4)) - ETimes (EPlus (EInt 2) (EInt 3)) (EInt 4) -</PRE> -<P> -We choose the former tree, because -multiplication has <B>higher precedence</B> than addition. -</P> -<P> -To express the latter tree, we have to use parentheses: -</P> -<PRE> - (2 + 3) * 4 -</PRE> -<P> -The usual precedence rules: -</P> -<UL> -<LI>Integer constants and expressions in parentheses have the highest precedence. -<LI>Multiplication and division have equal precedence, lower than the highest - but higher than addition and subtraction, which are again equal. -<LI>All the four binary operations are <B>left-associative</B>: - <CODE>1 + 2 + 3</CODE> means the same as <CODE>(1 + 2) + 3</CODE>. -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc138"></A> -<H3>Precedence as a parameter</H3> -<P> -Precedence can be made into an inherent feature of expressions: -</P> -<PRE> - oper - Prec : PType = Ints 2 ; - TermPrec : Type = {s : Str ; p : Prec} ; - - mkPrec : Prec -> Str -> TermPrec = \p,s -> {s = s ; p = p} ; - - lincat - Exp = TermPrec ; -</PRE> -<P> -Notice <CODE>Ints 2</CODE>: a parameter type, whose values are the integers -<CODE>0,1,2</CODE>. -</P> -<P> -Using precedence levels: compare the inherent precedence of an -expression with the expected precedence. -</P> -<UL> -<LI>if the inherent precedence is lower than the expected precedence, - use parentheses -<LI>otherwise, no parentheses are needed -</UL> - -<P> -This idea is encoded in the operation -</P> -<PRE> - oper usePrec : TermPrec -> Prec -> Str = \x,p -> - case lessPrec x.p p of { - True => "(" x.s ")" ; - False => x.s - } ; -</PRE> -<P> -(We use <CODE>lessPrec</CODE> from <CODE>lib/prelude/Formal</CODE>.) -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc139"></A> -<H3>Fixities</H3> -<P> -We can define left-associative infix expressions: -</P> -<PRE> - infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y -> - mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; -</PRE> -<P> -Constant-like expressions (the highest level): -</P> -<PRE> - constant : Str -> TermPrec = mkPrec 2 ; -</PRE> -<P> -All these operations can be found in <CODE>lib/prelude/Formal</CODE>, -which has 5 levels. -</P> -<P> -Now we can write the whole concrete syntax of <CODE>Calculator</CODE> compactly: -</P> -<PRE> - concrete CalculatorC of Calculator = open Formal, Prelude in { - - flags lexer = codelit ; unlexer = code ; startcat = Exp ; - - lincat Exp = TermPrec ; - - lin - EPlus = infixl 0 "+" ; - EMinus = infixl 0 "-" ; - ETimes = infixl 1 "*" ; - EDiv = infixl 1 "/" ; - EInt i = constant i.s ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc140"></A> -<H3>Exercises on precedence</H3> -<P> -1. Define non-associative and right-associative infix operations -analogous to <CODE>infixl</CODE>. -</P> -<P> -2. Add a constructor that puts parentheses around expressions -to raise their precedence, but that is eliminated by a <CODE>def</CODE> definition. -Test parsing with and without a pipe to <CODE>pt -transform=compute</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc141"></A> -<H2>Code generation as linearization</H2> -<P> -Translate arithmetic (infix) to JVM (postfix): -</P> -<PRE> - 2 + 3 * 4 - - ===> - - iconst 2 : iconst 3 ; iconst 4 ; imul ; iadd -</PRE> -<P> -Just give linearization rules for JVM: -</P> -<PRE> - lin - EPlus = postfix "iadd" ; - EMinus = postfix "isub" ; - ETimes = postfix "imul" ; - EDiv = postfix "idiv" ; - EInt i = ss ("iconst" ++ i.s) ; - oper - postfix : Str -> SS -> SS -> SS = \op,x,y -> - ss (x.s ++ ";" ++ y.s ++ ";" ++ op) ; -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc142"></A> -<H3>Programs with variables</H3> -<P> -A <B>straight code</B> programming language, with -<B>initializations</B> and <B>assignments</B>: -</P> -<PRE> - int x = 2 + 3 ; - int y = x + 1 ; - x = x + 9 * y ; -</PRE> -<P> -We define programs by the following constructors: -</P> -<PRE> - fun - PEmpty : Prog ; - PInit : Exp -> (Var -> Prog) -> Prog ; - PAss : Var -> Exp -> Prog -> Prog ; -</PRE> -<P> -<CODE>PInit</CODE> uses higher-order abstract syntax for making the -initialized variable available in the <B>continuation</B> of the program. -</P> -<P> -The abstract syntax tree for the above code is -</P> -<PRE> - PInit (EPlus (EInt 2) (EInt 3)) (\x -> - PInit (EPlus (EVar x) (EInt 1)) (\y -> - PAss x (EPlus (EVar x) (ETimes (EInt 9) (EVar y))) - PEmpty)) -</PRE> -<P> -No uninitialized variables are allowed - there are no constructors for <CODE>Var</CODE>! -But we do have the rule -</P> -<PRE> - fun EVar : Var -> Exp ; -</PRE> -<P> -The rest of the grammar is just the same as for arithmetic expressions -<a href="#secprecedence">here</a>. The best way to implement it is perhaps by writing a -module that extends the expression module. The most natural start category -of the extension is <CODE>Prog</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc143"></A> -<H3>Exercises on code generation</H3> -<P> -1. Define a C-like concrete syntax of the straight-code language. -</P> -<P> -2. Extend the straight-code language to expressions of type <CODE>float</CODE>. -To guarantee type safety, you can define a category <CODE>Typ</CODE> of types, and -make <CODE>Exp</CODE> and <CODE>Var</CODE> dependent on <CODE>Typ</CODE>. Basic floating point expressions -can be formed from literal of the built-in GF type <CODE>Float</CODE>. The arithmetic -operations should be made polymorphic (as <a href="#secpolymorphic">here</a>). -</P> -<P> -3. Extend JVM generation to the straight-code language, using -two more instructions -</P> -<UL> -<LI><CODE>iload</CODE> <I>x</I>, which loads the value of the variable <I>x</I> -<LI><CODE>istore</CODE> <I>x</I> which stores a value to the variable <I>x</I> -</UL> - -<P> -Thus the code for the example in the previous section is -</P> -<PRE> - iconst 2 ; iconst 3 ; iadd ; istore x ; - iload x ; iconst 1 ; iadd ; istore y ; - iload x ; iconst 9 ; iload y ; imul ; iadd ; istore x ; -</PRE> -<P></P> -<P> -4. If you made the exercise of adding floating point numbers to -the language, you can now cash out the main advantage of type checking -for code generation: selecting type-correct JVM instructions. The floating -point instructions are precisely the same as the integer one, except that -the prefix is <CODE>f</CODE> instead of <CODE>i</CODE>, and that <CODE>fconst</CODE> takes floating -point literals as arguments. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc144"></A> -<H1>Lesson 7: Embedded grammars</H1> -<P> -<a name="chapeight"></a> -</P> -<P> -Goals: -</P> -<UL> -<LI>use grammars as parts of programs written in Haskell and JavaScript -<LI>implement stand-alone question-answering systems and translators based on - GF grammars -<LI>generate language models for speech recognition from GF grammars -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc145"></A> -<H2>Functionalities of an embedded grammar format</H2> -<P> -GF grammars can be used as parts of programs written in other programming -languages, to be called <B>host languages</B>. -This facility is based on several components: -</P> -<UL> -<LI>PGF: a portable format for multilingual GF grammars -<LI>a PGF interpreter written in the host language -<LI>a library in the host language that enables calling the interpreter -<LI>a way to manipulate abstract syntax trees in the host language -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc146"></A> -<H2>The portable grammar format</H2> -<P> -The portable format is called PGF, "Portable Grammar Format". -</P> -<P> -This format is produced by using GF as batch compiler, with the option <CODE>-make</CODE>, -from the operative system shell: -</P> -<PRE> - % gf -make SOURCE.gf -</PRE> -<P> -PGF is the recommended format in -which final grammar products are distributed, because they -are stripped from superfluous information and can be started and applied -faster than sets of separate modules. -</P> -<P> -Application programmers have never any need to read or modify PGF files. -</P> -<P> -PGF thus plays the same role as machine code in -general-purpose programming (or bytecode in Java). -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc147"></A> -<H3>Haskell: the EmbedAPI module</H3> -<P> -The Haskell API contains (among other things) the following types and functions: -</P> -<PRE> - readPGF :: FilePath -> IO PGF - - linearize :: PGF -> Language -> Tree -> String - parse :: PGF -> Language -> Category -> String -> [Tree] - - linearizeAll :: PGF -> Tree -> [String] - linearizeAllLang :: PGF -> Tree -> [(Language,String)] - - parseAll :: PGF -> Category -> String -> [[Tree]] - parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])] - - languages :: PGF -> [Language] - categories :: PGF -> [Category] - startCat :: PGF -> Category -</PRE> -<P> -This is the only module that needs to be imported in the Haskell application. -It is available as a part of the GF distribution, in the file -<CODE>src/PGF.hs</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc148"></A> -<H3>First application: a translator</H3> -<P> -Let us first build a stand-alone translator, which can translate -in any multilingual grammar between any languages in the grammar. -</P> -<PRE> - module Main where - - import PGF - import System (getArgs) - - main :: IO () - main = do - file:_ <- getArgs - gr <- readPGF file - interact (translate gr) - - translate :: PGF -> String -> String - translate gr s = case parseAllLang gr (startCat gr) s of - (lg,t:_):_ -> unlines [linearize gr l t | l <- languages gr, l /= lg] - _ -> "NO PARSE" -</PRE> -<P> -To run the translator, first compile it by -</P> -<PRE> - % ghc -make -o trans Translator.hs -</PRE> -<P> -For this, you need the Haskell compiler <A HREF="http://www.haskell.org/ghc">GHC</A>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc149"></A> -<H3>Producing PGF for the translator</H3> -<P> -Then produce a PGF file. For instance, the <CODE>Food</CODE> grammar set can be -compiled as follows: -</P> -<PRE> - % gf -make FoodEng.gf FoodIta.gf -</PRE> -<P> -This produces the file <CODE>Food.pgf</CODE> (its name comes from the abstract syntax). -</P> -<P> -The Haskell library function <CODE>interact</CODE> makes the <CODE>trans</CODE> program work -like a Unix filter, which reads from standard input and writes to standard -output. Therefore it can be a part of a pipe and read and write files. -The simplest way to translate is to <CODE>echo</CODE> input to the program: -</P> -<PRE> - % echo "this wine is delicious" | ./trans Food.pgf - questo vino è delizioso -</PRE> -<P> -The result is given in all languages except the input language. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc150"></A> -<H3>A translator loop</H3> -<P> -To avoid starting the translator over and over again: -change <CODE>interact</CODE> in the main function to <CODE>loop</CODE>, defined as -follows: -</P> -<PRE> - loop :: (String -> String) -> IO () - loop trans = do - s <- getLine - if s == "quit" then putStrLn "bye" else do - putStrLn $ trans s - loop trans -</PRE> -<P> -The loop keeps on translating line by line until the input line -is <CODE>quit</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc151"></A> -<H3>A question-answer system</H3> -<P> -<a name="secmathprogram"></a> -</P> -<P> -The next application is also a translator, but it adds a -<B>transfer</B> component - a function that transforms syntax trees. -</P> -<P> -The transfer function we use is one that computes a question into an answer. -</P> -<P> -The program accepts simple questions about arithmetic and answers -"yes" or "no" in the language in which the question was made: -</P> -<PRE> - Is 123 prime? - No. - 77 est impair ? - Oui. -</PRE> -<P> -We change the pure translator by giving -the <CODE>translate</CODE> function the transfer as an extra argument: -</P> -<PRE> - translate :: (Tree -> Tree) -> PGF -> String -> String -</PRE> -<P> -Ordinary translation as a special case where -transfer is the identity function (<CODE>id</CODE> in Haskell). -</P> -<P> -To reply in the <I>same</I> language as the question: -</P> -<PRE> - translate tr gr = case parseAllLang gr (startCat gr) s of - (lg,t:_):_ -> linearize gr lg (tr t) - _ -> "NO PARSE" -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc152"></A> -<H3>Abstract syntax of the query system</H3> -<P> -Input: abstract syntax judgements -</P> -<PRE> - abstract Query = { - - flags startcat=Question ; - - cat - Answer ; Question ; Object ; - - fun - Even : Object -> Question ; - Odd : Object -> Question ; - Prime : Object -> Question ; - Number : Int -> Object ; - - Yes : Answer ; - No : Answer ; - } -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc153"></A> -<H3>Exporting GF datatypes to Haskell</H3> -<P> -To make it easy to define a transfer function, we export the -abstract syntax to a system of Haskell datatypes: -</P> -<PRE> - % gf --output-format=haskell Query.pgf -</PRE> -<P> -It is also possible to produce the Haskell file together with PGF, by -</P> -<PRE> - % gf -make --output-format=haskell QueryEng.gf -</PRE> -<P> -The result is a file named <CODE>Query.hs</CODE>, containing a -module named <CODE>Query</CODE>. -</P> -<P> -<!-- NEW --> -</P> -<P> -Output: Haskell definitions -</P> -<PRE> - module Query where - import PGF - - data GAnswer = - GYes - | GNo - - data GObject = GNumber GInt - - data GQuestion = - GPrime GObject - | GOdd GObject - | GEven GObject - - newtype GInt = GInt Integer -</PRE> -<P> -All type and constructor names are prefixed with a <CODE>G</CODE> to prevent clashes. -</P> -<P> -The Haskell module name is the same as the abstract syntax name. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc154"></A> -<H3>The question-answer function</H3> -<P> -Haskell's type checker guarantees that the functions are well-typed also with -respect to GF. -</P> -<PRE> - answer :: GQuestion -> GAnswer - answer p = case p of - GOdd x -> test odd x - GEven x -> test even x - GPrime x -> test prime x - - value :: GObject -> Int - value e = case e of - GNumber (GInt i) -> fromInteger i - - test :: (Int -> Bool) -> GObject -> GAnswer - test f x = if f (value x) then GYes else GNo -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc155"></A> -<H3>Converting between Haskell and GF trees</H3> -<P> -The generated Haskell module also contains -</P> -<PRE> - class Gf a where - gf :: a -> Tree - fg :: Tree -> a - - instance Gf GQuestion where - gf (GEven x1) = DTr [] (AC (CId "Even")) [gf x1] - gf (GOdd x1) = DTr [] (AC (CId "Odd")) [gf x1] - gf (GPrime x1) = DTr [] (AC (CId "Prime")) [gf x1] - fg t = - case t of - DTr [] (AC (CId "Even")) [x1] -> GEven (fg x1) - DTr [] (AC (CId "Odd")) [x1] -> GOdd (fg x1) - DTr [] (AC (CId "Prime")) [x1] -> GPrime (fg x1) - _ -> error ("no Question " ++ show t) -</PRE> -<P> -For the programmer, it is enough to know: -</P> -<UL> -<LI>all GF names are in Haskell prefixed with <CODE>G</CODE> -<LI><CODE>gf</CODE> translates from Haskell objects to GF trees -<LI><CODE>fg</CODE> translates from GF trees to Haskell objects -</UL> - -<P> -<!-- NEW --> -</P> -<A NAME="toc156"></A> -<H3>Putting it all together: the transfer definition</H3> -<PRE> - module TransferDef where - - import PGF (Tree) - import Query -- generated from GF - - transfer :: Tree -> Tree - transfer = gf . answer . fg - - answer :: GQuestion -> GAnswer - answer p = case p of - GOdd x -> test odd x - GEven x -> test even x - GPrime x -> test prime x - - value :: GObject -> Int - value e = case e of - GNumber (GInt i) -> fromInteger i - - test :: (Int -> Bool) -> GObject -> GAnswer - test f x = if f (value x) then GYes else GNo - - prime :: Int -> Bool - prime x = elem x primes where - primes = sieve [2 .. x] - sieve (p:xs) = p : sieve [ n | n <- xs, n `mod` p > 0 ] - sieve [] = [] -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc157"></A> -<H3>Putting it all together: the Main module</H3> -<P> -Here is the complete code in the Haskell file <CODE>TransferLoop.hs</CODE>. -</P> -<PRE> - module Main where - - import PGF - import TransferDef (transfer) - - main :: IO () - main = do - gr <- readPGF "Query.pgf" - loop (translate transfer gr) - - loop :: (String -> String) -> IO () - loop trans = do - s <- getLine - if s == "quit" then putStrLn "bye" else do - putStrLn $ trans s - loop trans - - translate :: (Tree -> Tree) -> PGF -> String -> String - translate tr gr s = case parseAllLang gr (startCat gr) s of - (lg,t:_):_ -> linearize gr lg (tr t) - _ -> "NO PARSE" -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc158"></A> -<H3>Putting it all together: the Makefile</H3> -<P> -To automate the production of the system, we write a <CODE>Makefile</CODE> as follows: -</P> -<PRE> - all: - gf -make --output-format=haskell QueryEng - ghc --make -o ./math TransferLoop.hs - strip math -</PRE> -<P> -(The empty segments starting the command lines in a Makefile must be tabs.) -Now we can compile the whole system by just typing -</P> -<PRE> - make -</PRE> -<P> -Then you can run it by typing -</P> -<PRE> - ./math -</PRE> -<P> -Just to summarize, the source of the application consists of the following files: -</P> -<PRE> - Makefile -- a makefile - Math.gf -- abstract syntax - Math???.gf -- concrete syntaxes - TransferDef.hs -- definition of question-to-answer function - TransferLoop.hs -- Haskell Main module -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc159"></A> -<H2>Web server applications</H2> -<P> -PGF files can be used in web servers, for which there is a Haskell library included -in <CODE>src/server/</CODE>. How to build a server for tasks like translators is explained -in the <A HREF="../src/server/README"><CODE>README</CODE></A> file in that directory. -</P> -<P> -One of the servers that can be readily built with the library (without any -programming required) is <B>fridge poetry magnets</B>. It is an application that -uses an incremental parser to suggest grammatically correct next words. Here -is an example of its application to the <CODE>Foods</CODE> grammars. -</P> -<P> -<IMG ALIGN="middle" SRC="food-magnet.png" BORDER="0" ALT=""> -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc160"></A> -<H2>JavaScript applications</H2> -<P> -JavaScript is a programming language that has interpreters built in in most -web browsers. It is therefore usable for client side web programs, which can even -be run without access to the internet. The following figure shows a JavaScript -program compiled from GF grammars as run on an iPhone. -</P> -<P> -<IMG ALIGN="middle" SRC="iphone.jpg" BORDER="0" ALT=""> -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc161"></A> -<H3>Compiling to JavaScript</H3> -<P> -JavaScript is one of the output formats of the GF batch compiler. Thus the following -command generates a JavaScript file from two <CODE>Food</CODE> grammars. -</P> -<PRE> - % gf -make --output-format=js FoodEng.gf FoodIta.gf -</PRE> -<P> -The name of the generated file is <CODE>Food.js</CODE>, derived from the top-most abstract -syntax name. This file contains the multilingual grammar as a JavaScript object. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc162"></A> -<H3>Using the JavaScript grammar</H3> -<P> -To perform parsing and linearization, the run-time library -<CODE>gflib.js</CODE> is used. It is included in <CODE>GF/lib/javascript/</CODE>, together with -some other JavaScript and HTML files; these files can be used -as templates for building applications. -</P> -<P> -An example of usage is -<A HREF="http://grammaticalframework.org:41296"><CODE>translator.html</CODE></A>, -which is in fact initialized with -a pointer to the Food grammar, so that it provides translation between the English -and Italian grammars: -</P> -<P> -<IMG ALIGN="middle" SRC="food-js.png" BORDER="0" ALT=""> -</P> -<P> -The grammar must have the name <CODE>grammar.js</CODE>. The abstract syntax and start -category names in <CODE>translator.html</CODE> must match the ones in the grammar. -With these changes, the translator works for any multilingual grammar. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc163"></A> -<H2>Language models for speech recognition</H2> -<P> -The standard way of using GF in speech recognition is by building -<B>grammar-based language models</B>. -</P> -<P> -GF supports several formats, including -GSL, the formatused in the <A HREF="http://www.nuance.com">Nuance speech recognizer</A>. -</P> -<P> -GSL is produced from GF by running <CODE>gf</CODE> with the flag -<CODE>--output-format=gsl</CODE>. -</P> -<P> -Example: GSL generated from <CODE>FoodsEng.gf</CODE>. -</P> -<PRE> - % gf -make --output-format=gsl FoodsEng.gf - % more FoodsEng.gsl - - ;GSL2.0 - ; Nuance speech recognition grammar for FoodsEng - ; Generated by GF - - .MAIN Phrase_cat - - Item_1 [("that" Kind_1) ("this" Kind_1)] - Item_2 [("these" Kind_2) ("those" Kind_2)] - Item_cat [Item_1 Item_2] - Kind_1 ["cheese" "fish" "pizza" (Quality_1 Kind_1) - "wine"] - Kind_2 ["cheeses" "fish" "pizzas" - (Quality_1 Kind_2) "wines"] - Kind_cat [Kind_1 Kind_2] - Phrase_1 [(Item_1 "is" Quality_1) - (Item_2 "are" Quality_1)] - Phrase_cat Phrase_1 - - Quality_1 ["boring" "delicious" "expensive" - "fresh" "italian" ("very" Quality_1) "warm"] - Quality_cat Quality_1 -</PRE> -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc164"></A> -<H3>More speech recognition grammar formats</H3> -<P> -Other formats available via the <CODE>--output-format</CODE> flag include: -</P> -<TABLE ALIGN="center" CELLPADDING="4" BORDER="1"> -<TR> -<TH>Format</TH> -<TH COLSPAN="2">Description</TH> -</TR> -<TR> -<TD><CODE>gsl</CODE></TD> -<TD>Nuance GSL speech recognition grammar</TD> -</TR> -<TR> -<TD><CODE>jsgf</CODE></TD> -<TD>Java Speech Grammar Format (JSGF)</TD> -</TR> -<TR> -<TD><CODE>jsgf_sisr_old</CODE></TD> -<TD>JSGF with semantic tags in SISR WD 20030401 format</TD> -</TR> -<TR> -<TD><CODE>srgs_abnf</CODE></TD> -<TD>SRGS ABNF format</TD> -</TR> -<TR> -<TD><CODE>srgs_xml</CODE></TD> -<TD>SRGS XML format</TD> -</TR> -<TR> -<TD><CODE>srgs_xml_prob</CODE></TD> -<TD>SRGS XML format, with weights</TD> -</TR> -<TR> -<TD><CODE>slf</CODE></TD> -<TD>finite automaton in the HTK SLF format</TD> -</TR> -<TR> -<TD><CODE>slf_sub</CODE></TD> -<TD>finite automaton with sub-automata in HTK SLF</TD> -</TR> -</TABLE> - -<P> -All currently available formats can be seen with <CODE>gf --help</CODE>. -</P> - -<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -\-toc gf-tutorial.t2t --> -</BODY></HTML> diff --git a/doc/tutorial/gf-tutorial.t2t b/doc/tutorial/gf-tutorial.t2t index 7231c9edb..2e3f086f7 100644 --- a/doc/tutorial/gf-tutorial.t2t +++ b/doc/tutorial/gf-tutorial.t2t @@ -9,6 +9,7 @@ December 2010 for GF 3.2 %!target:html %!encoding: iso-8859-1 +%!options: --toc %!postproc(tex) : "\\subsection\*" "\\newslide" %!preproc(tex): "#NEW" "" |
