updated phrasebook doc

author: aarne <aarne@chalmers.se> 2010-06-01 22:48:43 +0000
committer: aarne <aarne@chalmers.se> 2010-06-01 22:48:43 +0000
commit: b3c302ca6fa99abaa5cbc3ed69f138aecc9d7e98 (patch)
tree: 219cec765f861782b3d67db699ab7227b59cc3a5 /examples/phrasebook
parent: 83015a80184e4b2b1e34a4a7cd1b3832ec680d35 (diff)
9 files changed, 657 insertions, 88 deletions
diff --git a/examples/phrasebook/GreetingsSpa.gf b/examples/phrasebook/GreetingsSpa.gf
index 6008688f6..673bac85e 100644
--- a/examples/phrasebook/GreetingsSpa.gf
+++ b/examples/phrasebook/GreetingsSpa.gf
@@ -11,6 +11,9 @@ lin
   GDamn = ss "joder" ;
   GExcuse = ss "perdón" ;
   GExcusePol = ss "perdone" ;
+  GCongratulations = ss "felicitaciones" ;
+  GGoodLuck = ss "buena suerte" ;
+  GHappyBirthday = ss "feliz cumpleaños" ;
   GGoodMorning, GGoodDay = ss "buenos días" ;
   GGoodEvening = ss "buenas tardes" ;
   GGoodNight = ss "buenas noches" ;
diff --git a/examples/phrasebook/Implementation.html b/examples/phrasebook/Implementation.html
index 41bab9f70..ff2275979 100644
--- a/examples/phrasebook/Implementation.html
+++ b/examples/phrasebook/Implementation.html
@@ -106,8 +106,10 @@ gfdoc - a rudimentary GF document generator.
       Too property = mkAP too_AdA (mkAP property) ;
       PropQuality property = mkAP property ;
   
-      ThePlace kind = placeNP the_Det kind ;
-      APlace kind = placeNP a_Det kind ;
+      ThePlace kind = let dd = if_then_else Det kind.isPl thePl_Det theSg_Det 
+                       in placeNP dd kind ;
+      APlace kind = let dd = if_then_else Det kind.isPl aPl_Det a_Det 
+                       in placeNP dd kind ;  -- indefinite determiner, agreeing in number with the kind
   
       IMale, IFemale = mkPerson i_Pron ;
       YouFamMale, YouFamFemale = mkPerson youSg_Pron ;
@@ -130,7 +132,11 @@ gfdoc - a rudimentary GF document generator.
   
       NNumeral n = mkCard &lt;lin Numeral n : Numeral>  ;
   
-      AHave p obj = mkCl p.name have_V2 obj ;
+      SHave   p obj = mkS (mkCl p.name have_V2 obj) ;
+      SHaveNo p k = mkS negativePol (mkCl p.name have_V2 (mkNP aPl_Det k)) ;
+      SHaveNoMass p m = mkS negativePol (mkCl p.name have_V2 (mkNP m)) ;
+      QDoHave p obj = mkQS (mkQCl (mkCl p.name have_V2 obj)) ;
+  
       AHaveCurr p curr = mkCl p.name have_V2 (mkNP aPl_Det curr) ;
       ACitizen p n = mkCl p.name n ;
       ABePlace p place = mkCl p.name place.at ;
@@ -166,12 +172,20 @@ These are used in Words for each language.
         } ;
   
     NPPlace : Type = {name : NP ; at : Adv ; to : Adv} ;
-    CNPlace : Type = {name : CN ; at : Prep ; to : Prep} ;
+    CNPlace : Type = {name : CN ; at : Prep ; to : Prep; isPl : Bool} ;
   
     mkCNPlace : CN -> Prep -> Prep -> CNPlace = \p,i,t -> {
       name = p ;
       at = i ;
-      to = t
+      to = t ;
+      isPl = False
+      } ;
+  
+   mkCNPlacePl : CN -> Prep -> Prep -> CNPlace = \p,i,t -> {
+      name = p ;
+      at = i ;
+      to = t ;
+      isPl = True
       } ;
   
     placeNP : Det -> CNPlace -> NPPlace = \det,kind ->
@@ -344,7 +358,7 @@ Means of transportation
 
 Actions: the predication patterns are very often language-dependent.
 <pre>
-      AHasAge p num = mkCl p.name (mkNP (mkNP num L.year_N) (mkAdv "old"));
+      AHasAge p num = mkCl p.name (mkNP (mkNP num L.year_N) (ParadigmsEng.mkAdv "old"));
       AHasChildren p num = mkCl p.name have_V2 (mkNP num L.child_N) ;
       AHasRoom p num = mkCl p.name have_V2 
         (mkNP (mkNP a_Det (mkN "room")) (SyntaxEng.mkAdv for_Prep (mkNP num (mkN "person")))) ;
@@ -456,10 +470,10 @@ auxiliaries
         mkNPDay day (SyntaxEng.mkAdv on_Prep day) 
           (SyntaxEng.mkAdv on_Prep (mkNP a_Quant plNum (mkCN (mkN d)))) ;
   
-      mkCompoundPlace : Str -> Str -> Str -> {name : CN ; at : Prep ; to : Prep} = \comp, p, i ->
+      mkCompoundPlace : Str -> Str -> Str -> {name : CN ; at : Prep ; to : Prep; isPl : Bool} = \comp, p, i ->
        mkCNPlace (mkCN (P.mkN comp (mkN p))) (P.mkPrep i) to_Prep ;
   
-      mkPlace : Str -> Str -> {name : CN ; at : Prep ; to : Prep} = \p,i -> 
+      mkPlace : Str -> Str -> {name : CN ; at : Prep ; to : Prep; isPl : Bool} = \p,i -> 
         mkCNPlace (mkCN (mkN p)) (P.mkPrep i) to_Prep ;
   
       open_Adv = P.mkAdv "open" ;
diff --git a/examples/phrasebook/Makefile b/examples/phrasebook/Makefile
index f0dc1826d..4e36e2988 100644
--- a/examples/phrasebook/Makefile
+++ b/examples/phrasebook/Makefile
@@ -29,7 +29,7 @@ doc:
 	rm -f Ontology.gf
 	cat SentencesI.gf WordsEng.gf >Implementation.gf
 	gfdoc Implementation.gf
-	txt2tags -thtml phrasebook.txt
+	txt2tags -thtml --toc phrasebook.txt
 	rm -f Ontology.gf Implementation.gf
 
 upload:: Phrasebook.pgf
diff --git a/examples/phrasebook/Ontology.html b/examples/phrasebook/Ontology.html
index 0765ac4e0..48059049a 100644
--- a/examples/phrasebook/Ontology.html
+++ b/examples/phrasebook/Ontology.html
@@ -147,12 +147,16 @@ Determiners.
 Actions are typically language-dependent, not only lexically but also
 structurally. However, these ones are mostly functorial.
 <pre>
-      AHave     : Person -> Object      -> Action ;  -- you have pizzas
+      SHave       : Person -> Object      -> Sentence ;  -- you have beer
+      SHaveNo     : Person -> Kind        -> Sentence ;  -- you have no apples
+      SHaveNoMass : Person -> MassKind    -> Sentence ;  -- you have no beer
+      QDoHave     : Person -> Object      -> Question ;  -- do you have beer
+  
       AHaveCurr : Person -> Currency    -> Action ;  -- you have dollars
       ACitizen  : Person -> Citizenship -> Action ;  -- you are Swedish
       ABePlace  : Person -> Place       -> Action ;  -- you are in the bar
   
-      ByTransp : Transport -> ByTransport ;         -- by bus
+      ByTransp : Transport -> ByTransport ;          -- by bus
   
   }
 </pre>
diff --git a/examples/phrasebook/WordsFin.gf b/examples/phrasebook/WordsFin.gf
index 29494ccb2..0e4e7d14c 100644
--- a/examples/phrasebook/WordsFin.gf
+++ b/examples/phrasebook/WordsFin.gf
@@ -208,7 +208,9 @@ concrete WordsFin of Words = SentencesFin **
       mkQS (mkQCl (mkIP which_IDet trans.name) (mkVP (mkVP L.go_V) place.to)) ;
 
     IsTranspPlace trans place =
-      mkQS (mkQCl (E.AdvPredNP place.to L.go_V (E.PartCN (trans.name)))) ; 
+      mkQS (mkQCl (mkCl (mkVP (mkVP (mkVP (mkV "päästä")) trans.by) place.to))) ;
+      -- pääseekö keskustaan bussilla
+      -- mkQS (mkQCl (E.AdvPredNP place.to L.go_V (E.PartCN (trans.name)))) ; 
       -- meneekö keskustaan bussia
 
 -- modifiers of places
diff --git a/examples/phrasebook/missing.txt b/examples/phrasebook/missing.txt
index e05b4c3c2..88a998dfb 100644
--- a/examples/phrasebook/missing.txt
+++ b/examples/phrasebook/missing.txt
@@ -11,5 +11,5 @@ PhrasebookIta :
 PhrasebookNor :
 PhrasebookPol :
 PhrasebookRon :
-PhrasebookSpa : GCongratulations GGoodLuck GHappyBirthday
+PhrasebookSpa :
 PhrasebookSwe :
diff --git a/examples/phrasebook/phrasebook.html b/examples/phrasebook/phrasebook.html
index fae61468a..2d36e5fc0 100644
--- a/examples/phrasebook/phrasebook.html
+++ b/examples/phrasebook/phrasebook.html
@@ -2,6 +2,7 @@
 <HTML>
 <HEAD>
 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
 <TITLE>MOLTO Multilingual Phrasebook</TITLE>
 </HEAD><BODY BGCOLOR="white" TEXT="black">
 <P ALIGN="center"><CENTER><H1>MOLTO Multilingual Phrasebook</H1>
@@ -10,6 +11,25 @@
 Showcase for project FP7-ICT-247914, Deliverable D10.2.
 </FONT></CENTER>
 
+<P></P>
+<HR NOSHADE SIZE=1>
+<P></P>
+  <UL>
+  <LI><A HREF="#toc1">Purpose</A>
+  <LI><A HREF="#toc2">Points illustrated</A>
+  <LI><A HREF="#toc3">Ontology</A>
+  <LI><A HREF="#toc4">Files</A>
+  <LI><A HREF="#toc5">To Do</A>
+  <LI><A HREF="#toc6">How to contribute</A>
+  <LI><A HREF="#toc7">Effort and cost</A>
+  <LI><A HREF="#toc8">Example-based grammar writing prototype</A>
+  <LI><A HREF="#toc9">Conclusions (tentative)</A>
+  <LI><A HREF="#toc10">Acknowledgements</A>
+  </UL>
+
+<P></P>
+<HR NOSHADE SIZE=1>
+<P></P>
 <P>
 <HR>
 <font size=-1>
@@ -18,6 +38,8 @@ Showcase for project FP7-ICT-247914, Deliverable D10.2.
 History
 </P>
 <UL>
+<LI>2 June. Version 1.0 released!
+<LI>29 May. Link to Google translate with the current language pair and phrase.
 <LI>27 May. Polish added.
 <LI>26 May. Version 0.9:
   Catalan added, mass/count noun distinction to reduce overgeneration, 
@@ -49,33 +71,34 @@ History
 </font>
 <HR>
 </P>
+<A NAME="toc1"></A>
 <H1>Purpose</H1>
 <P>
 This phrasebook is a program for translating touristic phrases 
-between the 15 European languages included in the 
+between 14 European languages included in the 
 <A HREF="http://www.molto-project.eu">MOLTO</A> project
 (Multilingual On-Line Translation):
 </P>
 <UL>
 <LI>Bulgarian, Catalan, Danish, Dutch, English,
   Finnish, French, German, Italian, Norwegian,
-  Polish, Romanian, Russian, Spanish, Swedish
+  Polish, Romanian, Spanish, Swedish
 </UL>
 
 <P>
 It is implemented by using the GF programming language 
 (<A HREF="http://grammaticalframework.org">Grammatical Framework</A>).
-It is the first demo for the MOLTO project, released in the third month (by June 2010)
-but to be updated in the course of the project.
+It is the first demo for the MOLTO project, released in the third month (by June 2010).
+The first version is a very small system, but it will be extended in the course of the project.
 </P>
 <P>
-The phrasebook has the following requirements:
+The phrasebook has the following requirement specification:
 </P>
 <UL>
 <LI>high quality: reliable translations to express yourself in any language
 <LI>translation between all pairs of languages
 <LI>runnable in web browsers
-<LI>runnable on mobile phones (also off-line: forthcoming for Android phones)
+<LI>runnable on mobile phones (forthcoming: Android phones)
 <LI>easily extensible by new words (forthcoming: semi-automatic extensions by users)
 </UL>
 
@@ -84,39 +107,91 @@ The phrasebook is available as open-source software, licensed under GNU LGPL.
 The source code resides in 
 <A HREF="http://code.haskell.org/gf/examples/phrasebook/"><CODE>code.haskell.org/gf/examples/phrasebook/</CODE></A>
 </P>
+<A NAME="toc2"></A>
+<H1>Points illustrated</H1>
 <P>
-Current status (27 May 2010):
+Interlingua-based translation
 </P>
 <UL>
-<LI>small but useful coverage in abstract syntax
-<LI>reasonable implementations for all MOLTO languages except Russian
-<LI>works on web browsers calling a server
-<LI>web service not yet released, but preliminarily available in
-  <A HREF="http://www.grammaticalframework.org/demos/phrasebook/">http://www.grammaticalframework.org/demos/phrasebook/</A>
+<LI>we translate meanings, rather than words
 </UL>
 
-<H1>Points illustrated</H1>
 <P>
-Interlingua-based translation.
+Incremental parsing
 </P>
+<UL>
+<LI>the user is at every point guided by the list of possible next words
+</UL>
+
 <P>
-Incremental parsing.
+The use of resource grammars and functors
 </P>
+<UL>
+<LI>the translator was implemented on top of an earlier linguistic knowledge base,
+  the <A HREF="http://grammaticalframework.com/lib">GF Resource Grammar Library</A>
+</UL>
+
 <P>
-The use of resource grammars and functors.
+Example-based grammar writing and grammar induction from statistical models 
+(<A HREF="http://translate.google.com">Google translate</A>)
 </P>
+<UL>
+<LI>many of the grammars were created semi-automatically by generalization from
+  examples
+</UL>
+
 <P>
-Example-based grammar writing and grammar induction from statistical models (Google).
+Compile-time transfer: especially, in Action in Words
 </P>
+<UL>
+<LI>the structural differences between languages are treated at compile time,
+  for maximal run-time efficiency
+</UL>
+
 <P>
-Compile-time transfer: especially, in Action in Words.
+Quasi-incremental translation: many basic types are also used as phrases
 </P>
+<UL>
+<LI>one can translate both words and complete sentences, and get intermediate results
+</UL>
+
 <P>
-Quasi-incremental translation: many basic types are also used as phrases.
+Disambiguation, esp. of politeness distinctions
 </P>
+<UL>
+<LI>if a phrase has many translations, each of them is shown and given an explanation
+  (currently just in English, later in any source language)
+</UL>
+
 <P>
-Disambiguation, esp. of politeness distinctions.
+Fall-back to statistical translation
 </P>
+<UL>
+<LI>currently just a link to Google translate (forthcoming: tailor-made statistical models)
+</UL>
+
+<P>
+Feed-back from users
+</P>
+<UL>
+<LI>you are welcome to send comments, bug reports, and better translation suggestions!
+</UL>
+
+<P>
+The level of skills involved in grammar development
+</P>
+<UL>
+<LI>testing different configurations (see table below)
+</UL>
+
+<P>
+Grammar testing
+</P>
+<UL>
+<LI>use of treebanks with guided random generation for initial evaluation and regression testing
+</UL>
+
+<A NAME="toc3"></A>
 <H1>Ontology</H1>
 <P>
 The abstract syntax defines the <B>ontology</B> behind the phrasebook. 
@@ -128,6 +203,7 @@ and
 <A HREF="http://code.haskell.org/gf/examples/phrasebook/Words.gf"><CODE>Words.gf</CODE></A>
 by <CODE>make doc</CODE>.
 </P>
+<A NAME="toc4"></A>
 <H1>Files</H1>
 <P>
 <CODE>Sentences</CODE>: general syntactic structures implementable in a uniform way. 
@@ -164,18 +240,9 @@ Here is the module structure as produced in GF by
 <P>
 <IMG ALIGN="middle" SRC="pgraph.png" BORDER="0" ALT="">
 </P>
+<A NAME="toc5"></A>
 <H1>To Do</H1>
 <P>
-Improved translation interface
-</P>
-<UL>
-<LI>a nicer way to show disambiguation (maybe hidden by default)
-</UL>
-
-<P>
-Complete the missing words and phrases
-</P>
-<P>
 Disambiguation grammars for other languages than English
 </P>
 <P>
@@ -183,20 +250,15 @@ Extend the abstract lexicon in <CODE>Words</CODE> by hand or (semi)automatically
 </P>
 <UL>
 <LI>food stuff
-<LI>languages
 <LI>places
+<LI>actions
 </UL>
 
 <P>
-Link to Google translate, for fall-back and for comparison
-</P>
-<P>
-Feedback facility in the UI
-</P>
-<P>
-Customizable distribution: make your own selection of the 2^15 language subsets
+Customizable phone distribution: make your own selection of the 2^14 language subsets
 when downloading the phrasebook to a phone
 </P>
+<A NAME="toc6"></A>
 <H1>How to contribute</H1>
 <P>
 The basic things "everyone" can do is 
@@ -253,15 +315,337 @@ Here are the steps to follow for contributors:
 <LI>Don't compromise quality to gain coverage: <I>non multa sed multum!</I> 
 </UL>
 
-<H2>Acknowledgements</H2>
+<A NAME="toc7"></A>
+<H1>Effort and cost</H1>
+<TABLE BORDER="1" CELLPADDING="4">
+<TR>
+<TH>Language</TH>
+<TH>Grammarian's language skills</TH>
+<TH>Grammarian's GF skills</TH>
+<TH>Informant used for development</TH>
+<TH>Informant used for testing</TH>
+<TH>Use of external tools</TH>
+<TH>Impact of external tools</TH>
+<TH>Changes on the resource grammar</TH>
+<TH COLSPAN="2">Development time</TH>
+</TR>
+<TR>
+<TD>Bulgarian</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>Catalan</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">#</TD>
+</TR>
+<TR>
+<TD>Danish</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>Dutch</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>English</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">#</TD>
+</TR>
+<TR>
+<TD>Finnish</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>French</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">#</TD>
+</TR>
+<TR>
+<TD>German</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">###</TD>
+</TR>
+<TR>
+<TD>Italian</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>Norwegian</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>Polish</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>Romanian</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">###</TD>
+</TR>
+<TR>
+<TD>Spanish</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">#</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+<TR>
+<TD>Swedish</TD>
+<TD ALIGN="center">##</TD>
+<TD ALIGN="center">###</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">+</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">?</TD>
+<TD ALIGN="center">-</TD>
+<TD ALIGN="center">##</TD>
+</TR>
+</TABLE>
+
+<P>
+Explanation on scores
+</P>
+<UL>
+<LI>Grammarian's language skills  
+   <UL>
+   <LI>-   : no skills
+   <LI>#   : passive knowledge
+   <LI>##  : fluent non-native
+   <LI>### : native speaker
+   </UL>
+</UL>
+
+<UL>
+<LI>Grammarian's GF skills  
+   <UL>
+   <LI>-   : no skills
+   <LI>#   : basic skills (2-day GF tutorial)
+   <LI>##  : medium skills (previous experience of similar task)
+   <LI>### : advanced skills (resource grammar writer/substantial contributor)
+   </UL>
+</UL>
+
+<UL>
+<LI>Informant used for development/Informant used for testing/Use of external tools 
+   <UL>
+   <LI>- : no
+   <LI>+ : yes
+   </UL>
+</UL>
+
+<UL>
+<LI>Impact of external tools 
+   <UL>
+   <LI>? : not investigated
+   <LI>- : no effect on the Phrasebook
+   <LI># : small impact (literal translation, simple idioms)
+   <LI>## : medium effect (translation of more forms of words, contextual preposition)
+   <LI>### : great effect (no extra work needed, translations are correct)
+   </UL>
+</UL>
+
+<UL>
+<LI>Changes on the resource grammars
+   <UL>
+   <LI>- : no changes
+   <LI># : 1-3 minor changes
+   <LI>## : 4-10 minor changes, 1-3 medium changes
+   <LI>### : &gt;10 changes of any kind  
+   </UL>
+</UL>
+
+<UL>
+<LI>Development time (overall effort, including extra work on resource grammars)
+   <UL>
+   <LI># : less than 8 person hours
+   <LI>## : 8-24 person hours
+   <LI>### : &gt;24 person hours
+   </UL>
+</UL>
+
+<A NAME="toc8"></A>
+<H1>Example-based grammar writing prototype</H1>
+<P>
+The figure presents the process of creating a Phrasebook using an example-based 
+approach for the language X, where X = {Danish, Dutch, German, Norwegian}.
+</P>
+<P>
+<IMG ALIGN="middle" SRC="picpic.jpg" BORDER="0" ALT="">
+</P>
+<UL>
+<LI>the first step assumes an analysis of the resource grammar and extracts the necessary 
+  information that functions that build new lexical entries would need. 
+  A model is built so that the proper forms of the word can be rendered, 
+  and additional information, such as gender, can be inferred. The script applies 
+  these rules to each entry that we want to translate into the target language, and 
+  one obtains a set of constructions.
+<LI>they are furthermore given to an external translator tool (Google translate) 
+  or a native speaker for translation. One needs the configuration file even if the 
+  translator is human, because formal knowledge of grammar is not assumed.
+<LI>the translations into the target language are then further processed in order to 
+  build the linearizations of the categories first, decoding the information received. 
+  Furthermore, having the words in the lexicon, one can parse the translations of 
+  functions with the GF parser and generalize from that.
+<LI>the resulting grammar is tested with the aid of a script that generates 
+  constructions covering all the functions and categories from the grammar, along 
+  with some other constructions that proved to be problematic in some language. 
+  The result of the script contains for each construction in the target language 
+  its English correspondent and the abstract syntax tree. A native speaker 
+  evaluates the results and if corrections are needed, the algorithm runs again 
+  with the new examples. Depending on the language skills of the grammar writer, 
+  the changes can be made directly into the GF files, and the correct examples 
+  given by the native informant are just kept for validating the results. 
+  The algorithm is repeated as long as corrections are needed.
+</UL>
+
+<P>
+The time needed for preparing the configuration files for a grammar will not be needed 
+in the future, since the files are reusable for other applications.
+The time for the second step can be saved if automatic tools, like Google translate 
+are used. This is only possible in languages with a simpler morphology and syntax 
+and large corpora available.
+Good results were obtained for German and Dutch with Google translate, but for 
+languages like Romanian or Polish, which are both complex and lack enough resources, 
+the results are discouraging. 
+</P>
+<P>
+If the statistical oracle works well, the only step where the presence of a human 
+translator is needed is the evaluation and feedback step. On average, 2 rounds of about 
+4 hours each were needed for the languages for which we performed 
+the experiment. It is possible that more effort is needed for more complex languages.
+</P>
+<A NAME="toc9"></A>
+<H1>Conclusions (tentative)</H1>
+<P>
+The grammarian need not be a native speaker of the language.
+</P>
+<P>
+For many languages, the grammarian need not even know the language - native informants are
+enough.
+</P>
+<P>
+However, evaluation by native speakers is necessary.
+</P>
+<P>
+Correct and idiomatic translations are possible.
+</P>
+<P>
+A typical development time was 2-3 person working days per language.
+</P>
+<P>
+Google translate helps in bootstrapping grammars, but must be checked.
+</P>
+<UL>
+<LI>in particular, unreliable for morphologically rich languages
+</UL>
+
+<P>
+Resource grammars should give some more support
+</P>
+<UL>
+<LI>higher-level access to constructions like negative expressions
+<LI>large-scale morphological lexica
+</UL>
+
+<A NAME="toc10"></A>
+<H1>Acknowledgements</H1>
 <P>
 The Phrasebook has been built in the MOLTO project funded by the European Commission.
 </P>
 <P>
 The authors are grateful to their native speaker informants helping to bootstrap and evaluate
-the grammars: Richard Bubel, Grégoire Détrez, Michal Palka, Willard Rafnsson,...
+the grammars: 
+Richard Bubel, 
+Grégoire Détrez, 
+Karin Keijzer, 
+Michał Pałka,
+Willard Rafnsson,
+Nick Smallbone.
 </P>
 
 <!-- html code generated by txt2tags 2.5 (http://txt2tags.sf.net) -->
-<!-- cmdline: txt2tags -thtml phrasebook.txt -->
+<!-- cmdline: txt2tags -thtml -\-toc phrasebook.txt -->
 </BODY></HTML>
diff --git a/examples/phrasebook/phrasebook.txt b/examples/phrasebook/phrasebook.txt
index 7226ae1b1..d7bfa162d 100644
--- a/examples/phrasebook/phrasebook.txt
+++ b/examples/phrasebook/phrasebook.txt
@@ -3,6 +3,8 @@ Krasimir Angelov, Olga Caprotti, Ramona Enache, Thomas Hallgren, Inari Listenmaa
 Showcase for project FP7-ICT-247914, Deliverable D10.2.
 
 
+%!Encoding:utf-8
+
 %!postproc(html): #HR <HR>
 %!postproc(html): #BSMALL <font size=-1>
 %!postproc(html): #ESMALL </font>
@@ -14,6 +16,8 @@ Showcase for project FP7-ICT-247914, Deliverable D10.2.
 #BSMALL
 
 History
+- 2 June. Version 1.0 released!
+- 29 May. Link to Google translate with the current language pair and phrase.
 - 27 May. Polish added.
 - 26 May. Version 0.9:
   Catalan added, mass/count noun distinction to reduce overgeneration, 
@@ -46,24 +50,24 @@ History
 =Purpose=
 
 This phrasebook is a program for translating touristic phrases 
-between the 15 European languages included in the 
+between 14 European languages included in the 
 [MOLTO http://www.molto-project.eu] project
 (Multilingual On-Line Translation):
 - Bulgarian, Catalan, Danish, Dutch, English,
   Finnish, French, German, Italian, Norwegian,
-  Polish, Romanian, Russian, Spanish, Swedish
+  Polish, Romanian, Spanish, Swedish
 
 
 It is implemented by using the GF programming language 
 ([Grammatical Framework http://grammaticalframework.org]).
-It is the first demo for the MOLTO project, released in the third month (by June 2010)
-but to be updated in the course of the project.
+It is the first demo for the MOLTO project, released in the third month (by June 2010).
+The first version is a very small system, but it will be extended in the course of the project.
 
-The phrasebook has the following requirements:
+The phrasebook has the following requirement specification:
 - high quality: reliable translations to express yourself in any language
 - translation between all pairs of languages
 - runnable in web browsers
-- runnable on mobile phones (also off-line: forthcoming for Android phones)
+- runnable on mobile phones (forthcoming: Android phones)
 - easily extensible by new words (forthcoming: semi-automatic extensions by users)
 
 
@@ -72,30 +76,57 @@ The source code resides in
 [``code.haskell.org/gf/examples/phrasebook/`` http://code.haskell.org/gf/examples/phrasebook/]
 
 
-Current status (27 May 2010):
-- small but useful coverage in abstract syntax
-- reasonable implementations for all MOLTO languages except Russian
-- works on web browsers calling a server
-- web service not yet released, but preliminarily available in
-  http://www.grammaticalframework.org/demos/phrasebook/
 
+=Points illustrated=
+
+Interlingua-based translation
+- we translate meanings, rather than words
 
 
-=Points illustrated=
+Incremental parsing
+- the user is at every point guided by the list of possible next words
+
+
+The use of resource grammars and functors
+- the translator was implemented on top of an earlier linguistic knowledge base,
+  the [GF Resource Grammar Library http://grammaticalframework.com/lib]
+
+
+Example-based grammar writing and grammar induction from statistical models 
+([Google translate http://translate.google.com])
+- many of the grammars were created semi-automatically by generalization from
+  examples
+
+
+Compile-time transfer: especially, in Action in Words
+- the structural differences between languages are treated at compile time,
+  for maximal run-time efficiency
+
+
+Quasi-incremental translation: many basic types are also used as phrases
+- one can translate both words and complete sentences, and get intermediate results
+
+
+Disambiguation, esp. of politeness distinctions
+- if a phrase has many translations, each of them is shown and given an explanation
+  (currently just in English, later in any source language)
+
 
-Interlingua-based translation.
+Fall-back to statistical translation
+- currently just a link to Google translate (forthcoming: tailor-made statistical models)
 
-Incremental parsing.
 
-The use of resource grammars and functors.
+Feed-back from users
+- you are welcome to send comments, bug reports, and better translation suggestions!
 
-Example-based grammar writing and grammar induction from statistical models (Google).
 
-Compile-time transfer: especially, in Action in Words.
+The level of skills involved in grammar development
+- testing different configurations (see table below)
 
-Quasi-incremental translation: many basic types are also used as phrases.
 
-Disambiguation, esp. of politeness distinctions.
+Grammar testing
+- use of treebanks with guided random generation for initial evaluation and regression testing
+
 
 
 
@@ -146,25 +177,15 @@ Here is the module structure as produced in GF by
 
 =To Do=
 
-Improved translation interface
-- a nicer way to show disambiguation (maybe hidden by default)
-
-
-Complete the missing words and phrases
-
 Disambiguation grammars for other languages than English
 
 Extend the abstract lexicon in ``Words`` by hand or (semi)automatically for
 - food stuff
-- languages
 - places
+- actions
 
 
-Link to Google translate, for fall-back and for comparison
-
-Feedback facility in the UI
-
-Customizable distribution: make your own selection of the 2^15 language subsets
+Customizable phone distribution: make your own selection of the 2^14 language subsets
 when downloading the phrasebook to a phone
 
 
@@ -214,10 +235,151 @@ Here are the steps to follow for contributors:
 - Don't compromise quality to gain coverage: //non multa sed multum!// 
 
 
-==Acknowledgements==
+
+=Effort and cost=
+
+|| Language | Grammarian's language skills  | Grammarian's GF skills | Informant used for development | Informant used for testing | Use of external tools | Impact of external tools | Changes on the resource grammar | Development time || 
+| Bulgarian |   ###    |    ###  |    -  |  -  |   -   |   ?   |  #   |   ##  |
+| Catalan   |   ###    |    ###  |    -  |  -  |   -   |   ?   |  #   |   #   |
+| Danish    |   -      |    ###  |    +  |  +  |   +   |   ##  |  ##  |   ##  |
+| Dutch     |   -      |    ###  |    +  |  +  |   +   |   ##  |  #   |   ##  |
+| English   |   ##     |    ###  |    -  |  +  |   -   |   -   |  -   |   #   |
+| Finnish   |   ###    |    ###  |    -  |  -  |   -   |   ?   |  #   |   ##  |
+| French    |   ##     |    ###  |    -  |  +  |   -   |   ?   |  #   |   #   |
+| German    |   #      |    ###  |    +  |  +  |   +   |   ##  |  ##  |   ###  |
+| Italian   |   ###    |    #    |    -  |  -  |   -   |   ?   |  ##  |   ##  |
+| Norwegian |   #      |    ###  |    +  |  -  |   +   |   ##  |  #   |   ##  |
+| Polish    |   ###    |    ###  |    +  |  +  |   +   |   #   |  #   |   ##  |
+| Romanian  |   ###    |    ###  |    -  |  -  |   +   |   #   |  ###  |   ###  |
+| Spanish   |   ##     |    #    |    -  |  -  |   -   |   ?   |  -   |   ##   |
+| Swedish   |   ##     |    ###  |    -  |  +  |   -   |   ?   |  -   |   ##  |
+
+
+Explanation of scores
+
+- Grammarian's language skills  
+   - -   : no skills
+   - #   : passive knowledge
+   - ##  : fluent non-native
+   - ### : native speaker
+
+
+- Grammarian's GF skills  
+   - -   : no skills
+   - #   : basic skills (2-day GF tutorial)
+   - ##  : medium skills (previous experience of similar task)
+   - ### : advanced skills (resource grammar writer/substantial contributor)
+
+
+- Informant used for development/Informant used for testing/Use of external tools 
+   - - : no
+   - + : yes
+
+
+- Impact of external tools 
+   - ? : not investigated
+   - - : no effect on the Phrasebook
+   - # : small impact (literal translation, simple idioms)
+   - ## : medium effect (translation of more forms of words, contextual preposition)
+   - ### : great effect (no extra work needed, translations are correct)
+
+
+- Changes on the resource grammars
+   - - : no changes
+   - # : 1-3 minor changes
+   - ## : 4-10 minor changes, 1-3 medium changes
+   - ### : >10 changes of any kind  
+
+
+- Development time (overall effort, including extra work on resource grammars)
+   - # : less than 8 person hours
+   - ## : 8-24 person hours
+   - ### : >24 person hours
+ 
+
+=Example-based grammar writing prototype=
+
+The figure presents the process of creating a Phrasebook using an example-based 
+approach for the language X, where X = {Danish, Dutch, German, Norwegian}.
+
+[picpic.jpg]
+
+- the first step assumes an analysis of the resource grammar and extracts the 
+  information needed by the functions that build new lexical entries. 
+  A model is built so that the proper forms of the word can be rendered, 
+  and additional information, such as gender, can be inferred. The script applies 
+  these rules to each entry that we want to translate into the target language, and 
+  one obtains a set of constructions.
+- these constructions are then given to an external translator tool (Google translate) 
+  or a native speaker for translation. One needs the configuration file even if the 
+  translator is human, because formal knowledge of grammar is not assumed.
+- the translations into the target language are then further processed in order to 
+  build the linearizations of the categories first, decoding the information received. 
+  Furthermore, having the words in the lexicon, one can parse the translations of 
+  functions with the GF parser and generalize from that.
+- the resulting grammar is tested with the aid of a script that generates 
+  constructions covering all the functions and categories from the grammar, along 
+  with some other constructions that proved to be problematic in some language. 
+  The result of the script contains for each construction in the target language 
+  its English correspondent and the abstract syntax tree. A native speaker 
+  evaluates the results and if corrections are needed, the algorithm runs again 
+  with the new examples. Depending on the language skills of the grammar writer, 
+  the changes can be made directly into the GF files, and the correct examples 
+  given by the native informant are just kept for validating the results. 
+  The algorithm is repeated as long as corrections are needed.
+
+
+The time needed for preparing the configuration files for a grammar will not be needed 
+in the future, since the files are reusable for other applications.
+The time for the second step can be saved if automatic tools, like Google translate, 
+are used. This is only possible in languages with a simpler morphology and syntax 
+and large corpora available.
+Good results were obtained for German and Dutch with Google translate, but for 
+languages like Romanian or Polish, which are both complex and lack enough resources, 
+the results are discouraging. 
+
+If the statistical oracle works well, the only step where the presence of a human 
+translator is needed is the evaluation and feedback step. On average, 4 hours per 
+round and 2 rounds were needed for the languages for which we performed 
+the experiment. It is possible that more effort is needed for more complex languages.
+
+
+=Conclusions (tentative)=
+
+The grammarian need not be a native speaker of the language.
+
+For many languages, the grammarian need not even know the language - native informants are
+enough.
+
+However, evaluation by native speakers is necessary.
+
+Correct and idiomatic translations are possible.
+
+A typical development time was 2-3 person working days per language.
+
+Google translate helps in bootstrapping grammars, but must be checked.
+- in particular, unreliable for morphologically rich languages
+
+
+Resource grammars should give some more support
+- higher-level access to constructions like negative expressions
+- large-scale morphological lexica
+
+
+
+
+
+
+=Acknowledgements=
 
 The Phrasebook has been built in the MOLTO project funded by the European Commission.
 
 The authors are grateful to their native speaker informants helping to bootstrap and evaluate
-the grammars: Richard Bubel, Grégoire Détrez, Michal Palka, Willard Rafnsson,...
+the grammars: 
+Richard Bubel, 
+Grégoire Détrez, 
+Karin Keijzer, 
+Michał Pałka,
+Willard Rafnsson,
+Nick Smallbone.
 
diff --git a/examples/phrasebook/picpic.jpg b/examples/phrasebook/picpic.jpg
new file mode 100644
index 000000000..aac20b611
--- /dev/null
+++ b/examples/phrasebook/picpic.jpg
author	aarne <aarne@chalmers.se>	2010-06-01 22:48:43 +0000
committer	aarne <aarne@chalmers.se>	2010-06-01 22:48:43 +0000
commit	b3c302ca6fa99abaa5cbc3ed69f138aecc9d7e98 (patch)
tree	219cec765f861782b3d67db699ab7227b59cc3a5 /examples/phrasebook
parent	83015a80184e4b2b1e34a4a7cd1b3832ec680d35 (diff)