diff options
Diffstat (limited to 'doc')
| -rw-r--r-- | doc/2341.html | 259 | ||||
| -rw-r--r-- | doc/DocGF.pdf | bin | 56906 -> 0 bytes | |||
| -rw-r--r-- | doc/DocGF.tex | 569 | ||||
| -rw-r--r-- | doc/German.png | bin | 21000 -> 0 bytes | |||
| -rw-r--r-- | doc/Grammar.dot | 75 | ||||
| -rw-r--r-- | doc/Grammar.png | bin | 78790 -> 0 bytes | |||
| -rw-r--r-- | doc/Resource-HOWTO.html | 967 | ||||
| -rw-r--r-- | doc/Resource-HOWTO.txt | 827 | ||||
| -rw-r--r-- | doc/Syntax.png | bin | 104604 -> 0 bytes | |||
| -rw-r--r-- | doc/TODO | 231 | ||||
| -rw-r--r-- | doc/compiling-gf.txt | 750 | ||||
| -rw-r--r-- | doc/eu-langs.dot | 79 | ||||
| -rw-r--r-- | doc/eu-langs.png | bin | 85484 -> 0 bytes | |||
| -rw-r--r-- | doc/food-translet.png | bin | 22916 -> 0 bytes | |||
| -rw-r--r-- | doc/food1.png | bin | 22805 -> 0 bytes | |||
| -rw-r--r-- | doc/food2.png | bin | 31506 -> 0 bytes | |||
| -rw-r--r-- | doc/gf-compiler.dot | 88 | ||||
| -rw-r--r-- | doc/gf-compiler.png | bin | 27451 -> 0 bytes | |||
| -rw-r--r-- | doc/gf-formalism.html | 350 | ||||
| -rw-r--r-- | doc/gf-formalism.txt | 279 | ||||
| -rw-r--r-- | doc/gf-ideas.html | 311 | ||||
| -rw-r--r-- | doc/gf-ideas.txt | 231 | ||||
| -rw-r--r-- | doc/gf-people.html | 27 | ||||
| -rw-r--r-- | doc/gf-quickstart.html | 42 | ||||
| -rw-r--r-- | doc/gf-refman.html | 2 | ||||
| -rw-r--r-- | doc/gf-statistics.txt | 289 | ||||
| -rw-r--r-- | doc/gf-summerschool.txt | 533 | ||||
| -rw-r--r-- | doc/gf3-release.html | 73 | ||||
| -rw-r--r-- | doc/gf3-release.txt | 58 | ||||
| -rw-r--r-- | doc/index.html | 155 | ||||
| -rw-r--r-- | doc/school-langs.dot | 106 | ||||
| -rw-r--r-- | doc/school-langs.png | bin | 131704 -> 0 bytes | |||
| -rw-r--r-- | doc/summer-align.png | bin | 449911 -> 0 bytes | |||
| -rw-r--r-- | doc/summer-langs.png | bin | 1885485 -> 0 bytes | |||
| -rw-r--r-- | doc/tutorial/10lang-small.png (renamed from doc/10lang-small.png) | bin | 66840 -> 66840 bytes | |||
| -rw-r--r-- | doc/tutorial/categories.png (renamed from doc/categories.png) | bin | 4241 -> 4241 bytes | |||
| -rw-r--r-- | doc/tutorial/food-js.png (renamed from doc/food-js.png) | bin | 19002 -> 19002 bytes | |||
| -rw-r--r-- | doc/tutorial/food-magnet.png (renamed from doc/food-magnet.png) | bin | 98845 -> 98845 bytes | |||
| -rw-r--r-- | doc/tutorial/foodmarket.png (renamed from doc/foodmarket.png) | bin | 2099 -> 2099 bytes | |||
| -rw-r--r-- | doc/tutorial/gf-tutorial.html (renamed from doc/gf-tutorial.html) | 461 | ||||
| -rw-r--r-- | doc/tutorial/gf-tutorial.txt (renamed from doc/gf-tutorial.txt) | 42 | ||||
| -rw-r--r-- | doc/tutorial/iphone.jpg (renamed from doc/iphone.jpg) | bin | 17150 -> 17150 bytes | |||
| -rw-r--r-- | doc/tutorial/mytree.png (renamed from doc/mytree.png) | bin | 2230 -> 2230 bytes | |||
| -rw-r--r-- | doc/vr.html | 46 | ||||
| -rw-r--r-- | doc/vr.txt | 32 |
45 files changed, 76 insertions, 6806 deletions
diff --git a/doc/2341.html b/doc/2341.html deleted file mode 100644 index ff3e9644d..000000000 --- a/doc/2341.html +++ /dev/null @@ -1,259 +0,0 @@ -<html> -<HEAD><META http-equiv=Content-Type content="text/html; charset=utf-8"></HEAD> -<body> -af_tunni : lámma kún síddi? boqól afartón i ków - -<p> -albanian : dy mijë tre qind e dyzet e një - -<p> -amharic : ሁለት ሺህ ሦስት መቶ ኣርባ ኣንድ - -<p> -arabic_classical : الفان و ثلاث مائة و واحد و أربعون - -<p> -arabic_modern : ﺍﻟﻔﻴﻦ ﻭ ﺛﻼﺛﻤﺎﺋﺔ ﻭ ﻭﺍﺣﺪ ﻭ ﺃﺭﺑﻌﻴﻦ - -<p> -basque : bi mila ta hirurehun berrogei ta bat - -<p> -bearlake_slave : nákee lamíl tai lak'o, óno, di,i, honéno, ?ó, l-ée - -<p> -bulgarian : две жиляди триста четирисет и едно - -<p> -catalan : dos mil tres-cents quaranta - u - -<p> -chinese : è´° ä» é¶ å ä½° è æ¾ 壹 - -<p> -croatian : dva hiljade tri stotine četrdeset i jedan - -<p> -czech : dva tisíce tr^i sta čtyr^icet jeden - -<p> -dagur : hoire miange guarebe jau duci neke - -<p> -danish : to tusind og tre hundrede og en og fyrre - -<p> -decimal : 2341 - -<p> -dutch : twee duizend drie honderd een en veertig - -<p> -english : two thousand three hundred and forty - one - -<p> -finnish : kaksi tuhatta kolme sataa neljä kymmentä yksi - -<p> -french : deux mille trois cent quarante et un - -<p> -french_swiss : deux mille trois cent quarante et un - -<p> -fulfulde : ujine d.id.i temed.d.e tati e chappand.e nai e go'o - -<p> -geez : ዕሽራ ወ ሠላስቱ ምእት አርብዓ ወ አሐዱ - -<p> -german : zwei tausend drei hundert ein und vierzig - -<p> -greek_classical : δισχίλιοι τριακόσιοι τετταράκοντα εἵς - -<p> -greek_modern : δύο χιλιάδες τριακόσια σαράντα ένα - -<p> -guahibo : aniha sunu akueya sia yana bae kae - -<p> -guarani : moko~i ma mpohapy sa~ irundy kua~ petei~ - -<p> -hebrew_biblical : אלפים ו שלש מאות ו ארבעים ו אחד - -<p> -hindi : दो हज़ार तीन सौ एक्तालीस - -<p> -hungarian : két ezer három száz negyven egy - -<p> -icelandic : tvö Þúsund Þrjú hundrað fjörutíu og einn - -<p> -irish : dhá mhíle trí chead dhá fhichead a haon - -<p> 
-italian : due mila tre cento quaranta uno - -<p> -japanese : にせん さんびゃく よんぢゅう いち - -<p> -kabardian : m&yn&yt' s'a&ys' p'L-'&s'ra z&ra - -<p> -kambera : dua riu tailu ngahu patu kambulu hau - -<p> -kawaiisu : N -<p> -khmer : bīra bā'na pī raya sē sipa mwya - -<p> -khowar : joo hazâr troi shọr oché joo bîsher î - -<p> -kodagu : i:ra:yrat mu:nu:yt.a na:padï - -<p> -kolyma_yukaghir : N -<p> -kulung : ni habau su chhum lik i - -<p> -kwami : dùbúk póllów dálmágí kúnún kán kúu pòD^òw kán múndí - -<p> -kwaza : N -<p> -lalo : `n. t'w sa há i tjhí tjh`& - -<p> -lamani : di hajaar do se caaLise par ek - -<p> -latvian : divtu^kstoš trīssimt četrdesmit viens - -<p> -lithuanian : dù tú:kstanc^iu, try:s s^imtai~ ke:turiasdes^imt víenas - -<p> -lotuxo : tausand ârrexai ikO EssIxa xunixoi ikO atOmwana aNwan x' âbotye - -<p> -maale : lam?ó $íya haitsó s'ééta ?oydí-támmi pétte - -<p> -malay : dua ribu tiga ratus empat puluh satu - -<p> -maltese : elfejn tliet mija u wieh-ed u erbgh-in - -<p> -mapuche : epu warangka külá pataka meli mari kiñe - -<p> -margi : dúbú s`&d>àN ghàrú mák`&r agá fód>ú kùmì gà s'&r pátlú* - -<p> -maybrat : N -<p> -miya : d'&bu ts`&r '`&náa d>àriy kìdi '`&náa díb>i f`&d>& bèh&n wut'& - -<p> -mongolian : qoyar mingGan Gurban ĵa'un döčin nigän - -<p> -nenets : side juonar n-ahar jur t-êt ju' ~ob - -<p> -norwegian_book : to tusen og tre hundre og førti et - -<p> -old_church_slavonic : дъвѣ тысѭшти триѥ съта четыре десѧте и ѥдинъ - -<p> -oromo : kuma lama fi dhibba sadii fi afurtamii tokko - -<p> -pashto : دوه زره دري سوه او يو څلوۍښت - -<p> -polish : dwa tysiace trzysta czterdziesci jeden - -<p> -portuguese : dois mil trezentos quarenta e um - -<p> -quechua : iskay warank'a kinsa pachak tawa chunka jukniyuq - -<p> -romanian : două mii trei sute patruzeci şi unu - -<p> -russian : две тысячи триста сорок один - -<p> -sango : ngbangbu bale óse na ndó ní ngbangbu otá na ndó ní bale osió na ndó ní ÓkO - -<p> -sanskrit : त्रि शतान्य एकचत्वारिंशच च द्वे सहस्रे - -<p> 
-slovak : dva tisic tri sto styridsat jedna - -<p> -sorani : دۇ ههزار سىسهد ځل و يهك - -<p> -spanish : dos mil trescientos cuarenta y uno - -<p> -stieng : baar ban pê riêng puôn jo't muôi - -<p> -swahili : elfu mbili mia tatu arobaini na moja - -<p> -swedish : två tusen tre hundra fyrtio ett - -<p> -tamil : இரணௌடௌ ஆயாரதௌதீ மீனௌ நரீ நரௌ பதௌ ஓனௌரீ - -<p> -tampere : kaks tuhatta kolme sataa nel kyt yks - -<p> -tibetan : t̆ong ṭ'a' n̆yī d́ang sumğya d́ang z̆hyib chu źhye chi' - -<p> -totonac : maa t~u3 mil lii ~a tuhun pus^um tun - -<p> -tuda_daza : dubu cu sao kidra ago.zo. sao mOrta tozo sao tro - -<p> -tukang_besi : dua riwu tolu hatu hato hulu sa'asa - -<p> -turkish : iki bin üç yüz kırk bir - -<p> -votic : kahsi tuhatta keVmsata: nelläts^ümmet ühsi - -<p> -welsh : dau fil tri chan un a deugain - -<p> -yasin_burushaski : altó hazár iskí tha altó-áltar hek - -<p> -zaiwa : i55 hing55 sum11 syo31 mi11 cue31 ra11 - -</body> -</html> - diff --git a/doc/DocGF.pdf b/doc/DocGF.pdf Binary files differdeleted file mode 100644 index 27e4262db..000000000 --- a/doc/DocGF.pdf +++ /dev/null diff --git a/doc/DocGF.tex b/doc/DocGF.tex deleted file mode 100644 index 6388d3548..000000000 --- a/doc/DocGF.tex +++ /dev/null @@ -1,569 +0,0 @@ -\batchmode -%This Latex file is machine-generated by the BNF-converter - -\documentclass[a4paper,11pt]{article} -\author{BNF-converter} -\title{The Language GF} -\setlength{\parindent}{0mm} -\setlength{\parskip}{1mm} -\begin{document} - -\maketitle - -\newcommand{\emptyP}{\mbox{$\epsilon$}} -\newcommand{\terminal}[1]{\mbox{{\texttt {#1}}}} -\newcommand{\nonterminal}[1]{\mbox{$\langle \mbox{{\sl #1 }} \! \rangle$}} -\newcommand{\arrow}{\mbox{::=}} -\newcommand{\delimit}{\mbox{$|$}} -\newcommand{\reserved}[1]{\mbox{{\texttt {#1}}}} -\newcommand{\literal}[1]{\mbox{{\texttt {#1}}}} -\newcommand{\symb}[1]{\mbox{{\texttt {#1}}}} - -This document was automatically generated by the {\em BNF-Converter}. 
It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place). - -\section*{The lexical structure of GF} -\subsection*{Identifiers} -Identifiers \nonterminal{Ident} are unquoted strings beginning with a letter, -followed by any combination of letters, digits, and the characters {\tt \_ '}, -reserved words excluded. - - -\subsection*{Literals} -Integer literals \nonterminal{Int}\ are nonempty sequences of digits. - - -String literals \nonterminal{String}\ have the form -\terminal{"}$x$\terminal{"}, where $x$ is any sequence of any characters -except \terminal{"}\ unless preceded by \verb6\6. - - - - -LString literals are recognized by the regular expression -\(\mbox{`''} ({\nonterminal{anychar}} - \mbox{`''})* \mbox{`''}\) - - -\subsection*{Reserved words and symbols} -The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions. 
- -The reserved words used in GF are the following: \\ - -\begin{tabular}{lll} -{\reserved{Lin}} &{\reserved{PType}} &{\reserved{Str}} \\ -{\reserved{Strs}} &{\reserved{Tok}} &{\reserved{Type}} \\ -{\reserved{abstract}} &{\reserved{case}} &{\reserved{cat}} \\ -{\reserved{concrete}} &{\reserved{data}} &{\reserved{def}} \\ -{\reserved{flags}} &{\reserved{fn}} &{\reserved{fun}} \\ -{\reserved{grammar}} &{\reserved{in}} &{\reserved{include}} \\ -{\reserved{incomplete}} &{\reserved{instance}} &{\reserved{interface}} \\ -{\reserved{let}} &{\reserved{lin}} &{\reserved{lincat}} \\ -{\reserved{lindef}} &{\reserved{lintype}} &{\reserved{of}} \\ -{\reserved{open}} &{\reserved{oper}} &{\reserved{out}} \\ -{\reserved{package}} &{\reserved{param}} &{\reserved{pattern}} \\ -{\reserved{pre}} &{\reserved{printname}} &{\reserved{resource}} \\ -{\reserved{reuse}} &{\reserved{strs}} &{\reserved{table}} \\ -{\reserved{tokenizer}} &{\reserved{transfer}} &{\reserved{union}} \\ -{\reserved{var}} &{\reserved{variants}} &{\reserved{where}} \\ -{\reserved{with}} & & \\ -\end{tabular}\\ - -The symbols used in GF are the following: \\ - -\begin{tabular}{lll} -{\symb{;}} &{\symb{{$=$}}} &{\symb{\{}} \\ -{\symb{\}}} &{\symb{(}} &{\symb{)}} \\ -{\symb{:}} &{\symb{{$-$}{$>$}}} &{\symb{**}} \\ -{\symb{,}} &{\symb{[}} &{\symb{]}} \\ -{\symb{.}} &{\symb{{$|$}}} &{\symb{\%}} \\ -{\symb{?}} &{\symb{{$<$}}} &{\symb{{$>$}}} \\ -{\symb{@}} &{\symb{!}} &{\symb{*}} \\ -{\symb{$\backslash$}} &{\symb{{$=$}{$>$}}} &{\symb{{$+$}{$+$}}} \\ -{\symb{{$+$}}} &{\symb{\_}} &{\symb{\$}} \\ -{\symb{/}} &{\symb{{$-$}}} & \\ -\end{tabular}\\ - -\subsection*{Comments} -Single-line comments begin with {\symb{{$-$}{$-$}}}. \\Multiple-line comments are enclosed with {\symb{\{{$-$}}} and {\symb{{$-$}\}}}. - -\section*{The syntactic structure of GF} -Non-terminals are enclosed between $\langle$ and $\rangle$. -The symbols {\arrow} (production), {\delimit} (union) -and {\emptyP} (empty rule) belong to the BNF notation. 
-All other symbols are terminals.\\ - -\begin{tabular}{lll} -{\nonterminal{Grammar}} & {\arrow} &{\nonterminal{ListModDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListModDef}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{ModDef}} {\nonterminal{ListModDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ModDef}} & {\arrow} &{\nonterminal{ModDef}} {\terminal{;}} \\ - & {\delimit} &{\terminal{grammar}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\terminal{abstract}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{;}} {\nonterminal{ListConcSpec}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{ComplMod}} {\nonterminal{ModType}} {\terminal{{$=$}}} {\nonterminal{ModBody}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ConcSpec}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ConcExp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListConcSpec}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{ConcSpec}} \\ - & {\delimit} &{\nonterminal{ConcSpec}} {\terminal{;}} {\nonterminal{ListConcSpec}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ConcExp}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListTransfer}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListTransfer}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Transfer}} {\nonterminal{ListTransfer}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Transfer}} & {\arrow} &{\terminal{(}} {\terminal{transfer}} {\terminal{in}} {\nonterminal{Open}} {\terminal{)}} \\ - & {\delimit} &{\terminal{(}} {\terminal{transfer}} {\terminal{out}} {\nonterminal{Open}} {\terminal{)}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ModType}} & {\arrow} &{\terminal{abstract}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{resource}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{interface}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{concrete}} 
{\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{instance}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{transfer}} {\nonterminal{Ident}} {\terminal{:}} {\nonterminal{Open}} {\terminal{{$-$}{$>$}}} {\nonterminal{Open}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ModBody}} & {\arrow} &{\nonterminal{Extend}} {\nonterminal{Opens}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\ - & {\delimit} &{\nonterminal{ListIdent}} {\terminal{**}} {\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\ - & {\delimit} &{\terminal{reuse}} {\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{union}} {\nonterminal{ListIncluded}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListTopDef}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{TopDef}} {\nonterminal{ListTopDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Extend}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{**}} \\ - & {\delimit} &{\emptyP} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListOpen}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Open}} \\ - & {\delimit} &{\nonterminal{Open}} {\terminal{,}} {\nonterminal{ListOpen}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Opens}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{open}} {\nonterminal{ListOpen}} {\terminal{in}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Open}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{)}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{)}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ComplMod}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{incomplete}} \\ 
-\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{QualOpen}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{incomplete}} \\ - & {\delimit} &{\terminal{interface}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListIncluded}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Included}} \\ - & {\delimit} &{\nonterminal{Included}} {\terminal{,}} {\nonterminal{ListIncluded}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Included}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{[}} {\nonterminal{ListIdent}} {\terminal{]}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Def}} & {\arrow} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Name}} {\nonterminal{ListPatt}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{TopDef}} & {\arrow} &{\terminal{cat}} {\nonterminal{ListCatDef}} \\ - & {\delimit} &{\terminal{fun}} {\nonterminal{ListFunDef}} \\ - & {\delimit} &{\terminal{data}} {\nonterminal{ListFunDef}} \\ - & {\delimit} &{\terminal{def}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{data}} {\nonterminal{ListDataDef}} \\ - & {\delimit} &{\terminal{transfer}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{param}} {\nonterminal{ListParDef}} \\ - & {\delimit} &{\terminal{oper}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{lincat}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{lindef}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{lin}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{printname}} {\terminal{cat}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{printname}} {\terminal{fun}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} 
&{\terminal{flags}} {\nonterminal{ListFlagDef}} \\ - & {\delimit} &{\terminal{printname}} {\nonterminal{ListPrintDef}} \\ - & {\delimit} &{\terminal{lintype}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{pattern}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{package}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} {\terminal{;}} \\ - & {\delimit} &{\terminal{var}} {\nonterminal{ListDef}} \\ - & {\delimit} &{\terminal{tokenizer}} {\nonterminal{Ident}} {\terminal{;}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{CatDef}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} {\terminal{\{}} {\nonterminal{Integer}} {\terminal{\}}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{FunDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{DataDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListDataConstr}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{DataConstr}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDataConstr}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{DataConstr}} \\ - & {\delimit} &{\nonterminal{DataConstr}} {\terminal{{$|$}}} {\nonterminal{ListDataConstr}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ParDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListParConstr}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{Ident}} \\ -\end{tabular}\\ - 
-\begin{tabular}{lll} -{\nonterminal{ParConstr}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PrintDef}} & {\arrow} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{FlagDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDef}} & {\arrow} &{\nonterminal{Def}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{Def}} {\terminal{;}} {\nonterminal{ListDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListCatDef}} & {\arrow} &{\nonterminal{CatDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{CatDef}} {\terminal{;}} {\nonterminal{ListCatDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListFunDef}} & {\arrow} &{\nonterminal{FunDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{FunDef}} {\terminal{;}} {\nonterminal{ListFunDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDataDef}} & {\arrow} &{\nonterminal{DataDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{DataDef}} {\terminal{;}} {\nonterminal{ListDataDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListParDef}} & {\arrow} &{\nonterminal{ParDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{ParDef}} {\terminal{;}} {\nonterminal{ListParDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPrintDef}} & {\arrow} &{\nonterminal{PrintDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{PrintDef}} {\terminal{;}} {\nonterminal{ListPrintDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListFlagDef}} & {\arrow} &{\nonterminal{FlagDef}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{FlagDef}} {\terminal{;}} {\nonterminal{ListFlagDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListParConstr}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{ParConstr}} \\ - & 
{\delimit} &{\nonterminal{ParConstr}} {\terminal{{$|$}}} {\nonterminal{ListParConstr}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListIdent}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{,}} {\nonterminal{ListIdent}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Name}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\terminal{]}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListName}} & {\arrow} &{\nonterminal{Name}} \\ - & {\delimit} &{\nonterminal{Name}} {\terminal{,}} {\nonterminal{ListName}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{LocDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListLocDef}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{LocDef}} \\ - & {\delimit} &{\nonterminal{LocDef}} {\terminal{;}} {\nonterminal{ListLocDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp4}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{\%}} \\ - & {\delimit} &{\nonterminal{Sort}} \\ - & {\delimit} &{\nonterminal{String}} \\ - & {\delimit} &{\nonterminal{Integer}} \\ - & {\delimit} &{\terminal{?}} \\ - & {\delimit} &{\terminal{[}} {\terminal{]}} \\ - & {\delimit} &{\terminal{data}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{Exps}} {\terminal{]}} \\ - & {\delimit} &{\terminal{[}} {\nonterminal{String}} {\terminal{]}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListTupleComp}} 
{\terminal{{$>$}}} \\ - & {\delimit} &{\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\ - & {\delimit} &{\terminal{{$<$}}} {\nonterminal{Exp}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$>$}}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{Exp}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{LString}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp3}} & {\arrow} &{\nonterminal{Exp3}} {\terminal{.}} {\nonterminal{Label}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\%}} \\ - & {\delimit} &{\nonterminal{Exp4}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp2}} & {\arrow} &{\nonterminal{Exp2}} {\nonterminal{Exp3}} \\ - & {\delimit} &{\terminal{table}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{[}} {\nonterminal{ListExp}} {\terminal{]}} \\ - & {\delimit} &{\terminal{case}} {\nonterminal{Exp}} {\terminal{of}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{variants}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{pre}} {\terminal{\{}} {\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListAltern}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{strs}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{@}} {\nonterminal{Exp4}} \\ - & {\delimit} &{\nonterminal{Exp3}} \\ - & {\delimit} &{\terminal{Lin}} {\nonterminal{Ident}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp1}} & {\arrow} &{\nonterminal{Exp1}} {\terminal{!}} {\nonterminal{Exp2}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{*}} 
{\nonterminal{Exp2}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{**}} {\nonterminal{Exp2}} \\ - & {\delimit} &{\nonterminal{Exp2}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exp}} & {\arrow} &{\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\terminal{$\backslash$}} {\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Decl}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}{$+$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}}} {\nonterminal{Exp}} \\ - & {\delimit} &{\terminal{let}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} {\terminal{in}} {\nonterminal{Exp}} \\ - & {\delimit} &{\terminal{let}} {\nonterminal{ListLocDef}} {\terminal{in}} {\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp1}} {\terminal{where}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{fn}} {\terminal{\{}} {\nonterminal{ListEquation}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Exp1}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListExp}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Exp}} \\ - & {\delimit} &{\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListExp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Exps}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Exp4}} {\nonterminal{Exps}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Patt1}} & {\arrow} &{\terminal{\_}} \\ - & {\delimit} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\ - & {\delimit} &{\nonterminal{Integer}} \\ - & {\delimit} &{\nonterminal{String}} \\ - 
& {\delimit} &{\terminal{\{}} {\nonterminal{ListPattAss}} {\terminal{\}}} \\ - & {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListPattTupleComp}} {\terminal{{$>$}}} \\ - & {\delimit} &{\terminal{(}} {\nonterminal{Patt}} {\terminal{)}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Patt}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListPatt}} \\ - & {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\nonterminal{ListPatt}} \\ - & {\delimit} &{\nonterminal{Patt1}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PattAss}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Patt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Label}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\$}} {\nonterminal{Integer}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Sort}} & {\arrow} &{\terminal{Type}} \\ - & {\delimit} &{\terminal{PType}} \\ - & {\delimit} &{\terminal{Tok}} \\ - & {\delimit} &{\terminal{Str}} \\ - & {\delimit} &{\terminal{Strs}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPattAss}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{PattAss}} \\ - & {\delimit} &{\nonterminal{PattAss}} {\terminal{;}} {\nonterminal{ListPattAss}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PattAlt}} & {\arrow} &{\nonterminal{Patt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPatt}} & {\arrow} &{\nonterminal{Patt1}} \\ - & {\delimit} &{\nonterminal{Patt1}} {\nonterminal{ListPatt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPattAlt}} & {\arrow} &{\nonterminal{PattAlt}} \\ - & {\delimit} &{\nonterminal{PattAlt}} {\terminal{{$|$}}} {\nonterminal{ListPattAlt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Bind}} & {\arrow} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{\_}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListBind}} & {\arrow} &{\emptyP} \\ - & {\delimit} 
&{\nonterminal{Bind}} \\ - & {\delimit} &{\nonterminal{Bind}} {\terminal{,}} {\nonterminal{ListBind}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Decl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{Exp2}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{TupleComp}} & {\arrow} &{\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{PattTupleComp}} & {\arrow} &{\nonterminal{Patt}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListTupleComp}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{TupleComp}} \\ - & {\delimit} &{\nonterminal{TupleComp}} {\terminal{,}} {\nonterminal{ListTupleComp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListPattTupleComp}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{PattTupleComp}} \\ - & {\delimit} &{\nonterminal{PattTupleComp}} {\terminal{,}} {\nonterminal{ListPattTupleComp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Case}} & {\arrow} &{\nonterminal{ListPattAlt}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListCase}} & {\arrow} &{\nonterminal{Case}} \\ - & {\delimit} &{\nonterminal{Case}} {\terminal{;}} {\nonterminal{ListCase}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Equation}} & {\arrow} &{\nonterminal{ListPatt}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListEquation}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Equation}} \\ - & {\delimit} &{\nonterminal{Equation}} {\terminal{;}} {\nonterminal{ListEquation}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Altern}} & {\arrow} &{\nonterminal{Exp}} {\terminal{/}} {\nonterminal{Exp}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListAltern}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{Altern}} \\ - & 
{\delimit} &{\nonterminal{Altern}} {\terminal{;}} {\nonterminal{ListAltern}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{DDecl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\ - & {\delimit} &{\nonterminal{Exp4}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListDDecl}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\nonterminal{DDecl}} {\nonterminal{ListDDecl}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{OldGrammar}} & {\arrow} &{\nonterminal{Include}} {\nonterminal{ListTopDef}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{Include}} & {\arrow} &{\emptyP} \\ - & {\delimit} &{\terminal{include}} {\nonterminal{ListFileName}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{FileName}} & {\arrow} &{\nonterminal{String}} \\ - & {\delimit} &{\nonterminal{Ident}} \\ - & {\delimit} &{\terminal{/}} {\nonterminal{FileName}} \\ - & {\delimit} &{\terminal{.}} {\nonterminal{FileName}} \\ - & {\delimit} &{\terminal{{$-$}}} {\nonterminal{FileName}} \\ - & {\delimit} &{\nonterminal{Ident}} {\nonterminal{FileName}} \\ -\end{tabular}\\ - -\begin{tabular}{lll} -{\nonterminal{ListFileName}} & {\arrow} &{\nonterminal{FileName}} {\terminal{;}} \\ - & {\delimit} &{\nonterminal{FileName}} {\terminal{;}} {\nonterminal{ListFileName}} \\ -\end{tabular}\\ - - - -\end{document} - diff --git a/doc/German.png b/doc/German.png Binary files differdeleted file mode 100644 index 7c6303897..000000000 --- a/doc/German.png +++ /dev/null diff --git a/doc/Grammar.dot b/doc/Grammar.dot deleted file mode 100644 index cb2998eb3..000000000 --- a/doc/Grammar.dot +++ /dev/null @@ -1,75 +0,0 @@ -digraph { - -size = "12,8" ; - -Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"]; - -Lang -> Grammar [style = "solid"]; -Lang -> Lexicon [style = "solid"]; - -Grammar [style = "solid", shape = "ellipse", URL = "Lang.gf"]; - - -Grammar -> Noun [style = "solid"]; -Grammar -> Verb [style = 
"solid"]; -Grammar -> Adjective [style = "solid"]; -Grammar -> Adverb [style = "solid"]; -Grammar -> Numeral [style = "solid"]; -Grammar -> Sentence [style = "solid"]; -Grammar -> Question [style = "solid"]; -Grammar -> Relative [style = "solid"]; -Grammar -> Conjunction [style = "solid"]; -Grammar -> Phrase [style = "solid"]; -Grammar -> Text [style = "solid"]; -Grammar -> Idiom [style = "solid"]; -Grammar -> Structural [style = "solid"]; - - -Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"]; -Noun -> Cat [style = "solid"]; - -Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"]; -Verb -> Cat [style = "solid"]; - -Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"]; -Adjective -> Cat [style = "solid"]; - -Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"]; -Adverb -> Cat [style = "solid"]; - -Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"]; -Numeral -> Cat [style = "solid"]; - -Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"]; -Sentence -> Cat [style = "solid"]; - -Question [style = "solid", shape = "ellipse", URL = "Question.gf"]; -Question -> Cat [style = "solid"]; - -Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"]; -Relative -> Cat [style = "solid"]; - -Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"]; -Conjunction -> Cat [style = "solid"]; - -Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; -Phrase -> Cat [style = "solid"]; - -Text [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; -Text -> Cat [style = "solid"]; - -Idiom [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; -Idiom -> Cat [style = "solid"]; - -Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"]; -Structural -> Cat [style = "solid"]; - -Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"]; -Lexicon -> Cat [style = "solid"]; - -Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"]; -Cat -> Common [style = "solid"]; - 
-Common [style = "solid", shape = "ellipse", URL = "Tense.gf"]; - -} diff --git a/doc/Grammar.png b/doc/Grammar.png Binary files differdeleted file mode 100644 index ada2847d7..000000000 --- a/doc/Grammar.png +++ /dev/null diff --git a/doc/Resource-HOWTO.html b/doc/Resource-HOWTO.html deleted file mode 100644 index ce2c15137..000000000 --- a/doc/Resource-HOWTO.html +++ /dev/null @@ -1,967 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" CONTENT="http://txt2tags.sf.net"> -<TITLE>Resource grammar writing HOWTO</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> -<P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1> -<FONT SIZE="4"> -<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR> -Last update: Mon Sep 22 14:28:01 2008 -</FONT></CENTER> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> - <UL> - <LI><A HREF="#toc1">The resource grammar structure</A> - <UL> - <LI><A HREF="#toc2">Library API modules</A> - <LI><A HREF="#toc3">Phrase category modules</A> - <LI><A HREF="#toc4">Infrastructure modules</A> - <LI><A HREF="#toc5">Lexical modules</A> - </UL> - <LI><A HREF="#toc6">Language-dependent syntax modules</A> - <UL> - <LI><A HREF="#toc7">The present-tense fragment</A> - </UL> - <LI><A HREF="#toc8">Phases of the work</A> - <UL> - <LI><A HREF="#toc9">Putting up a directory</A> - <LI><A HREF="#toc10">Direction of work</A> - <LI><A HREF="#toc11">The develop-test cycle</A> - <LI><A HREF="#toc12">Auxiliary modules</A> - <LI><A HREF="#toc13">Morphology and lexicon</A> - <LI><A HREF="#toc14">Lock fields</A> - <LI><A HREF="#toc15">Lexicon construction</A> - </UL> - <LI><A HREF="#toc16">Lexicon extension</A> - <UL> - <LI><A HREF="#toc17">The irregularity lexicon</A> - <LI><A HREF="#toc18">Lexicon extraction from a word list</A> - <LI><A HREF="#toc19">Lexicon extraction from raw text data</A> - <LI><A HREF="#toc20">Bootstrapping with smart paradigms</A> - </UL> - <LI><A HREF="#toc21">Extending the resource 
grammar API</A> - <LI><A HREF="#toc22">Using parametrized modules</A> - <UL> - <LI><A HREF="#toc23">Writing an instance of parametrized resource grammar implementation</A> - <LI><A HREF="#toc24">Parametrizing a resource grammar implementation</A> - </UL> - <LI><A HREF="#toc25">Character encoding and transliterations</A> - <LI><A HREF="#toc26">Coding conventions in GF</A> - <LI><A HREF="#toc27">Transliterations</A> - </UL> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> -<P> -<B>History</B> -</P> -<P> -September 2008: updated for Version 1.5. -</P> -<P> -October 2007: updated for Version 1.2. -</P> -<P> -January 2006: first version. -</P> -<P> -The purpose of this document is to tell how to implement the GF -resource grammar API for a new language. We will <I>not</I> cover how -to use the resource grammar, nor how to change the API. But we -will give some hints how to extend the API. -</P> -<P> -A manual for using the resource grammar is found in -</P> -<P> -<A HREF="../lib/resource/doc/synopsis.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html</CODE></A>. -</P> -<P> -A tutorial on GF, also introducing the idea of resource grammars, is found in -</P> -<P> -<A HREF="./gf-tutorial.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html</CODE></A>. -</P> -<P> -This document concerns the API v. 1.5, while the current stable release is 1.4. -You can find the code for the stable release in -</P> -<P> -<A HREF="../lib/resource"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/</CODE></A> -</P> -<P> -and the next release in -</P> -<P> -<A HREF="../next-lib/src"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/</CODE></A> -</P> -<P> -It is recommended to build new grammars to match the next release. 
-</P> -<A NAME="toc1"></A> -<H2>The resource grammar structure</H2> -<P> -The library is divided into a bunch of modules, whose dependencies -are given in the following figure. -</P> -<P> -<IMG ALIGN="left" SRC="Syntax.png" BORDER="0" ALT=""> -</P> -<P> -Modules of different kinds are distinguished as follows: -</P> -<UL> -<LI>solid contours: module seen by end users -<LI>dashed contours: internal module -<LI>ellipse: abstract/concrete pair of modules -<LI>rectangle: resource or instance -<LI>diamond: interface -</UL> - -<P> -Put in another way: -</P> -<UL> -<LI>solid rectangles and diamonds: user-accessible library API -<LI>solid ellipses: user-accessible top-level grammar for parsing and linearization -<LI>dashed contours: not visible to users -</UL> - -<P> -The dashed ellipses form the main parts of the implementation, on which the resource -grammar programmer has to work. She also has to work on the <CODE>Paradigms</CODE> -module. The rest of the modules can be produced mechanically from corresponding -modules for other languages, by just changing the language codes appearing in -their module headers. -</P> -<P> -The module structure is rather flat: most modules are direct -parents of <CODE>Grammar</CODE>. The idea -is that the implementors can concentrate on one linguistic aspect at a time, or -also distribute the work among several authors. The module <CODE>Cat</CODE> -defines the "glue" that ties the aspects together - a type system -to which all the other modules conform, so that e.g. <CODE>NP</CODE> means -the same thing in those modules that use <CODE>NP</CODE>s and those that -construct them. -</P> -<A NAME="toc2"></A> -<H3>Library API modules</H3> -<P> -For the user of the library, these modules are the most important ones. -In a typical application, it is enough to open <CODE>Paradigms</CODE> and <CODE>Syntax</CODE>.
-The module <CODE>Try</CODE> combines these two, making it possible to experiment -with combinations of syntactic and lexical constructors by using the -<CODE>cc</CODE> command in the GF shell. Here are short explanations of each API module: -</P> -<UL> -<LI><CODE>Try</CODE>: the whole resource library for a language (<CODE>Paradigms</CODE>, <CODE>Syntax</CODE>, - <CODE>Irreg</CODE>, and <CODE>Extra</CODE>); - produced mechanically as a collection of modules -<LI><CODE>Syntax</CODE>: language-independent categories, syntax functions, and structural words; - produced mechanically as a collection of modules -<LI><CODE>Constructors</CODE>: language-independent syntax functions and structural words; - produced mechanically via functor instantiation -<LI><CODE>Paradigms</CODE>: language-dependent morphological paradigms -</UL> - -<A NAME="toc3"></A> -<H3>Phrase category modules</H3> -<P> -The immediate parents of <CODE>Grammar</CODE> will be called <B>phrase category modules</B>, -since each of them concentrates on a particular phrase category (nouns, verbs, -adjectives, sentences,...). A phrase category module tells -<I>how to construct phrases in that category</I>. You will find out that -all functions in any of these modules have the same value type (or maybe -one of a small number of different types). 
Thus we have -</P> -<UL> -<LI><CODE>Noun</CODE>: construction of nouns and noun phrases -<LI><CODE>Adjective</CODE>: construction of adjectival phrases -<LI><CODE>Verb</CODE>: construction of verb phrases -<LI><CODE>Adverb</CODE>: construction of adverbial phrases -<LI><CODE>Numeral</CODE>: construction of cardinal and ordinal numerals -<LI><CODE>Sentence</CODE>: construction of sentences and imperatives -<LI><CODE>Question</CODE>: construction of questions -<LI><CODE>Relative</CODE>: construction of relative clauses -<LI><CODE>Conjunction</CODE>: coordination of phrases -<LI><CODE>Phrase</CODE>: construction of the major units of text and speech -<LI><CODE>Text</CODE>: construction of texts as sequences of phrases -<LI><CODE>Idiom</CODE>: idiomatic expressions such as existentials -</UL> - -<A NAME="toc4"></A> -<H3>Infrastructure modules</H3> -<P> -Expressions of each phrase category are constructed in the corresponding -phrase category module. But their <I>use</I> takes mostly place in other modules. -For instance, noun phrases, which are constructed in <CODE>Noun</CODE>, are -used as arguments of functions of almost all other phrase category modules. -How can we build all these modules independently of each other? -</P> -<P> -As usual in typeful programming, the <I>only</I> thing you need to know -about an object you use is its type. When writing a linearization rule -for a GF abstract syntax function, the only thing you need to know is -the linearization types of its value and argument categories. To achieve -the division of the resource grammar to several parallel phrase category modules, -what we need is an underlying definition of the linearization types. This -definition is given as the implementation of -</P> -<UL> -<LI><CODE>Cat</CODE>: syntactic categories of the resource grammar -</UL> - -<P> -Any resource grammar implementation has first to agree on how to implement -<CODE>Cat</CODE>. 
Luckily enough, even this can be done incrementally: you -can skip the <CODE>lincat</CODE> definition of a category and use the default -<CODE>{s : Str}</CODE> until you need to change it to something else. In -English, for instance, many categories do have this linearization type. -</P> -<A NAME="toc5"></A> -<H3>Lexical modules</H3> -<P> -What is lexical and what is syntactic is not as clearcut in GF as in -some other grammar formalisms. Logically, lexical means atom, i.e. a -<CODE>fun</CODE> with no arguments. Linguistically, one may add to this -that the <CODE>lin</CODE> consists of only one token (or of a table whose values -are single tokens). Even in the restricted lexicon included in the resource -API, the latter rule is sometimes violated in some languages. For instance, -<CODE>Structural.both7and_DConj</CODE> is an atom, but its linearization is -two words e.g. <I>both - and</I>. -</P> -<P> -Another characterization of lexical is that lexical units can be added -almost <I>ad libitum</I>, and they cannot be defined in terms of already -given rules. The lexical modules of the resource API are thus more like -samples than complete lists. There are two such modules: -</P> -<UL> -<LI><CODE>Structural</CODE>: structural words (determiners, conjunctions,...) -<LI><CODE>Lexicon</CODE>: basic everyday content words (nouns, verbs,...) -</UL> - -<P> -The module <CODE>Structural</CODE> aims for completeness, and is likely to -be extended in future releases of the resource. The module <CODE>Lexicon</CODE> -gives a "random" list of words, which enables testing the syntax. -It also provides a check list for morphology, since those words are likely to include -most morphological patterns of the language. -</P> -<P> -In the case of <CODE>Lexicon</CODE> it may come out clearer than anywhere else -in the API that it is impossible to give exact translation equivalents in -different languages on the level of a resource grammar. 
This is no problem, -since application grammars can use the resource in different ways for -different languages. -</P> -<A NAME="toc6"></A> -<H2>Language-dependent syntax modules</H2> -<P> -In addition to the common API, there is room for language-dependent extensions -of the resource. The top level of each language looks as follows (with German -as example): -</P> -<PRE> - abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs -</PRE> -<P> -where <CODE>ExtraGerAbs</CODE> is a collection of syntactic structures specific to German, -and <CODE>IrregGerAbs</CODE> is a dictionary of irregular words of German -(at the moment, just verbs). Each of these language-specific grammars has -the potential to grow into a full-scale grammar of the language. These grammars -can also be used as libraries, but the possibility of using functors is lost. -</P> -<P> -To give a better overview of language-specific structures, -modules like <CODE>ExtraGerAbs</CODE> -are built from a language-independent module <CODE>ExtraAbs</CODE> -by restricted inheritance: -</P> -<PRE> - abstract ExtraGerAbs = Extra [f,g,...] -</PRE> -<P> -Thus any category and function in <CODE>Extra</CODE> may be shared by a subset of all -languages. One can see this set-up as a matrix, which tells -what <CODE>Extra</CODE> structures -are implemented in what languages. For the common API in <CODE>Grammar</CODE>, the matrix -is filled with 1's (everything is implemented in every language). -</P> -<P> -In a minimal resource grammar implementation, the language-dependent -extensions are just empty modules, but it is good to provide them for -the sake of uniformity. -</P> -<A NAME="toc7"></A> -<H3>The present-tense fragment</H3> -<P> -Some lines in the resource library are suffixed with the comment -</P> -<PRE> - --# notpresent -</PRE> -<P> -which is used by a preprocessor to exclude those lines from -a reduced version of the full resource.
This present-tense-only -version is useful for applications in most technical text, since -it reduces the grammar size and compilation time. It can also -be useful to exclude those lines in a first version of resource -implementation. To compile a grammar with present-tense-only, use -</P> -<PRE> - make Present -</PRE> -<P> -with <CODE>resource/Makefile</CODE>. -</P> -<A NAME="toc8"></A> -<H2>Phases of the work</H2> -<A NAME="toc9"></A> -<H3>Putting up a directory</H3> -<P> -Unless you are writing an instance of a parametrized implementation -(Romance or Scandinavian), which will be covered later, the -simplest way is to follow roughly the following procedure. Assume you -are building a grammar for the German language. Here are the first steps, -which we actually followed ourselves when building the German implementation -of resource v. 1.0 on Ubuntu Linux. We have slightly modified them to -match resource v. 1.5 and GF v. 3.0. -</P> -<OL> -<LI>Create a sister directory for <CODE>GF/lib/resource/english</CODE>, named - <CODE>german</CODE>. -<PRE> - cd GF/lib/resource/ - mkdir german - cd german -</PRE> -<P></P> -<LI>Check out the [ISO 639 3-letter language code - <A HREF="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">http://www.w3.org/WAI/ER/IG/ert/iso639.htm</A>] - for German: both <CODE>Ger</CODE> and <CODE>Deu</CODE> are given, and we pick <CODE>Ger</CODE>. - (We use the 3-letter codes rather than the more common 2-letter codes, - since they will suffice for many more languages!) -<P></P> -<LI>Copy the <CODE>*Eng.gf</CODE> files from <CODE>english</CODE> to <CODE>german</CODE>, - and rename them: -<PRE> - cp ../english/*Eng.gf . - rename 's/Eng/Ger/' *Eng.gf -</PRE> - If you don't have the <CODE>rename</CODE> command, you can use a bash script with <CODE>mv</CODE>.
-</OL> - -<OL> -<LI>Change the <CODE>Eng</CODE> module references to <CODE>Ger</CODE> references - in all files: -<PRE> - sed -i 's/English/German/g' *Ger.gf - sed -i 's/Eng/Ger/g' *Ger.gf -</PRE> - The first line prevents changing the word <CODE>English</CODE>, which appears - here and there in comments, to <CODE>Gerlish</CODE>. The <CODE>sed</CODE> command syntax - may vary depending on your operating system. -<P></P> -<LI>This may of course change unwanted occurrences of the - string <CODE>Eng</CODE> - verify this by -<PRE> - grep Ger *.gf -</PRE> - But you will have to make lots of manual changes in all files anyway! -<P></P> -<LI>Comment out the contents of these files: -<PRE> - sed -i 's/^/--/' *Ger.gf -</PRE> - This will give you a set of templates out of which the grammar - will grow as you uncomment and modify the files rule by rule. -<P></P> -<LI>In all <CODE>.gf</CODE> files, uncomment the module headers and brackets, - leaving the module bodies commented. Unfortunately, there is no - simple way to do this automatically (or to avoid commenting these - lines in the previous step) - but uncommenting the first - and the last lines will actually do the job for many of the files. -<P></P> -<LI>Uncomment the contents of the main grammar file: -<PRE> - sed -i 's/^--//' LangGer.gf -</PRE> -<P></P> -<LI>Now you can open the grammar <CODE>LangGer</CODE> in GF: -<PRE> - gf LangGer.gf -</PRE> - You will get lots of warnings on missing rules, but the grammar will compile. -<P></P> -<LI>At all the following steps you will now have a valid, but incomplete - GF grammar. The GF command -<PRE> - pg -missing -</PRE> - tells you what exactly is missing. -</OL> - -<P> -Here is the module structure of <CODE>LangGer</CODE>. It has been simplified by leaving out -the majority of the phrase category modules. Each of them has the same dependencies -as <CODE>VerbGer</CODE>, whose complete dependencies are shown as an example. 
-</P> -<P> -<IMG ALIGN="middle" SRC="German.png" BORDER="0" ALT=""> -</P> -<A NAME="toc10"></A> -<H3>Direction of work</H3> -<P> -The real work starts now. There are many ways to proceed, the most obvious ones being -</P> -<UL> -<LI>Top-down: start from the module <CODE>Phrase</CODE> and go down to <CODE>Sentence</CODE>, then - <CODE>Verb</CODE>, <CODE>Noun</CODE>, and in the end <CODE>Lexicon</CODE>. In this way, you are all the time - building complete phrases, and add them with more content as you proceed. - <B>This approach is not recommended</B>. It is impossible to test the rules if - you have no words to apply the constructions to. -<P></P> -<LI>Bottom-up: set as your first goal to implement <CODE>Lexicon</CODE>. To this end, you - need to write <CODE>ParadigmsGer</CODE>, which in turn needs parts of - <CODE>MorphoGer</CODE> and <CODE>ResGer</CODE>. - <B>This approach is not recommended</B>. You can get stuck to details of - morphology such as irregular words, and you don't have enough grasp about - the type system to decide what forms to cover in morphology. -</UL> - -<P> -The practical working direction is thus a saw-like motion between the morphological -and top-level modules. Here is a possible course of the work that gives enough -test data and enough general view at any point: -</P> -<OL> -<LI>Define <CODE>Cat.N</CODE> and the required parameter types in <CODE>ResGer</CODE>. As we define -<PRE> - lincat N = {s : Number => Case => Str ; g : Gender} ; -</PRE> -we need the parameter types <CODE>Number</CODE>, <CODE>Case</CODE>, and <CODE>Gender</CODE>. The definition -of <CODE>Number</CODE> in <A HREF="../lib/resource/common/ParamX.gf"><CODE>common/ParamX</CODE></A> -works for German, so we -use it and just define <CODE>Case</CODE> and <CODE>Gender</CODE> in <CODE>ResGer</CODE>. -<P></P> -<LI>Define some cases of <CODE>mkN</CODE> in <CODE>ParadigmsGer</CODE>. 
In this way you can -already implement a huge amount of nouns correctly in <CODE>LexiconGer</CODE>. Actually -just adding the worst-case instance of <CODE>mkN</CODE> (the one taking the most -arguments) should suffice for every noun - but, -since it is tedious to use, you -might proceed to the next step before returning to morphology and defining the -real work horse, <CODE>mkN</CODE> taking two forms and a gender. -<P></P> -<LI>While doing this, you may want to test the resource independently. Do this by - starting the GF shell in the <CODE>resource</CODE> directory, by the commands -<PRE> - > i -retain german/ParadigmsGer - > cc -table mkN "Kirche" -</PRE> -<P></P> -<LI>Proceed to determiners and pronouns in -<CODE>NounGer</CODE> (<CODE>DetCN UsePron DetQuant NumSg DefArt IndefArt UseN</CODE>) and -<CODE>StructuralGer</CODE> (<CODE>i_Pron this_Quant</CODE>). You also need some categories and -parameter types. At this point, it is maybe not possible to find out the final -linearization types of <CODE>CN</CODE>, <CODE>NP</CODE>, <CODE>Det</CODE>, and <CODE>Quant</CODE>, but at least you should -be able to correctly inflect noun phrases such as <I>every airplane</I>: -<PRE> - > i german/LangGer.gf - > l -table DetCN every_Det (UseN airplane_N) - - Nom: jeder Flugzeug - Acc: jeden Flugzeug - Dat: jedem Flugzeug - Gen: jedes Flugzeugs -</PRE> -<P></P> -<LI>Proceed to verbs: define <CODE>CatGer.V</CODE>, <CODE>ResGer.VForm</CODE>, and -<CODE>ParadigmsGer.mkV</CODE>. You may choose to exclude <CODE>notpresent</CODE> -cases at this point. But anyway, you will be able to inflect a good -number of verbs in <CODE>Lexicon</CODE>, such as -<CODE>live_V</CODE> (<CODE>mkV "leben"</CODE>). -<P></P> -<LI>Now you can soon form your first sentences: define <CODE>VP</CODE> and -<CODE>Cl</CODE> in <CODE>CatGer</CODE>, <CODE>VerbGer.UseV</CODE>, and <CODE>SentenceGer.PredVP</CODE>. 
-Even if you have excluded the tenses, you will be able to produce -<PRE> - > i -preproc=./mkPresent german/LangGer.gf - > l -table PredVP (UsePron i_Pron) (UseV live_V) - - Pres Simul Pos Main: ich lebe - Pres Simul Pos Inv: lebe ich - Pres Simul Pos Sub: ich lebe - Pres Simul Neg Main: ich lebe nicht - Pres Simul Neg Inv: lebe ich nicht - Pres Simul Neg Sub: ich nicht lebe -</PRE> -You should also be able to parse: -<PRE> - > p -cat=Cl "ich lebe" - PredVP (UsePron i_Pron) (UseV live_V) -</PRE> -<P></P> -<LI>Transitive verbs -(<CODE>CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a</CODE>) -are a natural next step, so that you can -produce <CODE>ich liebe dich</CODE> ("I love you"). -<P></P> -<LI>Adjectives (<CODE>CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA</CODE>) -will force you to think about strong and weak declensions, so that you can -correctly inflect <I>mein neuer Wagen, dieser neue Wagen</I> -("my new car, this new car"). -<P></P> -<LI>Once you have implemented the set -(<CODE>Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP</CODE>), -you have overcome most of the difficulties. You know roughly what parameters -and dependences there are in your language, and you can now proceed very -much in the order you please. -</OL> - -<A NAME="toc11"></A> -<H3>The develop-test cycle</H3> -<P> -The following develop-test cycle will -be applied most of the time, both in the first steps described above -and in later steps where you are more on your own. -</P> -<OL> -<LI>Select a phrase category module, e.g. <CODE>NounGer</CODE>, and uncomment some - linearization rules (for instance, <CODE>DetCN</CODE>, as above). -<P></P> -<LI>Write down some German examples of this rule, for instance translations - of "the dog", "the house", "the big house", etc. Write these in all their - different forms (two numbers and four cases).
-<P></P> -<LI>Think about the categories involved (<CODE>CN, NP, N, Det</CODE>) and the - variations they have. Encode this in the lincats of <CODE>CatGer</CODE>. - You may have to define some new parameter types in <CODE>ResGer</CODE>. -<P></P> -<LI>To be able to test the construction, - define some words you need to instantiate it - in <CODE>LexiconGer</CODE>. You will also need some regular inflection patterns - in <CODE>ParadigmsGer</CODE>. -<P></P> -<LI>Test by parsing, linearization, - and random generation. In particular, linearization to a table should - be used so that you see all forms produced; the <CODE>treebank</CODE> option - preserves the tree -<PRE> - > gr -cat=NP -number=20 | l -table -treebank -</PRE> -<P></P> -<LI>Save some tree-linearization pairs for later regression testing. You can save - a gold standard treebank and use the Unix <CODE>diff</CODE> command to compare later - linearizations produced from the same list of trees. If you save the trees - in a file <CODE>trees</CODE>, you can do as follows: -<PRE> - > rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank -</PRE> -<P></P> -<LI>A file with trees testing all resource functions is included in the resource, - entitled <CODE>resource/exx-resource.gft</CODE>. A treebank can be created from this by - the Unix command -<PRE> - % runghc Make.hs test langs=Ger -</PRE> -</OL> - -<P> -You are likely to run this cycle a few times for each linearization rule -you implement, and some hundreds of times altogether. There are roughly -70 <CODE>cat</CODE>s and -600 <CODE>funs</CODE> in <CODE>Lang</CODE> at the moment (170 of the <CODE>funs</CODE> are outside the two -lexicon modules). -</P> -<A NAME="toc12"></A> -<H3>Auxiliary modules</H3> -<P> -These auxiliary <CODE>resource</CODE> modules will be written by you. -</P> -<UL> -<LI><CODE>ResGer</CODE>: parameter types and auxiliary operations -(a resource for the resource grammar!)
-<LI><CODE>ParadigmsGer</CODE>: complete inflection engine and most important regular paradigms -<LI><CODE>MorphoGer</CODE>: auxiliaries for <CODE>ParadigmsGer</CODE> and <CODE>StructuralGer</CODE>. This need -not be separate from <CODE>ResGer</CODE>. -</UL> - -<P> -These modules are language-independent and provided by the existing resource -package. -</P> -<UL> -<LI><CODE>ParamX</CODE>: parameter types used in many languages -<LI><CODE>CommonX</CODE>: implementation of language-uniform categories - such as <CODE>Text</CODE> and <CODE>Phr</CODE>, as well as of - the logical tense, anteriority, and polarity parameters -<LI><CODE>Coordination</CODE>: operations to deal with lists and coordination -<LI><CODE>Prelude</CODE>: general-purpose operations on strings, records, - truth values, etc. -<LI><CODE>Predef</CODE>: general-purpose operations with hard-coded definitions -</UL> - -<P> -An important decision is what rules to implement in terms of operations in -<CODE>ResGer</CODE>. The <B>golden rule of functional programming</B> says: -</P> -<UL> -<LI><I>Whenever you find yourself programming by copy and paste, write a function instead!</I> -</UL> - -<P> -This rule suggests that an operation should be created if it is to be -used at least twice. At the same time, a sound principle of <B>vicinity</B> says: -</P> -<UL> -<LI><I>It should not require too much browsing to understand what a piece of code does.</I> -</UL> - -<P> -From these two principles, we have derived the following practice: -</P> -<UL> -<LI>If an operation is needed <I>in two different modules</I>, - it should be created as an <CODE>oper</CODE> in <CODE>ResGer</CODE>. An example is <CODE>mkClause</CODE>, - used in <CODE>Sentence</CODE>, <CODE>Question</CODE>, and <CODE>Relative</CODE>. -<LI>If an operation is needed <I>twice in the same module</I>, but never - outside, it should be created in the same module. Many examples are - found in <CODE>Numeral</CODE>.
-<LI>If an operation is needed <I>twice in the same judgement</I>, but never - outside, it should be created by a <CODE>let</CODE> definition. -<LI>If an operation is only needed once, it should not be created as an <CODE>oper</CODE>, - but rather inlined. However, a <CODE>let</CODE> definition may well be in place just - to make the code readable. - Most functions in phrase category modules - are implemented in this way. -</UL> - -<P> -This discipline is very different from the one followed in early -versions of the library (up to 0.9). We then valued the principle of -abstraction more than vicinity, creating layers of abstraction for -almost everything. This led in practice to the duplication of almost -all code on the <CODE>lin</CODE> and <CODE>oper</CODE> levels, and made the code -hard to understand and maintain. -</P> -<A NAME="toc13"></A> -<H3>Morphology and lexicon</H3> -<P> -The paradigms needed to implement -<CODE>LexiconGer</CODE> are defined in -<CODE>ParadigmsGer</CODE>. -This module provides high-level ways to define the linearization of -lexical items, of categories <CODE>N, A, V</CODE> and their complement-taking -variants. -</P> -<P> -For ease of use, the <CODE>Paradigms</CODE> modules follow a certain -naming convention. Thus they provide, for each lexical category, such as <CODE>N</CODE>, -an overloaded function, such as <CODE>mkN</CODE>, with the following cases: -</P> -<UL> -<LI>the worst-case construction of <CODE>N</CODE>. Its type signature - has the form -<PRE> - mkN : Str -> ... -> Str -> P -> ... -> Q -> N -</PRE> - with as many string and parameter arguments as can ever be needed to - construct an <CODE>N</CODE>. -<LI>the most regular cases, with just one string argument: -<PRE> - mkN : Str -> N -</PRE> -<LI>A language-dependent (small) set of functions to handle mild irregularities - and common exceptions.
-</UL> - -<P> -For the complement-taking variants, such as <CODE>V2</CODE>, we provide -</P> -<UL> -<LI>a case that takes a <CODE>V</CODE> and all necessary arguments, such - as case and preposition: -<PRE> - mkV2 : V -> Case -> Str -> V2 ; -</PRE> -<LI>a case that takes a <CODE>Str</CODE> and produces a transitive verb with the direct - object case: -<PRE> - mkV2 : Str -> V2 ; -</PRE> -<LI>A language-dependent (small) set of functions to handle common special cases, - such as transitive verbs that are not regular: -<PRE> - mkV2 : V -> V2 ; -</PRE> -</UL> - -<P> -The golden rule for the design of paradigms is that -</P> -<UL> -<LI><I>The user of the library will only need function applications with constants and strings, never any records or tables.</I> -</UL> - -<P> -The discipline of data abstraction moreover requires that the user of the resource -is not given access to parameter constructors, but only to constants that denote -them. This gives the resource grammarian the freedom to change the underlying -data representation if needed. It means that the <CODE>ParadigmsGer</CODE> module has -to define constants for those parameter types and constructors that -the application grammarian may need to use, e.g. -</P> -<PRE> - oper - Case : Type ; - nominative, accusative, genitive, dative : Case ; -</PRE> -<P> -These constants are defined in terms of parameter types and constructors -in <CODE>ResGer</CODE> and <CODE>MorphoGer</CODE>, which modules are not -visible to the application grammarian. -</P> -<A NAME="toc14"></A> -<H3>Lock fields</H3> -<P> -An important difference between <CODE>MorphoGer</CODE> and -<CODE>ParadigmsGer</CODE> is that the former uses "raw" record types -for word classes, whereas the latter used category symbols defined in -<CODE>CatGer</CODE>. 
When these category symbols are used to denote -record types in a resource module, such as <CODE>ParadigmsGer</CODE>, -a <B>lock field</B> is added to the record, so that categories -with the same implementation are not confused with each other. -(This is inspired by the <CODE>newtype</CODE> discipline in Haskell.) -For instance, the lincats of adverbs and conjunctions are the same -in <CODE>CommonX</CODE> (and therefore in <CODE>CatGer</CODE>, which inherits it): -</P> -<PRE> - lincat Adv = {s : Str} ; - lincat Conj = {s : Str} ; -</PRE> -<P> -But when these category symbols are used to denote their linearization -types in a resource module, these definitions are translated to -</P> -<PRE> - oper Adv : Type = {s : Str ; lock_Adv : {}} ; - oper Conj : Type = {s : Str ; lock_Conj : {}} ; -</PRE> -<P> -In this way, the user of a resource grammar cannot confuse adverbs with -conjunctions. In other words, the lock fields force the type checker -to function as a grammaticality checker. -</P> -<P> -When the resource grammar is <CODE>open</CODE>ed in an application grammar, the -lock fields are never seen (except possibly in type error messages), -and the application grammarian should never write them herself. If she -has to do this, it is a sign that the resource grammar is incomplete, and -the proper way to proceed is to fix the resource grammar. -</P> -<P> -The resource grammarian has to provide the dummy lock field values -in her hidden definitions of constants in <CODE>Paradigms</CODE>. For instance, -</P> -<PRE> - mkAdv : Str -> Adv ; - -- mkAdv s = {s = s ; lock_Adv = <>} ; -</PRE> -<P></P> -<A NAME="toc15"></A> -<H3>Lexicon construction</H3> -<P> -The lexicon belonging to <CODE>LangGer</CODE> consists of two modules: -</P> -<UL> -<LI><CODE>StructuralGer</CODE>, structural words, built by using both - <CODE>ParadigmsGer</CODE> and <CODE>MorphoGer</CODE>. -<LI><CODE>LexiconGer</CODE>, content words, built by using <CODE>ParadigmsGer</CODE> only. 
-</UL> - -<P> -The reason why <CODE>MorphoGer</CODE> has to be used in <CODE>StructuralGer</CODE> -is that <CODE>ParadigmsGer</CODE> does not contain constructors for closed -word classes such as pronouns and determiners. The reason why we -recommend <CODE>ParadigmsGer</CODE> for building <CODE>LexiconGer</CODE> is that -the coverage of the paradigms gets thereby tested and that the -use of the paradigms in <CODE>LexiconGer</CODE> gives a good set of examples for -those who want to build new lexica. -</P> -<A NAME="toc16"></A> -<H2>Lexicon extension</H2> -<A NAME="toc17"></A> -<H3>The irregularity lexicon</H3> -<P> -It is useful in most languages to provide a separate module of irregular -verbs and other words which are difficult for a lexicographer -to handle. There are usually a limited number of such words - a -few hundred perhaps. Building such a lexicon separately also -makes it less important to cover <I>everything</I> by the -worst-case variants of the paradigms <CODE>mkV</CODE> etc. -</P> -<A NAME="toc18"></A> -<H3>Lexicon extraction from a word list</H3> -<P> -You can often find resources such as lists of -irregular verbs on the internet. For instance, the -Irregular German Verb page -previously found in -<CODE>http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html</CODE> -page gives a list of verbs in the -traditional tabular format, which begins as follows: -</P> -<PRE> - backen (du bäckst, er bäckt) backte [buk] gebacken - befehlen (du befiehlst, er befiehlt; befiehl!) 
befahl (beföhle; befähle) befohlen - beginnen begann (begönne; begänne) begonnen - beißen biß gebissen -</PRE> -<P> -All you have to do is to write a suitable verb paradigm -</P> -<PRE> - irregV : (x1,_,_,_,_,x6 : Str) -> V ; -</PRE> -<P> -and a Perl or Python or Haskell script that transforms -the table to -</P> -<PRE> - backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ; - befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ; -</PRE> -<P></P> -<P> -When using ready-made word lists, you should think about -copyright issues. All resource grammar material should -be provided under the GNU Lesser General Public License (LGPL). -</P> -<A NAME="toc19"></A> -<H3>Lexicon extraction from raw text data</H3> -<P> -This is a cheap technique to build a lexicon of thousands -of words, if text data is available in digital format. -See the <A HREF="http://www.cs.chalmers.se/~markus/extract/">Extract Homepage</A> -for details. -</P> -<A NAME="toc20"></A> -<H3>Bootstrapping with smart paradigms</H3> -<P> -This is another cheap technique, where you need as input a list of words with -part-of-speech marking. You initialize the lexicon by using the one-argument -<CODE>mkN</CODE> etc paradigms, and add forms to those words that do not come out right. -This procedure is described in the paper -</P> -<P> -A. Ranta. -How predictable is Finnish morphology? An experiment on lexicon construction. -In J. Nivre, M. Dahllöf and B. Megyesi (eds), -<I>Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein</I>, -University of Uppsala, -2008. -Available from the <A HREF="http://publications.uu.se/abstract.xsql?dbid=8933">series homepage</A> -</P> -<A NAME="toc21"></A> -<H2>Extending the resource grammar API</H2> -<P> -Sooner or later it will happen that the resource grammar API -does not suffice for all applications. A common reason is -that it does not include idiomatic expressions in a given language. 
-The solution then is in the first place to build language-specific -extension modules, like <CODE>ExtraGer</CODE>. -</P> -<A NAME="toc22"></A> -<H2>Using parametrized modules</H2> -<A NAME="toc23"></A> -<H3>Writing an instance of parametrized resource grammar implementation</H3> -<P> -Above we have looked at how a resource implementation is built by -the copy and paste method (from English to German), that is, formally -speaking, from scratch. A more elegant solution available for -families of languages such as Romance and Scandinavian is to -use parametrized modules. The advantages are -</P> -<UL> -<LI>theoretical: linguistic generalizations and insights -<LI>practical: maintainability improves with fewer components -</UL> - -<P> -Here is a set of -<A HREF="http://www.cs.chalmers.se/~aarne/geocal2006.pdf">slides</A> -on the topic. -</P> -<A NAME="toc24"></A> -<H3>Parametrizing a resource grammar implementation</H3> -<P> -This is the most demanding form of resource grammar writing. -We do <I>not</I> recommend the method of parametrizing from the -beginning: it is easier to have one language first implemented -in the conventional way and then add another language of the -same family by parametrization. This means that the copy and -paste method is still used, but this time the differences -are put into an <CODE>interface</CODE> module. -</P> -<A NAME="toc25"></A> -<H2>Character encoding and transliterations</H2> -<P> -This section is relevant for languages using a non-ASCII character set. -</P> -<A NAME="toc26"></A> -<H2>Coding conventions in GF</H2> -<P> -From version 3.0, GF follows a simple encoding convention: -</P> -<UL> -<LI>GF source files may follow any encoding, such as isolatin-1 or UTF-8; - the default is isolatin-1, and UTF8 must be indicated by the judgement -<PRE> - flags coding = utf8 ; -</PRE> - in each source module. 
-<LI>for internal processing, all characters are converted to 16-bit unicode, - as the first step of grammar compilation guided by the <CODE>coding</CODE> flag -<LI>as the last step of compilation, all characters are converted to UTF-8 -<LI>thus, GF object files (<CODE>gfo</CODE>) and the Portable Grammar Format (<CODE>pgf</CODE>) - are in UTF-8 -</UL> - -<P> -Most current resource grammars use isolatin-1 in the source, but this does -not affect their use in parallel with grammars written in other encodings. -In fact, a grammar can be put up from modules using different codings. -</P> -<P> -<B>Warning</B>. While string literals may contain any characters, identifiers -must be isolatin-1 letters (or digits, underscores, or dashes). This has to -do with the restrictions of the lexer tool that is used. -</P> -<A NAME="toc27"></A> -<H2>Transliterations</H2> -<P> -While UTF-8 is well supported by most web browsers, its use in terminals and -text editors may cause disappointment. Many grammarians therefore prefer to -use ASCII transliterations. GF 3.0beta2 provides the following built-in -transliterations: -</P> -<UL> -<LI>Arabic -<LI>Devanagari (Hindi) -<LI>Thai -</UL> - -<P> -New transliterations can be defined in the GF source file -<A HREF="../src/GF/Text/Transliterations.hs"><CODE>GF/Text/Transliterations.hs</CODE></A>. -This file also gives instructions on how new ones are added. -</P> - -<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -\-toc Resource-HOWTO.txt --> -</BODY></HTML> diff --git a/doc/Resource-HOWTO.txt b/doc/Resource-HOWTO.txt deleted file mode 100644 index 8e50974a7..000000000 --- a/doc/Resource-HOWTO.txt +++ /dev/null @@ -1,827 +0,0 @@ -Resource grammar writing HOWTO -Author: Aarne Ranta <aarne (at) cs.chalmers.se> -Last update: %%date(%c) - -% NOTE: this is a txt2tags file. 
-% Create an html file from this file using: -% txt2tags --toc -thtml Resource-HOWTO.txt - -%!target:html - -**History** - -September 2008: updated for Version 1.5. - -October 2007: updated for Version 1.2. - -January 2006: first version. - - -The purpose of this document is to tell how to implement the GF -resource grammar API for a new language. We will //not// cover how -to use the resource grammar, nor how to change the API. But we -will give some hints how to extend the API. - -A manual for using the resource grammar is found in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html`` ../lib/resource/doc/synopsis.html]. - -A tutorial on GF, also introducing the idea of resource grammars, is found in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html`` ./gf-tutorial.html]. - -This document concerns the API v. 1.5, while the current stable release is 1.4. -You can find the code for the stable release in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/`` ../lib/resource] - -and the next release in - -[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/`` ../next-lib/src] - -It is recommended to build new grammars to match the next release. - - - - -==The resource grammar structure== - -The library is divided into a bunch of modules, whose dependencies -are given in the following figure. 
- -[Syntax.png] - -Modules of different kinds are distinguished as follows: -- solid contours: module seen by end users -- dashed contours: internal module -- ellipse: abstract/concrete pair of modules -- rectangle: resource or instance -- diamond: interface - - -Put in another way: -- solid rectangles and diamonds: user-accessible library API -- solid ellipses: user-accessible top-level grammar for parsing and linearization -- dashed contours: not visible to users - - -The dashed ellipses form the main parts of the implementation, on which the resource -grammar programmer has to work with. She also has to work on the ``Paradigms`` -module. The rest of the modules can be produced mechanically from corresponding -modules for other languages, by just changing the language codes appearing in -their module headers. - -The module structure is rather flat: most modules are direct -parents of ``Grammar``. The idea -is that the implementors can concentrate on one linguistic aspect at a time, or -also distribute the work among several authors. The module ``Cat`` -defines the "glue" that ties the aspects together - a type system -to which all the other modules conform, so that e.g. ``NP`` means -the same thing in those modules that use ``NP``s and those that -constructs them. - - -===Library API modules=== - -For the user of the library, these modules are the most important ones. -In a typical application, it is enough to open ``Paradigms`` and ``Syntax``. -The module ``Try`` combines these two, making it possible to experiment -with combinations of syntactic and lexical constructors by using the -``cc`` command in the GF shell. 
Here are short explanations of each API module: -- ``Try``: the whole resource library for a language (``Paradigms``, ``Syntax``, - ``Irreg``, and ``Extra``); - produced mechanically as a collection of modules -- ``Syntax``: language-independent categories, syntax functions, and structural words; - produced mechanically as a collection of modules -- ``Constructors``: language-independent syntax functions and structural words; - produced mechanically via functor instantiation -- ``Paradigms``: language-dependent morphological paradigms - - - - - -===Phrase category modules=== - -The immediate parents of ``Grammar`` will be called **phrase category modules**, -since each of them concentrates on a particular phrase category (nouns, verbs, -adjectives, sentences,...). A phrase category module tells -//how to construct phrases in that category//. You will find out that -all functions in any of these modules have the same value type (or maybe -one of a small number of different types). Thus we have -- ``Noun``: construction of nouns and noun phrases -- ``Adjective``: construction of adjectival phrases -- ``Verb``: construction of verb phrases -- ``Adverb``: construction of adverbial phrases -- ``Numeral``: construction of cardinal and ordinal numerals -- ``Sentence``: construction of sentences and imperatives -- ``Question``: construction of questions -- ``Relative``: construction of relative clauses -- ``Conjunction``: coordination of phrases -- ``Phrase``: construction of the major units of text and speech -- ``Text``: construction of texts as sequences of phrases -- ``Idiom``: idiomatic expressions such as existentials - - - - -===Infrastructure modules=== - -Expressions of each phrase category are constructed in the corresponding -phrase category module. But their //use// takes mostly place in other modules. -For instance, noun phrases, which are constructed in ``Noun``, are -used as arguments of functions of almost all other phrase category modules. 
-How can we build all these modules independently of each other? - -As usual in typeful programming, the //only// thing you need to know -about an object you use is its type. When writing a linearization rule -for a GF abstract syntax function, the only thing you need to know is -the linearization types of its value and argument categories. To achieve -the division of the resource grammar to several parallel phrase category modules, -what we need is an underlying definition of the linearization types. This -definition is given as the implementation of -- ``Cat``: syntactic categories of the resource grammar - - -Any resource grammar implementation has first to agree on how to implement -``Cat``. Luckily enough, even this can be done incrementally: you -can skip the ``lincat`` definition of a category and use the default -``{s : Str}`` until you need to change it to something else. In -English, for instance, many categories do have this linearization type. - - - -===Lexical modules=== - -What is lexical and what is syntactic is not as clearcut in GF as in -some other grammar formalisms. Logically, lexical means atom, i.e. a -``fun`` with no arguments. Linguistically, one may add to this -that the ``lin`` consists of only one token (or of a table whose values -are single tokens). Even in the restricted lexicon included in the resource -API, the latter rule is sometimes violated in some languages. For instance, -``Structural.both7and_DConj`` is an atom, but its linearization is -two words e.g. //both - and//. - -Another characterization of lexical is that lexical units can be added -almost //ad libitum//, and they cannot be defined in terms of already -given rules. The lexical modules of the resource API are thus more like -samples than complete lists. There are two such modules: -- ``Structural``: structural words (determiners, conjunctions,...) -- ``Lexicon``: basic everyday content words (nouns, verbs,...) 
- - -The module ``Structural`` aims for completeness, and is likely to -be extended in future releases of the resource. The module ``Lexicon`` -gives a "random" list of words, which enables testing the syntax. -It also provides a check list for morphology, since those words are likely to include -most morphological patterns of the language. - -In the case of ``Lexicon`` it may come out clearer than anywhere else -in the API that it is impossible to give exact translation equivalents in -different languages on the level of a resource grammar. This is no problem, -since application grammars can use the resource in different ways for -different languages. - - -==Language-dependent syntax modules== - -In addition to the common API, there is room for language-dependent extensions -of the resource. The top level of each languages looks as follows (with German -as example): -``` - abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs -``` -where ``ExtraGerAbs`` is a collection of syntactic structures specific to German, -and ``IrregGerAbs`` is a dictionary of irregular words of German -(at the moment, just verbs). Each of these language-specific grammars has -the potential to grow into a full-scale grammar of the language. These grammar -can also be used as libraries, but the possibility of using functors is lost. - -To give a better overview of language-specific structures, -modules like ``ExtraGerAbs`` -are built from a language-independent module ``ExtraAbs`` -by restricted inheritance: -``` - abstract ExtraGerAbs = Extra [f,g,...] -``` -Thus any category and function in ``Extra`` may be shared by a subset of all -languages. One can see this set-up as a matrix, which tells -what ``Extra`` structures -are implemented in what languages. For the common API in ``Grammar``, the matrix -is filled with 1's (everything is implemented in every language). 
- -In a minimal resource grammar implementation, the language-dependent -extensions are just empty modules, but it is good to provide them for -the sake of uniformity. - - - -===The present-tense fragment=== - -Some lines in the resource library are suffixed with the comment -``` - --# notpresent -``` -which is used by a preprocessor to exclude those lines from -a reduced version of the full resource. This present-tense-only -version is useful for applications in most technical text, since -they reduce the grammar size and compilation time. It can also -be useful to exclude those lines in a first version of resource -implementation. To compile a grammar with present-tense-only, use -``` - make Present -``` -with ``resource/Makefile``. - - - -==Phases of the work== - -===Putting up a directory=== - -Unless you are writing an instance of a parametrized implementation -(Romance or Scandinavian), which will be covered later, the -simplest way is to follow roughly the following procedure. Assume you -are building a grammar for the German language. Here are the first steps, -which we actually followed ourselves when building the German implementation -of resource v. 1.0 at Ubuntu linux. We have slightly modified them to -match resource v. 1.5 and GF v. 3.0. - -+ Create a sister directory for ``GF/lib/resource/english``, named - ``german``. -``` - cd GF/lib/resource/ - mkdir german - cd german -``` - -+ Check out the [ISO 639 3-letter language code - http://www.w3.org/WAI/ER/IG/ert/iso639.htm] - for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``. - (We use the 3-letter codes rather than the more common 2-letter codes, - since they will suffice for many more languages!) - -+ Copy the ``*Eng.gf`` files from ``english`` ``german``, - and rename them: -``` - cp ../english/*Eng.gf . - rename 's/Eng/Ger/' *Eng.gf -``` - If you don't have the ``rename`` command, you can use a bash script with ``mv``. 
- - -+ Change the ``Eng`` module references to ``Ger`` references - in all files: -``` - sed -i 's/English/German/g' *Ger.gf - sed -i 's/Eng/Ger/g' *Ger.gf -``` - The first line prevents changing the word ``English``, which appears - here and there in comments, to ``Gerlish``. The ``sed`` command syntax - may vary depending on your operating system. - -+ This may of course change unwanted occurrences of the - string ``Eng`` - verify this by -``` - grep Ger *.gf -``` - But you will have to make lots of manual changes in all files anyway! - -+ Comment out the contents of these files: -``` - sed -i 's/^/--/' *Ger.gf -``` - This will give you a set of templates out of which the grammar - will grow as you uncomment and modify the files rule by rule. - -+ In all ``.gf`` files, uncomment the module headers and brackets, - leaving the module bodies commented. Unfortunately, there is no - simple way to do this automatically (or to avoid commenting these - lines in the previous step) - but uncommenting the first - and the last lines will actually do the job for many of the files. - -+ Uncomment the contents of the main grammar file: -``` - sed -i 's/^--//' LangGer.gf -``` - -+ Now you can open the grammar ``LangGer`` in GF: -``` - gf LangGer.gf -``` - You will get lots of warnings on missing rules, but the grammar will compile. - -+ At all the following steps you will now have a valid, but incomplete - GF grammar. The GF command -``` - pg -missing -``` - tells you what exactly is missing. - - -Here is the module structure of ``LangGer``. It has been simplified by leaving out -the majority of the phrase category modules. Each of them has the same dependencies -as ``VerbGer``, whose complete dependencies are shown as an example. - -[German.png] - - -===Direction of work=== - -The real work starts now. 
There are many ways to proceed, the most obvious ones being -- Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then - ``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time - building complete phrases, and add them with more content as you proceed. - **This approach is not recommended**. It is impossible to test the rules if - you have no words to apply the constructions to. - -- Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you - need to write ``ParadigmsGer``, which in turn needs parts of - ``MorphoGer`` and ``ResGer``. - **This approach is not recommended**. You can get stuck to details of - morphology such as irregular words, and you don't have enough grasp about - the type system to decide what forms to cover in morphology. - - -The practical working direction is thus a saw-like motion between the morphological -and top-level modules. Here is a possible course of the work that gives enough -test data and enough general view at any point: -+ Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define -``` - lincat N = {s : Number => Case => Str ; g : Gender} ; -``` -we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition -of ``Number`` in [``common/ParamX`` ../lib/resource/common/ParamX.gf] -works for German, so we -use it and just define ``Case`` and ``Gender`` in ``ResGer``. - -+ Define some cases of ``mkN`` in ``ParadigmsGer``. In this way you can -already implement a huge amount of nouns correctly in ``LexiconGer``. Actually -just adding the worst-case instance of ``mkN`` (the one taking the most -arguments) should suffice for every noun - but, -since it is tedious to use, you -might proceed to the next step before returning to morphology and defining the -real work horse, ``mkN`` taking two forms and a gender. - -+ While doing this, you may want to test the resource independently. 
Do this by - starting the GF shell in the ``resource`` directory, by the commands -``` - > i -retain german/ParadigmsGer - > cc -table mkN "Kirche" -``` - -+ Proceed to determiners and pronouns in -``NounGer`` (``DetCN UsePron DetQuant NumSg DefArt IndefArt UseN``) and -``StructuralGer`` (``i_Pron this_Quant``). You also need some categories and -parameter types. At this point, it is maybe not possible to find out the final -linearization types of ``CN``, ``NP``, ``Det``, and ``Quant``, but at least you should -be able to correctly inflect noun phrases such as //every airplane//: -``` - > i german/LangGer.gf - > l -table DetCN every_Det (UseN airplane_N) - - Nom: jeder Flugzeug - Acc: jeden Flugzeug - Dat: jedem Flugzeug - Gen: jedes Flugzeugs -``` - -+ Proceed to verbs: define ``CatGer.V``, ``ResGer.VForm``, and -``ParadigmsGer.mkV``. You may choose to exclude ``notpresent`` -cases at this point. But anyway, you will be able to inflect a good -number of verbs in ``Lexicon``, such as -``live_V`` (``mkV "leben"``). - -+ Now you can soon form your first sentences: define ``VP`` and -``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``. -Even if you have excluded the tenses, you will be able to produce -``` - > i -preproc=./mkPresent german/LangGer.gf - > l -table PredVP (UsePron i_Pron) (UseV live_V) - - Pres Simul Pos Main: ich lebe - Pres Simul Pos Inv: lebe ich - Pres Simul Pos Sub: ich lebe - Pres Simul Neg Main: ich lebe nicht - Pres Simul Neg Inv: lebe ich nicht - Pres Simul Neg Sub: ich nicht lebe -``` -You should also be able to parse: -``` - > p -cat=Cl "ich lebe" - PredVP (UsePron i_Pron) (UseV live_V) -``` - -+ Transitive verbs -(``CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a``) -are a natural next step, so that you can -produce ``ich liebe dich`` ("I love you"). 
- -+ Adjectives (``CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA``) -will force you to think about strong and weak declensions, so that you can -correctly inflect //mein neuer Wagen, dieser neue Wagen// -("my new car, this new car"). - -+ Once you have implemented the set -(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP), -you have overcome most of difficulties. You know roughly what parameters -and dependences there are in your language, and you can now proceed very -much in the order you please. - - - -===The develop-test cycle=== - -The following develop-test cycle will -be applied most of the time, both in the first steps described above -and in later steps where you are more on your own. - -+ Select a phrase category module, e.g. ``NounGer``, and uncomment some - linearization rules (for instance, ``DetCN``, as above). - -+ Write down some German examples of this rule, for instance translations - of "the dog", "the house", "the big house", etc. Write these in all their - different forms (two numbers and four cases). - -+ Think about the categories involved (``CN, NP, N, Det``) and the - variations they have. Encode this in the lincats of ``CatGer``. - You may have to define some new parameter types in ``ResGer``. - -+ To be able to test the construction, - define some words you need to instantiate it - in ``LexiconGer``. You will also need some regular inflection patterns - in``ParadigmsGer``. - -+ Test by parsing, linearization, - and random generation. In particular, linearization to a table should - be used so that you see all forms produced; the ``treebank`` option - preserves the tree -``` - > gr -cat=NP -number=20 | l -table -treebank -``` - -+ Save some tree-linearization pairs for later regression testing. You can save - a gold standard treebank and use the Unix ``diff`` command to compare later - linearizations produced from the same list of trees. 
If you save the trees - in a file ``trees``, you can do as follows: -``` - > rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank -``` - -+ A file with trees testing all resource functions is included in the resource, - entitled ``resource/exx-resource.gft``. A treebank can be created from this by - the Unix command -``` - % runghc Make.hs test langs=Ger -``` - - - -You are likely to run this cycle a few times for each linearization rule -you implement, and some hundreds of times altogether. There are roughly -70 ``cat``s and -600 ``funs`` in ``Lang`` at the moment; 170 of the ``funs`` are outside the two -lexicon modules). - - -===Auxiliary modules=== - -These auxuliary ``resource`` modules will be written by you. - -- ``ResGer``: parameter types and auxiliary operations -(a resource for the resource grammar!) -- ``ParadigmsGer``: complete inflection engine and most important regular paradigms -- ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need -not be separate from ``ResGer``. - - -These modules are language-independent and provided by the existing resource -package. - -- ``ParamX``: parameter types used in many languages -- ``CommonX``: implementation of language-uniform categories - such as $Text$ and $Phr$, as well as of - the logical tense, anteriority, and polarity parameters -- ``Coordination``: operations to deal with lists and coordination -- ``Prelude``: general-purpose operations on strings, records, - truth values, etc. -- ``Predef``: general-purpose operations with hard-coded definitions - - -An important decision is what rules to implement in terms of operations in -``ResGer``. The **golden rule of functional programming** says: -- //Whenever you find yourself programming by copy and paste, write a function instead!//. - - -This rule suggests that an operation should be created if it is to be -used at least twice. 
At the same time, a sound principle of **vicinity** says: -- //It should not require too much browsing to understand what a piece of code does.// - - -From these two principles, we have derived the following practice: -- If an operation is needed //in two different modules//, - it should be created in as an ``oper`` in ``ResGer``. An example is ``mkClause``, - used in ``Sentence``, ``Question``, and ``Relative``- -- If an operation is needed //twice in the same module//, but never - outside, it should be created in the same module. Many examples are - found in ``Numerals``. -- If an operation is needed //twice in the same judgement//, but never - outside, it should be created by a ``let`` definition. -- If an operation is only needed once, it should not be created as an ``oper``, - but rather inlined. However, a ``let`` definition may well be in place just - to make the readable. - Most functions in phrase category modules - are implemented in this way. - - -This discipline is very different from the one followed in early -versions of the library (up to 0.9). We then valued the principle of -abstraction more than vicinity, creating layers of abstraction for -almost everything. This led in practice to the duplication of almost -all code on the ``lin`` and ``oper`` levels, and made the code -hard to understand and maintain. - - - -===Morphology and lexicon=== - -The paradigms needed to implement -``LexiconGer`` are defined in -``ParadigmsGer``. -This module provides high-level ways to define the linearization of -lexical items, of categories ``N, A, V`` and their complement-taking -variants. - -For ease of use, the ``Paradigms`` modules follow a certain -naming convention. Thus they for each lexical category, such as ``N``, -the overloaded functions, such as ``mkN``, with the following cases: - -- the worst-case construction of ``N``. Its type signature - has the form -``` - mkN : Str -> ... -> Str -> P -> ... 
-> Q -> N -``` - with as many string and parameter arguments as can ever be needed to - construct an ``N``. -- the most regular cases, with just one string argument: -``` - mkN : Str -> N -``` -- A language-dependent (small) set of functions to handle mild irregularities - and common exceptions. - - -For the complement-taking variants, such as ``V2``, we provide -- a case that takes a ``V`` and all necessary arguments, such - as case and preposition: -``` - mkV2 : V -> Case -> Str -> V2 ; -``` -- a case that takes a ``Str`` and produces a transitive verb with the direct - object case: -``` - mkV2 : Str -> V2 ; -``` -- A language-dependent (small) set of functions to handle common special cases, - such as transitive verbs that are not regular: -``` - mkV2 : V -> V2 ; -``` - - -The golden rule for the design of paradigms is that -- //The user of the library will only need function applications with constants and strings, never any records or tables.// - - -The discipline of data abstraction moreover requires that the user of the resource -is not given access to parameter constructors, but only to constants that denote -them. This gives the resource grammarian the freedom to change the underlying -data representation if needed. It means that the ``ParadigmsGer`` module has -to define constants for those parameter types and constructors that -the application grammarian may need to use, e.g. -``` - oper - Case : Type ; - nominative, accusative, genitive, dative : Case ; -``` -These constants are defined in terms of parameter types and constructors -in ``ResGer`` and ``MorphoGer``, which modules are not -visible to the application grammarian. - - -===Lock fields=== - -An important difference between ``MorphoGer`` and -``ParadigmsGer`` is that the former uses "raw" record types -for word classes, whereas the latter used category symbols defined in -``CatGer``. 
When these category symbols are used to denote -record types in a resource module, such as ``ParadigmsGer``, -a **lock field** is added to the record, so that categories -with the same implementation are not confused with each other. -(This is inspired by the ``newtype`` discipline in Haskell.) -For instance, the lincats of adverbs and conjunctions are the same -in ``CommonX`` (and therefore in ``CatGer``, which inherits it): -``` - lincat Adv = {s : Str} ; - lincat Conj = {s : Str} ; -``` -But when these category symbols are used to denote their linearization -types in a resource module, these definitions are translated to -``` - oper Adv : Type = {s : Str ; lock_Adv : {}} ; - oper Conj : Type = {s : Str ; lock_Conj : {}} ; -``` -In this way, the user of a resource grammar cannot confuse adverbs with -conjunctions. In other words, the lock fields force the type checker -to function as a grammaticality checker. - -When the resource grammar is ``open``ed in an application grammar, the -lock fields are never seen (except possibly in type error messages), -and the application grammarian should never write them herself. If she -has to do this, it is a sign that the resource grammar is incomplete, and -the proper way to proceed is to fix the resource grammar. - -The resource grammarian has to provide the dummy lock field values -in her hidden definitions of constants in ``Paradigms``. For instance, -``` - mkAdv : Str -> Adv ; - -- mkAdv s = {s = s ; lock_Adv = <>} ; -``` - - -===Lexicon construction=== - -The lexicon belonging to ``LangGer`` consists of two modules: -- ``StructuralGer``, structural words, built by using both - ``ParadigmsGer`` and ``MorphoGer``. -- ``LexiconGer``, content words, built by using ``ParadigmsGer`` only. - - -The reason why ``MorphoGer`` has to be used in ``StructuralGer`` -is that ``ParadigmsGer`` does not contain constructors for closed -word classes such as pronouns and determiners.
The reason why we -recommend ``ParadigmsGer`` for building ``LexiconGer`` is that -the coverage of the paradigms gets thereby tested and that the -use of the paradigms in ``LexiconGer`` gives a good set of examples for -those who want to build new lexica. - - - - - -==Lexicon extension== - -===The irregularity lexicon=== - -It is useful in most languages to provide a separate module of irregular -verbs and other words which are difficult for a lexicographer -to handle. There are usually a limited number of such words - a -few hundred perhaps. Building such a lexicon separately also -makes it less important to cover //everything// by the -worst-case variants of the paradigms ``mkV`` etc. - - - -===Lexicon extraction from a word list=== - -You can often find resources such as lists of -irregular verbs on the internet. For instance, the -Irregular German Verb page -previously found in -``http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html`` -gives a list of verbs in the -traditional tabular format, which begins as follows: -``` - backen (du bäckst, er bäckt) backte [buk] gebacken - befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen - beginnen begann (begönne; begänne) begonnen - beißen biß gebissen -``` -All you have to do is to write a suitable verb paradigm -``` - irregV : (x1,_,_,_,_,x6 : Str) -> V ; -``` -and a Perl or Python or Haskell script that transforms -the table to -``` - backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ; - befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ; -``` - -When using ready-made word lists, you should think about -copyright issues. All resource grammar material should -be provided under the GNU Lesser General Public License (LGPL). - - - -===Lexicon extraction from raw text data=== - -This is a cheap technique to build a lexicon of thousands -of words, if text data is available in digital format.
-See the [Extract Homepage http://www.cs.chalmers.se/~markus/extract/] -homepage for details. - - -===Bootstrapping with smart paradigms=== - -This is another cheap technique, where you need as input a list of words with -part-of-speech marking. You initialize the lexicon by using the one-argument -``mkN`` etc paradigms, and add forms to those words that do not come out right. -This procedure is described in the paper - -A. Ranta. -How predictable is Finnish morphology? An experiment on lexicon construction. -In J. Nivre, M. Dahllöf and B. Megyesi (eds), -//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//, -University of Uppsala, -2008. -Available from the [series homepage http://publications.uu.se/abstract.xsql?dbid=8933] - - - - -==Extending the resource grammar API== - -Sooner or later it will happen that the resource grammar API -does not suffice for all applications. A common reason is -that it does not include idiomatic expressions in a given language. -The solution then is in the first place to build language-specific -extension modules, like ``ExtraGer``. - -==Using parametrized modules== - -===Writing an instance of parametrized resource grammar implementation=== - -Above we have looked at how a resource implementation is built by -the copy and paste method (from English to German), that is, formally -speaking, from scratch. A more elegant solution available for -families of languages such as Romance and Scandinavian is to -use parametrized modules. The advantages are -- theoretical: linguistic generalizations and insights -- practical: maintainability improves with fewer components - - -Here is a set of -[slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf] -on the topic. - - -===Parametrizing a resource grammar implementation=== - -This is the most demanding form of resource grammar writing. 
-We do //not// recommend the method of parametrizing from the -beginning: it is easier to have one language first implemented -in the conventional way and then add another language of the -same family by parametrization. This means that the copy and -paste method is still used, but this time the differences -are put into an ``interface`` module. - - -==Character encoding and transliterations== - -This section is relevant for languages using a non-ASCII character set. - -==Coding conventions in GF== - -From version 3.0, GF follows a simple encoding convention: -- GF source files may follow any encoding, such as isolatin-1 or UTF-8; - the default is isolatin-1, and UTF-8 must be indicated by the judgement -``` - flags coding = utf8 ; -``` - in each source module. -- for internal processing, all characters are converted to 16-bit unicode, - as the first step of grammar compilation guided by the ``coding`` flag -- as the last step of compilation, all characters are converted to UTF-8 -- thus, GF object files (``gfo``) and the Portable Grammar Format (``pgf``) - are in UTF-8 - - -Most current resource grammars use isolatin-1 in the source, but this does -not affect their use in parallel with grammars written in other encodings. -In fact, a grammar can be put together from modules using different codings. - -**Warning**. While string literals may contain any characters, identifiers -must be isolatin-1 letters (or digits, underscores, or dashes). This has to -do with the restrictions of the lexer tool that is used. - - -==Transliterations== - -While UTF-8 is well supported by most web browsers, its use in terminals and -text editors may cause disappointment. Many grammarians therefore prefer to -use ASCII transliterations. GF 3.0beta2 provides the following built-in -transliterations: -- Arabic -- Devanagari (Hindi) -- Thai - - -New transliterations can be defined in the GF source file -[``GF/Text/Transliterations.hs`` ../src/GF/Text/Transliterations.hs].
-This file also gives instructions on how new ones are added. - - - - - diff --git a/doc/Syntax.png b/doc/Syntax.png Binary files differdeleted file mode 100644 index f36c098f6..000000000 --- a/doc/Syntax.png +++ /dev/null diff --git a/doc/TODO b/doc/TODO deleted file mode 100644 index c92f4c8fa..000000000 --- a/doc/TODO +++ /dev/null @@ -1,231 +0,0 @@ - -* Some notes on the syntax of this file, making it possible to use todoo-mode.el: - -- Items start with "* " -- Sub-items start with "- " -- It should be noted somewhere in the item, who has reported the item - Suggestion: Add "[who]" at the beginning of the item title - (then one can use "assign item" in todoo-mode) -- Each item should have a priority - Suggestion: Add "URGENT", "IMPORTANT" or "WISH" at the beginning of - the item title -- Sort the items in priority order - (todoo-mode can move an item up or down) - ----------------------------------------------------------------------- - - -* [peb] URGENT: Error messages for syntax errors - - When a syntax error is reported, it should be noted which file it - is. Otherwise it is impossible to know where the error is - (if one uses the -s flag): - - > i -s Domain/MP3/Domain_MP_Semantics.gf - syntax error at line 33 before ve , Proposition , - - There's no problem with other kinds of errors: - - > i -s Domain/MP3/Domain_MP_Semantics.gf - checking module Godis_Semantics - Happened in linearization of userMove : - product expected instead of { - pl : Str - } - - -* [peb] IMPORTANT: Add the -path of a module to daughter modules - - Then the main module does not have to know where all grandchildren are: - - file A.gf: - abstract A = B ** {...} - - file B.gf: - --# -path=./resource - abstract B = Lang ** {...} - - I.e.: the file A.gf should not need to know that B.gf uses the - resource library. 
- - -* [peb] IMPORTANT: incomplete concrete and interfaces - -- The following works in GF: - - incomplete concrete TestDI of TestA = open (C=TestCI) in { - lincat A = TestCI.A ** {p : Str}; - lin f = TestCI.f ** {p = "f"}; - g = TestCI.g ** {p = "g"}; - } - - > i -src TestDE.gf - -- BUT, if we exchange "TestCI" for "C" we get an error: - - incomplete concrete TestDI of TestA = open (C=TestCI) in { - lincat A = C.A ** {p : Str}; - lin f = C.f ** {p = "f"}; - g = C.g ** {p = "g"}; - } - - > i -src TestDE.gf - compiling TestDE.gf... failed to find C - OCCURRED IN - atomic term C given TestCE TestCI TestCE TestDE - OCCURRED IN - renaming definition of f - OCCURRED IN - renaming module TestDE - -- the other modules: - - abstract TestA = { - cat A; - fun f, g : A; - } - - instance TestBE of TestBI = { - oper hello = "hello"; - bye = "bye"; - } - - interface TestBI = { - oper hello : Str; - bye : Str; - } - - concrete TestCE of TestA = TestCI with (TestBI = TestBE); - - incomplete concrete TestCI of TestA = open TestBI in { - lincat A = {s : Str}; - lin f = {s = hello}; - g = {s = bye}; - } - - concrete TestDE of TestA = TestDI with (TestCI = TestCE); - -* [peb] IMPORTANT: Missing things in the help command - - > h -printer - (the flag -printer=cfgm is missing) - - > h -cat - WARNING: invalid option: cat - - > h -lang - WARNING: invalid option: lang - - > h -language - WARNING: invalid option: language - - > h -parser - WARNING: invalid option: parser - - > h -aslkdjaslkdjss - WARNING: invalid option: aslkdjaslkdjss - Command not found. - (it should note: "option not found") - - > h -optimize - WARNING: invalid option: optimize - - > h -startcat - WARNING: invalid option: startcat - - > h h - h, help: h Command? 
- (it should also mention "h -option") - - -* [peb] IMPORTANT: Set GF_LIb-PATH within GF - - > sf libpath=~/GF/lib - - -* [peb] IMPORTANT: Set the starting category with "sf" - - > sf startcat=X - - -* [peb] IMPORTANT: import-flags - -- There are some inconsistencies when importing grammars: - - 1. when doing "pg -printer=cfg", one must have used "i -conversion=finite", - since "pg" doesn't care about the flags that are set in the grammar file - - 2. when doing "pm -printer=cfgm", one must have set the flag - "conversion=finite" within the grammar file, since "pm" doesn't - care about the flags to the import command - - (I guess it's me (peb) who should fix this, but I don't know where - the different flags reside...) - -- Also, it must be decided in what cases flags can override other flags: - - a) in the grammar file, e.g. "flags conversion=finite;" - b) on the command line, e.g. "> sf conversion=finite" - c) as argument to a command, e.g. "> i -conversion=finite file.gf" - -- A related issue is to decide the scope of flags: - - Some flags are (or should be) local to the module - (e.g. -coding and -path) - Other flags override daughter flags for daughter modules - (e.g. -startcat and -conversion) - -* [bringert] IMPORTANT: get right startcat flag when printing CFGM - GF.CFGM.PrintCFGrammar.prCanonAsCFGM currently only gets the startcat - flag from the top-level concrete module. This might be easier - to fix if the multi grammar printers had access to more than just - the CanonGrammar. - -* [peb] WISH: generalizing incomplete concrete - - I want to be able to open an incomplete concrete module - inside another incomplete conrete. - Then I can instantiate both incompletes at the same time. - -* [peb] WISH: _tmpi, _tmpo - - The files _tmpi and _tmpo are never removed when quitting GF. - Further suggestion: put them in /tmp or similar. - - peb: nr man anvnder "|" till ett systemanrop, t.ex: - pg | ! sort - s skapas filerna _tmpi och _tmpo. Men de tas aldrig bort. 
- - peb: ännu bättre: ta bort filerna efteråt. - - aarne: Sant: när GF quittas (om detta inte sker onormalt). - Eller när kommandot har kört färdigt (om det terminerar). - - peb: Bäst(?): skapa filerna i /tmp eller liknande. - - aarne: Ibland får man skrivrättighetsproblem - och det är - inte kul om man måste ange en tmp-path. Och olika - användare och gf-processer måste ha unika filnamn. - Och vet inte hur det funkar på windows... - - aarne: Ett till alternativ skulle vara att använda handles - utan några tmp-filer alls. Men jag har inte hunnit - ta reda på hur det går till. - - björn: Lite slumpmässiga tankar: - + man kan använda System.Directory.getTemporaryDirectory, så slipper man iaf bry sig om olika plattformsproblem. - + sen kan man använda System.IO.openTempFile för att skapa en temporär fil. Den tas dock inte bort när programmet avslutas, så det får man fixa själv. - + System.Posix.Temp.mkstemp gör nåt liknande, men dokumentationen är dålig. - + biblioteket HsShellScript har lite funktioner för sånt här, se - http://www.volker-wysk.de/hsshellscript/apidoc/HsShellScript.html#16 - - -* [peb] WISH: Hierarchic modules - - Suggestion by peb: - The module A.B.C is located in the file A/B/C.gf - - Main advantage: you no longer need to state "--# -path=..." in - modules - -- How can this be combined with several modules inside one file? diff --git a/doc/compiling-gf.txt b/doc/compiling-gf.txt deleted file mode 100644 index 9e438f40f..000000000 --- a/doc/compiling-gf.txt +++ /dev/null @@ -1,750 +0,0 @@ -Compiling GF -Aarne Ranta -Proglog meeting, 1 November 2006 - -% to compile: txt2tags -thtml compiling-gf.txt ; htmls compiling-gf.html - -%!target:html -%!postproc(html): #NEW <!-- NEW --> - -#NEW - -==The compilation task== - -GF is a grammar formalism, i.e. a special purpose programming language -for writing grammars. - -Other grammar formalisms: -- BNF, YACC, Happy (grammars for programming languages); -- PATR, HPSG, LFG (grammars for natural languages).
- - -The grammar compiler prepares a GF grammar for two computational tasks: -- linearization: take syntax trees to strings -- parsing: take strings to syntax trees - - -The grammar gives a declarative description of these functionalities, -on a high abstraction level that improves grammar writing -productivity. - -For efficiency, the grammar is compiled to lower-level formats. - -Type checking is another essential compilation phase. Its purpose is -twofold, as usual: -- checking the correctness of the grammar -- type-annotating expressions for code generation - - -#NEW - -==Characteristics of GF language== - -Functional language with types, both built-in and user-defined. -``` - Str : Type - - param Number = Sg | Pl - - param AdjForm = ASg Gender | APl - - Noun : Type = {s : Number => Str ; g : Gender} -``` -Pattern matching. -``` - svart_A = table { - ASg _ => "svart" ; - _ => "svarta" - } -``` -Higher-order functions. - -Dependent types. -``` - flip : (a, b, c : Type) -> (a -> b -> c) -> b -> a -> c = - \_,_,_,f,y,x -> f x y ; -``` - - -#NEW - -==The module system of GF== - -Main division: abstract syntax and concrete syntax -``` - abstract Greeting = { - cat Greet ; - fun Hello : Greet ; - } - - concrete GreetingEng of Greeting = { - lincat Greet = {s : Str} ; - lin Hello = {s = "hello"} ; - } - - concrete GreetingIta of Greeting = { - param Politeness = Familiar | Polite ; - lincat Greet = {s : Politeness => Str} ; - lin Hello = {s = table { - Familiar => "ciao" ; - Polite => "buongiorno" - } ; - } -``` -Other features of the module system: -- extension and opening -- parametrized modules (cf. ML: signatures, structures, functors) - - - - -#NEW - -==GF vs. Haskell== - -Some things that (standard) Haskell hasn't: -- records and record subtyping -- regular expression patterns -- dependent types -- ML-style modules - - -Some things that GF hasn't: -- infinite (recursive) data types -- recursive functions -- classes, polymorphism - - -#NEW - -==GF vs. 
most linguistic grammar formalisms== - -GF separates abstract syntax from concrete syntax. - -GF has a module system with separate compilation. - -GF is generation-oriented (as opposed to parsing). - -GF has unidirectional matching (as opposed to unification). - -GF has a static type system (as opposed to a type-free universe). - -"I was - and I still am - firmly convinced that a program composed -out of statically type-checked parts is more likely to faithfully -express a well-thought-out design than a program relying on -weakly-typed interfaces or dynamically-checked interfaces." -(B. Stroustrup, 1994, p. 107) - - - -#NEW - -==The computation model: abstract syntax== - -An abstract syntax defines a free algebra of trees (using -dependent types, recursion, higher-order abstract syntax: -GF includes a complete Logical Framework). -``` - cat C (x_1 : A_1)...(x_n : A_n) - a_1 : A_1 - ... - a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1} - ---------------------------------------------------- - (C a_1 ... a_n) : Type - - - fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A - a_1 : A_1 - ... - a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1} - ---------------------------------------------------- - (f a_1 ... a_n) : A{x_1 : A_1,...,x_n : A_n} - - - A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A - ---------------------------- ---------------------- ------------------------ - (x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := A} -``` -Notice that all syntax trees are in eta-long form. - - -#NEW - -==The computation model: concrete syntax== - -A concrete syntax defines a homomorphism (compositional mapping) -from the abstract syntax to a system of concrete syntax objects. -``` - cat C _ - -------------------- - lincat C = C* : Type - - fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A - ----------------------------------------------- - lin f = f* : A_1* -> ... -> A_n* -> A* - - (f a_1 ... a_n)* = f* a_1* ... 
a_n* -``` -The homomorphism can as such be used as linearization function. - -It is a functional program, but a restricted one, since it works -in the end on finite data structures only. - -But a more efficient program is obtained via compilation to -GFC = Canonical GF: the "machine code" of GF. - -The parsing problem of GFC can be reduced to that of MPCFG (Multiple -Parallel Context Free Grammars), see P. Ljunglöf's thesis (2004). - - - -#NEW - -==The core type system of concrete syntax: basic types== - -``` - param P P : PType - PType : Type --------- --------- - P : PType P : Type - - s : Str t : Str - Str : type "foo" : Str [] : Str ---------------- - s ++ t : Str -``` - - -#NEW - -==The core type system of concrete syntax: functions and tables== - -``` - A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A - ---------------------------- ---------------------- ------------------------ - (x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := A} - - - P : PType A : Type t : P => A p : p - -------------------- ----------------- - P => A : Type t ! p : A - - v_1,...,v_n : A - ---------------------------------------------- P = {C_1,...,C_n} - table {C_1 => v_1 ; ... ; C_n => v_n} : P => A -``` -Pattern matching is treated as an abbreviation for tables. Notice that -``` - case e of {...} == table {...} ! e -``` - - -#NEW - -==The core type system of concrete syntax: records== - -``` - A_1,...,A_n : Type - ------------------------------------ n >= 0 - {r_1 : A_1 ; ... ; r_n : A_n} : Type - - - a_1 : A_1 ... a_n : A_n - ------------------------------------------------------------ - {r_1 = a_1 ; ... ; r_n = a_n} : {r_1 : A_1 ; ... ; r_n : A_n} - - - r : {r_1 : A_1 ; ... ; r_n : A_n} - ----------------------------------- i = 1,...,n - r.r_1 : A_1 -``` -Subtyping: if ``r : R`` then ``r : R ** {r : A}`` - - - -#NEW - -==Computation rules== - -``` - (\x -> b) a = b{x := a} - - (table {C_1 => v_1 ; ... ; C_n => v_n} : P => A) ! C_i = v_i - - {r_1 = a_1 ; ... 
; r_n = a_n}.r_i = a_i -``` - - - -#NEW - -==Canonical GF== - -Concrete syntax type system: -``` - A_1 : Type ... A_n : Type - Str : Type Int : Type ------------------------- $i : A - [A_1, ..., A_n] : Type - - - a_1 : A_1 ... a_n : A_n t : [A_1, ..., A_n] - --------------------------------- ------------------- i = 1,..,n - [a_1, ..., a_n] : [A_1, ..., A_n] t ! i : A_i -``` -Tuples represent both records and tables. - -There are no functions. - -Linearization: -``` - lin f = f* - - (f a_1 ... a_n)* = f*{$1 = a_1*, ..., $n = a_n*} -``` - - -#NEW - -==The compilation task, again== - -1. From a GF source grammar, derive a canonical GF grammar. - -2. From the canonical GF grammar derive an MPCFG grammar. - -The canonical GF grammar can be used for linearization, with -linear time complexity (w.r.t. the size of the tree). - -The MPCFG grammar can be used for parsing, with (unbounded) -polynomial time complexity (w.r.t. the size of the string). - -For these target formats, we have also built interpreters in -different programming languages (C, C++, Haskell, Java, Prolog). - -Moreover, we generate supplementary formats such as grammars -required by various speech recognition systems. - - -#NEW - -==An overview of compilation phases== - -Legend: -- ellipse node: representation saved in a file -- plain text node: internal representation -- solid arrow or ellipse: essential phase or format -- dashed arrow or ellipse: optional phase or format -- arrow label: the module implementing the phase - - -[gf-compiler.png] - - -#NEW - -==Using the compiler== - -Batch mode (cf. GHC). - -Interactive mode, building the grammar incrementally from -different files, with the possibility of testing them -(cf. GHCi). - -The interactive mode was first, built on the model of ALF-2 -(L. Magnusson), and there was no file output of compiled -grammars. - - -#NEW - -==Modules and separate compilation== - -The above diagram shows what happens to each module.
-(But not quite, since some of the back-end formats must be -built for sets of modules: GFCC and the parser formats.) - -When the grammar compiler is called, it has a main module as its -argument. It then builds recursively a dependency graph with all -the other modules, and decides which ones must be recompiled. -The behaviour is rather similar to GHC. - -Separate compilation is //extremely important// when developing -big grammars, especially when using grammar libraries. Example: compiling -the GF resource grammar library takes 5 minutes, whereas reading -in the compiled image takes 10 seconds. - - -#NEW - -==Module dependencies and recompilation== - -(For later use, not for the Proglog talk) - -For each module M, there are 3 kinds of files: -- M.gf, source file -- M.gfc, compiled file ("object file") -- M.gfr, type-checked and optimized source file (for resource modules only) - - -The compiler reads gf files and writes gfc files (and gfr files if appropriate) - -The Main module is the one used as argument when calling GF. - -A module M (immediately) depends on the module K, if either -- M is a concrete of K -- M is an instance of K -- M extends K -- M opens K -- M is a completion of K with something -- M is a completion of some module with K instantiated with something - - -A module M (transitively) depends on the module K, if either -- M immediately depends on K -- M depends on some L such that L immediately depends on K - - -Immediate dependence is readable from the module header without parsing -the whole module. - -The compiler reads recursively the headers of all modules that Main depends on. - -These modules are arranged in a dependency graph, which is checked to be acyclic. - -To decide whether a module M has to be compiled, do: -+ Get the time stamps t() of M.gf and M.gfc (if a file doesn't exist, its - time is minus infinity). -+ If t(M.gf) > t(M.gfc), M must be compiled. -+ If M depends on K and K must be compiled, then M must be compiled. 
-+ If M depends on K and t(K.gf) > t(M.gfc), then M must be compiled. - - -Decorate the dependency graph by information on whether the gf or the gfc (and gfr) -format is to be read. - -Topologically sort the decorated graph, and read each file in the chosen format. - -The gfr file is generated for these module types only: -- resource -- instance - - -When reading K.gfc, also K.gfr is read if some M depending on K has to be compiled. -In other cases, it is enough to read K.gfc. - -In an interactive GF session, some modules may be in memory already. -When read to the memory, each module M is given time stamp t(M.m). -The additional rule now is: -- If M.gfc is to be read, and t(M.m) > t(M.gfc), don't read M.gfc. - - - - -#NEW - -==Techniques used== - -The compiler is written in Haskell, with some C foreign function calls -in the interactive version (readline, killing threads). - -BNFC is used for generating both the parsers and printers. -This has helped to make the formats portable. - -"Almost compositional functions" (``composOp``) are used in -many compiler passes, making them easier to write and understand. -A ``grep`` on the sources reveals 40 uses (outside the definition -of ``composOp`` itself). - -The key algorithmic ideas are -- type-driven partial evaluation in GF-to-GFC generation -- common subexpression elimination as back-end optimization -- some ideas in GFC-to-MCFG encoding - - -#NEW - -==Type-driven partial evaluation== - -Each abstract syntax category in GF has a corresponding linearization type: -``` - cat C - lincat C = T -``` -The general form of a GF rule pair is -``` - fun f : C1 -> ... -> Cn -> C - lin f = t -``` -with the typing condition following the ``lincat`` definitions -``` - t : T1 -> ... -> Tn -> T -``` -The term ``t`` is in general built by using abstraction methods such -as pattern matching, higher-order functions, local definitions, -and library functions. 
- -The compilation technique proceeds as follows: -- use eta-expansion on ``t`` to determine the canonical form of the term -``` - \ $C1, ...., $Cn -> (t $C1 .... $Cn) -``` -with unique variables ``$C1 .... $Cn`` for the arguments; repeat this -inside the term for records and tables -- evaluate the resulting term using the computation rules of GF -- what remains is a canonical term with ``$C1 .... $Cn`` the only -variables (the run-time input of the linearization function) - - -#NEW - -==Eta-expanding records and tables== - -For records that are valid via subtyping, eta expansion -eliminates superfluous fields: -``` - {r1 = t1 ; r2 = t2} : {r1 : T1} ---> {r1 = t1} -``` -For tables, the effect is always expansion, since -pattern matching can be used to represent tables -compactly: -``` - table {n => "fish"} : Number => Str ---> - - table { - Sg => "fish" ; - Pl => "fish" - } -``` -This can be helped by back-end optimizations (see below). - - -#NEW - -==Eliminating functions== - -"Everything is finite": parameter types, records, tables; -finite number of string tokens per grammar. - -But "infinite types" such as function types are useful when -writing grammars, to enable abstractions. - -Since function types do not appear in linearization types, -we want functions to be eliminated from linearization terms. - -This is similar to the **subformula property** in logic. -Also the main problem is similar: a function depending on -a run-time variable, -``` - (table {P => f ; Q => g} ! x) a -``` -This is not a redex, but we can make it closer to one by moving -the application inside the table, -``` - table {P => f a ; Q => g a} !
x -``` -This transformation is the same as Prawitz's (1965) elimination -of maximal segments in natural deduction: -``` - A B - C -> D C C -> D C - A B --------- --------- - A v B C -> D C -> D A v B D D - --------------------- ===> ------------------------- - C -> D C D - -------------------- - D -``` - - - -#NEW - -==Size effects of partial evaluation== - -Irrelevant table branches are thrown away, which can reduce the size. - -But, since tables are expanded and auxiliary functions are inlined, -the size can grow exponentially. - -How can we keep the first property and eliminate the second? - - -#NEW - -==Parametrization of tables== - -Algorithm: for each branch in a table, consider replacing the -argument by a variable: -``` - table { table { - P => t ; ---> x => t[P->x] ; - Q => u x => u[Q->x] - } } -``` -If the resulting branches are all equal, you can replace the table -by a lambda abstract -``` - \\x => t[P->x] -``` -If each created variable ``x`` is unique in the grammar, computation -with the lambda abstract is efficient. - - - -#NEW - -==Course-of-values tables== - -By maintaining a canonical order of parameters in a type, we can -eliminate the left hand sides of branches. -``` - table { table T [ - P => t ; ---> t ; - Q => u u - } ] -``` -The treatment is similar to ``Enum`` instances in Haskell. - -In the end, all parameter types can be translated to -initial segments of integers. - - -#NEW - -==Common subexpression elimination== - -Algorithm: -+ Go through all terms and subterms in a module, creating - a symbol table mapping terms to the number of occurrences. -+ For each subterm appearing at least twice, create a fresh - constant defined as that subterm. -+ Go through all rules (incl. rules for the new constants), - replacing largest possible subterms with such new constants. - - -This algorithm, in a way, creates the strongest possible abstractions. - -In general, the new constants have open terms as definitions. 
-But since all variables (and constants) are unique, they can -be computed by simple replacement. - - - -#NEW - -==Size effects of optimizations== - -Example: the German resource grammar -``LangGer`` - -|| optimization | lines | characters | size % | blow-up | -| none | 5394 | 3208435 | 100 | 25 | -| all | 5394 | 750277 | 23 | 6 | -| none_subs | 5772 | 1290866 | 40 | 10 | -| all_subs | 5644 | 414119 | 13 | 3 | -| gfcc | 3279 | 190004 | 6 | 1.5 | -| gf source | 3976 | 121939 | 4 | 1 | - - -Optimization "all" means parametrization + course-of-values. - -The source code size is an estimate, since it includes -potentially irrelevant library modules, and comments. - -The GFCC format is not reusable in separate compilation. - - - -#NEW - -==The shared prefix optimization== - -This is currently performed in GFCC only. - -The idea works for languages that have a rich morphology -based on suffixes. Then we can replace a course of values -with a pair of a prefix and a suffix set: -``` - ["apa", "apan", "apor", "aporna"] ---> - ("ap" + ["a", "an", "or", "orna"]) -``` -The real gain comes via common subexpression elimination: -``` - _34 = ["a", "an", "or", "orna"] - apa = ("ap" + _34) - blomma = ("blomm" + _34) - flicka = ("flick" + _34) -``` -Notice that it now matters a lot how grammars are written. -For instance, if German verbs are treated as a one-dimensional -table, -``` - ["lieben", "liebe", "liebst", ...., "geliebt", "geliebter",...] -``` -no shared prefix optimization is possible. A better form is -separate tables for non-"ge" and "ge" forms: -``` - [["lieben", "liebe", "liebst", ....], ["geliebt", "geliebter",...]] -``` - - -#NEW - -==Reuse of grammars as libraries== - -The idea of resource grammars: take care of all aspects of -surface grammaticality (inflection, agreement, word order). 
- -Reuse in application grammar: via translations -``` - cat C ---> oper C : Type = T - lincat C = T - - fun f : A ---> oper f : A* = t - lin f = t -``` -The user only needs to know the type signatures (abstract syntax). - -However, this does not quite guarantee grammaticality, because -different categories can have the same lincat: -``` - lincat Conj = {s : Str} - lincat Adv = {s : Str} -``` -Thus someone may by accident use "and" as an adverb! - - -#NEW - -==Forcing the type checker to act as a grammar checker== - -We just have to make linearization types unique for each category. - -The technique is reminiscent of Haskell's ``newtype`` but uses -records instead: we add **lock fields** e.g. -``` - lincat Conj = {s : Str ; lock_Conj : {}} - lincat Adv = {s : Str ; lock_Adv : {}} -``` -Thanks to record subtyping, the translation is simple: -``` - fun f : C1 -> ... -> Cn -> C - lin f = t - - ---> - - oper f : C1* -> ... -> Cn* -> C* = - \x1,...,xn -> (t x1 ... xn) ** {lock_C = {}} -``` - -#NEW - -==Things to do== - -Better compression of gfc file format. - -Type checking of dependent-type pattern matching in abstract syntax. 
- -Compilation-related modules that need rewriting -- ``ReadFiles``: clarify the logic of dependencies -- ``Compile``: clarify the logic of what to do with each module -- ``Compute``: make the evaluation more efficient -- ``Parsing/*``, ``OldParsing/*``, ``Conversion/*``: reduce the number - of parser formats and algorithms diff --git a/doc/eu-langs.dot b/doc/eu-langs.dot deleted file mode 100644 index 115ce0040..000000000 --- a/doc/eu-langs.dot +++ /dev/null @@ -1,79 +0,0 @@ -graph{ - -size = "7,7" ; - -overlap = scale ; - -"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ; - -"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ; -"1" -- "Abs" [style = "solid"]; - -"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ; -"2" -- "Abs" [style = "solid"]; - -"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ; -"3" -- "Abs" [style = "solid"]; - -"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ; -"4" -- "Abs" [style = "solid"]; - -"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ; -"5" -- "Abs" [style = "solid"]; - -"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ; -"6" -- "Abs" [style = "solid"]; - -"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ; -"7" -- "Abs" [style = "solid"]; - -"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ; -"8" -- "Abs" [style = "solid"]; - -"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ; -"9" -- "Abs" [style = "solid"]; - -"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ; -"10" -- "Abs" [style = "solid"]; - -"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ; -"11" -- "Abs" [style = "solid"]; - -"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "12" [style = "solid"]; - -"13" [label = 
"Irish", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "13" [style = "solid"]; - -"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "14" [style = "solid"]; - -"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "15" [style = "solid"]; - -"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "16" [style = "solid"]; - -"17" [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "17" [style = "solid"]; - -"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "18" [style = "solid"]; - -"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "19" [style = "solid"]; - -"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "20" [style = "solid"]; - -"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "21" [style = "solid"]; - -"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "22" [style = "solid"]; - -"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "23" [style = "solid"]; - - -} diff --git a/doc/eu-langs.png b/doc/eu-langs.png Binary files differdeleted file mode 100644 index 8c46a19db..000000000 --- a/doc/eu-langs.png +++ /dev/null diff --git a/doc/food-translet.png b/doc/food-translet.png Binary files differdeleted file mode 100644 index dd622a4bf..000000000 --- a/doc/food-translet.png +++ /dev/null diff --git a/doc/food1.png b/doc/food1.png Binary files differdeleted file mode 100644 index 767069dab..000000000 --- a/doc/food1.png +++ /dev/null diff --git a/doc/food2.png b/doc/food2.png Binary files differdeleted file mode 100644 index b36a01b22..000000000 --- a/doc/food2.png +++ /dev/null diff --git a/doc/gf-compiler.dot b/doc/gf-compiler.dot deleted file mode 100644 index f8ce1aaae..000000000 --- 
a/doc/gf-compiler.dot +++ /dev/null @@ -1,88 +0,0 @@ -digraph { - - gfe [label = "file.gfe", style = "dashed", shape = "ellipse"]; - gfe -> gf1 [label = " MkConcrete", style = "dashed"]; - -gf1 [label = "file.gf", style = "solid", shape = "ellipse"]; -gf1 -> gf2 [label = " LexGF", style = "solid"]; - -gf2 [label = "token list", style = "solid", shape = "plaintext"]; -gf2 -> gf3 [label = " ParGF", style = "solid"]; - -gf3 [label = "source tree", style = "solid", shape = "plaintext"]; -gf3 -> gf4 [label = " SourceToGrammar", style = "solid"]; - - cf [label = "file.cf", style = "dashed", shape = "ellipse"]; - cf -> gf4 [label = " CF.PPrCF", style = "dashed"]; - - ebnf [label = "file.ebnf", style = "dashed", shape = "ellipse"]; - ebnf -> gf4 [label = " CF.EBNF", style = "dashed"]; - - -gf4 [label = "GF tree", style = "solid", shape = "plaintext"]; -gf4 -> gf5 [label = " Extend", style = "solid"]; - -gf5 [label = "inheritance-linked GF tree", style = "solid", shape = "plaintext"]; -gf5 -> gf6 [label = " Rename", style = "solid"]; - -gf6 [label = "name-resolved GF tree", style = "solid", shape = "plaintext"]; -gf6 -> gf7 [label = " CheckGrammar", style = "solid"]; - -gf7 [label = "type-annotated GF tree", style = "solid", shape = "plaintext"]; -gf7 -> gf8 [label = " Optimize", style = "solid"]; - -gf8 [label = "optimized GF tree", style = "solid", shape = "plaintext"]; -gf8 -> gf9 [label = " GrammarToCanon", style = "solid"]; - -gf9 [label = "GFC tree", style = "solid", shape = "plaintext"]; -gf9 -> gfc [label = " BackOpt", style = "solid"]; - -gfc [label = "optimized GFC tree", style = "solid", shape = "box"]; -gfc -> gf11 [label = " PrintGFC", style = "solid"]; - -gf11 [label = "file.gfc", style = "solid", shape = "ellipse"]; - - - gfcc [label = "file.gfcc", style = "solid", shape = "ellipse"]; - gfc -> gfcc [label = " CanonToGFCC", style = "solid"]; - - mcfg [label = "file.gfcm", style = "dashed", shape = "ellipse"]; - gfc -> mcfg [label = " PrintGFC", style = 
"dashed"]; - - bnf [label = "file.cf", style = "dashed", shape = "ellipse"]; - gfc -> bnf [label = " CF.PrLBNF", style = "dashed"]; - - happy [label = "file.y (Happy)", style = "dashed", shape = "ellipse"]; - bnf -> happy [label = " bnfc", style = "dashed"]; - - bison [label = "file.y (Bison)", style = "dashed", shape = "ellipse"]; - bnf -> bison [label = " bnfc", style = "dashed"]; - - cup [label = "parser.java (CUP)", style = "dashed", shape = "ellipse"]; - bnf -> cup [label = " bnfc", style = "dashed"]; - - xml [label = "file.dtd (XML)", style = "dashed", shape = "ellipse"]; - bnf -> xml [label = " bnfc", style = "dashed"]; - - cfg [label = "CFG tree", style = "solid", shape = "plaintext"]; - gfc -> cfg [label = " Conversions.GFC", style = "dashed"]; - - cfgm [label = "file.cfgm", style = "dashed", shape = "ellipse"]; - cfg -> cfgm [label = " Conversions.GFC", style = "dashed"]; - - srg [label = "Non-LR CFG", style = "solid", shape = "plaintext"]; - cfg -> srg [label = " Speech.SRG", style = "dashed"]; - - gsl [label = "file.gsl", style = "dashed", shape = "ellipse"]; - srg -> gsl [label = " Speech.PrGSL", style = "dashed"]; - - jsgf [label = "file.jsgf", style = "dashed", shape = "ellipse"]; - srg -> jsgf [label = " Speech.PrJSGF", style = "dashed"]; - - fa [label = "DFA", style = "solid", shape = "plaintext"]; - cfg -> fa [label = " Speech.CFGToFiniteState", style = "dashed"]; - - slf [label = "file.slf", style = "dashed", shape = "ellipse"]; - fa -> slf [label = " Speech.PrSLF", style = "dashed"]; - -} diff --git a/doc/gf-compiler.png b/doc/gf-compiler.png Binary files differdeleted file mode 100644 index 6949c37b5..000000000 --- a/doc/gf-compiler.png +++ /dev/null diff --git a/doc/gf-formalism.html b/doc/gf-formalism.html deleted file mode 100644 index 52d9256aa..000000000 --- a/doc/gf-formalism.html +++ /dev/null @@ -1,350 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" 
CONTENT="http://txt2tags.sf.net"> -<TITLE>A Birds-Eye View of GF as a Grammar Formalism</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> -<P ALIGN="center"><CENTER><H1>A Birds-Eye View of GF as a Grammar Formalism</H1> -<FONT SIZE="4"> -<I>Author: Aarne Ranta</I><BR> -Last update: Thu Feb 2 14:16:01 2006 -</FONT></CENTER> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> - <UL> - <LI><A HREF="#toc1">GF in a few words</A> - <LI><A HREF="#toc2">History of GF</A> - <LI><A HREF="#toc3">Some key ingredients of GF in other grammar formalisms</A> - <LI><A HREF="#toc4">Examples of descriptions in each formalism</A> - <LI><A HREF="#toc5">Lambda terms and records</A> - <LI><A HREF="#toc6">The structure of GF formalisms</A> - <LI><A HREF="#toc7">The expressivity of GF</A> - <LI><A HREF="#toc8">Grammars and parsing</A> - <LI><A HREF="#toc9">Grammars as software libraries</A> - <LI><A HREF="#toc10">Multilinguality</A> - <LI><A HREF="#toc11">Parametrized modules</A> - </UL> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> -<P> -<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT=""> -</P> -<P> -<I>Abstract. This document gives a general description of the</I> -<I>Grammatical Framework (GF), with comparisons to other grammar</I> -<I>formalisms such as CG, ACG, HPSG, and LFG.</I> -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc1"></A> -<H2>GF in a few words</H2> -<P> -Grammatical Framework (GF) is a grammar formalism -based on <B>constructive type theory</B>. -</P> -<P> -GF makes a distinction between <B>abstract syntax</B> and <B>concrete syntax</B>. -</P> -<P> -The abstract syntax part of GF is a <B>logical framework</B>, with -dependent types and higher-order functions. -</P> -<P> -The concrete syntax is a system of <B>records</B> containing strings and features. -</P> -<P> -A GF grammar defines a <B>reversible homomorphism</B> from an abstract syntax to a -concrete syntax. -</P> -<P> -A <B>multilingual GF grammar</B> is a set of concrete syntaxes associated with -one abstract syntax. 
-</P> -<P> -GF grammars are written in a high-level <B>functional programming language</B>, -which is compiled into a <B>core language</B> (GFC). -</P> -<P> -GF grammars can be used as <B>resources</B>, i.e. as libraries for writing -new grammars; these are compiled and optimized by the method of -<B>grammar composition</B>. -</P> -<P> -GF has a <B>module system</B> that supports grammar engineering and separate -compilation. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc2"></A> -<H2>History of GF</H2> -<P> -1988. Intuitionistic Categorial Grammar; type theory as abstract syntax, -playing the role of Montague's analysis trees. Grammars implemented in Prolog. -</P> -<P> -1994. Type-Theoretical Grammar. Abstract syntax organized as a system of -combinators. Grammars implemented in ALF. -</P> -<P> -1996. Multilingual Type-Theoretical Grammar. Rules for generating six -languages from the same abstract syntax. Grammars implemented in ALF, ML, and -Haskell. -</P> -<P> -1998. The first implementation of GF as a language of its own. -</P> -<P> -2000. New version of GF: high-level functional source language, records used -for concrete syntax. -</P> -<P> -2003. The module system. -</P> -<P> -2004. Ljunglöf's thesis <I>Expressivity and Complexity of GF</I>. 
-</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc3"></A> -<H2>Some key ingredients of GF in other grammar formalisms</H2> -<UL> -<LI>[GF ]: Grammatical Framework -<LI>[CG ]: categorial grammar -<LI>[ACG ]: abstract categorial grammar -<LI>[HPSG ]: head-driven phrase structure grammar -<LI>[LFG ]: lexical functional grammar -</UL> - -<TABLE CELLPADDING="4" BORDER="1"> -<TR> -<TD ALIGN="center">/</TD> -<TD>GF</TD> -<TD>ACG</TD> -<TD>LFG</TD> -<TD>HPSG</TD> -<TD>CG</TD> -</TR> -<TR> -<TD>abstract vs concrete syntax</TD> -<TD>X</TD> -<TD>X</TD> -<TD>?</TD> -<TD>-</TD> -<TD>-</TD> -</TR> -<TR> -<TD>type theory</TD> -<TD>X</TD> -<TD>X</TD> -<TD>-</TD> -<TD>-</TD> -<TD>X</TD> -</TR> -<TR> -<TD>records and features</TD> -<TD>X</TD> -<TD>-</TD> -<TD>X</TD> -<TD>X</TD> -<TD>-</TD> -</TR> -</TABLE> - -<P></P> -<P> -<!-- NEW --> -</P> -<A NAME="toc4"></A> -<H2>Examples of descriptions in each formalism</H2> -<P> -To be written... -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc5"></A> -<H2>Lambda terms and records</H2> -<P> -In CS, abstract syntax is trees and concrete syntax is strings. -This works more or less for programming languages. -</P> -<P> -In CG, all syntax is lambda terms. -</P> -<P> -In Montague grammar, abstract syntax is lambda terms and -concrete syntax is trees. Abstract syntax as lambda terms -can be considered well-established. -</P> -<P> -In PATR and HPSG, concrete syntax is records. This can be considered -well-established for natural languages. -</P> -<P> -In ACG, both are lambda terms. This is more general than GF, -but reversibility requires linearity restriction, which can be -unnatural for grammar writing. -</P> -<P> -In GF, linearization from lambda terms to records is reversible, -and grammar writing is not restricted to linear terms. -</P> -<P> -Grammar composition in ACG is just function composition. In GF, -it is more restricted... 
-</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc6"></A> -<H2>The structure of GF formalisms</H2> -<P> -The following diagram (to be drawn properly!) describes the -levels. -</P> -<PRE> - | programming language design - V - GF source language - | - | type-directed partial evaluation - V - GFC assembly language - | - | Ljunglöf's translation - V - MCFG parser -</PRE> -<P> -The last two phases are nontrivial mathematical properties. -</P> -<P> -In most grammar formalisms, grammarians have to work on the GFC -(or MCFG) level. -</P> -<P> -Maybe they use macros - they are therefore like macro assemblers. But there -are no separately compiled library modules, no type checking, etc. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc7"></A> -<H2>The expressivity of GF</H2> -<P> -Parsing complexity is the same as MCFG: polynomial, with -unrestricted exponent depending on grammar. -This is between TAG and HPSG. -</P> -<P> -If semantic well-formedness (type theory) is taken into account, -then arbitrary logic can be expressed. The well-formedness of -abstract syntax is decidable, but the well-formedness of a -concrete-syntax string can require an arbitrary proof construction -and is therefore undecidable. -</P> -<P> -Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF -has the goal of assigning intended trees for strings. This is -generalized to shared trees for different languages. -</P> -<P> -The high-level language strives after the properties of -writability and readability (programming language notions). -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc8"></A> -<H2>Grammars and parsing</H2> -<P> -In many projects, a grammar is just seen as a <B>declarative parsing program</B>. -</P> -<P> -For GF, a grammar is primarily the <B>definition of a language</B>. 
-</P> -<P> -Detaching grammars from parsers is a good idea, giving -</P> -<UL> -<LI>more efficient and robust parsing (statistical etc) -<LI>cleaner grammars -</UL> - -<P> -Separating abstract from concrete syntax is a prerequisite for this: -we want parsers to return abstract syntax objects, and these must exist -independently of parse trees. -</P> -<P> -A possible radical approach to parsing: -use a grammar to generate a treebank and machine-learn -a statistical parser from this. -</P> -<P> -Comparison: Steedman in CCG has done something like this. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc9"></A> -<H2>Grammars as software libraries</H2> -<P> -Reuse for different purposes. -</P> -<P> -Grammar composition. -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc10"></A> -<H2>Multilinguality</H2> -<P> -In <B>application grammars</B>, the AS is a semantic -model, and a CS covers domain terminology and idioms. -</P> -<P> -This can give publication-quality translation on -limited domains (e.g. the WebALT project). -</P> -<P> -Resource grammars with grammar composition lead to -<B>compile-time transfer</B>. -</P> -<P> -When is <B>run-time transfer</B> necessary? -</P> -<P> -Cf. CLE (Core Language Engine). -</P> -<P> -<!-- NEW --> -</P> -<A NAME="toc11"></A> -<H2>Parametrized modules</H2> -<P> -This notion comes from the ML language in the 1980's. -</P> -<P> -It can be used for sharing even more code between languages -than their AS. -</P> -<P> -Especially, for related languages (Scandinavian, Romance). -</P> -<P> -Cf. grammar porting in CLE: what they do with untyped -macro packages GF does with typable interfaces. 
-</P> - -<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -thtml -\-toc gf-formalism.txt --> -</BODY></HTML> diff --git a/doc/gf-formalism.txt b/doc/gf-formalism.txt deleted file mode 100644 index 3b6963d11..000000000 --- a/doc/gf-formalism.txt +++ /dev/null @@ -1,279 +0,0 @@ -A Birds-Eye View of GF as a Grammar Formalism -Author: Aarne Ranta -Last update: %%date(%c) - -% NOTE: this is a txt2tags file. -% Create an html file from this file using: -% txt2tags -thtml --toc gf-formalism.txt - -%!target:html - -%!postproc(html): #NEW <!-- NEW --> - -[Logos/gf0.png] - -//Abstract. This document gives a general description of the// -//Grammatical Framework (GF), with comparisons to other grammar// -//formalisms such as CG, ACG, HPSG, and LFG.// - - -#NEW - -==Logical Frameworks and Grammar Formalisms== - -Logic - formalization of mathematics (mathematical language?) - -Linguistics - formalization of natural language - -Since math lang is a subset, we can expect similarities. - -But in natural language we have -- masses of empirical data -- no right of reform - - - -#NEW - -==High-level programming== - -We have to write a lot of program code when formalizing language. - -We need a language with proper abstractions. - -Cf. Paul Graham on Prolog: very high-level, but wrong abstractions. - -Typed functional languages work well in maths. - -We have developed one for linguistics -- some extra constructs, e.g. inflection tables -- constraint of reversibility (nontrivial math problem) - - -Writing a grammar of e.g. French clitics should not be a topic -on which one can write a paper - it should be easy to render in code -the known facts about languages! - - - -#NEW - -==GF in a few words== - -Grammatical Framework (GF) is a grammar formalism -based on **constructive type theory**. - -GF makes a distinction between **abstract syntax** and **concrete syntax**. 
- -The abstract syntax part of GF is a **logical framework**, with -dependent types and higher-order functions. - -The concrete syntax is a system of **records** containing strings and features. - -A GF grammar defines a **reversible homomorphism** from an abstract syntax to a -concrete syntax. - -A **multilingual GF grammar** is a set of concrete syntaxes associated with -one abstract syntax. - -GF grammars are written in a high-level **functional programming language**, -which is compiled into a **core language** (GFC). - -GF grammars can be used as **resources**, i.e. as libraries for writing -new grammars; these are compiled and optimized by the method of -**grammar composition**. - -GF has a **module system** that supports grammar engineering and separate -compilation. - - -#NEW - -==History of GF== - -1988. Intuitionistic Categorial Grammar; type theory as abstract syntax, -playing the role of Montague's analysis trees. Grammars implemented in Prolog. - -1994. Type-Theoretical Grammar. Abstract syntax organized as a system of -combinators. Grammars implemented in ALF. - -1996. Multilingual Type-Theoretical Grammar. Rules for generating six -languages from the same abstract syntax. Grammars implemented in ALF, ML, and -Haskell. - -1998. The first implementation of GF as a language of its own. - -2000. New version of GF: high-level functional source language, records used -for concrete syntax. - -2003. The module system. - -2004. Ljunglöf's thesis //Expressivity and Complexity of GF//. - - - -#NEW - -==Some key ingredients of GF in other grammar formalisms== - -- [GF ]: Grammatical Framework -- [CG ]: categorial grammar -- [ACG ]: abstract categorial grammar -- [HPSG ]: head-driven phrase structure grammar -- [LFG ]: lexical functional grammar - - -| / | GF | ACG | LFG | HPSG | CG | -| abstract vs concrete syntax | X | X | ? 
| - | - | -| type theory | X | X | - | - | X | -| records and features | X | - | X | X | - | - - -#NEW - -==Examples of descriptions in each formalism== - -To be written... - - -#NEW - -==Lambda terms and records== - -In CS, abstract syntax is trees and concrete syntax is strings. -This works more or less for programming languages. - -In CG, all syntax is lambda terms. - -In Montague grammar, abstract syntax is lambda terms and -concrete syntax is trees. Abstract syntax as lambda terms -can be considered well-established. - -In PATR and HPSG, concrete syntax is records. This can be considered -well-established for natural languages. - -In ACG, both are lambda terms. This is more general than GF, -but reversibility requires linearity restriction, which can be -unnatural for grammar writing. - -In GF, linearization from lambda terms to records is reversible, -and grammar writing is not restricted to linear terms. - -Grammar composition in ACG is just function composition. In GF, -it is more restricted... - - -#NEW - -==The structure of GF formalisms== - -The following diagram (to be drawn properly!) describes the -levels. -``` - | programming language design - V - GF source language - | - | type-directed partial evaluation - V - GFC assembly language - | - | Ljunglöf's translation - V - MCFG parser -``` -The last two phases are nontrivial mathematical properties. - -In most grammar formalisms, grammarians have to work on the GFC -(or MCFG) level. - -Maybe they use macros - they are therefore like macro assemblers. But there -are no separately compiled library modules, no type checking, etc. - - -#NEW - -==The expressivity of GF== - -Parsing complexity is the same as MCFG: polynomial, with -unrestricted exponent depending on grammar. -This is between TAG and HPSG. - -If semantic well-formedness (type theory) is taken into account, -then arbitrary logic can be expressed. 
The well-formedness of -abstract syntax is decidable, but the well-formedness of a -concrete-syntax string can require an arbitrary proof construction -and is therefore undecidable. - -Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF -has the goal of assigning intended trees for strings. This is -generalized to shared trees for different languages. - -The high-level language strives after the properties of -writability and readability (programming language notions). - - -#NEW - -==Grammars and parsing== - -In many projects, a grammar is just seen as a **declarative parsing program**. - -For GF, a grammar is primarily the **definition of a language**. - -Detaching grammars from parsers is a good idea, giving -- more efficient and robust parsing (statistical etc) -- cleaner grammars - - -Separating abstract from concrete syntax is a prerequisite for this: -we want parsers to return abstract syntax objects, and these must exist -independently of parse trees. - -A possible radical approach to parsing: -use a grammar to generate a treebank and machine-learn -a statistical parser from this. - -Comparison: Steedman in CCG has done something like this. - - -#NEW - -==Grammars as software libraries== - -Reuse for different purposes. - -Grammar composition. - - -#NEW - -==Multilinguality== - -In **application grammars**, the AS is a semantic -model, and a CS covers domain terminology and idioms. - -This can give publication-quality translation on -limited domains (e.g. the WebALT project). - -Resource grammars with grammar composition lead to -**compile-time transfer**. - -When is **run-time transfer** necessary? - -Cf. CLE (Core Language Engine). - - -#NEW - -==Parametrized modules== - -This notion comes from the ML language in the 1980's. - -It can be used for sharing even more code between languages -than their AS. - -Especially, for related languages (Scandinavian, Romance). - -Cf. 
grammar porting in CLE: what they do with untyped -macro packages GF does with typable interfaces. diff --git a/doc/gf-ideas.html b/doc/gf-ideas.html deleted file mode 100644 index 8119740fa..000000000 --- a/doc/gf-ideas.html +++ /dev/null @@ -1,311 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" CONTENT="http://txt2tags.sf.net"> -<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> -<TITLE>GF Project Ideas</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> - -<P> -<center> -<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT=""> -</center> -</P> - -<P ALIGN="center"><CENTER> -<H1>GF Project Ideas</H1> -<FONT SIZE="4"> -<I>Resource Grammars, Web Applications, etc</I><BR> -contact: Aarne Ranta (aarne at chalmers dot se) -</FONT></CENTER> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> - <UL> - <LI><A HREF="#toc1">Resource Grammar Implementations</A> - <UL> - <LI><A HREF="#toc2">Tasks</A> - <LI><A HREF="#toc3">Who is qualified</A> - <LI><A HREF="#toc4">The Summer School</A> - </UL> - <LI><A HREF="#toc5">Other project ideas</A> - <UL> - <LI><A HREF="#toc6">GF interpreter in Java</A> - <LI><A HREF="#toc7">GF interpreter in C#</A> - <LI><A HREF="#toc8">GF localization library</A> - <LI><A HREF="#toc9">Multilingual grammar applications for mobile phones</A> - <LI><A HREF="#toc10">Multilingual grammar applications for the web</A> - <LI><A HREF="#toc11">GMail gadget for GF</A> - </UL> - <LI><A HREF="#toc12">Dissemination and intellectual property</A> - </UL> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> -<A NAME="toc1"></A> -<H2>Resource Grammar Implementations</H2> -<P> -GF Resource Grammar Library is an open-source computational grammar resource -that currently covers 12 languages. -The Library is a collaborative effort to which programmers from many countries -have contributed. The next goal is to extend the library -to all of the 23 official EU languages. 
Other languages -are also welcome all the time. The following diagram shows the current status of the -library. Each of the red and orange ones is a potential project. -</P> -<P> -<center> -<IMG ALIGN="middle" SRC="school-langs.png" BORDER="0" ALT=""> -</center> -</P> -<P> -<I>red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu</I> -</P> -<P> -The linguistic coverage of the library includes the inflectional morphology -and basic syntax of each language. It can be used in GF applications -and also ported to other formats. It can also be used for building other -linguistic resources, such as morphological lexica and parsers. -The library is licensed under LGPL. -</P> -<A NAME="toc2"></A> -<H3>Tasks</H3> -<P> -Writing a grammar for a language is usually easier if other languages -from the same family already have grammars. The colours have the same -meaning as in the diagram above; in addition, we use boldface for the -red, still unimplemented languages and italics for the -orange languages in progress. Thus, in particular, each of the languages -coloured red below is a possible programming project. 
-</P> -<P> -Baltic: -</P> -<UL> -<LI><font color="red"><b> Latvian </b></font> -<LI><font color="red"><b> Lithuanian </b></font> -</UL> - -<P> -Celtic: -</P> -<UL> -<LI><font color="red"><b> Irish </b></font> -</UL> - -<P> -Fenno-Ugric: -</P> -<UL> -<LI><font color="red"><b> Estonian </b></font> -<LI><font color="green" size="-1"> Finnish </font> -<LI><font color="red"><b> Hungarian </b></font> -</UL> - -<P> -Germanic: -</P> -<UL> -<LI><font color="green" size="-1"> Danish </font> -<LI><font color="red"><b> Dutch </b></font> -<LI><font color="green" size="-1"> English </font> -<LI><font color="green" size="-1"> German </font> -<LI><font color="green" size="-1"> Norwegian </font> -<LI><font color="green" size="-1"> Swedish </font> -</UL> - -<P> -Hellenic: -</P> -<UL> -<LI><font color="red"><b> Greek </b></font> -</UL> - -<P> -Indo-Iranian: -</P> -<UL> -<LI><font color="orange"><i> Hindi </i></font> -<LI><font color="orange"><i> Urdu </i></font> -</UL> - -<P> -Romance: -</P> -<UL> -<LI><font color="green" size="-1"> Catalan </font> -<LI><font color="green" size="-1"> French </font> -<LI><font color="green" size="-1"> Italian </font> -<LI><font color="red"><b> Portuguese </b></font> -<LI><font color="orange"><i> Romanian </i></font> -<LI><font color="green" size="-1"> Spanish </font> -</UL> - -<P> -Semitic: -</P> -<UL> -<LI><font color="orange"><i> Arabic </i></font> -<LI><font color="red"><b> Maltese </b></font> -</UL> - -<P> -Slavonic: -</P> -<UL> -<LI><font color="green" size="-1"> Bulgarian </font> -<LI><font color="red"><b> Czech </b></font> -<LI><font color="orange"><i> Polish </i></font> -<LI><font color="green" size="-1"> Russian </font> -<LI><font color="red"><b> Slovak </b></font> -<LI><font color="red"><b> Slovenian </b></font> -</UL> - -<P> -Tai: -</P> -<UL> -<LI><font color="orange"><i> Thai </i></font> -</UL> - -<P> -Turkic: -</P> -<UL> -<LI><font color="orange"><i> Turkish </i></font> -</UL> - -<A NAME="toc3"></A> -<H3>Who is qualified</H3> -<P> 
-Writing a resource grammar implementation requires good general programming -skills, and a good explicit knowledge of the grammar of the target language. -A typical participant could be -</P> -<UL> -<LI>native or fluent speaker of the target language -<LI>interested in languages on the theoretical level, and preferably familiar - with many languages (to be able to think about them on an abstract level) -<LI>familiar with functional programming languages such as ML or Haskell - (GF itself is a language similar to these) -<LI>on Master's or PhD level in linguistics, computer science, or mathematics -</UL> - -<P> -But it is the quality of the assignment that is assessed, not any formal -requirements. The "typical participant" was described to give an idea of -who is likely to succeed in this. -</P> -<A NAME="toc4"></A> -<H3>The Summer School</H3> -<P> -A Summer School on resource grammars and applications will -be organized at the campus of Chalmers University of Technology in Gothenburg, -Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in -a resource grammar project; the participants are assumed to learn GF before -the Summer School, but how far they have come in their projects may vary. -</P> -<P> -More information on the Summer School web page: -</P> -<P> -<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html"><CODE>http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html</CODE></A> -</P> -<A NAME="toc5"></A> -<H2>Other project ideas</H2> -<A NAME="toc6"></A> -<H3>GF interpreter in Java</H3> -<P> -The idea is to write a run-time system for GF grammars in Java. This enables -the use of <B>embedded grammars</B> in Java applications. This project is -a fresh-up of <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>, -now using the new run-time format PGF and addressing a new parsing algorithm. 
-</P> -<P> -Requirements: Java, Haskell, basics of compilers and parsing algorithms. -</P> -<A NAME="toc7"></A> -<H3>GF interpreter in C#</H3> -<P> -The idea is to write a run-time system for GF grammars in C#. This enables -the use of <B>embedded grammars</B> in C# applications. This project is -similar to <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A> -on Java, now addressing C# and using the new run-time format PGF. -</P> -<P> -Requirements: C#, Haskell, basics of compilers and parsing algorithms. -</P> -<A NAME="toc8"></A> -<H3>GF localization library</H3> -<P> -This is an idea for a software localization library using GF grammars. -The library should replace strings by grammar rules, which can be conceived -as very smart templates always guaranteeing grammatically correct output. -The library should be based on the -<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html">GF Resource Grammar Library</A>, providing infrastructure -currently for 12 languages. -</P> -<P> -Requirements: GF, some natural languages, some localization platform -</P> -<A NAME="toc9"></A> -<H3>Multilingual grammar applications for mobile phones</H3> -<P> -GF grammars can be compiled into programs that can be run on different -platforms, such as web browsers and mobile phones. An example is a -<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html">numeral translator</A> running on both these platforms. -</P> -<P> -The proposed project is rather open: find some cool applications of -the technology that are useful or entertaining for mobile phone users. A -part of the project is to investigate implementation issues such as making -the best use of the phone's resources. Possible applications have -something to do with translation; one suggestion is an sms editor/translator. 
-</P> -<P> -Requirements: GF, JavaScript, some phone application development tools -</P> -<A NAME="toc10"></A> -<H3>Multilingual grammar applications for the web</H3> -<P> -This project is rather open: find some cool applications of -the technology that are useful or entertaining on the web. Examples include -</P> -<UL> -<LI>translators: see <A HREF="http://tournesol.cs.chalmers.se:41296/translate">demo</A> -<LI>multilingual wikis: see <A HREF="http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi">demo</A> -<LI>fridge magnets: see <A HREF="http://tournesol.cs.chalmers.se:41296/fridge">demo</A> -</UL> - -<P> -Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI -</P> -<A NAME="toc11"></A> -<H3>GMail gadget for GF</H3> -<P> -It is possible to add custom gadgets to GMail. If you are going to write -e-mail in a foreign language then you probably will need help from -dictonary or you may want to check something in the grammar. GF provides -all resources that you may need but you have to think about how to -design gadget that fits well in the GMail environment and what -functionality from GF you want to expose. -</P> -<P> -Requirements: GF, Google Web Toolkit -</P> -<A NAME="toc12"></A> -<H2>Dissemination and intellectual property</H2> -<P> -All code suggested here will be released under the LGPL just like -the current resource grammars and run-time GF libraries, -with the copyright held by respective authors. -</P> -<P> -As a rule, the code will be distributed via the GF web site. 
-</P> - -<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -\-toc gf-ideas.txt --> -</BODY></HTML> diff --git a/doc/gf-ideas.txt b/doc/gf-ideas.txt deleted file mode 100644 index 3f62196b9..000000000 --- a/doc/gf-ideas.txt +++ /dev/null @@ -1,231 +0,0 @@ -GF Project Ideas -Resource Grammars, Web Applications, etc -contact: Aarne Ranta (aarne at chalmers dot se) - -%!Encoding : iso-8859-1 - -%!target:html -%!postproc(html): #BECE <center> -%!postproc(html): #ENCE </center> -%!postproc(html): #GRAY <font color="green" size="-1"> -%!postproc(html): #EGRAY </font> -%!postproc(html): #RED <font color="red"><b> -%!postproc(html): #YELLOW <font color="orange"><i> -%!postproc(html): #ERED </b></font> -%!postproc(html): #EYELLOW </i></font> - -#BECE -[Logos/gf0.png] -#ENCE - - -==Resource Grammar Implementations== - -GF Resource Grammar Library is an open-source computational grammar resource -that currently covers 12 languages. -The Library is a collaborative effort to which programmers from many countries -have contributed. The next goal is to extend the library -to all of the 23 official EU languages. Also other languages -are welcome all the time. The following diagram show the current status of the -library. Each of the red and yellow ones are a potential project. - -#BECE -[school-langs.png] -#ENCE - - -//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu// - -The linguistic coverage of the library includes the inflectional morphology -and basic syntax of each language. It can be used in GF applications -and also ported to other formats. It can also be used for building other -linguistic resources, such as morphological lexica and parsers. -The library is licensed under LGPL. - - -===Tasks=== - -Writing a grammar for a language is usually easier if other languages -from the same family already have grammars. 
The colours have the same -meaning as in the diagram above; in addition, we use boldface for the -red, still unimplemented languages and italics for the -orange languages in progress. Thus, in particular, each of the languages -coloured red below are possible programming projects. - -Baltic: -- #RED Latvian #ERED -- #RED Lithuanian #ERED - - -Celtic: -- #RED Irish #ERED - - -Fenno-Ugric: -- #RED Estonian #ERED -- #GRAY Finnish #EGRAY -- #RED Hungarian #ERED - - -Germanic: -- #GRAY Danish #EGRAY -- #RED Dutch #ERED -- #GRAY English #EGRAY -- #GRAY German #EGRAY -- #GRAY Norwegian #EGRAY -- #GRAY Swedish #EGRAY - - -Hellenic: -- #RED Greek #ERED - - -Indo-Iranian: -- #YELLOW Hindi #EYELLOW -- #YELLOW Urdu #EYELLOW - - -Romance: -- #GRAY Catalan #EGRAY -- #GRAY French #EGRAY -- #GRAY Italian #EGRAY -- #RED Portuguese #ERED -- #YELLOW Romanian #EYELLOW -- #GRAY Spanish #EGRAY - - -Semitic: -- #YELLOW Arabic #EYELLOW -- #RED Maltese #ERED - - -Slavonic: -- #GRAY Bulgarian #EGRAY -- #RED Czech #ERED -- #YELLOW Polish #EYELLOW -- #GRAY Russian #EGRAY -- #RED Slovak #ERED -- #RED Slovenian #ERED - - -Tai: -- #YELLOW Thai #EYELLOW - - -Turkic: -- #YELLOW Turkish #EYELLOW - - -===Who is qualified=== - -Writing a resource grammar implementation requires good general programming -skills, and a good explicit knowledge of the grammar of the target language. -A typical participant could be -- native or fluent speaker of the target language -- interested in languages on the theoretical level, and preferably familiar - with many languages (to be able to think about them on an abstract level) -- familiar with functional programming languages such as ML or Haskell - (GF itself is a language similar to these) -- on Master's or PhD level in linguistics, computer science, or mathematics - - -But it is the quality of the assignment that is assessed, not any formal -requirements. The "typical participant" was described to give an idea of -who is likely to succeed in this. 
- - -===The Summer School=== - -A Summer School on resource grammars and applications will -be organized at the campus of Chalmers University of Technology in Gothenburg, -Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in -a resource grammar project; the participants are assumed to learn GF before -the Summer School, but how far they have come in their projects may vary. - -More information on the Summer School web page: - -[``http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html`` http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html] - - -==Other project ideas== - -===GF interpreter in Java=== - -The idea is to write a run-time system for GF grammars in Java. This enables -the use of **embedded grammars** in Java applications. This project is -a fresh-up of [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html], -now using the new run-time format PGF and addressing a new parsing algorithm. - -Requirements: Java, Haskell, basics of compilers and parsing algorithms. - - -===GF interpreter in C#=== - -The idea is to write a run-time system for GF grammars in C#. This enables -the use of **embedded grammars** in C# applications. This project is -similar to [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html] -on Java, now addressing C# and using the new run-time format PGF. - -Requirements: C#, Haskell, basics of compilers and parsing algorithms. - - -===GF localization library=== - -This is an idea for a software localization library using GF grammars. -The library should replace strings by grammar rules, which can be conceived -as very smart templates always guaranteeing grammatically correct output. -The library should be based on the -[GF Resource Grammar Library http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html], providing infrastructure -currently for 12 languages. 
- -Requirements: GF, some natural languages, some localization platform - - -===Multilingual grammar applications for mobile phones=== - -GF grammars can be compiled into programs that can be run on different -platforms, such as web browsers and mobile phones. An example is a -[numeral translator http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html] running on both these platforms. - -The proposed project is rather open: find some cool applications of -the technology that are useful or entertaining for mobile phone users. A -part of the project is to investigate implementation issues such as making -the best use of the phone's resources. Possible applications have -something to do with translation; one suggestion is an sms editor/translator. - -Requirements: GF, JavaScript, some phone application development tools - - -===Multilingual grammar applications for the web=== - -This project is rather open: find some cool applications of -the technology that are useful or entertaining on the web. Examples include -- translators: see [demo http://129.16.250.57:41296/translate] -- multilingual wikis: see [demo http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi] -- fridge magnets: see [demo http://129.16.250.57:41296/fridge] - - -Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI - - -===GMail gadget for GF=== - -It is possible to add custom gadgets to GMail. If you are going to write -e-mail in a foreign language then you probably will need help from -dictonary or you may want to check something in the grammar. GF provides -all resources that you may need but you have to think about how to -design gadget that fits well in the GMail environment and what -functionality from GF you want to expose. 
- -Requirements: GF, Google Web Toolkit - - - -==Dissemination and intellectual property== - -All code suggested here will be released under the LGPL just like -the current resource grammars and run-time GF libraries, -with the copyright held by respective authors. - -As a rule, the code will be distributed via the GF web site. - diff --git a/doc/gf-people.html b/doc/gf-people.html index 690084d3c..bc09412d0 100644 --- a/doc/gf-people.html +++ b/doc/gf-people.html @@ -13,12 +13,13 @@ </center> -Most of the code is by -<a "http://www.chalmers.se/cse/EN/organization/divisions/computing-science/people/angelov-krasimir">Krasimir Angelov</a>, -<a href="http://www.cs.chalmers.se/~bringert">Bjrn Bringert</a>, +The current developers and maintainers are +<a href="http://www.chalmers.se/cse/EN/organization/divisions/computing-science/people/angelov-krasimir">Krasimir Angelov</a>, +<a href="http://www.cs.chalmers.se/~hallgren">Thomas Hallgren</a>, and -<a href="http://www.cs.chalmers.se/~aarne">Aarne Ranta</a>. Bug reports should be -posted via the <a href="http://trac.haskell.org/gf/">GF bug tracker</a>. +<a href="http://www.cse.chalmers.se/~aarne">Aarne Ranta</a>. Bug reports should be +posted via the +<a href="http://code.google.com/p/grammatical-framework/issues/list">GF bug tracker</a>. 
 <p> @@ -27,19 +28,23 @@ Also the following people have contributed code to some of the versions: <p> -H&aring;kan Burden (Chalmers) +Gr&eacute;goire D&eacute;trez (University of Gothenburg) +<br> +Ramona Enache (University of Gothenburg) +<br> +<a href="http://www.cse.chalmers.se/alumni/bringert">Bj&ouml;rn Bringert</a> (University of Gothenburg) +<br> +H&aring;kan Burden (University of Gothenburg) <br> Hans-Joachim Daniels (Karlsruhe) <br> <a href="http://www.cs.chalmers.se/~markus">Markus Forsberg</a> (Chalmers) <br> -<a href="http://www.cs.chalmers.se/~hallgren">Thomas Hallgren</a> (Chalmers) -<br> -<a href="http://www.cs.chalmers.se/~krijo">Kristofer Johannisson</a> (Chalmers) +<a href="http://www.cs.chalmers.se/~krijo">Kristofer Johannisson</a> (University of Gothenburg) <br> -<a href="http://www.cs.chalmers.se/~janna">Janna Khegai</a> (Chalmers) +<a href="http://www.cs.chalmers.se/~janna">Janna Khegai</a> (Chalmers) <br> -<a href="http://www.cs.chalmers.se/~peb">Peter Ljungl&ouml;f</a> (Chalmers) +<a href="http://www.cs.chalmers.se/~peb">Peter Ljungl&ouml;f</a> (University of Gothenburg) <br> Petri M&auml;enp&auml;&auml; (Nokia) diff --git a/doc/gf-quickstart.html b/doc/gf-quickstart.html index 7a6971953..cd508d474 100644 --- a/doc/gf-quickstart.html +++ b/doc/gf-quickstart.html @@ -9,7 +9,7 @@ <p> Aarne Ranta <p> -3 September, 2007 +22 December 2010 (3 September, 2007) <p> @@ -20,7 +20,7 @@ Aarne Ranta This Quick Start shows two examples of how GF can be used. We assume that you have downloaded and installed GF, so that the command <tt>gf</tt> works for you. See download and install -instructions <a href="http://digitalgrammars.com/gf/download/">here</a>. +instructions <a href="../download/index.html">here</a>. @@ -61,39 +61,11 @@ and start GF again with the same command. Now you can even translate <i>this bread is very Italian</i>. </ol> To learn more on GF commands and -grammar development, go to the -<a href="tutorial/gf-tutorial2.html">New Grammarian's Tutorial</a>. 
+grammar development, go to one of the tutorials: +<ul> +<li> <a href="tutorial/gf-tutorial.html">GF Tutorial</a>: older, more programmer-oriented +<li> <a href="gf-lrec-2010.pdf">GF Resource Tutorial</a>: newer, more linguist-oriented +</ul> - -<h2>Multilingual authoring</h2> - -This demo also requires the GUI package, which makes the command -<tt>jgf</tt> work for you. -<ol> -<li> Download the file <a href="../examples/letter/Letter.gfcm"><tt>Letter.gfcm</tt></a>. -<li> Start the GF editor by the command -<pre> - gfeditor Letter.gfcm -</pre> -<li> When the editor window is open, select "Letter" from the "New" menu. -<li> Push the button "Random" in the lower end of the window. -<li> Move the pointer to some place in the text, e.g. to the first word (in any - of the languages), and click. The first word should now be highlighted and - a number of alternatives appear in the lower window part (a similar situation - is shown in the picture below). -<li> Double-click at some of the alternatives marked "ch ..." and observe how - the text changes in each of the languages. -</ol> -See the <a href="http://www.cs.chalmers.se/~aarne/GF2.0/doc/javaGUImanual/javaGUImanual.htm">Editor User Manual</a> -for more information on how to use the -editor. To change the grammars, you should not edit <tt>Letter.gfcm</tt>, -which is low-level code generated by the GF grammar compiler. Instead, you -can edit the files in <tt>examples/letter</tt> in the GF grammar package, -and compile by using the script <tt>mkLetter.gfs</tt> in the same package. 
- -<p> - -<img src="quick-editor.gif"> - </body></html> diff --git a/doc/gf-refman.html b/doc/gf-refman.html index 104f644c7..188a063a8 100644 --- a/doc/gf-refman.html +++ b/doc/gf-refman.html @@ -106,7 +106,7 @@ This document is not an introduction to GF; such introduction can be found in the GF tutorial available on line on the GF web page, </P> <P> -<A HREF="http://digitalgrammars.com/gf"><CODE>digitalgrammars.com/gf</CODE></A> +<A HREF="http://grammaticalframework.org"><CODE>grammaticalframework.org</CODE></A> </P> <P> This manual covers only the language, not the GF compiler or diff --git a/doc/gf-statistics.txt b/doc/gf-statistics.txt deleted file mode 100644 index 499ad7d09..000000000 --- a/doc/gf-statistics.txt +++ /dev/null @@ -1,289 +0,0 @@ -(Adapted from KeY statistics by Vladimir Klebanov) - -This is GF right now: - -Total Physical Source Lines of Code (SLOC) = 42,467 - -Development Effort Estimate, Person-Years (Person-Months) = 10.24 (122.932) - (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05)) - -Schedule Estimate, Years (Months) = 1.30 (15.56) - (Basic COCOMO model, Months = 2.5 * (person-months**0.38)) - -Estimated Average Number of Developers (Effort/Schedule) = 7.90 - -Total Estimated Cost to Develop = $ 1,383,870 - (average salary = $56,286/year, overhead = 2.40). - -SLOCCount, Copyright (C) 2001-2004 David A. 
Wheeler - - - ------------ basis of counting: Haskell code + BNFC code - generated Happy parsers - --- GF/src% wc -l *.hs GF/*.hs GF/*/*.hs GF/*/*/*.hs GF/*/*.cf JavaGUI/*.java --- date Fri Jun 3 10:00:31 CEST 2005 - - 104 GF.hs - 402 GF/API.hs - 98 GF/GFModes.hs - 379 GF/Shell.hs - 4 GF/Today.hs - 43 GF/API/BatchTranslate.hs - 145 GF/API/GrammarToHaskell.hs - 77 GF/API/IOGrammar.hs - 25 GF/API/MyParser.hs - 177 GF/Canon/AbsGFC.hs - 37 GF/Canon/ByLine.hs - 192 GF/Canon/CanonToGrammar.hs - 293 GF/Canon/CMacros.hs - 79 GF/Canon/GetGFC.hs - 86 GF/Canon/GFC.hs - 291 GF/Canon/LexGFC.hs - 201 GF/Canon/Look.hs - 235 GF/Canon/MkGFC.hs - 46 GF/Canon/PrExp.hs - 352 GF/Canon/PrintGFC.hs - 147 GF/Canon/Share.hs - 207 GF/Canon/SkelGFC.hs - 46 GF/Canon/TestGFC.hs - 49 GF/Canon/Unlex.hs - 202 GF/CF/CanonToCF.hs - 213 GF/CF/CF.hs - 217 GF/CF/CFIdent.hs - 62 GF/CF/CFtoGrammar.hs - 47 GF/CF/CFtoSRG.hs - 206 GF/CF/ChartParser.hs - 191 GF/CF/EBNF.hs - 45 GF/CFGM/AbsCFG.hs - 312 GF/CFGM/LexCFG.hs - 157 GF/CFGM/PrintCFG.hs - 109 GF/CFGM/PrintCFGrammar.hs - 85 GF/CF/PPrCF.hs - 150 GF/CF/PrLBNF.hs - 106 GF/CF/Profile.hs - 141 GF/Compile/BackOpt.hs - 763 GF/Compile/CheckGrammar.hs - 337 GF/Compile/Compile.hs - 136 GF/Compile/Extend.hs - 124 GF/Compile/GetGrammar.hs - 282 GF/Compile/GrammarToCanon.hs - 93 GF/Compile/MkConcrete.hs - 128 GF/Compile/MkResource.hs - 83 GF/Compile/MkUnion.hs - 146 GF/Compile/ModDeps.hs - 294 GF/Compile/NewRename.hs - 227 GF/Compile/Optimize.hs - 76 GF/Compile/PGrammar.hs - 84 GF/Compile/PrOld.hs - 119 GF/Compile/Rebuild.hs - 63 GF/Compile/RemoveLiT.hs - 274 GF/Compile/Rename.hs - 535 GF/Compile/ShellState.hs - 135 GF/Compile/Update.hs - 129 GF/Conversion/GFC.hs - 149 GF/Conversion/GFCtoSimple.hs - 53 GF/Conversion/MCFGtoCFG.hs - 46 GF/Conversion/RemoveEpsilon.hs - 102 GF/Conversion/RemoveErasing.hs - 82 GF/Conversion/RemoveSingletons.hs - 137 GF/Conversion/SimpleToFinite.hs - 26 GF/Conversion/SimpleToMCFG.hs - 230 GF/Conversion/Types.hs - 143 GF/Data/Assoc.hs - 
118 GF/Data/BacktrackM.hs - 20 GF/Data/ErrM.hs - 119 GF/Data/GeneralDeduction.hs - 30 GF/Data/Glue.hs - 67 GF/Data/IncrementalDeduction.hs - 61 GF/Data/Map.hs - 662 GF/Data/Operations.hs - 127 GF/Data/OrdMap2.hs - 120 GF/Data/OrdSet.hs - 193 GF/Data/Parsers.hs - 64 GF/Data/RedBlack.hs - 150 GF/Data/RedBlackSet.hs - 19 GF/Data/SharedString.hs - 127 GF/Data/SortedList.hs - 134 GF/Data/Str.hs - 120 GF/Data/Trie2.hs - 129 GF/Data/Trie.hs - 71 GF/Data/Utilities.hs - 243 GF/Data/Zipper.hs - 78 GF/Embed/EmbedAPI.hs - 113 GF/Embed/EmbedCustom.hs - 137 GF/Embed/EmbedParsing.hs - 50 GF/Formalism/CFG.hs - 51 GF/Formalism/GCFG.hs - 58 GF/Formalism/MCFG.hs - 246 GF/Formalism/SimpleGFC.hs - 349 GF/Formalism/Utilities.hs - 30 GF/Fudgets/ArchEdit.hs - 134 GF/Fudgets/CommandF.hs - 51 GF/Fudgets/EventF.hs - 59 GF/Fudgets/FudgetOps.hs - 37 GF/Fudgets/UnicodeF.hs - 86 GF/Grammar/AbsCompute.hs - 38 GF/Grammar/Abstract.hs - 149 GF/Grammar/AppPredefined.hs - 312 GF/Grammar/Compute.hs - 215 GF/Grammar/Grammar.hs - 46 GF/Grammar/Lockfield.hs - 189 GF/Grammar/LookAbs.hs - 182 GF/Grammar/Lookup.hs - 745 GF/Grammar/Macros.hs - 340 GF/Grammar/MMacros.hs - 115 GF/Grammar/PatternMatch.hs - 279 GF/Grammar/PrGrammar.hs - 121 GF/Grammar/Refresh.hs - 44 GF/Grammar/ReservedWords.hs - 251 GF/Grammar/TC.hs - 301 GF/Grammar/TypeCheck.hs - 96 GF/Grammar/Unify.hs - 101 GF/Grammar/Values.hs - 89 GF/Infra/CheckM.hs - 43 GF/Infra/Comments.hs - 152 GF/Infra/Ident.hs - 390 GF/Infra/Modules.hs - 358 GF/Infra/Option.hs - 179 GF/Infra/Print.hs - 331 GF/Infra/ReadFiles.hs - 337 GF/Infra/UseIO.hs - 153 GF/OldParsing/CFGrammar.hs - 283 GF/OldParsing/ConvertFiniteGFC.hs - 121 GF/OldParsing/ConvertFiniteSimple.hs - 34 GF/OldParsing/ConvertGFCtoMCFG.hs - 122 GF/OldParsing/ConvertGFCtoSimple.hs - 44 GF/OldParsing/ConvertGrammar.hs - 52 GF/OldParsing/ConvertMCFGtoCFG.hs - 30 GF/OldParsing/ConvertSimpleToMCFG.hs - 43 GF/OldParsing/GCFG.hs - 86 GF/OldParsing/GeneralChart.hs - 148 GF/OldParsing/GrammarTypes.hs - 50 
GF/OldParsing/IncrementalChart.hs - 206 GF/OldParsing/MCFGrammar.hs - 43 GF/OldParsing/ParseCFG.hs - 82 GF/OldParsing/ParseCF.hs - 177 GF/OldParsing/ParseGFC.hs - 37 GF/OldParsing/ParseMCFG.hs - 161 GF/OldParsing/SimpleGFC.hs - 188 GF/OldParsing/Utilities.hs - 51 GF/Parsing/CFG.hs - 66 GF/Parsing/CF.hs - 151 GF/Parsing/GFC.hs - 64 GF/Parsing/MCFG.hs - 83 GF/Printing/PrintParser.hs - 127 GF/Printing/PrintSimplifiedTerm.hs - 190 GF/Shell/CommandL.hs - 556 GF/Shell/Commands.hs - 524 GF/Shell/HelpFile.hs - 79 GF/Shell/JGF.hs - 171 GF/Shell/PShell.hs - 221 GF/Shell/ShellCommands.hs - 66 GF/Shell/SubShell.hs - 87 GF/Shell/TeachYourself.hs - 296 GF/Source/AbsGF.hs - 229 GF/Source/GrammarToSource.hs - 312 GF/Source/LexGF.hs - 528 GF/Source/PrintGF.hs - 353 GF/Source/SkelGF.hs - 657 GF/Source/SourceToGrammar.hs - 58 GF/Source/TestGF.hs - 72 GF/Speech/PrGSL.hs - 65 GF/Speech/PrJSGF.hs - 128 GF/Speech/SRG.hs - 103 GF/Speech/TransformCFG.hs - 30 GF/System/ArchEdit.hs - 90 GF/System/Arch.hs - 27 GF/System/NoReadline.hs - 27 GF/System/Readline.hs - 73 GF/System/Tracing.hs - 25 GF/System/UseReadline.hs - 63 GF/Text/Arabic.hs - 97 GF/Text/Devanagari.hs - 72 GF/Text/Ethiopic.hs - 99 GF/Text/ExtendedArabic.hs - 37 GF/Text/ExtraDiacritics.hs - 172 GF/Text/Greek.hs - 53 GF/Text/Hebrew.hs - 95 GF/Text/Hiragana.hs - 69 GF/Text/LatinASupplement.hs - 47 GF/Text/OCSCyrillic.hs - 45 GF/Text/Russian.hs - 77 GF/Text/Tamil.hs - 125 GF/Text/Text.hs - 69 GF/Text/Unicode.hs - 47 GF/Text/UTF8.hs - 56 GF/Translate/GFT.hs - 427 GF/UseGrammar/Custom.hs - 435 GF/UseGrammar/Editing.hs - 180 GF/UseGrammar/Generate.hs - 71 GF/UseGrammar/GetTree.hs - 143 GF/UseGrammar/Information.hs - 228 GF/UseGrammar/Linear.hs - 130 GF/UseGrammar/Morphology.hs - 70 GF/UseGrammar/Paraphrases.hs - 157 GF/UseGrammar/Parsing.hs - 66 GF/UseGrammar/Randomized.hs - 170 GF/UseGrammar/Session.hs - 186 GF/UseGrammar/Tokenize.hs - 43 GF/UseGrammar/Transfer.hs - 122 GF/Visualization/NewVisualizationGrammar.hs - 123 
GF/Visualization/VisualizeGrammar.hs - 63 GF/Conversion/SimpleToMCFG/Coercions.hs - 256 GF/Conversion/SimpleToMCFG/Nondet.hs - 129 GF/Conversion/SimpleToMCFG/Strict.hs - 71 GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs - 281 GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs - 277 GF/OldParsing/ConvertGFCtoMCFG/Old.hs - 189 GF/OldParsing/ConvertGFCtoMCFG/Strict.hs - 70 GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs - 245 GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs - 277 GF/OldParsing/ConvertSimpleToMCFG/Old.hs - 139 GF/OldParsing/ConvertSimpleToMCFG/Strict.hs - 83 GF/OldParsing/ParseCFG/General.hs - 142 GF/OldParsing/ParseCFG/Incremental.hs - 156 GF/OldParsing/ParseMCFG/Basic.hs - 103 GF/Parsing/CFG/General.hs - 150 GF/Parsing/CFG/Incremental.hs - 98 GF/Parsing/CFG/PInfo.hs - 226 GF/Parsing/MCFG/Active2.hs - 304 GF/Parsing/MCFG/Active.hs - 144 GF/Parsing/MCFG/Incremental2.hs - 163 GF/Parsing/MCFG/Incremental.hs - 128 GF/Parsing/MCFG/Naive.hs - 163 GF/Parsing/MCFG/PInfo.hs - 194 GF/Parsing/MCFG/Range.hs - 183 GF/Parsing/MCFG/ViaCFG.hs - 167 GF/Canon/GFC.cf - 36 GF/CFGM/CFG.cf - 321 GF/Source/GF.cf - 272 JavaGUI/DynamicTree2.java - 272 JavaGUI/DynamicTree.java - 2357 JavaGUI/GFEditor2.java - 1420 JavaGUI/GFEditor.java - 30 JavaGUI/GrammarFilter.java - 13 JavaGUI/LinPosition.java - 18 JavaGUI/MarkedArea.java - 1552 JavaGUI/Numerals.java - 22 JavaGUI/Utils.java - 5956 total - 48713 total - -- 2131 GF/Canon/ParGFC.hs - 3336 GF/Source/ParGF.hs - 779 GF/CFGM/ParCFG.hs - - 42467 total - --------- - -sloccount sloc = - let - ksloc = sloc / 1000 - effort = 2.4 * (ksloc ** 1.05) - schedule = 2.5 * (effort ** 0.38) - develops = effort / schedule - cost = 56286 * (effort/12) * 2.4 - in - [sloc,ksloc,effort,effort/12,schedule,schedule/12,develops,cost] diff --git a/doc/gf-summerschool.txt b/doc/gf-summerschool.txt deleted file mode 100644 index 0acf9177d..000000000 --- a/doc/gf-summerschool.txt +++ /dev/null @@ -1,533 +0,0 @@ -GF Resource Grammar Summer School -Gothenburg, 17-28 August 2009 
-Aarne Ranta (aarne at chalmers.se) - -%!Encoding : iso-8859-1 - -%!target:html -%!postproc(html): #BECE <center> -%!postproc(html): #ENCE </center> -%!postproc(html): #GRAY <font color="green" size="-1"> -%!postproc(html): #EGRAY </font> -%!postproc(html): #RED <font color="red"> -%!postproc(html): #YELLOW <font color="orange"> -%!postproc(html): #ERED </font> - -#BECE -[school-langs.png] -#ENCE - - -//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu// - - -==News== - -An on-line course //GF for Resource Grammar Writers// will start on -Monday 20 April at 15.30 CEST. The slides and recordings of the five -45-minute lectures will be made available via this web page. If requested, -the course may be repeated in the beginning of the summer school. - - -==Executive summary== - -GF Resource Grammar Library is an open-source computational grammar resource -that currently covers 12 languages. -The Summer School is a part of a collaborative effort to extend the library -to all of the 23 official EU languages. Also other languages -chosen by the participants are welcome. - -The missing EU languages are: -Czech, Dutch, Estonian, Greek, Hungarian, Irish, Latvian, Lithuanian, -Maltese, Portuguese, Slovak, and Slovenian. There is also more work to -be done on Polish and Romanian. - -The linguistic coverage of the library includes the inflectional morphology -and basic syntax of each language. It can be used in GF applications -and also ported to other formats. It can also be used for building other -linguistic resources, such as morphological lexica and parsers. -The library is licensed under LGPL. - -In the summer school, each language will be implemented by one or two students -working together. A morphology implementation will be credited -as a Chalmers course worth 7.5 ECTS points; adding a syntax implementation -will be worth more. The estimated total work load is 1-2 months for the -morphology, and 3-6 months for the whole grammar. 
- -Participation in the course is free. Registration is done via the course's -Google group, [``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]. The registration deadline is 15 June 2009. - -Some travel grants will be available. They are distributed on the basis of a -GF programming contest in April and May. - -The summer school will be held on 17-28 August 2009, at the campus of -Chalmers University of Technology in Gothenburg, Sweden. - - -[align6.png] - -//Word alignment produced by GF from the resource grammar in Bulgarian, English, Italian, German, Finnish, French, and Swedish.// - -==Introduction== - -Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this -document. There is a growing need of linguistic resources for these -languages, to help in tasks such as translation and information retrieval. -These resources should be **portable** and **freely accessible**. -Languages marked in red in the diagram are of particular interest for -the summer school, since they are those on which the effort will be concentrated. - -GF (Grammatical Framework, -[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf]) -is a **functional programming language** designed for writing natural -language grammars. It provides an efficient platform for this task, due to -its modern characteristics: -- It is a functional programming language, similar to Haskell and ML. -- It has a static type system and type checker. -- It has a powerful module system supporting separate compilation - and data abstraction. -- It has an optimizing compiler to **Portable Grammar Format** (PGF). -- PGF can be further compiled to other formats, such as JavaScript and - speech recognition language models. -- GF has a **resource grammar library** giving access to the morphology and - basic syntax of 12 languages. - - -In addition to "ordinary" grammars for single languages, GF -supports **multilingual grammars**. 
A multilingual GF grammar consists of an -**abstract syntax** and a set of **concrete syntaxes**. -An abstract syntax is system of **trees**, serving as a semantic -model or an ontology. A concrete syntax is a mapping from abstract syntax -trees to strings of a particular language. - -These mappings defined in concrete syntax are **reversible**: they -can be used both for **generating** strings from trees, and for -**parsing** strings into trees. Combinations of generation and -parsing can be used for **translation**, where the abstract -syntax works as an **interlingua**. Thus GF has been used as a -framework for building translation systems in several areas -of application and large sets of languages. - - - -==The GF resource grammar library== - -The GF resource grammar library is a set of grammars usable as libraries when -building translation systems and other applications. -The library currently covers -the 9 languages coloured in green in the diagram above; in addition, -Catalan, Norwegian, and Russian are covered, and there is ongoing work on -Arabic, Hindi/Urdu, Polish, Romanian, and Thai. - -The purpose of the resource grammar library is to define the "low-level" structure -of a language: inflection, word order, agreement. This structure belongs to what -linguists call morphology and syntax. It can be very complex and requires -a lot of knowledge. Yet, when translating from one language to -another, knowing morphology and syntax is but a part of what is needed. -The translator (whether human -or machine) must understand the meaning of what is translated, and must also know -the idiomatic way to express the meaning in the target language. This knowledge -can be very domain-dependent and requires in general an expert in the field to -reach high quality: a mathematician in the field of mathematics, a meteorologist -in the field of weather reports, etc. 
- -The problem is to find a person who is an expert in both the domain of translation -and in the low-level linguistic details. It is the rareness of this combination -that has made it difficult to build interlingua-based translation systems. -The GF resource grammar library has the mission of helping in this task. -It encapsulates the low-level linguistics in program modules -accessed through easy-to-use interfaces. -Experts on different domains can build translation systems by using the library, -without knowing low-level linguistics. The idea is much the same as when a -programmer builds a graphical user interface (GUI) from high-level elements such as -buttons and menus, without having to care about pixels or geometrical forms. - - -===Missing EU languages, by the family=== - -Writing a grammar for a language is usually easier if other languages -from the same family already have grammars. The colours have the same -meaning as in the diagram above. - -Baltic: -#RED Latvian #ERED -#RED Lithuanian #ERED - -Celtic: -#RED Irish #ERED - -Fenno-Ugric: -#RED Estonian #ERED -#GRAY Finnish #EGRAY -#RED Hungarian #ERED - -Germanic: -#GRAY Danish #EGRAY -#RED Dutch #ERED -#GRAY English #EGRAY -#GRAY German #EGRAY -#GRAY Swedish #EGRAY - -Hellenic: -#RED Greek #ERED - -Romance: -#GRAY French #EGRAY -#GRAY Italian #EGRAY -#RED Portuguese #ERED -#YELLOW Romanian #ERED -#GRAY Spanish #EGRAY - -Semitic: -#RED Maltese #ERED - -Slavonic: -#GRAY Bulgarian #EGRAY -#RED Czech #ERED -#YELLOW Polish #ERED -#RED Slovak #ERED -#RED Slovenian #ERED - - - - - - -===Applications of the library=== - -In addition to translation, the library is also useful in **localization**, -that is, porting a piece of software to new languages. 
-The GF resource grammar library has been used in three major projects that need -interlingua-based translation or localization of systems to new languages: -- in KeY, - [``http://www.key-project.org/`` http://www.key-project.org/], - for writing formal and informal software specifications (3 languages) -- in WebALT, - [``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html], - for translating mathematical exercises to 7 languages -- in TALK [``http://www.talk-project.org`` http://www.talk-project.org], - where the library was used for localizing spoken dialogue systems - to six languages - - -The library is also a generic **linguistic resource**, -which can be used for tasks -such as language teaching and information retrieval. The liberal license (LGPL) -makes it usable for anyone and for any task. GF also has tools supporting the -use of grammars in programs written in other -programming languages: C, C++, Haskell, -Java, JavaScript, and Prolog. In connection with the TALK project, -support has also been -developed for translating GF grammars to language models used in speech -recognition (GSL/Nuance, HTK/ATK, SRGS, JSGF). - - - -===The structure of the library=== - -The library has the following main parts: -- **Inflection paradigms**, covering the inflection of each language. -- **Core Syntax**, covering a large set of syntax rules that - can be implemented for all languages involved. -- **Common Test Lexicon**, giving ca. 500 common words that can be used for - testing the library. -- **Language-Specific Syntax Extensions**, covering syntax rules that are - not implementable for all languages. -- **Language-Specific Lexica**, word lists for each language, with - accurate morphological and syntactic information. - - -The goal of the summer school is to implement, for each language, at least -the first three components. The latter two are more open-ended in character.
- - -==The summer school== - -The goal of the summer school is to extend the GF resource grammar library -to covering all 23 EU languages, which means we need 15 new languages. -We also welcome other languages than these 23, -if there are interested participants. - -The amount of work and skill is between a Master's thesis and a PhD thesis. -The Russian implementation was made by Janna Khegai as a part of her -PhD thesis; the thesis contains other material, too. -The Arabic implementation was started by Ali El Dada in his Master's thesis, -but the thesis does not cover the whole API. The realistic amount of work is -somewhere between 3 and 8 person months, -but this is very much language-dependent. -Dutch, for instance, can profit from previous implementations of German and -Scandinavian languages, and will probably require less work. -Latvian and Lithuanian are the first languages of the Baltic family and -will probably require more work. - -In any case, the proposed allocation of work power is 2 participants per -language. They will do 1 months' worth of home work, followed -by 2 weeks of summer school, followed by 4 months work at home. -Who are these participants? - - -===Selecting participants=== - -Persons interested to participate in the Summer School should sign up in -the **Google Group** of the course, - -[``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/] - -The registration deadline is 15 June 2009. - -Notice: you can sign up in the Google -group even if you are not planning to attend the summer school, but are -just interested in the topic. There will be a separate registration to the -school itself later. - -The participants are recommended to learn GF in advance, by self-study from the -[tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html]. -This should take a couple of weeks. An **on-line course** will be -arranged on 20-29 April to help in getting started with GF. 
- -At the end of the on-line course, a **programming assignment** will be published. -This assignment will test skills required in resource grammar programming. -Work on the assignment will take a couple of weeks. -Those who are interested in getting a travel grant will submit -their sample resource grammar fragment -to the Summer School Committee by 12 May. -The Committee then decides who is given a travel grant of up to 1000 EUR. - -Notice: you can participate in the summer school without following the on-line -course or participating in the contest. These things are required only if you -want a travel grant. If requested by sufficiently many participants, the lectures of -the on-line course will be repeated in the beginning of the summer school. - -The summer school itself is devoted to working on resource grammars. -In addition to grammar writing itself, testing and evaluation is -performed. One way to do this is via adding new languages -to resource grammar applications - in particular, to the WebALT mathematical -exercise translator. - -The resource grammars are expected to be completed by December 2009. They will -be published on the GF website and licensed under LGPL. - -The participants are encouraged to contact each other and even work in groups. - - - -===Who is qualified=== - -Writing a resource grammar implementation requires good general programming -skills, and a good explicit knowledge of the grammar of the target language. -A typical participant could be -- native or fluent speaker of the target language -- interested in languages on the theoretical level, and preferably familiar - with many languages (to be able to think about them on an abstract level) -- familiar with functional programming languages such as ML or Haskell - (GF itself is a language similar to these) -- on Master's or PhD level in linguistics, computer science, or mathematics - - -But it is the quality of the assignment that is assessed, not any formal -requirements.
The "typical participant" was described to give an idea of -who is likely to succeed in this. - - -===Costs=== - -The summer school is free of charge. - -Some travel grants are given, on the basis of a programming contest, -to cover travel and accommodation costs up to 1000 EUR -per person. - -The number of grants will be decided during Spring 2009, and the grant -holders will be notified before the beginning of June. - -Special terms will apply to students in -[GSLT http://www.gslt.hum.gu.se/] and -[NGSLT http://ngslt.org/]. - - - - - -===Teachers=== - -A list of teachers will be published here later. Some of the local teachers -probably involved are the following: -- Krasimir Angelov -- Robin Cooper -- Håkan Burden -- Markus Forsberg -- Harald Hammarström -- Peter Ljunglöf -- Aarne Ranta - - -More teachers are welcome! If you are interested, please contact us so that -we can discuss your involvement and travel arrangements. - -In addition to teachers, we will look for consultants who can help to assess -the results for each language. Please contact us! - - - -===The Summer School Committee=== - -This committee consists of a number of teachers and informants, -who will select the participants. It will be selected by April 2009. - - -===Time and Place=== - -The summer school will -be organized at the campus of Chalmers University of Technology in Gothenburg, -Sweden, on 17-28 August 2009. - -Time schedule: -- February: announcement of summer school -- 20-29 April: on-line course -- 12 May: submission deadline for assignment work -- 31 May: review of assignments, notifications of acceptance -- 15 June: **registration deadline** -- 17-28 August: Summer School -- September-December: homework on resource grammars -- December: release of the extended Resource Grammar Library - - -===Dissemination and intellectual property=== - -The new resource grammars will be released under the LGPL just like -the current resource grammars, -with the copyright held by respective authors.
- -The grammars will be distributed via the GF web site. - - - -==Why I should participate== - -Seven reasons: -+ participation in a pioneering language technology work in an - enthusiastic atmosphere -+ work and fun with people from all over Europe and the world -+ job opportunities and business ideas -+ credits: the school project will be established as a course at Chalmers worth - 7.5 or 15 ECTS points per person, depending on the work accomplished; also - extensions to Master's thesis will be considered (special credit arrangements - for [GSLT http://www.gslt.hum.gu.se/] and [NGSLT http://ngslt.org/]) -+ merits: the resulting grammar can easily lead to a published paper (see below) -+ contribution to the multilingual and multicultural development of Europe and the - world -+ free trip and stay in Gothenburg (for travel grant students) - - -==More information== - -[Course Google Group http://groups.google.com/group/gf-resource-school-2009/] - -[GF web page http://digitalgrammars.com/gf/] - -[GF tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html] - -[GF resource synopsis http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html] - -[Resource-HOWTO document http://digitalgrammars.com/gf/doc/Resource-HOWTO.html] - - -===Contact=== - -Håkan Burden: burden at chalmers se - -Aarne Ranta: aarne at chalmers se - - - -===Selected publications from earlier resource grammar projects=== - -K. Angelov. -Type-Theoretical Bulgarian Grammar. -In B. Nordström and A. Ranta (eds), -//Advances in Natural Language Processing (GoTAL 2008)//, -LNCS/LNAI 5221, Springer, -2008. - -B. Bringert. -//Programming Language Techniques for Natural Language Applications//. -PhD thesis, Computer Science, University of Gothenburg, -2008. - -A. El Dada and A. Ranta. -Implementing an Open Source Arabic Resource Grammar in GF. -In M. Mughazy (ed), -//Perspectives on Arabic Linguistics XX.
Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26// -John Benjamins Publishing Company. -2007. - -A. El Dada. -Implementation of the Arabic Numerals and their Syntax in GF. -Computational Approaches to Semitic Languages: Common Issues and Resources, - ACL-2007 Workshop, -June 28, 2007, Prague. -2007. - -H. Hammarstrm and A. Ranta. -Cardinal Numerals Revisited in GF. -//Workshop on Numerals in the World's Languages//. -Dept. of Linguistics Max Planck Institute for Evolutionary Anthropology, Leipzig, -2004. - -M. Humayoun, H. Hammarstrm, and A. Ranta. -Urdu Morphology, Orthography and Lexicon Extraction. -//CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages//, -July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University. -2007. - -K. Johannisson. -//Formal and Informal Software Specifications.// -Phd thesis, Computer Science, University of Gothenburg, -2005. - -J. Khegai. -GF parallel resource grammars and Russian. -In proceedings of ACL2006 - (The joint conference of the International Committee on Computational - Linguistics and the Association for Computational Linguistics) (pp. 475-482), - Sydney, Australia, July 2006. - -J. Khegai. -//Language engineering in Grammatical Framework (GF)//. -Phd thesis, Computer Science, Chalmers University of Technology, -2006. - -W. Ng'ang'a. -Multilingual content development for eLearning in Africa. -eLearning Africa: 1st Pan-African Conference on ICT for Development, - Education and Training. 24-26 May 2006, Addis Ababa, Ethiopia. -2006. - -N. Perera and A. Ranta. -Dialogue System Localization with the GF Resource Grammar Library. -//SPEECHGRAM 2007: ACL Workshop on Grammar-Based Approaches to Spoken Language Processing//, -June 29, 2007, Prague. -2007. - -A. Ranta. -Modular Grammar Engineering in GF. -//Research on Language and Computation//, -5:133-158, 2007. - -A. Ranta. -How predictable is Finnish morphology? 
An experiment on lexicon construction. -In J. Nivre, M. Dahllf and B. Megyesi (eds), -//Resourceful Language Technology: Festschrift in Honor of Anna Sgvall Hein//, -University of Uppsala, -2008. - -A. Ranta. Grammars as Software Libraries. -To appear in -Y. Bertot, G. Huet, J-J. Lvy, and G. Plotkin (eds.), -//From Semantics to Computer Science//, -Cambridge University Press, Cambridge, 2009. - -A. Ranta and K. Angelov. -Implementing Controlled Languages in GF. -To appear in the proceedings of //CNL 2009//. - diff --git a/doc/gf3-release.html b/doc/gf3-release.html deleted file mode 100644 index 75557c94a..000000000 --- a/doc/gf3-release.html +++ /dev/null @@ -1,73 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" CONTENT="http://txt2tags.sf.net"> -<TITLE>GF 3.0</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> -<P ALIGN="center"><CENTER><H1>GF 3.0</H1> -<FONT SIZE="4"> -<I>Krasimir Angelov, Bjrn Bringert, and Aarne Ranta</I><BR> -Beta release, 27 June 2008 -</FONT></CENTER> - -<P> -GF Version 3.0 is a major revision of GF. The source language is a superset of the -language in 2.9, which means backward compatibility. But the target languages, the -compiler implementation, and the functionalities (e.g. the shell) have undergone -radical changes. -</P> -<H2>New features</H2> -<P> -Here is a summary of the main novelties visible to the user: -</P> -<UL> -<LI><B>Size</B>: the source code and the executable binary size have gone - down to about the half of 2.9. -<LI><B>Portability</B>: the new back end format PGF (Portable Grammar Format) is - much simpler than the old GFC format, and therefore easier to port to new - platforms. -<LI><B>Multilingual web page support</B>: as an example of portability, GF 3.0 provides a - compiler from PGF to JavaScript. There are also JavaScript libraries for creating - translators and syntax editors as client-side web applications. 
-<LI><B>Incremental parsing</B>: there is a possibility of word completion when - input strings are sent to the parser. -<LI><B>Application programmer's interfaces</B>: both source-GF and PGF formats, - the shell, and the compiler are accessible via high-level APIs. -<LI><B>Resource library version 1.4</B>: more coverage, more languages; some of - the new GF language features are exploited. -<LI><B>Uniform character encoding</B>: UTF8 in generated files, user-definable in - source files -</UL> - -<H2>Non-supported features</H2> -<P> -There are some features of GF 2.9 that will <I>not</I> work in the 3.0 beta release. -</P> -<UL> -<LI>Java Editor GUI: we now see the JavaScript editor as the main form of - syntax editing. -<LI>Pre-module multi-file grammar format: the grammar format of GF before version 2.0 - is still not yet supported. -<LI>Context-free and EBNF input grammar formats. -<LI>Probabilistic GF grammars. -<LI>Some output formats: LBNF. -<LI>Some GF shell commands: while the main ones will be supported with their familiar - syntax and options, some old commands have not been included. The GF shell - command <CODE>help -changes</CODE> gives the actual list. -</UL> - -<P> -Users who want to have these features are welcome to contact us, -and even more welcome to contribute code that restores them! -</P> -<H2>GF language extensions</H2> -<P> -Operations for defining patterns. -</P> -<P> -Inheritance of overload groups. -</P> - -<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -thtml doc/gf3-release.txt --> -</BODY></HTML> diff --git a/doc/gf3-release.txt b/doc/gf3-release.txt deleted file mode 100644 index 631752c90..000000000 --- a/doc/gf3-release.txt +++ /dev/null @@ -1,58 +0,0 @@ -GF 3.0 -Krasimir Angelov, Bjrn Bringert, and Aarne Ranta -Beta release, 27 June 2008 - - -GF Version 3.0 is a major revision of GF. The source language is a superset of the -language in 2.9, which means backward compatibility. 
But the target languages, the -compiler implementation, and the functionalities (e.g. the shell) have undergone -radical changes. - - -==New features== - -Here is a summary of the main novelties visible to the user: -- **Size**: the source code and the executable binary size have gone - down to about the half of 2.9. -- **Portability**: the new back end format PGF (Portable Grammar Format) is - much simpler than the old GFC format, and therefore easier to port to new - platforms. -- **Multilingual web page support**: as an example of portability, GF 3.0 provides a - compiler from PGF to JavaScript. There are also JavaScript libraries for creating - translators and syntax editors as client-side web applications. -- **Incremental parsing**: there is a possibility of word completion when - input strings are sent to the parser. -- **Application programmer's interfaces**: both source-GF and PGF formats, - the shell, and the compiler are accessible via high-level APIs. -- **Resource library version 1.4**: more coverage, more languages; some of - the new GF language features are exploited. -- **Uniform character encoding**: UTF8 in generated files, user-definable in - source files - - -==Non-supported features== - -There are some features of GF 2.9 that will //not// work in the 3.0 beta release. -- Java Editor GUI: we now see the JavaScript editor as the main form of - syntax editing. -- Pre-module multi-file grammar format: the grammar format of GF before version 2.0 - is still not yet supported. -- Context-free and EBNF input grammar formats. -- Probabilistic GF grammars. -- Some output formats: LBNF. -- Some GF shell commands: while the main ones will be supported with their familiar - syntax and options, some old commands have not been included. The GF shell - command ``help -changes`` gives the actual list. - - -Users who want to have these features are welcome to contact us, -and even more welcome to contribute code that restores them! 
- - -==GF language extensions== - -Operations for defining patterns. - -Inheritance of overload groups. - - diff --git a/doc/index.html b/doc/index.html index e4aa842ff..f6bbf7f1a 100644 --- a/doc/index.html +++ b/doc/index.html @@ -13,28 +13,20 @@ <h1>Grammatical Framework Documents</h1> </center> -<b>Top-3 documents</b>: -<a href="gf-tutorial.html">Tutorial</a> +<b>Top-5 documents</b>: -| - -<a href="gf-refman.html">ReferenceManual</a> - -| - -<a href="../lib/resource/doc/synopsis.html">LibrarySynopsis</a> +<a href="gf-quickstart.html">Quick start instruction</a>. +<a href="tutorial/gf-tutorial.html">Old Tutorial</a>, application-oriented. -<h2>Tutorials</h2> +<a href="gf-lrec-2010.pdf">New Tutorial</a>, linguistics-oriented. -<a href="gf-quickstart.html">Quick start instruction</a>. +<a href="gf-refman.html">ReferenceManual</a>. -<p> +<a href="../lib/resource/doc/synopsis.html">LibrarySynopsis</a>. -<a href="gf-tutorial.html">GF Tutorial</a>, -Now up-to-date for GF version 2.9. Covers all of GF. @@ -49,144 +41,13 @@ in a summary format. <a href="gf-refman.html">GF Reference Manual</a>. A full-scale reference manual of the GF language. -<p> - -<a href="gf-manual.html"> -User Manual</a> explaining the GF user interfaces and command language (slightly -outdated). - -<p> - -<a href="../../GF2.0/doc/javaGUImanual/javaGUImanual.htm">Editor User Manual</a> -on editing in the Java interface. - -<p> - -<a href="gf-compiler.png">Chart of GF grammar compiler phases</a>. - - - -<h2>Grammar library documentation</h2> - -<a href="gf-tutorial.html#chapfive">Resource Grammar Tutorial Chapter</a>. - -<p> - -<a href="../lib/resource/doc/synopsis.html">Resource Grammar Synopsis</a> -for library users. With APIs and use examples. - -<p> - -<a href="../lib/resource/doc/Resource-HOWTO.html"> -Resource Grammar HOWTO</a> -for library authors. - - - - -<h2>Embedding GF grammars in computer programs</h2> - -<a href="gf-tutorial.html#chapeight">Embedded Grammar Tutorial Chapter</a>. 
- -<p> - -<a href="http://www.cs.chalmers.se/~bringert/gf/gf-java.html"> -Embedded GF Interpreter</a> manual for using GF grammars in Java programs. - -<p> - -<a href="http://www.cs.chalmers.se/~aarne/GF/src/GF/GFCC/API.hs"> -Embedded GF API</a> for using GF grammars in Haskell programs. - -<p> - -<a href="http://www.ling.gu.se/~peb/index.cgi/Software"> -MCFG/GF library for Prolog</a>, -for using GF grammars in Prolog programs. - - - -<h2>Theoretical studies</h2> - -<a href="http://www.cs.chalmers.se/~aarne/articles/gf-jfp.ps.gz"> -Grammatical Framework: A Type-Theoretical -Grammar Formalism</a> (ps.gz). Theoretical paper on GF by A. Ranta. A later -version appeared -in <i>The Journal of Functional Programming</i>, vol. 14:2. 2004, pp. 145-189. -The standard reference on GF. - -<p> - -<a href="http://www.ling.gu.se/~peb/pubs/Ljunglof-2004a.pdf"> -Expressivity and Complexity of the Grammatical Framework</a>, -PhD Thesis by -<a href="http://www.ling.gu.se/~peb">Peter Ljunglf</a>. - - - -<h2>Introductory talks</h2> - -<a href="http://www.cs.chalmers.se/~aarne/GF2.0/doc/short/gf-short.html"> -GF in 25 Minutes</a> - overview for computer science audience. - -<p> - - -<a href="http://www.cs.chalmers.se/~aarne/slides/gf-rocquencourt.pdf"> -Slides on GF theory and implementation</a> given -at INRIA Rocquencourt in December 2003. - -<p> - -<a -href="http://www.cs.chalmers.se/~aarne/slides/webalt-2005.pdf"> -Slides on example-based grammar writing</a> and a short introduction -to GF grammars. - -<p> - -<a -href="http://www.cs.chalmers.se/~aarne/course-langtech/lectures/lectures.html"> -Course notes on Natural Language Technology</a>, includes -slides on using GF. - - - -<h2>Examples and applications</h2> - -<a href="http://www.cs.chalmers.se/~krijo/thesis/thesisA4.pdf"> -Formal and Informal Software Specifications</a>, -PhD Thesis by -<a href="http://www.cs.chalmers.se/~krijo">Kristofer Johannisson</a>. 
- - -<p> - -<a href="http://www.dtek.chalmers.se/~d00bring/publ/exjobb/embedded-grammars.pdf"> -Embedded grammars</a>, -Master's thesis by -<a href="http://www.cs.chalmers.se/~bringert/">Bjrn Bringert</a> - -<p> - -<a -href="http://www.cs.chalmers.se/~bringert/misc/tramdemo.avi">Demo film</a> -of a multimodal dialogue system built with embedded grammars. - - -<p> - -<a href="gfcc.pdf"> -GFCC</a> (pdf): -report on a compiler from a fragment of C to JVM, written in GF. -<h2>More</h2> +<h2>Publications</h2> <a href="gf-bibliography.html"> -Bibliography</a>: -more publications on GF, as well as background literature. +Bibliography</a>: more publications on GF, as well as background literature. </body></html> diff --git a/doc/school-langs.dot b/doc/school-langs.dot deleted file mode 100644 index 88e0a9c96..000000000 --- a/doc/school-langs.dot +++ /dev/null @@ -1,106 +0,0 @@ -graph{ - -size = "8,8" ; - -overlap = scale ; - -"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ; - -"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ; -"1" -- "Abs" [style = "solid"]; - -"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ; -"2" -- "Abs" [style = "solid"]; - -"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ; -"3" -- "Abs" [style = "solid"]; - -"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ; -"4" -- "Abs" [style = "solid"]; - -"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ; -"5" -- "Abs" [style = "solid"]; - -"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ; -"6" -- "Abs" [style = "solid"]; - -"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ; -"7" -- "Abs" [style = "solid"]; - -"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ; -"8" -- "Abs" [style = "solid"]; - -"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ; 
-"9" -- "Abs" [style = "solid"]; - -"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ; -"10" -- "Abs" [style = "solid"]; - -"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ; -"11" -- "Abs" [style = "solid"]; - -"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "12" [style = "solid"]; - -"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "13" [style = "solid"]; - -"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "14" [style = "solid"]; - -"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "15" [style = "solid"]; - -"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "16" [style = "solid"]; - -"17" [label = "Polish", style = "solid", shape = "ellipse", color = "orange"] ; -"Abs" -- "17" [style = "solid"]; - -"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "18" [style = "solid"]; - -"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "19" [style = "solid"]; - -"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ; -"Abs" -- "20" [style = "solid"]; - -"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "orange"] ; -"Abs" -- "21" [style = "solid"]; - -"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "22" [style = "solid"]; - -"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ; -"Abs" -- "23" [style = "solid"]; - -"24" [label = "Catalan", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "24" [style = "solid"]; - -"25" [label = "Norwegian", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "25" [style = "solid"]; - -"26" [label = "Russian", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "26" 
[style = "solid"]; - -"27" [label = "Interlingua", style = "dotted", shape = "ellipse", color = "green"] ; -"Abs" -- "27" [style = "solid"]; - -"28" [label = "Latin", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "28" [style = "solid"]; -"29" [label = "Turkish", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "29" [style = "solid"]; -"30" [label = "Hindi", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "30" [style = "solid"]; -"31" [label = "Thai", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "31" [style = "solid"]; -"32" [label = "Urdu", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "32" [style = "solid"]; -"33" [label = "Telugu", style = "dotted", shape = "ellipse", color = "red"] ; -"Abs" -- "33" [style = "solid"]; -"34" [label = "Arabic", style = "dotted", shape = "ellipse", color = "orange"] ; -"Abs" -- "34" [style = "solid"]; - - -} diff --git a/doc/school-langs.png b/doc/school-langs.png Binary files differdeleted file mode 100644 index 7230e0bff..000000000 --- a/doc/school-langs.png +++ /dev/null diff --git a/doc/summer-align.png b/doc/summer-align.png Binary files differdeleted file mode 100644 index 796754408..000000000 --- a/doc/summer-align.png +++ /dev/null diff --git a/doc/summer-langs.png b/doc/summer-langs.png Binary files differdeleted file mode 100644 index 729af722a..000000000 --- a/doc/summer-langs.png +++ /dev/null diff --git a/doc/10lang-small.png b/doc/tutorial/10lang-small.png Binary files differindex 49a3d0a98..49a3d0a98 100644 --- a/doc/10lang-small.png +++ b/doc/tutorial/10lang-small.png diff --git a/doc/categories.png b/doc/tutorial/categories.png Binary files differindex afc5873c5..afc5873c5 100644 --- a/doc/categories.png +++ b/doc/tutorial/categories.png diff --git a/doc/food-js.png b/doc/tutorial/food-js.png Binary files differindex fe579b1a9..fe579b1a9 100644 --- a/doc/food-js.png +++ b/doc/tutorial/food-js.png diff --git 
a/doc/food-magnet.png b/doc/tutorial/food-magnet.png Binary files differindex 8b137875d..8b137875d 100644 --- a/doc/food-magnet.png +++ b/doc/tutorial/food-magnet.png diff --git a/doc/foodmarket.png b/doc/tutorial/foodmarket.png Binary files differindex 6b0e3fbd7..6b0e3fbd7 100644 --- a/doc/foodmarket.png +++ b/doc/tutorial/foodmarket.png diff --git a/doc/gf-tutorial.html b/doc/tutorial/gf-tutorial.html index 230152005..46b17b96b 100644 --- a/doc/gf-tutorial.html +++ b/doc/tutorial/gf-tutorial.html @@ -8,264 +8,12 @@ <P ALIGN="center"><CENTER><H1>Grammatical Framework Tutorial</H1> <FONT SIZE="4"> <I>Aarne Ranta</I><BR> -Version 3.1.2, November 2008 +December 2010 (November 2008) </FONT></CENTER> -<P></P> -<HR NOSHADE SIZE=1> -<P></P> - <UL> - <LI><A HREF="#toc1">Overview</A> - <UL> - <LI><A HREF="#toc2">Outline</A> - <LI><A HREF="#toc3">Slides</A> - </UL> - <LI><A HREF="#toc4">Lesson 1: Getting Started with GF</A> - <UL> - <LI><A HREF="#toc5">What GF is</A> - <LI><A HREF="#toc6">GF grammars and language processing tasks</A> - <LI><A HREF="#toc7">Getting the GF system</A> - <LI><A HREF="#toc8">Running the GF system</A> - <LI><A HREF="#toc9">A "Hello World" grammar</A> - <UL> - <LI><A HREF="#toc10">The program: abstract syntax and concrete syntaxes</A> - <LI><A HREF="#toc11">Using grammars in the GF system</A> - <LI><A HREF="#toc12">Exercises on the Hello World grammar</A> - </UL> - <LI><A HREF="#toc13">Using grammars from outside GF</A> - <LI><A HREF="#toc14">GF scripts</A> - <LI><A HREF="#toc15">What else can be done with the grammar</A> - <LI><A HREF="#toc16">Embedded grammar applications</A> - </UL> - <LI><A HREF="#toc17">Lesson 2: Designing a grammar for complex phrases</A> - <UL> - <LI><A HREF="#toc18">The abstract syntax Food</A> - <LI><A HREF="#toc19">The concrete syntax FoodEng</A> - <UL> - <LI><A HREF="#toc20">Exercises on the Food grammar</A> - </UL> - <LI><A HREF="#toc21">Commands for testing grammars</A> - <UL> - <LI><A HREF="#toc22">Generating trees 
and strings</A> - <LI><A HREF="#toc23">Exercises on generation</A> - <LI><A HREF="#toc24">More on pipes: tracing</A> - <LI><A HREF="#toc25">Writing and reading files</A> - <LI><A HREF="#toc26">Visualizing trees</A> - <LI><A HREF="#toc27">System commands</A> - </UL> - <LI><A HREF="#toc28">An Italian concrete syntax</A> - <UL> - <LI><A HREF="#toc29">Exercises on multilinguality</A> - </UL> - <LI><A HREF="#toc30">Free variation</A> - <LI><A HREF="#toc31">More application of multilingual grammars</A> - <UL> - <LI><A HREF="#toc32">Multilingual treebanks</A> - <LI><A HREF="#toc33">Translation quiz</A> - </UL> - <LI><A HREF="#toc34">Context-free grammars and GF</A> - <UL> - <LI><A HREF="#toc35">The "cf" grammar format</A> - <LI><A HREF="#toc36">Restrictions of context-free grammars</A> - </UL> - <LI><A HREF="#toc37">Modules and files</A> - <LI><A HREF="#toc38">Using operations and resource modules</A> - <UL> - <LI><A HREF="#toc39">Operation definitions</A> - <LI><A HREF="#toc40">The ``resource`` module type</A> - <LI><A HREF="#toc41">Opening a resource</A> - <LI><A HREF="#toc42">Partial application</A> - <LI><A HREF="#toc43">Testing resource modules</A> - </UL> - <LI><A HREF="#toc44">Grammar architecture</A> - <UL> - <LI><A HREF="#toc45">Extending a grammar</A> - <LI><A HREF="#toc46">Multiple inheritance</A> - </UL> - </UL> - <LI><A HREF="#toc47">Lesson 3: Grammars with parameters</A> - <UL> - <LI><A HREF="#toc48">The problem: words have to be inflected</A> - <LI><A HREF="#toc49">Parameters and tables</A> - <LI><A HREF="#toc50">Inflection tables and paradigms</A> - <UL> - <LI><A HREF="#toc51">Exercises on morphology</A> - </UL> - <LI><A HREF="#toc52">Using parameters in concrete syntax</A> - <UL> - <LI><A HREF="#toc53">Agreement</A> - <LI><A HREF="#toc54">Determiners</A> - <LI><A HREF="#toc55">Parametric vs. 
inherent features</A> - </UL> - <LI><A HREF="#toc56">An English concrete syntax for Foods with parameters</A> - <LI><A HREF="#toc57">More on inflection paradigms</A> - <UL> - <LI><A HREF="#toc58">Worst-case functions</A> - <LI><A HREF="#toc59">Smart paradigms</A> - <LI><A HREF="#toc60">Exercises on regular patterns</A> - <LI><A HREF="#toc61">Function types with variables</A> - <LI><A HREF="#toc62">Separating operation types and definitions</A> - <LI><A HREF="#toc63">Overloading of operations</A> - <LI><A HREF="#toc64">Morphological analysis and morphology quiz</A> - </UL> - <LI><A HREF="#toc65">The Italian Foods grammar</A> - <UL> - <LI><A HREF="#toc66">Exercises on using parameters</A> - </UL> - <LI><A HREF="#toc67">Discontinuous constituents</A> - <LI><A HREF="#toc68">Strings at compile time vs. run time</A> - <UL> - <LI><A HREF="#toc69">Supplementary constructs for concrete syntax</A> - </UL> - </UL> - <LI><A HREF="#toc70">Lesson 4: Using the resource grammar library</A> - <UL> - <LI><A HREF="#toc71">The coverage of the library</A> - <LI><A HREF="#toc72">The structure of the library</A> - <UL> - <LI><A HREF="#toc73">Lexical vs. 
phrasal rules</A> - <LI><A HREF="#toc74">Lexical categories</A> - <LI><A HREF="#toc75">Lexical rules</A> - <LI><A HREF="#toc76">Resource lexicon</A> - <LI><A HREF="#toc77">Phrasal categories</A> - <LI><A HREF="#toc78">Syntactic combinations</A> - <LI><A HREF="#toc79">Example syntactic combination</A> - </UL> - <LI><A HREF="#toc80">The resource API</A> - <UL> - <LI><A HREF="#toc81">A miniature resource API: categories</A> - <LI><A HREF="#toc82">A miniature resource API: rules</A> - <LI><A HREF="#toc83">A miniature resource API: structural words</A> - <LI><A HREF="#toc84">A miniature resource API: paradigms</A> - <LI><A HREF="#toc85">A miniature resource API: more paradigms</A> - <LI><A HREF="#toc86">Exercises</A> - </UL> - <LI><A HREF="#toc87">Example: English</A> - <UL> - <LI><A HREF="#toc88">English example: linearization types and combination rules</A> - <LI><A HREF="#toc89">English example: lexical rules</A> - <LI><A HREF="#toc90">English example: exercises</A> - </UL> - <LI><A HREF="#toc91">Functor implementation of multilingual grammars</A> - <UL> - <LI><A HREF="#toc92">New language by copy and paste</A> - <LI><A HREF="#toc93">Functors: functions on the module level</A> - <LI><A HREF="#toc94">Code for the Foods functor</A> - <LI><A HREF="#toc95">Code for the LexFoods interface</A> - <LI><A HREF="#toc96">Code for a German instance of the lexicon</A> - <LI><A HREF="#toc97">Code for a German functor instantiation</A> - <LI><A HREF="#toc98">Adding languages to a functor implementation</A> - <LI><A HREF="#toc99">Example: adding Finnish</A> - <LI><A HREF="#toc100">A design pattern</A> - <LI><A HREF="#toc101">Functors: exercises</A> - </UL> - <LI><A HREF="#toc102">Restricted inheritance</A> - <UL> - <LI><A HREF="#toc103">A problem with functors</A> - <LI><A HREF="#toc104">Restricted inheritance: include or exclude</A> - <LI><A HREF="#toc105">The functor problem solved</A> - </UL> - <LI><A HREF="#toc106">Grammar reuse</A> - <UL> - <LI><A HREF="#toc107">Library 
exercises</A> - </UL> - <LI><A HREF="#toc108">Tenses</A> - </UL> - <LI><A HREF="#toc109">Lesson 5: Refining semantics in abstract syntax</A> - <UL> - <LI><A HREF="#toc110">Dependent types</A> - <UL> - <LI><A HREF="#toc111">A dependent type system</A> - <LI><A HREF="#toc112">Examples of devices and actions</A> - <LI><A HREF="#toc113">Linearization and parsing with dependent types</A> - <LI><A HREF="#toc114">Solving metavariables</A> - </UL> - <LI><A HREF="#toc115">Polymorphism</A> - <UL> - <LI><A HREF="#toc116">Dependent types: exercises</A> - </UL> - <LI><A HREF="#toc117">Proof objects</A> - <UL> - <LI><A HREF="#toc118">Proof-carrying documents</A> - </UL> - <LI><A HREF="#toc119">Restricted polymorphism</A> - <UL> - <LI><A HREF="#toc120">Example: classes for switching and dimming</A> - </UL> - <LI><A HREF="#toc121">Variable bindings</A> - <UL> - <LI><A HREF="#toc122">Higher-order abstract syntax</A> - <LI><A HREF="#toc123">Higher-order abstract syntax: linearization</A> - <LI><A HREF="#toc124">Eta expansion</A> - <LI><A HREF="#toc125">Parsing variable bindings</A> - <LI><A HREF="#toc126">Exercises on variable bindings</A> - </UL> - <LI><A HREF="#toc127">Semantic definitions</A> - <UL> - <LI><A HREF="#toc128">Computing a tree</A> - <LI><A HREF="#toc129">Definitional equality</A> - <LI><A HREF="#toc130">Judgement forms for constructors</A> - <LI><A HREF="#toc131">Exercises on semantic definitions</A> - </UL> - <LI><A HREF="#toc132">Lesson 6: Grammars of formal languages</A> - <UL> - <LI><A HREF="#toc133">Arithmetic expressions</A> - <LI><A HREF="#toc134">Concrete syntax: a simple approach</A> - </UL> - <LI><A HREF="#toc135">Lexing and unlexing</A> - <UL> - <LI><A HREF="#toc136">Most common lexers and unlexers</A> - </UL> - <LI><A HREF="#toc137">Precedence and fixity</A> - <UL> - <LI><A HREF="#toc138">Precedence as a parameter</A> - <LI><A HREF="#toc139">Fixities</A> - <LI><A HREF="#toc140">Exercises on precedence</A> - </UL> - <LI><A HREF="#toc141">Code generation as 
linearization</A> - <UL> - <LI><A HREF="#toc142">Programs with variables</A> - <LI><A HREF="#toc143">Exercises on code generation</A> - </UL> - </UL> - <LI><A HREF="#toc144">Lesson 7: Embedded grammars</A> - <UL> - <LI><A HREF="#toc145">Functionalities of an embedded grammar format</A> - <LI><A HREF="#toc146">The portable grammar format</A> - <UL> - <LI><A HREF="#toc147">Haskell: the EmbedAPI module</A> - <LI><A HREF="#toc148">First application: a translator</A> - <LI><A HREF="#toc149">Producing GFCC for the translator</A> - <LI><A HREF="#toc150">A translator loop</A> - <LI><A HREF="#toc151">A question-answer system</A> - <LI><A HREF="#toc152">Abstract syntax of the query system</A> - <LI><A HREF="#toc153">Exporting GF datatypes to Haskell</A> - <LI><A HREF="#toc154">The question-answer function</A> - <LI><A HREF="#toc155">Converting between Haskell and GF trees</A> - <LI><A HREF="#toc156">Putting it all together: the transfer definition</A> - <LI><A HREF="#toc157">Putting it all together: the Main module</A> - <LI><A HREF="#toc158">Putting it all together: the Makefile</A> - </UL> - <LI><A HREF="#toc159">Web server applications</A> - <LI><A HREF="#toc160">JavaScript applications</A> - <UL> - <LI><A HREF="#toc161">Compiling to JavaScript</A> - <LI><A HREF="#toc162">Using the JavaScript grammar</A> - </UL> - <LI><A HREF="#toc163">Language models for speech recognition</A> - <UL> - <LI><A HREF="#toc164">More speech recognition grammar formats</A> - </UL> - </UL> - </UL> - -<P></P> -<HR NOSHADE SIZE=1> -<P></P> <P> <!-- NEW --> </P> -<A NAME="toc1"></A> <H1>Overview</H1> <P> This is a hands-on introduction to grammar writing in GF. @@ -292,7 +40,6 @@ Prerequisites: <P> <!-- NEW --> </P> -<A NAME="toc2"></A> <H2>Outline</H2> <P> <a href="#chaptwo">Lesson 1</a>: a multilingual "Hello World" grammar. English, Finnish, Italian. @@ -319,7 +66,6 @@ and <B>semantic definitions</B>. 
<P> <!-- NEW --> </P> -<A NAME="toc3"></A> <H2>Slides</H2> <P> You can chop this tutorial into a set of slides by the command @@ -331,7 +77,7 @@ You can chop this tutorial into a set of slides by the command where the program <CODE>htmls</CODE> is distributed with GF (see below), in </P> <P> - <A HREF="http://digitalgrammars.com/gf/src/tools/Htmls.hs"><CODE>GF/src/tools/Htmls.hs</CODE></A> + <A HREF="http://grammaticalframework.org/src/tools/Htmls.hs"><CODE>GF/src/tools/Htmls.hs</CODE></A> </P> <P> The slides will appear as a set of files beginning with <CODE>01-gf-tutorial.htmls</CODE>. @@ -343,7 +89,6 @@ upper left corner of each slide, and the links behind the "Contents" link. <P> <!-- NEW --> </P> -<A NAME="toc4"></A> <H1>Lesson 1: Getting Started with GF</H1> <P> <a name="chaptwo"></a> @@ -360,7 +105,6 @@ Goals: <P> <!-- NEW --> </P> -<A NAME="toc5"></A> <H2>What GF is</H2> <P> We use the term GF for three different things: @@ -389,7 +133,6 @@ using the GF system. <P> <!-- NEW --> </P> -<A NAME="toc6"></A> <H2>GF grammars and language processing tasks</H2> <P> A GF program is called a <B>grammar</B>. @@ -417,13 +160,12 @@ In general, a GF grammar is <B>multilingual</B>: <P> <!-- NEW --> </P> -<A NAME="toc7"></A> <H2>Getting the GF system</H2> <P> Open-source free software, downloaded via the GF Homepage: </P> <P> -<A HREF="http://digitalgrammars.com/gf/"><CODE>digitalgrammars.com/gf</CODE></A> +<A HREF="http://grammaticalframework.org/"><CODE>grammaticalframework.org</CODE></A> </P> <P> There you find @@ -436,17 +178,16 @@ There you find <P> Many examples in this tutorial are -<A HREF="http://digitalgrammars.com/gf/examples/tutorial">online</A>. +<A HREF="http://grammaticalframework.org/examples/tutorial">online</A>. </P> <P> Normally you don't have to compile GF yourself. But, if you do want to compile GF from source follow the -instructions in the <A HREF="gf-developers.html">Developers Guide</A>. 
+instructions in the <A HREF="../gf-developers.html">Developers Guide</A>. </P> <P> <!-- NEW --> </P> -<A NAME="toc8"></A> <H2>Running the GF system</H2> <P> Type <CODE>gf</CODE> in the Unix (or Cygwin) shell: @@ -479,7 +220,6 @@ follow them. <P> <!-- NEW --> </P> -<A NAME="toc9"></A> <H2>A "Hello World" grammar</H2> <P> Like most programming language tutorials, we start with a @@ -497,7 +237,6 @@ Extra features: <P> <!-- NEW --> </P> -<A NAME="toc10"></A> <H3>The program: abstract syntax and concrete syntaxes</H3> <P> A GF program, in general, is a <B>multilingual grammar</B>. Its main parts @@ -617,7 +356,6 @@ Finnish and an Italian concrete syntaxes: <P> <!-- NEW --> </P> -<A NAME="toc11"></A> <H3>Using grammars in the GF system</H3> <P> In order to compile the grammar in GF, @@ -724,7 +462,6 @@ Linearization is by default to all available languages. <P> <!-- NEW --> </P> -<A NAME="toc12"></A> <H3>Exercises on the Hello World grammar</H3> <OL> <LI>Test the parsing and translation examples shown above, as well as @@ -754,7 +491,6 @@ of a variable. Inspect the error messages generated by GF. <P> <!-- NEW --> </P> -<A NAME="toc13"></A> <H2>Using grammars from outside GF</H2> <P> You can use the <CODE>gf</CODE> program in a Unix pipe. @@ -780,7 +516,6 @@ You can also write a <B>script</B>, a file containing the lines <P> <!-- NEW --> </P> -<A NAME="toc14"></A> <H2>GF scripts</H2> <P> If we name this script <CODE>hello.gfs</CODE>, we can do @@ -806,7 +541,6 @@ translation to the output. 
<P> <!-- NEW --> </P> -<A NAME="toc15"></A> <H2>What else can be done with the grammar</H2> <P> Some more functions that will be covered: @@ -825,7 +559,6 @@ Some more functions that will be covered: <P> <!-- NEW --> </P> -<A NAME="toc16"></A> <H2>Embedded grammar applications</H2> <P> Application programs, using techniques from <a href="#chapeight">Lesson 7</a>: @@ -847,7 +580,6 @@ Application programs, using techniques from <a href="#chapeight">Lesson 7</a>: <P> <!-- NEW --> </P> -<A NAME="toc17"></A> <H1>Lesson 2: Designing a grammar for complex phrases</H1> <P> <a name="chapthree"></a> @@ -864,7 +596,6 @@ Goals: <P> <!-- NEW --> </P> -<A NAME="toc18"></A> <H2>The abstract syntax Food</H2> <P> Phrases usable for speaking about food: @@ -912,7 +643,6 @@ Example <CODE>Phrase</CODE> <P> <!-- NEW --> </P> -<A NAME="toc19"></A> <H2>The concrete syntax FoodEng</H2> <PRE> concrete FoodEng of Food = { @@ -960,7 +690,6 @@ Parse in other categories setting the <CODE>cat</CODE> flag: <P> <!-- NEW --> </P> -<A NAME="toc20"></A> <H3>Exercises on the Food grammar</H3> <OL> <LI>Extend the <CODE>Food</CODE> grammar by ten new food kinds and @@ -977,9 +706,7 @@ the prefix can occur at most once. <P> <!-- NEW --> </P> -<A NAME="toc21"></A> <H2>Commands for testing grammars</H2> -<A NAME="toc22"></A> <H3>Generating trees and strings</H3> <P> Random generation (<CODE>generate_random = gr</CODE>): build @@ -1041,7 +768,6 @@ What options a command has can be seen by the <CODE>help = h</CODE> command: <P> <!-- NEW --> </P> -<A NAME="toc23"></A> <H3>Exercises on generation</H3> <OL> <LI>If the command <CODE>gt</CODE> generated all @@ -1055,7 +781,6 @@ use the Unix <B>word count</B> command <CODE>wc</CODE> to count lines. <P> <!-- NEW --> </P> -<A NAME="toc24"></A> <H3>More on pipes: tracing</H3> <P> Put the <B>tracing</B> option <CODE>-tr</CODE> to each command whose output you @@ -1080,7 +805,6 @@ strings, and try out the ambiguity test. 
<P> <!-- NEW --> </P> -<A NAME="toc25"></A> <H3>Writing and reading files</H3> <P> To save the outputs into a file, pipe it to the <CODE>write_file = wf</CODE> command, @@ -1105,7 +829,6 @@ of grammars - the most systematic way to do this is by <P> <!-- NEW --> </P> -<A NAME="toc26"></A> <H3>Visualizing trees</H3> <P> Parentheses give a linear representation of trees, @@ -1148,7 +871,6 @@ program (from the Graphviz package). <P> <!-- NEW --> </P> -<A NAME="toc27"></A> <H3>System commands</H3> <P> You can give a <B>system command</B> without leaving GF: @@ -1177,7 +899,6 @@ a system pipe from a GF command into a Unix command. <P> <!-- NEW --> </P> -<A NAME="toc28"></A> <H2>An Italian concrete syntax</H2> <P> <a name="secanitalian"></a> @@ -1232,7 +953,6 @@ which are introduced in <a href="#chaptwo">Lesson 3</a>.) <P> <!-- NEW --> </P> -<A NAME="toc29"></A> <H3>Exercises on multilinguality</H3> <OL> <LI>Write a concrete syntax of <CODE>Food</CODE> for some other language. @@ -1250,7 +970,6 @@ after having worked out <a href="#chaptwo">Lesson 3</a>. <P> <!-- NEW --> </P> -<A NAME="toc30"></A> <H2>Free variation</H2> <P> Semantically indistinguishable ways of expressing a thing. @@ -1298,9 +1017,7 @@ a variant list must be of the same type. <P> <!-- NEW --> </P> -<A NAME="toc31"></A> <H2>More application of multilingual grammars</H2> -<A NAME="toc32"></A> <H3>Multilingual treebanks</H3> <P> <a name="sectreebank"></a> @@ -1324,7 +1041,6 @@ linearizations in different languages: <P> <!-- NEW --> </P> -<A NAME="toc33"></A> <H3>Translation quiz</H3> <P> <CODE>translation_quiz = tq</CODE>: @@ -1356,9 +1072,7 @@ answer given in another language. <P> <!-- NEW --> </P> -<A NAME="toc34"></A> <H2>Context-free grammars and GF</H2> -<A NAME="toc35"></A> <H3>The "cf" grammar format</H3> <P> The grammar <CODE>FoodEng</CODE> can be written in a BNF format as follows: @@ -1392,7 +1106,6 @@ The compiler creates separate abstract and concrete modules internally. 
<P> <!-- NEW --> </P> -<A NAME="toc36"></A> <H3>Restrictions of context-free grammars</H3> <P> Separating concrete and abstract syntax allows @@ -1411,7 +1124,6 @@ copy language <CODE>{x x | x <- (a|b)*}</CODE> in GF. <P> <!-- NEW --> </P> -<A NAME="toc37"></A> <H2>Modules and files</H2> <P> GF uses suffixes to recognize different file formats: @@ -1457,9 +1169,7 @@ a second time? Try this in different situations: <P> <!-- NEW --> </P> -<A NAME="toc38"></A> <H2>Using operations and resource modules</H2> -<A NAME="toc39"></A> <H3>Operation definitions</H3> <P> The golden rule of functional programming: @@ -1521,7 +1231,6 @@ sugar for abstraction: <P> <!-- NEW --> </P> -<A NAME="toc40"></A> <H3>The ``resource`` module type</H3> <P> The <CODE>resource</CODE> module type is used to package @@ -1540,7 +1249,6 @@ The <CODE>resource</CODE> module type is used to package <P> <!-- NEW --> </P> -<A NAME="toc41"></A> <H3>Opening a resource</H3> <P> Any number of <CODE>resource</CODE> modules can be @@ -1573,7 +1281,6 @@ Any number of <CODE>resource</CODE> modules can be <P> <!-- NEW --> </P> -<A NAME="toc42"></A> <H3>Partial application</H3> <P> <a name="secpartapp"></a> @@ -1611,7 +1318,6 @@ such that it allows you to write <P> <!-- NEW --> </P> -<A NAME="toc43"></A> <H3>Testing resource modules</H3> <P> Import with the flag <CODE>-retain</CODE>, @@ -1630,12 +1336,10 @@ Compute the value with <CODE>compute_concrete = cc</CODE>, <P> <!-- NEW --> </P> -<A NAME="toc44"></A> <H2>Grammar architecture</H2> <P> <a name="secarchitecture"></a> </P> -<A NAME="toc45"></A> <H3>Extending a grammar</H3> <P> A new module can <B>extend</B> an old one: @@ -1691,7 +1395,6 @@ possible to build resource hierarchies.
<P> <!-- NEW --> </P> -<A NAME="toc46"></A> <H3>Multiple inheritance</H3> <P> Extend several grammars at the same time: @@ -1725,7 +1428,6 @@ where <P> <!-- NEW --> </P> -<A NAME="toc47"></A> <H1>Lesson 3: Grammars with parameters</H1> <P> <a name="chapfour"></a> @@ -1754,7 +1456,6 @@ could be left to library implementors. <P> <!-- NEW --> </P> -<A NAME="toc48"></A> <H2>The problem: words have to be inflected</H2> <P> Plural forms are needed in things like @@ -1787,7 +1488,6 @@ adjectives, and verbs can have in some languages that you know. <P> <!-- NEW --> </P> -<A NAME="toc49"></A> <H2>Parameters and tables</H2> <P> We define the <B>parameter type</B> of number in English by @@ -1898,7 +1598,6 @@ module, which you can test by using the command <CODE>compute_concrete</CODE>. <P> <!-- NEW --> </P> -<A NAME="toc50"></A> <H2>Inflection tables and paradigms</H2> <P> A morphological <B>paradigm</B> is a formula telling how a class of @@ -1950,7 +1649,6 @@ uses a <B>wild card</B> pattern <CODE>_</CODE>. <P> <!-- NEW --> </P> -<A NAME="toc51"></A> <H3>Exercises on morphology</H3> <OL> <LI>Identify cases in which the <CODE>regNoun</CODE> paradigm does not @@ -1963,7 +1661,6 @@ considered in earlier exercises. <P> <!-- NEW --> </P> -<A NAME="toc52"></A> <H2>Using parameters in concrete syntax</H2> <P> Purpose: a more radical @@ -1988,7 +1685,6 @@ This will force us to deal with gender- <P> <!-- NEW --> </P> -<A NAME="toc53"></A> <H3>Agreement</H3> <P> In English, the phrase-forming rule @@ -2030,7 +1726,6 @@ Now we can write <P> <!-- NEW --> </P> -<A NAME="toc54"></A> <H3>Determiners</H3> <P> How does an <CODE>Item</CODE> subject receive its number? The rules @@ -2100,7 +1795,6 @@ In a more <B>lexicalized</B> grammar, determiners would be a category: <P> <!-- NEW --> </P> -<A NAME="toc55"></A> <H3>Parametric vs. 
inherent features</H3> <P> <CODE>Kind</CODE>s have number as a <B>parametric feature</B>: both singular and plural @@ -2168,7 +1862,6 @@ Notice <P> <!-- NEW --> </P> -<A NAME="toc56"></A> <H2>An English concrete syntax for Foods with parameters</H2> <P> We use some string operations from the library <CODE>Prelude</CODE>. @@ -2233,7 +1926,6 @@ We use some string operations from the library <CODE>Prelude</CODE>. <P> <!-- NEW --> </P> -<A NAME="toc57"></A> <H2>More on inflection paradigms</H2> <P> <a name="secinflection"></a> @@ -2247,7 +1939,6 @@ add words to a lexicon. <P> <!-- NEW --> </P> -<A NAME="toc58"></A> <H3>Worst-case functions</H3> <P> We perform <B>data abstraction</B> from the type @@ -2337,7 +2028,6 @@ parameters. <P> <!-- NEW --> </P> -<A NAME="toc59"></A> <H3>Smart paradigms</H3> <P> The regular <I>dog</I>-<I>dogs</I> paradigm has @@ -2404,7 +2094,6 @@ the suffix <CODE>"oo"</CODE> prevents <I>bamboo</I> from matching the suffix <P> <!-- NEW --> </P> -<A NAME="toc60"></A> <H3>Exercises on regular patterns</H3> <OL> <LI>The same rules that form plural nouns in English also @@ -2429,7 +2118,6 @@ operation to see whether it correctly changes <I>Arzt</I> to <I>Ärzt</I>, <P> <!-- NEW --> </P> -<A NAME="toc61"></A> <H3>Function types with variables</H3> <P> In <a href="#chapsix">Lesson 5</a>, <B>dependent function types</B> need a notation @@ -2485,7 +2173,6 @@ looking like the expected forms: <P> <!-- NEW --> </P> -<A NAME="toc62"></A> <H3>Separating operation types and definitions</H3> <P> In libraries, it is useful to group type signatures separately from @@ -2505,7 +2192,6 @@ With the <CODE>interface</CODE> and <CODE>instance</CODE> module types <P> <!-- NEW --> </P> -<A NAME="toc63"></A> <H3>Overloading of operations</H3> <P> <B>Overloading</B>: different functions can be given the same name, as e.g. in C++. @@ -2547,7 +2233,6 @@ an overload group.
<P> <!-- NEW --> </P> -<A NAME="toc64"></A> <H3>Morphological analysis and morphology quiz</H3> <P> The command <CODE>morpho_analyse = ma</CODE> @@ -2561,7 +2246,7 @@ can be used to read a text and return for each word its analyses The command <CODE>morpho_quiz = mq</CODE> generates inflection exercises. </P> <PRE> - % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfc + % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo > morpho_quiz -cat=V @@ -2584,7 +2269,6 @@ To create a list for later use, use the command <CODE>morpho_list = ml</CODE> <P> <!-- NEW --> </P> -<A NAME="toc65"></A> <H2>The Italian Foods grammar</H2> <P> <a name="secitalian"></a> @@ -2722,7 +2406,6 @@ The complete set of linearization rules: <P> <!-- NEW --> </P> -<A NAME="toc66"></A> <H3>Exercises on using parameters</H3> <OL> <LI>Experiment with multilingual generation and translation in the @@ -2742,7 +2425,6 @@ now aiming for complete grammatical correctness by the use of parameters. <P> <!-- NEW --> </P> -<A NAME="toc67"></A> <H2>Discontinuous constituents</H2> <P> A linearization record may contain more strings than one, and those @@ -2780,7 +2462,6 @@ but can be defined in GF by using discontinuous constituents. <P> <!-- NEW --> </P> -<A NAME="toc68"></A> <H2>Strings at compile time vs. run time</H2> <P> Tokens are created in the following ways: @@ -2839,7 +2520,6 @@ This topic will be covered in <a href="#seclexing">here</a>. 
<P> <!-- NEW --> </P> -<A NAME="toc69"></A> <H3>Supplementary constructs for concrete syntax</H3> <H4>Record extension and subtyping</H4> <P> @@ -2901,7 +2581,6 @@ Thus <P> <!-- NEW --> </P> -<A NAME="toc70"></A> <H1>Lesson 4: Using the resource grammar library</H1> <P> <a name="chapfive"></a> @@ -2918,7 +2597,6 @@ Goals: <P> <!-- NEW --> </P> -<A NAME="toc71"></A> <H2>The coverage of the library</H2> <P> The current 12 resource languages are @@ -2945,7 +2623,6 @@ The first three letters (<CODE>Eng</CODE> etc) are used in grammar module names <P> <!-- NEW --> </P> -<A NAME="toc72"></A> <H2>The structure of the library</H2> <P> <a name="seclexical"></a> @@ -2967,7 +2644,6 @@ wider coverage than with semantic grammars. <P> <!-- NEW --> </P> -<A NAME="toc73"></A> <H3>Lexical vs. phrasal rules</H3> <P> A resource grammar has two kinds of categories and two kinds of rules: @@ -2995,7 +2671,6 @@ But it is a good discipline to follow. <P> <!-- NEW --> </P> -<A NAME="toc74"></A> <H3>Lexical categories</H3> <P> Two kinds of lexical categories: @@ -3028,7 +2703,6 @@ Two kinds of lexical categories: <P> <!-- NEW --> </P> -<A NAME="toc75"></A> <H3>Lexical rules</H3> <P> Closed classes: module <CODE>Syntax</CODE>. In the <CODE>Foods</CODE> grammar, we need @@ -3061,7 +2735,6 @@ where we use <CODE>mkN</CODE> from <CODE>ParadigmsEng</CODE>: <P> <!-- NEW --> </P> -<A NAME="toc76"></A> <H3>Resource lexicon</H3> <P> Alternative concrete syntax for @@ -3092,7 +2765,6 @@ Advantages: <P> <!-- NEW --> </P> -<A NAME="toc77"></A> <H3>Phrasal categories</H3> <P> In <CODE>Foods</CODE>, we need just four phrasal categories: @@ -3113,7 +2785,6 @@ Common nouns are made into noun phrases by adding determiners. 
<P> <!-- NEW --> </P> -<A NAME="toc78"></A> <H3>Syntactic combinations</H3> <P> We need the following combinations: @@ -3142,7 +2813,6 @@ Heavy overloading: the current library <P> <!-- NEW --> </P> -<A NAME="toc79"></A> <H3>Example syntactic combination</H3> <P> The sentence @@ -3168,7 +2838,6 @@ this syntactic tree gives the value of linearizing the semantic tree <P> <!-- NEW --> </P> -<A NAME="toc80"></A> <H2>The resource API</H2> <P> Language-specific and language-independent parts - roughly, @@ -3185,12 +2854,11 @@ Language-specific and language-independent parts - roughly, Full API documentation on-line: the <B>resource synopsis</B>, </P> <P> -<A HREF="http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html"><CODE>digitalgrammars.com/gf/lib/resource/doc/synopsis.html</CODE></A> +<A HREF="http://grammaticalframework.org/lib/doc/synopsis.html"><CODE>grammaticalframework.org/lib/doc/synopsis.html</CODE></A> </P> <P> <!-- NEW --> </P> -<A NAME="toc81"></A> <H3>A miniature resource API: categories</H3> <TABLE CELLPADDING="4" BORDER="1"> <TR> @@ -3248,7 +2916,6 @@ Full API documentation on-line: the <B>resource synopsis</B>, <P> <!-- NEW --> </P> -<A NAME="toc82"></A> <H3>A miniature resource API: rules</H3> <TABLE CELLPADDING="4" BORDER="1"> <TR> @@ -3296,7 +2963,6 @@ Full API documentation on-line: the <B>resource synopsis</B>, <P> <!-- NEW --> </P> -<A NAME="toc83"></A> <H3>A miniature resource API: structural words</H3> <TABLE CELLPADDING="4" BORDER="1"> <TR> @@ -3334,7 +3000,6 @@ Full API documentation on-line: the <B>resource synopsis</B>, <P> <!-- NEW --> </P> -<A NAME="toc84"></A> <H3>A miniature resource API: paradigms</H3> <P> From <CODE>ParadigmsEng</CODE>: @@ -3379,7 +3044,6 @@ From <CODE>ParadigmsIta</CODE>: <P> <!-- NEW --> </P> -<A NAME="toc85"></A> <H3>A miniature resource API: more paradigms</H3> <P> From <CODE>ParadigmsGer</CODE>: @@ -3444,7 +3108,6 @@ From <CODE>ParadigmsFin</CODE>: <P> <!-- NEW --> </P> -<A NAME="toc86"></A>
<H3>Exercises</H3> <P> 1. Try out the morphological paradigms in different languages. Do @@ -3459,7 +3122,6 @@ as follows: <P> <!-- NEW --> </P> -<A NAME="toc87"></A> <H2>Example: English</H2> <P> <a name="secenglish"></a> @@ -3493,7 +3155,6 @@ Thus the beginning of the module is <P> <!-- NEW --> </P> -<A NAME="toc88"></A> <H3>English example: linearization types and combination rules</H3> <P> As linearization types, we use clauses for <CODE>Phrase</CODE>, noun phrases @@ -3523,7 +3184,6 @@ Now the combination rules we need almost write themselves automatically: <P> <!-- NEW --> </P> -<A NAME="toc89"></A> <H3>English example: lexical rules</H3> <P> We use resource paradigms and lexical insertion rules. @@ -3549,7 +3209,6 @@ The two-place noun paradigm is needed only once, for <P> <!-- NEW --> </P> -<A NAME="toc90"></A> <H3>English example: exercises</H3> <P> 1. Compile the grammar <CODE>FoodsEng</CODE> and generate @@ -3564,12 +3223,10 @@ grammars presented earlier in this tutorial. <P> <!-- NEW --> </P> -<A NAME="toc91"></A> <H2>Functor implementation of multilingual grammars</H2> <P> <a name="secfunctor"></a> </P> -<A NAME="toc92"></A> <H3>New language by copy and paste</H3> <P> If you write a concrete syntax of <CODE>Foods</CODE> for some other @@ -3600,7 +3257,6 @@ Can we avoid this programming by copy-and-paste? 
<P> <!-- NEW --> </P> -<A NAME="toc93"></A> <H3>Functors: functions on the module level</H3> <P> <B>Functors</B> familiar from the functional programming languages ML and OCaml, @@ -3645,7 +3301,6 @@ we can write a <B>functor instantiation</B>, <P> <!-- NEW --> </P> -<A NAME="toc94"></A> <H3>Code for the Foods functor</H3> <PRE> --# -path=.:../foods @@ -3681,7 +3336,6 @@ we can write a <B>functor instantiation</B>, <P> <!-- NEW --> </P> -<A NAME="toc95"></A> <H3>Code for the LexFoods interface</H3> <P> <a name="secinterface"></a> @@ -3705,7 +3359,6 @@ we can write a <B>functor instantiation</B>, <P> <!-- NEW --> </P> -<A NAME="toc96"></A> <H3>Code for a German instance of the lexicon</H3> <PRE> instance LexFoodsGer of LexFoods = open SyntaxGer, ParadigmsGer in { @@ -3726,7 +3379,6 @@ we can write a <B>functor instantiation</B>, <P> <!-- NEW --> </P> -<A NAME="toc97"></A> <H3>Code for a German functor instantiation</H3> <PRE> --# -path=.:../foods:present @@ -3739,7 +3391,6 @@ we can write a <B>functor instantiation</B>, <P> <!-- NEW --> </P> -<A NAME="toc98"></A> <H3>Adding languages to a functor implementation</H3> <P> Just two modules are needed: @@ -3765,7 +3416,6 @@ language: <P> <!-- NEW --> </P> -<A NAME="toc99"></A> <H3>Example: adding Finnish</H3> <P> Lexicon instance @@ -3799,7 +3449,6 @@ Functor instantiation <P> <!-- NEW --> </P> -<A NAME="toc100"></A> <H3>A design pattern</H3> <P> This can be seen as a <I>design pattern</I> for multilingual grammars: @@ -3822,7 +3471,6 @@ Of the hand-written modules, only <CODE>LexDomainL</CODE> is language-dependent. <P> <!-- NEW --> </P> -<A NAME="toc101"></A> <H3>Functors: exercises</H3> <P> 1. Compile and test <CODE>FoodsGer</CODE>. 
@@ -3863,9 +3511,7 @@ The implementation goes in the following phases: <P> <!-- NEW --> </P> -<A NAME="toc102"></A> <H2>Restricted inheritance</H2> -<A NAME="toc103"></A> <H3>A problem with functors</H3> <P> Problem: a functor only works when all languages use the resource <CODE>Syntax</CODE> @@ -3895,7 +3541,6 @@ Problem with this solution: <P> <!-- NEW --> </P> -<A NAME="toc104"></A> <H3>Restricted inheritance: include or exclude</H3> <P> A module may inherit just a selection of names. @@ -3916,7 +3561,6 @@ A concrete syntax of <CODE>Foodmarket</CODE> must make the analogous restriction <P> <!-- NEW --> </P> -<A NAME="toc105"></A> <H3>The functor problem solved</H3> <P> The English instantiation inherits the functor @@ -3938,7 +3582,6 @@ is defined in the body instead: <P> <!-- NEW --> </P> -<A NAME="toc106"></A> <H2>Grammar reuse</H2> <P> Abstract syntax modules can be used as interfaces, @@ -3960,7 +3603,6 @@ The following correspondencies are then applied: <P> <!-- NEW --> </P> -<A NAME="toc107"></A> <H3>Library exercises</H3> <P> 1. Find resource grammar terms for the following @@ -3985,7 +3627,6 @@ Then translate the phrases to other languages. <P> <!-- NEW --> </P> -<A NAME="toc108"></A> <H2>Tenses</H2> <P> <a name="sectense"></a> @@ -4077,7 +3718,6 @@ tenses and moods, e.g. the Romance languages. <P> <!-- NEW --> </P> -<A NAME="toc109"></A> <H1>Lesson 5: Refining semantics in abstract syntax</H1> <P> <a name="chapsix"></a> @@ -4105,7 +3745,6 @@ GF = logical framework + concrete syntax. <P> <!-- NEW --> </P> -<A NAME="toc110"></A> <H2>Dependent types</H2> <P> <a name="secsmarthouse"></a> @@ -4133,7 +3772,6 @@ defines voice commands for household appliances. 
<P> <!-- NEW --> </P> -<A NAME="toc111"></A> <H3>A dependent type system</H3> <P> Ontology: @@ -4162,7 +3800,6 @@ Abstract syntax formalizing this: <P> <!-- NEW --> </P> -<A NAME="toc112"></A> <H3>Examples of devices and actions</H3> <P> Assume the kinds <CODE>light</CODE> and <CODE>fan</CODE>, @@ -4195,7 +3832,6 @@ but we cannot form the trees <P> <!-- NEW --> </P> -<A NAME="toc113"></A> <H3>Linearization and parsing with dependent types</H3> <P> Concrete syntax does not know if a category is a dependent type. @@ -4238,7 +3874,6 @@ to mark incomplete parts of trees in the syntax editor. <P> <!-- NEW --> </P> -<A NAME="toc114"></A> <H3>Solving metavariables</H3> <P> Use the command <CODE>put_tree = pt</CODE> with the option <CODE>-typecheck</CODE>: @@ -4261,7 +3896,6 @@ is shown and no tree is returned: <P> <!-- NEW --> </P> -<A NAME="toc115"></A> <H2>Polymorphism</H2> <P> <a name="secpolymorphic"></a> @@ -4294,7 +3928,6 @@ to express Haskell-type library functions: <P> <!-- NEW --> </P> -<A NAME="toc116"></A> <H3>Dependent types: exercises</H3> <P> 1. Write an abstract syntax module with above contents @@ -4311,7 +3944,6 @@ and an appropriate English concrete syntax. 
Try to parse the commands <P> <!-- NEW --> </P> -<A NAME="toc117"></A> <H2>Proof objects</H2> <P> <B>Curry-Howard isomorphism</B> = <B>propositions as types principle</B>: @@ -4356,7 +3988,6 @@ Example: the fact that 2 is less than 4 has the proof object <P> <!-- NEW --> </P> -<A NAME="toc118"></A> <H3>Proof-carrying documents</H3> <P> Idea: to be semantically well-formed, the abstract syntax of a document @@ -4400,7 +4031,6 @@ A legal connection is formed by the function <P> <!-- NEW --> </P> -<A NAME="toc119"></A> <H2>Restricted polymorphism</H2> <P> Above, all Actions were either of @@ -4425,7 +4055,6 @@ The notion of class uses the Curry-Howard isomorphism as follows: <P> <!-- NEW --> </P> -<A NAME="toc120"></A> <H3>Example: classes for switching and dimming</H3> <P> We modify the smart house grammar: @@ -4448,7 +4077,6 @@ Classes for new actions can be added incrementally. <P> <!-- NEW --> </P> -<A NAME="toc121"></A> <H2>Variable bindings</H2> <P> <a name="secbinding"></a> @@ -4482,7 +4110,6 @@ Examples from informal mathematical language: <P> <!-- NEW --> </P> -<A NAME="toc122"></A> <H3>Higher-order abstract syntax</H3> <P> Abstract syntax can use functions as arguments: @@ -4520,7 +4147,6 @@ expressed using higher-order syntactic constructors. <P> <!-- NEW --> </P> -<A NAME="toc123"></A> <H3>Higher-order abstract syntax: linearization</H3> <P> HOAS has proved to be useful in the semantics and computer implementation of @@ -4554,7 +4180,6 @@ If there are more bindings, we add <CODE>$1</CODE>, <CODE>$2</CODE>, etc. <P> <!-- NEW --> </P> -<A NAME="toc124"></A> <H3>Eta expansion</H3> <P> To make sense of linearization, syntax trees must be @@ -4603,7 +4228,6 @@ The linearization of the variable <CODE>x</CODE> is, <P> <!-- NEW --> </P> -<A NAME="toc125"></A> <H3>Parsing variable bindings</H3> <P> GF can treat any one-word string as a variable symbol.
@@ -4623,7 +4247,6 @@ Variables must be bound if they are used: <P> <!-- NEW --> </P> -<A NAME="toc126"></A> <H3>Exercises on variable bindings</H3> <P> 1. Write an abstract syntax of the whole @@ -4642,7 +4265,6 @@ guarantee non-ambiguity. <P> <!-- NEW --> </P> -<A NAME="toc127"></A> <H2>Semantic definitions</H2> <P> <a name="secdefdef"></a> @@ -4681,7 +4303,6 @@ The key word is <CODE>def</CODE>: <P> <!-- NEW --> </P> -<A NAME="toc128"></A> <H3>Computing a tree</H3> <P> Computation: follow a chain of definition until no definition @@ -4707,7 +4328,6 @@ Computation in GF is performed with the <CODE>put_term</CODE> command and the <P> <!-- NEW --> </P> -<A NAME="toc129"></A> <H3>Definitional equality</H3> <P> Two trees are definitionally equal if they compute into the same tree. @@ -4735,7 +4355,6 @@ so that an object of one also is an object of the other. <P> <!-- NEW --> </P> -<A NAME="toc130"></A> <H3>Judgement forms for constructors</H3> <P> The judgement form <CODE>data</CODE> tells that a category has @@ -4765,7 +4384,6 @@ marked as <CODE>data</CODE> will be treated as variables. <P> <!-- NEW --> </P> -<A NAME="toc131"></A> <H3>Exercises on semantic definitions</H3> <P> 1. Implement an interpreter of a small functional programming @@ -4781,7 +4399,6 @@ Type checking can be invoked with <CODE>put_term -transform=solve</CODE>. 
<P> <!-- NEW --> </P> -<A NAME="toc132"></A> <H2>Lesson 6: Grammars of formal languages</H2> <P> <a name="chapseven"></a> @@ -4798,7 +4415,6 @@ Goals: <P> <!-- NEW --> </P> -<A NAME="toc133"></A> <H3>Arithmetic expressions</H3> <P> We construct a calculator with addition, subtraction, multiplication, and @@ -4829,7 +4445,6 @@ grammars are not allowed to declare functions with <CODE>Int</CODE> as value typ <P> <!-- NEW --> </P> -<A NAME="toc134"></A> <H3>Concrete syntax: a simple approach</H3> <P> We begin with a @@ -4871,7 +4486,6 @@ First problems: <P> <!-- NEW --> </P> -<A NAME="toc135"></A> <H2>Lexing and unlexing</H2> <P> <a name="seclexing"></a> @@ -4924,7 +4538,6 @@ In linearization, we use a corresponding <B>unlexer</B>: <P> <!-- NEW --> </P> -<A NAME="toc136"></A> <H3>Most common lexers and unlexers</H3> <TABLE ALIGN="center" CELLPADDING="4" BORDER="1"> <TR> @@ -4962,7 +4575,6 @@ In linearization, we use a corresponding <B>unlexer</B>: <P> <!-- NEW --> </P> -<A NAME="toc137"></A> <H2>Precedence and fixity</H2> <P> Arithmetic expressions should be unambiguous. If we write @@ -5001,7 +4613,6 @@ The usual precedence rules: <P> <!-- NEW --> </P> -<A NAME="toc138"></A> <H3>Precedence as a parameter</H3> <P> Precedence can be made into an inherent feature of expressions: @@ -5046,7 +4657,6 @@ This idea is encoded in the operation <P> <!-- NEW --> </P> -<A NAME="toc139"></A> <H3>Fixities</H3> <P> We can define left-associative infix expressions: @@ -5087,7 +4697,6 @@ Now we can write the whole concrete syntax of <CODE>Calculator</CODE> compactly: <P> <!-- NEW --> </P> -<A NAME="toc140"></A> <H3>Exercises on precedence</H3> <P> 1. Define non-associative and right-associative infix operations @@ -5101,7 +4710,6 @@ Test parsing with and without a pipe to <CODE>pt -transform=compute</CODE>. 
<P> <!-- NEW --> </P> -<A NAME="toc141"></A> <H2>Code generation as linearization</H2> <P> Translate arithmetic (infix) to JVM (postfix): @@ -5131,7 +4739,6 @@ Just give linearization rules for JVM: <P> <!-- NEW --> </P> -<A NAME="toc142"></A> <H3>Programs with variables</H3> <P> A <B>straight code</B> programming language, with @@ -5180,7 +4787,6 @@ of the extension is <CODE>Prog</CODE>. <P> <!-- NEW --> </P> -<A NAME="toc143"></A> <H3>Exercises on code generation</H3> <P> 1. Define a C-like concrete syntax of the straight-code language. @@ -5221,7 +4827,6 @@ point literals as arguments. <P> <!-- NEW --> </P> -<A NAME="toc144"></A> <H1>Lesson 7: Embedded grammars</H1> <P> <a name="chapeight"></a> @@ -5239,7 +4844,6 @@ Goals: <P> <!-- NEW --> </P> -<A NAME="toc145"></A> <H2>Functionalities of an embedded grammar format</H2> <P> GF grammars can be used as parts of programs written in other programming @@ -5256,17 +4860,16 @@ This facility is based on several components: <P> <!-- NEW --> </P> -<A NAME="toc146"></A> <H2>The portable grammar format</H2> <P> The portable format is called PGF, "Portable Grammar Format". </P> <P> -This format is produced by the GF batch compiler <CODE>gfc</CODE>, +This format is produced by the GF batch compiler <CODE>gf</CODE>, executable from the operative system shell: </P> <PRE> - % gfc --make SOURCE.gf + % gf --make SOURCE.gf </PRE> <P> PGF is the recommended format in @@ -5284,7 +4887,6 @@ general-purpose programming (or bytecode in Java). 
<P> <!-- NEW --> </P> -<A NAME="toc147"></A> <H3>Haskell: the EmbedAPI module</H3> <P> The Haskell API contains (among other things) the following types and functions: @@ -5313,7 +4915,6 @@ It is available as a part of the GF distribution, in the file <P> <!-- NEW --> </P> -<A NAME="toc148"></A> <H3>First application: a translator</H3> <P> Let us first build a stand-alone translator, which can translate @@ -5348,14 +4949,13 @@ For this, you need the Haskell compiler <A HREF="http://www.haskell.org/ghc">GHC <P> <!-- NEW --> </P> -<A NAME="toc149"></A> -<H3>Producing GFCC for the translator</H3> +<H3>Producing PGF for the translator</H3> <P> -Then produce a GFCC file. For instance, the <CODE>Food</CODE> grammar set can be +Then produce a PGF file. For instance, the <CODE>Food</CODE> grammar set can be compiled as follows: </P> <PRE> - % gfc --make FoodEng.gf FoodIta.gf + % gf --make FoodEng.gf FoodIta.gf </PRE> <P> This produces the file <CODE>Food.pgf</CODE> (its name comes from the abstract syntax). @@ -5376,7 +4976,6 @@ The result is given in all languages except the input language. <P> <!-- NEW --> </P> -<A NAME="toc150"></A> <H3>A translator loop</H3> <P> To avoid starting the translator over and over again: @@ -5398,7 +4997,6 @@ is <CODE>quit</CODE>. 
<P> <!-- NEW --> </P> -<A NAME="toc151"></A> <H3>A question-answer system</H3> <P> <a name="secmathprogram"></a> @@ -5443,7 +5041,6 @@ To reply in the <I>same</I> language as the question: <P> <!-- NEW --> </P> -<A NAME="toc152"></A> <H3>Abstract syntax of the query system</H3> <P> Input: abstract syntax judgements @@ -5470,20 +5067,19 @@ Input: abstract syntax judgements <P> <!-- NEW --> </P> -<A NAME="toc153"></A> <H3>Exporting GF datatypes to Haskell</H3> <P> To make it easy to define a transfer function, we export the abstract syntax to a system of Haskell datatypes: </P> <PRE> - % gfc --output-format=haskell Query.pgf + % gf --output-format=haskell Query.pgf </PRE> <P> -It is also possible to produce the Haskell file together with GFCC, by +It is also possible to produce the Haskell file together with PGF, by </P> <PRE> - % gfc --make --output-format=haskell QueryEng.gf + % gf --make --output-format=haskell QueryEng.gf </PRE> <P> The result is a file named <CODE>Query.hs</CODE>, containing a @@ -5521,7 +5117,6 @@ The Haskell module name is the same as the abstract syntax name. <P> <!-- NEW --> </P> -<A NAME="toc154"></A> <H3>The question-answer function</H3> <P> Haskell's type checker guarantees that the functions are well-typed also with @@ -5545,7 +5140,6 @@ respect to GF. <P> <!-- NEW --> </P> -<A NAME="toc155"></A> <H3>Converting between Haskell and GF trees</H3> <P> The generated Haskell module also contains @@ -5578,7 +5172,6 @@ For the programmer, it is enougo to know: <P> <!-- NEW --> </P> -<A NAME="toc156"></A> <H3>Putting it all together: the transfer definition</H3> <PRE> module TransferDef where @@ -5612,7 +5205,6 @@ For the programmer, it is enougo to know: <P> <!-- NEW --> </P> -<A NAME="toc157"></A> <H3>Putting it all together: the Main module</H3> <P> Here is the complete code in the Haskell file <CODE>TransferLoop.hs</CODE>. @@ -5644,14 +5236,13 @@ Here is the complete code in the Haskell file <CODE>TransferLoop.hs</CODE>. 
<P> <!-- NEW --> </P> -<A NAME="toc158"></A> <H3>Putting it all together: the Makefile</H3> <P> To automate the production of the system, we write a <CODE>Makefile</CODE> as follows: </P> <PRE> all: - gfc --make --output-format=haskell QueryEng + gf --make --output-format=haskell QueryEng ghc --make -o ./math TransferLoop.hs strip math </PRE> @@ -5682,7 +5273,6 @@ Just to summarize, the source of the application consists of the following files <P> <!-- NEW --> </P> -<A NAME="toc159"></A> <H2>Web server applications</H2> <P> PGF files can be used in web servers, for which there is a Haskell library included @@ -5701,7 +5291,6 @@ is an example of its application to the <CODE>Foods</CODE> grammars. <P> <!-- NEW --> </P> -<A NAME="toc160"></A> <H2>JavaScript applications</H2> <P> JavaScript is a programming language that has interpreters built in in most @@ -5715,14 +5304,13 @@ program compiled from GF grammars as run on an iPhone. <P> <!-- NEW --> </P> -<A NAME="toc161"></A> <H3>Compiling to JavaScript</H3> <P> JavaScript is one of the output formats of the GF batch compiler. Thus the following command generates a JavaScript file from two <CODE>Food</CODE> grammars. </P> <PRE> - % gfc --make --output-format=js FoodEng.gf FoodIta.gf + % gf --make --output-format=js FoodEng.gf FoodIta.gf </PRE> <P> The name of the generated file is <CODE>Food.js</CODE>, derived from the top-most abstract @@ -5731,7 +5319,6 @@ syntax name. This file contains the multilingual grammar as a JavaScript object. <P> <!-- NEW --> </P> -<A NAME="toc162"></A> <H3>Using the JavaScript grammar</H3> <P> To perform parsing and linearization, the run-time library @@ -5741,7 +5328,7 @@ as templates for building applications. 
</P> <P> An example of usage is -<A HREF="../lib/javascript/translator.html"><CODE>translator.html</CODE></A>, +<A HREF="http://grammaticalframework.org:41296"><CODE>translator.html</CODE></A>, which is in fact initialized with a pointer to the Food grammar, so that it provides translation between the English and Italian grammars: @@ -5752,12 +5339,11 @@ and Italian grammars: <P> The grammar must have the name <CODE>grammar.js</CODE>. The abstract syntax and start category names in <CODE>translator.html</CODE> must match the ones in the grammar. -With these changes, the translator works for any multilingual GF grammar. +With these changes, the translator works for any multilingual grammar. </P> <P> <!-- NEW --> </P> -<A NAME="toc163"></A> <H2>Language models for speech recognition</H2> <P> The standard way of using GF in speech recognition is by building @@ -5768,14 +5354,14 @@ GF supports several formats, including GSL, the formatused in the <A HREF="http://www.nuance.com">Nuance speech recognizer</A>. </P> <P> -GSL is produced from GF by running <CODE>gfc</CODE> with the flag +GSL is produced from GF by running <CODE>gf</CODE> with the flag <CODE>--output-format=gsl</CODE>. </P> <P> Example: GSL generated from <CODE>FoodsEng.gf</CODE>. </P> <PRE> - % gfc --make --output-format=gsl FoodsEng.gf + % gf --make --output-format=gsl FoodsEng.gf % more FoodsEng.gsl ;GSL2.0 @@ -5804,7 +5390,6 @@ Example: GSL generated from <CODE>FoodsEng.gf</CODE>. <P> <!-- NEW --> </P> -<A NAME="toc164"></A> <H3>More speech recognition grammar formats</H3> <P> Other formats available via the <CODE>--output-format</CODE> flag include: @@ -5849,9 +5434,9 @@ Other formats available via the <CODE>--output-format</CODE> flag include: </TABLE> <P> -All currently available formats can be seen with <CODE>gfc --help</CODE>. +All currently available formats can be seen with <CODE>gf --help</CODE>. 
</P> <!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -\-toc -thtml gf-tutorial.txt --> +<!-- cmdline: txt2tags gf-tutorial.txt --> </BODY></HTML> diff --git a/doc/gf-tutorial.txt b/doc/tutorial/gf-tutorial.txt index 8e8b8172a..8ae053a99 100644 --- a/doc/gf-tutorial.txt +++ b/doc/tutorial/gf-tutorial.txt @@ -1,6 +1,6 @@ Grammatical Framework Tutorial Aarne Ranta -Version 3.1.2, November 2008 +December 2010 (November 2008) % NOTE: this is a txt2tags file. @@ -626,7 +626,7 @@ You can chop this tutorial into a set of slides by the command ``` where the program ``htmls`` is distributed with GF (see below), in - [``GF/src/tools/Htmls.hs`` http://digitalgrammars.com/gf/src/tools/Htmls.hs] + [``GF/src/tools/Htmls.hs`` http://grammaticalframework.org/src/tools/Htmls.hs] The slides will appear as a set of files beginning with ``01-gf-tutorial.htmls``. @@ -700,7 +700,7 @@ In general, a GF grammar is **multilingual**: Open-source free software, downloaded via the GF Homepage: -[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf/] +[``grammaticalframework.org`` http://grammaticalframework.org/] There you find - binaries for Linux, Mac OS X, and Windows @@ -709,11 +709,11 @@ There you find Many examples in this tutorial are -[online http://digitalgrammars.com/gf/examples/tutorial]. +[online http://grammaticalframework.org/examples/tutorial]. Normally you don't have to compile GF yourself. But, if you do want to compile GF from source follow the -instructions in the [Developers Guide gf-developers.html]. +instructions in the [Developers Guide ../gf-developers.html]. #NEW @@ -2453,7 +2453,7 @@ can be used to read a text and return for each word its analyses ``` The command ``morpho_quiz = mq`` generates inflection exercises. 
``` - % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfc + % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo > morpho_quiz -cat=V @@ -2970,7 +2970,7 @@ Language-specific and language-independent parts - roughly, Full API documentation on-line: the **resource synopsis**, -[``digitalgrammars.com/gf/lib/resource/doc/synopsis.html`` http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html] +[``grammaticalframework.org/lib/resource/doc/synopsis.html`` http://grammaticalframework.org/lib/doc/synopsis.html] #NEW @@ -4530,10 +4530,10 @@ This facility is based on several components: The portable format is called PGF, "Portable Grammar Format". -This format is produced by the GF batch compiler ``gfc``, +This format is produced by the GF batch compiler ``gf``, executable from the operative system shell: ``` - % gfc --make SOURCE.gf + % gf --make SOURCE.gf ``` PGF is the recommended format in which final grammar products are distributed, because they @@ -4605,12 +4605,12 @@ For this, you need the Haskell compiler [GHC http://www.haskell.org/ghc]. #NEW -===Producing GFCC for the translator=== +===Producing PGF for the translator=== -Then produce a GFCC file. For instance, the ``Food`` grammar set can be +Then produce a PGF file. For instance, the ``Food`` grammar set can be compiled as follows: ``` - % gfc --make FoodEng.gf FoodIta.gf + % gf --make FoodEng.gf FoodIta.gf ``` This produces the file ``Food.pgf`` (its name comes from the abstract syntax). 
@@ -4714,11 +4714,11 @@ abstract Query = { To make it easy to define a transfer function, we export the abstract syntax to a system of Haskell datatypes: ``` - % gfc --output-format=haskell Query.pgf + % gf --output-format=haskell Query.pgf ``` -It is also possible to produce the Haskell file together with GFCC, by +It is also possible to produce the Haskell file together with PGF, by ``` - % gfc --make --output-format=haskell QueryEng.gf + % gf --make --output-format=haskell QueryEng.gf ``` The result is a file named ``Query.hs``, containing a module named ``Query``. @@ -4871,7 +4871,7 @@ translate tr gr s = case parseAllLang gr (startCat gr) s of To automate the production of the system, we write a ``Makefile`` as follows: ``` all: - gfc --make --output-format=haskell QueryEng + gf --make --output-format=haskell QueryEng ghc --make -o ./math TransferLoop.hs strip math ``` @@ -4928,7 +4928,7 @@ program compiled from GF grammars as run on an iPhone. JavaScript is one of the output formats of the GF batch compiler. Thus the following command generates a JavaScript file from two ``Food`` grammars. ``` - % gfc --make --output-format=js FoodEng.gf FoodIta.gf + % gf --make --output-format=js FoodEng.gf FoodIta.gf ``` The name of the generated file is ``Food.js``, derived from the top-most abstract syntax name. This file contains the multilingual grammar as a JavaScript object. @@ -4944,7 +4944,7 @@ some other JavaScript and HTML files; these files can be used as templates for building applications. An example of usage is -[``translator.html`` ../lib/javascript/translator.html], +[``translator.html`` http://grammaticalframework.org:41296], which is in fact initialized with a pointer to the Food grammar, so that it provides translation between the English and Italian grammars: @@ -4969,12 +4969,12 @@ The standard way of using GF in speech recognition is by building GF supports several formats, including GSL, the formatused in the [Nuance speech recognizer www.nuance.com]. 
-GSL is produced from GF by running ``gfc`` with the flag +GSL is produced from GF by running ``gf`` with the flag ``--output-format=gsl``. Example: GSL generated from ``FoodsEng.gf``. ``` - % gfc --make --output-format=gsl FoodsEng.gf + % gf --make --output-format=gsl FoodsEng.gf % more FoodsEng.gsl ;GSL2.0 @@ -5017,6 +5017,6 @@ Other formats available via the ``--output-format`` flag include: | ``slf`` | finite automaton in the HTK SLF format | ``slf_sub`` | finite automaton with sub-automata in HTK SLF -All currently available formats can be seen with ``gfc --help``. +All currently available formats can be seen with ``gf --help``. diff --git a/doc/iphone.jpg b/doc/tutorial/iphone.jpg Binary files differindex d9e138b88..d9e138b88 100644 --- a/doc/iphone.jpg +++ b/doc/tutorial/iphone.jpg diff --git a/doc/mytree.png b/doc/tutorial/mytree.png Binary files differindex fafcc8772..fafcc8772 100644 --- a/doc/mytree.png +++ b/doc/tutorial/mytree.png diff --git a/doc/vr.html b/doc/vr.html deleted file mode 100644 index e5dee1885..000000000 --- a/doc/vr.html +++ /dev/null @@ -1,46 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" CONTENT="http://txt2tags.sf.net"> -<TITLE>Library-Based Grammar Engineering</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> -<P ALIGN="center"><CENTER><H1>Library-Based Grammar Engineering</H1> -<FONT SIZE="4"> -<I>VR Project 2006-2008</I><BR> -</FONT></CENTER> - -<H1>Staff</H1> -<P> -Lars Borin (co-leader) -</P> -<P> -Robin Cooper (co-leader) -</P> -<P> -Aarne Ranta (project responsible) -</P> -<P> -Sibylle Schupp (co-leader) -</P> -<H1>Publications</H1> -<P> -Ali El Dada, MSc Thesis -</P> -<P> -Muhammad Humayoun, MSc Thesis -</P> -<P> -Janna Khegai, -Language Engineering in GF, PhD Thesis, Chalmers. 2006. 
-</P> -<H1>Links</H1> -<P> -<A HREF="http://www.cs.chalmers.se/~aarne/GF/">GF</A> -</P> -<P> -<A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A> -</P> - -<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -thtml vr.txt --> -</BODY></HTML> diff --git a/doc/vr.txt b/doc/vr.txt deleted file mode 100644 index 9b5045978..000000000 --- a/doc/vr.txt +++ /dev/null @@ -1,32 +0,0 @@ -Library-Based Grammar Engineering -VR Project 2006-2008 - - -=Staff= - -Lars Borin (co-leader) - -Robin Cooper (co-leader) - -Aarne Ranta (project responsible) - -Sibylle Schupp (co-leader) - - - -=Publications= - -Ali El Dada, MSc Thesis - -Muhammad Humayoun, MSc Thesis - -Janna Khegai, -Language Engineering in GF, PhD Thesis, Chalmers. 2006. - - - -=Links= - -[GF http://www.cs.chalmers.se/~aarne/GF/] - -[Functional Morphology http://www.cs.chalmers.se/~markus/FM/] |
