summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2009-10-08 13:13:48 +0000
committerkrasimir <krasimir@chalmers.se>2009-10-08 13:13:48 +0000
commita84cdb32c75b11a2e7bae2906137cefae4ad03ce (patch)
tree5576798ba6f130070615beab0511ad1fd6e942db /examples
parentffb5fdcd2580b00203456636be887adcc9759bda (diff)
just for fun - grammar for trigram models in GF
Diffstat (limited to 'examples')
-rw-r--r--examples/trigram/Shaw.gf84
-rw-r--r--examples/trigram/ShawCnc.gf23
-rw-r--r--examples/trigram/Trigram.gf34
-rw-r--r--examples/trigram/TrigramCnc.gf12
4 files changed, 153 insertions, 0 deletions
diff --git a/examples/trigram/Shaw.gf b/examples/trigram/Shaw.gf
new file mode 100644
index 000000000..d8a2814ea
--- /dev/null
+++ b/examples/trigram/Shaw.gf
@@ -0,0 +1,84 @@
+abstract Shaw = Trigram ** {
+
+-- This module contains a trigram model of this quote from George Bernard Shaw:
+--
+-- The reasonable man adapts himself to the world; the unreasonable one persists in trying
+-- to adapt the world to himself. Therefore all progress depends on the unreasonable man.
+
+data
+ the_W,reasonable_W,man_W,adapts_W,himself_W,to_W,world_W,unreasonable_W,
+ one_W,persists_W,in_W,trying_W,adapt_W,therefore_W,all_W,progress_W,depends_W,on_W : Word ;
+ -- Unigrams: relative frequencies over the 28 word tokens of the quote.
+ the_U : Unigram the_W ; --# prob 0.179
+ reasonable_U : Unigram reasonable_W ; --# prob 0.036
+ man_U : Unigram man_W ; --# prob 0.071
+ adapts_U : Unigram adapts_W ; --# prob 0.036
+ himself_U : Unigram himself_W ; --# prob 0.071
+ to_U : Unigram to_W ; --# prob 0.107
+ world_U : Unigram world_W ; --# prob 0.071
+ unreasonable_U : Unigram unreasonable_W ; --# prob 0.071
+ one_U : Unigram one_W ; --# prob 0.036
+ persists_U : Unigram persists_W ; --# prob 0.036
+ in_U : Unigram in_W ; --# prob 0.036
+ trying_U : Unigram trying_W ; --# prob 0.036
+ adapt_U : Unigram adapt_W ; --# prob 0.036
+ therefore_U : Unigram therefore_W ; --# prob 0.036
+ all_U : Unigram all_W ; --# prob 0.036
+ progress_U : Unigram progress_W ; --# prob 0.036
+ depends_U : Unigram depends_W ; --# prob 0.036
+ on_U : Unigram on_W ; --# prob 0.036
+ -- Bigrams: relative frequencies over the 27 bigram tokens of the quote. Each distinct
+ -- bigram is declared once; "the world" and "the unreasonable" occur twice, hence 0.074.
+ the_reasonable_B : Bigram the_W reasonable_W ; --# prob 0.037
+ reasonable_man_B : Bigram reasonable_W man_W ; --# prob 0.037
+ man_adapts_B : Bigram man_W adapts_W ; --# prob 0.037
+ adapts_himself_B : Bigram adapts_W himself_W ; --# prob 0.037
+ himself_to_B : Bigram himself_W to_W ; --# prob 0.037
+ to_the_B : Bigram to_W the_W ; --# prob 0.037
+ the_world_B : Bigram the_W world_W ; --# prob 0.074
+ world_the_B : Bigram world_W the_W ; --# prob 0.037
+ the_unreasonable_B : Bigram the_W unreasonable_W ; --# prob 0.074
+ unreasonable_one_B : Bigram unreasonable_W one_W ; --# prob 0.037
+ one_persists_B : Bigram one_W persists_W ; --# prob 0.037
+ persists_in_B : Bigram persists_W in_W ; --# prob 0.037
+ in_trying_B : Bigram in_W trying_W ; --# prob 0.037
+ trying_to_B : Bigram trying_W to_W ; --# prob 0.037
+ to_adapt_B : Bigram to_W adapt_W ; --# prob 0.037
+ adapt_the_B : Bigram adapt_W the_W ; --# prob 0.037
+ world_to_B : Bigram world_W to_W ; --# prob 0.037
+ to_himself_B : Bigram to_W himself_W ; --# prob 0.037
+ himself_therefore_B : Bigram himself_W therefore_W ; --# prob 0.037
+ therefore_all_B : Bigram therefore_W all_W ; --# prob 0.037
+ all_progress_B : Bigram all_W progress_W ; --# prob 0.037
+ progress_depends_B : Bigram progress_W depends_W ; --# prob 0.037
+ depends_on_B : Bigram depends_W on_W ; --# prob 0.037
+ on_the_B : Bigram on_W the_W ; --# prob 0.037
+ unreasonable_man_B : Bigram unreasonable_W man_W ; --# prob 0.037
+ -- Trigrams: each of the 26 trigram tokens of the quote occurs exactly once.
+ the_reasonable_man_T : Trigram the_W reasonable_W man_W ; --# prob 0.038
+ reasonable_man_adapts_T : Trigram reasonable_W man_W adapts_W ; --# prob 0.038
+ man_adapts_himself_T : Trigram man_W adapts_W himself_W ; --# prob 0.038
+ adapts_himself_to_T : Trigram adapts_W himself_W to_W ; --# prob 0.038
+ himself_to_the_T : Trigram himself_W to_W the_W ; --# prob 0.038
+ to_the_world_T : Trigram to_W the_W world_W ; --# prob 0.038
+ the_world_the_T : Trigram the_W world_W the_W ; --# prob 0.038
+ world_the_unreasonable_T : Trigram world_W the_W unreasonable_W ; --# prob 0.038
+ the_unreasonable_one_T : Trigram the_W unreasonable_W one_W ; --# prob 0.038
+ unreasonable_one_persists_T : Trigram unreasonable_W one_W persists_W ; --# prob 0.038
+ one_persists_in_T : Trigram one_W persists_W in_W ; --# prob 0.038
+ persists_in_trying_T : Trigram persists_W in_W trying_W ; --# prob 0.038
+ in_trying_to_T : Trigram in_W trying_W to_W ; --# prob 0.038
+ trying_to_adapt_T : Trigram trying_W to_W adapt_W ; --# prob 0.038
+ to_adapt_the_T : Trigram to_W adapt_W the_W ; --# prob 0.038
+ adapt_the_world_T : Trigram adapt_W the_W world_W ; --# prob 0.038
+ the_world_to_T : Trigram the_W world_W to_W ; --# prob 0.038
+ world_to_himself_T : Trigram world_W to_W himself_W ; --# prob 0.038
+ to_himself_therefore_T : Trigram to_W himself_W therefore_W ; --# prob 0.038
+ himself_therefore_all_T : Trigram himself_W therefore_W all_W ; --# prob 0.038
+ therefore_all_progress_T : Trigram therefore_W all_W progress_W ; --# prob 0.038
+ all_progress_depends_T : Trigram all_W progress_W depends_W ; --# prob 0.038
+ progress_depends_on_T : Trigram progress_W depends_W on_W ; --# prob 0.038
+ depends_on_the_T : Trigram depends_W on_W the_W ; --# prob 0.038
+ on_the_unreasonable_T : Trigram on_W the_W unreasonable_W ; --# prob 0.038
+ the_unreasonable_man_T : Trigram the_W unreasonable_W man_W ; --# prob 0.038
+} \ No newline at end of file
diff --git a/examples/trigram/ShawCnc.gf b/examples/trigram/ShawCnc.gf
new file mode 100644
index 000000000..b56ec16a9
--- /dev/null
+++ b/examples/trigram/ShawCnc.gf
@@ -0,0 +1,23 @@
+concrete ShawCnc of Shaw = TrigramCnc ** {
+-- Each Word constant of Shaw linearizes to its surface string.
+lin
+ the_W          = "the" ;
+ reasonable_W   = "reasonable" ;
+ man_W          = "man" ;
+ adapts_W       = "adapts" ;
+ himself_W      = "himself" ;
+ to_W           = "to" ;
+ world_W        = "world" ;
+ unreasonable_W = "unreasonable" ;
+ one_W          = "one" ;
+ persists_W     = "persists" ;
+ in_W           = "in" ;
+ trying_W       = "trying" ;
+ adapt_W        = "adapt" ;
+ therefore_W    = "therefore" ;
+ all_W          = "all" ;
+ progress_W     = "progress" ;
+ depends_W      = "depends" ;
+ on_W           = "on" ;
+
+} \ No newline at end of file
diff --git a/examples/trigram/Trigram.gf b/examples/trigram/Trigram.gf
new file mode 100644
index 000000000..2e6e18fdd
--- /dev/null
+++ b/examples/trigram/Trigram.gf
@@ -0,0 +1,34 @@
+abstract Trigram = {
+
+cat
+ -- A lexicon is a set of 'Word's
+ Word ;
+
+ -- All N-gram instances seen in the corpus are abstract syntax constants
+ Unigram (a : Word) ;
+ Bigram (a,b : Word) ;
+ Trigram (a,b,c : Word) ;
+
+ -- A text is a sequence of words, where the sequence is indexed by its last two tokens
+ Seq (a,b : Word) ;
+
+ -- The estimated probability of the trigram 'a b c' is the total probability of all
+ -- trees of type Prob a b c.
+ Prob (a,b,c : Word) ;
+
+data
+ -- Here we construct a sequence by using nil and cons. The Prob argument ensures
+ -- that the sequence contains only valid N-grams and contributes the right
+ -- probability mass
+ nil : (a,b,c : Word) -> Prob a b c -> Seq b c ;
+ cons : ({a,b} : Word) -> Seq a b -> (c : Word) -> Prob a b c -> Seq b c ;
+
+ -- Here we construct probabilities. There are three ways: by trigrams, by bigrams and
+ -- by unigrams. Since the trigramP, bigramP, unigramP functions have some associated
+ -- probabilities as well, this results in linear smoothing between the unigram, bigram
+ -- and trigram models
+ trigramP : ({a,b,c} : Word) -> Trigram a b c -> Prob a b c ;
+ bigramP : ({a,b,c} : Word) -> Bigram a b -> Bigram b c -> Prob a b c ;
+ unigramP : ({a,b,c} : Word) -> Unigram a -> Unigram b -> Unigram c -> Prob a b c ;
+
+} \ No newline at end of file
diff --git a/examples/trigram/TrigramCnc.gf b/examples/trigram/TrigramCnc.gf
new file mode 100644
index 000000000..917aa3db8
--- /dev/null
+++ b/examples/trigram/TrigramCnc.gf
@@ -0,0 +1,12 @@
+concrete TrigramCnc of Trigram = {
+-- Words and sequences linearize to strings; N-gram and Prob trees to empty records.
+lincat
+ Word = Str ;
+ Seq = Str ;
+ Unigram, Bigram, Trigram, Prob = {} ;
+
+lin
+ -- The trailing Prob argument is ignored in both rules; it adds no tokens.
+ nil w1 w2 w3 _ = w1 ++ w2 ++ w3 ;
+ cons _ _ seq word _ = seq ++ word ;
+} \ No newline at end of file