diff options
| author | krasimir <krasimir@chalmers.se> | 2009-10-08 13:13:48 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-10-08 13:13:48 +0000 |
| commit | a84cdb32c75b11a2e7bae2906137cefae4ad03ce (patch) | |
| tree | 5576798ba6f130070615beab0511ad1fd6e942db /examples/trigram/Trigram.gf | |
| parent | ffb5fdcd2580b00203456636be887adcc9759bda (diff) | |
just for fun - grammar for trigram models in GF
Diffstat (limited to 'examples/trigram/Trigram.gf')
| -rw-r--r-- | examples/trigram/Trigram.gf | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/examples/trigram/Trigram.gf b/examples/trigram/Trigram.gf new file mode 100644 index 000000000..2e6e18fdd --- /dev/null +++ b/examples/trigram/Trigram.gf @@ -0,0 +1,34 @@ +abstract Trigram = { + +cat + -- A lexicon is a set of 'Word's. + Word ; + + -- All N-gram instances seen in the corpus are abstract syntax constants. + Unigram (a : Word) ; + Bigram (a,b : Word) ; + Trigram (a,b,c : Word) ; + + -- A text is a sequence of words, where the sequence is indexed by its last two tokens. + Seq (a,b : Word) ; + + -- The estimated probability of the trigram 'a b c' is the total probability of all + -- trees of type Prob a b c. + Prob (a,b,c : Word) ; + +data + -- Here we construct sequences by using nil and cons. The Prob argument ensures + -- that the sequence contains only valid N-grams and contributes the right + -- probability mass. + nil : (a,b,c : Word) -> Prob a b c -> Seq b c ; + cons : ({a,b} : Word) -> Seq a b -> (c : Word) -> Prob a b c -> Seq b c ; + + -- Here we construct probabilities. There are three ways: by trigrams, by bigrams and + -- by unigrams. Since the trigramP, bigramP, unigramP functions have some associated + -- probabilities as well, this results in linear smoothing between the unigram, bigram + -- and trigram models. + trigramP : ({a,b,c} : Word) -> Trigram a b c -> Prob a b c ; + bigramP : ({a,b,c} : Word) -> Bigram a b -> Bigram b c -> Prob a b c ; + unigramP : ({a,b,c} : Word) -> Unigram a -> Unigram b -> Unigram c -> Prob a b c ; + +}
\ No newline at end of file |
