summaryrefslogtreecommitdiff
path: root/examples/trigram/Trigram.gf
blob: 0ad99b0bfe4dc46486b3b10fcc150f17edb4d77c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
abstract Trigram = {

cat 
    -- A sentence
    S ;

    -- A lexicon is a set of 'Word's
    Word ;

    -- All N-gram instances seen in the corpus are abstract syntax constants
    Unigram (a     : Word) ;
    Bigram  (a,b   : Word) ;
    Trigram (a,b,c : Word) ;

    -- A text is a sequence words where the sequence is indexed by the last two tokens
    Seq (a,b : Word) ;

    -- The estimated probability of the trigram 'a b c' is the total probability of all 
    -- trees of type Prob a b c.
    Prob (a,b,c : Word) ;

data
    sent : ({a,b} : Word) -> Seq a b -> S ;

    -- Here we construct sequence by using nil and cons. The Prob argument ensures
    -- that the sequence contains only valid N-grams and contributes with the right
    -- probability mass
    nil  : (a,b,c : Word) -> Prob a b c -> Seq b c ;
    cons : ({a,b} : Word) -> Seq a b -> (c : Word) -> Prob a b c -> Seq b c ;

    -- Here we construct probabilities. There are two ways: by trigrams, by bigrams and
    -- by unigrams. Since the trigramP, bigramP, unigramP functions have some associated
    -- probabilities as well this results in linear smoothing between the unigram, bigram 
    -- and trigram models
    trigramP : ({a,b,c} : Word) -> Trigram a b c -> Prob a b c ;
    bigramP  : ({a,b,c} : Word) -> Bigram a b -> Bigram b c -> Prob a b c ;
    unigramP : ({a,b,c} : Word) -> Unigram a -> Unigram b -> Unigram c -> Prob a b c ;

}