summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <unknown>2005-11-01 08:10:54 +0000
committeraarne <unknown>2005-11-01 08:10:54 +0000
commita48236172fc24f32792d56675b0ef682d6119348 (patch)
treec8176b603a69c151ed3bd99c12bebf1ae54f7a18
parentf06638cc7d90eb8298180d36e79fc292a9f898dc (diff)
PCFG example
-rw-r--r--examples/TWA.cf52
-rw-r--r--src/GF/Probabilistic/Probabilistic.hs17
2 files changed, 64 insertions, 5 deletions
diff --git a/examples/TWA.cf b/examples/TWA.cf
new file mode 100644
index 000000000..56b14dd2c
--- /dev/null
+++ b/examples/TWA.cf
@@ -0,0 +1,52 @@
+-- example of probabilistic grammar from Jurafsky & Martin p. 449
+
+PredVP. S ::= NP VP ; --# prob 0.80
+PredAux. S ::= Aux NP VP ; --# prob 0.15
+JustVP. S ::= VP ; --# prob 0.05
+
+DetNO. NP ::= Det Nom ; --# prob 0.20
+PNounNP. NP ::= PNoun ; --# prob 0.35
+NomNP. NP ::= Nom ; --# prob 0.05
+ProNP. NP ::= Pro ; --# prob 0.40
+
+NounNom. Nom ::= Noun ; --# prob 0.75
+CompNom. Nom ::= Noun Nom ; --# prob 0.20
+PNounNom. Nom ::= PNoun Nom ; --# prob 0.05
+
+IntrVP. VP ::= Verb ; --# prob 0.55
+TrVP. VP ::= Verb NP ; --# prob 0.40
+DitrVP: VP ::= Verb NP NP ; --# prob 0.05
+
+that. Det ::= "that" ; --# prob 0.05
+the. Det ::= "the" ; --# prob 0.80
+a. Det ::= "a" ; --# prob 0.15
+
+bookN. Noun ::= "book" ; --# prob 0.10
+flights. Noun ::= "flights" ; --# prob 0.50
+meal. Noun ::= "meal" ; --# prob 0.40
+
+bookV. Verb ::= "book" ; --# prob 0.30
+includeV. Verb ::= "include" ; --# prob 0.30
+want. Verb ::= "want" ; --# prob 0.40
+
+can. Aux ::= "can" ; --# prob 0.40
+does. Aux ::= "does" ; --# prob 0.30
+do. Aux ::= "do" ; --# prob 0.30
+
+TWA. PNoun ::= "TWA" ; --# prob 0.40
+Denver. PNoun ::= "Denver" ; --# prob 0.60
+
+you. Pro ::= "you" ; --# prob 0.40
+I. Pro ::= "I" ; --# prob 0.60
+
+-- > p -prob "can you book TWA flights"
+--
+-- 4.3200000000000016e-7
+-- 3.7800000000000013e-7
+-- PredAux can (ProNP you) (TrVP bookV (NomNP (PNounNom TWA (NounNom flights))))
+-- PredAux can (ProNP you) (DitrVP bookV (PNounNP TWA) (NomNP (NounNom flights)))
+-- [0.15, 0.40,0.40, 0.40, 0.05, 0.30, 0.35, 0.40, 0.05, 0.75, 0.50]
+--
+-- J&M have different figures, but they seem to be wrong. For
+-- instance, the products have 12 terms although the trees have only
+-- 11 constructors.
diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs
index daf382790..1126776c8 100644
--- a/src/GF/Probabilistic/Probabilistic.hs
+++ b/src/GF/Probabilistic/Probabilistic.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 19:02:35 $
+-- > CVS $Date: 2005/11/01 09:10:54 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.3 $
+-- > CVS $Revision: 1.4 $
--
-- Probabilistic abstract syntax. AR 30\/10\/2005
--
@@ -78,12 +78,19 @@ getProbsFromFile :: Options -> FilePath -> IO Probs
getProbsFromFile opts file = do
s <- maybe (readFile file) readFile $ getOptVal opts probFile
return $ buildTree $ concatMap pProb $ lines s
- where
- pProb s = case words s of
+-- where
+pProb s = case words s of
"--#":"prob":f:p:_ | isDouble p -> [(zIdent f, read p)]
- f:p:_ | isDouble p -> [(zIdent f, read p)]
+ f:ps@(g:rest) -> case span (/= "--#") ps of
+ (_,_:"prob":p:_) | isDouble p -> [(zIdent f', readD p)] where
+ f' = if f=="fun" then ident g else ident f
+ _ -> []
_ -> []
+ where
isDouble = all (flip elem ('.':['0'..'9']))
+ ident = takeWhile (flip notElem ".:")
+ readD :: String -> Double
+ readD = read
type Probs = BinTree Ident Double