summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2010-01-26 21:08:04 +0000
committeraarne <aarne@chalmers.se>2010-01-26 21:08:04 +0000
commitdd4c792e67a3124706bef57ab23ff542d2d0d961 (patch)
tree5fc9953ce5b0809568bbdbf3f010d0909477bfe0 /src
parente91c610e5afd0083574d2f28cda07a03fe52ea8f (diff)
probability ranking (rt) and gr -probs=FILE
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Commands.hs46
-rw-r--r--src/runtime/haskell/PGF/Probabilistic.hs19
2 files changed, 59 insertions, 6 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index f537099f8..a032145a8 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -20,6 +20,8 @@ import PGF.Macros
import PGF.Data ----
import PGF.Morphology
import PGF.Printer
+import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
+import PGF.Generate (genRandomProb) ----
import GF.Compile.Export
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
import GF.Infra.UseIO
@@ -42,6 +44,7 @@ import System.Cmd
import Text.PrettyPrint
import Data.List (sort)
import Debug.Trace
+import System.Random (newStdGen) ----
type CommandOutput = ([Expr],String) ---- errors, etc
@@ -245,11 +248,14 @@ allCommands cod env@(pgf, mos) = Map.fromList [
flags = [
("cat","generation category"),
("lang","uses only functions that have linearizations in all these languages"),
- ("number","number of trees generated")
+ ("number","number of trees generated"),
+ ("probs", "file with biased probabilities (format 'f 0.4' one by line)")
],
exec = \opts _ -> do
let pgfr = optRestricted opts
- ts <- generateRandom pgfr (optType opts)
+ gen <- newStdGen
+ mprobs <- optProbs opts pgfr
+ ts <- return $ genRandomProb mprobs gen pgfr (optType opts)
returnFromExprs $ take (optNum opts) ts
}),
("gt", emptyCommandInfo {
@@ -540,6 +546,35 @@ allCommands cod env@(pgf, mos) = Map.fromList [
_ -> return (fromString s),
flags = [("file","the input file name")]
}),
+ ("rt", emptyCommandInfo {
+ longname = "rank_trees",
+ synopsis = "show trees in an order of decreasing probability",
+ explanation = unlines [
+ "Order trees from the most to the least probable, using either",
+ "even distribution in each category (default) or biased as specified",
+ "by the file given by flag -probs=FILE, where each line has the form",
+ "'function probability', e.g. 'youPol_Pron 0.01'."
+ ],
+ exec = \opts ts -> do
+ mprobs <- optProbs opts pgf
+ let probs = maybe (defaultProbabilities pgf) id mprobs
+ let tds = rankTreesByProbs probs ts
+ if isOpt "v" opts
+ then putStrLn $
+ unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds]
+ else return ()
+ returnFromExprs $ map fst tds,
+ flags = [
+ ("probs","probabilities from this file (format 'f 0.6' per line)")
+ ],
+ options = [
+ ("v","show all trees with their probability scores")
+ ],
+ examples = [
+ "p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result",
+ "se utf8 -- set encoding to utf8 (default)"
+ ]
+ }),
("tq", emptyCommandInfo {
longname = "translation_quiz",
synopsis = "start a translation quiz",
@@ -829,6 +864,13 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"" -> []
cats -> mapMaybe readType (chunks ',' cats)
+ optProbs opts pgfr = case valStrOpts "probs" "" opts of
+ "" -> return Nothing
+ file -> do
+ ps <- getProbsFromFile file pgf ---- pgfr!
+-- putStrLn $ prProbabilities ps
+ return $ Just ps
+
optType opts =
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
in case readType str of
diff --git a/src/runtime/haskell/PGF/Probabilistic.hs b/src/runtime/haskell/PGF/Probabilistic.hs
index c0422a784..e42698cfe 100644
--- a/src/runtime/haskell/PGF/Probabilistic.hs
+++ b/src/runtime/haskell/PGF/Probabilistic.hs
@@ -2,8 +2,10 @@ module PGF.Probabilistic (
probTree -- :: Probabilities -> Tree -> Double
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
,Probabilities -- data
+ ,prProbabilities -- Probabilities -> String
,catProbs
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
+ ,defaultProbabilities -- :: PGF -> Probabilities
) where
import PGF.CId
@@ -18,6 +20,10 @@ data Probabilities = Probs {
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
}
+prProbabilities :: Probabilities -> String
+prProbabilities = unlines . map pr . M.toList . funProbs where
+ pr (f,d) = showCId f ++ "\t" ++ show d
+
getProbsFromFile :: FilePath -> PGF -> IO Probabilities
getProbsFromFile file pgf = do
s <- readFile file
@@ -33,8 +39,8 @@ fillProbs pgf funs =
| (cat,_) <- M.toList (cats (abstract pgf)),
let fs = functionsToCat pgf cat]
cats1 = map fill cats0
- funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
- in Probs funs1 (M.fromList cats1)
+ funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
+ in Probs (M.fromList funs1) (M.fromList cats1)
where
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
where
@@ -43,8 +49,13 @@ fillProbs pgf funs =
pad :: [(Double,a)] -> [(Double,a)]
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
where
- deflt = 1 - sum poss / fromIntegral (length negs)
- (poss,negs) = partition (> (-1)) (map fst pfs)
+ deflt = case length negs of
+ 0 -> 0
+ _ -> (1 - sum poss) / fromIntegral (length negs)
+ (poss,negs) = partition (> (-0.5)) (map fst pfs)
+
+defaultProbabilities :: PGF -> Probabilities
+defaultProbabilities pgf = fillProbs pgf M.empty
-- | compute the probability of a given tree
probTree :: Probabilities -> Expr -> Double