summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <unknown>2005-10-31 18:02:34 +0000
committeraarne <unknown>2005-10-31 18:02:34 +0000
commitf06638cc7d90eb8298180d36e79fc292a9f898dc (patch)
treeb33a7459a5e777a319c3d85dbf21da62b8a34358 /src
parent94f87d85023fc9b0e759600435e3c85cf31e3bc4 (diff)
probabilities in ShellState
Diffstat (limited to 'src')
-rw-r--r--src/GF/API/IOGrammar.hs21
-rw-r--r--src/GF/Compile/ShellState.hs11
-rw-r--r--src/GF/Probabilistic/Probabilistic.hs44
-rw-r--r--src/GF/Shell.hs21
-rw-r--r--src/GF/Shell/HelpFile.hs11
-rw-r--r--src/GF/Shell/ShellCommands.hs14
-rw-r--r--src/GF/UseGrammar/Custom.hs8
-rw-r--r--src/HelpFile7
8 files changed, 79 insertions, 58 deletions
diff --git a/src/GF/API/IOGrammar.hs b/src/GF/API/IOGrammar.hs
index 987800e16..f06799da1 100644
--- a/src/GF/API/IOGrammar.hs
+++ b/src/GF/API/IOGrammar.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/04/21 16:45:58 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.18 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.19 $
--
-- for reading grammars and terms from strings and files
-----------------------------------------------------------------------------
@@ -21,6 +21,7 @@ import GF.Compile.PGrammar
import GF.Grammar.TypeCheck
import GF.Compile.Compile
import GF.Compile.ShellState
+import GF.Probabilistic.Probabilistic
import GF.Infra.Modules
import GF.Infra.ReadFiles (isOldFile)
@@ -50,7 +51,9 @@ string2annotTree gr m = annotate gr . string2absTerm (prt m) ---- prt
---string2paramList st = map (renameTrm (lookupConcrete st) . patt2term) . pPattList
shellStateFromFiles :: Options -> ShellState -> FilePath -> IOE ShellState
-shellStateFromFiles opts st file = case fileSuffix file of
+shellStateFromFiles opts st file = do
+ let top = identC $ justModuleName file
+ sh <- case fileSuffix file of
"gfcm" -> do
cenv <- compileOne opts (compileEnvShSt st []) file
ioeErr $ updateShellState opts Nothing st cenv
@@ -66,10 +69,14 @@ shellStateFromFiles opts st file = case fileSuffix file of
then addOptions (options []) opts' -- for old no emit
else addOptions (options [emitCode]) opts'
grts <- compileModule osb st file
- let top = identC $ justModuleName file
- mtop = if oElem showOld opts' then Nothing else Just top
+ let mtop = if oElem showOld opts' then Nothing else Just top
ioeErr $ updateShellState opts' mtop st grts
- --- liftM (changeModTimes rts) $ grammar2shellState opts gr
+ if (isSetFlag opts probFile || oElem (iOpt "prob") opts)
+ then do
+ probs <- ioeIO $ getProbsFromFile opts file
+ let lang = maybe top id $ concrete sh --- to work with cf, too
+ ioeErr $ addProbs (lang,probs) sh
+ else return sh
getShellStateFromFiles :: Options -> FilePath -> IO ShellState
getShellStateFromFiles os =
diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs
index e00e2e477..2d87bdf67 100644
--- a/src/GF/Compile/ShellState.hs
+++ b/src/GF/Compile/ShellState.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/30 23:44:00 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.49 $
+-- > CVS $Revision: 1.50 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
@@ -462,6 +462,13 @@ abstractOfState = maybe emptyAbstractST id . maybeStateAbstract
stateIsWord :: StateGrammar -> String -> Bool
stateIsWord sg = isKnownWord (stateMorpho sg)
+addProbs :: (Ident,Probs) -> ShellState -> Err ShellState
+addProbs ip@(lang,probs)
+ sh@(ShSt x y cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) = do
+ let gr = grammarOfLang sh lang
+ probs' <- checkGrammarProbs gr probs
+ let pbs' = (lang,probs') : filter ((/= lang) . fst) pbs
+ return (ShSt x y cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs' os rs acs s)
{-
diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs
index 81f9a60d0..daf382790 100644
--- a/src/GF/Probabilistic/Probabilistic.hs
+++ b/src/GF/Probabilistic/Probabilistic.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 08:12:18 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.2 $
+-- > CVS $Revision: 1.3 $
--
-- Probabilistic abstract syntax. AR 30\/10\/2005
--
@@ -26,6 +26,7 @@ module GF.Probabilistic.Probabilistic (
,Probs -- = BinTree Ident Double
,getProbsFromFile -- :: Opts -> IO Probs
,emptyProbs -- :: Probs
+ ,prProbs -- :: Probs -> String
) where
import GF.Canon.GFC
@@ -54,8 +55,10 @@ generateRandomTreesProb opts gen gr probs cat =
cat' = prt $ snd cat
-- | check that probabilities attached to a grammar make sense
-checkGrammarProbs :: GFCGrammar -> Probs -> Err ()
-checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs
+checkGrammarProbs :: GFCGrammar -> Probs -> Err Probs
+checkGrammarProbs gr probs =
+ err Bad (return . gr2probs) $ checkSGrammar $ gr2sgr gr probs where
+ gr2probs sgr = buildTree [(zIdent f,p) | (_,rs) <- tree2list sgr, ((p,f),_) <- rs]
-- | compute the probability of a given tree
computeProbTree :: Probs -> Tree -> Double
@@ -71,14 +74,14 @@ computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of
rankByScore :: Ord n => [(a,n)] -> [(a,n)]
rankByScore = sortBy (\ (_,p) (_,q) -> compare q p)
-getProbsFromFile :: Options -> IO Probs
-getProbsFromFile opts = do
- s <- maybe (return "") readFile $ getOptVal opts probFile
+getProbsFromFile :: Options -> FilePath -> IO Probs
+getProbsFromFile opts file = do
+ s <- maybe (readFile file) readFile $ getOptVal opts probFile
return $ buildTree $ concatMap pProb $ lines s
where
pProb s = case words s of
- "--":f:p:_ | isDouble p -> [(zIdent f, read p)]
- f:p:_ | isDouble p -> [(zIdent f, read p)]
+ "--#":"prob":f:p:_ | isDouble p -> [(zIdent f, read p)]
+ f:p:_ | isDouble p -> [(zIdent f, read p)]
_ -> []
isDouble = all (flip elem ('.':['0'..'9']))
@@ -86,7 +89,11 @@ type Probs = BinTree Ident Double
emptyProbs :: Probs
emptyProbs = emptyBinTree
-
+
+prProbs :: Probs -> String
+prProbs = unlines . map pr . tree2list where
+ pr (f,p) = prt f ++ "\t" ++ show p
+
------------------------------------------
-- translate grammar to simpler form and generated trees back
@@ -151,21 +158,14 @@ genTree :: [Double] -> SGrammar -> SCat -> (STree,Int)
genTree rs gr = gett rs where
gett ds "String" = (SString "foo",1)
gett ds "Int" = (SInt 1978,1)
- gett ds cat = let
+ gett ds cat = case look cat of
+ [] -> (SMeta cat,1) -- if no productions, return ?
+ fs -> let
d:ds2 = ds
- (pf,args) = getf d cat
+ (pf,args) = getf d fs
(ts,k) = getts ds2 args
in (SApp (pf,ts), k+1)
- getf d cat =
- let
- regs0 = [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat]
-{- not needed
- pstd = 1.0 / genericLength regs
- regs = if any (>1.0) (map fst regs0)
- then [(pstd,pa) | (_,pa) <- regs0]
- else regs0
--}
- in hitRegion d regs0
+ getf d fs = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- fs]
getts ds cats = case cats of
c:cs -> let
(t, k) = gett ds c
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs
index 03a47a05c..488504c65 100644
--- a/src/GF/Shell.hs
+++ b/src/GF/Shell.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 08:12:18 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.48 $
+-- > CVS $Revision: 1.49 $
--
-- GF shell command interpreter.
-----------------------------------------------------------------------------
@@ -222,8 +222,8 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
let p = optParseArgErrMsg opts gro x
case p of
Ok (ts,msg)
- | isSetFlag opts probFile -> do
- probs <- getProbsFromFile opts
+ | oElem (iOpt "prob") opts -> do
+ let probs = stateProbs gro
let tps = rankByScore [(t,computeProbTree probs t) | t <- ts]
putStrLnFlush msg
mapM_ putStrLnFlush [show p | (t,p) <- tps]
@@ -235,17 +235,8 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
let a' = opST2CommandArg (optParseArgErr opts (sgr il)) a
returnArg (opTS2CommandArg (optLinearizeTreeVal opts (sgr ol)) a') sa
-
- CGenerateRandom | isSetFlag opts probFile -> do
- probs <- getProbsFromFile opts
- let cat = firstAbsCat opts gro
- let n = optIntOrN opts flagNumber 1
- gen <- newStdGen
- let ts = take n $ generateRandomTreesProb opts gen cgr probs cat
- returnArg (ATrms (map (term2tree gro) ts)) sa
-
- CGenerateRandom | oElem showCF opts -> do
- let probs = emptyProbs ---
+ CGenerateRandom | oElem showCF opts || oElem (iOpt "prob") opts -> do
+ let probs = stateProbs gro
let cat = firstAbsCat opts gro
let n = optIntOrN opts flagNumber 1
gen <- newStdGen
diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs
index b139ba647..e2216ce64 100644
--- a/src/GF/Shell/HelpFile.hs
+++ b/src/GF/Shell/HelpFile.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/12 12:38:30 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.17 $
+-- > CVS $Revision: 1.18 $
--
-- Help on shell commands. Generated from HelpFile by 'make help'.
-- PLEASE DON'T EDIT THIS FILE.
@@ -63,6 +63,7 @@ txtHelpFile =
"\n -noemit do not emit code (default with old grammar format)" ++
"\n -o do emit code (default with new grammar format)" ++
"\n -ex preprocess .gfe files if needed" ++
+ "\n -prob read probabilities from top grammar file (format --# prob Fun Double)" ++
"\n flags:" ++
"\n -abs set the name used for abstract syntax (with -old option)" ++
"\n -cnc set the name used for concrete syntax (with -old option)" ++
@@ -70,6 +71,7 @@ txtHelpFile =
"\n -path use the (colon-separated) search path to find modules" ++
"\n -optimize select an optimization to override file-defined flags" ++
"\n -conversion select parsing method (values strict|nondet)" ++
+ "\n -probs read probabilities from file (format (--# prob) Fun Double)" ++
"\n examples:" ++
"\n i English.gf -- ordinary import of Concrete" ++
"\n i -retain german/ParadigmsGer.gf -- import of Resource to test" ++
@@ -194,6 +196,7 @@ txtHelpFile =
"\n options for batch input:" ++
"\n -lines parse each line of input separately, ignoring empty lines" ++
"\n -all as -lines, but also parse empty lines" ++
+ "\n -prob rank results by probability" ++
"\n options for selecting parsing method:" ++
"\n (default)parse using an overgenerating CFG" ++
"\n -cfg parse using a much less overgenerating CFG" ++
@@ -270,6 +273,9 @@ txtHelpFile =
"\n Generates a random Tree of a given category. If a Tree" ++
"\n argument is given, the command completes the Tree with values to" ++
"\n the metavariables in the tree. " ++
+ "\n options:" ++
+ "\n -prob use probabilities (works for nondep types only)" ++
+ "\n -cf use a very fast method (works for nondep types only)" ++
"\n flags:" ++
"\n -cat generate in this category" ++
"\n -lang use the abstract syntax of this grammar" ++
@@ -566,6 +572,7 @@ txtHelpFile =
"\n *-printer=xml XML: DTD for the pg command, object for st" ++
"\n -printer=old old GF: file readable by GF 1.2" ++
"\n -printer=stat show some statistics of generated GFC" ++
+ "\n -printer=probs show probabilities of all functions" ++
"\n -printer=gsl Nuance GSL speech recognition grammar" ++
"\n -printer=jsgf Java Speech Grammar Format" ++
"\n -printer=slf a finite automaton in the HTK SLF format" ++
diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs
index eac97b22c..c5b0c479e 100644
--- a/src/GF/Shell/ShellCommands.hs
+++ b/src/GF/Shell/ShellCommands.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 08:12:18 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.44 $
+-- > CVS $Revision: 1.45 $
--
-- The datatype of shell commands and the list of their options.
-----------------------------------------------------------------------------
@@ -165,18 +165,18 @@ optionsOfCommand co = case co of
CSetFlag -> both "utf8 table struct record all multi"
"cat lang lexer parser number depth rawtrees unlexer optimize path conversion printer"
- CImport _ -> both "old v s src retain nocf nocheckcirc cflexer noemit o ex"
- "abs cnc res path optimize conversion cat"
+ CImport _ -> both "old v s src retain nocf nocheckcirc cflexer noemit o ex prob"
+ "abs cnc res path optimize conversion cat probs"
CRemoveLanguage _ -> none
CEmptyState -> none
CStripState -> none
CTransformGrammar _ -> flags "printer"
CConvertLatex _ -> none
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer"
- CParse -> both "new newer cfg mcfg n ign raw v lines all"
- "cat lang lexer parser number rawtrees probs"
+ CParse -> both "new newer cfg mcfg n ign raw v lines all prob"
+ "cat lang lexer parser number rawtrees"
CTranslate _ _ -> opts "cat lexer parser"
- CGenerateRandom -> flags "cat lang number depth probs"
+ CGenerateRandom -> both "cf prob" "cat lang number depth"
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
CPutTerm -> flags "transform number"
CWrapTerm _ -> opts "c"
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 9887a2371..67ed388f8 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 16:48:10 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.79 $
+-- > CVS $Date: 2005/10/31 19:02:35 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.80 $
--
-- A database for customizable GF shell commands.
--
@@ -68,6 +68,7 @@ import GF.UseGrammar.Information
import GF.API.GrammarToHaskell
-----import GrammarToCanon (showCanon, showCanonOpt)
-----import qualified GrammarToGFC as GFC
+import GF.Probabilistic.Probabilistic (prProbs)
-- the cf parsing algorithms
import GF.CF.ChartParser -- OBSOLETE
@@ -266,6 +267,7 @@ customGrammarPrinter =
,(strCI "words", unwords . stateGrammarWords)
,(strCI "printnames", C.prPrintnamesGrammar . stateGrammarST)
,(strCI "stat", prStatistics . stateGrammarST)
+ ,(strCI "probs", prProbs . stateProbs)
,(strCI "unpar", prCanon . unparametrizeCanon . stateGrammarST)
,(strCI "subs", prSubtermStat . stateGrammarST)
diff --git a/src/HelpFile b/src/HelpFile
index c28a9d2fc..573191204 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -34,6 +34,7 @@ i, import: i File
-noemit do not emit code (default with old grammar format)
-o do emit code (default with new grammar format)
-ex preprocess .gfe files if needed
+ -prob read probabilities from top grammar file (format --# prob Fun Double)
flags:
-abs set the name used for abstract syntax (with -old option)
-cnc set the name used for concrete syntax (with -old option)
@@ -41,6 +42,7 @@ i, import: i File
-path use the (colon-separated) search path to find modules
-optimize select an optimization to override file-defined flags
-conversion select parsing method (values strict|nondet)
+ -probs read probabilities from file (format (--# prob) Fun Double)
examples:
i English.gf -- ordinary import of Concrete
i -retain german/ParadigmsGer.gf -- import of Resource to test
@@ -165,6 +167,7 @@ p, parse: p String
options for batch input:
-lines parse each line of input separately, ignoring empty lines
-all as -lines, but also parse empty lines
+ -prob rank results by probability
options for selecting parsing method:
(default)parse using an overgenerating CFG
-cfg parse using a much less overgenerating CFG
@@ -241,6 +244,9 @@ gr, generate_random: gr Tree?
Generates a random Tree of a given category. If a Tree
argument is given, the command completes the Tree with values to
the metavariables in the tree.
+ options:
+ -prob use probabilities (works for nondep types only)
+ -cf use a very fast method (works for nondep types only)
flags:
-cat generate in this category
-lang use the abstract syntax of this grammar
@@ -537,6 +543,7 @@ q, quit: q
*-printer=xml XML: DTD for the pg command, object for st
-printer=old old GF: file readable by GF 1.2
-printer=stat show some statistics of generated GFC
+ -printer=probs show probabilities of all functions
-printer=gsl Nuance GSL speech recognition grammar
-printer=jsgf Java Speech Grammar Format
-printer=slf a finite automaton in the HTK SLF format