summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Commands.hs12
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs6
-rw-r--r--src/compiler/GF/Compile/PGFtoLProlog.hs6
-rw-r--r--src/compiler/GF/Compile/PGFtoProlog.hs2
-rw-r--r--src/compiler/GF/Grammar/Binary.hs6
-rw-r--r--src/compiler/GF/Grammar/Printer.hs4
-rw-r--r--src/compiler/GF/Infra/Option.hs33
-rw-r--r--src/compiler/GF/Speech/VoiceXML.hs2
-rw-r--r--src/compiler/GFC.hs2
-rw-r--r--src/compiler/SimpleEditor/Convert.hs6
-rw-r--r--src/runtime/c/pgf/data.h5
-rw-r--r--src/runtime/c/pgf/parser.c92
-rw-r--r--src/runtime/c/pgf/pgf.c57
-rw-r--r--src/runtime/c/pgf/pgf.h5
-rw-r--r--src/runtime/c/pgf/printer.c2
-rw-r--r--src/runtime/c/pgf/reader.c8
-rw-r--r--src/runtime/c/utils/pgf-translate.c19
-rw-r--r--src/runtime/haskell/PGF.hs12
-rw-r--r--src/runtime/haskell/PGF/Binary.hs4
-rw-r--r--src/runtime/haskell/PGF/Data.hs8
-rw-r--r--src/runtime/haskell/PGF/Macros.hs4
-rw-r--r--src/runtime/haskell/PGF/Printer.hs4
-rw-r--r--src/runtime/haskell/PGF/Probabilistic.hs25
-rw-r--r--src/runtime/haskell/PGF/TypeCheck.hs8
24 files changed, 153 insertions, 179 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 6769b283d..58e485768 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -1134,15 +1134,19 @@ allCommands = Map.fromList [
case arg of
[EFun id] -> case Map.lookup id (funs (abstract pgf)) of
Just fd -> do putStrLn $ render (ppFun id fd)
- putStrLn ("Probability: "++show (probTree pgf (EFun id)))
+ let (_,_,_,prob,_) = fd
+ putStrLn ("Probability: "++show prob)
return void
Nothing -> case Map.lookup id (cats (abstract pgf)) of
- Just hyps -> do putStrLn $
- render (ppCat id hyps $$
+ Just cd -> do putStrLn $
+ render (ppCat id cd $$
if null (functionsToCat pgf id)
then empty
else space $$
- vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id])
+ vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id] $$
+ space)
+ let (_,_,prob,_) = cd
+ putStrLn ("Probability: "++show prob)
return void
Nothing -> do putStrLn ("unknown category of function identifier "++show id)
return void
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index ce31d5dc1..25db4bac7 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -50,12 +50,12 @@ mkCanon2pgf opts gr am = do
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
Look.allOrigInfos gr am
- flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF aflags]
+ flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags]
funs = Map.fromList [(i2i f, (mkType [] ty, mkArrity ma, mkDef pty, 0, addr)) |
((m,f),AbsFun (Just (L _ ty)) ma pty _,addr) <- adefs]
- cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, addr)) |
+ cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0, addr)) |
((m,c),AbsCat (Just (L _ cont)),addr) <- adefs]
catfuns cat =
@@ -69,7 +69,7 @@ mkCanon2pgf opts gr am = do
([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++
Look.allOrigInfos gr cm)
- let flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF cflags]
+ let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
seqs = (mkSetArray . Set.fromList . concat) $
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
diff --git a/src/compiler/GF/Compile/PGFtoLProlog.hs b/src/compiler/GF/Compile/PGFtoLProlog.hs
index 2ab5a26d2..61e28eb17 100644
--- a/src/compiler/GF/Compile/PGFtoLProlog.hs
+++ b/src/compiler/GF/Compile/PGFtoLProlog.hs
@@ -12,7 +12,7 @@ import qualified Data.Map as Map
grammar2lambdaprolog_mod pgf = render $
text "module" <+> ppCId (absname pgf) <> char '.' $$
space $$
- vcat [ppClauses cat fns | (cat,(_,fs,_)) <- Map.toList (cats (abstract pgf)),
+ vcat [ppClauses cat fns | (cat,(_,fs,_,_)) <- Map.toList (cats (abstract pgf)),
let fns = [(f,fromJust (Map.lookup f (funs (abstract pgf)))) | (_,f) <- fs]]
where
ppClauses cat fns =
@@ -25,11 +25,11 @@ grammar2lambdaprolog_mod pgf = render $
grammar2lambdaprolog_sig pgf = render $
text "sig" <+> ppCId (absname pgf) <> char '.' $$
space $$
- vcat [ppCat c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$
+ vcat [ppCat c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$
space $$
vcat [ppFun f ty <> dot | (f,(ty,_,Nothing,_,_)) <- Map.toList (funs (abstract pgf))] $$
space $$
- vcat [ppExport c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$
+ vcat [ppExport c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$
vcat [ppFunPred f (hyps ++ [(Explicit,wildCId,DTyp [] c es)]) <> dot | (f,(DTyp hyps c es,_,Just _,_,_)) <- Map.toList (funs (abstract pgf))]
ppCat :: CId -> [Hypo] -> Doc
diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs
index 061d9e874..8fffd5f07 100644
--- a/src/compiler/GF/Compile/PGFtoProlog.hs
+++ b/src/compiler/GF/Compile/PGFtoProlog.hs
@@ -49,7 +49,7 @@ plAbstract name abs
(f, v) <- Map.assocs (aflags abs)] ++++
plFacts name "cat" 2 "(?Type, ?[X:Type,...])"
[[plType cat args, plHypos hypos'] |
- (cat, (hypos, _, _)) <- Map.assocs (cats abs),
+ (cat, (hypos,_,_,_)) <- Map.assocs (cats abs),
let ((_, subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos,
let args = reverse [EFun x | (_,x) <- subst]] ++++
plFacts name "fun" 3 "(?Fun, ?Type, ?[X:Type,...])"
diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs
index 6641b5718..0e897aa00 100644
--- a/src/compiler/GF/Grammar/Binary.hs
+++ b/src/compiler/GF/Grammar/Binary.hs
@@ -17,6 +17,7 @@ import Data.Binary
--import Control.Monad
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS
+import Text.PrettyPrint (render)
import GF.Data.Operations
import GF.Infra.Ident
@@ -24,9 +25,10 @@ import GF.Infra.Option
import GF.Grammar.Grammar
import PGF() -- Binary instances
+import PGF.Data(ppLit)
-- Please change this every time when the GFO format is changed
-gfoVersion = "GF02"
+gfoVersion = "GF03"
instance Binary Ident where
put id = put (ident2bs id)
@@ -91,7 +93,7 @@ instance Binary ModuleStatus where
instance Binary Options where
put = put . optionsGFO
get = do opts <- get
- case parseModuleOptions ["--" ++ flag ++ "=" ++ value | (flag,value) <- opts] of
+ case parseModuleOptions ["--" ++ flag ++ "=" ++ render (ppLit value) | (flag,value) <- opts] of
Ok x -> return x
Bad msg -> fail msg
diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs
index dba08ce44..e15e6e4d3 100644
--- a/src/compiler/GF/Grammar/Printer.hs
+++ b/src/compiler/GF/Grammar/Printer.hs
@@ -29,7 +29,7 @@ import GF.Infra.Option
import GF.Grammar.Values
import GF.Grammar.Grammar
-import PGF.Data (ppMeta)
+import PGF.Data (ppMeta, ppLit)
import PGF.Printer (ppFId, ppFunId, ppSeqId, ppSeq)
import Text.PrettyPrint
@@ -87,7 +87,7 @@ ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ mseqs jments) =
ppOptions opts =
text "flags" $$
- nest 2 (vcat [text option <+> equals <+> str value <+> semi | (option,value) <- optionsGFO opts])
+ nest 2 (vcat [text option <+> equals <+> ppLit value <+> semi | (option,value) <- optionsGFO opts])
ppJudgement q (id, AbsCat pcont ) =
text "cat" <+> ppIdent id <+>
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index e0cba05e4..1236e729c 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -43,6 +43,7 @@ import GF.Data.ErrM
import Data.Set (Set)
import qualified Data.Set as Set
+import PGF.Data(Literal(..))
usageHeader :: String
usageHeader = unlines
@@ -170,7 +171,9 @@ data Flags = Flags {
optWarnings :: [Warning],
optDump :: [Dump],
optTagsOnly :: Bool,
- optBeamSize :: Maybe Double,
+ optHeuristicFactor :: Maybe Double,
+ optMetaProb :: Maybe Double,
+ optMetaToknProb :: Maybe Double,
optNewComp :: Bool
}
deriving (Show)
@@ -206,16 +209,18 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- Showing options
-- | Pretty-print the options that are preserved in .gfo files.
-optionsGFO :: Options -> [(String,String)]
+optionsGFO :: Options -> [(String,Literal)]
optionsGFO opts = optionsPGF opts
- ++ [("coding", flag optEncoding opts)]
+ ++ [("coding", LStr (flag optEncoding opts))]
-- | Pretty-print the options that are preserved in .pgf files.
-optionsPGF :: Options -> [(String,String)]
+optionsPGF :: Options -> [(String,Literal)]
optionsPGF opts =
- maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
- ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
- ++ maybe [] (\x -> [("beam_size",show x)]) (flag optBeamSize opts)
+ maybe [] (\x -> [("language",LStr x)]) (flag optSpeechLanguage opts)
+ ++ maybe [] (\x -> [("startcat",LStr x)]) (flag optStartCat opts)
+ ++ maybe [] (\x -> [("heuristic_search_factor",LFlt x)]) (flag optHeuristicFactor opts)
+ ++ maybe [] (\x -> [("meta_prob",LFlt x)]) (flag optMetaProb opts)
+ ++ maybe [] (\x -> [("meta_token_prob",LFlt x)]) (flag optMetaToknProb opts)
-- Option manipulation
@@ -272,7 +277,9 @@ defaultFlags = Flags {
optWarnings = [],
optDump = [],
optTagsOnly = False,
- optBeamSize = Nothing,
+ optHeuristicFactor = Nothing,
+ optMetaProb = Nothing,
+ optMetaToknProb = Nothing,
optNewComp =
#ifdef NEW_COMP
True
@@ -358,7 +365,9 @@ optDescr =
Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).",
Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).",
Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...",
- Option [] ["beam_size"] (ReqArg readDouble "SIZE") "Set the beam size for statistical parsing",
+ Option [] ["heuristic_search_factor"] (ReqArg (readDouble (\d o -> o { optHeuristicFactor = Just d })) "FACTOR") "Set the heuristic search factor for statistical parsing",
+ Option [] ["meta_prob"] (ReqArg (readDouble (\d o -> o { optMetaProb = Just d })) "PROB") "Set the probability of introducting a meta variable in the parser",
+ Option [] ["meta_token_prob"] (ReqArg (readDouble (\d o -> o { optMetaToknProb = Just d })) "PROB") "Set the probability for skipping a token in the parser",
Option [] ["new-comp"] (NoArg (set $ \o -> o{optNewComp = True})) "Use the new experimental compiler.",
Option [] ["old-comp"] (NoArg (set $ \o -> o{optNewComp = False})) "Use old trusty compiler.",
dumpOption "source" Source,
@@ -433,9 +442,9 @@ optDescr =
Nothing -> fail $ "Unknown CFG transformation: " ++ x'
++ " Known: " ++ show (map fst cfgTransformNames)
- readDouble x = case reads x of
- [(d,"")] -> set $ \o -> o { optBeamSize = Just d }
- _ -> fail "A floating point number is expected"
+ readDouble f x = case reads x of
+ [(d,"")] -> set $ f d
+ _ -> fail "A floating point number is expected"
dumpOption s d = Option [] ["dump-"++s] (NoArg (set $ \o -> o { optDump = Dump d:optDump o})) ("Dump output of the " ++ s ++ " phase.")
diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs
index 24c2e4d86..3703fe7a2 100644
--- a/src/compiler/GF/Speech/VoiceXML.hs
+++ b/src/compiler/GF/Speech/VoiceXML.hs
@@ -39,7 +39,7 @@ type Skeleton = [(CId, [(CId, [CId])])]
pgfSkeleton :: PGF -> Skeleton
pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType (abstract pgf) f))) | (_,f) <- fs])
- | (c,(_,fs,_)) <- Map.toList (cats (abstract pgf))]
+ | (c,(_,fs,_,_)) <- Map.toList (cats (abstract pgf))]
--
-- * Questions to ask
diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs
index 61d5726b2..f75a39ab1 100644
--- a/src/compiler/GFC.hs
+++ b/src/compiler/GFC.hs
@@ -104,7 +104,7 @@ writeByteCode opts pgf
where
addrs =
[(id,addr) | (id,(_,_,_,_,addr)) <- Map.toList (funs (abstract pgf))] ++
- [(id,addr) | (id,(_,_,addr)) <- Map.toList (cats (abstract pgf))]
+ [(id,addr) | (id,(_,_,_,addr)) <- Map.toList (cats (abstract pgf))]
writePGF :: Options -> PGF -> IOE ()
writePGF opts pgf = do
diff --git a/src/compiler/SimpleEditor/Convert.hs b/src/compiler/SimpleEditor/Convert.hs
index 3554e3342..86580834a 100644
--- a/src/compiler/SimpleEditor/Convert.hs
+++ b/src/compiler/SimpleEditor/Convert.hs
@@ -23,6 +23,7 @@ import GF.Grammar.Printer(ppParams,ppTerm,getAbs,TermPrintQual(..))
import GF.Grammar.Parser(runP,pModDef)
import GF.Grammar.Lexer(Posn(..))
import GF.Data.ErrM
+import PGF.Data(Literal(LStr))
import SimpleEditor.Syntax as S
import SimpleEditor.JSON
@@ -57,7 +58,10 @@ convAbstract (modid,src) =
let cats = reverse cats0
funs = reverse funs0
flags = optionsGFO (mflags src)
- startcat = maybe "-" id $ lookup "startcat" flags
+ startcat =
+ case lookup "startcat" flags of
+ Just (LStr cat) -> cat
+ _ -> "-"
return $ Grammar (convId modid) extends (Abstract startcat cats funs) []
convExtends = mapM convExtend
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h
index 5b0401764..9bc73dd0a 100644
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -87,9 +87,7 @@ typedef struct {
PgfCId name;
PgfHypos* context;
- prob_t meta_prob;
- prob_t meta_token_prob;
- PgfMetaChildMap* meta_child_probs;
+ prob_t prob;
void* predicate;
} PgfAbsCat;
@@ -230,6 +228,7 @@ typedef GuSeq PgfCncFuns;
struct PgfConcr {
PgfCId name;
+ PgfAbstr* abstr;
PgfFlags* cflags;
PgfPrintNames* printnames;
GuMap* ccats;
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 4e4724c75..ec21fc84e 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -63,7 +63,10 @@ typedef struct {
int prod_full_count;
#endif
PgfItem* free_item;
- prob_t beam_size;
+
+ prob_t heuristic_factor;
+ prob_t meta_prob;
+ prob_t meta_token_prob;
} PgfParsing;
typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
@@ -1389,12 +1392,14 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
{
(void) (err);
- PgfAbsCat* abscat = (PgfAbsCat*) key;
- prob_t meta_prob = *((prob_t*) value);
+ PgfAbsCat* abscat = *((PgfAbsCat**) value);
PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn;
PgfParsing* ps = clo->ps;
PgfItem* meta_item = clo->meta_item;
+ if (abscat->prob == INFINITY)
+ return;
+
PgfCncCat* cnccat =
gu_map_get(ps->concr->cnccats, abscat->name, PgfCncCat*);
if (cnccat == NULL)
@@ -1412,7 +1417,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
PgfItem* item =
pgf_item_copy(meta_item, ps);
item->inside_prob +=
- ccat->viterbi_prob+meta_prob;
+ ccat->viterbi_prob+abscat->prob;
size_t nargs = gu_seq_length(meta_item->args);
item->args = gu_new_seq(PgfPArg, nargs+1, ps->pool);
@@ -1698,18 +1703,14 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item)
}
pgf_parsing_complete(ps, item, ep);
} else {
- prob_t meta_token_prob =
- item->conts->ccat->cnccat->abscat->meta_token_prob;
+ prob_t meta_token_prob =
+ ps->meta_token_prob;
if (meta_token_prob != INFINITY) {
pgf_parsing_meta_scan(ps, item, meta_token_prob);
}
- PgfCIdMap* meta_child_probs =
- item->conts->ccat->cnccat->abscat->meta_child_probs;
- if (meta_child_probs != NULL) {
- PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item };
- gu_map_iter(meta_child_probs, &clo.fn, NULL);
- }
+ PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item };
+ gu_map_iter(ps->concr->abstr->cats, &clo.fn, NULL);
}
} else {
pgf_parsing_symbol(ps, item, item->curr_sym);
@@ -1721,22 +1722,38 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item)
}
}
-static prob_t
-pgf_parsing_default_beam_size(PgfConcr* concr)
+static void
+pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr)
{
- PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral);
+ PgfLiteral lit;
- if (gu_variant_is_null(lit))
- return 0;
+ lit =
+ gu_map_get(abstr->aflags, "heuristic_search_factor", PgfLiteral);
+ if (!gu_variant_is_null(lit)) {
+ GuVariantInfo pi = gu_variant_open(lit);
+ gu_assert (pi.tag == PGF_LITERAL_FLT);
+ ps->heuristic_factor = ((PgfLiteralFlt*) pi.data)->val;
+ }
+
+ lit =
+ gu_map_get(abstr->aflags, "meta_prob", PgfLiteral);
+ if (!gu_variant_is_null(lit)) {
+ GuVariantInfo pi = gu_variant_open(lit);
+ gu_assert (pi.tag == PGF_LITERAL_FLT);
+ ps->meta_prob = - log(((PgfLiteralFlt*) pi.data)->val);
+ }
- GuVariantInfo pi = gu_variant_open(lit);
- gu_assert (pi.tag == PGF_LITERAL_FLT);
- return ((PgfLiteralFlt*) pi.data)->val;
+ lit =
+ gu_map_get(abstr->aflags, "meta_token_prob", PgfLiteral);
+ if (!gu_variant_is_null(lit)) {
+ GuVariantInfo pi = gu_variant_open(lit);
+ gu_assert (pi.tag == PGF_LITERAL_FLT);
+ ps->meta_token_prob = - log(((PgfLiteralFlt*) pi.data)->val);
+ }
}
static PgfParsing*
-pgf_new_parsing(PgfConcr* concr,
- GuString sentence, double heuristics,
+pgf_new_parsing(PgfConcr* concr, GuString sentence,
GuPool* pool, GuPool* out_pool)
{
PgfParsing* ps = gu_new(PgfParsing, pool);
@@ -1756,7 +1773,11 @@ pgf_new_parsing(PgfConcr* concr,
ps->prod_full_count = 0;
#endif
ps->free_item = NULL;
- ps->beam_size = heuristics;
+ ps->heuristic_factor = 0;
+ ps->meta_prob = INFINITY;
+ ps->meta_token_prob = INFINITY;
+
+ pgf_parsing_set_default_factors(ps, concr->abstr);
PgfExprMeta *expr_meta =
gu_new_variant(PGF_EXPR_META,
@@ -2107,7 +2128,7 @@ pgf_parse_result_is_new(PgfExprState* st)
// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat
static PgfParsing*
pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
- GuString sentence, double heuristics,
+ GuString sentence, double heuristic_factor,
GuExn* err,
GuPool* pool, GuPool* out_pool)
{
@@ -2121,12 +2142,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
gu_assert(lin_idx < cnccat->n_lins);
- if (heuristics < 0) {
- heuristics = pgf_parsing_default_beam_size(concr);
+ PgfParsing* ps =
+ pgf_new_parsing(concr, sentence, pool, out_pool);
+
+ if (heuristic_factor >= 0) {
+ ps->heuristic_factor = heuristic_factor;
}
- PgfParsing* ps =
- pgf_new_parsing(concr, sentence, heuristics, pool, out_pool);
PgfParseState* state =
pgf_new_parse_state(ps, 0, BIND_SOFT);
@@ -2156,11 +2178,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
}
- PgfItem *item =
- pgf_new_item(ps, conts, ps->meta_prod);
- item->inside_prob =
- ccat->cnccat->abscat->meta_prob;
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ if (ps->meta_prob != INFINITY) {
+ PgfItem *item =
+ pgf_new_item(ps, conts, ps->meta_prod);
+ item->inside_prob =
+ ps->meta_prob;
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ }
}
}
@@ -2200,7 +2224,7 @@ pgf_parsing_proceed(PgfParsing* ps)
prob_t state_delta =
(st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))*
- ps->beam_size;
+ ps->heuristic_factor;
delta_prob += state_delta;
st = st->next;
}
diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c
index e804f5ce7..93dea300a 100644
--- a/src/runtime/c/pgf/pgf.c
+++ b/src/runtime/c/pgf/pgf.c
@@ -35,63 +35,6 @@ pgf_read(const char* fpath,
return pgf;
}
-void
-pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
- GuPool* pool, GuExn* err)
-{
- FILE *fp = fopen(fpath, "r");
- if (!fp) {
- gu_raise_errno(err);
- return;
- }
-
- GuPool* tmp_pool = gu_new_pool();
-
- for (;;) {
- char cat1[21];
- char cat2[21];
- prob_t prob;
-
- if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3)
- break;
-
- prob = - log(prob);
-
- PgfAbsCat* abscat1 =
- gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*);
- if (abscat1 == NULL) {
- GuExnData* exn = gu_raise(err, PgfExn);
- exn->data = "Unknown category name";
- goto close;
- }
-
- if (strcmp(cat2, "*") == 0) {
- abscat1->meta_prob = prob;
- } else if (strcmp(cat2, "_") == 0) {
- abscat1->meta_token_prob = prob;
- } else {
- PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*);
- if (abscat2 == NULL) {
- gu_raise(err, PgfExn);
- GuExnData* exn = gu_raise(err, PgfExn);
- exn->data = "Unknown category name";
- goto close;
- }
-
- if (abscat1->meta_child_probs == NULL) {
- abscat1->meta_child_probs =
- gu_map_type_new(PgfMetaChildMap, pool);
- }
-
- gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob);
- }
- }
-
-close:
- gu_pool_free(tmp_pool);
- fclose(fp);
-}
-
GuString
pgf_abstract_name(PgfPGF* pgf)
{
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 61b8bea6c..ffc293306 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -80,11 +80,6 @@ pgf_read(const char* fpath,
*
*/
-
-void
-pgf_load_meta_child_probs(PgfPGF*, const char* fpath,
- GuPool* pool, GuExn* err);
-
GuString
pgf_abstract_name(PgfPGF*);
diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c
index da7c70d7c..78c2b74db 100644
--- a/src/runtime/c/pgf/printer.c
+++ b/src/runtime/c/pgf/printer.c
@@ -48,7 +48,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
ctxt = next;
}
- gu_printf(out, err, " ; -- %f\n",cat->meta_prob);
+ gu_printf(out, err, " ; -- %f\n", cat->prob);
}
void
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index d215f25e1..12605b89a 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -516,10 +516,6 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
gu_return_on_exn(rdr->err, NULL);
}
- abscat->meta_prob = INFINITY;
- abscat->meta_token_prob = INFINITY;
- abscat->meta_child_probs = NULL;
-
GuBuf* functions = gu_new_buf(PgfAbsFun*, rdr->tmp_pool);
size_t n_functions = pgf_read_len(rdr);
@@ -538,6 +534,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
gu_buf_push(functions, PgfAbsFun*, absfun);
}
+ abscat->prob = - log(gu_in_f64be(rdr->in, rdr->err));
+
pgf_jit_predicate(rdr->jit_state, abscats, abscat, functions);
return abscat;
@@ -1155,6 +1153,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun)
pgf_read_cid(rdr, rdr->opool);
gu_return_on_exn(rdr->err, NULL);
+ concr->abstr = abstr;
+
concr->cflags =
pgf_read_flags(rdr);
gu_return_on_exn(rdr->err, NULL);
diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c
index 79a4fdd42..32f8323ab 100644
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -53,18 +53,17 @@ int main(int argc, char* argv[]) {
// Create the pool that is used to allocate everything
GuPool* pool = gu_new_pool();
int status = EXIT_SUCCESS;
- if (argc < 5 || argc > 6) {
- fprintf(stderr, "usage: %s pgf cat from-lang to-lang [probs-file]\n", argv[0]);
+ if (argc < 5) {
+ fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]);
status = EXIT_FAILURE;
goto fail;
}
- char* filename = argv[1];
+ GuString filename = argv[1];
GuString cat = argv[2];
-
GuString from_lang = argv[3];
GuString to_lang = argv[4];
-
+
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
@@ -78,16 +77,6 @@ int main(int argc, char* argv[]) {
goto fail;
}
- if (argc == 6) {
- char* meta_probs_filename = argv[5];
- pgf_load_meta_child_probs(pgf, meta_probs_filename, pool, err);
- if (!gu_ok(err)) {
- fprintf(stderr, "Loading meta child probs failed\n");
- status = EXIT_FAILURE;
- goto fail;
- }
- }
-
// Look up the source and destination concrete categories
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
PgfConcr* to_concr = pgf_get_language(pgf, to_lang);
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
index e7e5c53c5..0e3c79f40 100644
--- a/src/runtime/haskell/PGF.hs
+++ b/src/runtime/haskell/PGF.hs
@@ -292,8 +292,8 @@ categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
categoryContext pgf cat =
case Map.lookup cat (cats (abstract pgf)) of
- Just (hypos,_,_) -> Just hypos
- Nothing -> Nothing
+ Just (hypos,_,_,_) -> Just hypos
+ Nothing -> Nothing
startCat pgf = DTyp [] (lookStartCat pgf) []
@@ -301,8 +301,8 @@ functions pgf = Map.keys (funs (abstract pgf))
functionsByCat pgf cat =
case Map.lookup cat (cats (abstract pgf)) of
- Just (_,fns,_) -> map snd fns
- Nothing -> []
+ Just (_,fns,_,_) -> map snd fns
+ Nothing -> []
functionType pgf fun =
case Map.lookup fun (funs (abstract pgf)) of
@@ -325,8 +325,8 @@ browse pgf id = fmap (\def -> (def,producers,consumers)) definition
in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
Just (ty,_,Nothing, _,_) -> Just $ render (text "data" <+> ppCId id <+> colon <+> ppType 0 [] ty)
Nothing -> case Map.lookup id (cats (abstract pgf)) of
- Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)))
- Nothing -> Nothing
+ Just (hyps,_,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)))
+ Nothing -> Nothing
(producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
where
diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs
index 202939f04..2debcf12d 100644
--- a/src/runtime/haskell/PGF/Binary.hs
+++ b/src/runtime/haskell/PGF/Binary.hs
@@ -40,13 +40,13 @@ instance Binary CId where
instance Binary Abstr where
put abs = put (aflags abs,
fmap (\(w,x,y,z,_) -> (w,x,y,z)) (funs abs),
- fmap (\(x,y,_) -> (x,y)) (cats abs))
+ fmap (\(x,y,z,_) -> (x,y,z)) (cats abs))
get = do aflags <- get
funs <- get
cats <- get
return (Abstr{ aflags=aflags
, funs=fmap (\(w,x,y,z) -> (w,x,y,z,0)) funs
- , cats=fmap (\(x,y) -> (x,y,0)) cats
+ , cats=fmap (\(x,y,z) -> (x,y,z,0)) cats
, code=BS.empty
})
diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs
index e37b243d0..f5797739f 100644
--- a/src/runtime/haskell/PGF/Data.hs
+++ b/src/runtime/haskell/PGF/Data.hs
@@ -29,10 +29,10 @@ data PGF = PGF {
data Abstr = Abstr {
aflags :: Map.Map CId Literal, -- ^ value of a flag
funs :: Map.Map CId (Type,Int,Maybe [Equation],Double,BCAddr), -- ^ type, arrity and definition of function + probability
- cats :: Map.Map CId ([Hypo],[(Double, CId)],BCAddr), -- ^ 1. context of a category
- -- ^ 2. functions of a category. The order in the list is important,
- -- this is the order in which the type singatures are given in the source.
- -- The termination of the exhaustive generation might depend on this.
+ cats :: Map.Map CId ([Hypo],[(Double, CId)],Double,BCAddr), -- ^ 1. context of a category
+ -- 2. functions of a category. The functions are stored
+ -- in decreasing probability order.
+ -- 3. probability
code :: BS.ByteString
}
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index 830a16674..ce75b1c91 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -67,7 +67,7 @@ functionsToCat :: PGF -> CId -> [(CId,Type)]
functionsToCat pgf cat =
[(f,ty) | (_,f) <- fs, Just (ty,_,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
where
- (_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf
+ (_,fs,_,_) = lookMap ([],[],0,0) cat $ cats $ abstract pgf
-- | List of functions that lack linearizations in the given language.
missingLins :: PGF -> Language -> [CId]
@@ -82,7 +82,7 @@ restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
abstract = abstr {
funs = Map.filterWithKey (\c _ -> cond c) (funs abstr),
- cats = Map.map (\(hyps,fs,addr) -> (hyps,filter (cond . snd) fs,addr)) (cats abstr)
+ cats = Map.map (\(hyps,fs,p,addr) -> (hyps,filter (cond . snd) fs,p,addr)) (cats abstr)
}
} ---- restrict concrs also, might be needed
where
diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs
index 66d8530f0..d3a5ea1d9 100644
--- a/src/runtime/haskell/PGF/Printer.hs
+++ b/src/runtime/haskell/PGF/Printer.hs
@@ -26,8 +26,8 @@ ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$
ppFlag :: CId -> Literal -> Doc
ppFlag flag value = text "flag" <+> ppCId flag <+> char '=' <+> ppLit value <+> char ';'
-ppCat :: CId -> ([Hypo],[(Double,CId)],BCAddr) -> Doc
-ppCat c (hyps,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';'
+ppCat :: CId -> ([Hypo],[(Double,CId)],Double,BCAddr) -> Doc
+ppCat c (hyps,_,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';'
ppFun :: CId -> (Type,Int,Maybe [Equation],Double,BCAddr) -> Doc
ppFun f (t,_,Just eqs,_,_) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t <+> char ';' $$
diff --git a/src/runtime/haskell/PGF/Probabilistic.hs b/src/runtime/haskell/PGF/Probabilistic.hs
index 7f980254b..095ade022 100644
--- a/src/runtime/haskell/PGF/Probabilistic.hs
+++ b/src/runtime/haskell/PGF/Probabilistic.hs
@@ -24,13 +24,14 @@ import Data.Maybe (fromMaybe) --, fromJust
-- the probabilities for the different functions in a grammar.
data Probabilities = Probs {
funProbs :: Map.Map CId Double,
- catProbs :: Map.Map CId [(Double, CId)]
+ catProbs :: Map.Map CId (Double, [(Double, CId)])
}
-- | Renders the probability structure as string
showProbabilities :: Probabilities -> String
-showProbabilities = unlines . map pr . Map.toList . funProbs where
- pr (f,d) = showCId f ++ "\t" ++ show d
+showProbabilities = unlines . concatMap prProb . Map.toList . catProbs where
+ prProb (c,(p,fns)) = pr (p,c) : map pr fns
+ pr (p,f) = showCId f ++ "\t" ++ show p
-- | Reads the probabilities from a file.
-- This should be a text file where on every line
@@ -50,8 +51,12 @@ readProbabilitiesFromFile file pgf = do
-- for the result category.
mkProbabilities :: PGF -> Map.Map CId Double -> Probabilities
mkProbabilities pgf probs =
- let funs1 = Map.fromList [(f,p) | (_,cf) <- Map.toList cats1, (p,f) <- cf]
- cats1 = Map.map (\(_,fs,_) -> sortBy cmpProb (fill fs)) (cats (abstract pgf))
+ let funs1 = Map.fromList [(f,p) | (_,(_,fns)) <- Map.toList cats1, (p,f) <- fns]
+ cats1 = Map.mapWithKey (\c (_,fns,_,_) ->
+ let p' = fromMaybe 0 (Map.lookup c probs)
+ fns' = sortBy cmpProb (fill fns)
+ in (p', fns'))
+ (cats (abstract pgf))
in Probs funs1 cats1
where
cmpProb (p1,_) (p2,_) = compare p2 p1
@@ -71,15 +76,15 @@ defaultProbabilities pgf = mkProbabilities pgf Map.empty
getProbabilities :: PGF -> Probabilities
getProbabilities pgf = Probs {
- funProbs = Map.map (\(_,_,_,p,_) -> p) (funs (abstract pgf)),
- catProbs = Map.map (\(_,fns,_) -> fns) (cats (abstract pgf))
+ funProbs = Map.map (\(_,_,_,p,_) -> p ) (funs (abstract pgf)),
+ catProbs = Map.map (\(_,fns,p,_) -> (p,fns)) (cats (abstract pgf))
}
setProbabilities :: Probabilities -> PGF -> PGF
setProbabilities probs pgf = pgf {
abstract = (abstract pgf) {
- funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df,p,addr)) (funs (abstract pgf)) (funProbs probs),
- cats = mapUnionWith (\(hypos,_,addr) fns -> (hypos,fns,addr)) (cats (abstract pgf)) (catProbs probs)
+ funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df, p,addr)) (funs (abstract pgf)) (funProbs probs),
+ cats = mapUnionWith (\(hypos,_,_,addr) (p,fns) -> (hypos,fns,p,addr)) (cats (abstract pgf)) (catProbs probs)
}}
where
mapUnionWith f map1 map2 =
@@ -102,7 +107,7 @@ rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p)
mkProbDefs :: PGF -> ([[CId]],[(CId,Type,[Equation])])
mkProbDefs pgf =
- let cs = [(c,hyps,fns) | (c,(hyps0,fs,_)) <- Map.toList (cats (abstract pgf)),
+ let cs = [(c,hyps,fns) | (c,(hyps0,fs,_,_)) <- Map.toList (cats (abstract pgf)),
not (elem c [cidString,cidInt,cidFloat]),
let hyps = zipWith (\(bt,_,ty) n -> (bt,mkCId ('v':show n),ty))
hyps0
diff --git a/src/runtime/haskell/PGF/TypeCheck.hs b/src/runtime/haskell/PGF/TypeCheck.hs
index 141189193..e582f97af 100644
--- a/src/runtime/haskell/PGF/TypeCheck.hs
+++ b/src/runtime/haskell/PGF/TypeCheck.hs
@@ -121,8 +121,8 @@ runTcM abstr f ms s = unTcM f abstr (\x ms s cp b -> let (es,xs) = cp b
lookupCatHyps :: CId -> TcM s [Hypo]
lookupCatHyps cat = TcM (\abstr k h ms -> case Map.lookup cat (cats abstr) of
- Just (hyps,_,_) -> k hyps ms
- Nothing -> h (UnknownCat cat))
+ Just (hyps,_,_,_) -> k hyps ms
+ Nothing -> h (UnknownCat cat))
lookupFunType :: CId -> TcM s Type
lookupFunType fun = TcM (\abstr k h ms -> case Map.lookup fun (funs abstr) of
@@ -143,8 +143,8 @@ typeGenerators scope cat = fmap normalize (liftM2 (++) x y)
| cat == cidString = return [(1.0,ELit (LStr "Foo"),TTyp [] (DTyp [] cat []))]
| otherwise = TcM (\abstr k h ms ->
case Map.lookup cat (cats abstr) of
- Just (_,fns,_) -> unTcM (mapM helper fns) abstr k h ms
- Nothing -> h (UnknownCat cat))
+ Just (_,fns,_,_) -> unTcM (mapM helper fns) abstr k h ms
+ Nothing -> h (UnknownCat cat))
helper (p,fn) = do
ty <- lookupFunType fn