summary | refs | log | tree | commit | diff
path: root/src/runtime/c
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/c')
-rw-r--r-- src/runtime/c/pgf/data.h 5
-rw-r--r-- src/runtime/c/pgf/parser.c 92
-rw-r--r-- src/runtime/c/pgf/pgf.c 57
-rw-r--r-- src/runtime/c/pgf/pgf.h 5
-rw-r--r-- src/runtime/c/pgf/printer.c 2
-rw-r--r-- src/runtime/c/pgf/reader.c 8
-rw-r--r-- src/runtime/c/utils/pgf-translate.c 19
7 files changed, 69 insertions, 119 deletions
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h
index 5b0401764..9bc73dd0a 100644
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -87,9 +87,7 @@ typedef struct {
PgfCId name;
PgfHypos* context;
- prob_t meta_prob;
- prob_t meta_token_prob;
- PgfMetaChildMap* meta_child_probs;
+ prob_t prob;
void* predicate;
} PgfAbsCat;
@@ -230,6 +228,7 @@ typedef GuSeq PgfCncFuns;
struct PgfConcr {
PgfCId name;
+ PgfAbstr* abstr;
PgfFlags* cflags;
PgfPrintNames* printnames;
GuMap* ccats;
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 4e4724c75..ec21fc84e 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -63,7 +63,10 @@ typedef struct {
int prod_full_count;
#endif
PgfItem* free_item;
- prob_t beam_size;
+
+ prob_t heuristic_factor;
+ prob_t meta_prob;
+ prob_t meta_token_prob;
} PgfParsing;
typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
@@ -1389,12 +1392,14 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
{
(void) (err);
- PgfAbsCat* abscat = (PgfAbsCat*) key;
- prob_t meta_prob = *((prob_t*) value);
+ PgfAbsCat* abscat = *((PgfAbsCat**) value);
PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn;
PgfParsing* ps = clo->ps;
PgfItem* meta_item = clo->meta_item;
+ if (abscat->prob == INFINITY)
+ return;
+
PgfCncCat* cnccat =
gu_map_get(ps->concr->cnccats, abscat->name, PgfCncCat*);
if (cnccat == NULL)
@@ -1412,7 +1417,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
PgfItem* item =
pgf_item_copy(meta_item, ps);
item->inside_prob +=
- ccat->viterbi_prob+meta_prob;
+ ccat->viterbi_prob+abscat->prob;
size_t nargs = gu_seq_length(meta_item->args);
item->args = gu_new_seq(PgfPArg, nargs+1, ps->pool);
@@ -1698,18 +1703,14 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item)
}
pgf_parsing_complete(ps, item, ep);
} else {
- prob_t meta_token_prob =
- item->conts->ccat->cnccat->abscat->meta_token_prob;
+ prob_t meta_token_prob =
+ ps->meta_token_prob;
if (meta_token_prob != INFINITY) {
pgf_parsing_meta_scan(ps, item, meta_token_prob);
}
- PgfCIdMap* meta_child_probs =
- item->conts->ccat->cnccat->abscat->meta_child_probs;
- if (meta_child_probs != NULL) {
- PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item };
- gu_map_iter(meta_child_probs, &clo.fn, NULL);
- }
+ PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item };
+ gu_map_iter(ps->concr->abstr->cats, &clo.fn, NULL);
}
} else {
pgf_parsing_symbol(ps, item, item->curr_sym);
@@ -1721,22 +1722,38 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item)
}
}
-static prob_t
-pgf_parsing_default_beam_size(PgfConcr* concr)
+static void
+pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr)
{
- PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral);
+ PgfLiteral lit;
- if (gu_variant_is_null(lit))
- return 0;
+ lit =
+ gu_map_get(abstr->aflags, "heuristic_search_factor", PgfLiteral);
+ if (!gu_variant_is_null(lit)) {
+ GuVariantInfo pi = gu_variant_open(lit);
+ gu_assert (pi.tag == PGF_LITERAL_FLT);
+ ps->heuristic_factor = ((PgfLiteralFlt*) pi.data)->val;
+ }
+
+ lit =
+ gu_map_get(abstr->aflags, "meta_prob", PgfLiteral);
+ if (!gu_variant_is_null(lit)) {
+ GuVariantInfo pi = gu_variant_open(lit);
+ gu_assert (pi.tag == PGF_LITERAL_FLT);
+ ps->meta_prob = - log(((PgfLiteralFlt*) pi.data)->val);
+ }
- GuVariantInfo pi = gu_variant_open(lit);
- gu_assert (pi.tag == PGF_LITERAL_FLT);
- return ((PgfLiteralFlt*) pi.data)->val;
+ lit =
+ gu_map_get(abstr->aflags, "meta_token_prob", PgfLiteral);
+ if (!gu_variant_is_null(lit)) {
+ GuVariantInfo pi = gu_variant_open(lit);
+ gu_assert (pi.tag == PGF_LITERAL_FLT);
+ ps->meta_token_prob = - log(((PgfLiteralFlt*) pi.data)->val);
+ }
}
static PgfParsing*
-pgf_new_parsing(PgfConcr* concr,
- GuString sentence, double heuristics,
+pgf_new_parsing(PgfConcr* concr, GuString sentence,
GuPool* pool, GuPool* out_pool)
{
PgfParsing* ps = gu_new(PgfParsing, pool);
@@ -1756,7 +1773,11 @@ pgf_new_parsing(PgfConcr* concr,
ps->prod_full_count = 0;
#endif
ps->free_item = NULL;
- ps->beam_size = heuristics;
+ ps->heuristic_factor = 0;
+ ps->meta_prob = INFINITY;
+ ps->meta_token_prob = INFINITY;
+
+ pgf_parsing_set_default_factors(ps, concr->abstr);
PgfExprMeta *expr_meta =
gu_new_variant(PGF_EXPR_META,
@@ -2107,7 +2128,7 @@ pgf_parse_result_is_new(PgfExprState* st)
// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat
static PgfParsing*
pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
- GuString sentence, double heuristics,
+ GuString sentence, double heuristic_factor,
GuExn* err,
GuPool* pool, GuPool* out_pool)
{
@@ -2121,12 +2142,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
gu_assert(lin_idx < cnccat->n_lins);
- if (heuristics < 0) {
- heuristics = pgf_parsing_default_beam_size(concr);
+ PgfParsing* ps =
+ pgf_new_parsing(concr, sentence, pool, out_pool);
+
+ if (heuristic_factor >= 0) {
+ ps->heuristic_factor = heuristic_factor;
}
- PgfParsing* ps =
- pgf_new_parsing(concr, sentence, heuristics, pool, out_pool);
PgfParseState* state =
pgf_new_parse_state(ps, 0, BIND_SOFT);
@@ -2156,11 +2178,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
}
- PgfItem *item =
- pgf_new_item(ps, conts, ps->meta_prod);
- item->inside_prob =
- ccat->cnccat->abscat->meta_prob;
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ if (ps->meta_prob != INFINITY) {
+ PgfItem *item =
+ pgf_new_item(ps, conts, ps->meta_prod);
+ item->inside_prob =
+ ps->meta_prob;
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ }
}
}
@@ -2200,7 +2224,7 @@ pgf_parsing_proceed(PgfParsing* ps)
prob_t state_delta =
(st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))*
- ps->beam_size;
+ ps->heuristic_factor;
delta_prob += state_delta;
st = st->next;
}
diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c
index e804f5ce7..93dea300a 100644
--- a/src/runtime/c/pgf/pgf.c
+++ b/src/runtime/c/pgf/pgf.c
@@ -35,63 +35,6 @@ pgf_read(const char* fpath,
return pgf;
}
-void
-pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
- GuPool* pool, GuExn* err)
-{
- FILE *fp = fopen(fpath, "r");
- if (!fp) {
- gu_raise_errno(err);
- return;
- }
-
- GuPool* tmp_pool = gu_new_pool();
-
- for (;;) {
- char cat1[21];
- char cat2[21];
- prob_t prob;
-
- if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3)
- break;
-
- prob = - log(prob);
-
- PgfAbsCat* abscat1 =
- gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*);
- if (abscat1 == NULL) {
- GuExnData* exn = gu_raise(err, PgfExn);
- exn->data = "Unknown category name";
- goto close;
- }
-
- if (strcmp(cat2, "*") == 0) {
- abscat1->meta_prob = prob;
- } else if (strcmp(cat2, "_") == 0) {
- abscat1->meta_token_prob = prob;
- } else {
- PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*);
- if (abscat2 == NULL) {
- gu_raise(err, PgfExn);
- GuExnData* exn = gu_raise(err, PgfExn);
- exn->data = "Unknown category name";
- goto close;
- }
-
- if (abscat1->meta_child_probs == NULL) {
- abscat1->meta_child_probs =
- gu_map_type_new(PgfMetaChildMap, pool);
- }
-
- gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob);
- }
- }
-
-close:
- gu_pool_free(tmp_pool);
- fclose(fp);
-}
-
GuString
pgf_abstract_name(PgfPGF* pgf)
{
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 61b8bea6c..ffc293306 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -80,11 +80,6 @@ pgf_read(const char* fpath,
*
*/
-
-void
-pgf_load_meta_child_probs(PgfPGF*, const char* fpath,
- GuPool* pool, GuExn* err);
-
GuString
pgf_abstract_name(PgfPGF*);
diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c
index da7c70d7c..78c2b74db 100644
--- a/src/runtime/c/pgf/printer.c
+++ b/src/runtime/c/pgf/printer.c
@@ -48,7 +48,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
ctxt = next;
}
- gu_printf(out, err, " ; -- %f\n",cat->meta_prob);
+ gu_printf(out, err, " ; -- %f\n", cat->prob);
}
void
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index d215f25e1..12605b89a 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -516,10 +516,6 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
gu_return_on_exn(rdr->err, NULL);
}
- abscat->meta_prob = INFINITY;
- abscat->meta_token_prob = INFINITY;
- abscat->meta_child_probs = NULL;
-
GuBuf* functions = gu_new_buf(PgfAbsFun*, rdr->tmp_pool);
size_t n_functions = pgf_read_len(rdr);
@@ -538,6 +534,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
gu_buf_push(functions, PgfAbsFun*, absfun);
}
+ abscat->prob = - log(gu_in_f64be(rdr->in, rdr->err));
+
pgf_jit_predicate(rdr->jit_state, abscats, abscat, functions);
return abscat;
@@ -1155,6 +1153,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun)
pgf_read_cid(rdr, rdr->opool);
gu_return_on_exn(rdr->err, NULL);
+ concr->abstr = abstr;
+
concr->cflags =
pgf_read_flags(rdr);
gu_return_on_exn(rdr->err, NULL);
diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c
index 79a4fdd42..32f8323ab 100644
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -53,18 +53,17 @@ int main(int argc, char* argv[]) {
// Create the pool that is used to allocate everything
GuPool* pool = gu_new_pool();
int status = EXIT_SUCCESS;
- if (argc < 5 || argc > 6) {
- fprintf(stderr, "usage: %s pgf cat from-lang to-lang [probs-file]\n", argv[0]);
+ if (argc < 5) {
+ fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]);
status = EXIT_FAILURE;
goto fail;
}
- char* filename = argv[1];
+ GuString filename = argv[1];
GuString cat = argv[2];
-
GuString from_lang = argv[3];
GuString to_lang = argv[4];
-
+
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
@@ -78,16 +77,6 @@ int main(int argc, char* argv[]) {
goto fail;
}
- if (argc == 6) {
- char* meta_probs_filename = argv[5];
- pgf_load_meta_child_probs(pgf, meta_probs_filename, pool, err);
- if (!gu_ok(err)) {
- fprintf(stderr, "Loading meta child probs failed\n");
- status = EXIT_FAILURE;
- goto fail;
- }
- }
-
// Look up the source and destination concrete categories
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
PgfConcr* to_concr = pgf_get_language(pgf, to_lang);