diff options
Diffstat (limited to 'src/runtime/c')
| -rw-r--r-- | src/runtime/c/pgf/data.h | 5 | ||||
| -rw-r--r-- | src/runtime/c/pgf/parser.c | 92 | ||||
| -rw-r--r-- | src/runtime/c/pgf/pgf.c | 57 | ||||
| -rw-r--r-- | src/runtime/c/pgf/pgf.h | 5 | ||||
| -rw-r--r-- | src/runtime/c/pgf/printer.c | 2 | ||||
| -rw-r--r-- | src/runtime/c/pgf/reader.c | 8 | ||||
| -rw-r--r-- | src/runtime/c/utils/pgf-translate.c | 19 |
7 files changed, 69 insertions, 119 deletions
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 5b0401764..9bc73dd0a 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -87,9 +87,7 @@ typedef struct { PgfCId name; PgfHypos* context; - prob_t meta_prob; - prob_t meta_token_prob; - PgfMetaChildMap* meta_child_probs; + prob_t prob; void* predicate; } PgfAbsCat; @@ -230,6 +228,7 @@ typedef GuSeq PgfCncFuns; struct PgfConcr { PgfCId name; + PgfAbstr* abstr; PgfFlags* cflags; PgfPrintNames* printnames; GuMap* ccats; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 4e4724c75..ec21fc84e 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -63,7 +63,10 @@ typedef struct { int prod_full_count; #endif PgfItem* free_item; - prob_t beam_size; + + prob_t heuristic_factor; + prob_t meta_prob; + prob_t meta_token_prob; } PgfParsing; typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE; @@ -1389,12 +1392,14 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err { (void) (err); - PgfAbsCat* abscat = (PgfAbsCat*) key; - prob_t meta_prob = *((prob_t*) value); + PgfAbsCat* abscat = *((PgfAbsCat**) value); PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn; PgfParsing* ps = clo->ps; PgfItem* meta_item = clo->meta_item; + if (abscat->prob == INFINITY) + return; + PgfCncCat* cnccat = gu_map_get(ps->concr->cnccats, abscat->name, PgfCncCat*); if (cnccat == NULL) @@ -1412,7 +1417,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err PgfItem* item = pgf_item_copy(meta_item, ps); item->inside_prob += - ccat->viterbi_prob+meta_prob; + ccat->viterbi_prob+abscat->prob; size_t nargs = gu_seq_length(meta_item->args); item->args = gu_new_seq(PgfPArg, nargs+1, ps->pool); @@ -1698,18 +1703,14 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item) } pgf_parsing_complete(ps, item, ep); } else { - prob_t meta_token_prob = - item->conts->ccat->cnccat->abscat->meta_token_prob; + prob_t meta_token_prob = + ps->meta_token_prob; if (meta_token_prob != INFINITY) { pgf_parsing_meta_scan(ps, item, meta_token_prob); } - PgfCIdMap* meta_child_probs = - item->conts->ccat->cnccat->abscat->meta_child_probs; - if (meta_child_probs != NULL) { - PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item }; - gu_map_iter(meta_child_probs, &clo.fn, NULL); - } + PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item }; + gu_map_iter(ps->concr->abstr->cats, &clo.fn, NULL); } } else { pgf_parsing_symbol(ps, item, item->curr_sym); @@ -1721,22 +1722,38 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item) } } -static prob_t -pgf_parsing_default_beam_size(PgfConcr* concr) +static void +pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr) { - PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral); + PgfLiteral lit; - if (gu_variant_is_null(lit)) - return 0; + lit = + gu_map_get(abstr->aflags, "heuristic_search_factor", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->heuristic_factor = ((PgfLiteralFlt*) pi.data)->val; + } + + lit = + gu_map_get(abstr->aflags, "meta_prob", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->meta_prob = - log(((PgfLiteralFlt*) pi.data)->val); + } - GuVariantInfo pi = gu_variant_open(lit); - gu_assert (pi.tag == PGF_LITERAL_FLT); - return ((PgfLiteralFlt*) pi.data)->val; + lit = + gu_map_get(abstr->aflags, "meta_token_prob", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->meta_token_prob = - log(((PgfLiteralFlt*) pi.data)->val); + } } static PgfParsing* -pgf_new_parsing(PgfConcr* concr, - GuString sentence, double heuristics, +pgf_new_parsing(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool) { PgfParsing* ps = gu_new(PgfParsing, pool); @@ -1756,7 +1773,11 @@ pgf_new_parsing(PgfConcr* concr, ps->prod_full_count = 0; #endif ps->free_item = NULL; - ps->beam_size = heuristics; + ps->heuristic_factor = 0; + ps->meta_prob = INFINITY; + ps->meta_token_prob = INFINITY; + + pgf_parsing_set_default_factors(ps, concr->abstr); PgfExprMeta *expr_meta = gu_new_variant(PGF_EXPR_META, @@ -2107,7 +2128,7 @@ pgf_parse_result_is_new(PgfExprState* st) // TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat static PgfParsing* pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, - GuString sentence, double heuristics, + GuString sentence, double heuristic_factor, GuExn* err, GuPool* pool, GuPool* out_pool) { @@ -2121,12 +2142,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, gu_assert(lin_idx < cnccat->n_lins); - if (heuristics < 0) { - heuristics = pgf_parsing_default_beam_size(concr); + PgfParsing* ps = + pgf_new_parsing(concr, sentence, pool, out_pool); + + if (heuristic_factor >= 0) { + ps->heuristic_factor = heuristic_factor; } - PgfParsing* ps = - pgf_new_parsing(concr, sentence, heuristics, pool, out_pool); PgfParseState* state = pgf_new_parse_state(ps, 0, BIND_SOFT); @@ -2156,11 +2178,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); } - PgfItem *item = - pgf_new_item(ps, conts, ps->meta_prod); - item->inside_prob = - ccat->cnccat->abscat->meta_prob; - gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + if (ps->meta_prob != INFINITY) { + PgfItem *item = + pgf_new_item(ps, conts, ps->meta_prod); + item->inside_prob = + ps->meta_prob; + gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + } } } @@ -2200,7 +2224,7 @@ pgf_parsing_proceed(PgfParsing* ps) prob_t state_delta = (st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))* - ps->beam_size; + ps->heuristic_factor; delta_prob += state_delta; st = st->next; } diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index e804f5ce7..93dea300a 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -35,63 +35,6 @@ pgf_read(const char* fpath, return pgf; } -void -pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, - GuPool* pool, GuExn* err) -{ - FILE *fp = fopen(fpath, "r"); - if (!fp) { - gu_raise_errno(err); - return; - } - - GuPool* tmp_pool = gu_new_pool(); - - for (;;) { - char cat1[21]; - char cat2[21]; - prob_t prob; - - if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3) - break; - - prob = - log(prob); - - PgfAbsCat* abscat1 = - gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*); - if (abscat1 == NULL) { - GuExnData* exn = gu_raise(err, PgfExn); - exn->data = "Unknown category name"; - goto close; - } - - if (strcmp(cat2, "*") == 0) { - abscat1->meta_prob = prob; - } else if (strcmp(cat2, "_") == 0) { - abscat1->meta_token_prob = prob; - } else { - PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*); - if (abscat2 == NULL) { - gu_raise(err, PgfExn); - GuExnData* exn = gu_raise(err, PgfExn); - exn->data = "Unknown category name"; - goto close; - } - - if (abscat1->meta_child_probs == NULL) { - abscat1->meta_child_probs = - gu_map_type_new(PgfMetaChildMap, pool); - } - - gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob); - } - } - -close: - gu_pool_free(tmp_pool); - fclose(fp); -} - GuString pgf_abstract_name(PgfPGF* pgf) { diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 61b8bea6c..ffc293306 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -80,11 +80,6 @@ pgf_read(const char* fpath, * */ - -void -pgf_load_meta_child_probs(PgfPGF*, const char* fpath, - GuPool* pool, GuExn* err); - GuString pgf_abstract_name(PgfPGF*); diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index da7c70d7c..78c2b74db 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -48,7 +48,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value, ctxt = next; } - gu_printf(out, err, " ; -- %f\n",cat->meta_prob); + gu_printf(out, err, " ; -- %f\n", cat->prob); } void diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index d215f25e1..12605b89a 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -516,10 +516,6 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats) gu_return_on_exn(rdr->err, NULL); } - abscat->meta_prob = INFINITY; - abscat->meta_token_prob = INFINITY; - abscat->meta_child_probs = NULL; - GuBuf* functions = gu_new_buf(PgfAbsFun*, rdr->tmp_pool); size_t n_functions = pgf_read_len(rdr); @@ -538,6 +534,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats) gu_buf_push(functions, PgfAbsFun*, absfun); } + abscat->prob = - log(gu_in_f64be(rdr->in, rdr->err)); + pgf_jit_predicate(rdr->jit_state, abscats, abscat, functions); return abscat; @@ -1155,6 +1153,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun) pgf_read_cid(rdr, rdr->opool); gu_return_on_exn(rdr->err, NULL); + concr->abstr = abstr; + concr->cflags = pgf_read_flags(rdr); gu_return_on_exn(rdr->err, NULL); diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index 79a4fdd42..32f8323ab 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -53,18 +53,17 @@ int main(int argc, char* argv[]) { // Create the pool that is used to allocate everything GuPool* pool = gu_new_pool(); int status = EXIT_SUCCESS; - if (argc < 5 || argc > 6) { - fprintf(stderr, "usage: %s pgf cat from-lang to-lang [probs-file]\n", argv[0]); + if (argc < 5) { + fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]); status = EXIT_FAILURE; goto fail; } - char* filename = argv[1]; + GuString filename = argv[1]; GuString cat = argv[2]; - GuString from_lang = argv[3]; GuString to_lang = argv[4]; - + // Create an exception frame that catches all errors. GuExn* err = gu_new_exn(NULL, gu_kind(type), pool); @@ -78,16 +77,6 @@ int main(int argc, char* argv[]) { goto fail; } - if (argc == 6) { - char* meta_probs_filename = argv[5]; - pgf_load_meta_child_probs(pgf, meta_probs_filename, pool, err); - if (!gu_ok(err)) { - fprintf(stderr, "Loading meta child probs failed\n"); - status = EXIT_FAILURE; - goto fail; - } - } - // Look up the source and destination concrete categories PgfConcr* from_concr = pgf_get_language(pgf, from_lang); PgfConcr* to_concr = pgf_get_language(pgf, to_lang); |
