diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2012-10-25 14:42:53 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2012-10-25 14:42:53 +0000 |
| commit | d18593895294b3d2203f7e6fab253f1b5835ff85 (patch) | |
| tree | 8c82ebafdc0a72ec75edc532c4b69cb101811390 /src/runtime/c/pgf/reader.c | |
| parent | 619c341ee9971bb6f8c263df2e8e84b4e06923ec (diff) | |
a major refactoring in the robust parser: bottom-up filtering and garbage collection for the chart
Diffstat (limited to 'src/runtime/c/pgf/reader.c')
| -rw-r--r-- | src/runtime/c/pgf/reader.c | 67 |
1 files changed, 15 insertions, 52 deletions
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 1823c0c02..a020d3fcc 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -36,8 +36,6 @@ #include <gu/log.h> -typedef GuMap PgfContsMap; - // // PgfReader // @@ -53,7 +51,6 @@ struct PgfReader { PgfAbstr* curr_abstr; PgfConcr* curr_concr; GuMap* curr_lindefs; - PgfContsMap* curr_conts_map; // used temporary for building the bu index for the parser GuTypeMap* read_to_map; GuTypeMap* read_new_map; void* curr_key; @@ -445,6 +442,7 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to) ccat->prods = gu_null_seq; ccat->viterbi_prob = 0; ccat->fid = fid; + ccat->conts = NULL; gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat); } @@ -468,6 +466,7 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to) ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool); ccat->viterbi_prob = 0; ccat->fid = *fidp; + ccat->conts = NULL; size_t top = 0; size_t bot = n_prods-1; @@ -716,6 +715,9 @@ pgf_ccat_set_cnccat(PgfCCat* ccat) return ccat->cnccat; } +extern float +pgf_ccat_set_viterbi_prob(PgfCCat* ccat); + static void pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) { @@ -723,41 +725,14 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) PgfCCat* ccat = *((PgfCCat**) value); pgf_ccat_set_cnccat(ccat); +// pgf_ccat_set_viterbi_prob(ccat); } -extern GU_DECLARE_TYPE(PgfContsMap, GuMap); - void -pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, - PgfContsMap* conts_map, - GuPool *pool, GuPool *tmp_pool); +pgf_parser_index(PgfConcr* concr, GuPool *pool); void -pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, - GuPool *pool); - -static void -pgf_index_prods(GuMapItor* fn, const void* key, void* value, GuExn* err) -{ - (void) (key && err); - - PgfIndexFn* clo = (PgfIndexFn*) fn; - PgfCCat* ccat = *((PgfCCat**) value); - PgfReader *rdr = clo->rdr; - - if (gu_seq_is_null(ccat->prods)) - return; - - size_t n_prods = gu_seq_length(ccat->prods); - for (size_t i = 0; i < n_prods; i++) { - PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i); - - pgf_parser_bu_index(rdr->curr_concr, ccat, prod, - rdr->curr_conts_map, - rdr->opool, rdr->tmp_pool); - pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool); - } -} +pgf_lzr_index(PgfConcr* concr, GuPool *pool); static void* pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, @@ -777,20 +752,18 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap); rdr->curr_lindefs = gu_map_type_make(lindefs_t, rdr->tmp_pool); pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs); - rdr->curr_conts_map = gu_map_type_new(PgfContsMap, rdr->tmp_pool); GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap); concr->ccats = gu_new_int_map(PgfCCat*, &gu_null_struct, pool); concr->fun_indices = gu_map_type_new(PgfFunIndices, pool); concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); - concr->lexicon_idx = gu_map_type_new(PgfTransitions,pool); - concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool); + concr->epsilon_idx = gu_map_type_new(PgfProductionIdx, pool); + concr->leftcorner_cat_idx = gu_map_type_new(PgfLeftcornerCatIdx,pool); + concr->leftcorner_tok_idx = gu_map_type_new(PgfLeftcornerTokIdx,pool); pgf_read_into_map(ccats_t, rdr, concr->ccats); concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL); concr->callbacks = pgf_new_callbacks_map(concr, pool); concr->total_cats = pgf_read_int(rdr); - concr->max_fid = concr->total_cats; - concr->item_quota = 2000000; // set the function ids int n_funs = gu_list_length(concr->cncfuns); @@ -805,9 +778,9 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr }; gu_map_iter(concr->ccats, &clo1.fn, NULL); - - PgfIndexFn clo2 = { { pgf_index_prods }, rdr }; - gu_map_iter(concr->ccats, &clo2.fn, NULL); + + pgf_parser_index(concr, pool); + pgf_lzr_index(concr, pool); return concr; } @@ -844,6 +817,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool, ccat->prods = gu_null_seq; ccat->viterbi_prob = 0; ccat->fid = fid; + ccat->conts = NULL; gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat); } @@ -922,7 +896,6 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err) rdr->curr_abstr = NULL; rdr->curr_concr = NULL; rdr->curr_lindefs = NULL; - rdr->curr_conts_map = NULL; rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool); rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool); return rdr; @@ -989,13 +962,3 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, GuPool* pool) fclose(fp); return true; } - -void -pgf_set_item_quota(PgfConcr* concr, int quota) { - concr->item_quota = quota > 0 ? quota : 0; -} - -int -pgf_get_item_quota(PgfConcr* concr) { - return concr->item_quota; -} |
