summaryrefslogtreecommitdiff
path: root/src/runtime/c/pgf/reader.c
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2012-10-25 14:42:53 +0000
committerkr.angelov <kr.angelov@gmail.com>2012-10-25 14:42:53 +0000
commitd18593895294b3d2203f7e6fab253f1b5835ff85 (patch)
tree8c82ebafdc0a72ec75edc532c4b69cb101811390 /src/runtime/c/pgf/reader.c
parent619c341ee9971bb6f8c263df2e8e84b4e06923ec (diff)
a major refactoring in the robust parser: bottom-up filtering and garbage collection for the chart
Diffstat (limited to 'src/runtime/c/pgf/reader.c')
-rw-r--r--src/runtime/c/pgf/reader.c67
1 files changed, 15 insertions, 52 deletions
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index 1823c0c02..a020d3fcc 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -36,8 +36,6 @@
#include <gu/log.h>
-typedef GuMap PgfContsMap;
-
//
// PgfReader
//
@@ -53,7 +51,6 @@ struct PgfReader {
PgfAbstr* curr_abstr;
PgfConcr* curr_concr;
GuMap* curr_lindefs;
- PgfContsMap* curr_conts_map; // used temporary for building the bu index for the parser
GuTypeMap* read_to_map;
GuTypeMap* read_new_map;
void* curr_key;
@@ -445,6 +442,7 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to)
ccat->prods = gu_null_seq;
ccat->viterbi_prob = 0;
ccat->fid = fid;
+ ccat->conts = NULL;
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
}
@@ -468,6 +466,7 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool);
ccat->viterbi_prob = 0;
ccat->fid = *fidp;
+ ccat->conts = NULL;
size_t top = 0;
size_t bot = n_prods-1;
@@ -716,6 +715,9 @@ pgf_ccat_set_cnccat(PgfCCat* ccat)
return ccat->cnccat;
}
+extern float
+pgf_ccat_set_viterbi_prob(PgfCCat* ccat);
+
static void
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
@@ -723,41 +725,14 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
PgfCCat* ccat = *((PgfCCat**) value);
pgf_ccat_set_cnccat(ccat);
+// pgf_ccat_set_viterbi_prob(ccat);
}
-extern GU_DECLARE_TYPE(PgfContsMap, GuMap);
-
void
-pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
- PgfContsMap* conts_map,
- GuPool *pool, GuPool *tmp_pool);
+pgf_parser_index(PgfConcr* concr, GuPool *pool);
void
-pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
- GuPool *pool);
-
-static void
-pgf_index_prods(GuMapItor* fn, const void* key, void* value, GuExn* err)
-{
- (void) (key && err);
-
- PgfIndexFn* clo = (PgfIndexFn*) fn;
- PgfCCat* ccat = *((PgfCCat**) value);
- PgfReader *rdr = clo->rdr;
-
- if (gu_seq_is_null(ccat->prods))
- return;
-
- size_t n_prods = gu_seq_length(ccat->prods);
- for (size_t i = 0; i < n_prods; i++) {
- PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
-
- pgf_parser_bu_index(rdr->curr_concr, ccat, prod,
- rdr->curr_conts_map,
- rdr->opool, rdr->tmp_pool);
- pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool);
- }
-}
+pgf_lzr_index(PgfConcr* concr, GuPool *pool);
static void*
pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
@@ -777,20 +752,18 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap);
rdr->curr_lindefs = gu_map_type_make(lindefs_t, rdr->tmp_pool);
pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs);
- rdr->curr_conts_map = gu_map_type_new(PgfContsMap, rdr->tmp_pool);
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
concr->ccats =
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
- concr->lexicon_idx = gu_map_type_new(PgfTransitions,pool);
- concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool);
+ concr->epsilon_idx = gu_map_type_new(PgfProductionIdx, pool);
+ concr->leftcorner_cat_idx = gu_map_type_new(PgfLeftcornerCatIdx,pool);
+ concr->leftcorner_tok_idx = gu_map_type_new(PgfLeftcornerTokIdx,pool);
pgf_read_into_map(ccats_t, rdr, concr->ccats);
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL);
concr->callbacks = pgf_new_callbacks_map(concr, pool);
concr->total_cats = pgf_read_int(rdr);
- concr->max_fid = concr->total_cats;
- concr->item_quota = 2000000;
// set the function ids
int n_funs = gu_list_length(concr->cncfuns);
@@ -805,9 +778,9 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr };
gu_map_iter(concr->ccats, &clo1.fn, NULL);
-
- PgfIndexFn clo2 = { { pgf_index_prods }, rdr };
- gu_map_iter(concr->ccats, &clo2.fn, NULL);
+
+ pgf_parser_index(concr, pool);
+ pgf_lzr_index(concr, pool);
return concr;
}
@@ -844,6 +817,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool,
ccat->prods = gu_null_seq;
ccat->viterbi_prob = 0;
ccat->fid = fid;
+ ccat->conts = NULL;
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
}
@@ -922,7 +896,6 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
rdr->curr_abstr = NULL;
rdr->curr_concr = NULL;
rdr->curr_lindefs = NULL;
- rdr->curr_conts_map = NULL;
rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool);
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
return rdr;
@@ -989,13 +962,3 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, GuPool* pool)
fclose(fp);
return true;
}
-
-void
-pgf_set_item_quota(PgfConcr* concr, int quota) {
- concr->item_quota = quota > 0 ? quota : 0;
-}
-
-int
-pgf_get_item_quota(PgfConcr* concr) {
- return concr->item_quota;
-}