summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2012-06-13 05:49:30 +0000
committerkr.angelov <kr.angelov@gmail.com>2012-06-13 05:49:30 +0000
commit3ad5493758227a3d273dea66061fa254f14c886d (patch)
tree1f6f3219f23b5ba1b21a527858fe19bcf901ebe2
parentcc58e2d05f75710f018efd5e026686146e9b4b9d (diff)
Use a separated tag for meta productions in the robust parser. This cleans up the code a lot
-rw-r--r--src/runtime/c/gu/seq.c9
-rw-r--r--src/runtime/c/gu/seq.h3
-rw-r--r--src/runtime/c/pgf/data.c9
-rw-r--r--src/runtime/c/pgf/data.h14
-rw-r--r--src/runtime/c/pgf/parser.c446
5 files changed, 294 insertions, 187 deletions
diff --git a/src/runtime/c/gu/seq.c b/src/runtime/c/gu/seq.c
index fda1be99c..d994d5990 100644
--- a/src/runtime/c/gu/seq.c
+++ b/src/runtime/c/gu/seq.c
@@ -66,7 +66,12 @@ gu_make_buf(size_t elem_size, GuPool* pool)
return buf;
}
-static const GuWord gu_empty_seq[2] = {0, 0};
+static const GuWord gu_empty_seq_[2] = {0, 0};
+
+GuSeq
+gu_empty_seq() {
+ return (GuSeq) { gu_tagged((void*)&gu_empty_seq_[1], 0) };
+}
GuSeq
gu_make_seq(size_t elem_size, size_t length, GuPool* pool)
@@ -76,7 +81,7 @@ gu_make_seq(size_t elem_size, size_t length, GuPool* pool)
void* buf = gu_malloc(pool, size);
return (GuSeq) { gu_tagged(buf, length) };
} else if (size == 0) {
- return (GuSeq) { gu_tagged((void*)&gu_empty_seq[1], 0) };
+ return gu_empty_seq();
} else {
void* buf = gu_malloc_prefixed(pool,
gu_alignof(GuWord),
diff --git a/src/runtime/c/gu/seq.h b/src/runtime/c/gu/seq.h
index b21933a65..52b2bfc7e 100644
--- a/src/runtime/c/gu/seq.h
+++ b/src/runtime/c/gu/seq.h
@@ -10,6 +10,9 @@ typedef struct GuBuf GuBuf;
typedef GuOpaque() GuSeq;
GuSeq
+gu_empty_seq();
+
+GuSeq
gu_make_seq(size_t elem_size, size_t len, GuPool* pool);
#define gu_new_seq(T, N, POOL) \
diff --git a/src/runtime/c/pgf/data.c b/src/runtime/c/pgf/data.c
index 74dba9cb8..ce37659f4 100644
--- a/src/runtime/c/pgf/data.c
+++ b/src/runtime/c/pgf/data.c
@@ -127,9 +127,12 @@ GU_DEFINE_TYPE(
GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)),
GU_CONSTRUCTOR_S(
PGF_PRODUCTION_EXTERN, PgfProductionExtern,
- GU_MEMBER(PgfProductionExtern, fun, PgfFunId),
- GU_MEMBER(PgfProductionExtern, args, PgfPArgs),
- GU_MEMBER(PgfProductionExtern, callback, PgfLiteralCallback)));
+ GU_MEMBER(PgfProductionExtern, callback, PgfLiteralCallback),
+ GU_MEMBER(PgfProductionExtern, lins, GuSeq)),
+ GU_CONSTRUCTOR_S(
+ PGF_PRODUCTION_META, PgfProductionMeta,
+ GU_MEMBER(PgfProductionMeta, lins, GuSeq),
+ GU_MEMBER(PgfProductionMeta, args, PgfPArgs)));
GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction));
GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction));
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h
index f85e68cca..f1cd2bafc 100644
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -42,7 +42,6 @@ typedef struct PgfAbstr PgfAbstr;
extern GU_DECLARE_TYPE(PgfAbstr, struct);
typedef struct PgfFunDecl PgfFunDecl;
-typedef struct PgfConcr PgfConcr;
typedef int PgfLength;
typedef struct GuVariant PgfSymbol;
@@ -293,7 +292,8 @@ typedef struct PgfSymbolKP
typedef enum {
PGF_PRODUCTION_APPLY,
PGF_PRODUCTION_COERCE,
- PGF_PRODUCTION_EXTERN
+ PGF_PRODUCTION_EXTERN,
+ PGF_PRODUCTION_META
} PgfProductionTag;
typedef struct PgfPArg PgfPArg;
@@ -323,11 +323,17 @@ typedef struct PgfProductionCoerce
} PgfProductionCoerce;
typedef struct {
- PgfFunId fun;
- PgfPArgs args;
PgfLiteralCallback *callback;
+ PgfExprProb *ep;
+ GuSeq lins;
} PgfProductionExtern;
+typedef struct {
+ PgfExprProb *ep;
+ GuSeq lins;
+ PgfPArgs args;
+} PgfProductionMeta;
+
extern GU_DECLARE_TYPE(PgfPatt, GuVariant);
extern GU_DECLARE_TYPE(PgfProduction, GuVariant);
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 10bff32d6..54121b18a 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -29,6 +29,7 @@ typedef struct {
PgfConcr* concr;
PgfCCat* completed;
PgfItem* target;
+ PgfExpr meta_var;
PgfProduction meta_prod;
int max_fid;
int item_count;
@@ -46,7 +47,6 @@ struct PgfParseState {
PgfItemBuf* agenda;
PgfContsMap* conts_map;
PgfGenCatMap* generated_cats;
- PgfExprProb* meta_ep;
int offset;
PgfParsing* ps;
@@ -161,8 +161,26 @@ pgf_item_sequence_length(PgfItem* item)
PgfProductionExtern* pext = i.data;
PgfSequence seq;
- if (pext->fun != NULL &&
- !gu_seq_is_null(seq = pext->fun->lins[item->base->lin_idx])) {
+ if (!gu_seq_is_null(pext->lins) &&
+ !gu_seq_is_null(seq = gu_seq_get(pext->lins,PgfSequence,item->base->lin_idx))) {
+ return gu_seq_length(seq);
+ } else {
+ int seq_len = 0;
+ PgfSymbol sym = item->curr_sym;
+ while (!gu_variant_is_null(sym)) {
+ seq_len++;
+ sym = pgf_prev_extern_sym(sym);
+ }
+
+ return seq_len;
+ }
+ }
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = i.data;
+ PgfSequence seq;
+
+ if (!gu_seq_is_null(pmeta->lins) &&
+ !gu_seq_is_null(seq = gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
return gu_seq_length(seq);
} else {
int seq_len = 0;
@@ -222,8 +240,17 @@ pgf_item_sequence(PgfItem* item,
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
- if (pext->fun == NULL ||
- gu_seq_is_null(*seq = pext->fun->lins[item->base->lin_idx])) {
+ if (gu_seq_is_null(pext->lins) ||
+ gu_seq_is_null(*seq = gu_seq_get(pext->lins, PgfSequence, item->base->lin_idx))) {
+ *seq = pgf_extern_seq_get(item, pool);
+ }
+ break;
+ }
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = i.data;
+
+ if (gu_seq_is_null(pmeta->lins) ||
+ gu_seq_is_null(*seq = gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
*seq = pgf_extern_seq_get(item, pool);
}
break;
@@ -284,9 +311,14 @@ pgf_print_production(int fid, PgfProduction prod,
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
gu_printf(wtr,err,"<extern>(");
- pgf_print_expr(pext->fun->ep->expr, 0, wtr, err);
- gu_printf(wtr,err,")[");
- pgf_print_production_args(pext->args,wtr,err);
+ pgf_print_expr(pext->ep->expr, 0, wtr, err);
+ gu_printf(wtr,err,")[]\n");
+ break;
+ }
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = i.data;
+ gu_printf(wtr,err,"<meta>[");
+ pgf_print_production_args(pmeta->args,wtr,err);
gu_printf(wtr,err,"]\n");
break;
}
@@ -349,9 +381,9 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err, GuPool* pool)
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
gu_printf(wtr, err, "<extern>");
- if (pext->fun != NULL) {
+ if (pext->ep != NULL) {
gu_printf(wtr, err, "(");
- pgf_print_expr(pext->fun->ep->expr, 0, wtr, err);
+ pgf_print_expr(pext->ep->expr, 0, wtr, err);
gu_printf(wtr, err, ")");
}
gu_printf(wtr, err, "[");
@@ -359,6 +391,12 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err, GuPool* pool)
gu_printf(wtr, err, "]; ");
break;
}
+ case PGF_PRODUCTION_META: {
+ gu_printf(wtr, err, "<meta>[");
+ pgf_print_production_args(item->args, wtr, err);
+ gu_printf(wtr, err, "]; ");
+ break;
+ }
default:
gu_impossible();
}
@@ -486,6 +524,9 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
case PGF_PRODUCTION_EXTERN: {
break;
}
+ case PGF_PRODUCTION_META: {
+ break;
+ }
default:
gu_impossible();
}
@@ -529,8 +570,20 @@ pgf_new_item(int pos, PgfCCat* ccat, size_t lin_idx,
}
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = pi.data;
- item->args = pext->args;
- item->inside_prob = pext->fun ? pext->fun->ep->prob : 0;
+ item->args = gu_empty_seq();
+ item->inside_prob = pext->ep ? pext->ep->prob : 0;
+
+ int n_args = gu_seq_length(item->args);
+ for (int i = 0; i < n_args; i++) {
+ PgfPArg *arg = gu_seq_index(item->args, PgfPArg, i);
+ item->inside_prob += arg->ccat->viterbi_prob;
+ }
+ break;
+ }
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = pi.data;
+ item->args = pmeta->args;
+ item->inside_prob = pmeta->ep ? pmeta->ep->prob : 0;
int n_args = gu_seq_length(item->args);
for (int i = 0; i < n_args; i++) {
@@ -621,10 +674,10 @@ pgf_parsing_combine(PgfParseState* before, PgfParseState* after,
bool extend = false;
GuVariantInfo i = gu_variant_open(cont->base->prod);
- if (i.tag == PGF_PRODUCTION_EXTERN) {
- PgfProductionExtern* pext = i.data;
- if (pext->fun == NULL ||
- gu_seq_is_null(pext->fun->lins[cont->base->lin_idx])) {
+ if (i.tag == PGF_PRODUCTION_META) {
+ PgfProductionMeta* pmeta = i.data;
+ if (gu_seq_is_null(pmeta->lins) ||
+ gu_seq_is_null(gu_seq_get(pmeta->lins,PgfSequence,cont->base->lin_idx))) {
extend = true;
}
}
@@ -719,39 +772,72 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
}
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
- PgfCncFun* fun = pext->fun;
-
- if (fun == NULL ||
- gu_seq_is_null(fun->lins[item->base->lin_idx])) {
+ if (gu_seq_is_null(pext->lins) ||
+ gu_seq_is_null(gu_seq_get(pext->lins,PgfSequence,item->base->lin_idx))) {
PgfSequence seq =
pgf_extern_seq_get(item, pool);
- PgfCncCat *cnccat = item->base->ccat->cnccat;
- size_t size = GU_FLEX_SIZE(PgfCncFun, lins, cnccat->n_lins);
- fun = gu_malloc(pool, size);
- if (pext->fun == NULL) {
- fun->name = gu_empty_string;
- fun->ep = ep;
- fun->funid = -1;
- fun->n_lins = cnccat->n_lins;
-
- for (size_t i = 0; i < fun->n_lins; i++) {
- fun->lins[i] = gu_null_seq;
+ size_t n_lins = item->base->ccat->cnccat->n_lins;
+
+ PgfProductionExtern* new_pext = (PgfProductionExtern*)
+ gu_new_variant(PGF_PRODUCTION_EXTERN,
+ PgfProductionExtern,
+ &prod, pool);
+ new_pext->callback = pext->callback;
+ new_pext->ep = ep;
+ new_pext->lins = gu_new_seq(PgfSequence, n_lins, pool);
+
+ if (gu_seq_is_null(pext->lins)) {
+ for (size_t i = 0; i < n_lins; i++) {
+ gu_seq_set(new_pext->lins,PgfSequence,i,
+ gu_null_seq);
}
} else {
- memcpy(fun, pext->fun, size);
+ for (size_t i = 0; i < n_lins; i++) {
+ gu_seq_set(new_pext->lins,PgfSequence,i,
+ gu_seq_get(pext->lins,PgfSequence,i));
+ }
}
- fun->lins[item->base->lin_idx] = seq;
+ gu_seq_set(new_pext->lins,PgfSequence,item->base->lin_idx,seq);
+ } else {
+ prod = item->base->prod;
}
+ break;
+ }
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = i.data;
+
+ PgfProductionMeta* new_pmeta =
+ gu_new_variant(PGF_PRODUCTION_META,
+ PgfProductionMeta,
+ &prod, pool);
+ new_pmeta->ep = ep;
+ new_pmeta->lins = pmeta->lins;
+ new_pmeta->args = item->args;
+
+ if (gu_seq_is_null(pmeta->lins) ||
+ gu_seq_is_null(gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
+ PgfSequence seq =
+ pgf_extern_seq_get(item, pool);
+
+ size_t n_lins = item->base->ccat->cnccat->n_lins;
+
+ new_pmeta->lins = gu_new_seq(PgfSequence, n_lins, pool);
- PgfProductionExtern* new_pext =
- gu_new_variant(PGF_PRODUCTION_EXTERN,
- PgfProductionExtern,
- &prod, pool);
- new_pext->fun = fun;
- new_pext->args = item->args;
- new_pext->callback = pext->callback;
+ if (gu_seq_is_null(pmeta->lins)) {
+ for (size_t i = 0; i < n_lins; i++) {
+ gu_seq_set(new_pmeta->lins,PgfSequence,i,
+ gu_null_seq);
+ }
+ } else {
+ for (size_t i = 0; i < n_lins; i++) {
+ gu_seq_set(new_pmeta->lins,PgfSequence,i,
+ gu_seq_get(pmeta->lins,PgfSequence,i));
+ }
+ }
+ gu_seq_set(new_pmeta->lins,PgfSequence,item->base->lin_idx,seq);
+ }
break;
}
default:
@@ -871,7 +957,7 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
gu_seq_get(prods, PgfProduction, i);
pgf_parsing_production(before, ccat, lin_idx, prod, conts, delta_prob);
}
-
+
if (ccat->cnccat->abscat->meta_prob != INFINITY &&
ccat->fid < before->ps->concr->total_cats) {
// Top-down prediction for meta rules
@@ -944,6 +1030,41 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
gu_exit("<-");
}
+typedef struct {
+ GuMapItor fn;
+ PgfParseState* before;
+ PgfParseState* after;
+ PgfItem* meta_item;
+} PgfMetaPredictFn;
+
+static void
+pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err)
+{
+ (void) (err);
+
+ PgfCId abscat = *((PgfCId*) key);
+ float meta_prob = *((float*) value);
+ PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn;
+ PgfParseState* before = clo->before;
+ PgfParseState* after = clo->after;
+ PgfItem* meta_item = clo->meta_item;
+
+ PgfCncCat* cnccat =
+ gu_map_get(before->ps->concr->cnccats, &abscat, PgfCncCat*);
+ if (cnccat == NULL)
+ return;
+
+ size_t n_cats = gu_list_length(cnccat->cats);
+ for (size_t i = 0; i < n_cats; i++) {
+ PgfCCat* ccat = gu_list_index(cnccat->cats, i);
+
+ for (size_t lin_idx = 0; lin_idx < cnccat->n_lins; lin_idx++) {
+ pgf_parsing_td_predict(before, after,
+ meta_item, ccat, lin_idx, meta_prob);
+ }
+ }
+}
+
static float
pgf_parsing_bu_predict(PgfParseState* before, PgfParseState* after,
PgfItemBuf* index, PgfItem* meta_item,
@@ -1179,11 +1300,11 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
PgfProduction prod;
PgfProductionExtern* pext =
gu_new_variant(PGF_PRODUCTION_EXTERN,
- PgfProductionExtern,
- &prod, before->pool);
- pext->fun = NULL;
- pext->args = gu_new_seq(PgfPArg, 0, before->pool);
+ PgfProductionExtern,
+ &prod, before->pool);
pext->callback = callback;
+ pext->ep = NULL;
+ pext->lins = gu_null_seq;
pgf_parsing_production(before, parg->ccat, slit->r,
prod, conts, 0);
@@ -1220,9 +1341,6 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
}
}
-PgfParseState *meta_after = NULL;
-static PgfLiteralCallback pgf_meta_callback;
-
static void
pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
{
@@ -1268,11 +1386,10 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
}
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
- PgfCncFun* fun = pext->fun;
PgfSequence seq;
- if (fun != NULL &&
- !gu_seq_is_null(seq = fun->lins[item->base->lin_idx])) {
+ if (!gu_seq_is_null(pext->lins) &&
+ !gu_seq_is_null(seq = gu_seq_get(pext->lins,PgfSequence,item->base->lin_idx))) {
if (item->seq_idx == gu_seq_length(seq)) {
pgf_parsing_complete(before, after, item, NULL);
} else {
@@ -1284,25 +1401,16 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
PgfToken tok = (after != NULL) ? after->ts->tok
: gu_empty_string;
- meta_after = after;
+ PgfExprProb *ep = NULL;
bool accepted =
pext->callback->match(before->ps->concr, item,
tok,
- &before->meta_ep, before->pool);
- meta_after = NULL;
+ &ep, before->pool);
- if (before->meta_ep != NULL)
- pgf_parsing_complete(before, after, item, before->meta_ep);
+ if (ep != NULL)
+ pgf_parsing_complete(before, after, item, ep);
if (accepted && after != NULL) {
- if (pext->callback == &pgf_meta_callback) {
- float meta_token_prob =
- item->base->ccat->cnccat->abscat->meta_token_prob;
- if (meta_token_prob == INFINITY)
- break;
- item->inside_prob += meta_token_prob;
- }
-
PgfSymbol prev = item->curr_sym;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
@@ -1322,109 +1430,78 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
}
break;
}
- default:
- gu_impossible();
- }
-}
-
-typedef struct {
- GuMapItor fn;
- PgfParseState* before;
- PgfParseState* after;
- PgfItem* meta_item;
-} PgfMetaPredictFn;
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = i.data;
-static void
-pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err)
-{
- (void) (err);
-
- PgfCId abscat = *((PgfCId*) key);
- float meta_prob = *((float*) value);
- PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn;
- PgfParseState* before = clo->before;
- PgfParseState* after = clo->after;
- PgfItem* meta_item = clo->meta_item;
-{
- GuPool* tmp_pool = gu_new_pool();
- GuOut* out = gu_file_out(stdout, tmp_pool);
- GuWriter* wtr = gu_new_utf8_writer(out, tmp_pool);
- GuExn* err = gu_exn(NULL, type, tmp_pool);
- gu_string_write(abscat, wtr, err);
- gu_pool_free(tmp_pool);
-}
- PgfCncCat* cnccat =
- gu_map_get(before->ps->concr->cnccats, &abscat, PgfCncCat*);
- if (cnccat == NULL)
- return;
-
- size_t n_cats = gu_list_length(cnccat->cats);
- for (size_t i = 0; i < n_cats; i++) {
- PgfCCat* ccat = gu_list_index(cnccat->cats, i);
-
- for (size_t lin_idx = 0; lin_idx < cnccat->n_lins; lin_idx++) {
- pgf_parsing_td_predict(before, after,
- meta_item, ccat, lin_idx, meta_prob);
- }
- }
-}
-
-static bool
-pgf_match_meta(PgfConcr* concr, PgfItem *item, PgfToken tok,
- PgfExprProb** out_ep, GuPool *pool)
-{
- size_t n_syms = pgf_item_sequence_length(item);
-
- if (n_syms > 0) {
- PgfExprProb *ep = gu_new(PgfExprProb, pool);
- ep->prob = item->inside_prob;
- size_t n_args = gu_seq_length(item->args);
- for (size_t i = 0; i < n_args; i++) {
- PgfPArg* arg = gu_seq_index(item->args, PgfPArg, i);
- ep->prob -= arg->ccat->viterbi_prob;
- }
- PgfExprMeta *expr_meta =
- gu_new_variant(PGF_EXPR_META,
- PgfExprMeta,
- &ep->expr, pool);
- expr_meta->id = 0;
+ PgfSequence seq;
+ if (!gu_seq_is_null(pmeta->lins) &&
+ !gu_seq_is_null(seq = gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
+ if (item->seq_idx == gu_seq_length(seq)) {
+ pgf_parsing_complete(before, after, item, NULL);
+ } else {
+ PgfSymbol sym =
+ gu_seq_get(seq, PgfSymbol, item->seq_idx);
+ pgf_parsing_symbol(before, after, item, sym);
+ }
+ } else {
+ if (!gu_variant_is_null(item->curr_sym)) {
+ PgfExprProb *ep = gu_new(PgfExprProb, before->pool);
+ ep->expr = before->ps->meta_var;
+ ep->prob = item->inside_prob;
+ size_t n_args = gu_seq_length(item->args);
+ for (size_t i = 0; i < n_args; i++) {
+ PgfPArg* arg = gu_seq_index(item->args, PgfPArg, i);
+ ep->prob -= arg->ccat->viterbi_prob;
+ }
+ pgf_parsing_complete(before, after, item, ep);
+ }
- *out_ep = ep;
- } else {
- *out_ep = NULL;
- }
+ if (after != NULL) {
+ if (after->ts->lexicon_idx == NULL) {
+ float meta_token_prob =
+ item->base->ccat->cnccat->abscat->meta_token_prob;
+ if (meta_token_prob == INFINITY)
+ break;
+ item->inside_prob += meta_token_prob;
- PgfParseState* after = meta_after;
- if (after != NULL) {
- if (after->ts->lexicon_idx == NULL)
- return true;
- else {
- PgfParseState* before =
- gu_container(out_ep, PgfParseState, meta_ep);
-
- PgfCIdMap* meta_child_probs =
- item->base->ccat->cnccat->abscat->meta_child_probs;
- if (meta_child_probs != NULL) {
- PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, before, after, item };
- gu_map_iter(meta_child_probs, &clo.fn, NULL);
- }
+ PgfSymbol prev = item->curr_sym;
+ PgfSymbolKS* sks = (PgfSymbolKS*)
+ gu_alloc_variant(PGF_SYMBOL_KS,
+ sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
+ gu_alignof(PgfSymbolKS),
+ &item->curr_sym, after->pool);
+ *((PgfSymbol*)(sks+1)) = prev;
+ sks->tokens = gu_new_seq(PgfToken, 1, after->pool);
+ gu_seq_set(sks->tokens, PgfToken, 0, after->ts->tok);
+
+ item->seq_idx++;
+#ifdef PGF_PARSER_DEBUG
+ item->end++;
+#endif
+ pgf_parsing_add_transition(before, after, after->ts->tok, item);
+ } else {
+ PgfCIdMap* meta_child_probs =
+ item->base->ccat->cnccat->abscat->meta_child_probs;
+ if (meta_child_probs != NULL) {
+ PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, before, after, item };
+ gu_map_iter(meta_child_probs, &clo.fn, NULL);
+ }
/*
- fprintf(stderr, "------------------------------------\n");
- pgf_parsing_bu_predict(before, after,
- after->ts->lexicon_idx, item,
- NULL);
- fprintf(stderr, "------------------------------------\n");*/
- return false;
+ fprintf(stderr, "------------------------------------\n");
+ pgf_parsing_bu_predict(before, after,
+ after->ts->lexicon_idx, item,
+ NULL);
+ fprintf(stderr, "------------------------------------\n");*/
+ }
+ }
}
+ break;
+ }
+ default:
+ gu_impossible();
}
-
- return false;
}
-static PgfLiteralCallback pgf_meta_callback =
- { pgf_match_meta } ;
-
-
static void
pgf_parsing_proceed(PgfParseState* state, void** output) {
while (*output == NULL) {
@@ -1484,13 +1561,19 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
ps->max_fid = concr->max_fid;
ps->item_count = 0;
- PgfProductionExtern* pext =
- gu_new_variant(PGF_PRODUCTION_EXTERN,
- PgfProductionExtern,
+ PgfExprMeta *expr_meta =
+ gu_new_variant(PGF_EXPR_META,
+ PgfExprMeta,
+ &ps->meta_var, pool);
+ expr_meta->id = 0;
+
+ PgfProductionMeta* pmeta =
+ gu_new_variant(PGF_PRODUCTION_META,
+ PgfProductionMeta,
&ps->meta_prod, pool);
- pext->fun = NULL;
- pext->args = gu_new_seq(PgfPArg, 0, pool);
- pext->callback = &pgf_meta_callback;
+ pmeta->ep = NULL;
+ pmeta->lins = gu_null_seq;
+ pmeta->args = gu_new_seq(PgfPArg, 0, pool);
return ps;
}
@@ -1507,7 +1590,6 @@ pgf_new_parse_state(PgfParsing* ps,
state->agenda = gu_new_buf(PgfItem*, pool);
state->generated_cats = gu_map_type_new(PgfGenCatMap, pool);
state->conts_map = gu_map_type_new(PgfContsMap, pool);
- state->meta_ep = NULL;
state->offset = offset;
state->ps = ps;
state->ts = ts;
@@ -1615,11 +1697,24 @@ pgf_result_cat_init(PgfParseResult* pr,
PgfExprState *st = gu_new(PgfExprState, pr->tmp_pool);
st->cont = cont;
- st->expr = pext->fun->ep->expr;
- st->args = pext->args;
+ st->expr = pext->ep->expr;
+ st->args = gu_empty_seq();
st->arg_idx = 0;
- PgfExprQState q = {st, cont_prob + pext->fun->ep->prob};
+ PgfExprQState q = {st, cont_prob + pext->ep->prob};
+ gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
+ break;
+ }
+ case PGF_PRODUCTION_META: {
+ PgfProductionMeta* pmeta = pi.data;
+
+ PgfExprState *st = gu_new(PgfExprState, pr->tmp_pool);
+ st->cont = cont;
+ st->expr = pmeta->ep->expr;
+ st->args = pmeta->args;
+ st->arg_idx = 0;
+
+ PgfExprQState q = {st, cont_prob + pmeta->ep->prob};
size_t n_args = gu_seq_length(st->args);
for (size_t k = 0; k < n_args; k++) {
PgfPArg* parg = gu_seq_index(st->args, PgfPArg, k);
@@ -1629,6 +1724,8 @@ pgf_result_cat_init(PgfParseResult* pr,
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
break;
}
+ default:
+ gu_impossible();
}
}
}
@@ -1655,19 +1752,12 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
if (q.st->arg_idx < gu_seq_length(q.st->args)) {
PgfPArg* arg = gu_seq_index(q.st->args, PgfPArg, q.st->arg_idx);
float cont_prob = q.prob - arg->ccat->viterbi_prob;
- if (arg->ccat->fid < pr->state->ps->concr->total_cats) {
- PgfExpr arg_meta;
- PgfExprMeta *expr_meta =
- gu_new_variant(PGF_EXPR_META,
- PgfExprMeta,
- &arg_meta, pool);
- expr_meta->id = 0;
-
+ if (arg->ccat->fid < pr->state->ps->concr->total_cats) {
q.st->expr =
gu_new_variant_i(pool, PGF_EXPR_APP,
PgfExprApp,
.fun = q.st->expr,
- .arg = arg_meta);
+ .arg = pr->state->ps->meta_var);
q.st->arg_idx++;
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
} else {