summaryrefslogtreecommitdiff
path: root/src/runtime/c/pgf/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/c/pgf/parser.c')
-rw-r--r--src/runtime/c/pgf/parser.c232
1 files changed, 190 insertions, 42 deletions
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 9876a548e..8190a9df1 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -21,20 +21,11 @@ struct PgfParse {
};
typedef struct {
- double prob;
- PgfExpr expr;
-} PgfExprProb;
-
-typedef struct {
PgfExprProb ep;
PgfPArgs args;
size_t arg_idx;
} PgfExprState;
-static GU_DEFINE_TYPE(PgfExprProb, struct,
- GU_MEMBER(PgfExprProb, prob, double),
- GU_MEMBER(PgfExprProb, expr, PgfExpr));
-
typedef GuMap PgfExprCache;
static GU_DEFINE_TYPE(PgfExprCache, GuMap,
gu_type(PgfCCat), NULL,
@@ -103,10 +94,16 @@ GU_DEFINE_TYPE(PgfTransitions, GuStringMap,
typedef struct PgfParsing PgfParsing;
+typedef struct {
+ PgfTokens tokens;
+ PgfExprProb ep;
+} PgfLiteralCandidate;
+
typedef const struct PgfLexCallback PgfLexCallback;
struct PgfLexCallback {
void (*lex)(PgfLexCallback* self, PgfToken tok, PgfItem* item);
+ GuEnum *(*lit)(PgfLexCallback* self, PgfCCat* cat);
};
struct PgfParsing {
@@ -133,7 +130,7 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = i.data;
gu_printf(wtr,err,"F%d(",papp->fun->funid);
- gu_string_write(papp->fun->name, wtr, err);
+ pgf_print_expr(papp->fun->ep->expr, 0, wtr, err);
gu_printf(wtr,err,")[");
size_t n_args = gu_seq_length(papp->args);
for (size_t j = 0; j < n_args; j++) {
@@ -195,7 +192,7 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err)
PgfProductionApply* papp = i.data;
PgfCncFun* fun = papp->fun;
gu_printf(wtr, err, "F%d(", fun->funid);
- gu_string_write(fun->name, wtr, err);
+ pgf_print_expr(fun->ep->expr, 0, wtr, err);
gu_printf(wtr, err, ")[");
for (size_t i = 0; i < gu_seq_length(item->args); i++) {
PgfPArg arg = gu_seq_get(item->args, PgfPArg, i);
@@ -445,10 +442,20 @@ pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat)
PgfItem* item = NULL;
if (!gu_variant_is_null(cont->curr_sym)) {
- gu_assert(gu_variant_tag(cont->curr_sym) == PGF_SYMBOL_CAT);
- PgfSymbolCat* scat = gu_variant_data(cont->curr_sym);
-
- item = pgf_item_update_arg(cont, scat->d, cat, parsing->pool);
+ switch (gu_variant_tag(cont->curr_sym)) {
+ case PGF_SYMBOL_CAT: {
+ PgfSymbolCat* scat = gu_variant_data(cont->curr_sym);
+ item = pgf_item_update_arg(cont, scat->d, cat, parsing->pool);
+ break;
+ }
+ case PGF_SYMBOL_LIT: {
+ PgfSymbolLit* slit = gu_variant_data(cont->curr_sym);
+ item = pgf_item_update_arg(cont, slit->d, cat, parsing->pool);
+ break;
+ }
+ default:
+ gu_impossible();
+ }
} else {
item = pgf_item_copy(cont, parsing->pool);
size_t nargs = gu_seq_length(cont->args);
@@ -772,9 +779,66 @@ pgf_parsing_symbol(PgfParsing* parsing, PgfItem* item, PgfSymbol sym) {
}
break;
}
- case PGF_SYMBOL_LIT:
- // XXX TODO proper support
+ case PGF_SYMBOL_LIT: {
+ PgfSymbolLit* slit = gu_variant_data(sym);
+ PgfPArg* parg = gu_seq_index(item->args, PgfPArg, slit->d);
+
+ PgfCncCat *cnccat = parg->ccat->cnccat;
+
+ // the linearization category must be {s : Str}
+ gu_assert(cnccat->n_lins == 1);
+ gu_assert(gu_list_length(cnccat->cats) == 1);
+
+ PgfItemBuf* conts =
+ pgf_parsing_get_conts(parsing->conts_map,
+ parg->ccat, slit->r,
+ parsing->pool, parsing->tmp_pool);
+ gu_buf_push(conts, PgfItem*, item);
+ if (gu_buf_length(conts) == 1) {
+ /* This is the first time when we encounter this
+ * literal category so we must call the callback */
+
+ GuEnum* en = parsing->callback->lit(parsing->callback, parg->ccat);
+ for (;;) {
+ PgfLiteralCandidate* candidate =
+ gu_next(en, PgfLiteralCandidate*, parsing->pool);
+ if (candidate == NULL)
+ break;
+
+ PgfSymbol sym = gu_null_variant;
+ PgfSymbolKS* sks =
+ gu_new_variant(PGF_SYMBOL_KS,
+ PgfSymbolKS,
+ &sym, parsing->pool);
+ sks->tokens = candidate->tokens;
+
+ PgfSequence seq = gu_new_seq(PgfSymbol, 1, parsing->pool);
+ gu_seq_set(seq, PgfSymbol, 0, sym);
+
+ PgfCncFun* fun =
+ gu_malloc(parsing->pool,
+ sizeof(PgfCncFun)+
+ sizeof(PgfSequence*)*cnccat->n_lins);
+ fun->name = gu_empty_string;
+ fun->ep = &candidate->ep;
+ fun->funid = -1;
+ fun->n_lins = cnccat->n_lins;
+ fun->lins[0] = seq;
+
+ PgfProduction prod;
+ PgfProductionApply* papp =
+ gu_new_variant(PGF_PRODUCTION_APPLY,
+ PgfProductionApply,
+ &prod, parsing->pool);
+ papp->fun = fun;
+ papp->args = gu_new_seq(PgfPArg, 0, parsing->pool);
+
+ pgf_parsing_production(parsing, parg->ccat, slit->r,
+ prod, conts);
+ }
+ }
break;
+ }
case PGF_SYMBOL_VAR:
// XXX TODO proper support
break;
@@ -864,10 +928,29 @@ pgf_new_parse(PgfConcr* concr, int max_fid, GuPool* pool)
return parse;
}
+static void
+pgf_lex_noop(PgfLexCallback* self, PgfToken tok, PgfItem* item)
+{
+}
+
+static void
+pgf_enum_null(GuEnum* self, void* to, GuPool* pool)
+{
+ *((PgfLiteralCandidate**) to) = NULL;
+}
+
+static GuEnum*
+pgf_lit_noop(PgfLexCallback* self, PgfCCat* ccat)
+{
+ static GuEnum en = { pgf_enum_null };
+ return &en;
+}
+
typedef struct {
PgfLexCallback fn;
PgfToken tok;
PgfItemBuf* agenda;
+ GuPool *pool;
} PgfParseTokenCallback;
static
@@ -881,6 +964,79 @@ void pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
}
typedef struct {
+ GuEnum en;
+ PgfLiteralCandidate candidate;
+ size_t idx;
+} PgfLitEnum;
+
+static void
+pgf_enum_lits(GuEnum* self, void* to, GuPool* pool)
+{
+ PgfLitEnum* en = (PgfLitEnum*) self;
+
+ *((PgfLiteralCandidate**) to) =
+ (en->idx++ > 0) ? NULL : &en->candidate;
+}
+
+static GuEnum*
+pgf_match_lit(PgfLexCallback* self, PgfCCat* ccat)
+{
+ PgfParseTokenCallback *clo = (PgfParseTokenCallback *) self;
+
+ PgfLiteral lit;
+
+ switch (ccat->fid) {
+ case -1: {
+ PgfLiteralStr *lit_str =
+ gu_new_variant(PGF_LITERAL_STR,
+ PgfLiteralStr,
+ &lit, clo->pool);
+ lit_str->val = clo->tok;
+ break;
+ }
+ case -2: {
+ PgfLiteralInt *lit_int =
+ gu_new_variant(PGF_LITERAL_INT,
+ PgfLiteralInt,
+ &lit, clo->pool);
+ if (!gu_string_to_int(clo->tok, &lit_int->val))
+ return pgf_lit_noop(self, ccat);
+ break;
+ }
+ case -3: {
+ PgfLiteralFlt *lit_flt =
+ gu_new_variant(PGF_LITERAL_FLT,
+ PgfLiteralFlt,
+ &lit, clo->pool);
+ if (!gu_string_to_double(clo->tok, &lit_flt->val))
+ return pgf_lit_noop(self, ccat);
+ break;
+ }
+ default:
+ gu_impossible();
+ }
+
+ PgfTokens tokens = gu_new_seq(PgfToken, 1, clo->pool);
+ gu_seq_set(tokens, PgfToken, 0, clo->tok);
+
+ PgfExpr expr = gu_null_variant;
+ PgfExprLit *expr_lit =
+ gu_new_variant(PGF_EXPR_LIT,
+ PgfExprLit,
+ &expr, clo->pool);
+ expr_lit->lit = lit;
+
+ PgfLitEnum* en = gu_new(PgfLitEnum, clo->pool);
+ en->en.next = pgf_enum_lits;
+ en->candidate.tokens = tokens;
+ en->candidate.ep.prob = INFINITY;
+ en->candidate.ep.expr = expr;
+ en->idx = 0;
+
+ return &en->en;
+}
+
+typedef struct {
GuMapItor fn;
PgfProduction prod;
PgfItemBuf *metas;
@@ -924,7 +1080,8 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool)
{
PgfItemBuf* agenda = gu_new_buf(PgfItem*, pool);
- PgfParseTokenCallback clo1 = {{ pgf_match_token }, tok, agenda};
+ PgfParseTokenCallback clo1 = {{ pgf_match_token, pgf_match_lit },
+ tok, agenda, pool};
GuPool* tmp_pool = gu_new_pool();
PgfParsing* parsing = pgf_new_parsing(parse->concr, &clo1.fn, parse->max_fid, pool, tmp_pool);
@@ -993,9 +1150,7 @@ pgf_production_to_expr(PgfConcr* concr, PgfProduction prod,
switch (pi.tag) {
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = pi.data;
- PgfExpr expr = gu_new_variant_i(pool, PGF_EXPR_FUN,
- PgfExprFun,
- .fun = papp->fun->name);
+ PgfExpr expr = papp->fun->ep->expr;
size_t n_args = gu_seq_length(papp->args);
for (size_t i = 0; i < n_args; i++) {
PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, i);
@@ -1105,18 +1260,18 @@ pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
*(PgfExpr*)to = pgf_parse_result_next(pr, pool);
}
-static
-void pgf_noop(PgfLexCallback* self, PgfToken tok, PgfItem* item)
-{
-}
+static PgfLexCallback lex_callback_noop =
+ { pgf_lex_noop, pgf_lit_noop };
PgfExprEnum*
pgf_parse_result(PgfParse* parse, GuPool* pool)
{
- PgfLexCallback fn = { pgf_noop };
-
GuPool* tmp_pool = gu_new_pool();
- PgfParsing* parsing = pgf_new_parsing(parse->concr, &fn, parse->max_fid, pool, tmp_pool);
+ PgfParsing* parsing =
+ pgf_new_parsing(parse->concr,
+ &lex_callback_noop,
+ parse->max_fid,
+ pool, tmp_pool);
size_t n_items = gu_buf_length(parse->agenda);
for (size_t i = 0; i < n_items; i++) {
PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i);
@@ -1164,18 +1319,10 @@ pgf_parse_best_result_init(PgfCCat *ccat, GuBuf *pqueue,
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = pi.data;
- gu_assert(papp->fun->absfun != NULL);
-
PgfExprState *st = gu_new(PgfExprState, tmp_pool);
- st->ep.prob = - log(papp->fun->absfun->prob);
- PgfExprFun *expr_fun =
- gu_new_variant(PGF_EXPR_FUN,
- PgfExprFun,
- &st->ep.expr, out_pool);
- expr_fun->fun = papp->fun->name;
+ st->ep = *papp->fun->ep;
st->args = papp->args;
st->arg_idx = 0;
-
gu_buf_heap_push(pqueue, &pgf_expr_prob_order, &st);
break;
}
@@ -1274,11 +1421,12 @@ pgf_parse_best_ccat_result(
PgfExpr
pgf_parse_best_result(PgfParse* parse, GuPool* pool)
{
- PgfLexCallback fn = { pgf_noop };
-
GuPool* tmp_pool = gu_new_pool();
- PgfParsing* parsing = pgf_new_parsing(parse->concr, &fn, parse->max_fid,
- pool, tmp_pool);
+ PgfParsing* parsing =
+ pgf_new_parsing(parse->concr,
+ &lex_callback_noop,
+ parse->max_fid,
+ pool, tmp_pool);
size_t n_items = gu_buf_length(parse->agenda);
for (size_t i = 0; i < n_items; i++) {
PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i);
@@ -1441,7 +1589,7 @@ pgf_parser_bu_item(PgfConcr* concr, PgfItem* item,
break;
}
case PGF_SYMBOL_LIT:
- // XXX TODO proper support
+ // Nothing to be done here
break;
case PGF_SYMBOL_VAR:
// XXX TODO proper support