diff options
| author | krasimir <krasimir@chalmers.se> | 2015-05-19 09:15:48 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2015-05-19 09:15:48 +0000 |
| commit | 786da46a9913df3ad27e1631cf4de75d9186aeb0 (patch) | |
| tree | 24b64410dbced495b52df434e03b7f505d035ce9 /src/runtime/c/pgf | |
| parent | 616a24cba4fe018adfc2c071eb16b542ae08ec86 (diff) | |
added callback in C for unknown words
Diffstat (limited to 'src/runtime/c/pgf')
| -rw-r--r-- | src/runtime/c/pgf/literals.c | 104 | ||||
| -rw-r--r-- | src/runtime/c/pgf/literals.h | 3 |
2 files changed, 97 insertions, 10 deletions
diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c index 9008ccdc7..ac57b150f 100644 --- a/src/runtime/c/pgf/literals.c +++ b/src/runtime/c/pgf/literals.c @@ -177,15 +177,15 @@ typedef struct { PgfExpr expr; bool is_known; GuPool* out_pool; -} PgfMatchNameMorphoCallback; +} PgfMatchMorphoCallback; -void +static void pgf_match_name_morpho_callback(PgfMorphoCallback* self_, PgfCId lemma, GuString analysis, prob_t prob, GuExn* err) { - PgfMatchNameMorphoCallback* self = - gu_container(self_, PgfMatchNameMorphoCallback, callback); + PgfMatchMorphoCallback* self = + gu_container(self_, PgfMatchMorphoCallback, callback); PgfAbsFun* absfun = gu_seq_binsearch(self->abstract->funs, pgf_absfun_order, PgfAbsFun, lemma); @@ -277,12 +277,12 @@ pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr, } } - PgfMatchNameMorphoCallback clo = { { pgf_match_name_morpho_callback }, - concr->abstr, - gu_null_variant, - false, - out_pool - }; + PgfMatchMorphoCallback clo = { { pgf_match_name_morpho_callback }, + concr->abstr, + gu_null_variant, + false, + out_pool + }; pgf_lookup_morpho(concr, name, &clo.callback, NULL); if (clo.is_known) { @@ -337,6 +337,90 @@ pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr, PgfLiteralCallback pgf_nerc_literal_callback = { pgf_match_name_lit, pgf_predict_empty } ; +static void +pgf_match_unknown_morpho_callback(PgfMorphoCallback* self_, + PgfCId lemma, GuString analysis, prob_t prob, + GuExn* err) +{ + PgfMatchMorphoCallback* self = + gu_container(self_, PgfMatchMorphoCallback, callback); + self->is_known = true; +} + +static PgfExprProb* +pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr, + size_t lin_idx, + GuString sentence, size_t* poffset, + GuPool *out_pool) +{ + const uint8_t* buf = (uint8_t*) (sentence + *poffset); + const uint8_t* p = buf; + + PgfExprProb* ep = NULL; + + GuUCS ucs = gu_utf8_decode(&p); + if (!gu_ucs_is_upper(ucs)) { + GuPool* tmp_pool = gu_local_pool(); + GuStringBuf *sbuf = gu_string_buf(tmp_pool); + GuOut* out = gu_string_buf_out(sbuf); + GuExn* err = gu_new_exn(tmp_pool); + + gu_out_utf8(ucs, out, err); + *poffset = p - ((uint8_t*) sentence); + + ucs = gu_utf8_decode(&p); + while (ucs != 0 && !gu_ucs_is_space(ucs)) { + gu_out_utf8(ucs, out, err); + *poffset = p - ((uint8_t*) sentence); + + ucs = gu_utf8_decode(&p); + } + + GuString word = gu_string_buf_freeze(sbuf, tmp_pool); + + PgfMatchMorphoCallback clo = { { pgf_match_unknown_morpho_callback }, + concr->abstr, + gu_null_variant, + false, + out_pool + }; + pgf_lookup_morpho(concr, word, &clo.callback, NULL); + + if (!clo.is_known) { + ep = gu_new(PgfExprProb, out_pool); + ep->prob = 0; + + PgfExprApp *expr_app = + gu_new_variant(PGF_EXPR_APP, + PgfExprApp, + &ep->expr, out_pool); + GuString con = "MkSymb"; + PgfExprFun *expr_fun = + gu_new_flex_variant(PGF_EXPR_FUN, + PgfExprFun, + fun, strlen(con)+1, + &expr_app->fun, out_pool); + strcpy(expr_fun->fun, con); + PgfExprLit *expr_lit = + gu_new_variant(PGF_EXPR_LIT, + PgfExprLit, + &expr_app->arg, out_pool); + PgfLiteralStr *lit_str = + gu_new_flex_variant(PGF_LITERAL_STR, + PgfLiteralStr, + val, strlen(word)+1, + &expr_lit->lit, out_pool); + strcpy(lit_str->val, word); + } + + gu_pool_free(tmp_pool); + } + + return ep; +} + +PgfLiteralCallback pgf_unknown_literal_callback = + { pgf_match_unknown_lit, pgf_predict_empty } ; PgfCallbacksMap* pgf_new_callbacks_map(PgfConcr* concr, GuPool *pool) diff --git a/src/runtime/c/pgf/literals.h b/src/runtime/c/pgf/literals.h index bf071c202..7de17b121 100644 --- a/src/runtime/c/pgf/literals.h +++ b/src/runtime/c/pgf/literals.h @@ -6,6 +6,9 @@ // literal for named entities recognition extern PgfLiteralCallback pgf_nerc_literal_callback; +// literal for finding unknown words +extern PgfLiteralCallback pgf_unknown_literal_callback; + PgfCCat* pgf_literal_cat(PgfConcr* concr, PgfLiteral lit); |
