summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2015-05-19 09:15:48 +0000
committerkrasimir <krasimir@chalmers.se>2015-05-19 09:15:48 +0000
commit786da46a9913df3ad27e1631cf4de75d9186aeb0 (patch)
tree24b64410dbced495b52df434e03b7f505d035ce9 /src
parent616a24cba4fe018adfc2c071eb16b542ae08ec86 (diff)
added callback in C for unknown words
Diffstat (limited to 'src')
-rw-r--r--src/runtime/c/pgf/literals.c104
-rw-r--r--src/runtime/c/pgf/literals.h3
-rw-r--r--src/runtime/c/utils/pgf-translate.c2
3 files changed, 99 insertions, 10 deletions
diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c
index 9008ccdc7..ac57b150f 100644
--- a/src/runtime/c/pgf/literals.c
+++ b/src/runtime/c/pgf/literals.c
@@ -177,15 +177,15 @@ typedef struct {
PgfExpr expr;
bool is_known;
GuPool* out_pool;
-} PgfMatchNameMorphoCallback;
+} PgfMatchMorphoCallback;
-void
+static void
pgf_match_name_morpho_callback(PgfMorphoCallback* self_,
PgfCId lemma, GuString analysis, prob_t prob,
GuExn* err)
{
- PgfMatchNameMorphoCallback* self =
- gu_container(self_, PgfMatchNameMorphoCallback, callback);
+ PgfMatchMorphoCallback* self =
+ gu_container(self_, PgfMatchMorphoCallback, callback);
PgfAbsFun* absfun =
gu_seq_binsearch(self->abstract->funs, pgf_absfun_order, PgfAbsFun, lemma);
@@ -277,12 +277,12 @@ pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr,
}
}
- PgfMatchNameMorphoCallback clo = { { pgf_match_name_morpho_callback },
- concr->abstr,
- gu_null_variant,
- false,
- out_pool
- };
+ PgfMatchMorphoCallback clo = { { pgf_match_name_morpho_callback },
+ concr->abstr,
+ gu_null_variant,
+ false,
+ out_pool
+ };
pgf_lookup_morpho(concr, name, &clo.callback, NULL);
if (clo.is_known) {
@@ -337,6 +337,90 @@ pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr,
PgfLiteralCallback pgf_nerc_literal_callback =
{ pgf_match_name_lit, pgf_predict_empty } ;
+static void
+pgf_match_unknown_morpho_callback(PgfMorphoCallback* self_,
+ PgfCId lemma, GuString analysis, prob_t prob,
+ GuExn* err)
+{
+ PgfMatchMorphoCallback* self =
+ gu_container(self_, PgfMatchMorphoCallback, callback);
+ self->is_known = true;
+}
+
+static PgfExprProb*
+pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr,
+ size_t lin_idx,
+ GuString sentence, size_t* poffset,
+ GuPool *out_pool)
+{
+ const uint8_t* buf = (uint8_t*) (sentence + *poffset);
+ const uint8_t* p = buf;
+
+ PgfExprProb* ep = NULL;
+
+ GuUCS ucs = gu_utf8_decode(&p);
+ if (!gu_ucs_is_upper(ucs)) {
+ GuPool* tmp_pool = gu_local_pool();
+ GuStringBuf *sbuf = gu_string_buf(tmp_pool);
+ GuOut* out = gu_string_buf_out(sbuf);
+ GuExn* err = gu_new_exn(tmp_pool);
+
+ gu_out_utf8(ucs, out, err);
+ *poffset = p - ((uint8_t*) sentence);
+
+ ucs = gu_utf8_decode(&p);
+ while (ucs != 0 && !gu_ucs_is_space(ucs)) {
+ gu_out_utf8(ucs, out, err);
+ *poffset = p - ((uint8_t*) sentence);
+
+ ucs = gu_utf8_decode(&p);
+ }
+
+ GuString word = gu_string_buf_freeze(sbuf, tmp_pool);
+
+ PgfMatchMorphoCallback clo = { { pgf_match_unknown_morpho_callback },
+ concr->abstr,
+ gu_null_variant,
+ false,
+ out_pool
+ };
+ pgf_lookup_morpho(concr, word, &clo.callback, NULL);
+
+ if (!clo.is_known) {
+ ep = gu_new(PgfExprProb, out_pool);
+ ep->prob = 0;
+
+ PgfExprApp *expr_app =
+ gu_new_variant(PGF_EXPR_APP,
+ PgfExprApp,
+ &ep->expr, out_pool);
+ GuString con = "MkSymb";
+ PgfExprFun *expr_fun =
+ gu_new_flex_variant(PGF_EXPR_FUN,
+ PgfExprFun,
+ fun, strlen(con)+1,
+ &expr_app->fun, out_pool);
+ strcpy(expr_fun->fun, con);
+ PgfExprLit *expr_lit =
+ gu_new_variant(PGF_EXPR_LIT,
+ PgfExprLit,
+ &expr_app->arg, out_pool);
+ PgfLiteralStr *lit_str =
+ gu_new_flex_variant(PGF_LITERAL_STR,
+ PgfLiteralStr,
+ val, strlen(word)+1,
+ &expr_lit->lit, out_pool);
+ strcpy(lit_str->val, word);
+ }
+
+ gu_pool_free(tmp_pool);
+ }
+
+ return ep;
+}
+
+PgfLiteralCallback pgf_unknown_literal_callback =
+ { pgf_match_unknown_lit, pgf_predict_empty } ;
PgfCallbacksMap*
pgf_new_callbacks_map(PgfConcr* concr, GuPool *pool)
diff --git a/src/runtime/c/pgf/literals.h b/src/runtime/c/pgf/literals.h
index bf071c202..7de17b121 100644
--- a/src/runtime/c/pgf/literals.h
+++ b/src/runtime/c/pgf/literals.h
@@ -6,6 +6,9 @@
// literal for named entities recognition
extern PgfLiteralCallback pgf_nerc_literal_callback;
+// literal for finding unknown words
+extern PgfLiteralCallback pgf_unknown_literal_callback;
+
PgfCCat*
pgf_literal_cat(PgfConcr* concr, PgfLiteral lit);
diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c
index b99a8f3fd..00506e4cc 100644
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -91,6 +91,8 @@ int main(int argc, char* argv[]) {
pgf_new_callbacks_map(from_concr, pool);
pgf_callbacks_map_add_literal(from_concr, callbacks,
"PN", &pgf_nerc_literal_callback);
+ pgf_callbacks_map_add_literal(from_concr, callbacks,
+ "Symb", &pgf_unknown_literal_callback);
// Create an output stream for stdout
GuOut* out = gu_file_out(stdout, pool);