summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author kr.angelov <kr.angelov@gmail.com> 2013-08-16 15:02:24 +0000
committer kr.angelov <kr.angelov@gmail.com> 2013-08-16 15:02:24 +0000
commit 87e34b11dc9cb7a09734f18326c2e4b1dc7560b1 (patch)
tree f208fd08cbec7b1a4fc6d6525fc74cf5b4aba19f
parent 662a35bbb3724edab3647937411a079f52bd59c6 (diff)
implemented lookupMorpho for C and Python
-rw-r--r-- src/runtime/c/pgf/parser.c 103
-rw-r--r-- src/runtime/c/pgf/pgf.h 11
-rw-r--r-- src/runtime/python/pypgf.c 84
3 files changed, 198 insertions, 0 deletions
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 52fa0cf9a..ac863db0a 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -2311,6 +2311,109 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
PgfLiteralCallback*, callback);
}
+typedef struct { /* Iteration closure that pgf_lookup_morpho hands to gu_map_iter. */
+ GuMapItor fn; /* must stay the first member: pgf_morpho_iter casts the GuMapItor* it receives back to PgfMorphoFn* */
+ PgfTokens tokens; /* the input token sequence every candidate production is compared against */
+ PgfMorphoCallback* callback; /* invoked for each production whose tokens match the input exactly */
+} PgfMorphoFn;
+
+/*
+ * gu_map_iter visitor used by pgf_lookup_morpho.
+ *
+ * fn    - the GuMapItor embedded as first member of a PgfMorphoFn closure
+ * key   - points to the PgfCFCat (concrete category + linearization index)
+ * value - points to the PgfProductionSeq stored under that key
+ * err   - exception frame forwarded to the user callback
+ *
+ * For every PGF_PRODUCTION_APPLY production in the sequence, the PgfSymbolKS
+ * tokens of the selected linearization are compared, in order, with the
+ * input tokens held by the closure.  Symbols of any other kind are skipped.
+ * When the two token sequences are exactly equal, the callback is invoked
+ * with the abstract function name, the label of the linearization field and
+ * the function's probability.
+ */
+static void
+pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
+{
+    PgfMorphoFn* clo = (PgfMorphoFn*) fn;
+    PgfCFCat cfc = *((PgfCFCat*) key);
+    PgfProductionSeq prods = *((PgfProductionSeq*) value);
+
+    if (gu_seq_is_null(prods))
+        return;
+
+    GuString analysis = cfc.ccat->cnccat->labels[cfc.lin_idx];
+
+    // the input does not change while we iterate, so measure it once
+    size_t n_tokens = gu_seq_length(clo->tokens);
+
+    size_t n_prods = gu_seq_length(prods);
+    for (size_t prod_idx = 0; prod_idx < n_prods; prod_idx++) {
+        PgfProduction prod =
+            gu_seq_get(prods, PgfProduction, prod_idx);
+
+        GuVariantInfo prod_info = gu_variant_open(prod);
+        if (prod_info.tag != PGF_PRODUCTION_APPLY)
+            continue;
+
+        PgfProductionApply* papp = prod_info.data;
+
+        // match the tokens with the production
+        size_t pos = 0;
+        PgfSequence seq = papp->fun->lins[cfc.lin_idx];
+        size_t n_syms = gu_seq_length(seq);
+        for (size_t sym_idx = 0; sym_idx < n_syms; sym_idx++) {
+            PgfSymbol sym = gu_seq_get(seq, PgfSymbol, sym_idx);
+
+            GuVariantInfo sym_info = gu_variant_open(sym);
+            if (sym_info.tag != PGF_SYMBOL_KS)
+                continue;   // only literal token symbols take part in the match
+
+            PgfSymbolKS* symks = sym_info.data;
+            size_t n_ks_tokens = gu_seq_length(symks->tokens);
+            for (size_t tok_idx = 0; tok_idx < n_ks_tokens; tok_idx++) {
+                // the production has more tokens than the input
+                if (pos >= n_tokens)
+                    goto next_prod;
+
+                PgfToken tok1 = gu_seq_get(symks->tokens, PgfToken, tok_idx);
+                PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
+
+                if (!gu_string_eq(tok1, tok2))
+                    goto next_prod;
+            }
+        }
+
+        // the input has more tokens than the production
+        if (pos != n_tokens)
+            goto next_prod;
+
+        PgfCId lemma = papp->fun->absfun->name;
+        prob_t prob = papp->fun->absfun->ep.prob;
+        clo->callback->callback(clo->callback, clo->tokens,
+                                lemma, analysis, prob, err);
+
+        // once the callback has reported a failure there is no point
+        // in feeding it further results
+        if (gu_exn_is_raised(err))
+            return;
+
+    next_prod:;
+    }
+}
+
+/*
+ * Morphological lookup: reads all tokens delivered by `lexer` and reports,
+ * through `callback`, the lexicon entries of `concr` that match them.
+ *
+ * The first token selects the candidate productions via
+ * concr->leftcorner_tok_idx; if there is no entry for it the function
+ * returns without calling the callback.  If the lexer raises while the
+ * first token is being read, PgfExn is raised on `err`.  The remaining
+ * tokens are read until the lexer raises, and that condition only ends
+ * the reading; it is not propagated to `err`.
+ */
+void
+pgf_lookup_morpho(PgfConcr *concr, PgfLexer *lexer,
+                  PgfMorphoCallback* callback, GuExn* err)
+{
+    GuPool* scratch = gu_local_pool();
+
+    GuBuf* word = gu_new_buf(PgfToken, scratch);
+    GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), scratch);
+
+    PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
+    if (gu_exn_is_raised(lex_err)) {
+        gu_raise(err, PgfExn);
+    } else {
+        PgfProductionIdx* candidates =
+            gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
+
+        if (candidates != NULL) {
+            // collect the current token, then keep reading until the
+            // lexer signals that nothing more is available
+            for (;;) {
+                gu_buf_push(word, PgfToken, tok);
+                tok = pgf_lexer_read_token(lexer, lex_err);
+                if (gu_exn_is_raised(lex_err))
+                    break;
+            }
+
+            PgfMorphoFn visitor = {
+                { pgf_morpho_iter },
+                gu_buf_seq(word),
+                callback
+            };
+            gu_map_iter(candidates, &visitor.fn, err);
+        }
+    }
+
+    // single exit: the scratch pool is released on every path
+    gu_pool_free(scratch);
+}
+
static void
pgf_parser_leftcorner_add_token(PgfConcr* concr,
PgfTokens tokens, PgfItem* item,
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index a575a5188..40ae1ca45 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -124,6 +124,17 @@ PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuPool* pool, GuPool* out_pool);
+typedef struct PgfMorphoCallback PgfMorphoCallback;
+struct PgfMorphoCallback { /* Callback interface through which pgf_lookup_morpho reports each result. */
+ void (*callback)(PgfMorphoCallback* self, PgfTokens tokens, /* self: this struct, letting an implementation embed it in a larger one; tokens: the looked-up input */
+ PgfCId lemma, GuString analysis, prob_t prob, /* lemma: name of the matching abstract function; analysis: label of the matched linearization field; prob: that function's probability */
+ GuExn* err); /* err: exception frame on which the implementation may raise a failure */
+};
+
+void
+pgf_lookup_morpho(PgfConcr *concr, PgfLexer *lexer,
+ PgfMorphoCallback* callback, GuExn* err); /* Reads all tokens from lexer and calls callback once for every
+ * lexicon entry of concr whose tokens equal that input.  Raises
+ * PgfExn on err when the first token cannot be read; an input
+ * with no lexicon entry produces no callback calls and no error. */
+
PgfExprEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
double heuristics,
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c
index fd2b39906..5b4ae9193 100644
--- a/src/runtime/python/pypgf.c
+++ b/src/runtime/python/pypgf.c
@@ -1684,6 +1684,87 @@ Concr_graphvizParseTree(ConcrObject* self, PyObject *args) {
return pystr;
}
+typedef struct { /* Callback state Concr_lookupMorpho uses to collect results into a Python list. */
+ PgfMorphoCallback fn; /* must stay the first member: pypgf_collect_morpho casts PgfMorphoCallback* back to PyMorphoCallback* */
+ PyObject* analyses; /* Python list that receives one (lemma, analysis, prob) tuple per reported result */
+} PyMorphoCallback;
+
+/*
+ * PgfMorphoCallback implementation that appends one
+ * (lemma, analysis, prob) tuple to the Python list stored in the
+ * enclosing PyMorphoCallback.
+ *
+ * self     - the PgfMorphoCallback embedded first in a PyMorphoCallback
+ * tokens   - the looked-up input; not needed here
+ * lemma    - converted to a Python string, first tuple element
+ * analysis - converted to a Python string, second tuple element
+ * prob     - third tuple element, as a Python float
+ * err      - PgfExn is raised here if any Python object cannot be
+ *            created or the append fails; the Python error indicator
+ *            set by the failing call is left in place for the caller.
+ */
+static void
+pypgf_collect_morpho(PgfMorphoCallback* self, PgfTokens tokens,
+                     PgfCId lemma, GuString analysis, prob_t prob,
+                     GuExn* err)
+{
+    PyMorphoCallback* callback = (PyMorphoCallback*) self;
+    PyObject* py_lemma = NULL;
+    PyObject* py_analysis = NULL;
+    PyObject* res = NULL;
+    int failed = 1;
+
+    (void) tokens;
+
+    // NOTE(review): gu2py_string is assumed to return NULL with a Python
+    // exception set on failure, like the CPython constructors - confirm.
+    py_lemma = gu2py_string(lemma);
+    if (py_lemma != NULL)
+        py_analysis = gu2py_string(analysis);
+
+    // "O" does not accept NULL, so only build the tuple from valid objects
+    if (py_analysis != NULL)
+        res = Py_BuildValue("OOf", py_lemma, py_analysis, prob);
+
+    if (res != NULL && PyList_Append(callback->analyses, res) == 0)
+        failed = 0;
+
+    if (failed) {
+        gu_raise(err, PgfExn);
+    }
+
+    // any of these may still be NULL when an earlier step failed
+    Py_XDECREF(py_lemma);
+    Py_XDECREF(py_analysis);
+    Py_XDECREF(res);
+}
+
+/*
+ * Python method Concr.lookupMorpho(sentence=None, tokens=None).
+ *
+ * Exactly one of the two keyword arguments must be given:
+ *   sentence - a string that is split into tokens by the simple lexer
+ *   tokens   - any iterable; it is consumed through the Python lexer
+ *
+ * Returns a new list with one (lemma, analysis, prob) tuple per result
+ * reported by pgf_lookup_morpho, or NULL with a Python exception set.
+ */
+static PyObject*
+Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) {
+    static char *kwlist[] = {"sentence", "tokens", NULL};
+
+    // NOTE(review): "s#" stores into an int only when PY_SSIZE_T_CLEAN is
+    // not defined before Python.h is included - confirm for this file.
+    int len = 0;
+    const uint8_t *buf = NULL;
+    PyObject* py_tokens = NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist,
+                                     &buf, &len, &py_tokens))
+        return NULL;
+
+    // both missing or both present is an error
+    if ((buf == NULL) == (py_tokens == NULL)) {
+        PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
+        return NULL;
+    }
+
+    // get an iterator out of the iterable object (new reference)
+    PyObject* py_iter = NULL;
+    if (py_tokens != NULL) {
+        py_iter = PyObject_GetIter(py_tokens);
+        if (py_iter == NULL)
+            return NULL;
+    }
+
+    PyObject* analyses = PyList_New(0);
+    if (analyses == NULL) {
+        Py_XDECREF(py_iter);
+        return NULL;
+    }
+
+    GuPool* tmp_pool = gu_local_pool();
+
+    PgfLexer *lexer = NULL;
+    if (buf != NULL) {
+        GuIn* in = gu_data_in(buf, len, tmp_pool);
+        GuReader* rdr = gu_new_utf8_reader(in, tmp_pool);
+        lexer = pgf_new_simple_lexer(rdr, tmp_pool);
+    } else {
+        lexer = pypgf_new_python_lexer(py_iter, tmp_pool);
+    }
+
+    GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
+
+    PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
+    pgf_lookup_morpho(self->concr, lexer, &callback.fn, err);
+
+    // err is allocated from tmp_pool, so its state has to be read
+    // before the pool is released
+    int failed = !gu_ok(err);
+
+    Py_XDECREF(py_iter);
+
+    gu_pool_free(tmp_pool);
+
+    if (failed) {
+        Py_DECREF(analyses);
+        // returning NULL without an exception set makes the interpreter
+        // raise SystemError; keep an exception raised by the lexer or the
+        // callback, otherwise report the failure ourselves
+        if (!PyErr_Occurred())
+            PyErr_SetString(PyExc_RuntimeError, "the morphological lookup failed");
+        return NULL;
+    }
+
+    return analyses;
+}
+
static PyGetSetDef Concr_getseters[] = {
{"name",
(getter)Concr_getName, NULL,
@@ -1726,6 +1807,9 @@ static PyMethodDef Concr_methods[] = {
{"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS,
"Renders an abstract syntax tree as a parse tree in Graphviz format"
},
+ {"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS,
+ "Looks up a word in the lexicon of the grammar"
+ },
{NULL} /* Sentinel */
};