summaryrefslogtreecommitdiff
path: root/src/runtime/python
diff options
context:
space:
mode:
author kr.angelov <kr.angelov@gmail.com> 2013-10-09 12:08:51 +0000
committer kr.angelov <kr.angelov@gmail.com> 2013-10-09 12:08:51 +0000
commit 8cf03bc5b6895568eb08be1415985a64793bb81c (patch)
tree 814e95e1b0422362066fef389e8b03741e8478ad /src/runtime/python
parent 20e4970ec19949da10b86facd3c6a5a4abb03acb (diff)
A major redesign in the C runtime. The parser and the linearizer now fully support BIND. The following things are still broken: parseval, word completion, handling of 'pre', and the robust mode.
Diffstat (limited to 'src/runtime/python')
-rw-r--r-- src/runtime/python/pypgf.c 204
1 files changed, 37 insertions, 167 deletions
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c
index 056cdc924..d5d3e78b1 100644
--- a/src/runtime/python/pypgf.c
+++ b/src/runtime/python/pypgf.c
@@ -1046,48 +1046,6 @@ Concr_printName(ConcrObject* self, PyObject *args)
return PyString_FromString(pgf_print_name(self->concr, name));
}
-typedef struct {
- PgfLexer base;
- PyObject* pylexer;
- GuPool* pool;
-} PgfPythonLexer;
-
-GU_DEFINE_TYPE(PyPgfLexerExn, abstract, _);
-
-static PgfToken
-pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
-{
- PgfPythonLexer* lexer = (PgfPythonLexer*) base;
- lexer->base.tok = "";
-
- PyObject* item = PyIter_Next(lexer->pylexer);
- if (item == NULL)
- if (PyErr_Occurred() != NULL)
- gu_raise(err, PyPgfLexerExn);
- else
- gu_raise(err, GuEOF);
- else {
- const char* str = PyString_AsString(item);
- if (str == NULL)
- gu_raise(err, PyPgfLexerExn);
- else
- lexer->base.tok = gu_string_copy(str, lexer->pool);
- }
-
- return lexer->base.tok;
-}
-
-static PgfLexer*
-pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
-{
- PgfPythonLexer* lexer = gu_new(PgfPythonLexer, pool);
- lexer->base.read_token = pypgf_python_lexer_read_token;
- lexer->base.tok = "";
- lexer->pylexer = pylexer;
- lexer->pool = pool;
- return ((PgfLexer*) lexer);
-}
-
#if ( (PY_VERSION_HEX < 0x02070000) \
|| ((PY_VERSION_HEX >= 0x03000000) \
&& (PY_VERSION_HEX < 0x03010000)) )
@@ -1114,35 +1072,19 @@ void pypgf_container_descructor(PyObject *capsule)
static IterObject*
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
{
- static char *kwlist[] = {"sentence", "tokens", "cat", "n", "heuristics", NULL};
+ static char *kwlist[] = {"sentence", "cat", "n", "heuristics", NULL};
- int len;
- const uint8_t *buf = NULL;
- PyObject* py_lexer = NULL;
+ const char *sentence = NULL;
PgfCId catname = pgf_start_cat(self->grammar->pgf);
int max_count = -1;
double heuristics = -1;
- if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist,
- &buf, &len, &py_lexer, &catname, &max_count, &heuristics))
+ if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|sid", kwlist,
+ &sentence, &catname, &max_count, &heuristics))
return NULL;
- if ((buf == NULL && py_lexer == NULL) ||
- (buf != NULL && py_lexer != NULL)) {
- PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
- return NULL;
- }
-
- if (py_lexer != NULL) {
- // get an iterator out of the iterable object
- py_lexer = PyObject_GetIter(py_lexer);
- if (py_lexer == NULL)
- return NULL;
- }
-
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL) {
- Py_XDECREF(py_lexer);
return NULL;
}
@@ -1160,30 +1102,22 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->counter = 0;
pyres->fetch = Iter_fetch_expr;
- PgfLexer *lexer = NULL;
- if (buf != NULL) {
- GuIn* in = gu_data_in(buf, len, pyres->pool);
- lexer = pgf_new_simple_lexer(in, pyres->pool);
- }
- if (py_lexer != NULL) {
- lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
- }
+ GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pyres->pool);
pyres->res =
- pgf_parse_with_heuristics(self->concr, catname, lexer,
- heuristics, pyres->pool, out_pool);
-
- if (pyres->res == NULL) {
- PgfToken tok =
- pgf_lexer_current_token(lexer);
-
- if (*tok == 0)
- PyErr_SetString(PGFError, "The sentence cannot be parsed");
- else {
+ pgf_parse_with_heuristics(self->concr, catname, sentence,
+ heuristics, parse_err,
+ pyres->pool, out_pool);
+
+ if (!gu_ok(parse_err)) {
+ if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
+ GuString msg = (GuString) gu_exn_caught_data(parse_err);
+ PyErr_SetString(PGFError, msg);
+ } else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
+ GuString tok = (GuString) gu_exn_caught_data(parse_err);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
- PyErr_Format(ParseError, "Unexpected token: \"%s\"",
- PyString_AsString(py_tok));
+ PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
Py_DECREF(py_tok);
}
@@ -1191,45 +1125,26 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres = NULL;
}
- Py_XDECREF(py_lexer);
-
return pyres;
}
static IterObject*
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
{
- static char *kwlist[] = {"sentence", "tokens", "cat",
- "prefix", "n", NULL};
+ static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
- int len;
- const uint8_t *buf = NULL;
- PyObject* py_lexer = NULL;
+ const char *sentence = NULL;
GuString catname = pgf_start_cat(self->grammar->pgf);
GuString prefix = "";
int max_count = -1;
- if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
- &buf, &len, &py_lexer, &catname,
+ if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist,
+ &sentence, &catname,
&prefix, &max_count))
return NULL;
- if ((buf == NULL && py_lexer == NULL) ||
- (buf != NULL && py_lexer != NULL)) {
- PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
- return NULL;
- }
-
- if (py_lexer != NULL) {
- // get an iterator out of the iterable object
- py_lexer = PyObject_GetIter(py_lexer);
- if (py_lexer == NULL)
- return NULL;
- }
-
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL) {
- Py_XDECREF(py_lexer);
return NULL;
}
@@ -1245,37 +1160,27 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
GuPool *tmp_pool = gu_local_pool();
- PgfLexer *lexer = NULL;
- if (buf != NULL) {
- GuIn* in = gu_data_in(buf, len, tmp_pool);
- lexer = pgf_new_simple_lexer(in, tmp_pool);
- }
- if (py_lexer != NULL) {
- lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
- }
-
+ GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
+
pyres->res =
- pgf_complete(self->concr, catname, lexer, prefix, pyres->pool);
+ pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool);
- if (pyres->res == NULL) {
+ if (!gu_ok(parse_err)) {
Py_DECREF(pyres);
pyres = NULL;
- PgfToken tok =
- pgf_lexer_current_token(lexer);
-
- if (*tok == 0)
- PyErr_SetString(PGFError, "The sentence cannot be parsed");
- else {
+ if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
+ GuString msg = (GuString) gu_exn_caught_data(parse_err);
+ PyErr_SetString(PGFError, msg);
+ } else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
+ GuString tok = (GuString) gu_exn_caught_data(parse_err);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
- PyErr_Format(ParseError, "Unexpected token: \"%s\"",
- PyString_AsString(py_tok));
+ PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
Py_DECREF(py_tok);
}
}
- Py_XDECREF(py_lexer);
gu_pool_free(tmp_pool);
return pyres;
@@ -1671,56 +1576,21 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
}
static PyObject*
-Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) {
- static char *kwlist[] = {"sentence", "tokens", NULL};
-
- int len;
- const uint8_t *buf = NULL;
- PyObject* py_lexer = NULL;
- if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist,
- &buf, &len, &py_lexer))
+Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
+ GuString sent;
+ if (!PyArg_ParseTuple(args, "s", &sent))
return NULL;
- if ((buf == NULL && py_lexer == NULL) ||
- (buf != NULL && py_lexer != NULL)) {
- PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
- return NULL;
- }
-
- GuPool* tmp_pool = gu_local_pool();
-
- PgfLexer *lexer = NULL;
- if (buf != NULL) {
- GuIn* in = gu_data_in(buf, len, tmp_pool);
- lexer = pgf_new_simple_lexer(in, tmp_pool);
- }
- if (py_lexer != NULL) {
- // get an iterator out of the iterable object
- py_lexer = PyObject_GetIter(py_lexer);
- if (py_lexer == NULL) {
- gu_pool_free(tmp_pool);
- return NULL;
- }
-
- lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
- }
-
- GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
+ GuPool *tmp_pool = gu_local_pool();
+ GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
PyObject* analyses = PyList_New(0);
PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
- pgf_lookup_morpho(self->concr, lexer, &callback.fn, err);
-
- Py_XDECREF(py_lexer);
+ pgf_lookup_morpho(self->concr, sent, &callback.fn, err);
gu_pool_free(tmp_pool);
- if (!gu_ok(err)) {
- Py_DECREF(analyses);
- return NULL;
- }
-
return analyses;
}
@@ -1833,7 +1703,7 @@ static PyMethodDef Concr_methods[] = {
{"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS,
"Renders an abstract syntax tree as a parse tree in Graphviz format"
},
- {"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS,
+ {"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
"Looks up a word in the lexicon of the grammar"
},
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,