diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2013-10-09 12:08:51 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2013-10-09 12:08:51 +0000 |
| commit | 8cf03bc5b6895568eb08be1415985a64793bb81c (patch) | |
| tree | 814e95e1b0422362066fef389e8b03741e8478ad /src/runtime/python | |
| parent | 20e4970ec19949da10b86facd3c6a5a4abb03acb (diff) | |
A major redesign of the C runtime. The parser and the linearizer now fully support BIND. The following things are still broken: parseval, word completion, handling of 'pre', and the robust mode.
Diffstat (limited to 'src/runtime/python')
| -rw-r--r-- | src/runtime/python/pypgf.c | 204 |
1 files changed, 37 insertions, 167 deletions
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 056cdc924..d5d3e78b1 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1046,48 +1046,6 @@ Concr_printName(ConcrObject* self, PyObject *args) return PyString_FromString(pgf_print_name(self->concr, name)); } -typedef struct { - PgfLexer base; - PyObject* pylexer; - GuPool* pool; -} PgfPythonLexer; - -GU_DEFINE_TYPE(PyPgfLexerExn, abstract, _); - -static PgfToken -pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err) -{ - PgfPythonLexer* lexer = (PgfPythonLexer*) base; - lexer->base.tok = ""; - - PyObject* item = PyIter_Next(lexer->pylexer); - if (item == NULL) - if (PyErr_Occurred() != NULL) - gu_raise(err, PyPgfLexerExn); - else - gu_raise(err, GuEOF); - else { - const char* str = PyString_AsString(item); - if (str == NULL) - gu_raise(err, PyPgfLexerExn); - else - lexer->base.tok = gu_string_copy(str, lexer->pool); - } - - return lexer->base.tok; -} - -static PgfLexer* -pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool) -{ - PgfPythonLexer* lexer = gu_new(PgfPythonLexer, pool); - lexer->base.read_token = pypgf_python_lexer_read_token; - lexer->base.tok = ""; - lexer->pylexer = pylexer; - lexer->pool = pool; - return ((PgfLexer*) lexer); -} - #if ( (PY_VERSION_HEX < 0x02070000) \ || ((PY_VERSION_HEX >= 0x03000000) \ && (PY_VERSION_HEX < 0x03010000)) ) @@ -1114,35 +1072,19 @@ void pypgf_container_descructor(PyObject *capsule) static IterObject* Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) { - static char *kwlist[] = {"sentence", "tokens", "cat", "n", "heuristics", NULL}; + static char *kwlist[] = {"sentence", "cat", "n", "heuristics", NULL}; - int len; - const uint8_t *buf = NULL; - PyObject* py_lexer = NULL; + const char *sentence = NULL; PgfCId catname = pgf_start_cat(self->grammar->pgf); int max_count = -1; double heuristics = -1; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist, - &buf, &len, &py_lexer, &catname, 
&max_count, &heuristics)) + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|sid", kwlist, + &sentence, &catname, &max_count, &heuristics)) return NULL; - if ((buf == NULL && py_lexer == NULL) || - (buf != NULL && py_lexer != NULL)) { - PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided"); - return NULL; - } - - if (py_lexer != NULL) { - // get an iterator out of the iterable object - py_lexer = PyObject_GetIter(py_lexer); - if (py_lexer == NULL) - return NULL; - } - IterObject* pyres = (IterObject*) pgf_IterType.tp_alloc(&pgf_IterType, 0); if (pyres == NULL) { - Py_XDECREF(py_lexer); return NULL; } @@ -1160,30 +1102,22 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) pyres->counter = 0; pyres->fetch = Iter_fetch_expr; - PgfLexer *lexer = NULL; - if (buf != NULL) { - GuIn* in = gu_data_in(buf, len, pyres->pool); - lexer = pgf_new_simple_lexer(in, pyres->pool); - } - if (py_lexer != NULL) { - lexer = pypgf_new_python_lexer(py_lexer, pyres->pool); - } + GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pyres->pool); pyres->res = - pgf_parse_with_heuristics(self->concr, catname, lexer, - heuristics, pyres->pool, out_pool); - - if (pyres->res == NULL) { - PgfToken tok = - pgf_lexer_current_token(lexer); - - if (*tok == 0) - PyErr_SetString(PGFError, "The sentence cannot be parsed"); - else { + pgf_parse_with_heuristics(self->concr, catname, sentence, + heuristics, parse_err, + pyres->pool, out_pool); + + if (!gu_ok(parse_err)) { + if (gu_exn_caught(parse_err) == gu_type(PgfExn)) { + GuString msg = (GuString) gu_exn_caught_data(parse_err); + PyErr_SetString(PGFError, msg); + } else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) { + GuString tok = (GuString) gu_exn_caught_data(parse_err); PyObject* py_tok = PyString_FromString(tok); PyObject_SetAttrString(ParseError, "token", py_tok); - PyErr_Format(ParseError, "Unexpected token: \"%s\"", - PyString_AsString(py_tok)); + PyErr_Format(ParseError, 
"Unexpected token: \"%s\"", tok); Py_DECREF(py_tok); } @@ -1191,45 +1125,26 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) pyres = NULL; } - Py_XDECREF(py_lexer); - return pyres; } static IterObject* Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds) { - static char *kwlist[] = {"sentence", "tokens", "cat", - "prefix", "n", NULL}; + static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL}; - int len; - const uint8_t *buf = NULL; - PyObject* py_lexer = NULL; + const char *sentence = NULL; GuString catname = pgf_start_cat(self->grammar->pgf); GuString prefix = ""; int max_count = -1; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist, - &buf, &len, &py_lexer, &catname, + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist, + &sentence, &catname, &prefix, &max_count)) return NULL; - if ((buf == NULL && py_lexer == NULL) || - (buf != NULL && py_lexer != NULL)) { - PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided"); - return NULL; - } - - if (py_lexer != NULL) { - // get an iterator out of the iterable object - py_lexer = PyObject_GetIter(py_lexer); - if (py_lexer == NULL) - return NULL; - } - IterObject* pyres = (IterObject*) pgf_IterType.tp_alloc(&pgf_IterType, 0); if (pyres == NULL) { - Py_XDECREF(py_lexer); return NULL; } @@ -1245,37 +1160,27 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds) GuPool *tmp_pool = gu_local_pool(); - PgfLexer *lexer = NULL; - if (buf != NULL) { - GuIn* in = gu_data_in(buf, len, tmp_pool); - lexer = pgf_new_simple_lexer(in, tmp_pool); - } - if (py_lexer != NULL) { - lexer = pypgf_new_python_lexer(py_lexer, tmp_pool); - } - + GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), tmp_pool); + pyres->res = - pgf_complete(self->concr, catname, lexer, prefix, pyres->pool); + pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool); - if (pyres->res == NULL) { + if (!gu_ok(parse_err)) { 
Py_DECREF(pyres); pyres = NULL; - PgfToken tok = - pgf_lexer_current_token(lexer); - - if (*tok == 0) - PyErr_SetString(PGFError, "The sentence cannot be parsed"); - else { + if (gu_exn_caught(parse_err) == gu_type(PgfExn)) { + GuString msg = (GuString) gu_exn_caught_data(parse_err); + PyErr_SetString(PGFError, msg); + } else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) { + GuString tok = (GuString) gu_exn_caught_data(parse_err); PyObject* py_tok = PyString_FromString(tok); PyObject_SetAttrString(ParseError, "token", py_tok); - PyErr_Format(ParseError, "Unexpected token: \"%s\"", - PyString_AsString(py_tok)); + PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok); Py_DECREF(py_tok); } } - Py_XDECREF(py_lexer); gu_pool_free(tmp_pool); return pyres; @@ -1671,56 +1576,21 @@ pypgf_collect_morpho(PgfMorphoCallback* self, } static PyObject* -Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) { - static char *kwlist[] = {"sentence", "tokens", NULL}; - - int len; - const uint8_t *buf = NULL; - PyObject* py_lexer = NULL; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist, - &buf, &len, &py_lexer)) +Concr_lookupMorpho(ConcrObject* self, PyObject *args) { + GuString sent; + if (!PyArg_ParseTuple(args, "s", &sent)) return NULL; - if ((buf == NULL && py_lexer == NULL) || - (buf != NULL && py_lexer != NULL)) { - PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided"); - return NULL; - } - - GuPool* tmp_pool = gu_local_pool(); - - PgfLexer *lexer = NULL; - if (buf != NULL) { - GuIn* in = gu_data_in(buf, len, tmp_pool); - lexer = pgf_new_simple_lexer(in, tmp_pool); - } - if (py_lexer != NULL) { - // get an iterator out of the iterable object - py_lexer = PyObject_GetIter(py_lexer); - if (py_lexer == NULL) { - gu_pool_free(tmp_pool); - return NULL; - } - - lexer = pypgf_new_python_lexer(py_lexer, tmp_pool); - } - - GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); + GuPool *tmp_pool = 
gu_local_pool(); + GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); PyObject* analyses = PyList_New(0); PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses }; - pgf_lookup_morpho(self->concr, lexer, &callback.fn, err); - - Py_XDECREF(py_lexer); + pgf_lookup_morpho(self->concr, sent, &callback.fn, err); gu_pool_free(tmp_pool); - if (!gu_ok(err)) { - Py_DECREF(analyses); - return NULL; - } - return analyses; } @@ -1833,7 +1703,7 @@ static PyMethodDef Concr_methods[] = { {"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS, "Renders an abstract syntax tree as a parse tree in Graphviz format" }, - {"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS, + {"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS, "Looks up a word in the lexicon of the grammar" }, {"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS, |
