summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2012-12-14 15:32:49 +0000
committerkr.angelov <kr.angelov@gmail.com>2012-12-14 15:32:49 +0000
commit20aaa4a9899ce454d3c20594a2b7d2d3d38dbc88 (patch)
tree0f2556c655a29e08cb961b6aa56f0c6d51aa51b6
parent9ab3a6034d017f87bfc62ac2fc9351e514841a24 (diff)
The first prototype for exhaustive generation in the C runtime. The trees are always listed in decreasing probability order. There is also an API for generation from Python
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs4
-rw-r--r--src/runtime/c/Makefile.am1
-rw-r--r--src/runtime/c/pgf/parser.h5
-rw-r--r--src/runtime/c/pgf/pgf.h8
-rw-r--r--src/runtime/c/pgf/reasoner.c163
-rw-r--r--src/runtime/python/pypgf.c108
-rw-r--r--src/runtime/python/test.py4
7 files changed, 255 insertions, 38 deletions
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index f9b008bf8..6fc572fc9 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -25,7 +25,6 @@ import GF.Infra.Option
import GF.Data.Operations
import Data.List
-import Data.Function
import Data.Char (isDigit,isSpace)
import qualified Data.Set as Set
import qualified Data.Map as Map
@@ -63,8 +62,7 @@ mkCanon2pgf opts gr am = do
((m,c),AbsCat (Just (L _ cont)),addr) <- adefs]
catfuns cat =
- (map (\x -> (0,snd x)) . sortBy (compare `on` fst))
- [(loc,i2i f) | ((m,f),AbsFun (Just (L loc ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
+ [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
mkConcr cm = do
let cflags = concatOptions [mflags mo | (i,mo) <- modules gr, isModCnc mo,
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index f6e8a263a..cfb7461df 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -111,6 +111,7 @@ libpgf_la_SOURCES = \
pgf/reader.h \
pgf/reader.c \
pgf/linearize.c \
+ pgf/reasoner.c \
pgf/printer.c \
pgf/pgf.c \
pgf/pgf.h
diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h
index dcc3ca3af..6952d33ab 100644
--- a/src/runtime/c/pgf/parser.h
+++ b/src/runtime/c/pgf/parser.h
@@ -82,11 +82,6 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
* @{
*/
-
-/// An enumeration of #PgfExpr elements.
-typedef GuEnum PgfExprEnum;
-
-
/// Retrieve the current parses from the parse state.
PgfExprEnum*
pgf_parse_result(PgfParseState* state, GuPool* pool);
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 9963534b5..1f3947bff 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -54,6 +54,9 @@ extern GU_DECLARE_TYPE(PgfConcr, struct);
#include <pgf/expr.h>
#include <pgf/lexer.h>
+/// An enumeration of #PgfExpr elements.
+typedef GuEnum PgfExprEnum;
+
PgfPGF*
pgf_read(const char* fpath,
GuPool* pool, GuExn* err);
@@ -109,9 +112,12 @@ pgf_print_name(PgfConcr*, PgfCId id);
void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
-GuEnum*
+PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
+PgfExprEnum*
+pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
+
// an experimental function. Please don't use it
void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
diff --git a/src/runtime/c/pgf/reasoner.c b/src/runtime/c/pgf/reasoner.c
new file mode 100644
index 000000000..9a15bded7
--- /dev/null
+++ b/src/runtime/c/pgf/reasoner.c
@@ -0,0 +1,163 @@
+#include <pgf/pgf.h>
+#include <pgf/data.h>
+#include <math.h>
+#include <stdio.h>
+
+typedef struct PgfExprState PgfExprState;
+
+struct PgfExprState {
+ PgfExprState* cont;
+ PgfExpr expr;
+ PgfHypos hypos;
+ size_t arg_idx;
+};
+
+typedef struct {
+ PgfExprState *st;
+ prob_t cont_prob;
+ size_t fun_idx;
+ PgfCat* abscat;
+} PgfExprQState;
+
+typedef struct {
+ GuPool* tmp_pool;
+ PgfAbstr* abstract;
+ GuBuf* pqueue;
+ PgfExprEnum en;
+} PgfReasoner;
+
+static int
+cmp_expr_qstate(GuOrder* self, const void* a, const void* b)
+{
+ PgfExprQState *q1 = (PgfExprQState *) a;
+ PgfExprQState *q2 = (PgfExprQState *) b;
+
+ prob_t prob1 = q1->cont_prob-log(q1->abscat->functions[q1->fun_idx].prob);
+ prob_t prob2 = q2->cont_prob-log(q2->abscat->functions[q2->fun_idx].prob);
+
+ if (prob1 < prob2)
+ return -1;
+ else if (prob1 > prob2)
+ return 1;
+ else
+ return 0;
+}
+
+static GuOrder
+pgf_expr_qstate_order = { cmp_expr_qstate };
+
+static bool
+pgf_reasoner_cat_init(PgfReasoner* rs,
+ PgfExprState* cont, prob_t cont_prob, PgfCId cat,
+ GuPool* pool)
+{
+ // Checking for loops in the chart
+ if (cont != NULL) {
+ PgfExprState* st = cont->cont;
+ while (st != NULL) {
+ PgfHypo* hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
+ if (gu_string_eq(hypo->type->cid, cat))
+ return false;
+
+ st = st->cont;
+ }
+ }
+
+ PgfCat* abscat = gu_map_get(rs->abstract->cats, &cat, PgfCat*);
+ if (abscat == NULL) {
+ return false;
+ }
+
+ PgfExprQState q = {cont, cont_prob, 0, abscat};
+ gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
+ return true;
+}
+
+static PgfExprProb*
+pgf_reasoner_next(PgfReasoner* rs, GuPool* pool)
+{
+ if (rs->pqueue == NULL)
+ return NULL;
+
+ while (gu_buf_length(rs->pqueue) > 0) {
+ PgfExprQState q;
+ gu_buf_heap_pop(rs->pqueue, &pgf_expr_qstate_order, &q);
+
+ PgfCId fun = q.abscat->functions[q.fun_idx++].fun;
+ PgfFunDecl* absfun =
+ gu_map_get(rs->abstract->funs, &fun, PgfFunDecl*);
+
+ if (q.fun_idx < q.abscat->n_functions) {
+ gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
+ }
+
+ if (absfun == NULL)
+ continue;
+
+ PgfExprState *st = gu_new(PgfExprState, rs->tmp_pool);
+ st->cont = q.st;
+ st->expr =
+ gu_new_variant_i(pool, PGF_EXPR_FUN,
+ PgfExprFun,
+ .fun = fun);
+ st->hypos = absfun->type->hypos;
+ st->arg_idx = 0;
+
+ for (;;) {
+ prob_t prob = q.cont_prob+absfun->ep.prob;
+
+ if (st->arg_idx < gu_seq_length(st->hypos)) {
+ PgfHypo *hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
+ pgf_reasoner_cat_init(rs, st, prob,
+ hypo->type->cid, pool);
+ break;
+ } else {
+ PgfExprState* cont = st->cont;
+ if (cont == NULL) {
+ PgfExprProb* ep = gu_new(PgfExprProb, pool);
+ ep->expr = st->expr;
+ ep->prob = prob;
+ return ep;
+ }
+
+ st->cont = cont->cont;
+ st->expr =
+ gu_new_variant_i(pool, PGF_EXPR_APP,
+ PgfExprApp,
+ .fun = cont->expr, .arg = st->expr);
+ st->hypos = cont->hypos;
+ st->arg_idx = cont->arg_idx+1;
+ }
+ }
+ }
+
+ gu_pool_free(rs->tmp_pool);
+ rs->tmp_pool = NULL;
+ rs->pqueue = NULL;
+ return NULL;
+}
+
+static void
+pgf_reasoner_enum_next(GuEnum* self, void* to, GuPool* pool)
+{
+ PgfReasoner* pr = gu_container(self, PgfReasoner, en);
+ *(PgfExprProb**)to = pgf_reasoner_next(pr, pool);
+}
+
+PgfExprEnum*
+pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool)
+{
+ GuPool* tmp_pool = gu_new_pool();
+ GuBuf* pqueue = gu_new_buf(PgfExprQState, tmp_pool);
+
+ PgfReasoner* rs =
+ gu_new_i(pool, PgfReasoner,
+ .tmp_pool = tmp_pool,
+ .abstract = &pgf->abstract,
+ .pqueue = pqueue,
+ .en.next = pgf_reasoner_enum_next);
+
+ pgf_reasoner_cat_init(rs, NULL, 0, cat, pool);
+
+ return &rs->en;
+}
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c
index 65fa4f561..0941b9a1a 100644
--- a/src/runtime/python/pypgf.c
+++ b/src/runtime/python/pypgf.c
@@ -138,15 +138,19 @@ static PyTypeObject pgf_ExprType = {
typedef struct {
PyObject_HEAD
GuPool* pool;
+ int max_count;
+ int counter;
GuEnum* res;
-} ParseResultObject;
+} ExprIterObject;
-static ParseResultObject*
-ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+static ExprIterObject*
+ExprIter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- ParseResultObject* self = (ParseResultObject *)type->tp_alloc(type, 0);
+ ExprIterObject* self = (ExprIterObject *)type->tp_alloc(type, 0);
if (self != NULL) {
self->pool = NULL;
+ self->max_count = -1;
+ self->counter = 0;
self->res = NULL;
}
@@ -154,7 +158,7 @@ ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
}
static void
-ParseResult_dealloc(ParseResultObject* self)
+ExprIter_dealloc(ExprIterObject* self)
{
if (self->pool != NULL)
gu_pool_free(self->pool);
@@ -163,21 +167,26 @@ ParseResult_dealloc(ParseResultObject* self)
}
static int
-ParseResult_init(ParseResultObject *self, PyObject *args, PyObject *kwds)
+ExprIter_init(ExprIterObject *self, PyObject *args, PyObject *kwds)
{
return -1;
}
static PyObject*
-ParseResult_iter(ParseResultObject *self)
+ExprIter_iter(ExprIterObject *self)
{
Py_INCREF(self);
return (PyObject*) self;
}
-static ExprObject*
-ParseResult_iternext(ParseResultObject *self)
+static PyObject*
+ExprIter_iternext(ExprIterObject *self)
{
+ if (self->max_count > 0 && self->counter >= self->max_count) {
+ return NULL;
+ }
+ self->counter++;
+
PgfExprProb* ep = gu_next(self->res, PgfExprProb*, self->pool);
if (ep == NULL)
return NULL;
@@ -190,20 +199,23 @@ ParseResult_iternext(ParseResultObject *self)
pyexpr->master = (PyObject*) self;
Py_INCREF(self);
- return pyexpr;
+ PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
+ Py_DECREF(pyexpr);
+
+ return res;
}
-static PyMethodDef ParseResult_methods[] = {
+static PyMethodDef ExprIter_methods[] = {
{NULL} /* Sentinel */
};
-static PyTypeObject pgf_ParseResultType = {
+static PyTypeObject pgf_ExprIterType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
- "pgf.ParseResult", /*tp_name*/
- sizeof(ParseResultObject), /*tp_basicsize*/
+ "pgf.ExprIter", /*tp_name*/
+ sizeof(ExprIterObject), /*tp_basicsize*/
0, /*tp_itemsize*/
- (destructor)ParseResult_dealloc, /*tp_dealloc*/
+ (destructor)ExprIter_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
@@ -219,14 +231,14 @@ static PyTypeObject pgf_ParseResultType = {
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
- "parsing result", /*tp_doc*/
+ "an iterator over a sequence of expressions",/*tp_doc*/
0, /*tp_traverse */
0, /*tp_clear */
0, /*tp_richcompare */
0, /*tp_weaklistoffset */
- (getiterfunc) ParseResult_iter, /*tp_iter */
- (iternextfunc) ParseResult_iternext, /*tp_iternext */
- ParseResult_methods, /*tp_methods */
+ (getiterfunc) ExprIter_iter, /*tp_iter */
+ (iternextfunc) ExprIter_iternext, /*tp_iternext */
+ ExprIter_methods, /*tp_methods */
0, /*tp_members */
0, /*tp_getset */
0, /*tp_base */
@@ -234,9 +246,9 @@ static PyTypeObject pgf_ParseResultType = {
0, /*tp_descr_get */
0, /*tp_descr_set */
0, /*tp_dictoffset */
- (initproc)ParseResult_init,/*tp_init */
+ (initproc)ExprIter_init, /*tp_init */
0, /*tp_alloc */
- (newfunc) ParseResult_new, /*tp_new */
+ (newfunc) ExprIter_new, /*tp_new */
};
typedef struct {
@@ -285,7 +297,7 @@ Concr_printName(ConcrObject* self, PyObject *args)
return pyname;
}
-static ParseResultObject*
+static ExprIterObject*
Concr_parse(ConcrObject* self, PyObject *args)
{
size_t len;
@@ -294,13 +306,15 @@ Concr_parse(ConcrObject* self, PyObject *args)
if (!PyArg_ParseTuple(args, "ss#", &catname_s, &buf, &len))
return NULL;
- ParseResultObject* pyres = (ParseResultObject*)
- pgf_ExprType.tp_alloc(&pgf_ParseResultType, 0);
+ ExprIterObject* pyres = (ExprIterObject*)
+ pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
if (pyres == NULL) {
return NULL;
}
pyres->pool = gu_new_pool();
+ pyres->max_count = -1;
+ pyres->counter = 0;
GuPool *tmp_pool = gu_local_pool();
GuString catname = gu_str_string(catname_s, tmp_pool);
@@ -658,6 +672,43 @@ PGF_functionsByCat(PGFObject* self, PyObject *args)
return functions;
}
+static ExprIterObject*
+PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
+{
+ static char *kwlist[] = {"cat", "n", NULL};
+
+ const char *catname_s;
+ int max_count = -1;
+ if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|i", kwlist,
+ &catname_s, &max_count))
+ return NULL;
+
+ ExprIterObject* pyres = (ExprIterObject*)
+ pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
+ if (pyres == NULL) {
+ return NULL;
+ }
+
+ pyres->pool = gu_new_pool();
+ pyres->max_count = max_count;
+ pyres->counter = 0;
+
+ GuPool *tmp_pool = gu_local_pool();
+ GuString catname = gu_str_string(catname_s, tmp_pool);
+
+ pyres->res =
+ pgf_generate(self->pgf, catname, pyres->pool);
+ if (pyres->res == NULL) {
+ Py_DECREF(pyres);
+ gu_pool_free(tmp_pool);
+ return NULL;
+ }
+
+ gu_pool_free(tmp_pool);
+
+ return pyres;
+}
+
static PyGetSetDef PGF_getseters[] = {
{"abstractName",
(getter)PGF_getAbstractName, NULL,
@@ -688,7 +739,10 @@ static PyMemberDef PGF_members[] = {
static PyMethodDef PGF_methods[] = {
{"functionsByCat", (PyCFunction)PGF_functionsByCat, METH_VARARGS,
- "Return the list of functions for a given category"
+ "Returns the list of functions for a given category"
+ },
+ {"generate", (PyCFunction)PGF_generate, METH_VARARGS | METH_KEYWORDS,
+ "Generates abstract syntax trees of given category in decreasing probability order"
},
{NULL} /* Sentinel */
};
@@ -816,7 +870,7 @@ initpgf(void)
if (PyType_Ready(&pgf_ExprType) < 0)
return;
- if (PyType_Ready(&pgf_ParseResultType) < 0)
+ if (PyType_Ready(&pgf_ExprIterType) < 0)
return;
m = Py_InitModule("pgf", module_methods);
@@ -836,5 +890,5 @@ initpgf(void)
Py_INCREF(&pgf_PGFType);
Py_INCREF(&pgf_ConcrType);
Py_INCREF(&pgf_ExprType);
- Py_INCREF(&pgf_ParseResultType);
+ Py_INCREF(&pgf_ExprIterType);
}
diff --git a/src/runtime/python/test.py b/src/runtime/python/test.py
index cb0ba5452..ae0427d60 100644
--- a/src/runtime/python/test.py
+++ b/src/runtime/python/test.py
@@ -19,8 +19,8 @@ while True:
break;
try:
- for e in gr.languages["ParseEng"].parse(gr.startCat,line):
- print e
+ for (p,e) in gr.languages["ParseEng"].parse(gr.startCat,line):
+ sys.stdout.write("["+str(p)+"] "+str(e)+"\n")
print gr.languages["ParseEngBul"].linearize(e)
except pgf.ParseError as e:
print e.message