From 0aae4702edbd4889159e3772b72d0a4c10b7e57a Mon Sep 17 00:00:00 2001 From: "gregoire.detrez" Date: Thu, 24 Jan 2013 13:31:34 +0000 Subject: Python binding: add a parsing function that directly accepts a list of tokens. It allows defining a tokenizer in Python (or using an existing one, from nltk for instance). --- src/runtime/c/pgf/pgf.c | 27 +++++++++++++++++++++++++++ src/runtime/c/pgf/pgf.h | 3 +++ 2 files changed, 30 insertions(+) (limited to 'src/runtime/c') diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index ccee4bf24..2b720f093 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -227,6 +227,33 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) return pgf_parse_result(state, pool); } +// Same as previous but accept a list of tokens as input instead of a +// lexer +GuEnum* +pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char **tokens, int len, GuPool* pool) +{ + // Begin parsing a sentence of the specified category + PgfParseState* state = + pgf_parser_init_state(concr, cat, 0, pool); + if (state == NULL) { + return NULL; + } + + // Parsing + PgfToken tok; + for (int i = 0; i < len; i++) { + tok = gu_str_string(tokens[i], pool); + + state = pgf_parser_next_state(state, tok, pool); + if (state == NULL) { + return NULL; + } + } + + // Now begin enumerating the resulting syntax trees + return pgf_parse_result(state, pool); +} + void pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) { diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 1f3947bff..afef6ec48 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -115,6 +115,9 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err); PgfExprEnum* pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); +PgfExprEnum* +pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char* tokens[], int len, GuPool* pool); + PgfExprEnum* pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool); -- 
cgit v1.2.3