summaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/c/Makefile.am6
-rw-r--r--src/runtime/c/pgf/parser.c100
-rw-r--r--src/runtime/c/pgf/parser.h9
-rw-r--r--src/runtime/c/utils/pgf-chunk.c160
4 files changed, 267 insertions, 8 deletions
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index f30a909ee..3757dbbf4 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -116,7 +116,8 @@ libpgf_la_SOURCES = \
bin_PROGRAMS = \
utils/pgf2yaml \
utils/pgf-print \
- utils/pgf-translate
+ utils/pgf-translate \
+ utils/pgf-chunk
utils_pgf2yaml_SOURCES = utils/pgf2yaml.c
utils_pgf2yaml_LDADD = libpgf.la libgu.la
@@ -127,6 +128,9 @@ utils_pgf_print_LDADD = libpgf.la libgu.la
utils_pgf_translate_SOURCES = utils/pgf-translate.c
utils_pgf_translate_LDADD = libpgf.la libgu.la
+utils_pgf_chunk_SOURCES = utils/pgf-chunk.c
+utils_pgf_chunk_LDADD = libpgf.la libgu.la
+
AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2
ACLOCAL_AMFLAGS = -I m4
include doxygen.am
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index ad792ebb4..6159ab859 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -85,9 +85,7 @@ struct PgfParseState {
PgfItem* meta_item;
PgfContsMap* conts_map;
PgfGenCatMap* generated_cats;
-#ifdef PGF_PARSER_DEBUG
unsigned short offset;
-#endif
prob_t viterbi_prob;
@@ -1630,9 +1628,7 @@ pgf_new_parse_state(PgfParsing* ps,
state->meta_item = NULL;
state->generated_cats = gu_map_type_new(PgfGenCatMap, pool);
state->conts_map = gu_map_type_new(PgfContsMap, pool);
-#ifdef PGF_PARSER_DEBUG
state->offset = next ? next->offset+1 : 0;
-#endif
state->viterbi_prob = 0;
state->ps = ps;
state->ts = ts;
@@ -1884,6 +1880,102 @@ pgf_parse_result(PgfParseState* state, GuPool* pool)
return en;
}
+/**
+ * Print the tokens of the parse-state chain to stdout on one line,
+ * wrapping each recognised chunk as "(Cat tok tok) ".
+ *
+ * If no completed category is available yet, the parser is advanced
+ * with pgf_parsing_proceed() until one appears or no further progress
+ * is possible; in the latter case nothing is printed. Nothing is
+ * printed either when the completed category has no productions.
+ *
+ * Only the first production of the completed category is inspected:
+ * for an APPLY production the completed category itself is the single
+ * chunk, for a META production every argument is a candidate chunk.
+ * Any other production kind prints the tokens without brackets.
+ *
+ * The state list is reversed in place for printing and reversed back
+ * before returning. Write errors are collected in a local exception
+ * frame and are not reported to the caller.
+ *
+ * @param state  the latest parse state; its `next` links lead towards
+ *               the states with smaller offsets.
+ */
+void
+pgf_parse_print_chunks(PgfParseState* state)
+{
+    // Drive the parser until something has been completed or it is stuck.
+    while (state->ps->completed == NULL) {
+        if (!pgf_parsing_proceed(state))
+            break;
+    }
+    if (state->ps->completed == NULL)
+        return;
+
+    // Check for the empty case before allocating, so that the early
+    // return does not leak the temporary pool.
+    PgfCCat* completed = state->ps->completed;
+    if (gu_seq_length(completed->prods) == 0)
+        return;
+
+    GuPool* tmp_pool = gu_new_pool();
+    GuOut* out = gu_file_out(stdout, tmp_pool);
+    GuWriter* wtr = gu_new_utf8_writer(out, tmp_pool);
+    GuExn* err = gu_exn(NULL, type, tmp_pool);
+
+    size_t n_args = 0;
+    size_t arg_idx = 0;
+    PgfCCat* ccat = NULL;              // chunk currently being looked for
+    PgfProductionMeta* pmeta = NULL;
+
+    PgfProduction prod = gu_seq_get(completed->prods, PgfProduction, 0);
+    GuVariantInfo pi = gu_variant_open(prod);
+    switch (pi.tag) {
+    case PGF_PRODUCTION_APPLY:
+        n_args = 1;
+        ccat = completed;
+        break;
+    case PGF_PRODUCTION_META:
+        pmeta = pi.data;
+        n_args = gu_seq_length(pmeta->args);
+        // A meta production without arguments has no chunks at all.
+        if (n_args > 0)
+            ccat = gu_seq_index(pmeta->args, PgfPArg, arg_idx)->ccat;
+        break;
+    default:
+        // Unknown production kind: print the tokens without brackets.
+        break;
+    }
+
+    // Reverse the state list in place so that it can be walked from
+    // the first token to the last one.
+    PgfParseState* next = NULL;
+    while (state != NULL) {
+        PgfParseState* tmp = state->next;
+        state->next = next;
+        next = state;
+        state = tmp;
+    }
+
+    int offset = 0;                    // number of tokens printed so far
+
+    state = next;
+    next = NULL;
+    while (state != NULL) {
+        if (state->ts != NULL) {
+            // NOTE(review): ccat->conts is dereferenced without a NULL
+            // check here, although the loop below expects that a conts
+            // pointer can be NULL further up the chain - confirm that
+            // it is always set for the categories reaching this point.
+            if (ccat != NULL &&
+                offset == ((ccat->conts->state != NULL) ? ccat->conts->state->offset : 0)) {
+                // Climb the continuation chain to the outermost
+                // category; its abstract name labels the chunk.
+                PgfCCat *ccat2 = ccat;
+                while (ccat2->conts != NULL) {
+                    ccat2 = ccat2->conts->ccat;
+                }
+
+                gu_putc('(', wtr, err);
+                gu_string_write(ccat2->cnccat->abscat->name, wtr, err);
+                gu_putc(' ', wtr, err);
+            }
+
+            gu_string_write(state->ts->tok, wtr, err);
+            offset++;
+
+            // The chunk ends at the state whose generated categories
+            // map the chunk's continuations back to the chunk itself.
+            if (ccat != NULL &&
+                ccat == gu_map_get(state->generated_cats, ccat->conts, PgfCCat*)) {
+                gu_putc(')', wtr, err);
+
+                // Move on to the next argument, if there is one.
+                arg_idx++;
+                ccat = (arg_idx < n_args)
+                    ? gu_seq_index(pmeta->args, PgfPArg, arg_idx)->ccat
+                    : NULL;
+            }
+
+            gu_putc(' ', wtr, err);
+        }
+
+        // Reverse the link again; this restores the original order.
+        PgfParseState* tmp = state->next;
+        state->next = next;
+        next = state;
+        state = tmp;
+    }
+    gu_putc('\n', wtr, err);
+
+    gu_pool_free(tmp_pool);
+}
// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat
PgfParseState*
diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h
index 65997f601..dcc3ca3af 100644
--- a/src/runtime/c/pgf/parser.h
+++ b/src/runtime/c/pgf/parser.h
@@ -102,9 +102,12 @@ pgf_parse_result(PgfParseState* state, GuPool* pool);
* succesful, or ambiguously successful.
*/
-PgfExpr
-pgf_parse_best_result(PgfParseState* state, GuPool* pool);
-
+// Use this procedure only at your own risk.
+// It is dirty and will probably be removed or replaced
+// with something else. Currently it is here only for experimental
+// purposes.
+void
+pgf_parse_print_chunks(PgfParseState* state);
size_t
pgf_item_lin_idx(PgfItem* item);
diff --git a/src/runtime/c/utils/pgf-chunk.c b/src/runtime/c/utils/pgf-chunk.c
new file mode 100644
index 000000000..c4d0d0b3f
--- /dev/null
+++ b/src/runtime/c/utils/pgf-chunk.c
@@ -0,0 +1,160 @@
+// Don't put too much hope in this program. It is doing the wrong thing,
+// but let's see how far we can get with it.
+
+#include <gu/variant.h>
+#include <gu/map.h>
+#include <gu/dump.h>
+#include <gu/log.h>
+#include <gu/enum.h>
+#include <gu/file.h>
+#include <pgf/pgf.h>
+#include <pgf/data.h>
+#include <pgf/parser.h>
+#include <pgf/lexer.h>
+#include <pgf/literals.h>
+#include <pgf/linearize.h>
+#include <pgf/expr.h>
+#include <pgf/edsl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <time.h>
+
+/**
+ * Entry point of the pgf-chunk tool.
+ *
+ * Usage: pgf-chunk pgf cat from_lang
+ *
+ * Loads the grammar file `pgf`, then reads sentences from stdin, one
+ * per line, parses each of them with the concrete syntax `from_lang`
+ * starting from category `cat`, and prints the resulting chunks with
+ * pgf_parse_print_chunks().
+ *
+ * @return EXIT_SUCCESS, or EXIT_FAILURE on a usage error, when the
+ *         grammar or the probabilities cannot be loaded, when the
+ *         language is unknown, when parsing cannot be started, or on
+ *         an input error.
+ */
+int main(int argc, char* argv[]) {
+    // Set the character locale, so we can produce proper output.
+    setlocale(LC_CTYPE, "");
+
+    // Create the pool that is used to allocate everything
+    GuPool* pool = gu_new_pool();
+    int status = EXIT_SUCCESS;
+    if (argc != 4) {
+        fprintf(stderr, "usage: %s pgf cat from_lang\n", argv[0]);
+        status = EXIT_FAILURE;
+        goto fail;
+    }
+    char* filename = argv[1];
+
+    GuString cat = gu_str_string(argv[2], pool);
+
+    GuString from_lang = gu_str_string(argv[3], pool);
+
+    // Binary mode, so that no newline translation is applied to the
+    // grammar file on platforms where text mode differs.
+    // NOTE(review): assumes PGF is a binary format - confirm.
+    FILE* infile = fopen(filename, "rb");
+    if (infile == NULL) {
+        fprintf(stderr, "couldn't open %s\n", filename);
+        status = EXIT_FAILURE;
+        goto fail;
+    }
+
+    // Create an input stream from the input file
+    GuIn* in = gu_file_in(infile, pool);
+
+    // Create an exception frame that catches all errors.
+    GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
+
+    // Read the PGF grammar.
+    PgfPGF* pgf = pgf_read(in, pool, err);
+
+    // If an error occurred, it shows in the exception frame
+    if (!gu_ok(err)) {
+        fprintf(stderr, "Reading PGF failed\n");
+        status = EXIT_FAILURE;
+        goto fail_read;
+    }
+
+    // NOTE(review): the path is hard-coded and relative to the current
+    // working directory, so the tool only works when started from the
+    // matching place in the source tree.
+    if (!pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool)) {
+        fprintf(stderr, "Loading meta child probs failed\n");
+        status = EXIT_FAILURE;
+        goto fail_read;
+    }
+
+    // Look up the concrete syntax of the source language
+    PgfConcr* from_concr =
+        gu_map_get(pgf->concretes, &from_lang, PgfConcr*);
+    if (!from_concr) {
+        fprintf(stderr, "Unknown language\n");
+        status = EXIT_FAILURE;
+        goto fail_concr;
+    }
+
+    // Register a callback for the literal category Symb
+    pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
+                           &pgf_nerc_literal_callback);
+
+    // Create an output stream for stdout
+    GuOut* out = gu_file_out(stdout, pool);
+
+    // Locale-encoding writers are currently unsupported
+    // GuWriter* wtr = gu_locale_writer(out, pool);
+    // Use a writer with hard-coded utf-8 encoding for now.
+    GuWriter* wtr = gu_new_utf8_writer(out, pool);
+
+    // The chunking loop: one sentence per input line.
+    // XXX: This currently reads stdin directly, so it doesn't support
+    // encodings properly. TODO: use a locale reader for input
+    while (true) {
+        char buf[4096];
+        char* line = fgets(buf, sizeof(buf), stdin);
+        if (line == NULL) {
+            if (ferror(stdin)) {
+                fprintf(stderr, "Input error\n");
+                status = EXIT_FAILURE;
+            }
+            break;
+        } else if (strcmp(line, "") == 0) {
+            // NOTE(review): fgets() keeps the trailing newline, so a
+            // blank line is "\n" and does not end the loop here; only
+            // a line starting with a NUL byte compares equal to "".
+            break;
+        }
+
+        // We create a temporary pool for processing a single
+        // sentence, so our memory usage doesn't increase over time.
+        // It is released on every path that leaves this iteration.
+        GuPool* ppool = gu_new_pool();
+
+        // Begin parsing a sentence of the specified category
+        PgfParseState* state =
+            pgf_parser_init_state(from_concr, cat, 0, ppool);
+        if (state == NULL) {
+            fprintf(stderr, "Couldn't begin parsing\n");
+            status = EXIT_FAILURE;
+            gu_pool_free(ppool);
+            break;
+        }
+
+        GuReader *rdr =
+            gu_string_reader(gu_str_string(line, ppool), ppool);
+        PgfLexer *lexer =
+            pgf_new_lexer(rdr, ppool);
+
+        // Tokenization: feed tokens until the lexer raises an exception
+        GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
+        PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
+        while (!gu_exn_is_raised(lex_err)) {
+            // feed the token to get a new parse state
+            state = pgf_parser_next_state(state, tok, ppool);
+            if (!state) {
+                gu_puts("Unexpected token: \"", wtr, err);
+                gu_string_write(tok, wtr, err);
+                gu_puts("\"\n", wtr, err);
+                break;
+            }
+
+            tok = pgf_lexer_next_token(lexer, lex_err, ppool);
+        }
+
+        // state is NULL only when a token was rejected above.
+        if (state != NULL)
+            pgf_parse_print_chunks(state);
+
+        // Free all resources allocated while parsing this sentence
+        gu_pool_free(ppool);
+    }
+fail_concr:
+fail_read:
+    fclose(infile);
+fail:
+    gu_pool_free(pool);
+    return status;
+}