diff options
| author | peter.ljunglof <peter.ljunglof@gu.se> | 2013-01-08 10:19:53 +0000 |
|---|---|---|
| committer | peter.ljunglof <peter.ljunglof@gu.se> | 2013-01-08 10:19:53 +0000 |
| commit | 599b51830fc2ef49b6b55f311a7df84bdfee6090 (patch) | |
| tree | e075798676e18a680afab41490354c3aa3020fc7 /src/runtime/c/utils | |
| parent | 9b78da535700f561952d0b6498d84b187e9a1791 (diff) | |
New PGF utility: pgf-parse
Diffstat (limited to 'src/runtime/c/utils')
| -rw-r--r-- | src/runtime/c/utils/pgf-parse.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/src/runtime/c/utils/pgf-parse.c b/src/runtime/c/utils/pgf-parse.c new file mode 100644 index 000000000..654f9d2e7 --- /dev/null +++ b/src/runtime/c/utils/pgf-parse.c @@ -0,0 +1,153 @@ +#include <gu/variant.h> +#include <gu/map.h> +#include <gu/dump.h> +#include <gu/log.h> +#include <gu/enum.h> +#include <gu/file.h> +#include <pgf/pgf.h> +#include <pgf/data.h> +#include <pgf/parser.h> +#include <pgf/lexer.h> +#include <pgf/literals.h> +#include <pgf/linearizer.h> +#include <pgf/expr.h> +#include <pgf/edsl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <time.h> + +int main(int argc, char* argv[]) { + // Set the character locale, so we can produce proper output. + setlocale(LC_CTYPE, ""); + + // Create the pool that is used to allocate everything + GuPool* pool = gu_new_pool(); + int status = EXIT_SUCCESS; + if (argc != 4) { + fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang\n", argv[0]); + status = EXIT_FAILURE; + goto fail; + } + char* filename = argv[1]; + GuString cat = gu_str_string(argv[2], pool); + GuString lang = gu_str_string(argv[3], pool); + + // Create an exception frame that catches all errors. + GuExn* err = gu_new_exn(NULL, gu_kind(type), pool); + + + clock_t start = clock(); + + // Read the PGF grammar. + PgfPGF* pgf = pgf_read(filename, pool, err); + + // If an error occured, it shows in the exception frame + if (!gu_ok(err)) { + fprintf(stderr, "Reading PGF failed\n"); + status = EXIT_FAILURE; + goto fail; + } + + // Look up the source and destination concrete categories + PgfConcr* concr = pgf_get_language(pgf, lang); + if (!concr) { + fprintf(stderr, "Unknown language\n"); + status = EXIT_FAILURE; + goto fail; + } + + /* // Register a callback for the literal category Symbol */ + /* pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool), */ + /* &pgf_nerc_literal_callback); */ + + clock_t end = clock(); + double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + + fprintf(stderr, "(%.0f ms) Ready to parse!\n", 1000.0 * cpu_time_used); + + // Create an output stream for stdout + GuOut* out = gu_file_out(stdout, pool); + + // Locale-encoding writers are currently unsupported + // GuWriter* wtr = gu_locale_writer(out, pool); + // Use a writer with hard-coded utf-8 encoding for now. + GuWriter* wtr = gu_new_utf8_writer(out, pool); + + // We will keep the latest results in the 'ppool' and + // we will iterate over them by using 'result'. + GuPool* ppool = NULL; + GuEnum* result = NULL; + + // The interactive PARSING loop. + // XXX: This currently reads stdin directly, so it doesn't support + // encodings properly. TODO: use a locale reader for input + for (int ctr = 0; true; ctr++) { + /* fprintf(stdout, "> "); */ + /* fflush(stdout); */ + char buf[4096]; + char* line = fgets(buf, sizeof(buf), stdin); + if (line == NULL) { + if (ferror(stdin)) { + fprintf(stderr, "Input error\n"); + status = EXIT_FAILURE; + } + break; + } else if (strcmp(line, "") == 0) { + // End nicely on empty input + break; + } else if (strcmp(line, "\n") == 0) { + // Empty line -> skip + continue; + } + + // We release the last results + if (ppool != NULL) { + gu_pool_free(ppool); + ppool = NULL; + result = NULL; + } + + // We create a temporary pool for translating a single + // sentence, so our memory usage doesn't increase over time. + ppool = gu_new_pool(); + + clock_t start = clock(); + + // Begin parsing a sentence of the specified category + PgfParseState* state = + pgf_parser_init_state(concr, cat, 0, ppool); + if (state == NULL) { + fprintf(stderr, "Couldn't begin parsing\n"); + status = EXIT_FAILURE; + break; + } + + GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool); + PgfLexer *lexer = pgf_new_lexer(rdr, ppool); + GuEnum* result = pgf_parse(concr, cat, lexer, ppool); + + PgfExprProb* ep = NULL; + if (result != NULL) + ep = gu_next(result, PgfExprProb*, ppool); + + clock_t end = clock(); + double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + + gu_printf(wtr, err, "%d (%.0f ms): ", ctr, 1000.0 * cpu_time_used); + if (ep != NULL) { + gu_printf(wtr, err, "[%.4f] (", ep->prob); + pgf_print_expr(ep->expr, 0, wtr, err); + gu_printf(wtr, err, ")\n"); + } else { + gu_printf(wtr, err, "---\n"); + } + gu_writer_flush(wtr, err); + } + + fail: + gu_pool_free(pool); + return status; +} + |
