diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2013-06-26 07:36:03 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2013-06-26 07:36:03 +0000 |
| commit | 3c2d1890d0c5de25bbaa7c582c20bcd67bc47d8c (patch) | |
| tree | 9091c47c4693ab1ef14b98fc76726fec8c56c2f5 /src/runtime/c/utils | |
| parent | d94b6146f2074608fba07421dd6185ac3a296680 (diff) | |
patch for adjustable heuristics from Python
Diffstat (limited to 'src/runtime/c/utils')
| -rw-r--r-- | src/runtime/c/utils/pgf-chunk.c | 112 | ||||
| -rw-r--r-- | src/runtime/c/utils/pgf-parse.c | 22 |
2 files changed, 9 insertions, 125 deletions
diff --git a/src/runtime/c/utils/pgf-chunk.c b/src/runtime/c/utils/pgf-chunk.c deleted file mode 100644 index 5f4b8972a..000000000 --- a/src/runtime/c/utils/pgf-chunk.c +++ /dev/null @@ -1,112 +0,0 @@ -// Don't give too much hope to this script. It is doing the wrong thing -// but let's see how far we can get with it. - -#include <gu/variant.h> -#include <gu/map.h> -#include <gu/dump.h> -#include <gu/log.h> -#include <gu/enum.h> -#include <gu/file.h> -#include <pgf/pgf.h> -#include <pgf/parser.h> -#include <pgf/lexer.h> -#include <pgf/literals.h> -#include <pgf/linearizer.h> -#include <pgf/expr.h> -#include <pgf/edsl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <locale.h> -#include <time.h> - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - // Create the pool that is used to allocate everything - GuPool* pool = gu_new_pool(); - int status = EXIT_SUCCESS; - if (argc != 4) { - fprintf(stderr, "usage: %s pgf cat from_lang\n", argv[0]); - status = EXIT_FAILURE; - goto fail; - } - char* filename = argv[1]; - - GuString cat = gu_str_string(argv[2], pool); - - GuString from_lang = gu_str_string(argv[3], pool); - - // Create an exception frame that catches all errors. - GuExn* err = gu_new_exn(NULL, gu_kind(type), pool); - - // Read the PGF grammar. - PgfPGF* pgf = pgf_read(filename, pool, err); - - // If an error occured, it shows in the exception frame - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = EXIT_FAILURE; - goto fail; - } - - pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool, err); - if (!gu_ok(err)) { - fprintf(stderr, "Loading meta child probs failed\n"); - status = EXIT_FAILURE; - goto fail; - } - - // Look up the source and destination concrete categories - PgfConcr* from_concr = pgf_get_language(pgf, from_lang); - if (!from_concr) { - fprintf(stderr, "Unknown language\n"); - status = EXIT_FAILURE; - goto fail_concr; - } - - // Register a callback for the literal category Symbol - pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool), - &pgf_nerc_literal_callback); - - // We will keep the latest results in the 'ppool' and - // we will iterate over them by using 'result'. - GuPool* ppool = NULL; - - // The interactive translation loop. - // XXX: This currently reads stdin directly, so it doesn't support - // encodings properly. TODO: use a locale reader for input - while (true) { - char buf[4096]; - char* line = fgets(buf, sizeof(buf), stdin); - if (line == NULL) { - if (ferror(stdin)) { - fprintf(stderr, "Input error\n"); - status = EXIT_FAILURE; - } - break; - } else if (strcmp(line, "") == 0) { - // End nicely on empty input - break; - } - - // We create a temporary pool for translating a single - // sentence, so our memory usage doesn't increase over time. - ppool = gu_new_pool(); - - GuReader *rdr = - gu_string_reader(gu_str_string(line, ppool), ppool); - PgfLexer *lexer = - pgf_new_simple_lexer(rdr, ppool); - - pgf_print_chunks(from_concr, cat, lexer, ppool); - - // Free all resources allocated during parsing and linearization - gu_pool_free(ppool); - } -fail_concr: -fail: - gu_pool_free(pool); - return status; -} diff --git a/src/runtime/c/utils/pgf-parse.c b/src/runtime/c/utils/pgf-parse.c index a05d7988b..ba1088890 100644 --- a/src/runtime/c/utils/pgf-parse.c +++ b/src/runtime/c/utils/pgf-parse.c @@ -25,8 +25,8 @@ int main(int argc, char* argv[]) { // Create the pool that is used to allocate everything GuPool* pool = gu_new_pool(); int status = EXIT_SUCCESS; - if (argc != 4) { - fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang\n", argv[0]); + if (argc < 4 || argc > 5) { + fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang [heuristics]\n(0.0 <= heuristics < 1.0, default: 0.95)\n", argv[0]); status = EXIT_FAILURE; goto fail; } @@ -34,6 +34,11 @@ int main(int argc, char* argv[]) { GuString cat = gu_str_string(argv[2], pool); GuString lang = gu_str_string(argv[3], pool); + double heuristics = 0.95; + if (argc == 5) { + heuristics = atof(argv[4]); + } + // Create an exception frame that catches all errors. GuExn* err = gu_new_exn(NULL, gu_kind(type), pool); @@ -65,7 +70,7 @@ int main(int argc, char* argv[]) { clock_t end = clock(); double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - fprintf(stderr, "(%.0f ms) Ready to parse!\n", 1000.0 * cpu_time_used); + fprintf(stderr, "(%.0f ms) Ready to parse [heuristics=%.2f]!\n", 1000.0 * cpu_time_used, heuristics); // Create an output stream for stdout GuOut* out = gu_file_out(stdout, pool); @@ -113,18 +118,9 @@ int main(int argc, char* argv[]) { clock_t start = clock(); - // Begin parsing a sentence of the specified category - PgfParseState* state = - pgf_parser_init_state(concr, cat, 0, ppool, ppool); - if (state == NULL) { - fprintf(stderr, "Couldn't begin parsing\n"); - status = EXIT_FAILURE; - break; - } - GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool); PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool); - GuEnum* result = pgf_parse(concr, cat, lexer, ppool, ppool); + GuEnum* result = pgf_parse_with_heuristics(concr, cat, lexer, heuristics, ppool, ppool); PgfExprProb* ep = NULL; if (result != NULL) |
