diff options
Diffstat (limited to 'src/runtime/c')
| -rw-r--r-- | src/runtime/c/Makefile.am | 14 | ||||
| -rw-r--r-- | src/runtime/c/pgf/parser.c | 16 | ||||
| -rw-r--r-- | src/runtime/c/pgf/parseval.c | 4 | ||||
| -rw-r--r-- | src/runtime/c/pgf/pgf.c | 35 | ||||
| -rw-r--r-- | src/runtime/c/pgf/pgf.h | 16 | ||||
| -rw-r--r-- | src/runtime/c/pgf/reasoner.c | 6 | ||||
| -rw-r--r-- | src/runtime/c/utils/pgf-parse.c | 133 | ||||
| -rw-r--r-- | src/runtime/c/utils/pgf-print.c | 36 | ||||
| -rw-r--r-- | src/runtime/c/utils/pgf-translate.c | 203 |
9 files changed, 45 insertions, 418 deletions
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index af377ab62..a429f5f92 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -92,19 +92,7 @@ libsg_la_SOURCES = \ sg/sg.c libsg_la_LIBADD = libgu.la libpgf.la -bin_PROGRAMS = \ - utils/pgf-print \ - utils/pgf-translate \ - utils/pgf-parse - -utils_pgf_print_SOURCES = utils/pgf-print.c -utils_pgf_print_LDADD = libpgf.la libgu.la - -utils_pgf_translate_SOURCES = utils/pgf-translate.c -utils_pgf_translate_LDADD = libpgf.la libgu.la - -utils_pgf_parse_SOURCES = utils/pgf-parse.c -utils_pgf_parse_LDADD = libpgf.la libgu.la +bin_PROGRAMS = AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2 ACLOCAL_AMFLAGS = -I m4 diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 8843a5f37..fb2fbfc22 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -2106,16 +2106,16 @@ pgf_parsing_last_token(PgfParsing* ps, GuPool* pool) } GuEnum* -pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence, GuExn* err, GuPool* pool, GuPool* out_pool) { PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool); - return pgf_parse_with_heuristics(concr, cat, sentence, -1.0, callbacks, err, pool, out_pool); + return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool); } GuEnum* -pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence, double heuristics, PgfCallbacksMap* callbacks, GuExn* err, @@ -2132,7 +2132,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence, // Begin parsing a sentence with the specified category PgfParsing* ps = - pgf_parsing_init(concr, cat, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool); + pgf_parsing_init(concr, typ->cid, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool); if (ps == NULL) { return NULL; } @@ -2159,7 +2159,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence, } PgfExprEnum* -pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat, +pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ, GuString sentence, PgfOracleCallback* oracle, GuExn* err, @@ -2177,7 +2177,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat, // Begin parsing a sentence with the specified category PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool); PgfParsing* ps = - pgf_parsing_init(concr, cat, 0, sentence, -1, callbacks, oracle, err, pool, out_pool); + pgf_parsing_init(concr, typ->cid, 0, sentence, -1, callbacks, oracle, err, pool, out_pool); if (ps == NULL) { return NULL; } @@ -2223,7 +2223,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool) } GuEnum* -pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence, GuString prefix, GuExn *err, GuPool* pool) { if (concr->sequences == NULL || @@ -2239,7 +2239,7 @@ pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence, PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, pool); PgfParsing* ps = - pgf_parsing_init(concr, cat, 0, sentence, -1.0, callbacks, NULL, err, pool, pool); + pgf_parsing_init(concr, type->cid, 0, sentence, -1.0, callbacks, NULL, err, pool, pool); if (ps == NULL) { return NULL; } diff --git a/src/runtime/c/pgf/parseval.c b/src/runtime/c/pgf/parseval.c index cbea3d429..7ef41577d 100644 --- a/src/runtime/c/pgf/parseval.c +++ b/src/runtime/c/pgf/parseval.c @@ -129,7 +129,7 @@ static PgfLinFuncs pgf_metrics_lin_funcs2 = { }; bool -pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, +pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type, double *precision, double *recall, double *exact) { GuPool* pool = gu_new_pool(); @@ -174,7 +174,7 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, gu_string_buf_freeze(sbuf, pool); GuEnum* en_trees = - pgf_parse(concr, cat, sentence, + pgf_parse(concr, type, sentence, state.err, pool, pool); PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool); if (ep == NULL) { diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index 370b9411b..fe9c1d140 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -86,24 +86,35 @@ pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err) } } -PgfCId -pgf_start_cat(PgfPGF* pgf) +PgfType* +pgf_start_cat(PgfPGF* pgf, GuPool* pool) { PgfFlag* flag = gu_seq_binsearch(pgf->abstract.aflags, pgf_flag_order, PgfFlag, "startcat"); - if (flag == NULL) - return "S"; - - GuVariantInfo i = gu_variant_open(flag->value); - switch (i.tag) { - case PGF_LITERAL_STR: { - PgfLiteralStr *lstr = (PgfLiteralStr *) i.data; - return lstr->val; - } + if (flag != NULL) { + GuVariantInfo i = gu_variant_open(flag->value); + switch (i.tag) { + case PGF_LITERAL_STR: { + PgfLiteralStr *lstr = (PgfLiteralStr *) i.data; + + GuPool* tmp_pool = gu_local_pool(); + GuIn* in = gu_string_in(lstr->val,tmp_pool); + GuExn* err = gu_new_exn(tmp_pool); + PgfType *type = pgf_read_type(in, pool, err); + if (!gu_ok(err)) + break; + gu_pool_free(tmp_pool); + return type; + } + } } - return "S"; + PgfType* type = gu_new_flex(pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = "S"; + type->n_exprs = 0; + return type; } GuString diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 365d20d73..e5679a5e6 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -53,8 +53,8 @@ pgf_language_code(PgfConcr* concr); void pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err); -PgfCId -pgf_start_cat(PgfPGF* pgf); +PgfType* +pgf_start_cat(PgfPGF* pgf, GuPool* pool); void pgf_iter_functions(PgfPGF* pgf, GuMapItor* itor, GuExn* err); @@ -89,7 +89,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr, GuExn* err, GuPool* pool); bool -pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, +pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type, double *precision, double *recall, double *exact); PgfExpr @@ -97,11 +97,11 @@ pgf_compute(PgfPGF* pgf, PgfExpr expr, GuExn* err, GuPool* pool, GuPool* out_pool); PgfExprEnum* -pgf_generate_all(PgfPGF* pgf, PgfCId cat, +pgf_generate_all(PgfPGF* pgf, PgfType* ty, GuExn* err, GuPool* pool, GuPool* out_pool); PgfExprEnum* -pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence, GuExn* err, GuPool* pool, GuPool* out_pool); typedef struct PgfMorphoCallback PgfMorphoCallback; @@ -134,7 +134,7 @@ pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix, typedef GuMap PgfCallbacksMap; PgfExprEnum* -pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, +pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence, double heuristics, PgfCallbacksMap* callbacks, GuExn* err, @@ -159,7 +159,7 @@ struct PgfOracleCallback { }; PgfExprEnum* -pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat, +pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ, GuString sentence, PgfOracleCallback* oracle, GuExn* err, @@ -172,7 +172,7 @@ typedef struct { } PgfTokenProb; GuEnum* -pgf_complete(PgfConcr* concr, PgfCId cat, GuString string, +pgf_complete(PgfConcr* concr, PgfType* type, GuString string, GuString prefix, GuExn* err, GuPool* pool); typedef struct PgfLiteralCallback PgfLiteralCallback; diff --git a/src/runtime/c/pgf/reasoner.c b/src/runtime/c/pgf/reasoner.c index 75f7ee0c6..5d604a4cc 100644 --- a/src/runtime/c/pgf/reasoner.c +++ b/src/runtime/c/pgf/reasoner.c @@ -454,7 +454,7 @@ pgf_new_reasoner(PgfPGF* pgf, GuExn* err, GuPool* pool, GuPool* out_pool) } PgfExprEnum* -pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_pool) +pgf_generate_all(PgfPGF* pgf, PgfType* typ, GuExn* err, GuPool* pool, GuPool* out_pool) { PgfReasoner* rs = pgf_new_reasoner(pgf, err, pool, out_pool); @@ -462,9 +462,9 @@ pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_ answers->parents = gu_new_buf(PgfExprState*, rs->pool); answers->exprs = rs->exprs; answers->outside_prob = 0; - gu_map_put(rs->table, cat, PgfAnswers*, answers); + gu_map_put(rs->table, typ->cid, PgfAnswers*, answers); - PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, cat); + PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, typ->cid); if (abscat != NULL) { rs->start = gu_new(PgfClosure, rs->pool); rs->start->code = abscat->predicate; diff --git a/src/runtime/c/utils/pgf-parse.c b/src/runtime/c/utils/pgf-parse.c deleted file mode 100644 index 088fe409d..000000000 --- a/src/runtime/c/utils/pgf-parse.c +++ /dev/null @@ -1,133 +0,0 @@ -#include <gu/variant.h> -#include <gu/map.h> -#include <gu/enum.h> -#include <gu/file.h> -#include <pgf/pgf.h> -#include <pgf/data.h> -#include <pgf/literals.h> -#include <pgf/linearizer.h> -#include <pgf/expr.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <locale.h> -#include <time.h> - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - // Create the pool that is used to allocate everything - GuPool* pool = gu_new_pool(); - int status = EXIT_SUCCESS; - if (argc < 4 || argc > 5) { - fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang [heuristics]\n(0.0 <= heuristics < 1.0, default: 0.95)\n", argv[0]); - status = EXIT_FAILURE; - goto fail; - } - char* filename = argv[1]; - GuString cat = argv[2]; - GuString lang = argv[3]; - - double heuristics = 0.95; - if (argc == 5) { - heuristics = atof(argv[4]); - } - - // Create an exception frame that catches all errors. - GuExn* err = gu_new_exn(pool); - - - clock_t start = clock(); - - // Read the PGF grammar. - PgfPGF* pgf = pgf_read(filename, pool, err); - - // If an error occured, it shows in the exception frame - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = EXIT_FAILURE; - goto fail; - } - - // Look up the source and destination concrete categories - PgfConcr* concr = pgf_get_language(pgf, lang); - if (!concr) { - fprintf(stderr, "Unknown language\n"); - status = EXIT_FAILURE; - goto fail; - } - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - - fprintf(stderr, "(%.0f ms) Ready to parse [heuristics=%.2f]!\n", 1000.0 * cpu_time_used, heuristics); - - // Create an output stream for stdout - GuOut* out = gu_file_out(stdout, pool); - - // We will keep the latest results in the 'ppool' and - // we will iterate over them by using 'result'. - GuPool* ppool = NULL; - - // The interactive PARSING loop. - // XXX: This currently reads stdin directly, so it doesn't support - // encodings properly. TODO: use a locale reader for input - for (int ctr = 0; true; ctr++) { - // We release the last results - if (ppool != NULL) { - gu_pool_free(ppool); - ppool = NULL; - } - - /* fprintf(stdout, "> "); */ - /* fflush(stdout); */ - char buf[4096]; - char* line = fgets(buf, sizeof(buf), stdin); - if (line == NULL) { - if (ferror(stdin)) { - fprintf(stderr, "Input error\n"); - status = EXIT_FAILURE; - } - break; - } else if (strcmp(line, "") == 0) { - // End nicely on empty input - break; - } else if (strcmp(line, "\n") == 0) { - // Empty line -> skip - continue; - } - - // We create a temporary pool for translating a single - // sentence, so our memory usage doesn't increase over time. - ppool = gu_new_pool(); - - clock_t start = clock(); - - GuExn* parse_err = gu_new_exn(ppool); - PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, ppool); - GuEnum* result = pgf_parse_with_heuristics(concr, cat, line, heuristics, callbacks, parse_err, ppool, ppool); - - PgfExprProb* ep = NULL; - if (gu_ok(parse_err)) - ep = gu_next(result, PgfExprProb*, ppool); - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - - gu_printf(out, err, "%d (%.0f ms): ", ctr, 1000.0 * cpu_time_used); - if (ep != NULL) { - gu_printf(out, err, "[%.4f] (", ep->prob); - pgf_print_expr(ep->expr, NULL, 0, out, err); - gu_printf(out, err, ")\n"); - } else { - gu_printf(out, err, "---\n"); - } - gu_out_flush(out, err); - } - - fail: - gu_pool_free(pool); - return status; -} - diff --git a/src/runtime/c/utils/pgf-print.c b/src/runtime/c/utils/pgf-print.c deleted file mode 100644 index 07b343a4d..000000000 --- a/src/runtime/c/utils/pgf-print.c +++ /dev/null @@ -1,36 +0,0 @@ -#include <pgf/pgf.h> -#include <pgf/data.h> - -#include <gu/file.h> -#include <gu/utf8.h> - -#include <locale.h> -#include <stdlib.h> - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - if (argc != 2) { - fprintf(stderr, "usage: %s pgf\n", argv[0]); - return EXIT_FAILURE; - } - char* filename = argv[1]; - - GuPool* pool = gu_new_pool(); - GuExn* err = gu_exn(pool); - PgfPGF* pgf = pgf_read(filename, pool, err); - int status = 0; - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = 1; - goto fail_read; - } - GuOut* out = gu_file_out(stdout, pool); - pgf_print(pgf, out, err); - gu_out_flush(out, err); -fail_read: - gu_pool_free(pool); - return status; -} - diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c deleted file mode 100644 index 00506e4cc..000000000 --- a/src/runtime/c/utils/pgf-translate.c +++ /dev/null @@ -1,203 +0,0 @@ -#include <gu/variant.h> -#include <gu/map.h> -#include <gu/enum.h> -#include <gu/file.h> -#include <gu/exn.h> -#include <pgf/pgf.h> -#include <pgf/literals.h> -#include <pgf/linearizer.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <locale.h> -#include <time.h> - -static void -print_result(PgfExprProb* ep, PgfConcr* to_concr, - GuOut* out, GuExn* err, GuPool* ppool) -{ - // Write out the abstract syntax tree - gu_printf(out, err, " [%f] ", ep->prob); - pgf_print_expr(ep->expr, NULL, 0, out, err); - gu_putc('\n', out, err); - - // Enumerate the concrete syntax trees corresponding - // to the abstract tree. - GuEnum* cts = pgf_lzr_concretize(to_concr, ep->expr, err, ppool); - while (true) { - PgfCncTree ctree = - gu_next(cts, PgfCncTree, ppool); - if (gu_variant_is_null(ctree)) { - break; - } - gu_putc(' ', out, err); - // Linearize the concrete tree as a simple - // sequence of strings. - pgf_lzr_linearize_simple(to_concr, ctree, 0, out, err, ppool); - - if (gu_exn_caught(err, PgfLinNonExist)) { - // encountered nonExist. Unfortunately there - // might be some output printed already. The - // right solution should be to use GuStringBuf. - gu_exn_clear(err); - } - gu_putc('\n', out, err); - gu_out_flush(out, err); - } -} - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - // Create the pool that is used to allocate everything - GuPool* pool = gu_new_pool(); - int status = EXIT_SUCCESS; - if (argc < 5) { - fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]); - status = EXIT_FAILURE; - goto fail; - } - - GuString filename = argv[1]; - GuString cat = argv[2]; - GuString from_lang = argv[3]; - GuString to_lang = argv[4]; - - // Create an exception frame that catches all errors. - GuExn* err = gu_new_exn(pool); - - // Read the PGF grammar. - PgfPGF* pgf = pgf_read(filename, pool, err); - - // If an error occured, it shows in the exception frame - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = EXIT_FAILURE; - goto fail; - } - - // Look up the source and destination concrete categories - PgfConcr* from_concr = pgf_get_language(pgf, from_lang); - PgfConcr* to_concr = pgf_get_language(pgf, to_lang); - if (!from_concr || !to_concr) { - fprintf(stderr, "Unknown language\n"); - status = EXIT_FAILURE; - goto fail_concr; - } - - // Register a callback for the literal category Symbol - PgfCallbacksMap* callbacks = - pgf_new_callbacks_map(from_concr, pool); - pgf_callbacks_map_add_literal(from_concr, callbacks, - "PN", &pgf_nerc_literal_callback); - pgf_callbacks_map_add_literal(from_concr, callbacks, - "Symb", &pgf_unknown_literal_callback); - - // Create an output stream for stdout - GuOut* out = gu_file_out(stdout, pool); - - // We will keep the latest results in the 'ppool' and - // we will iterate over them by using 'result'. - GuPool* ppool = NULL; - GuEnum* result = NULL; - - // The interactive translation loop. - // XXX: This currently reads stdin directly, so it doesn't support - // encodings properly. TODO: use a locale reader for input - while (true) { - fprintf(stdout, "> "); - fflush(stdout); - char buf[4096]; - char* line = fgets(buf, sizeof(buf), stdin); - if (line == NULL) { - if (ferror(stdin)) { - fprintf(stderr, "Input error\n"); - status = EXIT_FAILURE; - } - break; - } else if (strcmp(line, "") == 0) { - // End nicely on empty input - break; - } else if (strcmp(line, "\n") == 0) { - // Empty line -> show the next tree for the last sentence - - if (result != NULL) { - clock_t start = clock(); - - PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool); - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - printf("%.2f sec\n", cpu_time_used); - - // The enumerator will return a null variant at the - // end of the results. - if (ep == NULL) { - goto fail_parse; - } - - print_result(ep, to_concr, out, err, ppool); - } - continue; - } - - // We release the last results - if (ppool != NULL) { - gu_pool_free(ppool); - ppool = NULL; - result = NULL; - } - - // We create a temporary pool for translating a single - // sentence, so our memory usage doesn't increase over time. - ppool = gu_new_pool(); - - clock_t start = clock(); - - GuExn* parse_err = gu_new_exn(ppool); - result = - pgf_parse_with_heuristics(from_concr, cat, line, - -1, callbacks, - parse_err, ppool, ppool); - if (!gu_ok(parse_err)) { - if (gu_exn_caught(parse_err, PgfExn)) { - GuString msg = gu_exn_caught_data(parse_err); - gu_string_write(msg, out, err); - gu_putc('\n', out, err); - } else if (gu_exn_caught(parse_err, PgfParseError)) { - gu_puts("Unexpected token: \"", out, err); - GuString tok = gu_exn_caught_data(parse_err); - gu_string_write(tok, out, err); - gu_puts("\"\n", out, err); - } - - goto fail_parse; - } - - PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool); - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - printf("%.2f sec\n", cpu_time_used); - - // The enumerator will return null at the end of the results. - if (ep == NULL) { - goto fail_parse; - } - - print_result(ep, to_concr, out, err, ppool); - - continue; - fail_parse: - // Free all resources allocated during parsing and linearization - gu_pool_free(ppool); - ppool = NULL; - result = NULL; - } -fail_concr: -fail: - gu_pool_free(pool); - return status; -} - |
