summaryrefslogtreecommitdiff
path: root/src/runtime/c/utils/pgf-parse.c
blob: 654f9d2e7aa3f16eccf3eb49c905a110d6f26438 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#include <gu/variant.h>
#include <gu/map.h>
#include <gu/dump.h>
#include <gu/log.h>
#include <gu/enum.h>
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/parser.h>
#include <pgf/lexer.h>
#include <pgf/literals.h>
#include <pgf/linearizer.h>
#include <pgf/expr.h>
#include <pgf/edsl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <time.h>

/**
 * Interactive parsing driver.
 *
 * Usage: <program> pgf-file start-cat cnc-lang
 *
 * Reads the PGF grammar named by argv[1], looks up the concrete language
 * argv[3], and then parses every non-empty line read from stdin as a
 * sentence of category argv[2]. For each line it prints, on stdout, the
 * line counter, the CPU time spent, and either the first result returned
 * by the parser (its `prob` field followed by the printed expression) or
 * "---" when there is no result. Diagnostics go to stderr.
 *
 * Returns EXIT_SUCCESS on normal termination (EOF or empty input) and
 * EXIT_FAILURE on bad usage, grammar loading failure, unknown language,
 * input/output errors, or failure to start parsing.
 */
int main(int argc, char* argv[]) {
  // Set the character locale, so we can produce proper output.
  setlocale(LC_CTYPE, "");

  // Create the pool that is used to allocate everything
  GuPool* pool = gu_new_pool();

  // Pool holding the results of the most recently parsed sentence.
  // It is declared (and initialized) before the first 'goto fail' so
  // that the cleanup code at 'fail' always sees a well-defined value.
  GuPool* ppool = NULL;

  int status = EXIT_SUCCESS;
  if (argc != 4) {
    fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang\n", argv[0]);
    status = EXIT_FAILURE;
    goto fail;
  }
  char* filename = argv[1];
  GuString cat = gu_str_string(argv[2], pool);
  GuString lang = gu_str_string(argv[3], pool);

  // Create an exception frame that catches all errors.
  GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);

  clock_t load_start = clock();

  // Read the PGF grammar.
  PgfPGF* pgf = pgf_read(filename, pool, err);

  // If an error occurred, it shows in the exception frame
  if (!gu_ok(err)) {
    fprintf(stderr, "Reading PGF failed\n");
    status = EXIT_FAILURE;
    goto fail;
  }

  // Look up the concrete language that we are going to parse with
  PgfConcr* concr = pgf_get_language(pgf, lang);
  if (!concr) {
    fprintf(stderr, "Unknown language\n");
    status = EXIT_FAILURE;
    goto fail;
  }

  /* // Register a callback for the literal category Symbol */
  /* pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool), */
  /*                        &pgf_nerc_literal_callback); */

  clock_t load_end = clock();
  double load_time = ((double) (load_end - load_start)) / CLOCKS_PER_SEC;

  fprintf(stderr, "(%.0f ms) Ready to parse!\n", 1000.0 * load_time);

  // Create an output stream for stdout
  GuOut* out = gu_file_out(stdout, pool);

  // Locale-encoding writers are currently unsupported
  // GuWriter* wtr = gu_locale_writer(out, pool);
  // Use a writer with hard-coded utf-8 encoding for now.
  GuWriter* wtr = gu_new_utf8_writer(out, pool);

  // The interactive PARSING loop.
  // XXX: This currently reads stdin directly, so it doesn't support
  // encodings properly. TODO: use a locale reader for input
  // Note: fgets splits lines longer than sizeof(buf)-1 bytes, so such a
  // line is handled as several consecutive inputs.
  for (int ctr = 0; ; ctr++) {
    /* fprintf(stdout, "> "); */
    /* fflush(stdout); */
    char buf[4096];
    char* line = fgets(buf, sizeof(buf), stdin);
    if (line == NULL) {
      // EOF or read error; only the latter is a failure.
      if (ferror(stdin)) {
        fprintf(stderr, "Input error\n");
        status = EXIT_FAILURE;
      }
      break;
    } else if (strcmp(line, "") == 0) {
      // End nicely on empty input
      break;
    } else if (strcmp(line, "\n") == 0) {
      // Empty line -> skip
      continue;
    }

    // We release the last results
    if (ppool != NULL) {
      gu_pool_free(ppool);
      ppool = NULL;
    }

    // We create a temporary pool for translating a single
    // sentence, so our memory usage doesn't increase over time.
    ppool = gu_new_pool();

    clock_t parse_start = clock();

    // Begin parsing a sentence of the specified category.
    // The returned state is not passed to pgf_parse below; it is only
    // checked here to detect that parsing cannot be started.
    PgfParseState* state =
      pgf_parser_init_state(concr, cat, 0, ppool);
    if (state == NULL) {
      fprintf(stderr, "Couldn't begin parsing\n");
      status = EXIT_FAILURE;
      break;
    }

    // Feed the input line to the parser through a lexer.
    GuReader* rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
    PgfLexer* lexer = pgf_new_lexer(rdr, ppool);
    GuEnum* result = pgf_parse(concr, cat, lexer, ppool);

    // Only the first result of the enumeration is fetched.
    PgfExprProb* ep = NULL;
    if (result != NULL) {
      ep = gu_next(result, PgfExprProb*, ppool);
    }

    clock_t parse_end = clock();
    double parse_time = ((double) (parse_end - parse_start)) / CLOCKS_PER_SEC;

    gu_printf(wtr, err, "%d (%.0f ms): ", ctr, 1000.0 * parse_time);
    if (ep != NULL) {
      gu_printf(wtr, err, "[%.4f] (", ep->prob);
      pgf_print_expr(ep->expr, 0, wtr, err);
      gu_printf(wtr, err, ")\n");
    } else {
      gu_printf(wtr, err, "---\n");
    }
    gu_writer_flush(wtr, err);

    // Stop instead of silently continuing once writing has failed.
    if (!gu_ok(err)) {
      fprintf(stderr, "Output error\n");
      status = EXIT_FAILURE;
      break;
    }
  }

 fail:
  // Release the results of the last sentence (if any) before the main
  // pool, since they were created from objects living in 'pool'.
  if (ppool != NULL) {
    gu_pool_free(ppool);
  }
  gu_pool_free(pool);
  return status;
}