summary | refs | log | tree | commit | diff
path: root/src/runtime
diff options
context:
space:
mode:
author: kr.angelov <kr.angelov@gmail.com> 2012-12-13 14:44:33 +0000
committer: kr.angelov <kr.angelov@gmail.com> 2012-12-13 14:44:33 +0000
commit: 14e721dda9a38762695ac5435c24818265629b02 (patch)
tree: 9dc11a3db929482f9b4f6cb8bf59978c9288d818 /src/runtime
parent: 68249a11d2daf6a7d639110e218418af84fa75d2 (diff)
a top-level API for parsing in the C runtime
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/c/pgf/data.c 1
-rw-r--r-- src/runtime/c/pgf/data.h 10
-rw-r--r-- src/runtime/c/pgf/expr.c 2
-rw-r--r-- src/runtime/c/pgf/expr.h 10
-rw-r--r-- src/runtime/c/pgf/lexer.c 20
-rw-r--r-- src/runtime/c/pgf/lexer.h 9
-rw-r--r-- src/runtime/c/pgf/pgf.c 74
-rw-r--r-- src/runtime/c/pgf/pgf.h 23
-rw-r--r-- src/runtime/c/utils/pgf-chunk.c 39
-rw-r--r-- src/runtime/c/utils/pgf-translate.c 41
10 files changed, 138 insertions, 91 deletions
diff --git a/src/runtime/c/pgf/data.c b/src/runtime/c/pgf/data.c
index dbb0b1899..d5607031b 100644
--- a/src/runtime/c/pgf/data.c
+++ b/src/runtime/c/pgf/data.c
@@ -1,5 +1,4 @@
#include "data.h"
-#include "expr.h"
#include <gu/type.h>
#include <gu/variant.h>
#include <gu/assert.h>
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h
index f5435cee5..267823127 100644
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -27,7 +27,6 @@
#include <gu/type.h>
#include <gu/seq.h>
#include <pgf/pgf.h>
-#include <pgf/expr.h>
typedef struct PgfCCat PgfCCat;
typedef PgfCCat* PgfCCatId;
@@ -123,15 +122,6 @@ struct PgfPGF {
extern GU_DECLARE_TYPE(PgfPGF, struct);
-typedef float prob_t;
-
-typedef struct {
- prob_t prob;
- PgfExpr expr;
-} PgfExprProb;
-
-extern GU_DECLARE_TYPE(PgfExprProb, struct);
-
struct PgfFunDecl {
PgfType* type;
int arity;
diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c
index 8f2fc875e..a90e9b474 100644
--- a/src/runtime/c/pgf/expr.c
+++ b/src/runtime/c/pgf/expr.c
@@ -1,4 +1,4 @@
-#include "expr.h"
+#include "pgf.h"
#include <gu/intern.h>
#include <gu/assert.h>
#include <ctype.h>
diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h
index f4d5881c7..4c1bddbae 100644
--- a/src/runtime/c/pgf/expr.h
+++ b/src/runtime/c/pgf/expr.h
@@ -5,7 +5,6 @@
#include <gu/write.h>
#include <gu/variant.h>
#include <gu/seq.h>
-#include <pgf/pgf.h>
/// Abstract syntax trees
/// @file
@@ -125,6 +124,15 @@ typedef struct {
PgfExpr expr;
} PgfExprImplArg;
+typedef float prob_t;
+
+typedef struct {
+ prob_t prob;
+ PgfExpr expr;
+} PgfExprProb;
+
+extern GU_DECLARE_TYPE(PgfExprProb, struct);
+
int
pgf_expr_arity(PgfExpr expr);
diff --git a/src/runtime/c/pgf/lexer.c b/src/runtime/c/pgf/lexer.c
index 05372eca0..acb4cd0c4 100644
--- a/src/runtime/c/pgf/lexer.c
+++ b/src/runtime/c/pgf/lexer.c
@@ -1,11 +1,13 @@
#include <gu/list.h>
-#include <pgf/lexer.h>
+#include <pgf/pgf.h>
#include <pgf/data.h>
#include <wctype.h>
struct PgfLexer {
GuReader* rdr;
+ GuPool* pool;
GuUCS ucs;
+ PgfToken tok;
};
PgfLexer*
@@ -13,17 +15,17 @@ pgf_new_lexer(GuReader *rdr, GuPool *pool)
{
PgfLexer* lexer = gu_new(PgfLexer, pool);
lexer->rdr = rdr;
+ lexer->pool = pool;
lexer->ucs = ' ';
+ lexer->tok = gu_empty_string;
return lexer;
}
PgfToken
-pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
+pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
- PgfToken tok;
-
GuStringBuf* buf = gu_string_buf(tmp_pool);
GuWriter* wtr = gu_string_buf_writer(buf);
@@ -109,8 +111,14 @@ pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
}
stop:
- tok = gu_string_buf_freeze(buf, pool);
+ lexer->tok = gu_string_buf_freeze(buf, lexer->pool);
gu_pool_free(tmp_pool);
- return tok;
+ return lexer->tok;
+}
+
+PgfToken
+pgf_lexer_current_token(PgfLexer *lexer)
+{
+ return lexer->tok;
}
diff --git a/src/runtime/c/pgf/lexer.h b/src/runtime/c/pgf/lexer.h
index 9bead9c7e..6f01d4d10 100644
--- a/src/runtime/c/pgf/lexer.h
+++ b/src/runtime/c/pgf/lexer.h
@@ -2,7 +2,9 @@
#define PGF_LEXER_H_
#include <gu/read.h>
-#include <pgf/data.h>
+
+/// A single lexical token
+typedef GuString PgfToken;
typedef struct PgfLexer PgfLexer;
@@ -10,6 +12,9 @@ PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool);
PgfToken
-pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);
+pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
+
+PgfToken
+pgf_lexer_current_token(PgfLexer *lexer);
#endif // PGF_LEXER_H_
diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c
index 6e54193dd..ceeff23bf 100644
--- a/src/runtime/c/pgf/pgf.c
+++ b/src/runtime/c/pgf/pgf.c
@@ -2,8 +2,12 @@
#include <pgf/data.h>
#include <pgf/expr.h>
#include <pgf/reader.h>
+#include <pgf/linearize.h>
+#include <pgf/parser.h>
+#include <pgf/lexer.h>
#include <gu/file.h>
#include <gu/string.h>
+#include <gu/enum.h>
#include <stdio.h>
#include <math.h>
@@ -167,3 +171,73 @@ pgf_print_name(PgfConcr* concr, PgfCId id)
name = id;
return name;
}
+
+void
+pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
+{
+ GuPool* tmp_pool = gu_local_pool();
+
+ GuEnum* cts =
+ pgf_lzr_concretize(concr, expr, tmp_pool);
+ PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
+ if (!gu_variant_is_null(ctree)) {
+ pgf_lzr_linearize_simple(concr, ctree, 0, wtr, err);
+ }
+
+ gu_pool_free(tmp_pool);
+}
+
+GuEnum*
+pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
+{
+ // Begin parsing a sentence of the specified category
+ PgfParseState* state =
+ pgf_parser_init_state(concr, cat, 0, pool);
+ if (state == NULL) {
+ return NULL;
+ }
+
+ // Tokenization
+ GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
+ PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
+ while (!gu_exn_is_raised(lex_err)) {
+ // feed the token to get a new parse state
+ state = pgf_parser_next_state(state, tok, pool);
+ if (state == NULL) {
+ return NULL;
+ }
+
+ tok = pgf_lexer_read_token(lexer, lex_err);
+ }
+
+ // Now begin enumerating the resulting syntax trees
+ return pgf_parse_result(state, pool);
+}
+
+void
+pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
+{
+ // Begin parsing a sentence of the specified category
+ PgfParseState* state =
+ pgf_parser_init_state(concr, cat, 0, pool);
+ if (state == NULL) {
+ printf("\n");
+ return;
+ }
+
+ // Tokenization
+ GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
+ PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
+ while (!gu_exn_is_raised(lex_err)) {
+ // feed the token to get a new parse state
+ state = pgf_parser_next_state(state, tok, pool);
+ if (state == NULL) {
+ printf("\n");
+ return;
+ }
+
+ tok = pgf_lexer_read_token(lexer, lex_err);
+ }
+
+ pgf_parse_print_chunks(state);
+}
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 40b290617..9963534b5 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -28,6 +28,7 @@
#include <gu/exn.h>
#include <gu/mem.h>
#include <gu/map.h>
+#include <gu/enum.h>
#include <gu/string.h>
@@ -37,19 +38,21 @@ extern GU_DECLARE_TYPE(PgfCId, typedef);
extern GU_DECLARE_TYPE(PgfExn, abstract);
-
-/// A single lexical token
-typedef GuString PgfToken;
-
/// @name PGF Grammar objects
/// @{
typedef struct PgfPGF PgfPGF;
+extern GU_DECLARE_TYPE(PgfPGF, struct);
+
typedef struct PgfConcr PgfConcr;
+extern GU_DECLARE_TYPE(PgfConcr, struct);
+
/**< A representation of a PGF grammar.
*/
+#include <pgf/expr.h>
+#include <pgf/lexer.h>
PgfPGF*
pgf_read(const char* fpath,
@@ -103,8 +106,16 @@ pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
GuString
pgf_print_name(PgfConcr*, PgfCId id);
-#include <gu/type.h>
-extern GU_DECLARE_TYPE(PgfPGF, struct);
+void
+pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
+
+GuEnum*
+pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
+
+// an experimental function. Please don't use it
+void
+pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
+
/// @}
diff --git a/src/runtime/c/utils/pgf-chunk.c b/src/runtime/c/utils/pgf-chunk.c
index d5e203368..575534cd3 100644
--- a/src/runtime/c/utils/pgf-chunk.c
+++ b/src/runtime/c/utils/pgf-chunk.c
@@ -70,14 +70,6 @@ int main(int argc, char* argv[]) {
pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
&pgf_nerc_literal_callback);
- // Create an output stream for stdout
- GuOut* out = gu_file_out(stdout, pool);
-
- // Locale-encoding writers are currently unsupported
- // GuWriter* wtr = gu_locale_writer(out, pool);
- // Use a writer with hard-coded utf-8 encoding for now.
- GuWriter* wtr = gu_new_utf8_writer(out, pool);
-
// We will keep the latest results in the 'ppool' and
// we will iterate over them by using 'result'.
GuPool* ppool = NULL;
@@ -103,42 +95,15 @@ int main(int argc, char* argv[]) {
// sentence, so our memory usage doesn't increase over time.
ppool = gu_new_pool();
- // Begin parsing a sentence of the specified category
- PgfParseState* state =
- pgf_parser_init_state(from_concr, cat, 0, ppool);
- if (state == NULL) {
- fprintf(stderr, "Couldn't begin parsing\n");
- status = EXIT_FAILURE;
- break;
- }
-
GuReader *rdr =
gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer =
pgf_new_lexer(rdr, ppool);
- // Tokenization
- GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
- PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
- while (!gu_exn_is_raised(lex_err)) {
- // feed the token to get a new parse state
- state = pgf_parser_next_state(state, tok, ppool);
- if (!state) {
- gu_puts("Unexpected token: \"", wtr, err);
- gu_string_write(tok, wtr, err);
- gu_puts("\"\n", wtr, err);
- goto fail_parse;
- }
-
- tok = pgf_lexer_next_token(lexer, lex_err, ppool);
- }
-
- pgf_parse_print_chunks(state);
- continue;
- fail_parse:
+ pgf_print_chunks(from_concr, cat, lexer, ppool);
+
// Free all resources allocated during parsing and linearization
gu_pool_free(ppool);
- ppool = NULL;
}
fail_concr:
fail:
diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c
index 2cf1dcfe7..03b3635f0 100644
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -9,7 +9,6 @@
#include <pgf/lexer.h>
#include <pgf/literals.h>
#include <pgf/linearize.h>
-#include <pgf/expr.h>
#include <pgf/edsl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -160,40 +159,29 @@ int main(int argc, char* argv[]) {
// sentence, so our memory usage doesn't increase over time.
ppool = gu_new_pool();
- clock_t start = clock();
-
- // Begin parsing a sentence of the specified category
- PgfParseState* state =
- pgf_parser_init_state(from_concr, cat, 0, ppool);
- if (state == NULL) {
- fprintf(stderr, "Couldn't begin parsing\n");
- status = EXIT_FAILURE;
- break;
- }
-
GuReader *rdr =
gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer =
pgf_new_lexer(rdr, ppool);
- // Tokenization
- GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
- PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
- while (!gu_exn_is_raised(lex_err)) {
- // feed the token to get a new parse state
- state = pgf_parser_next_state(state, tok, ppool);
- if (!state) {
+ clock_t start = clock();
+
+ GuEnum* result =
+ pgf_parse(from_concr, cat, lexer, ppool);
+ if (result == NULL) {
+ PgfToken tok =
+ pgf_lexer_current_token(lexer);
+
+ if (gu_string_eq(tok, gu_empty_string))
+ gu_puts("Couldn't begin parsing", wtr, err);
+ else {
gu_puts("Unexpected token: \"", wtr, err);
gu_string_write(tok, wtr, err);
gu_puts("\"\n", wtr, err);
- goto fail_parse;
}
-
- tok = pgf_lexer_next_token(lexer, lex_err, ppool);
- }
- // Now begin enumerating the resulting syntax trees
- result = pgf_parse_result(state, ppool);
+ goto fail_parse;
+ }
PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);
@@ -201,8 +189,7 @@ int main(int argc, char* argv[]) {
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("%.2f sec\n", cpu_time_used);
- // The enumerator will return a null variant at the
- // end of the results.
+ // The enumerator will return null at the end of the results.
if (ep == NULL) {
goto fail_parse;
}