summary refs log tree commit diff
path: root/src/runtime
diff options
context:
space:
mode:
author kr.angelov <kr.angelov@gmail.com> 2012-03-09 09:14:44 +0000
committer kr.angelov <kr.angelov@gmail.com> 2012-03-09 09:14:44 +0000
commit 1726995921f6c05686b2b06f672b1376e589f8ac (patch)
tree bdec5f4aad7dc5c07eebd7869e5c4453047a50c7 /src/runtime
parent d536d02d9bd3a31ca2bb105ab69ad201015e8c97 (diff)
libpgf: added simple lexer
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/c/Makefile.am 3
-rw-r--r-- src/runtime/c/pgf/lexer.c 103
-rw-r--r-- src/runtime/c/pgf/lexer.h 15
-rw-r--r-- src/runtime/c/pgf/parser.c 4
-rw-r--r-- src/runtime/c/utils/pgf-translate.c 23
5 files changed, 138 insertions, 10 deletions
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 710dd7330..35d79afea 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -47,6 +47,7 @@ pgfinclude_HEADERS = \
pgf/expr.h \
pgf/linearize.h \
pgf/parser.h \
+ pgf/lexer.h \
pgf/pgf.h
libgu_la_SOURCES = \
@@ -87,6 +88,8 @@ libpgf_la_SOURCES = \
pgf/expr.h \
pgf/parser.c \
pgf/parser.h \
+ pgf/lexer.c \
+ pgf/lexer.h \
pgf/reader.c \
pgf/linearize.c \
pgf/printer.c
diff --git a/src/runtime/c/pgf/lexer.c b/src/runtime/c/pgf/lexer.c
new file mode 100644
index 000000000..fd196bc1b
--- /dev/null
+++ b/src/runtime/c/pgf/lexer.c
@@ -0,0 +1,103 @@
+#include <gu/list.h>
+#include <pgf/lexer.h>
+#include <pgf/data.h>
+#include <wctype.h>
+
+struct PgfLexer { /* Lexer state: an input reader plus one code point of lookahead. */
+ GuReader* rdr; /* reader that pgf_lexer_next_token() pulls code points from via gu_read_ucs() */
+ GuUCS ucs; /* last code point read from rdr that has not yet been written into a token */
+};
+
+PgfLexer* /* Creates a lexer (obtained with gu_new from pool) that will read from rdr. */
+pgf_new_lexer(GuReader *rdr, GuPool *pool)
+{
+ PgfLexer* lexer = gu_new(PgfLexer, pool);
+ lexer->rdr = rdr;
+ lexer->ucs = ' '; /* seed the lookahead with a space so the first token request begins by reading from rdr */
+ return lexer;
+}
+
+PgfToken /* Reads one token from lexer->rdr and returns its text, frozen with pool as the target. */
+pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
+{
+ GuPool* tmp_pool = gu_new_pool(); /* scratch pool backing the string buffer; freed before returning */
+
+ PgfToken tok;
+
+ GuStringBuf* buf = gu_string_buf(tmp_pool);
+ GuWriter* wtr = gu_string_buf_writer(buf);
+ /* Skip whitespace held in the lookahead and any that follows it. */
+ while (iswspace(lexer->ucs)) {
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ }
+ /* Choose the token class from the first non-space code point. */
+ if (iswalpha(lexer->ucs) || /* word: starts with a letter, ' or _ ... */
+ lexer->ucs == '\'' ||
+ lexer->ucs == '_') {
+ do {
+ gu_ucs_write(lexer->ucs, wtr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ } while (iswalnum(lexer->ucs) || /* ... and continues over letters, digits, ' and _ */
+ lexer->ucs == '\'' ||
+ lexer->ucs == '_');
+ } else if (iswdigit(lexer->ucs) || lexer->ucs == '-') { /* number: optional '-', digits, optional '.' plus digits */
+ if (lexer->ucs == '-') {
+ gu_ucs_write(lexer->ucs, wtr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ /* A '-' that is not followed by a digit becomes a token by itself. */
+ if (!iswdigit(lexer->ucs))
+ goto stop;
+ }
+ /* Integer part: at least one digit is in the lookahead here. */
+ do {
+ gu_ucs_write(lexer->ucs, wtr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ } while (iswdigit(lexer->ucs));
+ /* Optional fraction; the '.' is written into the token even when no digit follows it. */
+ if (lexer->ucs == '.') {
+ gu_ucs_write(lexer->ucs, wtr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+
+ while (iswdigit(lexer->ucs)) {
+ gu_ucs_write(lexer->ucs, wtr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ }
+ }
+ } else { /* anything else: the single code point is the whole token */
+ gu_ucs_write(lexer->ucs, wtr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ lexer->ucs = gu_read_ucs(lexer->rdr, err);
+ if (gu_exn_is_raised(err))
+ goto stop;
+ }
+ /* NOTE(review): if gu_read_ucs raises right after a token's last code point (presumably at end of input), that token is returned with err raised; a caller that stops on err without using the result drops it - confirm input always ends in whitespace. */
+stop: /* reached on normal completion and after every raised exception; text gathered so far is still returned */
+ tok = gu_string_buf_freeze(buf, pool); /* presumably places the text in the caller's pool so it outlives tmp_pool - confirm against gu_string_buf_freeze */
+
+ gu_pool_free(tmp_pool);
+ return tok;
+}
diff --git a/src/runtime/c/pgf/lexer.h b/src/runtime/c/pgf/lexer.h
new file mode 100644
index 000000000..9bead9c7e
--- /dev/null
+++ b/src/runtime/c/pgf/lexer.h
@@ -0,0 +1,15 @@
+#ifndef PGF_LEXER_H_
+#define PGF_LEXER_H_
+ /* Public interface of the simple libpgf lexer implemented in pgf/lexer.c. */
+#include <gu/read.h>
+#include <pgf/data.h>
+ /* Opaque lexer handle; the struct body is defined in lexer.c. */
+typedef struct PgfLexer PgfLexer;
+ /* Creates a lexer that reads code points from rdr; the lexer object comes from pool. */
+PgfLexer*
+pgf_new_lexer(GuReader *rdr, GuPool *pool);
+ /* Returns the next token's text (frozen with pool as target); check err after each call, since a value is returned even when err is raised. */
+PgfToken
+pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);
+
+#endif // PGF_LEXER_H_
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 8190a9df1..5cd6e2bda 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -953,8 +953,8 @@ typedef struct {
GuPool *pool;
} PgfParseTokenCallback;
-static
-void pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
+static void
+pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
{
PgfParseTokenCallback *clo = (PgfParseTokenCallback *) self;
diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c
index 751bcf15b..b2a8bae59 100644
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -7,6 +7,7 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/parser.h>
+#include <pgf/lexer.h>
#include <pgf/linearize.h>
#include <pgf/expr.h>
#include <pgf/edsl.h>
@@ -121,20 +122,26 @@ int main(int argc, char* argv[]) {
status = EXIT_FAILURE;
break;
}
+
+ GuReader *rdr =
+ gu_string_reader(gu_str_string(line, pool), pool);
+ PgfLexer *lexer =
+ pgf_new_lexer(rdr, pool);
// naive tokenization
- char* tok = strtok(line, " \n");
- while (tok) {
- GuString tok_s = gu_str_string(tok, pool);
- gu_debug("parsing token \"%s\"", tok);
+ GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
+ PgfToken tok = pgf_lexer_next_token(lexer, lex_err, pool);
+ while (!gu_exn_is_raised(lex_err)) {
// feed the token to get a new parse state
- parse = pgf_parse_token(parse, tok_s, robust_mode, ppool);
+ parse = pgf_parse_token(parse, tok, robust_mode, ppool);
if (!parse) {
- fprintf(stderr,
- "Unexpected token: \"%s\"\n", tok);
+ gu_puts("Unexpected token: \"", wtr, err);
+ gu_string_write(tok, wtr, err);
+ gu_puts("\"\n", wtr, err);
goto fail_parse;
}
- tok = strtok(NULL, " \n");
+
+ tok = pgf_lexer_next_token(lexer, lex_err, pool);
}
if (robust_mode) {