summaryrefslogtreecommitdiff
path: root/src/runtime/c/pgf/lexer.c
blob: 48ab6eb702b855594047bfd1806aaaaaaf774e9b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <gu/utf8.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <wctype.h>

/*
 * State of the simple lexer: a PgfLexer that splits a UTF-8 input stream
 * into word, number and single-character tokens.
 */
typedef struct {
	// Generic lexer interface. Kept as the first member because the
	// code in this file casts between PgfLexer* and PgfSimpleLexer*.
	PgfLexer base;
	// Input stream the characters are decoded from (via gu_in_utf8).
	GuIn* in;
	// Pool in which the returned token strings are allocated.
	GuPool* pool;
	// One-character lookahead: the next character not yet consumed
	// by a token. Starts out as ' ' in pgf_new_simple_lexer.
	GuUCS ucs;
} PgfSimpleLexer;

/*
 * Advance the lookahead character of `lexer` by decoding one character
 * from its input.
 *
 * If decoding raises an exception on `err`, the exception is cleared and
 * the lookahead becomes a space instead, so callers in the middle of a
 * token simply see a token boundary.
 * NOTE(review): presumably the common case here is end of input, but any
 * exception raised by gu_in_utf8 is swallowed the same way.
 */
static void
pgf_lexer_read_ucs(PgfSimpleLexer *lexer, GuExn* err)
{
	GuUCS next = gu_in_utf8(lexer->in, err);

	if (gu_exn_is_raised(err)) {
		gu_exn_clear(err);
		next = ' ';
	}

	lexer->ucs = next;
}

static PgfToken
pgf_simple_lexer_read_token(PgfLexer *base, GuExn* err)
{
	PgfSimpleLexer* lexer = (PgfSimpleLexer*) base;
	GuPool* tmp_pool = gu_new_pool();

	GuStringBuf* buf = gu_string_buf(tmp_pool);
	GuOut* out = gu_string_buf_out(buf);

	while (iswspace(lexer->ucs)) {
		lexer->ucs = gu_in_utf8(lexer->in, err);
		if (gu_exn_is_raised(err))
			goto stop;
	}

	if (iswalpha(lexer->ucs) ||
	    lexer->ucs == '\''   ||
	    lexer->ucs == '_') {
		int counter = 0;
		do {
			gu_out_utf8(lexer->ucs, out, err);
			if (gu_exn_is_raised(err))
				goto stop;
			counter++;
			pgf_lexer_read_ucs(lexer, err);

			if (lexer->ucs == '.' && counter < 4) {
				// perhaps an abreviation
				gu_out_utf8(lexer->ucs, out, err);
				if (gu_exn_is_raised(err))
					goto stop;
				counter = 0;
				pgf_lexer_read_ucs(lexer, err);
			}
		} while (iswalnum(lexer->ucs) ||
		         lexer->ucs == '\''   ||
		         lexer->ucs == '_');
	} else if (iswdigit(lexer->ucs) || lexer->ucs == '-') {
		if (lexer->ucs == '-') {
			gu_out_utf8(lexer->ucs, out, err);
			if (gu_exn_is_raised(err))
				goto stop;
				
			pgf_lexer_read_ucs(lexer, err);
			if (!iswdigit(lexer->ucs))
				goto stop;
		}

		do {
			gu_out_utf8(lexer->ucs, out, err);
			if (gu_exn_is_raised(err))
				goto stop;

			pgf_lexer_read_ucs(lexer, err);
		} while (iswdigit(lexer->ucs));
		
		if (lexer->ucs == '.') {
			gu_out_utf8(lexer->ucs, out, err);
			if (gu_exn_is_raised(err))
				goto stop;

			pgf_lexer_read_ucs(lexer, err);
			while (iswdigit(lexer->ucs)) {
				gu_out_utf8(lexer->ucs, out, err);
				if (gu_exn_is_raised(err))
					goto stop;
				pgf_lexer_read_ucs(lexer, err);
			}
		}
	} else {
		gu_out_utf8(lexer->ucs, out, err);
		if (gu_exn_is_raised(err))
			goto stop;
		pgf_lexer_read_ucs(lexer, err);
	}

stop:
	lexer->base.tok = gu_string_buf_freeze(buf, lexer->pool);

	gu_pool_free(tmp_pool);
	return lexer->base.tok;
}

/*
 * Create a simple lexer that reads UTF-8 text from `in`.
 *
 * The lexer object is allocated in `pool`, and the same pool is later
 * used for the token strings it produces. The lookahead starts out as a
 * space, so the first read_token call begins by skipping whitespace and
 * thereby fetches the first real character from `in`.
 *
 * Returns the lexer through its generic PgfLexer interface.
 */
PgfLexer*
pgf_new_simple_lexer(GuIn *in, GuPool *pool)
{
	PgfSimpleLexer* self = gu_new(PgfSimpleLexer, pool);

	// Lexer-specific state.
	self->in = in;
	self->pool = pool;
	self->ucs = ' ';

	// Generic interface: no current token yet.
	self->base.tok = gu_empty_string;
	self->base.read_token = pgf_simple_lexer_read_token;

	return &self->base;
}

/*
 * Read the next token from `lexer` by dispatching to the lexer's own
 * read_token function. `err` is passed through to that function
 * unchanged; the returned token is whatever the implementation returns.
 */
PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
{
	PgfToken next = lexer->read_token(lexer, err);
	return next;
}

/*
 * Return the token currently stored in `lexer` (its `tok` field) without
 * reading anything from the input.
 */
PgfToken
pgf_lexer_current_token(PgfLexer *lexer)
{
	PgfToken current = lexer->tok;
	return current;
}