summaryrefslogtreecommitdiff
path: root/src/runtime/c/pgf/lexer.c
blob: 15caab1514bf1a6601930c949de1c4bf5c9e6e0a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include <gu/list.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <wctype.h>

/*
 * State of a token lexer reading code points from a GuReader.
 */
struct PgfLexer {
	GuReader *rdr;   /* input the code points are read from */
	GuPool   *pool;  /* pool passed in at construction; tokens are frozen into it */
	GuUCS     ucs;   /* one code point of lookahead: read but not yet consumed */
	PgfToken  tok;   /* token produced by the latest pgf_lexer_read_token call */
};

/*
 * Allocate a lexer from `pool` that will read its input from `rdr`.
 *
 * The lookahead starts out as a blank, so the first call to
 * pgf_lexer_read_token begins by pulling a code point from the reader,
 * and the current token starts out as the empty string.
 */
PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool)
{
	PgfLexer *fresh = gu_new(PgfLexer, pool);

	*fresh = (PgfLexer) {
		.rdr  = rdr,
		.pool = pool,
		.ucs  = ' ',
		.tok  = gu_empty_string,
	};

	return fresh;
}

/*
 * Advance the lookahead by one code point.
 *
 * A failed read is not reported to the caller: the exception is cleared
 * and the lookahead becomes a blank instead, which makes whatever token
 * is being scanned end at this point.
 */
static void
pgf_lexer_read_ucs(PgfLexer *lexer, GuExn* err)
{
	GuUCS next = gu_read_ucs(lexer->rdr, err);

	if (gu_exn_is_raised(err)) {
		gu_exn_clear(err);
		next = ' ';
	}

	lexer->ucs = next;
}

PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
{
	GuPool* tmp_pool = gu_new_pool();

	GuStringBuf* buf = gu_string_buf(tmp_pool);
	GuWriter* wtr = gu_string_buf_writer(buf);

	while (iswspace(lexer->ucs)) {
		lexer->ucs = gu_read_ucs(lexer->rdr, err);
		if (gu_exn_is_raised(err))
			goto stop;
	}

	if (iswalpha(lexer->ucs) ||
	    lexer->ucs == '\''   ||
	    lexer->ucs == '_') {
		int counter = 0;
		do {
			gu_ucs_write(lexer->ucs, wtr, err);
			if (gu_exn_is_raised(err))
				goto stop;
			counter++;
			pgf_lexer_read_ucs(lexer, err);

			if (lexer->ucs == '.' && counter < 4) {
				// perhaps an abreviation
				gu_ucs_write(lexer->ucs, wtr, err);
				if (gu_exn_is_raised(err))
					goto stop;
				counter = 0;
				pgf_lexer_read_ucs(lexer, err);
			}
		} while (iswalnum(lexer->ucs) ||
		         lexer->ucs == '\''   ||
		         lexer->ucs == '_');
	} else if (iswdigit(lexer->ucs) || lexer->ucs == '-') {
		if (lexer->ucs == '-') {
			gu_ucs_write(lexer->ucs, wtr, err);
			if (gu_exn_is_raised(err))
				goto stop;
				
			pgf_lexer_read_ucs(lexer, err);
			if (!iswdigit(lexer->ucs))
				goto stop;
		}

		do {
			gu_ucs_write(lexer->ucs, wtr, err);
			if (gu_exn_is_raised(err))
				goto stop;

			pgf_lexer_read_ucs(lexer, err);
		} while (iswdigit(lexer->ucs));
		
		if (lexer->ucs == '.') {
			gu_ucs_write(lexer->ucs, wtr, err);
			if (gu_exn_is_raised(err))
				goto stop;

			pgf_lexer_read_ucs(lexer, err);
			while (iswdigit(lexer->ucs)) {
				gu_ucs_write(lexer->ucs, wtr, err);
				if (gu_exn_is_raised(err))
					goto stop;
				pgf_lexer_read_ucs(lexer, err);
			}
		}
	} else {
		gu_ucs_write(lexer->ucs, wtr, err);
		if (gu_exn_is_raised(err))
			goto stop;
		pgf_lexer_read_ucs(lexer, err);
	}

stop:
	lexer->tok = gu_string_buf_freeze(buf, lexer->pool);

	gu_pool_free(tmp_pool);
	return lexer->tok;
}

/*
 * Return the token stored by the most recent pgf_lexer_read_token call
 * without reading any further input. Before the first read this is the
 * value the lexer was constructed with.
 */
PgfToken
pgf_lexer_current_token(PgfLexer *lexer)
{
	PgfToken current = lexer->tok;
	return current;
}