diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2013-09-13 08:28:20 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2013-09-13 08:28:20 +0000 |
| commit | 2e8f05516416095dc5bee68552bfdeb4f88c10e5 (patch) | |
| tree | 8ca3c368579da84c906749d84ab63703ca769d5f /src/runtime/c/gu/utf8.c | |
| parent | 4d8ed02f6d091eb5e45c87a9b355d22298e4551b (diff) | |
optimize the reading of strings in the grammar loader.
Diffstat (limited to 'src/runtime/c/gu/utf8.c')
| -rw-r--r-- | src/runtime/c/gu/utf8.c | 49 |
1 files changed, 45 insertions, 4 deletions
diff --git a/src/runtime/c/gu/utf8.c b/src/runtime/c/gu/utf8.c
index 38eb91a9f..2377f1ac2 100644
--- a/src/runtime/c/gu/utf8.c
+++ b/src/runtime/c/gu/utf8.c
@@ -72,8 +72,8 @@ fail:
 	return 0;
 }
 
-extern inline void
-gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err);
+extern inline GuUCS
+gu_in_utf8(GuIn* in, GuExn* err);
 
 static size_t
 gu_advance_utf8(GuUCS ucs, uint8_t* buf)
@@ -121,5 +121,66 @@ gu_out_utf8_(GuUCS ucs, GuOut* out, GuExn* err)
 	}
 }
 
-extern inline GuUCS
-gu_in_utf8(GuIn* in, GuExn* err);
+extern inline void
+gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err);
+
+// Copy one UTF-8 encoded character from `in` into the buffer at *buf
+// without decoding it.
+//
+// The lead byte is read first and determines how many continuation
+// bytes (0-3) follow; those are then copied verbatim.  On success *buf
+// is advanced past every byte written, so repeated calls append
+// consecutive characters.  On failure *buf is left unchanged, although
+// bytes may already have been stored beyond it.
+//
+// The caller must provide room for up to 4 bytes at *buf.
+//
+// Raises GuUCSExn on `err` if the first byte cannot start a UTF-8
+// sequence, or if the input ends in the middle of a character.  Any
+// other error state on `err` is returned to the caller as is.
+//
+// NOTE(review): continuation bytes are copied without checking that
+// they have the form 10xxxxxx -- confirm that callers validate later.
+void
+gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err)
+{
+	uint8_t* p = *buf;
+
+	uint8_t c = gu_in_u8(in, err);
+	if (!gu_ok(err)) {
+		return;
+	}
+	*(p++) = c;
+	// Number of continuation bytes implied by the lead byte, or -1
+	// for a byte that cannot start a sequence (0x80-0xc1, 0xf5-0xff).
+	int len = (c < 0x80 ? 0 :
+	           c < 0xc2 ? -1 :
+	           c < 0xe0 ? 1 :
+	           c < 0xf0 ? 2 :
+	           c < 0xf5 ? 3 :
+	           -1);
+	if (len < 0) {
+		goto fail;
+	} else if (len == 0) {
+		*buf = p;
+		return;
+	}
+	// If reading the extra bytes causes EOF, it is an encoding
+	// error, not a legitimate end of character stream.
+	GuExn* tmp_err = gu_exn(err, GuEOF, NULL);
+	gu_in_bytes(in, p, len, tmp_err);
+	if (tmp_err->caught) {
+		goto fail;
+	}
+	if (!gu_ok(err)) {
+		return;
+	}
+	// Step over the continuation bytes just stored, so the next
+	// character is appended after them instead of overwriting them.
+	*buf = p + len;
+	return;
+
+fail:
+	gu_raise(err, GuUCSExn);
+	return;
+}
