diff options
Diffstat (limited to 'src/runtime/c/gu')
| -rw-r--r-- | src/runtime/c/gu/in.c | 1 | ||||
| -rw-r--r-- | src/runtime/c/gu/string.c | 38 | ||||
| -rw-r--r-- | src/runtime/c/gu/string.h | 6 | ||||
| -rw-r--r-- | src/runtime/c/gu/utf8.c | 49 | ||||
| -rw-r--r-- | src/runtime/c/gu/utf8.h | 5 |
5 files changed, 94 insertions, 5 deletions
diff --git a/src/runtime/c/gu/in.c b/src/runtime/c/gu/in.c index 835badfa7..ddac81102 100644 --- a/src/runtime/c/gu/in.c +++ b/src/runtime/c/gu/in.c @@ -253,7 +253,6 @@ gu_in_f64be(GuIn* in, GuExn* err) return gu_decode_double(gu_in_u64le(in, err)); } - static void gu_in_fini(GuFinalizer* fin) { diff --git a/src/runtime/c/gu/string.c b/src/runtime/c/gu/string.c index e2708aae1..c8b443daf 100644 --- a/src/runtime/c/gu/string.c +++ b/src/runtime/c/gu/string.c @@ -5,6 +5,7 @@ #include <gu/string.h> #include <gu/utf8.h> #include <gu/assert.h> +#include <stdlib.h> const GuString gu_empty_string = { 1 }; @@ -166,6 +167,43 @@ gu_string_write(GuString s, GuOut* out, GuExn* err) } GuString +gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err) +{ + uint8_t* buf = alloca(len*4); + uint8_t* p = buf; + for (size_t i = 0; i < len; i++) { + gu_in_utf8_buf(&p, in, err); + } + return gu_utf8_string(buf, p-buf, pool); +} + +GuString +gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err) +{ + if (len < GU_MIN(sizeof(GuWord), 128)) { + GuWord w = 0; + for (size_t n = 0; n < len; n++) { + w = w << 8 | gu_in_u8(in, err); + } + w = w << 8 | (len << 1) | 1; + return (GuString) { w }; + } + uint8_t* p = NULL; + if (len < 256) { + p = gu_malloc_aligned(pool, 1 + len, 2); + p[0] = (uint8_t) len; + } else { + p = gu_malloc_prefixed(pool, gu_alignof(size_t), + sizeof(size_t), 1, 1 + len); + ((size_t*) p)[-1] = len; + p[0] = 0; + } + + gu_in_bytes(in, &p[1], len, err); + return (GuString) { (GuWord) (void*) p }; +} + +GuString gu_format_string_v(const char* fmt, va_list args, GuPool* pool) { GuPool* tmp_pool = gu_local_pool(); diff --git a/src/runtime/c/gu/string.h b/src/runtime/c/gu/string.h index 310c725b5..a24fe3068 100644 --- a/src/runtime/c/gu/string.h +++ b/src/runtime/c/gu/string.h @@ -34,6 +34,12 @@ gu_string_copy(GuString string, GuPool* pool); void gu_string_write(GuString string, GuOut* out, GuExn* err); +GuString +gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err); + +GuString +gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err); + GuIn* gu_string_in(GuString string, GuPool* pool); diff --git a/src/runtime/c/gu/utf8.c b/src/runtime/c/gu/utf8.c index 38eb91a9f..2377f1ac2 100644 --- a/src/runtime/c/gu/utf8.c +++ b/src/runtime/c/gu/utf8.c @@ -72,8 +72,8 @@ fail: return 0; } -extern inline void -gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err); +extern inline GuUCS +gu_in_utf8(GuIn* in, GuExn* err); static size_t gu_advance_utf8(GuUCS ucs, uint8_t* buf) @@ -121,5 +121,46 @@ gu_out_utf8_(GuUCS ucs, GuOut* out, GuExn* err) } } -extern inline GuUCS -gu_in_utf8(GuIn* in, GuExn* err); +extern inline void +gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err); + +void +gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err) +{ + uint8_t* p = *buf; + + uint8_t c = gu_in_u8(in, err); + if (!gu_ok(err)) { + return; + } + *(p++) = c; + int len = (c < 0x80 ? 0 : + c < 0xc2 ? -1 : + c < 0xe0 ? 1 : + c < 0xf0 ? 2 : + c < 0xf5 ? 3 : + -1); + if (len < 0) { + goto fail; + } else if (len == 0) { + *buf = p; + return; + } + static const uint8_t mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 }; + // If reading the extra bytes causes EOF, it is an encoding + // error, not a legitimate end of character stream. + GuExn* tmp_err = gu_exn(err, GuEOF, NULL); + gu_in_bytes(in, p, len, tmp_err); + if (tmp_err->caught) { + goto fail; + } + if (!gu_ok(err)) { + return; + } + *buf = p; + return; + +fail: + gu_raise(err, GuUCSExn); + return; +} diff --git a/src/runtime/c/gu/utf8.h b/src/runtime/c/gu/utf8.h index 7cf42d56a..7674c6e02 100644 --- a/src/runtime/c/gu/utf8.h +++ b/src/runtime/c/gu/utf8.h @@ -29,7 +29,12 @@ gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err) } } +// Helper functions used in other modules + GuUCS gu_utf8_decode(const uint8_t** utf8); +void +gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err); + #endif // GU_UTF8_H_ |
