summary | refs | log | tree | commit | diff
path: root/src/runtime/c
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/c')
-rw-r--r-- src/runtime/c/gu/in.c 1
-rw-r--r-- src/runtime/c/gu/string.c 38
-rw-r--r-- src/runtime/c/gu/string.h 6
-rw-r--r-- src/runtime/c/gu/utf8.c 49
-rw-r--r-- src/runtime/c/gu/utf8.h 5
-rw-r--r-- src/runtime/c/pgf/reader.c 27
6 files changed, 96 insertions, 30 deletions
diff --git a/src/runtime/c/gu/in.c b/src/runtime/c/gu/in.c
index 835badfa7..ddac81102 100644
--- a/src/runtime/c/gu/in.c
+++ b/src/runtime/c/gu/in.c
@@ -253,7 +253,6 @@ gu_in_f64be(GuIn* in, GuExn* err)
return gu_decode_double(gu_in_u64le(in, err));
}
-
static void
gu_in_fini(GuFinalizer* fin)
{
diff --git a/src/runtime/c/gu/string.c b/src/runtime/c/gu/string.c
index e2708aae1..c8b443daf 100644
--- a/src/runtime/c/gu/string.c
+++ b/src/runtime/c/gu/string.c
@@ -5,6 +5,7 @@
#include <gu/string.h>
#include <gu/utf8.h>
#include <gu/assert.h>
+#include <stdlib.h>
const GuString gu_empty_string = { 1 };
@@ -166,6 +167,43 @@ gu_string_write(GuString s, GuOut* out, GuExn* err)
}
+// Read `len` UTF-8 encoded code points from `in` and return them as a
+// string allocated from `pool`.  If reading fails, the error is left
+// in `err` and the empty string is returned.
GuString
+gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err)
+{
+	// gu_in_utf8_buf stores at most 4 bytes per code point.  `len`
+	// comes from the input, so refuse a count whose byte size would
+	// overflow size_t instead of under-allocating the buffer.  It is
+	// reported as GuEOF: no real input can supply that many characters.
+	if (len > (size_t) -1 / 4) {
+		gu_raise(err, GuEOF);
+		return gu_empty_string;
+	}
+	size_t size = len * 4;
+
+	// Only small buffers go on the stack; alloca cannot report
+	// failure, so large requests use a temporary pool instead.
+	GuPool* tmp_pool = NULL;
+	uint8_t* buf = NULL;
+	if (size <= 4096) {
+		buf = alloca(size);
+	} else {
+		tmp_pool = gu_new_pool();
+		buf = gu_malloc_aligned(tmp_pool, size, 1);
+	}
+
+	// Stop at the first error: a failed read does not advance `p`,
+	// so further iterations could only pile up more failures.
+	uint8_t* p = buf;
+	for (size_t i = 0; i < len && gu_ok(err); i++) {
+		gu_in_utf8_buf(&p, in, err);
+	}
+
+	GuString str = gu_empty_string;
+	if (gu_ok(err)) {
+		str = gu_utf8_string(buf, p-buf, pool);
+	}
+	if (tmp_pool != NULL) {
+		gu_pool_free(tmp_pool);
+	}
+	return str;
+}
+
+GuString
+gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err) // Read len raw bytes from in and wrap them in a GuString; pool is only used by the out-of-line forms below.
+{
+ if (len < GU_MIN(sizeof(GuWord), 128)) { // short string: the bytes are packed into the word itself, no allocation
+ GuWord w = 0;
+ for (size_t n = 0; n < len; n++) {
+ w = w << 8 | gu_in_u8(in, err); // NOTE(review): err is not checked between reads
+ }
+ w = w << 8 | (len << 1) | 1; // low byte = len<<1 with bit 0 set; presumably the inline-string tag - confirm against the GuString layout
+ return (GuString) { w };
+ }
+ uint8_t* p = NULL;
+ if (len < 256) {
+ p = gu_malloc_aligned(pool, 1 + len, 2); // one length byte followed by len data bytes
+ p[0] = (uint8_t) len;
+ } else {
+ p = gu_malloc_prefixed(pool, gu_alignof(size_t),
+ sizeof(size_t), 1, 1 + len);
+ ((size_t*) p)[-1] = len; // the length is kept in the size_t prefix just before p
+ p[0] = 0; // zero length byte; presumably marks that the prefix holds the real length - confirm
+ }
+
+ gu_in_bytes(in, &p[1], len, err); // bytes are copied verbatim; NOTE(review): no Latin-1 to UTF-8 conversion happens here - confirm non-ASCII identifiers are handled
+ return (GuString) { (GuWord) (void*) p };
+}
+
+GuString
gu_format_string_v(const char* fmt, va_list args, GuPool* pool)
{
GuPool* tmp_pool = gu_local_pool();
diff --git a/src/runtime/c/gu/string.h b/src/runtime/c/gu/string.h
index 310c725b5..a24fe3068 100644
--- a/src/runtime/c/gu/string.h
+++ b/src/runtime/c/gu/string.h
@@ -34,6 +34,12 @@ gu_string_copy(GuString string, GuPool* pool);
void
gu_string_write(GuString string, GuOut* out, GuExn* err);
+GuString
+gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err);
+
+GuString
+gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err);
+
GuIn*
gu_string_in(GuString string, GuPool* pool);
diff --git a/src/runtime/c/gu/utf8.c b/src/runtime/c/gu/utf8.c
index 38eb91a9f..2377f1ac2 100644
--- a/src/runtime/c/gu/utf8.c
+++ b/src/runtime/c/gu/utf8.c
@@ -72,8 +72,8 @@ fail:
return 0;
}
-extern inline void
-gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err);
+extern inline GuUCS
+gu_in_utf8(GuIn* in, GuExn* err);
static size_t
gu_advance_utf8(GuUCS ucs, uint8_t* buf)
@@ -121,5 +121,46 @@ gu_out_utf8_(GuUCS ucs, GuOut* out, GuExn* err)
}
}
-extern inline GuUCS
-gu_in_utf8(GuIn* in, GuExn* err);
+extern inline void
+gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err);
+
+// Read one UTF-8 encoded code point from `in` and append its raw bytes
+// at *buf, advancing *buf past them (at most 4 bytes are written).
+// Raises GuUCSExn on an invalid lead byte or on EOF inside a sequence;
+// *buf is not advanced when an error is raised.  Continuation bytes
+// are copied as-is and are not validated here.
+void
+gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err)
+{
+	uint8_t* p = *buf;
+
+	uint8_t c = gu_in_u8(in, err);
+	if (!gu_ok(err)) {
+		return;
+	}
+	*(p++) = c;
+	// Number of continuation bytes implied by the lead byte; -1 marks
+	// a byte that cannot start a sequence.
+	int len = (c < 0x80 ? 0 :
+	           c < 0xc2 ? -1 :
+	           c < 0xe0 ? 1 :
+	           c < 0xf0 ? 2 :
+	           c < 0xf5 ? 3 :
+	           -1);
+	if (len < 0) {
+		goto fail;
+	}
+	if (len > 0) {
+		// If reading the extra bytes causes EOF, it is an encoding
+		// error, not a legitimate end of character stream.
+		GuExn* tmp_err = gu_exn(err, GuEOF, NULL);
+		gu_in_bytes(in, p, len, tmp_err);
+		if (tmp_err->caught) {
+			goto fail;
+		}
+		if (!gu_ok(err)) {
+			return;
+		}
+		// Step over the continuation bytes just stored; without this
+		// the next character would overwrite them.
+		p += len;
+	}
+	*buf = p;
+	return;
+
+fail:
+	gu_raise(err, GuUCSExn);
+}
diff --git a/src/runtime/c/gu/utf8.h b/src/runtime/c/gu/utf8.h
index 7cf42d56a..7674c6e02 100644
--- a/src/runtime/c/gu/utf8.h
+++ b/src/runtime/c/gu/utf8.h
@@ -29,7 +29,12 @@ gu_out_utf8(GuUCS ucs, GuOut* out, GuExn* err)
}
}
+// Helper functions used in other modules
+
GuUCS
gu_utf8_decode(const uint8_t** utf8);
+void
+gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err);
+
#endif // GU_UTF8_H_
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index 1b776beec..b6d3c8854 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -91,38 +91,15 @@ pgf_read_len(PgfReader* rdr)
static PgfCId
pgf_read_cid(PgfReader* rdr)
{
- GuPool* tmp_pool = gu_new_pool();
- GuStringBuf* sbuf = gu_string_buf(tmp_pool);
- GuOut* out = gu_string_buf_out(sbuf);
-
size_t len = pgf_read_len(rdr);
- for (size_t i = 0; i < len; i++) {
- // CIds are in latin-1
- GuUCS ucs = gu_in_u8(rdr->in, rdr->err);
- gu_out_utf8(ucs, out, rdr->err);
- }
- GuString str = gu_string_buf_freeze(sbuf, rdr->opool);
- gu_pool_free(tmp_pool);
- return str;
+ return gu_string_read_latin1(len, rdr->opool, rdr->in, rdr->err);
}
static GuString
pgf_read_string(PgfReader* rdr)
{
- GuPool* tmp_pool = gu_new_pool();
- GuStringBuf* sbuf = gu_string_buf(tmp_pool);
- GuOut* out = gu_string_buf_out(sbuf);
-
GuLength len = pgf_read_len(rdr);
-
- for (size_t i = 0; i < len; i++) {
- GuUCS ucs = gu_in_utf8(rdr->in, rdr->err);
- gu_out_utf8(ucs, out, rdr->err);
- }
- GuString str = gu_string_buf_freeze(sbuf, rdr->opool);
- gu_pool_free(tmp_pool);
-
- return str;
+ return gu_string_read(len, rdr->opool, rdr->in, rdr->err);
}
static void