From e8335806afc45e31157937b880ff39b75c14a2cd Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Fri, 4 Oct 2013 12:04:39 +0000 Subject: GuString is now an ordinary C string - it makes life easier. In addition PgfSymbolKS, PgfExprFun and PgfLiteralStr now keep their strings as embedded flexible arrays. The latest change gives us the same compactness as the old representation but it is a lot easier to use. --- src/runtime/c/gu/string.c | 399 +++++++--------------------------------------- 1 file changed, 56 insertions(+), 343 deletions(-) (limited to 'src/runtime/c/gu/string.c') diff --git a/src/runtime/c/gu/string.c b/src/runtime/c/gu/string.c index ba7eaf0c6..995812fbf 100644 --- a/src/runtime/c/gu/string.c +++ b/src/runtime/c/gu/string.c @@ -7,21 +7,17 @@ #include #include -const GuString gu_empty_string = { 1 }; - struct GuStringBuf { - GuByteBuf* bbuf; + GuBuf* buf; GuOut* out; }; GuStringBuf* gu_string_buf(GuPool* pool) { - GuBuf* buf = gu_new_buf(uint8_t, pool); - GuOut* out = gu_buf_out(buf, pool); GuStringBuf* sbuf = gu_new(GuStringBuf, pool); - sbuf->bbuf = buf; - sbuf->out = out; + sbuf->buf = gu_new_buf(char, pool); + sbuf->out = gu_buf_out(sbuf->buf, pool); return sbuf; } @@ -31,176 +27,64 @@ gu_string_buf_out(GuStringBuf* sb) return sb->out; } -static GuString -gu_utf8_string(const uint8_t* buf, size_t sz, GuPool* pool) -{ - if (sz < GU_MIN(sizeof(GuWord), 128)) { - GuWord w = 0; - for (size_t n = 0; n < sz; n++) { - w = w << 8 | buf[n]; - } - w = w << 8 | (sz << 1) | 1; - return (GuString) { w }; - } - uint8_t* p = NULL; - if (sz < 256) { - p = gu_malloc_aligned(pool, 1 + sz, 2); - p[0] = (uint8_t) sz; - } else { - p = gu_malloc_prefixed(pool, gu_alignof(size_t), - sizeof(size_t), 1, 1 + sz); - ((size_t*) p)[-1] = sz; - p[0] = 0; - } - memcpy(&p[1], buf, sz); - return (GuString) { (GuWord) (void*) p }; -} - - - GuString gu_string_buf_freeze(GuStringBuf* sb, GuPool* pool) { gu_out_flush(sb->out, NULL); - uint8_t* data = gu_buf_data(sb->bbuf); - size_t 
len = gu_buf_length(sb->bbuf); - return gu_utf8_string(data, len, pool); -} - -GuIn* -gu_string_in(GuString s, GuPool* pool) -{ - GuWord w = s.w_; - uint8_t* buf = NULL; - size_t len = 0; - if (w & 1) { - len = (w & 0xff) >> 1; - buf = gu_new_n(uint8_t, len, pool); - for (int i = len - 1; i >= 0; i--) { - w >>= 8; - buf[i] = w & 0xff; - } - } else { - uint8_t* p = (void*) w; - len = (p[0] == 0) ? ((size_t*) p)[-1] : p[0]; - buf = &p[1]; - } - return gu_data_in(buf, len, pool); -} - -static bool -gu_string_is_long(GuString s) -{ - return !(s.w_ & 1); -} - -bool -gu_string_is_stable(GuString s) -{ - return !gu_string_is_long(s); -} + char* data = gu_buf_data(sb->buf); + size_t len = gu_buf_length(sb->buf); -static size_t -gu_string_long_length(GuString s) -{ - gu_assert(gu_string_is_long(s)); - uint8_t* p = (void*) s.w_; - uint8_t len = p[0]; - if (len > 0) { - return len; - } - return ((size_t*) p)[-1]; -} + char* p = gu_malloc_aligned(pool, len+1, 2); + memcpy(p, data, len); + p[len] = 0; -size_t -gu_string_length(GuString s) -{ - if (gu_string_is_long(s)) { - return gu_string_long_length(s); - } - return (s.w_ & 0xff) >> 1; + return p; } -static uint8_t* -gu_string_long_data(GuString s) +GuIn* +gu_string_in(GuString s, GuPool* pool) { - gu_require(gu_string_is_long(s)); - uint8_t* p = (void*) s.w_; - return &p[1]; + return gu_data_in((uint8_t*) s, strlen(s), pool); } GuString gu_string_copy(GuString string, GuPool* pool) { - if (gu_string_is_long(string)) { - uint8_t* data = gu_string_long_data(string); - size_t len = gu_string_long_length(string); - return gu_utf8_string(data, len, pool); - } else { - return string; - } + size_t len = strlen(string); + char* p = gu_malloc_aligned(pool, len+1, 2); + memcpy(p, string, len+1); + return p; } - void gu_string_write(GuString s, GuOut* out, GuExn* err) { - GuWord w = s.w_; - uint8_t buf[sizeof(GuWord)]; - uint8_t* src; - size_t sz; - if (w & 1) { - sz = (w & 0xff) >> 1; - gu_assert(sz <= sizeof(GuWord)); - size_t i = 
sz; - while (i > 0) { - w >>= 8; - buf[--i] = w & 0xff; - } - src = buf; - } else { - uint8_t* p = (void*) w; - sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0]; - src = &p[1]; - } - gu_out_bytes(out, src, sz, err); + gu_out_bytes(out, (uint8_t*) s, strlen(s), err); } GuString gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err) { - uint8_t* buf = alloca(len*4); - uint8_t* p = buf; + char* buf = alloca(len*6+1); + char* p = buf; for (size_t i = 0; i < len; i++) { - gu_in_utf8_buf(&p, in, err); + gu_in_utf8_buf((uint8_t**) &p, in, err); } - return gu_utf8_string(buf, p-buf, pool); + *p++ = 0; + + p = gu_malloc_aligned(pool, p-buf, 2); + strcpy(p, buf); + + return p; } GuString gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err) { - if (len < GU_MIN(sizeof(GuWord), 128)) { - GuWord w = 0; - for (size_t n = 0; n < len; n++) { - w = w << 8 | gu_in_u8(in, err); - } - w = w << 8 | (len << 1) | 1; - return (GuString) { w }; - } - uint8_t* p = NULL; - if (len < 256) { - p = gu_malloc_aligned(pool, 1 + len, 2); - p[0] = (uint8_t) len; - } else { - p = gu_malloc_prefixed(pool, gu_alignof(size_t), - sizeof(size_t), 1, 1 + len); - ((size_t*) p)[-1] = len; - p[0] = 0; - } - - gu_in_bytes(in, &p[1], len, err); - return (GuString) { (GuWord) (void*) p }; + char* p = gu_malloc_aligned(pool, len+1, 2); + gu_in_bytes(in, (uint8_t*)p, len, err); + p[len] = 0; + return p; } GuString @@ -226,52 +110,24 @@ gu_format_string(GuPool* pool, const char* fmt, ...) return s; } -GuString -gu_str_string(const char* str, GuPool* pool) -{ - return gu_utf8_string((const uint8_t*) str, strlen(str), pool); -} - bool gu_string_to_int(GuString s, int *res) { - GuWord w = s.w_; - uint8_t buf[sizeof(GuWord)]; - char* src; - size_t sz; - if (w & 1) { - sz = (w & 0xff) >> 1; - gu_assert(sz <= sizeof(GuWord)); - size_t i = sz; - while (i > 0) { - w >>= 8; - buf[--i] = w & 0xff; - } - src = (char*) buf; - } else { - uint8_t* p = (void*) w; - sz = (p[0] == 0) ? 
((size_t*) p)[-1] : p[0]; - src = (char*) &p[1]; - } - - size_t i = 0; - bool neg = false; - - if (src[i] == '-') { + if (*s == '-') { neg = true; - i++; + s++; } - if (i >= sz) + if (*s == 0) return false; int n = 0; - for (; i < sz; i++) { - if (src[i] < '0' || src[i] > '9') + for (; *s; s++) { + if (*s < '0' || *s > '9') return false; - n = n * 10 + (src[i] - '0'); + n = n * 10 + (*s - '0'); } *res = neg ? -n : n; @@ -281,54 +137,33 @@ gu_string_to_int(GuString s, int *res) bool gu_string_to_double(GuString s, double *res) { - GuWord w = s.w_; - uint8_t buf[sizeof(GuWord)]; - char* src; - size_t sz; - if (w & 1) { - sz = (w & 0xff) >> 1; - gu_assert(sz <= sizeof(GuWord)); - size_t i = sz; - while (i > 0) { - w >>= 8; - buf[--i] = w & 0xff; - } - src = (char*) buf; - } else { - uint8_t* p = (void*) w; - sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0]; - src = (char*) &p[1]; - } - - size_t i = 0; - bool neg = false; bool dec = false; double exp = 1; - if (src[i] == '-') { + if (*s == '-') { neg = true; - i++; + s++; } - if (i >= sz) + if (*s == 0) return false; double d = 0; - for (; i < sz; i++) { - if (src[i] == '.') { + for (; *s; s++) { + if (*s == '.') { if (dec) return false; dec = true; continue; } - if (src[i] < '0' || src[i] > '9') + if (*s < '0' || *s > '9') return false; if (dec) exp = exp * 10; - d = d * 10 + (src[i] - '0'); + d = d * 10 + (*s - '0'); } *res = (neg ? -d : d) / exp; @@ -338,54 +173,18 @@ gu_string_to_double(GuString s, double *res) bool gu_string_is_prefix(GuString s1, GuString s2) { - GuWord w1 = s1.w_; - uint8_t buf1[sizeof(GuWord)]; - size_t sz1; - char* str1; - if (w1 & 1) { - sz1 = (w1 & 0xff) >> 1; - gu_assert(sz1 <= sizeof(GuWord)); - size_t i = sz1; - while (i > 0) { - w1 >>= 8; - buf1[--i] = w1 & 0xff; - } - str1 = (char*) buf1; - } else { - uint8_t* p = (void*) w1; - sz1 = (p[0] == 0) ? 
((size_t*) p)[-1] : p[0]; - str1 = (char*) &p[1]; - } + size_t len1 = strlen(s1); + size_t len2 = strlen(s2); - GuWord w2 = s2.w_; - uint8_t buf2[sizeof(GuWord)]; - size_t sz2; - char* str2; - if (w2 & 1) { - sz2 = (w2 & 0xff) >> 1; - gu_assert(sz2 <= sizeof(GuWord)); - size_t i = sz2; - while (i > 0) { - w2 >>= 8; - buf2[--i] = w2 & 0xff; - } - str2 = (char*) buf2; - } else { - uint8_t* p = (void*) w2; - sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0]; - str2 = (char*) &p[1]; - } - - - if (sz1 > sz2) + if (len1 > len2) return false; - for (size_t sz = sz1; sz--; sz > 0) { - if (*str1 != *str2) + for (size_t len = len1; len--; len > 0) { + if (*s1 != *s2) return false; - - str1++; - str2++; + + s1++; + s2++; } return true; @@ -394,108 +193,23 @@ gu_string_is_prefix(GuString s1, GuString s2) GuHash gu_string_hash(GuHash h, GuString s) { - if (s.w_ & 1) { - return h*101 + s.w_; - } - size_t len = gu_string_length(s); - uint8_t* data = gu_string_long_data(s); - return gu_hash_bytes(h, data, len); -} - -bool -gu_string_eq(GuString s1, GuString s2) -{ - if (s1.w_ == s2.w_) { - return true; - } else if (gu_string_is_long(s1) && gu_string_is_long(s2)) { - size_t len1 = gu_string_long_length(s1); - size_t len2 = gu_string_long_length(s2); - if (len1 != len2) { - return false; - } - uint8_t* data1 = gu_string_long_data(s1); - uint8_t* data2 = gu_string_long_data(s2); - return (memcmp(data1, data2, len1) == 0); - } - return false; - + return gu_hash_bytes(h, (uint8_t*)s, strlen(s)); } static bool gu_string_eq_fn(GuEquality* self, const void* p1, const void* p2) { (void) self; - const GuString* sp1 = p1; - const GuString* sp2 = p2; - return gu_string_eq(*sp1, *sp2); + return strcmp((GuString) p1, (GuString) p2) == 0; } GuEquality gu_string_equality[1] = { { gu_string_eq_fn } }; -int -gu_string_cmp(GuString s1, GuString s2) -{ - uint8_t buf1[sizeof(GuWord)]; - char* src1; - size_t sz1; - if (s1.w_ & 1) { - sz1 = (s1.w_ & 0xff) >> 1; - gu_assert(sz1 <= sizeof(GuWord)); - size_t i 
= sz1; - while (i > 0) { - s1.w_ >>= 8; - buf1[--i] = s1.w_ & 0xff; - } - src1 = (char*) buf1; - } else { - uint8_t* p = (void*) s1.w_; - sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0]; - src1 = (char*) &p[1]; - } - - uint8_t buf2[sizeof(GuWord)]; - char* src2; - size_t sz2; - if (s2.w_ & 1) { - sz2 = (s2.w_ & 0xff) >> 1; - gu_assert(sz2 <= sizeof(GuWord)); - size_t i = sz2; - while (i > 0) { - s2.w_ >>= 8; - buf2[--i] = s2.w_ & 0xff; - } - src2 = (char*) buf2; - } else { - uint8_t* p = (void*) s2.w_; - sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0]; - src2 = (char*) &p[1]; - } - - for (size_t i = 0; ; i++) { - if (sz1 == i && i == sz2) - break; - - if (sz1 <= i) - return -1; - if (i >= sz2) - return 1; - - if (src1[i] > src2[i]) - return 1; - else if (src1[i] < src2[i]) - return -1; - } - - return 0; -} - static int gu_string_cmp_fn(GuOrder* self, const void* p1, const void* p2) { (void) self; - const GuString* sp1 = p1; - const GuString* sp2 = p2; - return gu_string_cmp(*sp1, *sp2); + return strcmp((GuString) p1, (GuString) p2); } GuOrder gu_string_order[1] = { { gu_string_cmp_fn } }; @@ -504,8 +218,7 @@ static GuHash gu_string_hasher_hash(GuHasher* self, const void* p) { (void) self; - const GuString* sp = p; - return gu_string_hash(0, *sp); + return gu_string_hash(0, (GuString) p); } GuHasher gu_string_hasher[1] = { @@ -516,5 +229,5 @@ GuHasher gu_string_hasher[1] = { }; -GU_DEFINE_TYPE(GuString, GuOpaque, _); +GU_DEFINE_KIND(GuString, pointer); GU_DEFINE_KIND(GuStringMap, GuMap); -- cgit v1.2.3