summaryrefslogtreecommitdiff
path: root/src/runtime/c/gu/string.c
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2013-10-04 12:04:39 +0000
committerkr.angelov <kr.angelov@gmail.com>2013-10-04 12:04:39 +0000
commite8335806afc45e31157937b880ff39b75c14a2cd (patch)
tree5e3233cfdf934ff350f66d755bde759512ef5f20 /src/runtime/c/gu/string.c
parent27091048ce8276cc542e909588695d273e95b087 (diff)
GuString is now an ordinary C string - it makes life easier. In addition, PgfSymbolKS, PgfExprFun and PgfLiteralStr now keep their strings as embedded flexible arrays. The latter change gives us the same compactness as the old representation but it is a lot easier to use.
Diffstat (limited to 'src/runtime/c/gu/string.c')
-rw-r--r--src/runtime/c/gu/string.c399
1 files changed, 56 insertions, 343 deletions
diff --git a/src/runtime/c/gu/string.c b/src/runtime/c/gu/string.c
index ba7eaf0c6..995812fbf 100644
--- a/src/runtime/c/gu/string.c
+++ b/src/runtime/c/gu/string.c
@@ -7,21 +7,17 @@
#include <gu/assert.h>
#include <stdlib.h>
-const GuString gu_empty_string = { 1 };
-
struct GuStringBuf {
- GuByteBuf* bbuf;
+ GuBuf* buf;
GuOut* out;
};
GuStringBuf*
gu_string_buf(GuPool* pool)
{
- GuBuf* buf = gu_new_buf(uint8_t, pool);
- GuOut* out = gu_buf_out(buf, pool);
GuStringBuf* sbuf = gu_new(GuStringBuf, pool);
- sbuf->bbuf = buf;
- sbuf->out = out;
+ sbuf->buf = gu_new_buf(char, pool);
+ sbuf->out = gu_buf_out(sbuf->buf, pool);
return sbuf;
}
@@ -31,176 +27,64 @@ gu_string_buf_out(GuStringBuf* sb)
return sb->out;
}
-static GuString
-gu_utf8_string(const uint8_t* buf, size_t sz, GuPool* pool)
-{
- if (sz < GU_MIN(sizeof(GuWord), 128)) {
- GuWord w = 0;
- for (size_t n = 0; n < sz; n++) {
- w = w << 8 | buf[n];
- }
- w = w << 8 | (sz << 1) | 1;
- return (GuString) { w };
- }
- uint8_t* p = NULL;
- if (sz < 256) {
- p = gu_malloc_aligned(pool, 1 + sz, 2);
- p[0] = (uint8_t) sz;
- } else {
- p = gu_malloc_prefixed(pool, gu_alignof(size_t),
- sizeof(size_t), 1, 1 + sz);
- ((size_t*) p)[-1] = sz;
- p[0] = 0;
- }
- memcpy(&p[1], buf, sz);
- return (GuString) { (GuWord) (void*) p };
-}
-
-
-
GuString
gu_string_buf_freeze(GuStringBuf* sb, GuPool* pool)
{
gu_out_flush(sb->out, NULL);
- uint8_t* data = gu_buf_data(sb->bbuf);
- size_t len = gu_buf_length(sb->bbuf);
- return gu_utf8_string(data, len, pool);
-}
-
-GuIn*
-gu_string_in(GuString s, GuPool* pool)
-{
- GuWord w = s.w_;
- uint8_t* buf = NULL;
- size_t len = 0;
- if (w & 1) {
- len = (w & 0xff) >> 1;
- buf = gu_new_n(uint8_t, len, pool);
- for (int i = len - 1; i >= 0; i--) {
- w >>= 8;
- buf[i] = w & 0xff;
- }
- } else {
- uint8_t* p = (void*) w;
- len = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- buf = &p[1];
- }
- return gu_data_in(buf, len, pool);
-}
-
-static bool
-gu_string_is_long(GuString s)
-{
- return !(s.w_ & 1);
-}
-
-bool
-gu_string_is_stable(GuString s)
-{
- return !gu_string_is_long(s);
-}
+ char* data = gu_buf_data(sb->buf);
+ size_t len = gu_buf_length(sb->buf);
-static size_t
-gu_string_long_length(GuString s)
-{
- gu_assert(gu_string_is_long(s));
- uint8_t* p = (void*) s.w_;
- uint8_t len = p[0];
- if (len > 0) {
- return len;
- }
- return ((size_t*) p)[-1];
-}
+ char* p = gu_malloc_aligned(pool, len+1, 2);
+ memcpy(p, data, len);
+ p[len] = 0;
-size_t
-gu_string_length(GuString s)
-{
- if (gu_string_is_long(s)) {
- return gu_string_long_length(s);
- }
- return (s.w_ & 0xff) >> 1;
+ return p;
}
-static uint8_t*
-gu_string_long_data(GuString s)
+GuIn*
+gu_string_in(GuString s, GuPool* pool)
{
- gu_require(gu_string_is_long(s));
- uint8_t* p = (void*) s.w_;
- return &p[1];
+ return gu_data_in((uint8_t*) s, strlen(s), pool);
}
GuString
gu_string_copy(GuString string, GuPool* pool)
{
- if (gu_string_is_long(string)) {
- uint8_t* data = gu_string_long_data(string);
- size_t len = gu_string_long_length(string);
- return gu_utf8_string(data, len, pool);
- } else {
- return string;
- }
+ size_t len = strlen(string);
+ char* p = gu_malloc_aligned(pool, len+1, 2);
+ memcpy(p, string, len+1);
+ return p;
}
-
void
gu_string_write(GuString s, GuOut* out, GuExn* err)
{
- GuWord w = s.w_;
- uint8_t buf[sizeof(GuWord)];
- uint8_t* src;
- size_t sz;
- if (w & 1) {
- sz = (w & 0xff) >> 1;
- gu_assert(sz <= sizeof(GuWord));
- size_t i = sz;
- while (i > 0) {
- w >>= 8;
- buf[--i] = w & 0xff;
- }
- src = buf;
- } else {
- uint8_t* p = (void*) w;
- sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- src = &p[1];
- }
- gu_out_bytes(out, src, sz, err);
+ gu_out_bytes(out, (uint8_t*) s, strlen(s), err);
}
GuString
gu_string_read(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
- uint8_t* buf = alloca(len*4);
- uint8_t* p = buf;
+ char* buf = alloca(len*6+1);
+ char* p = buf;
for (size_t i = 0; i < len; i++) {
- gu_in_utf8_buf(&p, in, err);
+ gu_in_utf8_buf((uint8_t**) &p, in, err);
}
- return gu_utf8_string(buf, p-buf, pool);
+ *p++ = 0;
+
+ p = gu_malloc_aligned(pool, p-buf, 2);
+ strcpy(p, buf);
+
+ return p;
}
GuString
gu_string_read_latin1(size_t len, GuPool* pool, GuIn* in, GuExn* err)
{
- if (len < GU_MIN(sizeof(GuWord), 128)) {
- GuWord w = 0;
- for (size_t n = 0; n < len; n++) {
- w = w << 8 | gu_in_u8(in, err);
- }
- w = w << 8 | (len << 1) | 1;
- return (GuString) { w };
- }
- uint8_t* p = NULL;
- if (len < 256) {
- p = gu_malloc_aligned(pool, 1 + len, 2);
- p[0] = (uint8_t) len;
- } else {
- p = gu_malloc_prefixed(pool, gu_alignof(size_t),
- sizeof(size_t), 1, 1 + len);
- ((size_t*) p)[-1] = len;
- p[0] = 0;
- }
-
- gu_in_bytes(in, &p[1], len, err);
- return (GuString) { (GuWord) (void*) p };
+ char* p = gu_malloc_aligned(pool, len+1, 2);
+ gu_in_bytes(in, (uint8_t*)p, len, err);
+ p[len] = 0;
+ return p;
}
GuString
@@ -226,52 +110,24 @@ gu_format_string(GuPool* pool, const char* fmt, ...)
return s;
}
-GuString
-gu_str_string(const char* str, GuPool* pool)
-{
- return gu_utf8_string((const uint8_t*) str, strlen(str), pool);
-}
-
bool
gu_string_to_int(GuString s, int *res)
{
- GuWord w = s.w_;
- uint8_t buf[sizeof(GuWord)];
- char* src;
- size_t sz;
- if (w & 1) {
- sz = (w & 0xff) >> 1;
- gu_assert(sz <= sizeof(GuWord));
- size_t i = sz;
- while (i > 0) {
- w >>= 8;
- buf[--i] = w & 0xff;
- }
- src = (char*) buf;
- } else {
- uint8_t* p = (void*) w;
- sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- src = (char*) &p[1];
- }
-
- size_t i = 0;
-
bool neg = false;
-
- if (src[i] == '-') {
+ if (*s == '-') {
neg = true;
- i++;
+ s++;
}
- if (i >= sz)
+ if (*s == 0)
return false;
int n = 0;
- for (; i < sz; i++) {
- if (src[i] < '0' || src[i] > '9')
+ for (; *s; s++) {
+ if (*s < '0' || *s > '9')
return false;
- n = n * 10 + (src[i] - '0');
+ n = n * 10 + (*s - '0');
}
*res = neg ? -n : n;
@@ -281,54 +137,33 @@ gu_string_to_int(GuString s, int *res)
bool
gu_string_to_double(GuString s, double *res)
{
- GuWord w = s.w_;
- uint8_t buf[sizeof(GuWord)];
- char* src;
- size_t sz;
- if (w & 1) {
- sz = (w & 0xff) >> 1;
- gu_assert(sz <= sizeof(GuWord));
- size_t i = sz;
- while (i > 0) {
- w >>= 8;
- buf[--i] = w & 0xff;
- }
- src = (char*) buf;
- } else {
- uint8_t* p = (void*) w;
- sz = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- src = (char*) &p[1];
- }
-
- size_t i = 0;
-
bool neg = false;
bool dec = false;
double exp = 1;
- if (src[i] == '-') {
+ if (*s == '-') {
neg = true;
- i++;
+ s++;
}
- if (i >= sz)
+ if (*s == 0)
return false;
double d = 0;
- for (; i < sz; i++) {
- if (src[i] == '.') {
+ for (; *s; s++) {
+ if (*s == '.') {
if (dec) return false;
dec = true;
continue;
}
- if (src[i] < '0' || src[i] > '9')
+ if (*s < '0' || *s > '9')
return false;
if (dec) exp = exp * 10;
- d = d * 10 + (src[i] - '0');
+ d = d * 10 + (*s - '0');
}
*res = (neg ? -d : d) / exp;
@@ -338,54 +173,18 @@ gu_string_to_double(GuString s, double *res)
bool
gu_string_is_prefix(GuString s1, GuString s2)
{
- GuWord w1 = s1.w_;
- uint8_t buf1[sizeof(GuWord)];
- size_t sz1;
- char* str1;
- if (w1 & 1) {
- sz1 = (w1 & 0xff) >> 1;
- gu_assert(sz1 <= sizeof(GuWord));
- size_t i = sz1;
- while (i > 0) {
- w1 >>= 8;
- buf1[--i] = w1 & 0xff;
- }
- str1 = (char*) buf1;
- } else {
- uint8_t* p = (void*) w1;
- sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- str1 = (char*) &p[1];
- }
+ size_t len1 = strlen(s1);
+ size_t len2 = strlen(s2);
- GuWord w2 = s2.w_;
- uint8_t buf2[sizeof(GuWord)];
- size_t sz2;
- char* str2;
- if (w2 & 1) {
- sz2 = (w2 & 0xff) >> 1;
- gu_assert(sz2 <= sizeof(GuWord));
- size_t i = sz2;
- while (i > 0) {
- w2 >>= 8;
- buf2[--i] = w2 & 0xff;
- }
- str2 = (char*) buf2;
- } else {
- uint8_t* p = (void*) w2;
- sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- str2 = (char*) &p[1];
- }
-
-
- if (sz1 > sz2)
+ if (len1 > len2)
return false;
- for (size_t sz = sz1; sz--; sz > 0) {
- if (*str1 != *str2)
+ for (size_t len = len1; len--; len > 0) {
+ if (*s1 != *s2)
return false;
-
- str1++;
- str2++;
+
+ s1++;
+ s2++;
}
return true;
@@ -394,108 +193,23 @@ gu_string_is_prefix(GuString s1, GuString s2)
GuHash
gu_string_hash(GuHash h, GuString s)
{
- if (s.w_ & 1) {
- return h*101 + s.w_;
- }
- size_t len = gu_string_length(s);
- uint8_t* data = gu_string_long_data(s);
- return gu_hash_bytes(h, data, len);
-}
-
-bool
-gu_string_eq(GuString s1, GuString s2)
-{
- if (s1.w_ == s2.w_) {
- return true;
- } else if (gu_string_is_long(s1) && gu_string_is_long(s2)) {
- size_t len1 = gu_string_long_length(s1);
- size_t len2 = gu_string_long_length(s2);
- if (len1 != len2) {
- return false;
- }
- uint8_t* data1 = gu_string_long_data(s1);
- uint8_t* data2 = gu_string_long_data(s2);
- return (memcmp(data1, data2, len1) == 0);
- }
- return false;
-
+ return gu_hash_bytes(h, (uint8_t*)s, strlen(s));
}
static bool
gu_string_eq_fn(GuEquality* self, const void* p1, const void* p2)
{
(void) self;
- const GuString* sp1 = p1;
- const GuString* sp2 = p2;
- return gu_string_eq(*sp1, *sp2);
+ return strcmp((GuString) p1, (GuString) p2) == 0;
}
GuEquality gu_string_equality[1] = { { gu_string_eq_fn } };
-int
-gu_string_cmp(GuString s1, GuString s2)
-{
- uint8_t buf1[sizeof(GuWord)];
- char* src1;
- size_t sz1;
- if (s1.w_ & 1) {
- sz1 = (s1.w_ & 0xff) >> 1;
- gu_assert(sz1 <= sizeof(GuWord));
- size_t i = sz1;
- while (i > 0) {
- s1.w_ >>= 8;
- buf1[--i] = s1.w_ & 0xff;
- }
- src1 = (char*) buf1;
- } else {
- uint8_t* p = (void*) s1.w_;
- sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- src1 = (char*) &p[1];
- }
-
- uint8_t buf2[sizeof(GuWord)];
- char* src2;
- size_t sz2;
- if (s2.w_ & 1) {
- sz2 = (s2.w_ & 0xff) >> 1;
- gu_assert(sz2 <= sizeof(GuWord));
- size_t i = sz2;
- while (i > 0) {
- s2.w_ >>= 8;
- buf2[--i] = s2.w_ & 0xff;
- }
- src2 = (char*) buf2;
- } else {
- uint8_t* p = (void*) s2.w_;
- sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
- src2 = (char*) &p[1];
- }
-
- for (size_t i = 0; ; i++) {
- if (sz1 == i && i == sz2)
- break;
-
- if (sz1 <= i)
- return -1;
- if (i >= sz2)
- return 1;
-
- if (src1[i] > src2[i])
- return 1;
- else if (src1[i] < src2[i])
- return -1;
- }
-
- return 0;
-}
-
static int
gu_string_cmp_fn(GuOrder* self, const void* p1, const void* p2)
{
(void) self;
- const GuString* sp1 = p1;
- const GuString* sp2 = p2;
- return gu_string_cmp(*sp1, *sp2);
+ return strcmp((GuString) p1, (GuString) p2);
}
GuOrder gu_string_order[1] = { { gu_string_cmp_fn } };
@@ -504,8 +218,7 @@ static GuHash
gu_string_hasher_hash(GuHasher* self, const void* p)
{
(void) self;
- const GuString* sp = p;
- return gu_string_hash(0, *sp);
+ return gu_string_hash(0, (GuString) p);
}
GuHasher gu_string_hasher[1] = {
@@ -516,5 +229,5 @@ GuHasher gu_string_hasher[1] = {
};
-GU_DEFINE_TYPE(GuString, GuOpaque, _);
+GU_DEFINE_KIND(GuString, pointer);
GU_DEFINE_KIND(GuStringMap, GuMap);