summaryrefslogtreecommitdiff
path: root/src/runtime/c/pgf/scanner.c
diff options
context:
space:
mode:
authorkrangelov <kr.angelov@gmail.com>2019-06-25 12:58:28 +0200
committerkrangelov <kr.angelov@gmail.com>2019-06-25 12:58:28 +0200
commitb56591c6b6b3315bdb97164dca574c43292f4d98 (patch)
tree3d27c146eba75902edfa4038406cd4771736b1f3 /src/runtime/c/pgf/scanner.c
parentb94bb50ec91440585b4fe55d9441c89cb3559c88 (diff)
the parser now ensures that all word senses are in the chart
Diffstat (limited to 'src/runtime/c/pgf/scanner.c')
-rw-r--r--src/runtime/c/pgf/scanner.c34
1 files changed, 17 insertions, 17 deletions
diff --git a/src/runtime/c/pgf/scanner.c b/src/runtime/c/pgf/scanner.c
index cdbf56cc8..dae857ff1 100644
--- a/src/runtime/c/pgf/scanner.c
+++ b/src/runtime/c/pgf/scanner.c
@@ -3,7 +3,7 @@
#include <gu/utf8.h>
PGF_INTERNAL int
-cmp_string(GuString* psent, size_t* ppos, GuString tok,
+cmp_string(PgfCohortSpot* spot, GuString tok,
bool case_sensitive)
{
for (;;) {
@@ -11,7 +11,7 @@ cmp_string(GuString* psent, size_t* ppos, GuString tok,
if (c2 == 0)
return 0;
- const uint8_t* p = (uint8_t*) *psent;
+ const uint8_t* p = (uint8_t*) spot->ptr;
GuUCS c1 = gu_utf8_decode(&p);
if (c1 == 0)
return -1;
@@ -22,8 +22,8 @@ cmp_string(GuString* psent, size_t* ppos, GuString tok,
if (c1 != c2)
return (c1-c2);
- *psent = (GuString) p;
- (*ppos)++;
+ spot->ptr = (GuString) p;
+ spot->pos++;
}
}
@@ -40,7 +40,7 @@ skip_space(GuString* psent, size_t* ppos)
}
PGF_INTERNAL int
-pgf_symbols_cmp(GuString* psent, size_t* ppos,
+pgf_symbols_cmp(PgfCohortSpot* spot,
PgfSymbols* syms, size_t* sym_idx,
bool case_sensitive)
{
@@ -49,14 +49,14 @@ pgf_symbols_cmp(GuString* psent, size_t* ppos,
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
if (*sym_idx > 0) {
- if (!skip_space(psent,ppos)) {
- if (**psent == 0)
+ if (!skip_space(&spot->ptr,&spot->pos)) {
+ if (*spot->ptr == 0)
return -1;
return 1;
}
- while (**psent != 0) {
- if (!skip_space(psent,ppos))
+ while (*spot->ptr != 0) {
+ if (!skip_space(&spot->ptr,&spot->pos))
break;
}
}
@@ -66,16 +66,16 @@ pgf_symbols_cmp(GuString* psent, size_t* ppos,
case PGF_SYMBOL_CAT:
case PGF_SYMBOL_LIT:
case PGF_SYMBOL_VAR: {
- if (**psent == 0)
+ if (*spot->ptr == 0)
return -1;
return 1;
}
case PGF_SYMBOL_KS: {
PgfSymbolKS* pks = inf.data;
- if (**psent == 0)
+ if (*spot->ptr == 0)
return -1;
- int cmp = cmp_string(psent,ppos,pks->token, case_sensitive);
+ int cmp = cmp_string(spot,pks->token, case_sensitive);
if (cmp != 0)
return cmp;
break;
@@ -131,14 +131,13 @@ pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
{
PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
- size_t pos = 0;
- GuString sent = (GuString) p1;
+ PgfCohortSpot spot = {0, (GuString) p1};
const PgfSequence* sp2 = p2;
size_t sym_idx = 0;
- int res = pgf_symbols_cmp(&sent, &pos, sp2->syms, &sym_idx, self->case_sensitive);
- if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) {
+ int res = pgf_symbols_cmp(&spot, sp2->syms, &sym_idx, self->case_sensitive);
+ if (res == 0 && (*spot.ptr != 0 || sym_idx != gu_seq_length(sp2->syms))) {
res = 1;
}
@@ -210,7 +209,7 @@ pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
PgfCohortSpot current = *spot;
size_t sym_idx = 0;
- int cmp = pgf_symbols_cmp(&current.ptr, &current.pos, seq->syms, &sym_idx, state->case_sensitive);
+ int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, state->case_sensitive);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
@@ -273,6 +272,7 @@ pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
1, (state->sentence+state->len)-spot.ptr);
if (gu_buf_length(state->found) == 0) {
+ // skip one character and try again
gu_utf8_decode((const uint8_t**) &spot.ptr);
spot.pos++;
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);