diff options
| author | krangelov <kr.angelov@gmail.com> | 2019-06-25 12:58:28 +0200 |
|---|---|---|
| committer | krangelov <kr.angelov@gmail.com> | 2019-06-25 12:58:28 +0200 |
| commit | b56591c6b6b3315bdb97164dca574c43292f4d98 (patch) | |
| tree | 3d27c146eba75902edfa4038406cd4771736b1f3 /src/runtime/c/pgf/scanner.c | |
| parent | b94bb50ec91440585b4fe55d9441c89cb3559c88 (diff) | |
the parser now ensures that all word senses are in the chart
Diffstat (limited to 'src/runtime/c/pgf/scanner.c')
| -rw-r--r-- | src/runtime/c/pgf/scanner.c | 34 |
1 files changed, 17 insertions, 17 deletions
diff --git a/src/runtime/c/pgf/scanner.c b/src/runtime/c/pgf/scanner.c index cdbf56cc8..dae857ff1 100644 --- a/src/runtime/c/pgf/scanner.c +++ b/src/runtime/c/pgf/scanner.c @@ -3,7 +3,7 @@ #include <gu/utf8.h> PGF_INTERNAL int -cmp_string(GuString* psent, size_t* ppos, GuString tok, +cmp_string(PgfCohortSpot* spot, GuString tok, bool case_sensitive) { for (;;) { @@ -11,7 +11,7 @@ cmp_string(GuString* psent, size_t* ppos, GuString tok, if (c2 == 0) return 0; - const uint8_t* p = (uint8_t*) *psent; + const uint8_t* p = (uint8_t*) spot->ptr; GuUCS c1 = gu_utf8_decode(&p); if (c1 == 0) return -1; @@ -22,8 +22,8 @@ cmp_string(GuString* psent, size_t* ppos, GuString tok, if (c1 != c2) return (c1-c2); - *psent = (GuString) p; - (*ppos)++; + spot->ptr = (GuString) p; + spot->pos++; } } @@ -40,7 +40,7 @@ skip_space(GuString* psent, size_t* ppos) } PGF_INTERNAL int -pgf_symbols_cmp(GuString* psent, size_t* ppos, +pgf_symbols_cmp(PgfCohortSpot* spot, PgfSymbols* syms, size_t* sym_idx, bool case_sensitive) { @@ -49,14 +49,14 @@ pgf_symbols_cmp(GuString* psent, size_t* ppos, PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx); if (*sym_idx > 0) { - if (!skip_space(psent,ppos)) { - if (**psent == 0) + if (!skip_space(&spot->ptr,&spot->pos)) { + if (*spot->ptr == 0) return -1; return 1; } - while (**psent != 0) { - if (!skip_space(psent,ppos)) + while (*spot->ptr != 0) { + if (!skip_space(&spot->ptr,&spot->pos)) break; } } @@ -66,16 +66,16 @@ pgf_symbols_cmp(GuString* psent, size_t* ppos, case PGF_SYMBOL_CAT: case PGF_SYMBOL_LIT: case PGF_SYMBOL_VAR: { - if (**psent == 0) + if (*spot->ptr == 0) return -1; return 1; } case PGF_SYMBOL_KS: { PgfSymbolKS* pks = inf.data; - if (**psent == 0) + if (*spot->ptr == 0) return -1; - int cmp = cmp_string(psent,ppos,pks->token, case_sensitive); + int cmp = cmp_string(spot,pks->token, case_sensitive); if (cmp != 0) return cmp; break; @@ -131,14 +131,13 @@ pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2) { PgfSequenceOrder* 
self = gu_container(order, PgfSequenceOrder, order); - size_t pos = 0; - GuString sent = (GuString) p1; + PgfCohortSpot spot = {0, (GuString) p1}; const PgfSequence* sp2 = p2; size_t sym_idx = 0; - int res = pgf_symbols_cmp(&sent, &pos, sp2->syms, &sym_idx, self->case_sensitive); - if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) { + int res = pgf_symbols_cmp(&spot, sp2->syms, &sym_idx, self->case_sensitive); + if (res == 0 && (*spot.ptr != 0 || sym_idx != gu_seq_length(sp2->syms))) { res = 1; } @@ -210,7 +209,7 @@ pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot, PgfCohortSpot current = *spot; size_t sym_idx = 0; - int cmp = pgf_symbols_cmp(&current.ptr, &current.pos, seq->syms, &sym_idx, state->case_sensitive); + int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, state->case_sensitive); if (cmp < 0) { j = k-1; } else if (cmp > 0) { @@ -273,6 +272,7 @@ pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool) 1, (state->sentence+state->len)-spot.ptr); if (gu_buf_length(state->found) == 0) { + // skip one character and try again gu_utf8_decode((const uint8_t**) &spot.ptr); spot.pos++; gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot); |
