summaryrefslogtreecommitdiff
path: root/src/runtime/c/pgf/lookup.c
diff options
context:
space:
mode:
authorKrasimir Angelov <kr.angelov@gmail.com>2018-02-22 11:35:54 +0100
committerKrasimir Angelov <kr.angelov@gmail.com>2018-02-22 11:35:54 +0100
commitbb4218433fe8afabbc5239b2ca41731bb3e2da4b (patch)
tree7aa00a51498cdae9e6c26776cac5b7689626eb12 /src/runtime/c/pgf/lookup.c
parenta16fe3415a76a4b101a2cedf96d876fc691b8155 (diff)
- tweak the tokenizer in pgf_lookup_sentence to treat .!?,: as separate tokens
+ fix for a bug which caused crashes
Diffstat (limited to 'src/runtime/c/pgf/lookup.c')
-rw-r--r--src/runtime/c/pgf/lookup.c8
1 files changed, 6 insertions, 2 deletions
diff --git a/src/runtime/c/pgf/lookup.c b/src/runtime/c/pgf/lookup.c
index 21c82450f..5918275c1 100644
--- a/src/runtime/c/pgf/lookup.c
+++ b/src/runtime/c/pgf/lookup.c
@@ -119,7 +119,7 @@ typedef struct {
static PgfAbsProduction*
pgf_lookup_new_production(PgfAbsFun* fun, GuPool *pool)
{
- size_t n_hypos = gu_seq_length(fun->type->hypos);
+ size_t n_hypos = fun->type->hypos ? gu_seq_length(fun->type->hypos) : 0;
PgfAbsProduction* prod = gu_new_flex(pool, PgfAbsProduction, args, n_hypos);
prod->fun = fun;
prod->count = 0;
@@ -699,8 +699,12 @@ pgf_lookup_tokenize(GuMap* lexicon_idx, GuString sentence, GuPool* pool)
break;
const uint8_t* start = p-1;
- while (c != 0 && !gu_ucs_is_space(c)) {
+ if (strchr(".!?,:",c) != NULL)
c = gu_utf8_decode(&p);
+ else {
+ while (c != 0 && strchr(".!?,:",c) == NULL && !gu_ucs_is_space(c)) {
+ c = gu_utf8_decode(&p);
+ }
}
const uint8_t* end = p-1;