summaryrefslogtreecommitdiff
path: root/src/runtime/c
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2013-10-28 12:35:37 +0000
committerkr.angelov <kr.angelov@gmail.com>2013-10-28 12:35:37 +0000
commit151f86c1e9e518c34a76ff5fdab9afd34000fe0d (patch)
tree5d16b65e0abaae250f83e48b7bb849fce79180ba /src/runtime/c
parentcd5a0de253152b9aa484b6c684d9943094ac87dc (diff)
fix the handling of 'pre' in the C runtime
Diffstat (limited to 'src/runtime/c')
-rw-r--r--src/runtime/c/gu/seq.c34
-rw-r--r--src/runtime/c/gu/seq.h13
-rw-r--r--src/runtime/c/pgf/data.h1
-rw-r--r--src/runtime/c/pgf/parser.c90
-rw-r--r--src/runtime/c/pgf/reader.c4
5 files changed, 130 insertions, 12 deletions
diff --git a/src/runtime/c/gu/seq.c b/src/runtime/c/gu/seq.c
index b798d2923..8ec6480cc 100644
--- a/src/runtime/c/gu/seq.c
+++ b/src/runtime/c/gu/seq.c
@@ -255,7 +255,7 @@ gu_buf_sort(GuBuf *buf, GuOrder *order)
}
void*
-gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key)
+gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key)
{
size_t i = 0;
size_t j = seq->len-1;
@@ -263,8 +263,8 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off
while (i <= j) {
size_t k = (i+j) / 2;
uint8_t* elem_p = &seq->data[elem_size * k];
- int cmp = order->compare(order, key, elem_p + field_offset);
-
+ int cmp = order->compare(order, key, elem_p);
+
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
@@ -273,10 +273,36 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off
return elem_p;
}
}
-
+
return NULL;
}
+bool
+gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size,
+ void *key, size_t *pindex)
+{
+ size_t i = 0;
+ size_t j = seq->len-1;
+
+ while (i <= j) {
+ size_t k = (i+j) / 2;
+ uint8_t* elem_p = &seq->data[elem_size * k];
+ int cmp = order->compare(order, key, elem_p);
+
+ if (cmp < 0) {
+ j = k-1;
+ } else if (cmp > 0) {
+ i = k+1;
+ } else {
+ *pindex = k;
+ return true;
+ }
+ }
+
+ *pindex = j;
+ return false;
+}
+
static void
gu_heap_siftdown(GuBuf *buf, GuOrder *order,
const void *value, int startpos, int pos)
diff --git a/src/runtime/c/gu/seq.h b/src/runtime/c/gu/seq.h
index cb18a90b5..2b0020bab 100644
--- a/src/runtime/c/gu/seq.h
+++ b/src/runtime/c/gu/seq.h
@@ -114,11 +114,18 @@ gu_seq_resize_tail(GuSeq seq, ptrdiff_t change);
void
gu_buf_sort(GuBuf *buf, GuOrder *order);
-#define gu_seq_binsearch(S, O, T, N, V) \
- ((T*) gu_seq_binsearch_(S, O, sizeof(T), offsetof(T,N), V))
+#define gu_seq_binsearch(S, O, T, V) \
+ ((T*) gu_seq_binsearch_(S, O, sizeof(T), V))
void*
-gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key);
+gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key);
+
+#define gu_seq_binsearch_index(S, O, T, V, PI) \
+ gu_seq_binsearch_index_(S, O, sizeof(T), V, PI)
+
+bool
+gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size,
+ void *key, size_t *pindex);
// Using a buffer as a heap
void
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h
index 163d45d95..ea932d111 100644
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -237,6 +237,7 @@ struct PgfConcr {
PgfCncOverloadMap* coerce_idx;
PgfCncFuns* cncfuns;
PgfSequences* sequences;
+ GuBuf* pre_sequences;
PgfCIdMap* cnccats;
PgfCallbacksMap* callbacks;
int total_cats;
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 205655cdc..b857ae7d0 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -2,7 +2,7 @@
#include <pgf/linearizer.h>
#include <gu/seq.h>
#include <gu/assert.h>
-
+#include <gu/choice.h>
#include <gu/file.h>
#include <gu/utf8.h>
#include <math.h>
@@ -1176,7 +1176,21 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state)
}
}
-next:
+next:;
+ size_t n_pres = gu_buf_length(ps->concr->pre_sequences);
+ for (size_t pi = 0; pi < n_pres; pi++) {
+ PgfSequence* seq = gu_buf_index(ps->concr->pre_sequences, PgfSequence, pi);
+
+ GuString current = ps->sentence + state->end_offset;
+ BIND_TYPE bind_type = state->needs_bind ? BIND_NONE : BIND_HARD;
+ if (pgf_symbols_cmp(&current, n, &bind_type, seq->syms) == 0) {
+ PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
+ entry->idx = seq->idx;
+ entry->bind_type = bind_type;
+ entry->offset = (current - ps->sentence);
+ }
+ }
+
j = s;
n++;
}
@@ -2426,8 +2440,8 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err)
{
PgfSequence* seq = (PgfSequence*)
- gu_seq_binsearch_(concr->sequences, pgf_sequence_order,
- sizeof(PgfSequence), 0, (void*) sentence);
+ gu_seq_binsearch(concr->sequences, pgf_sequence_order,
+ PgfSequence, (void*) sentence);
if (seq != NULL && seq->idx != NULL)
pgf_morpho_iter(seq->idx, callback, err);
@@ -2493,11 +2507,76 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
pgf_morpho_iter(entry->idx, callback, err);
}
+// The 'pre' construction needs a special handling since
+// it cannot be sorted alphabetically (a single pre contains
+// many alternative tokens).
+static GuBuf*
+pgf_parser_index_pre_(GuBuf* buf, PgfSymbols* syms,
+ GuChoice* ch, GuPool *pool)
+{
+ size_t n_syms = gu_seq_length(syms);
+ for (size_t i = 0; i < n_syms; i++) {
+ PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
+ GuVariantInfo inf = gu_variant_open(sym);
+ if (inf.tag == PGF_SYMBOL_KP) {
+ PgfSymbolKP* skp = inf.data;
+
+ if (buf == NULL) {
+ // Since most of the sequences doesn't contain 'pre'
+ // we create the buffer on demand. This minimizes
+ // the overhead.
+ buf = gu_new_buf(PgfSymbol, pool);
+ gu_buf_extend_n(buf, i);
+ for (size_t j = 0; j < i; j++) {
+ PgfSymbol sym = gu_seq_get(syms, PgfSymbol, j);
+ gu_buf_set(buf, PgfSymbol, j, sym);
+ }
+ }
+
+ int idx = gu_choice_next(ch, skp->n_forms+1);
+ if (idx == 0) {
+ buf = pgf_parser_index_pre_(buf, skp->default_form, ch, pool);
+ } else {
+ buf = pgf_parser_index_pre_(buf, skp->forms[idx-1].form, ch, pool);
+ }
+ } else {
+ if (buf != NULL) {
+ gu_buf_push(buf, PgfSymbol, sym);
+ }
+ }
+ }
+
+ return buf;
+}
+
+static void
+pgf_parser_index_pre(PgfConcr* concr, PgfSequence* seq,
+ GuChoice* ch, GuPool *pool)
+{
+ do {
+ GuChoiceMark mark = gu_choice_mark(ch);
+
+ GuBuf* buf =
+ pgf_parser_index_pre_(NULL, seq->syms, ch, pool);
+
+ if (buf != NULL) {
+ PgfSequence* pre_seq = gu_buf_extend(concr->pre_sequences);
+ pre_seq->syms = gu_buf_data_seq(buf);
+ pre_seq->idx = seq->idx;
+ }
+
+ gu_choice_reset(ch, mark);
+ } while (gu_choice_advance(ch));
+}
+
void
pgf_parser_index(PgfConcr* concr,
PgfCCat* ccat, PgfProduction prod,
GuPool *pool)
{
+ GuPool* tmp_pool = gu_local_pool();
+ GuChoice* choice = gu_new_choice(tmp_pool); // we need this for the pres
+
for (size_t lin_idx = 0; lin_idx < ccat->cnccat->n_lins; lin_idx++) {
GuVariantInfo i = gu_variant_open(prod);
switch (i.tag) {
@@ -2510,6 +2589,7 @@ pgf_parser_index(PgfConcr* concr,
PgfSequence* seq = papp->fun->lins[lin_idx];
if (seq->idx == NULL) {
seq->idx = gu_new_buf(PgfProductionIdxEntry, pool);
+ pgf_parser_index_pre(concr, seq, choice, pool);
}
PgfProductionIdxEntry* entry = gu_buf_extend(seq->idx);
@@ -2525,6 +2605,8 @@ pgf_parser_index(PgfConcr* concr,
gu_impossible();
}
}
+
+ gu_pool_free(tmp_pool);
}
prob_t
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index 1e29c119c..41619a0b8 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -1142,7 +1142,9 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
concr->sequences =
pgf_read_sequences(rdr);
gu_return_on_exn(rdr->err, NULL);
-
+
+ concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool);
+
concr->cncfuns =
pgf_read_cncfuns(rdr, abstr, concr);
gu_return_on_exn(rdr->err, NULL);