summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/runtime/c/Makefile.am1
-rw-r--r--src/runtime/c/pgf/parser.c276
-rw-r--r--src/runtime/c/pgf/pgf.h16
-rw-r--r--src/runtime/c/pgf/scanner.c435
-rw-r--r--src/runtime/haskell-bind/PGF2.hsc32
-rw-r--r--src/runtime/haskell-bind/PGF2/FFI.hsc5
6 files changed, 502 insertions, 263 deletions
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 8f9c8bf56..adec93e6d 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
pgf/data.h \
pgf/expr.c \
pgf/expr.h \
+ pgf/scanner.c \
pgf/parser.c \
pgf/lookup.c \
pgf/jit.c \
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index cb59b2a55..ec623253d 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -1,6 +1,5 @@
#include <pgf/data.h>
#include <pgf/expr.h>
-#include <pgf/linearizer.h>
#include <gu/enum.h>
#include <gu/seq.h>
#include <gu/assert.h>
@@ -502,39 +501,12 @@ pgf_print_expr_state0(PgfExprState* st,
#endif
#endif
-static int
-cmp_string(GuString* psent, GuString tok, bool case_sensitive)
-{
- for (;;) {
- GuUCS c2 = gu_utf8_decode((const uint8_t**) &tok);
- if (c2 == 0)
- return 0;
-
- const uint8_t* p = (uint8_t*) *psent;
- GuUCS c1 = gu_utf8_decode(&p);
- if (c1 == 0)
- return -1;
-
- if (!case_sensitive)
- c1 = gu_ucs_to_lower(c1);
-
- if (c1 != c2)
- return (c1-c2);
-
- *psent = (GuString) p;
- }
-}
+PGF_INTERNAL_DECL int
+cmp_string(GuString* psent, size_t* ppos, GuString tok,
+ bool case_sensitive);
-static bool
-skip_space(GuString* psent)
-{
- const uint8_t* p = (uint8_t*) *psent;
- if (!gu_ucs_is_space(gu_utf8_decode(&p)))
- return false;
-
- *psent = (GuString) p;
- return true;
-}
+PGF_INTERNAL_DECL bool
+skip_space(GuString* psent, size_t* ppos);
static int
cmp_item_prob(GuOrder* self, const void* a, const void* b)
@@ -1060,63 +1032,10 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
}
}
-static int
-pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_sensitive)
-{
- size_t n_syms = gu_seq_length(syms);
- while (*sym_idx < n_syms) {
- PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
-
- if (*sym_idx > 0) {
- if (!skip_space(psent)) {
- if (**psent == 0)
- return -1;
- return 1;
- }
-
- while (**psent != 0) {
- if (!skip_space(psent))
- break;
- }
- }
-
- GuVariantInfo inf = gu_variant_open(sym);
- switch (inf.tag) {
- case PGF_SYMBOL_CAT:
- case PGF_SYMBOL_LIT:
- case PGF_SYMBOL_VAR: {
- if (**psent == 0)
- return -1;
- return 1;
- }
- case PGF_SYMBOL_KS: {
- PgfSymbolKS* pks = inf.data;
- if (**psent == 0)
- return -1;
-
- int cmp = cmp_string(psent, pks->token, case_sensitive);
- if (cmp != 0)
- return cmp;
- break;
- }
- case PGF_SYMBOL_KP:
- case PGF_SYMBOL_BIND:
- case PGF_SYMBOL_NE:
- case PGF_SYMBOL_SOFT_BIND:
- case PGF_SYMBOL_SOFT_SPACE:
- case PGF_SYMBOL_CAPIT:
- case PGF_SYMBOL_ALL_CAPIT: {
- return -1;
- }
- default:
- gu_impossible();
- }
-
- (*sym_idx)++;
- }
-
- return 0;
-}
+PGF_INTERNAL_DECL int
+pgf_symbols_cmp(GuString* psent, size_t* ppos,
+ PgfSymbols* syms, size_t* sym_idx,
+ bool case_sensitive);
static void
pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
@@ -1133,8 +1052,9 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
GuString start = ps->sentence + state->end_offset;
GuString current = start;
+ size_t pos = 0;
size_t sym_idx = 0;
- int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
+ int cmp = pgf_symbols_cmp(&current, &pos, seq->syms, &sym_idx, ps->case_sensitive);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
@@ -1206,7 +1126,8 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
size_t end_offset = start_offset;
GuString current = ps->sentence + end_offset;
- while (skip_space(&current)) {
+ size_t pos = 0;
+ while (skip_space(&current, &pos)) {
end_offset++;
}
@@ -1257,6 +1178,7 @@ static void
pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
{
GuString current = ps->sentence + ps->before->end_offset;
+ size_t pos = 0;
if (ps->prefix != NULL && *current == 0) {
if (gu_string_is_prefix(ps->prefix, tok)) {
@@ -1269,7 +1191,7 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
ps->tp->prob = item->inside_prob + item->conts->outside_prob;
}
} else {
- if (!ps->before->needs_bind && cmp_string(&current, tok, ps->case_sensitive) == 0) {
+ if (!ps->before->needs_bind && cmp_string(&current, &pos, tok, ps->case_sensitive) == 0) {
PgfParseState* state =
pgf_new_parse_state(ps, (current - ps->sentence),
BIND_NONE,
@@ -1454,7 +1376,6 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
case PGF_SYMBOL_KP: {
PgfSymbolKP* skp = gu_variant_data(sym);
- PgfSymbol sym;
if (item->alt == 0) {
PgfItem* new_item;
@@ -2345,173 +2266,6 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
return &ps->en;
}
-static void
-pgf_morpho_iter(PgfProductionIdx* idx,
- PgfMorphoCallback* callback,
- GuExn* err)
-{
- size_t n_entries = gu_buf_length(idx);
- for (size_t i = 0; i < n_entries; i++) {
- PgfProductionIdxEntry* entry =
- gu_buf_index(idx, PgfProductionIdxEntry, i);
-
- PgfCId lemma = entry->papp->fun->absfun->name;
- GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
-
- prob_t prob = entry->ccat->cnccat->abscat->prob +
- entry->papp->fun->absfun->ep.prob;
- callback->callback(callback,
- lemma, analysis, prob, err);
- if (!gu_ok(err))
- return;
- }
-}
-
-typedef struct {
- GuOrder order;
- bool case_sensitive;
-} PgfSequenceOrder;
-
-static int
-pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
-{
- PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
- GuString sent = (GuString) p1;
- const PgfSequence* sp2 = p2;
-
- size_t sym_idx = 0;
- int res = pgf_symbols_cmp(&sent, sp2->syms, &sym_idx, self->case_sensitive);
- if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) {
- res = 1;
- }
-
- return res;
-}
-
-PGF_API void
-pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
- PgfMorphoCallback* callback, GuExn* err)
-{
- if (concr->sequences == NULL) {
- GuExnData* err_data = gu_raise(err, PgfExn);
- if (err_data) {
- err_data->data = "The concrete syntax is not loaded";
- return;
- }
- }
-
- bool case_sensitive =
- (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
-
- PgfSequenceOrder order = { { pgf_sequence_cmp_fn }, case_sensitive };
- PgfSequence* seq = (PgfSequence*)
- gu_seq_binsearch(concr->sequences, &order.order,
- PgfSequence, (void*) sentence);
-
- if (seq != NULL && seq->idx != NULL)
- pgf_morpho_iter(seq->idx, callback, err);
-}
-
-typedef struct {
- GuEnum en;
- PgfSequences* sequences;
- GuString prefix;
- size_t seq_idx;
-} PgfFullFormState;
-
-struct PgfFullFormEntry {
- GuString tokens;
- PgfProductionIdx* idx;
-};
-
-static void
-gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
-{
- PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
- PgfFullFormEntry* entry = NULL;
-
- if (st->sequences != NULL) {
- size_t n_seqs = gu_seq_length(st->sequences);
- while (st->seq_idx < n_seqs) {
- PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
- GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
-
- if (!gu_string_is_prefix(st->prefix, tokens)) {
- st->seq_idx = n_seqs;
- break;
- }
-
- if (*tokens != 0 && seq->idx != NULL) {
- entry = gu_new(PgfFullFormEntry, pool);
- entry->tokens = tokens;
- entry->idx = seq->idx;
-
- st->seq_idx++;
- break;
- }
-
- st->seq_idx++;
- }
- }
-
- *((PgfFullFormEntry**) to) = entry;
-}
-
-PGF_API GuEnum*
-pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
-{
- PgfFullFormState* st = gu_new(PgfFullFormState, pool);
- st->en.next = gu_fullform_enum_next;
- st->sequences = concr->sequences;
- st->prefix = "";
- st->seq_idx = 0;
- return &st->en;
-}
-
-PGF_API GuString
-pgf_fullform_get_string(PgfFullFormEntry* entry)
-{
- return entry->tokens;
-}
-
-PGF_API void
-pgf_fullform_get_analyses(PgfFullFormEntry* entry,
- PgfMorphoCallback* callback, GuExn* err)
-{
- pgf_morpho_iter(entry->idx, callback, err);
-}
-
-PGF_API GuEnum*
-pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
- GuPool* pool, GuExn* err)
-{
- if (concr->sequences == NULL) {
- GuExnData* err_data = gu_raise(err, PgfExn);
- if (err_data) {
- err_data->data = "The concrete syntax is not loaded";
- return NULL;
- }
- }
-
- PgfFullFormState* state = gu_new(PgfFullFormState, pool);
- state->en.next = gu_fullform_enum_next;
- state->sequences = concr->sequences;
- state->prefix = prefix;
- state->seq_idx = 0;
-
- bool case_sensitive =
- (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
-
- PgfSequenceOrder order = { { pgf_sequence_cmp_fn }, case_sensitive };
- if (!gu_seq_binsearch_index(concr->sequences, &order.order,
- PgfSequence, (void*) prefix,
- &state->seq_idx)) {
- state->seq_idx++;
- }
-
- return &state->en;
-}
-
PGF_API void
pgf_parser_index(PgfConcr* concr,
PgfCCat* ccat, PgfProduction prod,
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 6dd040b49..8fdc52b62 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -167,6 +167,22 @@ PGF_API_DECL void
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err);
+typedef struct {
+ size_t pos;
+ GuString ptr;
+} PgfCohortSpot;
+
+typedef struct {
+ PgfCohortSpot start;
+ PgfCohortSpot end;
+ GuBuf* buf;
+} PgfCohortRange;
+
+PGF_API_DECL GuEnum*
+pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
+ PgfMorphoCallback* callback,
+ GuPool* pool, GuExn* err);
+
typedef struct PgfFullFormEntry PgfFullFormEntry;
PGF_API_DECL GuEnum*
diff --git a/src/runtime/c/pgf/scanner.c b/src/runtime/c/pgf/scanner.c
new file mode 100644
index 000000000..71444ca25
--- /dev/null
+++ b/src/runtime/c/pgf/scanner.c
@@ -0,0 +1,435 @@
+#include <pgf/data.h>
+#include <pgf/linearizer.h>
+#include <gu/utf8.h>
+
+PGF_INTERNAL int
+cmp_string(GuString* psent, size_t* ppos, GuString tok,
+ bool case_sensitive)
+{
+ for (;;) {
+ GuUCS c2 = gu_utf8_decode((const uint8_t**) &tok);
+ if (c2 == 0)
+ return 0;
+
+ const uint8_t* p = (uint8_t*) *psent;
+ GuUCS c1 = gu_utf8_decode(&p);
+ if (c1 == 0)
+ return -1;
+
+ if (!case_sensitive)
+ c1 = gu_ucs_to_lower(c1);
+
+ if (c1 != c2)
+ return (c1-c2);
+
+ *psent = (GuString) p;
+ (*ppos)++;
+ }
+}
+
+PGF_INTERNAL bool
+skip_space(GuString* psent, size_t* ppos)
+{
+ const uint8_t* p = (uint8_t*) *psent;
+ if (!gu_ucs_is_space(gu_utf8_decode(&p)))
+ return false;
+
+ *psent = (GuString) p;
+ (*ppos)++;
+ return true;
+}
+
+PGF_INTERNAL int
+pgf_symbols_cmp(GuString* psent, size_t* ppos,
+ PgfSymbols* syms, size_t* sym_idx,
+ bool case_sensitive)
+{
+ size_t n_syms = gu_seq_length(syms);
+ while (*sym_idx < n_syms) {
+ PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
+
+ if (*sym_idx > 0) {
+ if (!skip_space(psent,ppos)) {
+ if (**psent == 0)
+ return -1;
+ return 1;
+ }
+
+ while (**psent != 0) {
+ if (!skip_space(psent,ppos))
+ break;
+ }
+ }
+
+ GuVariantInfo inf = gu_variant_open(sym);
+ switch (inf.tag) {
+ case PGF_SYMBOL_CAT:
+ case PGF_SYMBOL_LIT:
+ case PGF_SYMBOL_VAR: {
+ if (**psent == 0)
+ return -1;
+ return 1;
+ }
+ case PGF_SYMBOL_KS: {
+ PgfSymbolKS* pks = inf.data;
+ if (**psent == 0)
+ return -1;
+
+ int cmp = cmp_string(psent,ppos,pks->token, case_sensitive);
+ if (cmp != 0)
+ return cmp;
+ break;
+ }
+ case PGF_SYMBOL_KP:
+ case PGF_SYMBOL_BIND:
+ case PGF_SYMBOL_NE:
+ case PGF_SYMBOL_SOFT_BIND:
+ case PGF_SYMBOL_SOFT_SPACE:
+ case PGF_SYMBOL_CAPIT:
+ case PGF_SYMBOL_ALL_CAPIT: {
+ return -1;
+ }
+ default:
+ gu_impossible();
+ }
+
+ (*sym_idx)++;
+ }
+
+ return 0;
+}
+
+static void
+pgf_morpho_iter(PgfProductionIdx* idx,
+ PgfMorphoCallback* callback,
+ GuExn* err)
+{
+ size_t n_entries = gu_buf_length(idx);
+ for (size_t i = 0; i < n_entries; i++) {
+ PgfProductionIdxEntry* entry =
+ gu_buf_index(idx, PgfProductionIdxEntry, i);
+
+ PgfCId lemma = entry->papp->fun->absfun->name;
+ GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
+
+ prob_t prob = entry->ccat->cnccat->abscat->prob +
+ entry->papp->fun->absfun->ep.prob;
+ callback->callback(callback,
+ lemma, analysis, prob, err);
+ if (!gu_ok(err))
+ return;
+ }
+}
+
+typedef struct {
+ GuOrder order;
+ bool case_sensitive;
+} PgfSequenceOrder;
+
+static int
+pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
+{
+ PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
+
+ size_t pos = 0;
+ GuString sent = (GuString) p1;
+
+ const PgfSequence* sp2 = p2;
+
+ size_t sym_idx = 0;
+ int res = pgf_symbols_cmp(&sent, &pos, sp2->syms, &sym_idx, self->case_sensitive);
+ if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) {
+ res = 1;
+ }
+
+ return res;
+}
+
+PGF_API void
+pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
+ PgfMorphoCallback* callback, GuExn* err)
+{
+ if (concr->sequences == NULL) {
+ GuExnData* err_data = gu_raise(err, PgfExn);
+ if (err_data) {
+ err_data->data = "The concrete syntax is not loaded";
+ return;
+ }
+ }
+
+ bool case_sensitive =
+ (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
+
+ PgfSequenceOrder order = { { pgf_sequence_cmp_fn }, case_sensitive };
+ PgfSequence* seq = (PgfSequence*)
+ gu_seq_binsearch(concr->sequences, &order.order,
+ PgfSequence, (void*) sentence);
+
+ if (seq != NULL && seq->idx != NULL)
+ pgf_morpho_iter(seq->idx, callback, err);
+}
+
+typedef struct {
+ GuEnum en;
+ PgfConcr* concr;
+ GuString sentence;
+ GuString current;
+ size_t len;
+ PgfMorphoCallback* callback;
+ GuExn* err;
+ bool case_sensitive;
+ GuBuf* spots;
+ GuBuf* found;
+} PgfCohortsState;
+
+static int
+cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
+{
+ PgfCohortSpot *s1 = (PgfCohortSpot *) a;
+ PgfCohortSpot *s2 = (PgfCohortSpot *) b;
+
+ return (s1->ptr-s2->ptr);
+}
+
+static GuOrder
+pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
+
+static void
+pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
+ int i, int j, ptrdiff_t min, ptrdiff_t max)
+{
+ // This is a variation of a binary search algorithm which
+ // can retrieve all prefixes of a string with minimal
+ // comparisons, i.e. there is no need to lookup every
+ // prefix separately.
+
+ while (i <= j) {
+ int k = (i+j) / 2;
+ PgfSequence* seq = gu_seq_index(state->concr->sequences, PgfSequence, k);
+
+ PgfCohortSpot current = *spot;
+
+ size_t sym_idx = 0;
+ int cmp = pgf_symbols_cmp(&current.ptr, &current.pos, seq->syms, &sym_idx, state->case_sensitive);
+ if (cmp < 0) {
+ j = k-1;
+ } else if (cmp > 0) {
+ ptrdiff_t len = current.ptr - spot->ptr;
+
+ if (min <= len)
+ pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
+
+ if (len+1 <= max)
+ pgf_lookup_cohorts_helper(state, spot, k+1, j, len+1, max);
+
+ break;
+ } else {
+ ptrdiff_t len = current.ptr - spot->ptr;
+
+ if (min <= len-1)
+ pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len-1);
+
+ if (seq->idx != NULL) {
+ PgfCohortRange* range = gu_buf_insert(state->found, 0);
+ range->start = *spot;
+ range->end = current;
+ range->buf = seq->idx;
+
+ while (*current.ptr != 0) {
+ if (!skip_space(&current.ptr, &current.pos))
+ break;
+ }
+
+ gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &current);
+ }
+
+ if (len+1 <= max)
+ pgf_lookup_cohorts_helper(state, spot, k+1, j, len+1, max);
+
+ break;
+ }
+ }
+}
+
+static void
+pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
+{
+ PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
+
+ while (gu_buf_length(state->found) == 0 &&
+ gu_buf_length(state->spots) > 0) {
+ PgfCohortSpot spot;
+ gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
+
+ if (spot.ptr == state->current)
+ continue;
+
+ if (*spot.ptr == 0)
+ break;
+
+ pgf_lookup_cohorts_helper
+ (state, &spot,
+ 0, gu_seq_length(state->concr->sequences)-1,
+ 1, (state->sentence+state->len)-spot.ptr);
+
+ if (gu_buf_length(state->found) == 0) {
+ gu_utf8_decode((const uint8_t**) &spot.ptr);
+ spot.pos++;
+ gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
+ }
+ }
+
+ PgfCohortRange* pRes = (PgfCohortRange*)to;
+
+ if (gu_buf_length(state->found) == 0) {
+ pRes->start.pos = 0;
+ pRes->start.ptr = NULL;
+ pRes->end.pos = 0;
+ pRes->end.ptr = NULL;
+ pRes->buf = NULL;
+ state->current = NULL;
+ return;
+ } else {
+ *pRes = gu_buf_pop(state->found, PgfCohortRange);
+ state->current = pRes->start.ptr;
+ pgf_morpho_iter(pRes->buf, state->callback, state->err);
+ }
+}
+
+PGF_API GuEnum*
+pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
+ PgfMorphoCallback* callback,
+ GuPool* pool, GuExn* err)
+{
+ if (concr->sequences == NULL) {
+ GuExnData* err_data = gu_raise(err, PgfExn);
+ if (err_data) {
+ err_data->data = "The concrete syntax is not loaded";
+ return NULL;
+ }
+ }
+
+ bool case_sensitive =
+ (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
+
+ PgfCohortsState* state = gu_new(PgfCohortsState, pool);
+ state->en.next = pgf_lookup_cohorts_enum_next;
+ state->concr = concr;
+ state->sentence= sentence;
+ state->len = strlen(sentence);
+ state->callback= callback;
+ state->err = err;
+ state->case_sensitive = case_sensitive;
+ state->spots = gu_new_buf(PgfCohortSpot, pool);
+ state->found = gu_new_buf(PgfCohortRange, pool);
+
+ PgfCohortSpot spot = {0,sentence};
+ while (*spot.ptr != 0) {
+ if (!skip_space(&spot.ptr, &spot.pos))
+ break;
+ }
+
+ gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
+
+ return &state->en;
+}
+
+typedef struct {
+ GuEnum en;
+ PgfSequences* sequences;
+ GuString prefix;
+ size_t seq_idx;
+} PgfFullFormState;
+
+struct PgfFullFormEntry {
+ GuString tokens;
+ PgfProductionIdx* idx;
+};
+
+static void
+gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
+{
+ PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
+ PgfFullFormEntry* entry = NULL;
+
+ if (st->sequences != NULL) {
+ size_t n_seqs = gu_seq_length(st->sequences);
+ while (st->seq_idx < n_seqs) {
+ PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
+ GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
+
+ if (!gu_string_is_prefix(st->prefix, tokens)) {
+ st->seq_idx = n_seqs;
+ break;
+ }
+
+ if (*tokens != 0 && seq->idx != NULL) {
+ entry = gu_new(PgfFullFormEntry, pool);
+ entry->tokens = tokens;
+ entry->idx = seq->idx;
+
+ st->seq_idx++;
+ break;
+ }
+
+ st->seq_idx++;
+ }
+ }
+
+ *((PgfFullFormEntry**) to) = entry;
+}
+
+PGF_API GuEnum*
+pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
+{
+ PgfFullFormState* st = gu_new(PgfFullFormState, pool);
+ st->en.next = gu_fullform_enum_next;
+ st->sequences = concr->sequences;
+ st->prefix = "";
+ st->seq_idx = 0;
+ return &st->en;
+}
+
+PGF_API GuString
+pgf_fullform_get_string(PgfFullFormEntry* entry)
+{
+ return entry->tokens;
+}
+
+PGF_API void
+pgf_fullform_get_analyses(PgfFullFormEntry* entry,
+ PgfMorphoCallback* callback, GuExn* err)
+{
+ pgf_morpho_iter(entry->idx, callback, err);
+}
+
+PGF_API GuEnum*
+pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
+ GuPool* pool, GuExn* err)
+{
+ if (concr->sequences == NULL) {
+ GuExnData* err_data = gu_raise(err, PgfExn);
+ if (err_data) {
+ err_data->data = "The concrete syntax is not loaded";
+ return NULL;
+ }
+ }
+
+ PgfFullFormState* state = gu_new(PgfFullFormState, pool);
+ state->en.next = gu_fullform_enum_next;
+ state->sequences = concr->sequences;
+ state->prefix = prefix;
+ state->seq_idx = 0;
+
+ bool case_sensitive =
+ (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
+
+ PgfSequenceOrder order = { { pgf_sequence_cmp_fn }, case_sensitive };
+ if (!gu_seq_binsearch_index(concr->sequences, &order.order,
+ PgfSequence, (void*) prefix,
+ &state->seq_idx)) {
+ state->seq_idx++;
+ }
+
+ return &state->en;
+}
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index 75afabb3d..5644b6ce8 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -70,7 +70,7 @@ module PGF2 (-- * PGF
-- ** Generation
generateAll,
-- ** Morphological Analysis
- MorphoAnalysis, lookupMorpho, fullFormLexicon,
+ MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
-- ** Visualizations
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree, graphvizWordAlignment,
@@ -481,6 +481,36 @@ lookupMorpho (Concr concr master) sent =
freeHaskellFunPtr fptr
readIORef ref
+lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
+lookupCohorts lang@(Concr concr master) sent =
+ unsafePerformIO $
+ do pl <- gu_new_pool
+ ref <- newIORef []
+ cback <- gu_malloc pl (#size PgfMorphoCallback)
+ fptr <- wrapLookupMorphoCallback (getAnalysis ref)
+ (#poke PgfMorphoCallback, callback) cback fptr
+ c_sent <- newUtf8CString sent pl
+ enum <- pgf_lookup_cohorts concr c_sent cback pl nullPtr
+ fpl <- newForeignPtr gu_pool_finalizer pl
+ fromCohortRange enum fpl fptr ref
+ where
+ fromCohortRange enum fpl fptr ref =
+ allocaBytes (#size PgfCohortRange) $ \ptr ->
+ withForeignPtr fpl $ \pl ->
+ do gu_enum_next enum ptr pl
+ buf <- (#peek PgfCohortRange, buf) ptr
+ if buf == nullPtr
+ then do finalizeForeignPtr fpl
+ freeHaskellFunPtr fptr
+ touchConcr lang
+ return []
+ else do start <- (#peek PgfCohortRange, start.pos) ptr
+ end <- (#peek PgfCohortRange, end.pos) ptr
+ ans <- readIORef ref
+ writeIORef ref []
+ cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr ref)
+ return ((start,ans,end):cohs)
+
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon lang =
unsafePerformIO $
diff --git a/src/runtime/haskell-bind/PGF2/FFI.hsc b/src/runtime/haskell-bind/PGF2/FFI.hsc
index 39b18fcf3..713adcecc 100644
--- a/src/runtime/haskell-bind/PGF2/FFI.hsc
+++ b/src/runtime/haskell-bind/PGF2/FFI.hsc
@@ -100,7 +100,7 @@ foreign import ccall unsafe "gu/string.h gu_string_buf_out"
foreign import ccall unsafe "gu/file.h gu_file_in"
gu_file_in :: Ptr () -> Ptr GuPool -> IO (Ptr GuIn)
-foreign import ccall unsafe "gu/enum.h gu_enum_next"
+foreign import ccall safe "gu/enum.h gu_enum_next"
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
@@ -401,6 +401,9 @@ foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
+foreign import ccall "pgf/pgf.h pgf_lookup_cohorts"
+ pgf_lookup_cohorts :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuEnum)
+
type LookupMorphoCallback = Ptr PgfMorphoCallback -> CString -> CString -> Float -> Ptr GuExn -> IO ()
foreign import ccall "wrapper"