summaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
author: krasimir <krasimir@chalmers.se> 2017-05-03 16:28:20 +0000
committer: krasimir <krasimir@chalmers.se> 2017-05-03 16:28:20 +0000
commit: d0269e8513c60c62621c4d6ae5d264a315cbab4e (patch)
tree: d6d5311cdf82174b483abe6fcf6cb10d24b96eef /src/runtime
parent: 5f4c441361a62accb6783d1dab46050cf68177f9 (diff)
first steps for sentence lookup in the C runtime
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/c/CMakeLists.txt | 1
-rw-r--r-- src/runtime/c/Makefile.am | 1
-rw-r--r-- src/runtime/c/pgf/lookup.c | 188
-rw-r--r-- src/runtime/c/pgf/pgf.h | 3
4 files changed, 193 insertions, 0 deletions
diff --git a/src/runtime/c/CMakeLists.txt b/src/runtime/c/CMakeLists.txt
index 82dfe259a..45a13a067 100644
--- a/src/runtime/c/CMakeLists.txt
+++ b/src/runtime/c/CMakeLists.txt
@@ -49,6 +49,7 @@ set(libpgf_la_SOURCES
pgf/expr.c
pgf/expr.h
pgf/parser.c
+ pgf/lookup.c
pgf/jit.c
pgf/parseval.c
pgf/literals.c
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 941c404ae..9f6ce9a76 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
pgf/expr.c \
pgf/expr.h \
pgf/parser.c \
+ pgf/lookup.c \
pgf/jit.c \
pgf/parseval.c \
pgf/literals.c \
diff --git a/src/runtime/c/pgf/lookup.c b/src/runtime/c/pgf/lookup.c
new file mode 100644
index 000000000..9e6f58bb7
--- /dev/null
+++ b/src/runtime/c/pgf/lookup.c
@@ -0,0 +1,188 @@
+#include <gu/map.h>
+#include <gu/mem.h>
+#include <gu/file.h>
+#include <gu/string.h>
+#include <pgf/data.h>
+#include <stdio.h>
+
+#define PGF_LOOKUP_DEBUG
+
+/* One entry of the bottom-up function index built in pgf_lookup_sentence:
+ * records that `fun` has, at position `arg_idx` of fun->type->hypos, an
+ * argument whose category is the key under which this entry is stored. */
+typedef struct {
+ PgfAbsFun* fun;
+ size_t arg_idx;
+} PgfAbsBottomUpEntry;
+
+/* An abstract production: a function plus one PgfMetaId per argument.
+ *
+ * `args` is a C99 flexible array member (the standard replacement for the
+ * GNU zero-length `args[0]` form).  Instances are allocated with
+ * gu_new_flex() in pgf_lookup_new_production, which reserves
+ * gu_seq_length(fun->type->hypos) slots and sets each of them to 0. */
+typedef struct {
+    PgfAbsFun* fun;
+    PgfMetaId args[];
+} PgfAbsProduction;
+
+#ifdef PGF_LOOKUP_DEBUG
+/* Debug helper: writes one line of the form
+ *     ?<id> = <function name> ?<arg> ?<arg> ...
+ * to `out`, printing one "?<arg>" for each hypothesis in the type of
+ * prod->fun.  Errors are reported through `err` by the gu output calls. */
+static void
+pgf_print_abs_production(PgfMetaId id,
+                         PgfAbsProduction* prod,
+                         GuOut* out, GuExn* err)
+{
+    PgfAbsFun* fun = prod->fun;
+    gu_printf(out, err, "?%d = %s", id, fun->name);
+
+    const size_t arity = gu_seq_length(fun->type->hypos);
+    size_t arg_pos = 0;
+    while (arg_pos < arity) {
+        gu_printf(out, err, " ?%d", prod->args[arg_pos]);
+        arg_pos++;
+    }
+
+    gu_putc('\n', out, err);
+}
+#endif
+
+/* Walks the symbols of `syms` and fills `lexicon_idx`.
+ *
+ * For every PGF_SYMBOL_KS symbol, the buffer stored under its token is
+ * looked up (and created from `pool` when absent), and the abstract
+ * function of every entry of `idx` is appended to it.  For every
+ * PGF_SYMBOL_KP symbol the function recurses into the default form and
+ * into each alternative form.  All other symbol kinds are ignored. */
+static void
+pgf_lookup_index_syms(GuMap* lexicon_idx, PgfSymbols* syms, PgfProductionIdx* idx, GuPool* pool) {
+    const size_t sym_count = gu_seq_length(syms);
+
+    for (size_t pos = 0; pos < sym_count; pos++) {
+        PgfSymbol current = gu_seq_get(syms, PgfSymbol, pos);
+        GuVariantInfo info = gu_variant_open(current);
+
+        if (info.tag == PGF_SYMBOL_KP) {
+            PgfSymbolKP* kp = (PgfSymbolKP*) info.data;
+
+            pgf_lookup_index_syms(lexicon_idx, kp->default_form, idx, pool);
+            for (size_t alt = 0; alt < kp->n_forms; alt++) {
+                pgf_lookup_index_syms(lexicon_idx, kp->forms[alt].form, idx, pool);
+            }
+        } else if (info.tag == PGF_SYMBOL_KS) {
+            PgfSymbolKS* ks = (PgfSymbolKS*) info.data;
+
+            /* Fetch the per-token buffer, creating it on first sight. */
+            GuBuf* token_funs = gu_map_get(lexicon_idx, ks->token, GuBuf*);
+            if (token_funs == NULL) {
+                token_funs = gu_new_buf(PgfAbsFun*, pool);
+                gu_map_put(lexicon_idx, ks->token, GuBuf*, token_funs);
+            }
+
+            const size_t entry_count = gu_buf_length(idx);
+            for (size_t e = 0; e < entry_count; e++) {
+                PgfProductionIdxEntry* item =
+                    gu_buf_index(idx, PgfProductionIdxEntry, e);
+                gu_buf_push(token_funs, PgfAbsFun*, item->papp->fun->absfun);
+            }
+        }
+    }
+}
+
+/* State threaded through the pgf_lookup_add_spine_* functions. */
+typedef struct {
+ GuMap* function_idx; /* category name -> GuBuf of PgfAbsBottomUpEntry */
+ GuMap* cat_ids; /* category name -> PgfMetaId assigned to that category */
+ PgfMetaId next_id; /* last id handed out; pre-incremented for each new category */
+ GuPool* pool; /* pool from which productions are allocated */
+} PgfSpineBuilder;
+
+/* Allocates a production for `fun` from builder->pool, with one argument
+ * slot per hypothesis in fun's type.  Every slot is set to 0.
+ * Returns the new production. */
+static PgfAbsProduction*
+pgf_lookup_new_production(PgfSpineBuilder* builder, PgfAbsFun* fun) {
+    const size_t arity = gu_seq_length(fun->type->hypos);
+
+    PgfAbsProduction* result =
+        gu_new_flex(builder->pool, PgfAbsProduction, args, arity);
+    result->fun = fun;
+
+    size_t slot = arity;
+    while (slot > 0) {
+        slot--;
+        result->args[slot] = 0;
+    }
+
+    return result;
+}
+
+/* Returns the PgfMetaId associated with category `cat`, assigning a fresh
+ * one if the category has not been seen by this builder yet.
+ *
+ * On first sight of a category, every function recorded in
+ * builder->function_idx as taking an argument of that category is visited:
+ * the function's own result category is processed recursively, and a
+ * production is created whose slot for that argument holds this category's
+ * id.  With PGF_LOOKUP_DEBUG defined, each such production is printed to
+ * stderr.  The new id is stored in builder->cat_ids before recursing, so a
+ * category reached again during the recursion returns immediately. */
+static PgfMetaId
+pgf_lookup_add_spine_nodes(PgfSpineBuilder* builder, PgfCId cat) {
+    PgfMetaId cat_id = gu_map_get(builder->cat_ids, cat, PgfMetaId);
+    if (cat_id != 0) {
+        return cat_id;
+    }
+
+    builder->next_id += 1;
+    cat_id = builder->next_id;
+    gu_map_put(builder->cat_ids, cat, PgfMetaId, cat_id);
+
+    GuBuf* users = gu_map_get(builder->function_idx, cat, GuBuf*);
+    if (users == NULL) {
+        return cat_id;
+    }
+
+    const size_t user_count = gu_buf_length(users);
+    for (size_t k = 0; k < user_count; k++) {
+        PgfAbsBottomUpEntry* user = gu_buf_index(users, PgfAbsBottomUpEntry, k);
+
+        PgfMetaId result_id =
+            pgf_lookup_add_spine_nodes(builder, user->fun->type->cid);
+
+        PgfAbsProduction* node = pgf_lookup_new_production(builder, user->fun);
+        node->args[user->arg_idx] = cat_id;
+
+#ifdef PGF_LOOKUP_DEBUG
+        GuPool* dbg_pool = gu_new_pool();
+        GuOut* dbg_out = gu_file_out(stderr, dbg_pool);
+        GuExn* dbg_err = gu_exn(dbg_pool);
+        pgf_print_abs_production(result_id, node, dbg_out, dbg_err);
+        gu_pool_free(dbg_pool);
+#endif
+    }
+
+    return cat_id;
+}
+
+/* Processes the result category of `fun` via pgf_lookup_add_spine_nodes
+ * and creates a production for `fun` with all argument slots left at 0.
+ * With PGF_LOOKUP_DEBUG defined, the production is printed to stderr. */
+static void
+pgf_lookup_add_spine_leaf(PgfSpineBuilder* builder, PgfAbsFun *fun)
+{
+    PgfMetaId result_id = pgf_lookup_add_spine_nodes(builder, fun->type->cid);
+    PgfAbsProduction* leaf = pgf_lookup_new_production(builder, fun);
+
+#ifdef PGF_LOOKUP_DEBUG
+    GuPool* dbg_pool = gu_new_pool();
+    GuOut* dbg_out = gu_file_out(stderr, dbg_pool);
+    GuExn* dbg_err = gu_exn(dbg_pool);
+    pgf_print_abs_production(result_id, leaf, dbg_out, dbg_err);
+    gu_pool_free(dbg_pool);
+#endif
+}
+
+/* First steps of sentence lookup.
+ *
+ * Builds two indices from `concr`, both allocated from `pool`:
+ *   - a lexicon index mapping each token to a buffer of abstract functions,
+ *     filled by pgf_lookup_index_syms for every sequence that has an idx;
+ *   - a function index mapping each category name to the
+ *     (function, argument position) pairs that take an argument of it.
+ * `sentence` is then looked up in the lexicon index as a single key, and a
+ * spine leaf is added for every function found under it.
+ *
+ * As written, the function always returns NULL and never uses `out_pool`. */
+PGF_API GuEnum*
+pgf_lookup_sentence(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool)
+{
+    /* ---- lexicon index: token -> functions ---- */
+    GuMap* lexicon_idx = gu_new_string_map(GuBuf*, &gu_null_struct, pool);
+    const size_t seq_count = gu_seq_length(concr->sequences);
+    for (size_t s = 0; s < seq_count; s++) {
+        PgfSequence* seq = gu_seq_index(concr->sequences, PgfSequence, s);
+        if (seq->idx == NULL) {
+            continue;
+        }
+        pgf_lookup_index_syms(lexicon_idx, seq->syms, seq->idx, pool);
+    }
+
+    /* ---- function index: argument category -> (function, position) ---- */
+    GuMap* function_idx = gu_new_string_map(GuBuf*, &gu_null_struct, pool);
+    const size_t fun_count = gu_seq_length(concr->abstr->funs);
+    for (size_t f = 0; f < fun_count; f++) {
+        PgfAbsFun* fun = gu_seq_index(concr->abstr->funs, PgfAbsFun, f);
+
+        const size_t arity = gu_seq_length(fun->type->hypos);
+        for (size_t a = 0; a < arity; a++) {
+            PgfHypo* hypo = gu_seq_index(fun->type->hypos, PgfHypo, a);
+
+            GuBuf* users = gu_map_get(function_idx, hypo->type->cid, GuBuf*);
+            if (users == NULL) {
+                users = gu_new_buf(PgfAbsBottomUpEntry, pool);
+                gu_map_put(function_idx, hypo->type->cid, GuBuf*, users);
+            }
+
+            PgfAbsBottomUpEntry* slot = gu_buf_extend(users);
+            slot->fun = fun;
+            slot->arg_idx = a;
+        }
+    }
+
+    /* ---- spine construction ---- */
+    PgfSpineBuilder builder;
+    builder.function_idx = function_idx;
+    builder.cat_ids = gu_new_string_map(PgfMetaId, &gu_null_struct, pool);
+    builder.next_id = 0;
+    builder.pool = pool;
+
+    GuBuf* leaves = gu_map_get(lexicon_idx, sentence, GuBuf*);
+    if (leaves == NULL) {
+        return NULL;
+    }
+
+    const size_t leaf_count = gu_buf_length(leaves);
+    for (size_t n = 0; n < leaf_count; n++) {
+        PgfAbsFun* leaf_fun = gu_buf_get(leaves, PgfAbsFun*, n);
+        pgf_lookup_add_spine_leaf(&builder, leaf_fun);
+    }
+
+    return NULL;
+}
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 2a1cdcb80..c5e9cd52a 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -126,6 +126,9 @@ PGF_API_DECL PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
GuExn* err, GuPool* pool, GuPool* out_pool);
+/* Sentence lookup in the C runtime (first steps); defined in pgf/lookup.c.
+ * NOTE(review): the definition added in this commit always returns NULL
+ * and does not use out_pool. */
+PGF_API_DECL GuEnum*
+pgf_lookup_sentence(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool);
+
typedef struct PgfMorphoCallback PgfMorphoCallback;
struct PgfMorphoCallback {
void (*callback)(PgfMorphoCallback* self,