Merge remote-tracking branch 'origin/master' into build-binary-packages

author: John J. Camilleri <john@digitalgrammars.com> 2020-11-25 20:57:01 +0100
committer: John J. Camilleri <john@digitalgrammars.com> 2020-11-25 20:57:01 +0100
commit: 70811d83beb37f7eebc76451858d56be76b3e521 (patch)
tree: e09aff21a86cfb72cfcb1fa22787c2d5ea64c556
parent: 0ed6b726a2c9a2365fadc05a75177c569469b4fd (diff)
parent: 37c63a0c22ccc73e60222335263c702873b6af2c (diff)
8 files changed, 399 insertions, 220 deletions
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 000000000..131a37b5d
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,47 @@
+# GF Core releases
+
+🚨 WARNING! The information here is preliminary!
+
+## Creating a new release
+
+### 1. Prepare the repository
+
+**Web pages**
+
+1. Create `download/index-X.Y.md` with installation instructions.
+1. Create `download/release-X.Y.md` with changelog information.
+1. Update `download/index.html` to redirect to the new version.
+1. Add announcement in news section in `index.html`
+
+**Version numbers**
+
+1. Update version number in `gf.cabal` (omitting `-git` suffix)
+1. Add a new line in `debian/changelog`
+
+### 2. Create GitHub release
+
+1. When the above changes are committed to the `master` branch in the repository,
+   check that all builds are successful:
+  - https://github.com/GrammaticalFramework/gf-core/actions
+  - https://travis-ci.org/github/GrammaticalFramework/gf-core
+1. Create a GitHub release here: https://github.com/GrammaticalFramework/gf-core/releases/new
+  with a tag format `RELEASE-X.Y`
+
+### 3. Binary packages
+
+Build and attach binaries to the release by running the relevant GitHub Actions workflows (TODO):
+
+1. Go to https://github.com/GrammaticalFramework/gf-core/actions
+1. Click "Build [platform] package" under _Workflows_
+1. Click "Run workflow" and specify the tag `RELEASE-X.Y`
+
+### 4. Upload to Hackage
+
+1. Run `make sdist`
+1. Visit `https://hackage.haskell.org/upload` and upload the file `dist/gf-X.Y.tar.gz`,
+   OR upload directly with Cabal (≥2.4): `cabal upload dist/gf-X.Y.tar.gz`
+1. If the documentation-building fails on the Hackage server, do:
+```
+cabal v2-haddock --builddir=dist/docs --haddock-for-hackage --enable-doc
+cabal upload --documentation dist/docs/*-docs.tar.gz
+```
diff --git a/bin/update_html b/bin/update_html
index 912ff1fa0..717670085 100755
--- a/bin/update_html
+++ b/bin/update_html
@@ -147,7 +147,7 @@ else
     fi
   done
   find . -name '*.md' | while read file ; do
-    if [[ "$file" == *"README.md" ]] ; then continue ; fi
+    if [[ "$file" == *"README.md" ]] || [[ "$file" == *"RELEASE.md" ]] ; then continue ; fi
     html="${file%.md}.html"
     if [ "$file" -nt "$html" ] || [ "$template" -nt "$html" ] ; then
       render_md_html "$file" "$html"
diff --git a/download/gfc b/download/gfc
deleted file mode 100644
index 7c1d30515..000000000
--- a/download/gfc
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/sh
-
-prefix="/usr/local"
-
-case "i386-apple-darwin9.3.0" in
-      *-cygwin)
-          prefix=`cygpath -w "$prefix"`;;
-esac
-
-exec_prefix="${prefix}"
-GF_BIN_DIR="${exec_prefix}/bin"
-GF_DATA_DIR="${prefix}/share/GF-3.0-beta"
-
-GFBIN="$GF_BIN_DIR/gf"
-
-if [ ! -x "${GFBIN}" ]; then
-   GFBIN=`which gf`
-fi
-
-if [ ! -x "${GFBIN}" ]; then
-    echo "gf not found."
-    exit 1
-fi
-
-exec $GFBIN --batch "$@"
diff --git a/download/index.md b/download/index-3.10.md
index 44eb6db3c..44eb6db3c 100644
--- a/download/index.md
+++ b/download/index-3.10.md
diff --git a/download/index-3.11.md b/download/index-3.11.md
new file mode 100644
index 000000000..4e225f631
--- /dev/null
+++ b/download/index-3.11.md
@@ -0,0 +1,182 @@
+---
+title: Grammatical Framework Download and Installation
+...
+
+**GF 3.11** was released on ? December 2020.
+
+What's new? See the [release notes](release-3.11.html).
+
+## Binary packages
+
+Unlike previous versions, these binary packages include only the GF core (compiler and runtime).
+
+| Platform        | Download                                           | Features       | How to install                   |
+|:----------------|:---------------------------------------------------|:---------------|:---------------------------------|
+| macOS           | [gf-3.11.pkg](gf-3.11.pkg)                         | GF, S, C, J, P | Double-click on the package icon |
+| Ubuntu (32-bit) | [gf\_3.11\_i386.deb](gf_3.11_i386.deb)             | GF, S, C, J, P | `sudo dpkg -i gf_3.11_i386.deb`  |
+| Ubuntu (64-bit) | [gf\_3.11\_amd64.deb](gf_3.11_amd64.deb)           | GF, S, C, J, P | `sudo dpkg -i gf_3.11_amd64.deb` |
+| Windows         | [gf-3.11-bin-windows.zip](gf-3.11-bin-windows.zip) | GF, S          | `unzip gf-3.11-bin-windows.zip`  |
+
+**Features**
+
+- GF = GF shell and grammar compiler
+- S = `gf -server` mode
+- C = C run-time system
+- J/P = Java/Python binding to the C run-time system
+
+### Notes
+
+The Windows package is installed by just unpacking it anywhere. You will
+probably need to set the `PATH` and `GF_LIB_PATH` environment variables,
+see Inari's notes on [Installing GF on Windows](http://www.grammaticalframework.org/~inari/gf-windows.html#toc3).
+
+The Ubuntu `.deb` packages should work on Ubuntu 16.04 and 18.04 and
+similar Linux distributions. The `.deb` packages were updated
+to version 3.10-2 after the release of GF 3.10.
+(Because of a packaging bug the Resource Grammar Library was missing
+in the 3.10-1 packages.)
+
+The packages for macOS (Mac OS X) should work on at least 10.13 and
+10.14 (High Sierra and Mojave)
+
+## Installing the latest release from source
+
+[GF is on Hackage](http://hackage.haskell.org/package/gf), so under
+normal circumstances the procedure is fairly simple:
+
+1.  Install a recent version of the [Haskell
+    Platform](http://hackage.haskell.org/platform) (see note below)
+2.  `cabal update`
+3.  On Linux: install some C libraries from your Linux distribution (see note below)
+4.  `cabal install gf`
+
+This installs the GF executable and Haskell libraries, but **does not include the RGL**.
+
+You can also download the source code release from [GitHub](https://github.com/GrammaticalFramework/gf-core/releases),
+and follow the instructions below under **Installing from the latest developer source code**.
+
+### Notes
+
+**Installation location**
+
+The above steps install GF for a single user. The executables are put
+in `$HOME/.cabal/bin` (or, with recent versions of the Haskell platform
+on Mac OS X, in `$HOME/Library/Haskell/bin`), so it is a good idea to
+put a line in your `.bash_profile` or `.profile` to add that directory
+to your path:
+
+```
+PATH=$HOME/.cabal/bin:$PATH
+```
+
+or
+
+```
+PATH=$HOME/Library/Haskell/bin:$PATH
+```
+
+**Build tools**
+
+In order to compile GF you need the build tools **Alex** and **Happy**.
+These can be installed via Cabal, e.g.:
+
+```
+cabal install alex happy
+```
+
+or obtained by other means, depending on your OS.
+
+**Haskeline**
+
+GF uses [`haskeline`](http://hackage.haskell.org/package/haskeline), which
+on Linux depends on some non-Haskell libraries that won't be installed
+automatically by cabal, and therefore need to be installed manually.
+Here is one way to do this:
+
+- On Ubuntu: `sudo apt-get install libghc-haskeline-dev`
+- On Fedora: `sudo dnf install ghc-haskeline-devel`
+
+**GHC version**
+
+The GF source code has been updated to compile with GHC 8.4.
+Using older versions of GHC (e.g. 8.2, 8.0 and 7.10) should still work too.
+
+## Installing from the latest developer source code
+
+If you haven't already, clone the repository with:
+
+```
+git clone https://github.com/GrammaticalFramework/gf-core.git
+```
+
+If you've already cloned the repository previously, update with:
+
+```
+git pull
+```
+
+Then install with:
+
+```
+cabal install
+```
+
+or, if you're a Stack user:
+
+```
+stack install
+```
+
+The above notes for installing from source apply also in these cases.
+For more info on working with the GF source code, see the
+[GF Developers Guide](../doc/gf-developers.html).
+
+## Installing the RGL from source
+
+To install the RGL from source,
+you can download a release from [GitHub](https://github.com/GrammaticalFramework/gf-rgl/releases)
+or get the latest version by cloning the repository:
+
+```
+git clone https://github.com/GrammaticalFramework/gf-rgl.git
+```
+
+In both cases, once you have the RGL sources you can install them by running:
+
+```
+make
+```
+
+in the RGL folder.
+This assumes that you already have GF installed.
+For more details about building the RGL, see the [RGL README](https://github.com/GrammaticalFramework/gf-rgl/blob/master/README.md).
+
+## Installing the Python bindings from PyPI
+
+The Python library is available on PyPI as `pgf`, so it can be installed using:
+
+```
+pip install pgf
+```
+
+We provide binary wheels for Linux and OSX (with Windows missing so far), which
+include the C runtime and are ready to go. If there is no binary distribution for
+your platform, this will install the source tarball, which will attempt to build
+the binding during installation, and requires the GF C runtime to be installed on
+your system.
+
+## Older releases
+
+- [GF 3.10](index-3.10.html) (December 2018)
+- [GF 3.9](index-3.9.html) (August 2017)
+- [GF 3.8](index-3.8.html) (June 2016)
+- [GF 3.7.1](index-3.7.1.html) (October 2015)
+- [GF 3.7](index-3.7.html) (June 2015)
+- [GF 3.6](index-3.6.html) (June 2014)
+- [GF 3.5](index-3.5.html) (August 2013)
+- [GF 3.4](index-3.4.html) (January 2013)
+- [GF 3.3.3](index-3.3.3.html) (March 2012)
+- [GF 3.3](index-3.3.html) (October 2011)
+- [GF 3.2.9](index-3.2.9.html) source-only snapshot (September 2011)
+- [GF 3.2](index-3.2.html) (December 2010)
+- [GF 3.1.6](index-3.1.6.html) (April 2010)
diff --git a/download/index.html b/download/index.html
new file mode 100644
index 000000000..eb32412f8
--- /dev/null
+++ b/download/index.html
@@ -0,0 +1,8 @@
+<html>
+<head>
+  <meta http-equiv="refresh" content="0; URL=/download/index-3.10.html" />
+</head>
+<body>
+  You are being redirected to <a href="index-3.10.html">the current version</a> of this page.
+</body>
+</html>
diff --git a/download/release-3.11.md b/download/release-3.11.md
new file mode 100644
index 000000000..a6af7fafe
--- /dev/null
+++ b/download/release-3.11.md
@@ -0,0 +1,25 @@
+---
+title: GF 3.11 Release Notes
+date: ? December 2020
+...
+
+## Installation
+
+See the [download page](index.html).
+
+## What's new
+
+From this release, the binary GF core packages do not contain the RGL.
+The RGL's release cycle is now completely separate from GF's. See [RGL releases](https://github.com/GrammaticalFramework/gf-rgl/releases).
+
+Over ... changes have been pushed to GF core
+since the release of GF 3.10 in December 2018.
+
+## General
+
+- Testsuite.
+- Compatibility with new versions of GHC.
+
+## GF compiler and run-time library
+
+- More improvements to error messages.
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 1ee24ac59..d558908ab 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -61,6 +61,14 @@ typedef struct {
 
 typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
 
+typedef struct {
+	PgfProductionIdx* idx;
+	size_t offset;
+	size_t sym_idx;
+} PgfLexiconIdxEntry;
+
+typedef GuBuf PgfLexiconIdx;
+
 struct PgfParseState {
 	PgfParseState* next;
 
@@ -74,6 +82,8 @@ struct PgfParseState {
     size_t end_offset;
 
 	prob_t viterbi_prob;
+
+    PgfLexiconIdx* lexicon_idx;
 };
 
 typedef struct PgfAnswers {
@@ -687,16 +697,6 @@ static void
 pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep);
 
 static void
-pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
-{
-	if (gu_buf_length(state->agenda) == 0) {
-		state->viterbi_prob =
-			item->inside_prob+item->conts->outside_prob;
-	}
-	gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
-}
-
-static void
 pgf_parsing_push_production(PgfParsing* ps, PgfParseState* state,
                             PgfItemConts* conts, PgfProduction prod)
 {
@@ -727,7 +727,7 @@ pgf_parsing_combine(PgfParsing* ps,
 	}
 
 	pgf_item_advance(item, ps->pool);
-	pgf_parsing_push_item(before, item);
+	gu_buf_heap_push(before->agenda, pgf_item_prob_order, &item);
 }
 
 static PgfProduction
@@ -898,9 +898,65 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
     }
 }
 
+PGF_INTERNAL_DECL int
+pgf_symbols_cmp(PgfCohortSpot* spot,
+                PgfSymbols* syms, size_t* sym_idx,
+                bool case_sensitive);
+
+static void
+pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
+                      int i, int j, ptrdiff_t min, ptrdiff_t max)
+{
+	// This is a variation of a binary search algorithm which
+	// can retrieve all prefixes of a string with minimal
+	// comparisons, i.e. there is no need to lookup every
+	// prefix separately.
+
+	while (i <= j) {
+		int k  = (i+j) / 2;
+		PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
+
+		PgfCohortSpot start   = {0, ps->sentence + state->end_offset};
+   		PgfCohortSpot current = start;
+		size_t sym_idx = 0;
+		int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
+		if (cmp < 0) {
+			j = k-1;
+		} else if (cmp > 0) {
+			ptrdiff_t len = current.ptr - start.ptr;
+
+			if (min <= len)
+				pgf_parsing_lookahead(ps, state, i, k-1, min, len);
+
+			if (len+1 <= max)
+				pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
+
+			break;
+		} else {
+			ptrdiff_t len = current.ptr - start.ptr;
+
+			if (min <= len-1)
+				pgf_parsing_lookahead(ps, state, i, k-1, min, len-1);
+
+			if (seq->idx != NULL) {
+				PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
+				entry->idx        = seq->idx;
+				entry->offset     = (size_t) (current.ptr - ps->sentence);
+				entry->sym_idx    = sym_idx;
+			}
+
+			if (len+1 <= max)
+				pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
+
+			break;
+		}
+	}
+}
+
 static PgfParseState*
 pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
-                    BIND_TYPE bind_type)
+                    BIND_TYPE bind_type,
+                    prob_t viterbi_prob)
 {
 	PgfParseState** pstate;
 	if (ps->before == NULL && start_offset == 0)
@@ -953,170 +1009,34 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
 	                    (start_offset == end_offset);
 	state->start_offset = start_offset;
 	state->end_offset = end_offset;
-	state->viterbi_prob = 0;
+	state->viterbi_prob = viterbi_prob;
+ 	state->lexicon_idx =
+		gu_new_buf(PgfLexiconIdxEntry, ps->pool);
 
 	if (ps->before == NULL && start_offset == 0)
 		state->needs_bind = false;
 
-	*pstate = state;
-
-	return state;
-}
-
-PGF_INTERNAL_DECL int
-pgf_symbols_cmp(PgfCohortSpot* spot,
-                PgfSymbols* syms, size_t* sym_idx,
-                bool case_sensitive);
-
-static bool
-pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
-                        int i, int j, ptrdiff_t min, ptrdiff_t max)
-{
-	// This is a variation of a binary search algorithm which
-	// can retrieve all prefixes of a string with minimal
-	// comparisons, i.e. there is no need to lookup every
-	// prefix separately.
-
-	bool found = false;
-	while (i <= j) {
-		int k  = (i+j) / 2;
-		PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
-
-		PgfCohortSpot start   = {0, ps->sentence+state->end_offset}; 
-		PgfCohortSpot current = start;
-
-		size_t sym_idx = 0;
-		int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
-		if (cmp < 0) {
-			j = k-1;
-		} else if (cmp > 0) {
-			ptrdiff_t len = current.ptr - start.ptr;
-
-			if (min <= len)
-				if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
-					found = true;
-
-			if (len+1 <= max)
-				if (pgf_parsing_scan_helper(ps, state, k+1, j, len+1, max))
-					found = true;
-
-			break;
-		} else {
-			ptrdiff_t len = current.ptr - start.ptr;
-
-			if (min <= len)
-				if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
-					found = true;
-
-			// Here we do bottom-up prediction for all lexical categories. 
-			// The epsilon productions will be predicted in top-down
-			// fashion while parsing.
-			if (seq->idx != NULL && len > 0) {
-				found = true;
-
-				// A new state will mark the end of the current match
-				PgfParseState* new_state =
-					pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
-
-				// Bottom-up prediction for lexical rules
-				size_t n_entries = gu_buf_length(seq->idx);
-				for (size_t i = 0; i < n_entries; i++) {
-					PgfProductionIdxEntry* entry =
-						gu_buf_index(seq->idx, PgfProductionIdxEntry, i);
-
-					PgfItemConts* conts =
-						pgf_parsing_get_conts(state,
-						                      entry->ccat, entry->lin_idx,
-						                      ps->pool);
-
-					// Create the new category if it doesn't exist yet
-					PgfCCat* tmp_ccat = pgf_parsing_get_completed(new_state, conts);
-					PgfCCat* ccat = tmp_ccat;
-					if (ccat == NULL) {
-						ccat = pgf_parsing_create_completed(ps, new_state, conts, INFINITY);
-					}
-
-					// Add the production
-					if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
-						ccat->prods = gu_realloc_seq(ccat->prods, PgfProduction, ccat->n_synprods+1);
-					}
-					GuVariantInfo i;
-					i.tag  = PGF_PRODUCTION_APPLY;
-					i.data = entry->papp;
-					PgfProduction prod = gu_variant_close(i);
-					gu_seq_set(ccat->prods, PgfProduction, ccat->n_synprods++, prod);
-
-					// Update the category's probability to be minimum
-					if (ccat->viterbi_prob > entry->papp->fun->ep->prob)
-						ccat->viterbi_prob = entry->papp->fun->ep->prob;
-
-#ifdef PGF_PARSER_DEBUG
-					GuPool* tmp_pool = gu_new_pool();
-					GuOut* out = gu_file_out(stderr, tmp_pool);
-					GuExn* err = gu_exn(tmp_pool);
-					if (tmp_ccat == NULL) {
-						gu_printf(out, err, "[");
-						pgf_print_range(state, new_state, out, err);
-						gu_puts("; ", out, err);
-						pgf_print_fid(conts->ccat->fid, out, err);
-						gu_printf(out, err, "; %d; ",
-											conts->lin_idx);
-						pgf_print_fid(ccat->fid, out, err);
-						gu_puts("] ", out, err);
-						pgf_print_fid(ccat->fid, out, err);
-						gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
-					}
-					pgf_print_production(ccat->fid, prod, out, err);
-					gu_pool_free(tmp_pool);
-#endif
-				}
-			}
-
-			if (len <= max)
-				if (pgf_parsing_scan_helper(ps, state, k+1, j, len, max))
-					found = true;
-
-			break;
+    if (gu_seq_length(ps->concr->sequences) > 0) {
+		// Add epsilon lexical rules to the bottom up index
+		PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
+		if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
+			PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
+			entry->idx    = seq->idx;
+			entry->offset = state->start_offset;
+			entry->sym_idx= 0;
 		}
-	}
-
-	return found;
-}
-
-static void
-pgf_parsing_scan(PgfParsing *ps)
-{
-	size_t len = strlen(ps->sentence);
 
-	PgfParseState* state =
-		pgf_new_parse_state(ps, 0, BIND_SOFT);
-
-	while (state != NULL && state->end_offset < len) {
-		if (state->needs_bind) {
-			// We have encountered two tokens without space in between.
-			// Those can be accepted only if there is a BIND token
-			// in between. We encode this by having one more state
-			// at the same offset. A transition between these two
-			// states is possible only with the BIND token.
-			state =
-				pgf_new_parse_state(ps, state->end_offset, BIND_HARD);
+		// Add non-epsilon lexical rules to the bottom up index
+		if (!state->needs_bind) {
+			pgf_parsing_lookahead(ps, state,
+			                      0, gu_seq_length(ps->concr->sequences)-1,
+			                      1, strlen(ps->sentence)-state->end_offset);
 		}
+	}
 
-		if (!pgf_parsing_scan_helper
-					   (ps, state,
-						0, gu_seq_length(ps->concr->sequences)-1,
-						1, len-state->end_offset)) {
-			// skip one character and try again
-			GuString s = ps->sentence+state->end_offset;
-			gu_utf8_decode((const uint8_t**) &s);
-			pgf_new_parse_state(ps, s-ps->sentence, BIND_NONE);
-		}
+	*pstate = state;
 
-		if (state == ps->before)
-			state = ps->after;
-		else
-			state = state->next;
-	}
+	return state;
 }
 
 static void
@@ -1138,8 +1058,9 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
 		if (!ps->before->needs_bind && cmp_string(&current, tok, ps->case_sensitive) == 0) {
 			PgfParseState* state =
 				pgf_new_parse_state(ps, (current.ptr - ps->sentence),
-				                    BIND_NONE);
-			pgf_parsing_push_item(state, item);
+				                    BIND_NONE,
+				                    item->inside_prob+item->conts->outside_prob);
+            gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
 		} else {
 			pgf_item_free(ps, item);
 		}
@@ -1147,6 +1068,27 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
 }
 
 static void
+pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
+                           PgfProductionIdxEntry* entry,
+                           size_t offset, size_t sym_idx)
+{
+	GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp };
+	PgfProduction prod = gu_variant_close(i);
+	PgfItem* item =
+		pgf_new_item(ps, conts, prod);
+	PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms;
+	item->sym_idx = sym_idx;
+	pgf_item_set_curr_symbol(item, ps->pool);
+	prob_t prob = item->inside_prob+item->conts->outside_prob;
+	PgfParseState* state =
+		pgf_new_parse_state(ps, offset, BIND_NONE, prob);
+	if (state->viterbi_prob > prob) {
+		state->viterbi_prob = prob;
+	}
+	gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+}
+
+static void
 pgf_parsing_td_predict(PgfParsing* ps,
                        PgfItem* item, PgfCCat* ccat, size_t lin_idx)
 {
@@ -1193,36 +1135,34 @@ pgf_parsing_td_predict(PgfParsing* ps,
 				pgf_parsing_push_production(ps, ps->before, conts, prod);
 			}
 
-			// Top-down prediction for epsilon lexical rules if any
-			PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
-			if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
+			// Bottom-up prediction for lexical and epsilon rules
+			size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
+			for (size_t i = 0; i < n_idcs; i++) {
+				PgfLexiconIdxEntry* lentry =
+					gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
 
 				PgfProductionIdxEntry key;
 				key.ccat    = ccat;
 				key.lin_idx = lin_idx;
 				key.papp    = NULL;
 				PgfProductionIdxEntry* value =
-					gu_seq_binsearch(gu_buf_data_seq(seq->idx),
+					gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
 									 pgf_production_idx_entry_order,
 									 PgfProductionIdxEntry, &key);
 
 				if (value != NULL) {
-					GuVariantInfo i = { PGF_PRODUCTION_APPLY, value->papp };
-					PgfProduction prod = gu_variant_close(i);
-					pgf_parsing_push_production(ps, ps->before, conts, prod);
+					pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx);
 
 					PgfProductionIdxEntry* start =
-						gu_buf_data(seq->idx);
+						gu_buf_data(lentry->idx);
 					PgfProductionIdxEntry* end =
-						start + gu_buf_length(seq->idx)-1;
+						start + gu_buf_length(lentry->idx)-1;
 
 					PgfProductionIdxEntry* left = value-1;
 					while (left >= start &&
 						   value->ccat->fid == left->ccat->fid &&
 						   value->lin_idx   == left->lin_idx) {
-						GuVariantInfo i = { PGF_PRODUCTION_APPLY, left->papp };
-						PgfProduction prod = gu_variant_close(i);
-						pgf_parsing_push_production(ps, ps->before, conts, prod);
+						pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx);
 						left--;
 					}
 
@@ -1230,9 +1170,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
 					while (right <= end &&
 						   value->ccat->fid == right->ccat->fid &&
 						   value->lin_idx   == right->lin_idx) {
-						GuVariantInfo i = { PGF_PRODUCTION_APPLY, right->papp };
-						PgfProduction prod = gu_variant_close(i);
-						pgf_parsing_push_production(ps, ps->before, conts, prod);
+						pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx);
 						right++;
 					}
 				}
@@ -1271,7 +1209,7 @@ pgf_parsing_pre(PgfParsing* ps, PgfItem* item, PgfSymbols* syms)
 	} else {
 		item->alt = 0;
 		pgf_item_advance(item, ps->pool);
-		pgf_parsing_push_item(ps->before, item);
+        gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
 	}
 }
 
@@ -1401,8 +1339,9 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
 						item->curr_sym = gu_null_variant;
 						item->sym_idx  = gu_seq_length(syms);
 						PgfParseState* state =
-							pgf_new_parse_state(ps, offset, BIND_NONE);
-						pgf_parsing_push_item(state, item);
+							pgf_new_parse_state(ps, offset, BIND_NONE,
+							                    item->inside_prob+item->conts->outside_prob);
+                        gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
 						match = true;
 					}
 				}
@@ -1445,10 +1384,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
 		if (ps->before->start_offset == ps->before->end_offset &&
 		    ps->before->needs_bind) {
 			PgfParseState* state =
-				pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
+				pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
+				                    item->inside_prob+item->conts->outside_prob);
 			if (state != NULL) {
 				pgf_item_advance(item, ps->pool);
-				pgf_parsing_push_item(state, item);
+                gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
 			} else {
 				pgf_item_free(ps, item);
 			}
@@ -1462,10 +1402,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
 		if (ps->before->start_offset == ps->before->end_offset) {
 			if (ps->before->needs_bind) {
 				PgfParseState* state =
-					pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
+					pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
+					                    item->inside_prob+item->conts->outside_prob);
 				if (state != NULL) {
 					pgf_item_advance(item, ps->pool);
-					pgf_parsing_push_item(state, item);
+                    gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
 				} else {
 					pgf_item_free(ps, item);
 				}
@@ -1474,7 +1415,7 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
 			}
 		} else {
 			pgf_item_advance(item, ps->pool);
-			pgf_parsing_push_item(ps->before, item);
+            gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
 		}
 		break;
 	}
@@ -1725,7 +1666,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
 		ps->heuristic_factor = heuristic_factor;
 	}
 
-	pgf_parsing_scan(ps);
+    PgfParseState* state =
+        pgf_new_parse_state(ps, 0, BIND_SOFT, 0);
 
 	int fidString = -1;
 	PgfCCat* start_ccat = gu_new(PgfCCat, ps->pool);
@@ -1745,7 +1687,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
 #endif
 
 	PgfItemConts* conts =
-		pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
+		pgf_parsing_get_conts(state, start_ccat, 0, ps->pool);
     gu_buf_push(conts->items, PgfItem*, NULL);
 
 	size_t n_ccats = gu_seq_length(cnccat->cats);
author	John J. Camilleri <john@digitalgrammars.com>	2020-11-25 20:57:01 +0100
committer	John J. Camilleri <john@digitalgrammars.com>	2020-11-25 20:57:01 +0100
commit	70811d83beb37f7eebc76451858d56be76b3e521 (patch)
tree	e09aff21a86cfb72cfcb1fa22787c2d5ea64c556
parent	0ed6b726a2c9a2365fadc05a75177c569469b4fd (diff)
parent	37c63a0c22ccc73e60222335263c702873b6af2c (diff)