summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkrangelov <kr.angelov@gmail.com>2021-01-20 20:16:26 +0100
committerkrangelov <kr.angelov@gmail.com>2021-01-20 20:16:26 +0100
commit04f6f113f07712a35128a924558dac9a3f83a822 (patch)
tree6f60a5dd879f17fd7a6b0c9c5de7daf1df7e374f
parentbac619f025e8b3680e93cc64eddd9cebfc350249 (diff)
parentd77921005a429406398167c713969c6d807e56fa (diff)
Merge branch 'master' of https://github.com/GrammaticalFramework/gf-core
-rw-r--r--.github/workflows/build-all-versions.yml8
-rw-r--r--.github/workflows/build-binary-packages.yml185
-rw-r--r--.github/workflows/build-debian-package.yml49
-rw-r--r--.github/workflows/build-python-package.yml12
-rw-r--r--.gitignore6
-rw-r--r--README.md2
-rw-r--r--RELEASE.md64
-rwxr-xr-xbin/build-binary-dist.sh72
-rwxr-xr-xbin/update_html2
-rw-r--r--debian/control4
-rwxr-xr-xdebian/rules10
-rw-r--r--doc/tutorial/gf-tutorial.t2t25
-rw-r--r--download/gfc25
-rw-r--r--download/index-3.10.md (renamed from download/index.md)0
-rw-r--r--download/index-3.11.md182
-rw-r--r--download/index.html8
-rw-r--r--download/release-3.11.md40
-rw-r--r--src/compiler/GF/Grammar/Lexer.x8
-rw-r--r--src/compiler/GF/Infra/SIO.hs2
-rw-r--r--src/runtime/c/pgf/parser.c330
20 files changed, 689 insertions, 345 deletions
diff --git a/.github/workflows/build-all-versions.yml b/.github/workflows/build-all-versions.yml
index 52db74850..df71f0ac0 100644
--- a/.github/workflows/build-all-versions.yml
+++ b/.github/workflows/build-all-versions.yml
@@ -13,16 +13,16 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- os: [ubuntu-latest, macOS-latest, windows-latest]
+ os: [ubuntu-latest, macos-latest, windows-latest]
cabal: ["3.2"]
ghc:
- "8.6.5"
- "8.8.3"
- "8.10.1"
exclude:
- - os: macOS-latest
+ - os: macos-latest
ghc: 8.8.3
- - os: macOS-latest
+ - os: macos-latest
ghc: 8.6.5
- os: windows-latest
ghc: 8.8.3
@@ -92,4 +92,4 @@ jobs:
# - name: Test
# run: |
- # stack test --system-ghc \ No newline at end of file
+ # stack test --system-ghc
diff --git a/.github/workflows/build-binary-packages.yml b/.github/workflows/build-binary-packages.yml
new file mode 100644
index 000000000..810fa1352
--- /dev/null
+++ b/.github/workflows/build-binary-packages.yml
@@ -0,0 +1,185 @@
+name: Build Binary Packages
+
+on:
+ workflow_dispatch:
+ release:
+
+jobs:
+
+# ---
+
+ ubuntu:
+ name: Build Ubuntu package
+ runs-on: ubuntu-18.04
+ # strategy:
+ # matrix:
+ # ghc: ["8.6.5"]
+ # cabal: ["2.4"]
+
+ steps:
+ - uses: actions/checkout@v2
+
+ # Note: `haskell-platform` is listed as requirement in debian/control,
+ # which is why it's installed using apt instead of the Setup Haskell action.
+
+ # - name: Setup Haskell
+ # uses: actions/setup-haskell@v1
+ # id: setup-haskell-cabal
+ # with:
+ # ghc-version: ${{ matrix.ghc }}
+ # cabal-version: ${{ matrix.cabal }}
+
+ - name: Install build tools
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y \
+ make \
+ dpkg-dev \
+ debhelper \
+ haskell-platform \
+ libghc-json-dev \
+ python-dev \
+ default-jdk \
+ libtool-bin
+
+ - name: Build package
+ run: |
+ make deb
+
+ - name: Copy package
+ run: |
+ cp ../gf_*.deb dist/
+
+ - name: Upload artifact
+ uses: actions/upload-artifact@v2
+ with:
+ name: gf-${{ github.sha }}-ubuntu
+ path: dist/gf_*.deb
+ if-no-files-found: error
+
+# ---
+
+ macos:
+ name: Build macOS package
+ runs-on: macos-10.15
+ strategy:
+ matrix:
+ ghc: ["8.6.5"]
+ cabal: ["2.4"]
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Setup Haskell
+ uses: actions/setup-haskell@v1
+ id: setup-haskell-cabal
+ with:
+ ghc-version: ${{ matrix.ghc }}
+ cabal-version: ${{ matrix.cabal }}
+
+ - name: Install build tools
+ run: |
+ brew install \
+ automake
+ cabal v1-install alex happy
+
+ - name: Build package
+ run: |
+ sudo mkdir -p /Library/Java/Home
+ sudo ln -s /usr/local/opt/openjdk/include /Library/Java/Home/include
+ make pkg
+
+ - name: Upload artifact
+ uses: actions/upload-artifact@v2
+ with:
+ name: gf-${{ github.sha }}-macos
+ path: dist/gf-*.pkg
+ if-no-files-found: error
+
+# ---
+
+ windows:
+ name: Build Windows package
+ runs-on: windows-2019
+ strategy:
+ matrix:
+ ghc: ["8.6.5"]
+ cabal: ["2.4"]
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Setup MSYS2
+ uses: msys2/setup-msys2@v2
+ with:
+ install: >-
+ base-devel
+ gcc
+ python-devel
+
+ - name: Prepare dist folder
+ shell: msys2 {0}
+ run: |
+ mkdir /c/tmp-dist
+ mkdir /c/tmp-dist/c
+ mkdir /c/tmp-dist/java
+ mkdir /c/tmp-dist/python
+
+ - name: Build C runtime
+ shell: msys2 {0}
+ run: |
+ cd src/runtime/c
+ autoreconf -i
+ ./configure
+ make
+ make install
+ cp /mingw64/bin/libpgf-0.dll /c/tmp-dist/c
+ cp /mingw64/bin/libgu-0.dll /c/tmp-dist/c
+
+ - name: Build Java bindings
+ shell: msys2 {0}
+ run: |
+ export PATH="${PATH}:/c/Program Files/Java/jdk8u275-b01/bin"
+ cd src/runtime/java
+ make \
+ JNI_INCLUDES="-I \"/c/Program Files/Java/jdk8u275-b01/include\" -I \"/c/Program Files/Java/jdk8u275-b01/include/win32\" -I \"/mingw64/include\" -D__int64=int64_t" \
+ WINDOWS_LDFLAGS="-L\"/mingw64/lib\" -no-undefined"
+ make install
+ cp .libs//msys-jpgf-0.dll /c/tmp-dist/java/jpgf.dll
+ cp jpgf.jar /c/tmp-dist/java
+
+ - name: Build Python bindings
+ shell: msys2 {0}
+ env:
+ EXTRA_INCLUDE_DIRS: /mingw64/include
+ EXTRA_LIB_DIRS: /mingw64/lib
+ run: |
+ cd src/runtime/python
+ python setup.py build
+ python setup.py install
+ cp /usr/lib/python3.8/site-packages/pgf* /c/tmp-dist/python
+
+ - name: Setup Haskell
+ uses: actions/setup-haskell@v1
+ id: setup-haskell-cabal
+ with:
+ ghc-version: ${{ matrix.ghc }}
+ cabal-version: ${{ matrix.cabal }}
+
+ - name: Install Haskell build tools
+ run: |
+ cabal install alex happy
+
+ - name: Build GF
+ run: |
+ cabal install --only-dependencies -fserver
+ cabal configure -fserver
+ cabal build
+ copy dist\build\gf\gf.exe C:\tmp-dist
+
+ - name: Upload artifact
+ uses: actions/upload-artifact@v2
+ with:
+ name: gf-${{ github.sha }}-windows
+ path: C:\tmp-dist\*
+ if-no-files-found: error
diff --git a/.github/workflows/build-debian-package.yml b/.github/workflows/build-debian-package.yml
deleted file mode 100644
index 09719aaa8..000000000
--- a/.github/workflows/build-debian-package.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-name: Build Debian Package
-
-on: [push, pull_request]
-
-jobs:
- build:
- name: Build on ${{ matrix.os }}
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: true
- matrix:
- os: [ubuntu-18.04]
- env:
- LC_ALL: C.UTF-8
-
- steps:
- - uses: actions/checkout@v1
-
- - name: Install build tools
- run: |
- sudo apt update
- sudo apt install -y \
- make \
- dpkg-dev \
- debhelper \
- haskell-platform \
- libghc-json-dev \
- python-dev \
- default-jdk \
- libtool-bin \
- txt2tags \
- pandoc
-
- - name: Checkout RGL
- run: |
- git clone --depth 1 https://github.com/GrammaticalFramework/gf-rgl.git ../gf-rgl
-
- - name: Build Debian package
- run: |
- make deb
-
- - name: Copy packages
- run: |
- mkdir debian/dist
- cp ../gf_*.deb debian/dist/
-
- - uses: actions/upload-artifact@v2
- with:
- path: debian/dist
diff --git a/.github/workflows/build-python-package.yml b/.github/workflows/build-python-package.yml
index 45e94b853..6326821dc 100644
--- a/.github/workflows/build-python-package.yml
+++ b/.github/workflows/build-python-package.yml
@@ -1,6 +1,10 @@
name: Build & Publish Python Package
-on: [push, pull_request]
+# Trigger the workflow on every pull request, and on push only for the master branch
+on:
+ pull_request:
+ push:
+ branches: [master]
jobs:
build_wheels:
@@ -9,7 +13,7 @@ jobs:
strategy:
fail-fast: true
matrix:
- os: [ubuntu-18.04, macos-latest]
+ os: [ubuntu-18.04, macos-10.15]
steps:
- uses: actions/checkout@v1
@@ -21,7 +25,7 @@ jobs:
- name: Install cibuildwheel
run: |
- python -m pip install cibuildwheel==1.4.2
+ python -m pip install git+https://github.com/joerick/cibuildwheel.git@master
- name: Install build tools for OSX
if: startsWith(matrix.os, 'macos')
@@ -69,7 +73,7 @@ jobs:
needs: [build_wheels, build_sdist]
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/master' && github.event_name == 'push'
-
+
steps:
- uses: actions/checkout@v2
diff --git a/.gitignore b/.gitignore
index 10968810e..01b58ccb4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,12 @@
*.jar
*.gfo
*.pgf
+debian/.debhelper
+debian/debhelper-build-stamp
+debian/gf
+debian/gf.debhelper.log
+debian/gf.substvars
+debian/files
dist/
dist-newstyle/
src/runtime/c/.libs/
diff --git a/README.md b/README.md
index 5ee8967a6..2afa476ea 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,6 @@
# Grammatical Framework (GF)
-[![Build Status](https://travis-ci.org/GrammaticalFramework/gf-core.svg?branch=master)](https://travis-ci.org/GrammaticalFramework/gf-core)
-
The Grammatical Framework is a grammar formalism based on type theory.
It consists of:
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 000000000..3a771b78d
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,64 @@
+# GF Core releases
+
+**Note:**
+The RGL is now released completely separately from GF Core.
+See the [RGL's RELEASE.md](https://github.com/GrammaticalFramework/gf-rgl/blob/master/RELEASE.md).
+
+## Creating a new release
+
+### 1. Prepare the repository
+
+**Web pages**
+
+1. Create `download/index-X.Y.md` with installation instructions.
+2. Create `download/release-X.Y.md` with changelog information.
+3. Update `download/index.html` to redirect to the new version.
+4. Add announcement in news section in `index.html`.
+
+**Version numbers**
+
+1. Update version number in `gf.cabal` (omitting `-git` suffix).
+2. Add a new line in `debian/changelog`.
+
+### 2. Create GitHub release
+
+1. When the above changes are committed to the `master` branch in the repository
+ and pushed, check that all CI workflows are successful (fixing as necessary):
+ - <https://github.com/GrammaticalFramework/gf-core/actions>
+ - <https://travis-ci.org/github/GrammaticalFramework/gf-core>
+2. Create a GitHub release [here](https://github.com/GrammaticalFramework/gf-core/releases/new):
+ - Tag version format `RELEASE-X.Y`
+ - Title: "GF X.Y"
+ - Description: mention major changes since last release
+3. Publish the release to trigger the building of the binary packages (below).
+
+### 3. Binary packages
+
+The binaries will be built automatically by GitHub Actions when the release is created,
+but the generated _artifacts_ must be manually attached to the release as _assets_.
+
+1. Go to the [actions page](https://github.com/GrammaticalFramework/gf-core/actions) and click "Build Binary Packages" under _Workflows_.
+2. Choose the workflow run corresponding to the newly created release.
+3. Download the artifacts locally. Extract the Ubuntu and macOS ones to get the `.deb` and `.pkg` files.
+4. Go back to the [releases page](https://github.com/GrammaticalFramework/gf-core/releases) and click to edit the release information.
+5. Add the downloaded artifacts as release assets, giving them names with format `gf-X.Y-PLATFORM.EXT` (e.g. `gf-3.11-macos.pkg`).
+
+### 4. Upload to Hackage
+
+1. Run `make sdist`
+2. Upload the package, either:
+ 1. **Manually**: visit <https://hackage.haskell.org/upload> and upload the file `dist/gf-X.Y.tar.gz`
+ 2. **via Cabal (≥2.4)**: `cabal upload dist/gf-X.Y.tar.gz`
+3. If the documentation-building fails on the Hackage server, do:
+```
+cabal v2-haddock --builddir=dist/docs --haddock-for-hackage --enable-doc
+cabal upload --documentation dist/docs/*-docs.tar.gz
+```
+
+## Miscellaneous
+
+### What is the tag `GF-3.10`?
+
+For GF 3.10, the Core and RGL repositories had already been separated, however
+the binary packages still included the RGL. `GF-3.10` is a tag that was created
+in both repositories ([gf-core](https://github.com/GrammaticalFramework/gf-core/releases/tag/GF-3.10) and [gf-rgl](https://github.com/GrammaticalFramework/gf-rgl/releases/tag/GF-3.10)) to indicate which versions of each went into the binaries.
diff --git a/bin/build-binary-dist.sh b/bin/build-binary-dist.sh
index 7f6ca5d72..4ea1c31a3 100755
--- a/bin/build-binary-dist.sh
+++ b/bin/build-binary-dist.sh
@@ -1,15 +1,18 @@
#! /bin/bash
-### This script builds a binary distribution of GF from the source
-### package that this script is a part of. It assumes that you have installed
-### a recent version of the Haskell Platform.
-### Two binary package formats are supported: plain tar files (.tar.gz) and
-### OS X Installer packages (.pkg).
+### This script builds a binary distribution of GF from source.
+### It assumes that you have Haskell and Cabal installed.
+### Two binary package formats are supported (specified with the FMT env var):
+### - plain tar files (.tar.gz)
+### - macOS installer packages (.pkg)
os=$(uname) # Operating system name (e.g. Darwin or Linux)
hw=$(uname -m) # Hardware name (e.g. i686 or x86_64)
-# GF version number:
+cabal="cabal v1-" # Cabal >= 2.4
+# cabal="cabal " # Cabal <= 2.2
+
+## Get GF version number from Cabal file
ver=$(grep -i ^version: gf.cabal | sed -e 's/version://' -e 's/ //g')
name="gf-$ver"
@@ -29,6 +32,7 @@ set -x # print commands before executing them
pushd src/runtime/c
bash setup.sh configure --prefix="$prefix"
bash setup.sh build
+bash setup.sh install prefix="$prefix" # hack required for GF build on macOS
bash setup.sh install prefix="$destdir$prefix"
popd
@@ -38,11 +42,11 @@ if which >/dev/null python; then
EXTRA_INCLUDE_DIRS="$extrainclude" EXTRA_LIB_DIRS="$extralib" python setup.py build
python setup.py install --prefix="$destdir$prefix"
if [ "$fmt" == pkg ] ; then
- # A hack for Python on OS X to find the PGF modules
- pyver=$(ls "$destdir$prefix/lib" | sed -n 's/^python//p')
- pydest="$destdir/Library/Python/$pyver/site-packages"
- mkdir -p "$pydest"
- ln "$destdir$prefix/lib/python$pyver/site-packages"/pgf* "$pydest"
+ # A hack for Python on macOS to find the PGF modules
+ pyver=$(ls "$destdir$prefix/lib" | sed -n 's/^python//p')
+ pydest="$destdir/Library/Python/$pyver/site-packages"
+ mkdir -p "$pydest"
+ ln "$destdir$prefix/lib/python$pyver/site-packages"/pgf* "$pydest"
fi
popd
else
@@ -55,54 +59,40 @@ if which >/dev/null javac && which >/dev/null jar ; then
rm -f libjpgf.la # In case it contains the wrong INSTALL_PATH
if make CFLAGS="-I$extrainclude -L$extralib" INSTALL_PATH="$prefix"
then
- make INSTALL_PATH="$destdir$prefix" install
+ make INSTALL_PATH="$destdir$prefix" install
else
- echo "*** Skipping the Java binding because of errors"
+ echo "Skipping the Java binding because of errors"
fi
popd
else
echo "Java SDK is not installed, so the Java binding will not be included"
fi
-## To find dynamic C run-time libraries when running GF below
+## To find dynamic C run-time libraries when building GF below
export DYLD_LIBRARY_PATH="$extralib" LD_LIBRARY_PATH="$extralib"
-
## Build GF, with C run-time support enabled
-cabal install -w "$ghc" --only-dependencies -fserver -fc-runtime $extra
-cabal configure -w "$ghc" --prefix="$prefix" -fserver -fc-runtime $extra
-cabal build
- # Building the example grammars will fail, because the RGL is missing
-cabal copy --destdir="$destdir" # create www directory
-
-## Build the RGL and copy it to $destdir
-PATH=$PWD/dist/build/gf:$PATH
-export GF_LIB_PATH="$(dirname $(find "$destdir" -name www))/lib" # hmm
-mkdir -p "$GF_LIB_PATH"
-pushd ../gf-rgl
-make build
-make copy
-popd
-
-# Build GF again, including example grammars that need the RGL
-cabal build
+${cabal}install -w "$ghc" --only-dependencies -fserver -fc-runtime $extra
+${cabal}configure -w "$ghc" --prefix="$prefix" -fserver -fc-runtime $extra
+${cabal}build
## Copy GF to $destdir
-cabal copy --destdir="$destdir"
+${cabal}copy --destdir="$destdir"
libdir=$(dirname $(find "$destdir" -name PGF.hi))
-cabal register --gen-pkg-config=$libdir/gf-$ver.conf
+${cabal}register --gen-pkg-config="$libdir/gf-$ver.conf"
## Create the binary distribution package
case $fmt in
tar.gz)
- targz="$name-bin-$hw-$os.tar.gz" # the final tar file
- tar -C "$destdir/$prefix" -zcf "dist/$targz" .
- echo "Created $targz, consider renaming it to something more user friendly"
- ;;
+ targz="$name-bin-$hw-$os.tar.gz" # the final tar file
+ tar --directory "$destdir/$prefix" --gzip --create --file "dist/$targz" .
+ echo "Created $targz"
+ ;;
pkg)
- pkg=$name.pkg
- pkgbuild --identifier org.grammaticalframework.gf.pkg --version "$ver" --root "$destdir" --install-location / dist/$pkg
- echo "Created $pkg"
+ pkg=$name.pkg
+ pkgbuild --identifier org.grammaticalframework.gf.pkg --version "$ver" --root "$destdir" --install-location / dist/$pkg
+ echo "Created $pkg"
esac
+## Cleanup
rm -r "$destdir"
diff --git a/bin/update_html b/bin/update_html
index 912ff1fa0..717670085 100755
--- a/bin/update_html
+++ b/bin/update_html
@@ -147,7 +147,7 @@ else
fi
done
find . -name '*.md' | while read file ; do
- if [[ "$file" == *"README.md" ]] ; then continue ; fi
+ if [[ "$file" == *"README.md" ]] || [[ "$file" == *"RELEASE.md" ]] ; then continue ; fi
html="${file%.md}.html"
if [ "$file" -nt "$html" ] || [ "$template" -nt "$html" ] ; then
render_md_html "$file" "$html"
diff --git a/debian/control b/debian/control
index a07187983..12eb6b9d9 100644
--- a/debian/control
+++ b/debian/control
@@ -3,14 +3,14 @@ Section: devel
Priority: optional
Maintainer: Thomas Hallgren <hallgren@chalmers.se>
Standards-Version: 3.9.2
-Build-Depends: debhelper (>= 5), haskell-platform (>= 2011.2.0.1), libghc-haskeline-dev, libghc-mtl-dev, libghc-json-dev, autoconf, automake, libtool-bin, python-dev, java-sdk, txt2tags, pandoc
+Build-Depends: debhelper (>= 5), haskell-platform (>= 2011.2.0.1), libghc-haskeline-dev, libghc-mtl-dev, libghc-json-dev, autoconf, automake, libtool-bin, python-dev, java-sdk
Homepage: http://www.grammaticalframework.org/
Package: gf
Architecture: any
Depends: ${shlibs:Depends}
Description: Tools for GF, a grammar formalism based on type theory
- Grammatical Framework (GF) is a grammar formalism based on type theory.
+ Grammatical Framework (GF) is a grammar formalism based on type theory.
It consists of a special-purpose programming language,
a compiler of the language, and a generic grammar processor.
.
diff --git a/debian/rules b/debian/rules
index 917801826..8bd3c1f85 100755
--- a/debian/rules
+++ b/debian/rules
@@ -1,6 +1,6 @@
#!/usr/bin/make -f
-%:
+%:
+dh $@
#dh_shlibdeps has a problem finding which package some of the Haskell
@@ -26,14 +26,10 @@ override_dh_auto_build:
cd src/runtime/python && EXTRA_INCLUDE_DIRS=$(CURDIR)/src/runtime/c EXTRA_LIB_DIRS=$(CURDIR)/src/runtime/c/.libs python setup.py build
cd src/runtime/java && make CFLAGS="-I$(CURDIR)/src/runtime/c -L$(CURDIR)/src/runtime/c/.libs" INSTALL_PATH=/usr
echo $(SET_LDL)
- -$(SET_LDL) cabal build # builds gf, fails to build example grammars
- export $(SET_LDL); PATH=$(CURDIR)/dist/build/gf:$$PATH && make -C ../gf-rgl build
- GF_LIB_PATH=$(CURDIR)/../gf-rgl/dist $(SET_LDL) cabal build # have RGL now, ok to build example grammars
- make html
+ -$(SET_LDL) cabal build
override_dh_auto_install:
- $(SET_LDL) cabal copy --destdir=$(CURDIR)/debian/gf # creates www directory
- export GF_LIB_PATH="$$(dirname $$(find "$(CURDIR)/debian/gf" -name www))/lib" && echo "GF_LIB_PATH=$$GF_LIB_PATH" && mkdir -p "$$GF_LIB_PATH" && make -C ../gf-rgl copy
+ $(SET_LDL) cabal copy --destdir=$(CURDIR)/debian/gf
cd src/runtime/c && bash setup.sh copy prefix=$(CURDIR)/debian/gf/usr
cd src/runtime/python && python setup.py install --prefix=$(CURDIR)/debian/gf/usr
cd src/runtime/java && make INSTALL_PATH=$(CURDIR)/debian/gf/usr install
diff --git a/doc/tutorial/gf-tutorial.t2t b/doc/tutorial/gf-tutorial.t2t
index 7467e107e..63407a38a 100644
--- a/doc/tutorial/gf-tutorial.t2t
+++ b/doc/tutorial/gf-tutorial.t2t
@@ -2475,7 +2475,7 @@ can be used to read a text and return for each word its analyses
```
The command ``morpho_quiz = mq`` generates inflection exercises.
```
- % gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo
+ % gf alltenses/IrregFre.gfo
> morpho_quiz -cat=V
@@ -2488,11 +2488,6 @@ The command ``morpho_quiz = mq`` generates inflection exercises.
réapparaîtriez
Score 0/1
```
-To create a list for later use, use the command ``morpho_list = ml``
-```
- > morpho_list -number=25 -cat=V | write_file exx.txt
-```
-
@@ -2651,12 +2646,12 @@ The verb //switch off// is called a
We can define transitive verbs and their combinations as follows:
```
- lincat TV = {s : Number => Str ; part : Str} ;
+ lincat V2 = {s : Number => Str ; part : Str} ;
- fun AppTV : Item -> TV -> Item -> Phrase ;
+ fun AppV2 : Item -> V2 -> Item -> Phrase ;
- lin AppTV subj tv obj =
- {s = subj.s ++ tv.s ! subj.n ++ obj.s ++ tv.part} ;
+ lin AppV2 subj v2 obj =
+ {s = subj.s ++ v2.s ! subj.n ++ obj.s ++ v2.part} ;
```
**Exercise**. Define the language ``a^n b^n c^n`` in GF, i.e.
@@ -2722,11 +2717,11 @@ This topic will be covered in #Rseclexing.
The symbol ``**`` is used for both record types and record objects.
```
- lincat TV = Verb ** {c : Case} ;
+ lincat V2 = Verb ** {c : Case} ;
lin Follow = regVerb "folgen" ** {c = Dative} ;
```
-``TV`` becomes a **subtype** of ``Verb``.
+``V2`` (transitive verb) becomes a **subtype** of ``Verb``.
If //T// is a subtype of //R//, an object of //T// can be used whenever
an object of //R// is required.
@@ -2757,7 +2752,11 @@ Thus the labels ``p1, p2,...`` are hard-coded.
English indefinite article:
```
oper artIndef : Str =
- pre {"a" ; "an" / strs {"a" ; "e" ; "i" ; "o"}} ;
+ pre {
+ ("a" | "e" | "i" | "o") => "an" ;
+ _ => "a"
+ } ;
+
```
Thus
```
diff --git a/download/gfc b/download/gfc
deleted file mode 100644
index 7c1d30515..000000000
--- a/download/gfc
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/sh
-
-prefix="/usr/local"
-
-case "i386-apple-darwin9.3.0" in
- *-cygwin)
- prefix=`cygpath -w "$prefix"`;;
-esac
-
-exec_prefix="${prefix}"
-GF_BIN_DIR="${exec_prefix}/bin"
-GF_DATA_DIR="${prefix}/share/GF-3.0-beta"
-
-GFBIN="$GF_BIN_DIR/gf"
-
-if [ ! -x "${GFBIN}" ]; then
- GFBIN=`which gf`
-fi
-
-if [ ! -x "${GFBIN}" ]; then
- echo "gf not found."
- exit 1
-fi
-
-exec $GFBIN --batch "$@"
diff --git a/download/index.md b/download/index-3.10.md
index 44eb6db3c..44eb6db3c 100644
--- a/download/index.md
+++ b/download/index-3.10.md
diff --git a/download/index-3.11.md b/download/index-3.11.md
new file mode 100644
index 000000000..c128e77ce
--- /dev/null
+++ b/download/index-3.11.md
@@ -0,0 +1,182 @@
+---
+title: Grammatical Framework Download and Installation
+...
+
+**GF 3.11** was released on ... December 2020.
+
+What's new? See the [release notes](release-3.11.html).
+
+#### Note: GF core and the RGL
+
+The following instructions explain how to install **GF core**, i.e. the compiler, shell and run-time systems.
+Obtaining the **Resource Grammar Library (RGL)** is done separately; see the section at the bottom of this page.
+
+---
+
+## Installing from a binary package
+
+Binary packages are available for Debian/Ubuntu, macOS, and Windows and include:
+
+- GF shell and grammar compiler
+- `gf -server` mode
+- C run-time system
+- Java & Python bindings to the C run-time system
+
+Unlike in previous versions, the binaries **do not** include the RGL.
+
+[Binary packages on GitHub](https://github.com/GrammaticalFramework/gf-core/releases/tag/RELEASE-3.11)
+
+#### Debian/Ubuntu
+
+To install the package use:
+```
+sudo dpkg -i gf_3.11.deb
+```
+
+The Ubuntu `.deb` packages should work on Ubuntu 16.04, 18.04 and similar Linux distributions.
+
+#### macOS
+
+To install the package, just double-click it and follow the installer instructions.
+
+The packages should work on at least 10.13 (High Sierra) and 10.14 (Mojave).
+
+#### Windows
+
+To install the package, unpack it anywhere.
+
+You will probably need to update the `PATH` environment variable to include your chosen install location.
+
+For more information, see [Using GF on Windows](https://www.grammaticalframework.org/~inari/gf-windows.html) (last updated for Windows 10).
+
+## Installing the latest release from source
+
+[GF is on Hackage](http://hackage.haskell.org/package/gf), so under
+normal circumstances the procedure is fairly simple:
+
+1. Install a recent version of the [Haskell Platform](http://hackage.haskell.org/platform) (see note below)
+2. `cabal update`
+3. On Linux: install some C libraries from your Linux distribution (see note below)
+4. `cabal install gf`
+
+You can also download the source code release from [GitHub](https://github.com/GrammaticalFramework/gf-core/releases),
+and follow the instructions below under **Installing from the latest developer source code**.
+
+### Notes
+
+**Installation location**
+
+The above steps install GF for a single user.
+The executables are put in `$HOME/.cabal/bin` (or on macOS in `$HOME/Library/Haskell/bin`),
+so you might want to add this directory to your path (in `.bash_profile` or similar):
+
+```
+PATH=$HOME/.cabal/bin:$PATH
+```
+
+**Build tools**
+
+In order to compile GF you need the build tools **Alex** and **Happy**.
+These can be installed via Cabal, e.g.:
+
+```
+cabal install alex happy
+```
+
+or obtained by other means, depending on your OS.
+
+**Haskeline**
+
+GF uses [`haskeline`](http://hackage.haskell.org/package/haskeline), which
+on Linux depends on some non-Haskell libraries that won't be installed
+automatically by cabal, and therefore need to be installed manually.
+Here is one way to do this:
+
+- On Ubuntu: `sudo apt-get install libghc-haskeline-dev`
+- On Fedora: `sudo dnf install ghc-haskeline-devel`
+
+**GHC version**
+
+The GF source code has been updated to compile with GHC versions 7.10 through to 8.8.
+
+## Installing from the latest developer source code
+
+If you haven't already, clone the repository with:
+
+```
+git clone https://github.com/GrammaticalFramework/gf-core.git
+```
+
+If you've already cloned the repository previously, update with:
+
+```
+git pull
+```
+
+Then install with:
+
+```
+cabal install
+```
+
+or, if you're a Stack user:
+
+```
+stack install
+```
+
+The above notes for installing from source apply also in these cases.
+For more info on working with the GF source code, see the
+[GF Developers Guide](../doc/gf-developers.html).
+
+## Installing the Python bindings from PyPI
+
+The Python library is available on PyPI as `pgf`, so it can be installed using:
+
+```
+pip install pgf
+```
+
+We provide binary wheels for Linux and macOS, which include the C runtime and are ready-to-go.
+If there is no binary distribution for your platform, this will install the source tarball,
+which will attempt to build the binding during installation,
+and requires the GF C runtime to be installed on your system.
+
+---
+
+## Installing the RGL from a binary release
+
+Binary releases of the RGL are made available on [GitHub](https://github.com/GrammaticalFramework/gf-rgl/releases).
+In general the steps to follow are:
+
+1. Download a binary release and extract it somewhere on your system.
+2. Set the environment variable `GF_LIB_PATH` to point to wherever you extracted the RGL.
+
+## Installing the RGL from source
+
+To compile the RGL, you will need to have GF already installed and in your path.
+
+1. Obtain the RGL source code, either by:
+ - cloning with `git clone https://github.com/GrammaticalFramework/gf-rgl.git`
+ - downloading a source archive [here](https://github.com/GrammaticalFramework/gf-rgl/archive/master.zip)
+2. Run `make` in the source code folder.
+
+For more options, see the [RGL README](https://github.com/GrammaticalFramework/gf-rgl/blob/master/README.md).
+
+---
+
+## Older releases
+
+- [GF 3.10](index-3.10.html) (December 2018)
+- [GF 3.9](index-3.9.html) (August 2017)
+- [GF 3.8](index-3.8.html) (June 2016)
+- [GF 3.7.1](index-3.7.1.html) (October 2015)
+- [GF 3.7](index-3.7.html) (June 2015)
+- [GF 3.6](index-3.6.html) (June 2014)
+- [GF 3.5](index-3.5.html) (August 2013)
+- [GF 3.4](index-3.4.html) (January 2013)
+- [GF 3.3.3](index-3.3.3.html) (March 2012)
+- [GF 3.3](index-3.3.html) (October 2011)
+- [GF 3.2.9](index-3.2.9.html) source-only snapshot (September 2011)
+- [GF 3.2](index-3.2.html) (December 2010)
+- [GF 3.1.6](index-3.1.6.html) (April 2010)
diff --git a/download/index.html b/download/index.html
new file mode 100644
index 000000000..eb32412f8
--- /dev/null
+++ b/download/index.html
@@ -0,0 +1,8 @@
+<html>
+<head>
+ <meta http-equiv="refresh" content="0; URL=/download/index-3.10.html" />
+</head>
+<body>
+ You are being redirected to <a href="index-3.10.html">the current version</a> of this page.
+</body>
+</html>
diff --git a/download/release-3.11.md b/download/release-3.11.md
new file mode 100644
index 000000000..3cb448303
--- /dev/null
+++ b/download/release-3.11.md
@@ -0,0 +1,40 @@
+---
+title: GF 3.11 Release Notes
+date: ... December 2020
+...
+
+## Installation
+
+See the [download page](index-3.11.html).
+
+## What's new
+
+From this release, the binary GF core packages do not contain the RGL.
+The RGL's release cycle is now completely separate from GF's. See [RGL releases](https://github.com/GrammaticalFramework/gf-rgl/releases).
+
+Over 400 changes have been pushed to GF core
+since the release of GF 3.10 in December 2018.
+
+## General
+
+- Make the test suite work again.
+- Compatibility with new versions of GHC, including multiple Stack files for the different versions.
+- Updates to build scripts and CI.
+- Bug fixes.
+
+## GF compiler and run-time library
+
+- Huge improvements in time & space requirements for grammar compilation (pending [#87](https://github.com/GrammaticalFramework/gf-core/pull/87)).
+- Add CoNLL output to `visualize_tree` shell command.
+- Add canonical GF as output format in the compiler.
+- Add PGF JSON as output format in the compiler.
+- Deprecate JavaScript runtime in favour of updated [TypeScript runtime](https://github.com/GrammaticalFramework/gf-typescript).
+- Improvements to Haskell export.
+- Improvements to the C runtime.
+- Improvements to `gf -server` mode.
+- Clearer compiler error messages.
+
+## Other
+
+- Web page and documentation improvements.
+- Add WordNet module to GFSE.
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index fe455c58a..bde0aa064 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -35,7 +35,7 @@ $u = [.\n] -- universal: any character
:-
"--" [.]* ; -- Toss single line comments
-"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
+"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
$white+ ;
@rsyms { tok ident }
@@ -138,7 +138,7 @@ data Token
res = eitherResIdent
eitherResIdent :: (Ident -> Token) -> Ident -> Token
-eitherResIdent tv s =
+eitherResIdent tv s =
case Map.lookup s resWords of
Just t -> t
Nothing -> tv s
@@ -285,6 +285,10 @@ instance Monad P where
POk s a -> unP (k a) s
PFailed posn err -> PFailed posn err
+#if !(MIN_VERSION_base(4,13,0))
+ -- Monad(fail) will be removed in GHC 8.8+
+ fail = Fail.fail
+#endif
instance Fail.MonadFail P where
fail msg = P $ \(_,AI posn _ _) -> PFailed posn msg
diff --git a/src/compiler/GF/Infra/SIO.hs b/src/compiler/GF/Infra/SIO.hs
index 0ce431380..906f39345 100644
--- a/src/compiler/GF/Infra/SIO.hs
+++ b/src/compiler/GF/Infra/SIO.hs
@@ -60,7 +60,7 @@ instance Monad SIO where
SIO m1 >>= xm2 = SIO $ \ h -> m1 h >>= \ x -> unS (xm2 x) h
instance Fail.MonadFail SIO where
- fail = liftSIO . fail
+ fail = lift0 . fail
instance Output SIO where
ePutStr = lift0 . ePutStr
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 1ee24ac59..d558908ab 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -61,6 +61,14 @@ typedef struct {
typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
+typedef struct {
+ PgfProductionIdx* idx;
+ size_t offset;
+ size_t sym_idx;
+} PgfLexiconIdxEntry;
+
+typedef GuBuf PgfLexiconIdx;
+
struct PgfParseState {
PgfParseState* next;
@@ -74,6 +82,8 @@ struct PgfParseState {
size_t end_offset;
prob_t viterbi_prob;
+
+ PgfLexiconIdx* lexicon_idx;
};
typedef struct PgfAnswers {
@@ -687,16 +697,6 @@ static void
pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep);
static void
-pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
-{
- if (gu_buf_length(state->agenda) == 0) {
- state->viterbi_prob =
- item->inside_prob+item->conts->outside_prob;
- }
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
-}
-
-static void
pgf_parsing_push_production(PgfParsing* ps, PgfParseState* state,
PgfItemConts* conts, PgfProduction prod)
{
@@ -727,7 +727,7 @@ pgf_parsing_combine(PgfParsing* ps,
}
pgf_item_advance(item, ps->pool);
- pgf_parsing_push_item(before, item);
+ gu_buf_heap_push(before->agenda, pgf_item_prob_order, &item);
}
static PgfProduction
@@ -898,9 +898,65 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
}
}
+PGF_INTERNAL_DECL int
+pgf_symbols_cmp(PgfCohortSpot* spot,
+ PgfSymbols* syms, size_t* sym_idx,
+ bool case_sensitive);
+
+static void
+pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
+ int i, int j, ptrdiff_t min, ptrdiff_t max)
+{
+ // Variation of binary search over ps->concr->sequences[i..j]: it compares
+ // the input at state->end_offset with each probed sequence and appends the
+ // matches to state->lexicon_idx. A single pass can retrieve all prefixes
+ // of a string, so there is no need to look up every prefix separately.
+
+ while (i <= j) {
+ int k = (i+j) / 2; // probe the middle of the remaining range
+ PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
+
+ PgfCohortSpot start = {0, ps->sentence + state->end_offset}; // spot at the text following this state
+ PgfCohortSpot current = start; // working copy handed to pgf_symbols_cmp by pointer
+ size_t sym_idx = 0; // also handed to pgf_symbols_cmp by pointer
+ int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
+ if (cmp < 0) {
+ j = k-1; // keep searching only below k
+ } else if (cmp > 0) {
+ ptrdiff_t len = current.ptr - start.ptr; // distance current.ptr moved during the comparison
+
+ if (min <= len) // lower half is searched with max narrowed to len
+ pgf_parsing_lookahead(ps, state, i, k-1, min, len);
+
+ if (len+1 <= max) // upper half is searched with min raised to len+1
+ pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
+
+ break; // both halves were handled by the recursive calls
+ } else {
+ ptrdiff_t len = current.ptr - start.ptr; // distance current.ptr moved during the comparison
+
+ if (min <= len-1) // lower half is searched with max narrowed to len-1
+ pgf_parsing_lookahead(ps, state, i, k-1, min, len-1);
+
+ if (seq->idx != NULL) { // record the match only when the sequence has a production index
+ PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
+ entry->idx = seq->idx;
+ entry->offset = (size_t) (current.ptr - ps->sentence); // position of current.ptr relative to the sentence start
+ entry->sym_idx = sym_idx; // value left in sym_idx by pgf_symbols_cmp
+ }
+
+ if (len+1 <= max) // upper half is searched with min raised to len+1
+ pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
+
+ break; // both halves were handled by the recursive calls
+ }
+ }
+}
+
static PgfParseState*
pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
- BIND_TYPE bind_type)
+ BIND_TYPE bind_type,
+ prob_t viterbi_prob)
{
PgfParseState** pstate;
if (ps->before == NULL && start_offset == 0)
@@ -953,170 +1009,34 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
(start_offset == end_offset);
state->start_offset = start_offset;
state->end_offset = end_offset;
- state->viterbi_prob = 0;
+ state->viterbi_prob = viterbi_prob;
+ state->lexicon_idx =
+ gu_new_buf(PgfLexiconIdxEntry, ps->pool);
if (ps->before == NULL && start_offset == 0)
state->needs_bind = false;
- *pstate = state;
-
- return state;
-}
-
-PGF_INTERNAL_DECL int
-pgf_symbols_cmp(PgfCohortSpot* spot,
- PgfSymbols* syms, size_t* sym_idx,
- bool case_sensitive);
-
-static bool
-pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
- int i, int j, ptrdiff_t min, ptrdiff_t max)
-{
- // This is a variation of a binary search algorithm which
- // can retrieve all prefixes of a string with minimal
- // comparisons, i.e. there is no need to lookup every
- // prefix separately.
-
- bool found = false;
- while (i <= j) {
- int k = (i+j) / 2;
- PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
-
- PgfCohortSpot start = {0, ps->sentence+state->end_offset};
- PgfCohortSpot current = start;
-
- size_t sym_idx = 0;
- int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
- if (cmp < 0) {
- j = k-1;
- } else if (cmp > 0) {
- ptrdiff_t len = current.ptr - start.ptr;
-
- if (min <= len)
- if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
- found = true;
-
- if (len+1 <= max)
- if (pgf_parsing_scan_helper(ps, state, k+1, j, len+1, max))
- found = true;
-
- break;
- } else {
- ptrdiff_t len = current.ptr - start.ptr;
-
- if (min <= len)
- if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
- found = true;
-
- // Here we do bottom-up prediction for all lexical categories.
- // The epsilon productions will be predicted in top-down
- // fashion while parsing.
- if (seq->idx != NULL && len > 0) {
- found = true;
-
- // A new state will mark the end of the current match
- PgfParseState* new_state =
- pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
-
- // Bottom-up prediction for lexical rules
- size_t n_entries = gu_buf_length(seq->idx);
- for (size_t i = 0; i < n_entries; i++) {
- PgfProductionIdxEntry* entry =
- gu_buf_index(seq->idx, PgfProductionIdxEntry, i);
-
- PgfItemConts* conts =
- pgf_parsing_get_conts(state,
- entry->ccat, entry->lin_idx,
- ps->pool);
-
- // Create the new category if it doesn't exist yet
- PgfCCat* tmp_ccat = pgf_parsing_get_completed(new_state, conts);
- PgfCCat* ccat = tmp_ccat;
- if (ccat == NULL) {
- ccat = pgf_parsing_create_completed(ps, new_state, conts, INFINITY);
- }
-
- // Add the production
- if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
- ccat->prods = gu_realloc_seq(ccat->prods, PgfProduction, ccat->n_synprods+1);
- }
- GuVariantInfo i;
- i.tag = PGF_PRODUCTION_APPLY;
- i.data = entry->papp;
- PgfProduction prod = gu_variant_close(i);
- gu_seq_set(ccat->prods, PgfProduction, ccat->n_synprods++, prod);
-
- // Update the category's probability to be minimum
- if (ccat->viterbi_prob > entry->papp->fun->ep->prob)
- ccat->viterbi_prob = entry->papp->fun->ep->prob;
-
-#ifdef PGF_PARSER_DEBUG
- GuPool* tmp_pool = gu_new_pool();
- GuOut* out = gu_file_out(stderr, tmp_pool);
- GuExn* err = gu_exn(tmp_pool);
- if (tmp_ccat == NULL) {
- gu_printf(out, err, "[");
- pgf_print_range(state, new_state, out, err);
- gu_puts("; ", out, err);
- pgf_print_fid(conts->ccat->fid, out, err);
- gu_printf(out, err, "; %d; ",
- conts->lin_idx);
- pgf_print_fid(ccat->fid, out, err);
- gu_puts("] ", out, err);
- pgf_print_fid(ccat->fid, out, err);
- gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
- }
- pgf_print_production(ccat->fid, prod, out, err);
- gu_pool_free(tmp_pool);
-#endif
- }
- }
-
- if (len <= max)
- if (pgf_parsing_scan_helper(ps, state, k+1, j, len, max))
- found = true;
-
- break;
+ if (gu_seq_length(ps->concr->sequences) > 0) {
+ // Add epsilon lexical rules to the bottom up index
+ PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
+ if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
+ PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
+ entry->idx = seq->idx;
+ entry->offset = state->start_offset;
+ entry->sym_idx= 0;
}
- }
-
- return found;
-}
-
-static void
-pgf_parsing_scan(PgfParsing *ps)
-{
- size_t len = strlen(ps->sentence);
- PgfParseState* state =
- pgf_new_parse_state(ps, 0, BIND_SOFT);
-
- while (state != NULL && state->end_offset < len) {
- if (state->needs_bind) {
- // We have encountered two tokens without space in between.
- // Those can be accepted only if there is a BIND token
- // in between. We encode this by having one more state
- // at the same offset. A transition between these two
- // states is possible only with the BIND token.
- state =
- pgf_new_parse_state(ps, state->end_offset, BIND_HARD);
+ // Add non-epsilon lexical rules to the bottom up index
+ if (!state->needs_bind) {
+ pgf_parsing_lookahead(ps, state,
+ 0, gu_seq_length(ps->concr->sequences)-1,
+ 1, strlen(ps->sentence)-state->end_offset);
}
+ }
- if (!pgf_parsing_scan_helper
- (ps, state,
- 0, gu_seq_length(ps->concr->sequences)-1,
- 1, len-state->end_offset)) {
- // skip one character and try again
- GuString s = ps->sentence+state->end_offset;
- gu_utf8_decode((const uint8_t**) &s);
- pgf_new_parse_state(ps, s-ps->sentence, BIND_NONE);
- }
+ *pstate = state;
- if (state == ps->before)
- state = ps->after;
- else
- state = state->next;
- }
+ return state;
}
static void
@@ -1138,8 +1058,9 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
if (!ps->before->needs_bind && cmp_string(&current, tok, ps->case_sensitive) == 0) {
PgfParseState* state =
pgf_new_parse_state(ps, (current.ptr - ps->sentence),
- BIND_NONE);
- pgf_parsing_push_item(state, item);
+ BIND_NONE,
+ item->inside_prob+item->conts->outside_prob);
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
@@ -1147,6 +1068,27 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
}
static void
+pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
+ PgfProductionIdxEntry* entry,
+ size_t offset, size_t sym_idx)
+{ // builds an item for entry->papp and pushes it on the agenda of the state obtained for `offset`
+ GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp }; // tag the application as an APPLY production
+ PgfProduction prod = gu_variant_close(i);
+ PgfItem* item =
+ pgf_new_item(ps, conts, prod);
+ PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms; // NOTE(review): syms is not read again in this function
+ item->sym_idx = sym_idx; // position the item at the caller-supplied symbol index
+ pgf_item_set_curr_symbol(item, ps->pool);
+ prob_t prob = item->inside_prob+item->conts->outside_prob; // inside + outside value used as the state's estimate
+ PgfParseState* state =
+ pgf_new_parse_state(ps, offset, BIND_NONE, prob); // NOTE(review): presumably may return an already existing state - confirm
+ if (state->viterbi_prob > prob) { // keep the smaller of the stored value and this item's value
+ state->viterbi_prob = prob;
+ }
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); // agenda is a heap ordered by pgf_item_prob_order
+}
+
+static void
pgf_parsing_td_predict(PgfParsing* ps,
PgfItem* item, PgfCCat* ccat, size_t lin_idx)
{
@@ -1193,36 +1135,34 @@ pgf_parsing_td_predict(PgfParsing* ps,
pgf_parsing_push_production(ps, ps->before, conts, prod);
}
- // Top-down prediction for epsilon lexical rules if any
- PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
- if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
+ // Bottom-up prediction for lexical and epsilon rules
+ size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
+ for (size_t i = 0; i < n_idcs; i++) {
+ PgfLexiconIdxEntry* lentry =
+ gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
PgfProductionIdxEntry key;
key.ccat = ccat;
key.lin_idx = lin_idx;
key.papp = NULL;
PgfProductionIdxEntry* value =
- gu_seq_binsearch(gu_buf_data_seq(seq->idx),
+ gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
pgf_production_idx_entry_order,
PgfProductionIdxEntry, &key);
if (value != NULL) {
- GuVariantInfo i = { PGF_PRODUCTION_APPLY, value->papp };
- PgfProduction prod = gu_variant_close(i);
- pgf_parsing_push_production(ps, ps->before, conts, prod);
+ pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx);
PgfProductionIdxEntry* start =
- gu_buf_data(seq->idx);
+ gu_buf_data(lentry->idx);
PgfProductionIdxEntry* end =
- start + gu_buf_length(seq->idx)-1;
+ start + gu_buf_length(lentry->idx)-1;
PgfProductionIdxEntry* left = value-1;
while (left >= start &&
value->ccat->fid == left->ccat->fid &&
value->lin_idx == left->lin_idx) {
- GuVariantInfo i = { PGF_PRODUCTION_APPLY, left->papp };
- PgfProduction prod = gu_variant_close(i);
- pgf_parsing_push_production(ps, ps->before, conts, prod);
+ pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx);
left--;
}
@@ -1230,9 +1170,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
while (right <= end &&
value->ccat->fid == right->ccat->fid &&
value->lin_idx == right->lin_idx) {
- GuVariantInfo i = { PGF_PRODUCTION_APPLY, right->papp };
- PgfProduction prod = gu_variant_close(i);
- pgf_parsing_push_production(ps, ps->before, conts, prod);
+ pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx);
right++;
}
}
@@ -1271,7 +1209,7 @@ pgf_parsing_pre(PgfParsing* ps, PgfItem* item, PgfSymbols* syms)
} else {
item->alt = 0;
pgf_item_advance(item, ps->pool);
- pgf_parsing_push_item(ps->before, item);
+ gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
}
}
@@ -1401,8 +1339,9 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
item->curr_sym = gu_null_variant;
item->sym_idx = gu_seq_length(syms);
PgfParseState* state =
- pgf_new_parse_state(ps, offset, BIND_NONE);
- pgf_parsing_push_item(state, item);
+ pgf_new_parse_state(ps, offset, BIND_NONE,
+ item->inside_prob+item->conts->outside_prob);
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
match = true;
}
}
@@ -1445,10 +1384,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (ps->before->start_offset == ps->before->end_offset &&
ps->before->needs_bind) {
PgfParseState* state =
- pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
+ pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
+ item->inside_prob+item->conts->outside_prob);
if (state != NULL) {
pgf_item_advance(item, ps->pool);
- pgf_parsing_push_item(state, item);
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
@@ -1462,10 +1402,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (ps->before->start_offset == ps->before->end_offset) {
if (ps->before->needs_bind) {
PgfParseState* state =
- pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
+ pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
+ item->inside_prob+item->conts->outside_prob);
if (state != NULL) {
pgf_item_advance(item, ps->pool);
- pgf_parsing_push_item(state, item);
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
@@ -1474,7 +1415,7 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
}
} else {
pgf_item_advance(item, ps->pool);
- pgf_parsing_push_item(ps->before, item);
+ gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
}
break;
}
@@ -1725,7 +1666,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
ps->heuristic_factor = heuristic_factor;
}
- pgf_parsing_scan(ps);
+ PgfParseState* state =
+ pgf_new_parse_state(ps, 0, BIND_SOFT, 0);
int fidString = -1;
PgfCCat* start_ccat = gu_new(PgfCCat, ps->pool);
@@ -1745,7 +1687,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
#endif
PgfItemConts* conts =
- pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
+ pgf_parsing_get_conts(state, start_ccat, 0, ps->pool);
gu_buf_push(conts->items, PgfItem*, NULL);
size_t n_ccats = gu_seq_length(cnccat->cats);