summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Grammar
diff options
context:
space:
mode:
authorhallgren <hallgren@chalmers.se>2013-11-25 21:12:11 +0000
committerhallgren <hallgren@chalmers.se>2013-11-25 21:12:11 +0000
commit9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (patch)
tree9ea97377d9938fc382c2036fa4c8fef9c33e33d8 /src/compiler/GF/Grammar
parent3210a506484864430504ed1caf2f547bb674e701 (diff)
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8. 2. Alternate encodings should be specified as "--# -coding=enc", the old "flags coding=enc" declarations have no effect but are still checked for consistency. 3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8" 4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now. 5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
Diffstat (limited to 'src/compiler/GF/Grammar')
-rw-r--r--src/compiler/GF/Grammar/Lexer.hs481
-rw-r--r--src/compiler/GF/Grammar/Lexer.x (renamed from src/compiler/GF/Grammar/lexer/Lexer.x)51
-rw-r--r--src/compiler/GF/Grammar/Parser.y8
3 files changed, 36 insertions, 504 deletions
diff --git a/src/compiler/GF/Grammar/Lexer.hs b/src/compiler/GF/Grammar/Lexer.hs
deleted file mode 100644
index ebb14a31f..000000000
--- a/src/compiler/GF/Grammar/Lexer.hs
+++ /dev/null
@@ -1,481 +0,0 @@
-{-# LANGUAGE CPP,MagicHash,BangPatterns #-}
-{-# LINE 1 "lexer/Lexer.x" #-}
-
-module GF.Grammar.Lexer
- ( Token(..), Posn(..)
- , P, runP, lexer, getPosn, failLoc
- , isReservedWord
- ) where
-
-import GF.Infra.Ident
-import qualified Data.ByteString.Char8 as BS
-import qualified Data.Map as Map
-
-
-#if __GLASGOW_HASKELL__ >= 603
-#include "ghcconfig.h"
-#elif defined(__GLASGOW_HASKELL__)
-#include "config.h"
-#endif
-#if __GLASGOW_HASKELL__ >= 503
-import Data.Array
-import Data.Char (ord)
-import Data.Array.Base (unsafeAt)
-#else
-import Array
-import Char (ord)
-#endif
-#if __GLASGOW_HASKELL__ >= 503
-import GHC.Exts
-#else
-import GlaExts
-#endif
-alex_base :: AlexAddr
-alex_base = AlexA# "\x01\x00\x00\x00\x38\x00\x00\x00\x39\x00\x00\x00\x17\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x44\x00\x00\x00\x45\x00\x00\x00\x19\x00\x00\x00\x24\x00\x00\x00\x25\x00\x00\x00\x4d\x00\x00\x00\x00\x00\x00\x00\x29\x00\x00\x00\x30\x00\x00\x00\xf8\xff\xff\xff\x26\x00\x00\x00\x9c\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\xd2\x00\x00\x00\x9e\x00\x00\x00\x6c\x01\x00\x00\x3c\x02\x00\x00\x00\x00\x00\x00\x17\x01\x00\x00\xe7\x01\x00\x00\xa5\x01\x00\x00\xf2\x00\x00\x00\xb7\x01\x00\x00\xff\x00\x00\x00\x0a\x01\x00\x00\x18\x01\x00\x00\x22\x01\x00\x00"#
-
-alex_table :: AlexAddr
-alex_table = AlexA# "\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0b\x00\x0c\x00\x19\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\x14\x00\x0c\x00\x0c\x00\x0d\x00\x0e\x00\x0c\x00\x03\x00\x0c\x00\x0c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x0c\x00\x0c\x00\x0c\x00\x10\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\xff\xff\x01\x00\x07\x00\x07\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x07\x00\x08\x00\x0c\x00\x0c\x00\x0c\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0c\x00\x0c\x00\x0f\x00\x0c\x00\xff\xff\x11\x00\xff\xff\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x07\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x04\x00\x05\x00\x17\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\xff\xff\x00\x00\xff\xff\x00\x00\xff\xff\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x12\x00\xff\xff\xff\xff\x13\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f\x00\xff\xff\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x15\x00\x00\x00\x15\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x18\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x00\x00\x21\x00\x00\x00\x00\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1b\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x17\x00\x1a\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1b\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00"#
-
-alex_check :: AlexAddr
-alex_check = AlexA# "\xff\xff\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00\x07\x00\x08\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x0e\x00\x0f\x00\x10\x00\x11\x00\x12\x00\x13\x00\x14\x00\x15\x00\x16\x00\x17\x00\x18\x00\x19\x00\x1a\x00\x1b\x00\x1c\x00\x1d\x00\x1e\x00\x1f\x00\x20\x00\x21\x00\x22\x00\x23\x00\x24\x00\x25\x00\x26\x00\x27\x00\x28\x00\x29\x00\x2a\x00\x2b\x00\x2c\x00\x2d\x00\x2e\x00\x2f\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x3a\x00\x3b\x00\x3c\x00\x3d\x00\x3e\x00\x3f\x00\x40\x00\x0a\x00\x0a\x00\x2d\x00\x2d\x00\x2d\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x2d\x00\x2d\x00\x2a\x00\x5c\x00\x3e\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x2b\x00\x5b\x00\x5c\x00\x5d\x00\x5e\x00\x5f\x00\x60\x00\xff\xff\xff\xff\x3e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x20\x00\xff\xff\xff\xff\xff\xff\x2d\x00\x2d\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7b\x00\x7c\x00\x7d\x00\x7e\x00\x7f\x00\x80\x00\x81\x00\x82\x00\x83\x00\x84\x00\x85\x00\x86\x00\x87\x00\x88\x00\x89\x00\x8a\x00\x8b\x00\x8c\x00\x8d\x00\x8e\x00\x8f\x00\x90\x00\x91\x00\x92\x00\x93\x00\x94\x00\x95\x00\x96\x00\x97\x00\x98\x00\x99\x00\x9a\x00\x9b\x00\x9c\x00\x9d\x00\x9e\x00\x9f\x00\xa0\x00\xa1\x00\xa2\x00\xa3\x00\xa4\x00\xa5\x00\xa6\x00\xa7\x00\xa8\x00\xa9\x00\xaa\x00\xab\x00\xac\x00\xad\x00\xae\x00\xaf\x00\xb0\x00\xb1\x00\xb2\x00\xb3\x00\xb4\x00\xb5\x00\xb6\x00\xb7\x00\xb8\x00\xb9\x00\xba\x00\xbb\x00\xbc\x00\xbd\x00\xbe\x00\xbf\x00\x7d\x00\x7d\x00\x27\x00\xff\xff\x27\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xd7\x00\xff\xff\x0a\x00\xff\xff\x0a\x00\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x27\x00\xf7\x00\x27\x00\x5c\x00\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x2e\x00\x0a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x5c\x00\xff\xff\x5c\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x22\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\x2d\x00\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x22\x00\xff\xff\xff\xff\xff\xff\x5f\x00\x27\x00\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x0a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x5c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x22\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x6e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x74\x00\xff\xff\xff\xff\x65\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\xff\xff\xff\xff\xff\xff\xff\xff\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\xff\xff\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\xff\xff\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00"#
-
-alex_deflt :: AlexAddr
-alex_deflt = AlexA# "\x16\x00\x02\x00\x02\x00\xff\xff\x09\x00\xff\xff\x09\x00\x09\x00\x09\x00\x09\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x13\x00\x13\x00\xff\xff\xff\xff\xff\xff\xff\xff\x1a\x00\x1a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
-
-alex_accept = listArray (0::Int,33) [[],[(AlexAccSkip)],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAccSkip)],[],[],[],[],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_4))],[],[],[],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_6))],[],[],[],[(AlexAcc (alex_action_7))],[(AlexAcc (alex_action_8))],[(AlexAcc (alex_action_8))],[],[],[]]
-{-# LINE 39 "lexer/Lexer.x" #-}
-
-
-tok f p s = f s
-
-data Token
- = T_exclmark
- | T_patt
- | T_int_label
- | T_oparen
- | T_cparen
- | T_tilde
- | T_star
- | T_starstar
- | T_plus
- | T_plusplus
- | T_comma
- | T_minus
- | T_rarrow
- | T_dot
- | T_alt
- | T_colon
- | T_semicolon
- | T_less
- | T_equal
- | T_big_rarrow
- | T_great
- | T_questmark
- | T_obrack
- | T_lam
- | T_lamlam
- | T_cbrack
- | T_ocurly
- | T_bar
- | T_ccurly
- | T_underscore
- | T_at
- | T_PType
- | T_Str
- | T_Strs
- | T_Tok
- | T_Type
- | T_abstract
- | T_case
- | T_cat
- | T_concrete
- | T_data
- | T_def
- | T_flags
- | T_fn
- | T_fun
- | T_in
- | T_incomplete
- | T_instance
- | T_interface
- | T_let
- | T_lin
- | T_lincat
- | T_lindef
- | T_linref
- | T_of
- | T_open
- | T_oper
- | T_param
- | T_pattern
- | T_pre
- | T_printname
- | T_resource
- | T_strs
- | T_table
- | T_transfer
- | T_variants
- | T_where
- | T_with
- | T_String String -- string literals
- | T_Integer Int -- integer literals
- | T_Double Double -- double precision float literals
- | T_Ident Ident
- | T_EOF
-
-eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
-eitherResIdent tv s =
- case Map.lookup s resWords of
- Just t -> t
- Nothing -> tv s
-
-isReservedWord :: BS.ByteString -> Bool
-isReservedWord s = Map.member s resWords
-
-resWords = Map.fromList
- [ b "!" T_exclmark
- , b "#" T_patt
- , b "$" T_int_label
- , b "(" T_oparen
- , b ")" T_cparen
- , b "~" T_tilde
- , b "*" T_star
- , b "**" T_starstar
- , b "+" T_plus
- , b "++" T_plusplus
- , b "," T_comma
- , b "-" T_minus
- , b "->" T_rarrow
- , b "." T_dot
- , b "/" T_alt
- , b ":" T_colon
- , b ";" T_semicolon
- , b "<" T_less
- , b "=" T_equal
- , b "=>" T_big_rarrow
- , b ">" T_great
- , b "?" T_questmark
- , b "[" T_obrack
- , b "]" T_cbrack
- , b "\\" T_lam
- , b "\\\\" T_lamlam
- , b "{" T_ocurly
- , b "}" T_ccurly
- , b "|" T_bar
- , b "_" T_underscore
- , b "@" T_at
- , b "PType" T_PType
- , b "Str" T_Str
- , b "Strs" T_Strs
- , b "Tok" T_Tok
- , b "Type" T_Type
- , b "abstract" T_abstract
- , b "case" T_case
- , b "cat" T_cat
- , b "concrete" T_concrete
- , b "data" T_data
- , b "def" T_def
- , b "flags" T_flags
- , b "fn" T_fn
- , b "fun" T_fun
- , b "in" T_in
- , b "incomplete" T_incomplete
- , b "instance" T_instance
- , b "interface" T_interface
- , b "let" T_let
- , b "lin" T_lin
- , b "lincat" T_lincat
- , b "lindef" T_lindef
- , b "linref" T_linref
- , b "of" T_of
- , b "open" T_open
- , b "oper" T_oper
- , b "param" T_param
- , b "pattern" T_pattern
- , b "pre" T_pre
- , b "printname" T_printname
- , b "resource" T_resource
- , b "strs" T_strs
- , b "table" T_table
- , b "transfer" T_transfer
- , b "variants" T_variants
- , b "where" T_where
- , b "with" T_with
- ]
- where b s t = (BS.pack s, t)
-
-unescapeInitTail :: String -> String
-unescapeInitTail = unesc . tail where
- unesc s = case s of
- '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
- '\\':'n':cs -> '\n' : unesc cs
- '\\':'t':cs -> '\t' : unesc cs
- '"':[] -> []
- '\'':[] -> []
- c:cs -> c : unesc cs
- _ -> []
-
--------------------------------------------------------------------
--- Alex wrapper code.
--- A modified "posn" wrapper.
--------------------------------------------------------------------
-
-data Posn = Pn {-# UNPACK #-} !Int
- {-# UNPACK #-} !Int
-
-alexMove :: Posn -> Char -> Posn
-alexMove (Pn l c) '\n' = Pn (l+1) 1
-alexMove (Pn l c) _ = Pn l (c+1)
-
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI p _ s) =
- case BS.uncons s of
- Nothing -> Nothing
- Just (c,s) ->
- let p' = alexMove p c
- in p' `seq` Just (c, (AI p' c s))
-
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI p c s) = c
-
-data AlexInput = AI {-# UNPACK #-} !Posn -- current position,
- {-# UNPACK #-} !Char -- previous char
- {-# UNPACK #-} !BS.ByteString -- current input string
-
-data ParseResult a
- = POk a
- | PFailed Posn -- The position of the error
- String -- The error message
-
-newtype P a = P { unP :: AlexInput -> ParseResult a }
-
-instance Monad P where
- return a = a `seq` (P $ \s -> POk a)
- (P m) >>= k = P $ \ s -> case m s of
- POk a -> unP (k a) s
- PFailed posn err -> PFailed posn err
- fail msg = P $ \(AI posn _ _) -> PFailed posn msg
-
-runP :: P a -> BS.ByteString -> Either (Posn,String) a
-runP (P f) txt =
- case f (AI (Pn 1 0) ' ' txt) of
- POk x -> Right x
- PFailed pos msg -> Left (pos,msg)
-
-failLoc :: Posn -> String -> P a
-failLoc pos msg = P $ \_ -> PFailed pos msg
-
-lexer :: (Token -> P a) -> P a
-lexer cont = P go
- where
- go inp@(AI pos _ str) =
- case alexScan inp 0 of
- AlexEOF -> unP (cont T_EOF) inp
- AlexError (AI pos _ _) -> PFailed pos "lexical error"
- AlexSkip inp' len -> go inp'
- AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
-
-getPosn :: P Posn
-getPosn = P $ \inp@(AI pos _ _) -> POk pos
-
-
-alex_action_3 = tok (eitherResIdent (T_Ident . identC . rawIdentC))
-alex_action_4 = tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack))
-alex_action_5 = tok (eitherResIdent (T_Ident . identC . rawIdentC))
-alex_action_6 = tok (T_String . unescapeInitTail . BS.unpack)
-alex_action_7 = tok (T_Integer . read . BS.unpack)
-alex_action_8 = tok (T_Double . read . BS.unpack)
-{-# LINE 1 "templates/GenericTemplate.hs" #-}
-{-# LINE 1 "templates/GenericTemplate.hs" #-}
-{-# LINE 1 "<built-in>" #-}
-{-# LINE 1 "<command-line>" #-}
-{-# LINE 1 "templates/GenericTemplate.hs" #-}
--- -----------------------------------------------------------------------------
--- ALEX TEMPLATE
---
--- This code is in the PUBLIC DOMAIN; you may copy it freely and use
--- it for any purpose whatsoever.
-
--- -----------------------------------------------------------------------------
--- INTERNALS and main scanner engine
-
-{-# LINE 37 "templates/GenericTemplate.hs" #-}
-
-{-# LINE 47 "templates/GenericTemplate.hs" #-}
-
-
-data AlexAddr = AlexA# Addr#
-
-#if __GLASGOW_HASKELL__ < 503
-uncheckedShiftL# = shiftL#
-#endif
-
-{-# INLINE alexIndexInt16OffAddr #-}
-alexIndexInt16OffAddr (AlexA# arr) off =
-#ifdef WORDS_BIGENDIAN
- narrow16Int# i
- where
- !i = word2Int# ((high `uncheckedShiftL#` 8#) `or#` low)
- !high = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#)))
- !low = int2Word# (ord# (indexCharOffAddr# arr off'))
- !off' = off *# 2#
-#else
- indexInt16OffAddr# arr off
-#endif
-
-
-
-
-
-{-# INLINE alexIndexInt32OffAddr #-}
-alexIndexInt32OffAddr (AlexA# arr) off =
-#ifdef WORDS_BIGENDIAN
- narrow32Int# i
- where
- !i = word2Int# ((b3 `uncheckedShiftL#` 24#) `or#`
- (b2 `uncheckedShiftL#` 16#) `or#`
- (b1 `uncheckedShiftL#` 8#) `or#` b0)
- !b3 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 3#)))
- !b2 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 2#)))
- !b1 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#)))
- !b0 = int2Word# (ord# (indexCharOffAddr# arr off'))
- !off' = off *# 4#
-#else
- indexInt32OffAddr# arr off
-#endif
-
-
-
-
-
-#if __GLASGOW_HASKELL__ < 503
-quickIndex arr i = arr ! i
-#else
--- GHC >= 503, unsafeAt is available from Data.Array.Base.
-quickIndex = unsafeAt
-#endif
-
-
-
-
--- -----------------------------------------------------------------------------
--- Main lexing routines
-
-data AlexReturn a
- = AlexEOF
- | AlexError !AlexInput
- | AlexSkip !AlexInput !Int
- | AlexToken !AlexInput !Int a
-
--- alexScan :: AlexInput -> StartCode -> AlexReturn a
-alexScan input (I# (sc))
- = alexScanUser undefined input (I# (sc))
-
-alexScanUser user input (I# (sc))
- = case alex_scan_tkn user input 0# input sc AlexNone of
- (AlexNone, input') ->
- case alexGetChar input of
- Nothing ->
-
-
-
- AlexEOF
- Just _ ->
-
-
-
- AlexError input'
-
- (AlexLastSkip input'' len, _) ->
-
-
-
- AlexSkip input'' len
-
- (AlexLastAcc k input''' len, _) ->
-
-
-
- AlexToken input''' len k
-
-
--- Push the input through the DFA, remembering the most recent accepting
--- state it encountered.
-
-alex_scan_tkn user orig_input len input s last_acc =
- input `seq` -- strict in the input
- let
- new_acc = check_accs (alex_accept `quickIndex` (I# (s)))
- in
- new_acc `seq`
- case alexGetChar input of
- Nothing -> (new_acc, input)
- Just (c, new_input) ->
-
-
-
- let
- (!(base)) = alexIndexInt32OffAddr alex_base s
- (!((I# (ord_c)))) = ord c
- (!(offset)) = (base +# ord_c)
- (!(check)) = alexIndexInt16OffAddr alex_check offset
-
- (!(new_s)) = if (offset >=# 0#) && (check ==# ord_c)
- then alexIndexInt16OffAddr alex_table offset
- else alexIndexInt16OffAddr alex_deflt s
- in
- case new_s of
- -1# -> (new_acc, input)
- -- on an error, we want to keep the input *before* the
- -- character that failed, not after.
- _ -> alex_scan_tkn user orig_input (len +# 1#)
- new_input new_s new_acc
-
- where
- check_accs [] = last_acc
- check_accs (AlexAcc a : _) = AlexLastAcc a input (I# (len))
- check_accs (AlexAccSkip : _) = AlexLastSkip input (I# (len))
- check_accs (AlexAccPred a predx : rest)
- | predx user orig_input (I# (len)) input
- = AlexLastAcc a input (I# (len))
- check_accs (AlexAccSkipPred predx : rest)
- | predx user orig_input (I# (len)) input
- = AlexLastSkip input (I# (len))
- check_accs (_ : rest) = check_accs rest
-
-data AlexLastAcc a
- = AlexNone
- | AlexLastAcc a !AlexInput !Int
- | AlexLastSkip !AlexInput !Int
-
-data AlexAcc a user
- = AlexAcc a
- | AlexAccSkip
- | AlexAccPred a (AlexAccPred user)
- | AlexAccSkipPred (AlexAccPred user)
-
-type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool
-
--- -----------------------------------------------------------------------------
--- Predicates on a rule
-
-alexAndPred p1 p2 user in1 len in2
- = p1 user in1 len in2 && p2 user in1 len in2
-
---alexPrevCharIsPred :: Char -> AlexAccPred _
-alexPrevCharIs c _ input _ _ = c == alexInputPrevChar input
-
---alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _
-alexPrevCharIsOneOf arr _ input _ _ = arr ! alexInputPrevChar input
-
---alexRightContext :: Int -> AlexAccPred _
-alexRightContext (I# (sc)) user _ _ input =
- case alex_scan_tkn user input 0# input sc AlexNone of
- (AlexNone, _) -> False
- _ -> True
- -- TODO: there's no need to find the longest
- -- match when checking the right context, just
- -- the first match will do.
-
--- used by wrappers
-iUnbox (I# (i)) = i
diff --git a/src/compiler/GF/Grammar/lexer/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index 460e7f452..60c51f814 100644
--- a/src/compiler/GF/Grammar/lexer/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -1,3 +1,4 @@
+-- -*- haskell -*-
{
module GF.Grammar.Lexer
( Token(..), Posn(..)
@@ -6,9 +7,14 @@ module GF.Grammar.Lexer
) where
import GF.Infra.Ident
+--import GF.Data.Operations
import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString as WBS
+import qualified Data.ByteString.Internal as BS(w2c)
+import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map
-
+import Data.Word(Word8)
+--import Debug.Trace(trace)
}
@@ -17,7 +23,7 @@ $c = [A-Z\192-\221] # [\215]
$s = [a-z\222-\255] # [\247]
$d = [0-9] -- digit
$i = [$l $d _ '] -- identifier character
-$u = [\0-\255] -- universal: any character
+$u = [.\n] -- universal: any character
@rsyms = -- symbols and non-identifier-like reserved words
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
@@ -27,17 +33,20 @@ $u = [\0-\255] -- universal: any character
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
$white+ ;
-@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
-\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) }
-(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
+@rsyms { tok (res (T_Ident . identS)) }
+\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . unpack)) }
+(\_ | $l)($l | $d | \_ | \')* { tok (res (T_Ident . identS)) }
-\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) }
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
-(\-)? $d+ { tok (T_Integer . read . BS.unpack) }
-(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) }
+(\-)? $d+ { tok (T_Integer . read . unpack) }
+(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
{
+--unpack = BS.unpack
+unpack = id
+tok :: (String->Token) -> Posn -> String -> Token
tok f p s = f s
data Token
@@ -114,15 +123,17 @@ data Token
| T_Double Double -- double precision float literals
| T_Ident Ident
| T_EOF
+-- deriving Show -- debug
-eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
+res = eitherResIdent
+eitherResIdent :: (String -> Token) -> String -> Token
eitherResIdent tv s =
case Map.lookup s resWords of
Just t -> t
Nothing -> tv s
isReservedWord :: BS.ByteString -> Bool
-isReservedWord s = Map.member s resWords
+isReservedWord s = Map.member (BS.unpack s) resWords
resWords = Map.fromList
[ b "!" T_exclmark
@@ -194,7 +205,7 @@ resWords = Map.fromList
, b "where" T_where
, b "with" T_with
]
- where b s t = (BS.pack s, t)
+ where b s t = (s, t)
unescapeInitTail :: String -> String
unescapeInitTail = unesc . tail where
@@ -202,7 +213,7 @@ unescapeInitTail = unesc . tail where
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
'\\':'n':cs -> '\n' : unesc cs
'\\':'t':cs -> '\t' : unesc cs
- '"':[] -> []
+ '\"':[] -> []
'\'':[] -> []
c:cs -> c : unesc cs
_ -> []
@@ -219,13 +230,14 @@ alexMove :: Posn -> Char -> Posn
alexMove (Pn l c) '\n' = Pn (l+1) 1
alexMove (Pn l c) _ = Pn l (c+1)
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI p _ s) =
- case BS.uncons s of
+alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
+alexGetByte (AI p _ s) =
+ case WBS.uncons s of
Nothing -> Nothing
- Just (c,s) ->
+ Just (w,s) ->
let p' = alexMove p c
- in p' `seq` Just (c, (AI p' c s))
+ c = BS.w2c w
+ in p' `seq` Just (w, (AI p' c s))
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (AI p c s) = c
@@ -260,12 +272,13 @@ failLoc pos msg = P $ \_ -> PFailed pos msg
lexer :: (Token -> P a) -> P a
lexer cont = P go
where
+ --cont' t = trace (show t) (cont t)
go inp@(AI pos _ str) =
case alexScan inp 0 of
AlexEOF -> unP (cont T_EOF) inp
AlexError (AI pos _ _) -> PFailed pos "lexical error"
- AlexSkip inp' len -> go inp'
- AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
+ AlexSkip inp' len -> {-trace (show len) $-} go inp'
+ AlexToken inp' len act -> unP (cont (act pos (UTF8.toString (UTF8.take len str)))) inp'
getPosn :: P Posn
getPosn = P $ \inp@(AI pos _ _) -> POk pos
diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y
index bc2a394b1..028da18c6 100644
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -17,8 +17,8 @@ import GF.Grammar.Grammar
import GF.Grammar.Macros
import GF.Grammar.Lexer
import GF.Compile.Update (buildAnyTree)
-import Codec.Binary.UTF8.String(decodeString)
-import Data.Char(toLower)
+--import Codec.Binary.UTF8.String(decodeString)
+--import Data.Char(toLower)
}
%name pModDef ModDef
@@ -616,9 +616,9 @@ happyError = fail "syntax error"
-- Quick fix to render error messages from UTF-8-encoded source files correctly.
optDecode opts =
- if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"]
+ {-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"]
then decodeString
- else id
+ else-} id
mkListId,mkConsId,mkBaseId :: Ident -> Ident
mkListId = prefixIdent "List"