From 9d7fdf7c9a525a3b5659a566f76d26d151dcd664 Mon Sep 17 00:00:00 2001
From: hallgren
Date: Mon, 25 Nov 2013 21:12:11 +0000
Subject: Change how GF deals with character encodings in grammar files

1. The default encoding is changed from Latin-1 to UTF-8.

2. Alternate encodings should be specified as "--# -coding=enc", the old
   "flags coding=enc" declarations have no effect but are still checked for
   consistency.

3. A transitional warning is generated for files that contain non-ASCII
   characters without specifying a character encoding:

       "Warning: default encoding has changed from Latin-1 to UTF-8"

4. Conversion to Unicode is now done *before* lexing. This makes it possible
   to allow arbitrary Unicode characters in identifiers. But identifiers are
   still stored as ByteStrings, so they are limited to Latin-1 characters
   for now.

5. Lexer.hs is no longer part of the repository. We now generate the lexer
   from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were
   needed. These bugs might already be fixed in newer versions of alex, but
   we should be compatible with what is shipped in the Haskell Platform.
---
 src/compiler/GF/Grammar/Parser.y | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/compiler/GF/Grammar/Parser.y')

diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y
index bc2a394b1..028da18c6 100644
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -17,8 +17,8 @@ import GF.Grammar.Grammar
 import GF.Grammar.Macros
 import GF.Grammar.Lexer
 import GF.Compile.Update (buildAnyTree)
-import Codec.Binary.UTF8.String(decodeString)
-import Data.Char(toLower)
+--import Codec.Binary.UTF8.String(decodeString)
+--import Data.Char(toLower)
 }
 
 %name pModDef ModDef
@@ -616,9 +616,9 @@ happyError = fail "syntax error"
 
 -- Quick fix to render error messages from UTF-8-encoded source files correctly.
 optDecode opts =
-  if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"]
+  {-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"]
     then decodeString
-    else id
+    else-} id
 
 mkListId,mkConsId,mkBaseId :: Ident -> Ident
 mkListId = prefixIdent "List"
-- 
cgit v1.2.3