From 9d7fdf7c9a525a3b5659a566f76d26d151dcd664 Mon Sep 17 00:00:00 2001
From: hallgren
Date: Mon, 25 Nov 2013 21:12:11 +0000
Subject: Change how GF deals with character encodings in grammar files

1. The default encoding is changed from Latin-1 to UTF-8.

2. Alternate encodings should be specified as "--# -coding=enc"; the old
   "flags coding=enc" declarations have no effect but are still checked for
   consistency.

3. A transitional warning is generated for files that contain non-ASCII
   characters without specifying a character encoding:
   "Warning: default encoding has changed from Latin-1 to UTF-8"

4. Conversion to Unicode is now done *before* lexing. This makes it possible
   to allow arbitrary Unicode characters in identifiers. But identifiers are
   still stored as ByteStrings, so they are limited to Latin-1 characters
   for now.

5. Lexer.hs is no longer part of the repository. We now generate the lexer
   from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were
   needed. These bugs might already be fixed in newer versions of alex, but
   we should be compatible with what is shipped in the Haskell Platform.
--- src/compiler/GF/Infra/Option.hs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/compiler/GF/Infra') diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 115665419..08f0df18b 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -20,7 +20,7 @@ module GF.Infra.Option helpMessage, -- * Checking specific options flag, cfgTransform, haskellOption, readOutputFormat, - isLexicalCat, isLiteralCat, renameEncoding, + isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding, -- * Setting specific options setOptimization, setCFGTransform, -- * Convenience methods for checking options @@ -157,7 +157,7 @@ data Flags = Flags { optRetainResource :: Bool, optName :: Maybe String, optPreprocessors :: [String], - optEncoding :: String, + optEncoding :: Maybe String, optPMCFG :: Bool, optOptimizations :: Set Optimization, optOptimizePGF :: Bool, @@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) -- | Pretty-print the options that are preserved in .gfo files. optionsGFO :: Options -> [(String,Literal)] optionsGFO opts = optionsPGF opts - ++ [("coding", LStr (flag optEncoding opts))] + ++ [("coding", LStr (getEncoding opts))] -- | Pretty-print the options that are preserved in .pgf files. optionsPGF :: Options -> [(String,Literal)] @@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions modifyFlags :: (Flags -> Flags) -> Options modifyFlags = Options +getEncoding :: Options -> String +getEncoding = renameEncoding . maybe defaultEncoding id . 
flag optEncoding +defaultEncoding = "UTF-8" + -- Default options defaultFlags :: Flags @@ -264,7 +268,7 @@ defaultFlags = Flags { optName = Nothing, optPreprocessors = [], - optEncoding = "latin1", + optEncoding = Nothing, optPMCFG = True, optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], optOptimizePGF = False, @@ -419,7 +423,7 @@ optDescr = addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } - coding x = set $ \o -> o { optEncoding = x } + coding x = set $ \o -> o { optEncoding = Just x } startcat x = set $ \o -> o { optStartCat = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x } lexer x = set $ \o -> o { optLexer = Just x } -- cgit v1.2.3