summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Infra
diff options
context:
space:
mode:
author: hallgren <hallgren@chalmers.se>, 2013-11-25 21:12:11 +0000
committer: hallgren <hallgren@chalmers.se>, 2013-11-25 21:12:11 +0000
commit: 9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (patch)
tree: 9ea97377d9938fc382c2036fa4c8fef9c33e33d8 /src/compiler/GF/Infra
parent: 3210a506484864430504ed1caf2f547bb674e701 (diff)
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8.
2. Alternate encodings should be specified as "--# -coding=enc"; the old "flags coding=enc" declarations have no effect but are still checked for consistency.
3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8".
4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now.
5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
Diffstat (limited to 'src/compiler/GF/Infra')
-rw-r--r--src/compiler/GF/Infra/Option.hs14
1 files changed, 9 insertions, 5 deletions
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index 115665419..08f0df18b 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -20,7 +20,7 @@ module GF.Infra.Option
helpMessage,
-- * Checking specific options
flag, cfgTransform, haskellOption, readOutputFormat,
- isLexicalCat, isLiteralCat, renameEncoding,
+ isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
-- * Setting specific options
setOptimization, setCFGTransform,
-- * Convenience methods for checking options
@@ -157,7 +157,7 @@ data Flags = Flags {
optRetainResource :: Bool,
optName :: Maybe String,
optPreprocessors :: [String],
- optEncoding :: String,
+ optEncoding :: Maybe String,
optPMCFG :: Bool,
optOptimizations :: Set Optimization,
optOptimizePGF :: Bool,
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- | Pretty-print the options that are preserved in .gfo files.
optionsGFO :: Options -> [(String,Literal)]
optionsGFO opts = optionsPGF opts
- ++ [("coding", LStr (flag optEncoding opts))]
+ ++ [("coding", LStr (getEncoding opts))]
-- | Pretty-print the options that are preserved in .pgf files.
optionsPGF :: Options -> [(String,Literal)]
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
modifyFlags :: (Flags -> Flags) -> Options
modifyFlags = Options
+getEncoding :: Options -> String
+getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding
+defaultEncoding = "UTF-8"
+
-- Default options
defaultFlags :: Flags
@@ -264,7 +268,7 @@ defaultFlags = Flags {
optName = Nothing,
optPreprocessors = [],
- optEncoding = "latin1",
+ optEncoding = Nothing,
optPMCFG = True,
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optOptimizePGF = False,
@@ -419,7 +423,7 @@ optDescr =
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
- coding x = set $ \o -> o { optEncoding = x }
+ coding x = set $ \o -> o { optEncoding = Just x }
startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x }