diff options
| author | hallgren <hallgren@chalmers.se> | 2013-11-25 21:12:11 +0000 |
|---|---|---|
| committer | hallgren <hallgren@chalmers.se> | 2013-11-25 21:12:11 +0000 |
| commit | 9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (patch) | |
| tree | 9ea97377d9938fc382c2036fa4c8fef9c33e33d8 /src/compiler/GF/Infra | |
| parent | 3210a506484864430504ed1caf2f547bb674e701 (diff) | |
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8.
2. Alternate encodings should be specified as "--# -coding=enc". The old
"flags coding=enc" declarations have no effect but are still checked for
consistency.
3. A transitional warning is generated for files that contain non-ASCII
characters without specifying a character encoding:
"Warning: default encoding has changed from Latin-1 to UTF-8"
4. Conversion to Unicode is now done *before* lexing. This makes it possible
to allow arbitrary Unicode characters in identifiers. But identifiers are
still stored as ByteStrings, so they are limited to Latin-1 characters
for now.
5. Lexer.hs is no longer part of the repository. We now generate the lexer
from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were
needed. These bugs might already be fixed in newer versions of alex, but
we should be compatible with what is shipped in the Haskell Platform.
Diffstat (limited to 'src/compiler/GF/Infra')
| -rw-r--r-- | src/compiler/GF/Infra/Option.hs | 14 |
1 file changed, 9 insertions, 5 deletions
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 115665419..08f0df18b 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -20,7 +20,7 @@ module GF.Infra.Option helpMessage, -- * Checking specific options flag, cfgTransform, haskellOption, readOutputFormat, - isLexicalCat, isLiteralCat, renameEncoding, + isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding, -- * Setting specific options setOptimization, setCFGTransform, -- * Convenience methods for checking options @@ -157,7 +157,7 @@ data Flags = Flags { optRetainResource :: Bool, optName :: Maybe String, optPreprocessors :: [String], - optEncoding :: String, + optEncoding :: Maybe String, optPMCFG :: Bool, optOptimizations :: Set Optimization, optOptimizePGF :: Bool, @@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) -- | Pretty-print the options that are preserved in .gfo files. optionsGFO :: Options -> [(String,Literal)] optionsGFO opts = optionsPGF opts - ++ [("coding", LStr (flag optEncoding opts))] + ++ [("coding", LStr (getEncoding opts))] -- | Pretty-print the options that are preserved in .pgf files. optionsPGF :: Options -> [(String,Literal)] @@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions modifyFlags :: (Flags -> Flags) -> Options modifyFlags = Options +getEncoding :: Options -> String +getEncoding = renameEncoding . maybe defaultEncoding id . 
flag optEncoding +defaultEncoding = "UTF-8" + -- Default options defaultFlags :: Flags @@ -264,7 +268,7 @@ defaultFlags = Flags { optName = Nothing, optPreprocessors = [], - optEncoding = "latin1", + optEncoding = Nothing, optPMCFG = True, optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize], optOptimizePGF = False, @@ -419,7 +423,7 @@ optDescr = addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o } setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x } preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] } - coding x = set $ \o -> o { optEncoding = x } + coding x = set $ \o -> o { optEncoding = Just x } startcat x = set $ \o -> o { optStartCat = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x } lexer x = set $ \o -> o { optLexer = Just x } |
