summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Compile/GetGrammar.hs
diff options
context:
space:
mode:
authorhallgren <hallgren@chalmers.se>2013-11-25 21:12:11 +0000
committerhallgren <hallgren@chalmers.se>2013-11-25 21:12:11 +0000
commit9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (patch)
tree9ea97377d9938fc382c2036fa4c8fef9c33e33d8 /src/compiler/GF/Compile/GetGrammar.hs
parent3210a506484864430504ed1caf2f547bb674e701 (diff)
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8. 2. Alternate encodings should be specified as "--# -coding=enc", the old "flags coding=enc" declarations have no effect but are still checked for consistency. 3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8" 4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now. 5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
Diffstat (limited to 'src/compiler/GF/Compile/GetGrammar.hs')
-rw-r--r--src/compiler/GF/Compile/GetGrammar.hs59
1 files changed, 40 insertions, 19 deletions
diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs
index 2f40d0242..10a857bf9 100644
--- a/src/compiler/GF/Compile/GetGrammar.hs
+++ b/src/compiler/GF/Compile/GetGrammar.hs
@@ -18,40 +18,61 @@ import Prelude hiding (catch)
import GF.Data.Operations
-import GF.System.Catch
+--import GF.System.Catch
import GF.Infra.UseIO
-import GF.Infra.Option(Options,optPreprocessors,addOptions,optEncoding,flag,renameEncoding)
+import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
import GF.Grammar.Lexer
import GF.Grammar.Parser
import GF.Grammar.Grammar
-import GF.Compile.Coding
+--import GF.Compile.Coding
+import GF.Compile.ReadFiles(parseSource,lift)
+--import GF.Text.Coding(decodeUnicodeIO)
import qualified Data.ByteString.Char8 as BS
-import Control.Monad (foldM)
+import Data.Char(isAscii)
+import Control.Monad (foldM,when,unless)
import System.Cmd (system)
-import System.IO(mkTextEncoding)
+--import System.IO(mkTextEncoding) --,utf8
import System.Directory(removeFile)
getSourceModule :: Options -> FilePath -> IOE SourceModule
-getSourceModule opts file0 = ioe $
- do tmp <- foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
- content <- keepTemp tmp
- case runP pModDef content of
- Left (Pn l c,msg) -> do file <- writeTemp tmp
+getSourceModule opts file0 =
+ errIn file0 $
+ do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
+ raw <- lift $ keepTemp tmp
+ --ePutStrLn $ "1 "++file0
+ (optCoding,parsed) <- parseSource opts pModDef raw
+ case parsed of
+ Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp
let location = file++":"++show l++":"++show c
- return (Bad (location++":\n "++msg))
- Right (i,mi00) ->
- do removeTemp tmp
- let mi0 =mi00 {mflags=mflags mi00 `addOptions` opts, msrc=file0}
- mi <- transcodeModule (i,mi0)
- return (Ok mi)
- `catch` (return . Bad . show)
-
+ raise (location++":\n "++msg)
+ Right (i,mi0) ->
+ do lift $ removeTemp tmp
+ let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0}
+ optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0)
+ case (optCoding,optCoding') of
+ (Nothing,Nothing) ->
+ unless (BS.all isAscii raw) $
+ ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8"
+ (_,Just coding') ->
+ when (coding/=coding') $
+ raise $ "Encoding mismatch: "++coding++" /= "++coding'
+ where coding = maybe defaultEncoding renameEncoding optCoding
+ _ -> return ()
+ --lift $ transcodeModule' (i,mi) -- old lexer
+ return (i,mi) -- new lexer
+
+{-
transcodeModule sm00 =
- do enc <- mkTextEncoding (renameEncoding (flag optEncoding (mflags (snd sm00))))
+ do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
let sm = decodeStringsInModule enc sm00
return sm
+transcodeModule' sm00 =
+ do let enc = utf8
+ let sm = decodeStringsInModule enc sm00
+ return sm
+-}
runPreprocessor :: Temporary -> String -> IO Temporary
runPreprocessor tmp0 p =