summaryrefslogtreecommitdiff
path: root/src/compiler/GF
diff options
context:
space:
mode:
authorhallgren <hallgren@chalmers.se>2013-11-25 21:12:11 +0000
committerhallgren <hallgren@chalmers.se>2013-11-25 21:12:11 +0000
commit9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (patch)
tree9ea97377d9938fc382c2036fa4c8fef9c33e33d8 /src/compiler/GF
parent3210a506484864430504ed1caf2f547bb674e701 (diff)
Change how GF deals with character encodings in grammar files
1. The default encoding is changed from Latin-1 to UTF-8. 2. Alternate encodings should be specified as "--# -coding=enc", the old "flags coding=enc" declarations have no effect but are still checked for consistency. 3. A transitional warning is generated for files that contain non-ASCII characters without specifying a character encoding: "Warning: default encoding has changed from Latin-1 to UTF-8" 4. Conversion to Unicode is now done *before* lexing. This makes it possible to allow arbitrary Unicode characters in identifiers. But identifiers are still stored as ByteStrings, so they are limited to Latin-1 characters for now. 5. Lexer.hs is no longer part of the repository. We now generate the lexer from Lexer.x with alex>=3. Some workarounds for bugs in alex-3.0 were needed. These bugs might already be fixed in newer versions of alex, but we should be compatible with what is shipped in the Haskell Platform.
Diffstat (limited to 'src/compiler/GF')
-rw-r--r--src/compiler/GF/Compile/GetGrammar.hs59
-rw-r--r--src/compiler/GF/Compile/ReadFiles.hs69
-rw-r--r--src/compiler/GF/Grammar/Lexer.hs481
-rw-r--r--src/compiler/GF/Grammar/Lexer.x (renamed from src/compiler/GF/Grammar/lexer/Lexer.x)51
-rw-r--r--src/compiler/GF/Grammar/Parser.y8
-rw-r--r--src/compiler/GF/Infra/Option.hs14
-rw-r--r--src/compiler/GF/Text/Coding.hs15
7 files changed, 149 insertions, 548 deletions
diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs
index 2f40d0242..10a857bf9 100644
--- a/src/compiler/GF/Compile/GetGrammar.hs
+++ b/src/compiler/GF/Compile/GetGrammar.hs
@@ -18,40 +18,61 @@ import Prelude hiding (catch)
import GF.Data.Operations
-import GF.System.Catch
+--import GF.System.Catch
import GF.Infra.UseIO
-import GF.Infra.Option(Options,optPreprocessors,addOptions,optEncoding,flag,renameEncoding)
+import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
import GF.Grammar.Lexer
import GF.Grammar.Parser
import GF.Grammar.Grammar
-import GF.Compile.Coding
+--import GF.Compile.Coding
+import GF.Compile.ReadFiles(parseSource,lift)
+--import GF.Text.Coding(decodeUnicodeIO)
import qualified Data.ByteString.Char8 as BS
-import Control.Monad (foldM)
+import Data.Char(isAscii)
+import Control.Monad (foldM,when,unless)
import System.Cmd (system)
-import System.IO(mkTextEncoding)
+--import System.IO(mkTextEncoding) --,utf8
import System.Directory(removeFile)
getSourceModule :: Options -> FilePath -> IOE SourceModule
-getSourceModule opts file0 = ioe $
- do tmp <- foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
- content <- keepTemp tmp
- case runP pModDef content of
- Left (Pn l c,msg) -> do file <- writeTemp tmp
+getSourceModule opts file0 =
+ errIn file0 $
+ do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts)
+ raw <- lift $ keepTemp tmp
+ --ePutStrLn $ "1 "++file0
+ (optCoding,parsed) <- parseSource opts pModDef raw
+ case parsed of
+ Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp
let location = file++":"++show l++":"++show c
- return (Bad (location++":\n "++msg))
- Right (i,mi00) ->
- do removeTemp tmp
- let mi0 =mi00 {mflags=mflags mi00 `addOptions` opts, msrc=file0}
- mi <- transcodeModule (i,mi0)
- return (Ok mi)
- `catch` (return . Bad . show)
-
+ raise (location++":\n "++msg)
+ Right (i,mi0) ->
+ do lift $ removeTemp tmp
+ let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0}
+ optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0)
+ case (optCoding,optCoding') of
+ (Nothing,Nothing) ->
+ unless (BS.all isAscii raw) $
+ ePutStrLn $ file0++":\n Warning: default encoding has changed from Latin-1 to UTF-8"
+ (_,Just coding') ->
+ when (coding/=coding') $
+ raise $ "Encoding mismatch: "++coding++" /= "++coding'
+ where coding = maybe defaultEncoding renameEncoding optCoding
+ _ -> return ()
+ --lift $ transcodeModule' (i,mi) -- old lexer
+ return (i,mi) -- new lexer
+
+{-
transcodeModule sm00 =
- do enc <- mkTextEncoding (renameEncoding (flag optEncoding (mflags (snd sm00))))
+ do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
let sm = decodeStringsInModule enc sm00
return sm
+transcodeModule' sm00 =
+ do let enc = utf8
+ let sm = decodeStringsInModule enc sm00
+ return sm
+-}
runPreprocessor :: Temporary -> String -> IO Temporary
runPreprocessor tmp0 p =
diff --git a/src/compiler/GF/Compile/ReadFiles.hs b/src/compiler/GF/Compile/ReadFiles.hs
index 5e65dcba6..70b0d6ee6 100644
--- a/src/compiler/GF/Compile/ReadFiles.hs
+++ b/src/compiler/GF/Compile/ReadFiles.hs
@@ -21,7 +21,8 @@
module GF.Compile.ReadFiles
( getAllFiles,ModName,ModEnv,importsOfModule,
gfoFile,gfFile,isGFO,gf2gfo,
- getOptionsFromFile) where
+ parseSource,lift,
+ getOptionsFromFile,getPragmas) where
import Prelude hiding (catch)
import GF.System.Catch
@@ -34,6 +35,10 @@ import GF.Grammar.Parser
import GF.Grammar.Grammar
import GF.Grammar.Binary
+import System.IO(mkTextEncoding)
+import qualified Data.ByteString.UTF8 as UTF8
+import GF.Text.Coding(decodeUnicodeIO)
+
import Control.Monad
import Data.Maybe(isJust)
import qualified Data.ByteString.Char8 as BS
@@ -50,7 +55,7 @@ type ModEnv = Map.Map ModName (UTCTime,[ModName])
-- | Returns a list of all files to be compiled in topological order i.e.
-- the low level (leaf) modules are first.
-getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
+--getAllFiles :: (MonadIO m,ErrorMonad m) => Options -> [InitPath] -> ModEnv -> FileName -> m [FullPath]
getAllFiles opts ps env file = do
-- read module headers from all files recursively
ds <- liftM reverse $ get [] [] (justModuleName file)
@@ -117,14 +122,10 @@ getAllFiles opts ps env file = do
Just mo -> return (st,importsOfModule mo)
Nothing
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
- | otherwise -> do s <- liftIO $ BS.readFile file
- case runP pModHeader s of
- Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
- Right mo -> return (CSComp,importsOfModule mo)
- CSComp -> do s <- liftIO $ BS.readFile file
- case runP pModHeader s of
- Left (Pn l c,msg) -> raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
- Right mo -> return (st,importsOfModule mo)
+ | otherwise -> do mo <- parseModHeader opts file
+ return (CSComp,importsOfModule mo)
+ CSComp -> do mo <- parseModHeader opts file
+ return (st,importsOfModule mo)
testErr (mname == name)
("module name" +++ mname +++ "differs from file name" +++ name)
return (name,st,t,isJust gfTime,imps,dropFileName file)
@@ -209,17 +210,55 @@ importsOfModule (m,mi) = (modName m,depModInfo mi [])
modName = showIdent
+
+parseModHeader opts file =
+ do --ePutStrLn file
+ (_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file)
+ case parsed of
+ Right mo -> return mo
+ Left (Pn l c,msg) ->
+ raise (file ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)
+
+parseSource opts p raw =
+ do (coding,utf8) <- toUTF8 opts raw
+ return (coding,runP p utf8)
+
+toUTF8 opts0 raw =
+ do opts <- getPragmas raw
+ let given = flag optEncoding opts -- explicitly given encoding
+ coding = getEncoding $ opts0 `addOptions` opts
+ utf8 <- if coding=="UTF-8"
+ then return raw
+ else lift $ do --ePutStrLn $ "toUTF8 from "++coding
+ enc <- mkTextEncoding coding
+ -- decodeUnicodeIO uses a lot of stack space,
+ -- so we need to split the file into smaller pieces
+ ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
+ return $ UTF8.fromString (unlines ls)
+ return (given,utf8)
+
+--lift io = ioe (fmap Ok io `catch` (return . Bad . show))
+lift io = liftIO io
+
-- | options can be passed to the compiler by comments in @--#@, in the main file
getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
getOptionsFromFile file = do
s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<<
liftIO (try $ BS.readFile file)
- let ls = filter (BS.isPrefixOf (BS.pack "--#")) $ BS.lines s
- fs = map (BS.unpack . BS.unwords . BS.words . BS.drop 3) ls
- parseModuleOptions fs
+ opts <- getPragmas s
+ -- The coding flag should not be inherited by other files
+ return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing}))
+
+
+getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options
+getPragmas = parseModuleOptions .
+ map (BS.unpack . BS.unwords . BS.words . BS.drop 3) .
+ filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines
-getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
-getFilePath paths file = liftIO $ get paths
+--getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
+getFilePath paths file =
+ liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file
+ get paths
where
get [] = return Nothing
get (p:ps) = do
diff --git a/src/compiler/GF/Grammar/Lexer.hs b/src/compiler/GF/Grammar/Lexer.hs
deleted file mode 100644
index ebb14a31f..000000000
--- a/src/compiler/GF/Grammar/Lexer.hs
+++ /dev/null
@@ -1,481 +0,0 @@
-{-# LANGUAGE CPP,MagicHash,BangPatterns #-}
-{-# LINE 1 "lexer/Lexer.x" #-}
-
-module GF.Grammar.Lexer
- ( Token(..), Posn(..)
- , P, runP, lexer, getPosn, failLoc
- , isReservedWord
- ) where
-
-import GF.Infra.Ident
-import qualified Data.ByteString.Char8 as BS
-import qualified Data.Map as Map
-
-
-#if __GLASGOW_HASKELL__ >= 603
-#include "ghcconfig.h"
-#elif defined(__GLASGOW_HASKELL__)
-#include "config.h"
-#endif
-#if __GLASGOW_HASKELL__ >= 503
-import Data.Array
-import Data.Char (ord)
-import Data.Array.Base (unsafeAt)
-#else
-import Array
-import Char (ord)
-#endif
-#if __GLASGOW_HASKELL__ >= 503
-import GHC.Exts
-#else
-import GlaExts
-#endif
-alex_base :: AlexAddr
-alex_base = AlexA# "\x01\x00\x00\x00\x38\x00\x00\x00\x39\x00\x00\x00\x17\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x44\x00\x00\x00\x45\x00\x00\x00\x19\x00\x00\x00\x24\x00\x00\x00\x25\x00\x00\x00\x4d\x00\x00\x00\x00\x00\x00\x00\x29\x00\x00\x00\x30\x00\x00\x00\xf8\xff\xff\xff\x26\x00\x00\x00\x9c\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\xd2\x00\x00\x00\x9e\x00\x00\x00\x6c\x01\x00\x00\x3c\x02\x00\x00\x00\x00\x00\x00\x17\x01\x00\x00\xe7\x01\x00\x00\xa5\x01\x00\x00\xf2\x00\x00\x00\xb7\x01\x00\x00\xff\x00\x00\x00\x0a\x01\x00\x00\x18\x01\x00\x00\x22\x01\x00\x00"#
-
-alex_table :: AlexAddr
-alex_table = AlexA# "\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0b\x00\x0c\x00\x19\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\x14\x00\x0c\x00\x0c\x00\x0d\x00\x0e\x00\x0c\x00\x03\x00\x0c\x00\x0c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x0c\x00\x0c\x00\x0c\x00\x10\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\xff\xff\x01\x00\x07\x00\x07\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x07\x00\x08\x00\x0c\x00\x0c\x00\x0c\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0b\x00\x0c\x00\x0c\x00\x0f\x00\x0c\x00\xff\xff\x11\x00\xff\xff\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x07\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x0c\x00\x0c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x04\x00\x05\x00\x17\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\xff\xff\x00\x00\xff\xff\x00\x00\xff\xff\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x12\x00\xff\xff\xff\xff\x13\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f\x00\xff\xff\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x1c\x00\x15\x00\x00\x00\x15\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x18\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x00\x00\x21\x00\x00\x00\x00\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x1e\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1b\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x17\x00\x1a\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\x1d\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x1b\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x00\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00\x17\x00"#
-
-alex_check :: AlexAddr
-alex_check = AlexA# "\xff\xff\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00\x07\x00\x08\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x0e\x00\x0f\x00\x10\x00\x11\x00\x12\x00\x13\x00\x14\x00\x15\x00\x16\x00\x17\x00\x18\x00\x19\x00\x1a\x00\x1b\x00\x1c\x00\x1d\x00\x1e\x00\x1f\x00\x20\x00\x21\x00\x22\x00\x23\x00\x24\x00\x25\x00\x26\x00\x27\x00\x28\x00\x29\x00\x2a\x00\x2b\x00\x2c\x00\x2d\x00\x2e\x00\x2f\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x3a\x00\x3b\x00\x3c\x00\x3d\x00\x3e\x00\x3f\x00\x40\x00\x0a\x00\x0a\x00\x2d\x00\x2d\x00\x2d\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x2d\x00\x2d\x00\x2a\x00\x5c\x00\x3e\x00\x09\x00\x0a\x00\x0b\x00\x0c\x00\x0d\x00\x2b\x00\x5b\x00\x5c\x00\x5d\x00\x5e\x00\x5f\x00\x60\x00\xff\xff\xff\xff\x3e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x20\x00\xff\xff\xff\xff\xff\xff\x2d\x00\x2d\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7b\x00\x7c\x00\x7d\x00\x7e\x00\x7f\x00\x80\x00\x81\x00\x82\x00\x83\x00\x84\x00\x85\x00\x86\x00\x87\x00\x88\x00\x89\x00\x8a\x00\x8b\x00\x8c\x00\x8d\x00\x8e\x00\x8f\x00\x90\x00\x91\x00\x92\x00\x93\x00\x94\x00\x95\x00\x96\x00\x97\x00\x98\x00\x99\x00\x9a\x00\x9b\x00\x9c\x00\x9d\x00\x9e\x00\x9f\x00\xa0\x00\xa1\x00\xa2\x00\xa3\x00\xa4\x00\xa5\x00\xa6\x00\xa7\x00\xa8\x00\xa9\x00\xaa\x00\xab\x00\xac\x00\xad\x00\xae\x00\xaf\x00\xb0\x00\xb1\x00\xb2\x00\xb3\x00\xb4\x00\xb5\x00\xb6\x00\xb7\x00\xb8\x00\xb9\x00\xba\x00\xbb\x00\xbc\x00\xbd\x00\xbe\x00\xbf\x00\x7d\x00\x7d\x00\x27\x00\xff\xff\x27\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xd7\x00\xff\xff\x0a\x00\xff\xff\x0a\x00\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x27\x00\xf7\x00\x27\x00\x5c\x00\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x2e\x00\x0a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x5c\x00\xff\xff\x5c\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x22\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\x2d\x00\xff\xff\xff\xff\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\x22\x00\xff\xff\xff\xff\xff\xff\x5f\x00\x27\x00\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\x0a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x5c\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x22\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x6e\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x74\x00\xff\xff\xff\xff\x65\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\x5c\x00\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\x27\x00\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00\x30\x00\x31\x00\x32\x00\x33\x00\x34\x00\x35\x00\x36\x00\x37\x00\x38\x00\x39\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x41\x00\x42\x00\x43\x00\x44\x00\x45\x00\x46\x00\x47\x00\x48\x00\x49\x00\x4a\x00\x4b\x00\x4c\x00\x4d\x00\x4e\x00\x4f\x00\x50\x00\x51\x00\x52\x00\x53\x00\x54\x00\x55\x00\x56\x00\x57\x00\x58\x00\x59\x00\x5a\x00\xff\xff\xff\xff\xff\xff\xff\xff\x5f\x00\xff\xff\x61\x00\x62\x00\x63\x00\x64\x00\x65\x00\x66\x00\x67\x00\x68\x00\x69\x00\x6a\x00\x6b\x00\x6c\x00\x6d\x00\x6e\x00\x6f\x00\x70\x00\x71\x00\x72\x00\x73\x00\x74\x00\x75\x00\x76\x00\x77\x00\x78\x00\x79\x00\x7a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\xc1\x00\xc2\x00\xc3\x00\xc4\x00\xc5\x00\xc6\x00\xc7\x00\xc8\x00\xc9\x00\xca\x00\xcb\x00\xcc\x00\xcd\x00\xce\x00\xcf\x00\xd0\x00\xd1\x00\xd2\x00\xd3\x00\xd4\x00\xd5\x00\xd6\x00\xff\xff\xd8\x00\xd9\x00\xda\x00\xdb\x00\xdc\x00\xdd\x00\xde\x00\xdf\x00\xe0\x00\xe1\x00\xe2\x00\xe3\x00\xe4\x00\xe5\x00\xe6\x00\xe7\x00\xe8\x00\xe9\x00\xea\x00\xeb\x00\xec\x00\xed\x00\xee\x00\xef\x00\xf0\x00\xf1\x00\xf2\x00\xf3\x00\xf4\x00\xf5\x00\xf6\x00\xff\xff\xf8\x00\xf9\x00\xfa\x00\xfb\x00\xfc\x00\xfd\x00\xfe\x00\xff\x00"#
-
-alex_deflt :: AlexAddr
-alex_deflt = AlexA# "\x16\x00\x02\x00\x02\x00\xff\xff\x09\x00\xff\xff\x09\x00\x09\x00\x09\x00\x09\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x13\x00\x13\x00\xff\xff\xff\xff\xff\xff\xff\xff\x1a\x00\x1a\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
-
-alex_accept = listArray (0::Int,33) [[],[(AlexAccSkip)],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAccSkip)],[],[],[],[],[(AlexAcc (alex_action_3))],[(AlexAccSkip)],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_3))],[(AlexAcc (alex_action_4))],[],[],[],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_5))],[(AlexAcc (alex_action_6))],[],[],[],[(AlexAcc (alex_action_7))],[(AlexAcc (alex_action_8))],[(AlexAcc (alex_action_8))],[],[],[]]
-{-# LINE 39 "lexer/Lexer.x" #-}
-
-
-tok f p s = f s
-
-data Token
- = T_exclmark
- | T_patt
- | T_int_label
- | T_oparen
- | T_cparen
- | T_tilde
- | T_star
- | T_starstar
- | T_plus
- | T_plusplus
- | T_comma
- | T_minus
- | T_rarrow
- | T_dot
- | T_alt
- | T_colon
- | T_semicolon
- | T_less
- | T_equal
- | T_big_rarrow
- | T_great
- | T_questmark
- | T_obrack
- | T_lam
- | T_lamlam
- | T_cbrack
- | T_ocurly
- | T_bar
- | T_ccurly
- | T_underscore
- | T_at
- | T_PType
- | T_Str
- | T_Strs
- | T_Tok
- | T_Type
- | T_abstract
- | T_case
- | T_cat
- | T_concrete
- | T_data
- | T_def
- | T_flags
- | T_fn
- | T_fun
- | T_in
- | T_incomplete
- | T_instance
- | T_interface
- | T_let
- | T_lin
- | T_lincat
- | T_lindef
- | T_linref
- | T_of
- | T_open
- | T_oper
- | T_param
- | T_pattern
- | T_pre
- | T_printname
- | T_resource
- | T_strs
- | T_table
- | T_transfer
- | T_variants
- | T_where
- | T_with
- | T_String String -- string literals
- | T_Integer Int -- integer literals
- | T_Double Double -- double precision float literals
- | T_Ident Ident
- | T_EOF
-
-eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
-eitherResIdent tv s =
- case Map.lookup s resWords of
- Just t -> t
- Nothing -> tv s
-
-isReservedWord :: BS.ByteString -> Bool
-isReservedWord s = Map.member s resWords
-
-resWords = Map.fromList
- [ b "!" T_exclmark
- , b "#" T_patt
- , b "$" T_int_label
- , b "(" T_oparen
- , b ")" T_cparen
- , b "~" T_tilde
- , b "*" T_star
- , b "**" T_starstar
- , b "+" T_plus
- , b "++" T_plusplus
- , b "," T_comma
- , b "-" T_minus
- , b "->" T_rarrow
- , b "." T_dot
- , b "/" T_alt
- , b ":" T_colon
- , b ";" T_semicolon
- , b "<" T_less
- , b "=" T_equal
- , b "=>" T_big_rarrow
- , b ">" T_great
- , b "?" T_questmark
- , b "[" T_obrack
- , b "]" T_cbrack
- , b "\\" T_lam
- , b "\\\\" T_lamlam
- , b "{" T_ocurly
- , b "}" T_ccurly
- , b "|" T_bar
- , b "_" T_underscore
- , b "@" T_at
- , b "PType" T_PType
- , b "Str" T_Str
- , b "Strs" T_Strs
- , b "Tok" T_Tok
- , b "Type" T_Type
- , b "abstract" T_abstract
- , b "case" T_case
- , b "cat" T_cat
- , b "concrete" T_concrete
- , b "data" T_data
- , b "def" T_def
- , b "flags" T_flags
- , b "fn" T_fn
- , b "fun" T_fun
- , b "in" T_in
- , b "incomplete" T_incomplete
- , b "instance" T_instance
- , b "interface" T_interface
- , b "let" T_let
- , b "lin" T_lin
- , b "lincat" T_lincat
- , b "lindef" T_lindef
- , b "linref" T_linref
- , b "of" T_of
- , b "open" T_open
- , b "oper" T_oper
- , b "param" T_param
- , b "pattern" T_pattern
- , b "pre" T_pre
- , b "printname" T_printname
- , b "resource" T_resource
- , b "strs" T_strs
- , b "table" T_table
- , b "transfer" T_transfer
- , b "variants" T_variants
- , b "where" T_where
- , b "with" T_with
- ]
- where b s t = (BS.pack s, t)
-
-unescapeInitTail :: String -> String
-unescapeInitTail = unesc . tail where
- unesc s = case s of
- '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
- '\\':'n':cs -> '\n' : unesc cs
- '\\':'t':cs -> '\t' : unesc cs
- '"':[] -> []
- '\'':[] -> []
- c:cs -> c : unesc cs
- _ -> []
-
--------------------------------------------------------------------
--- Alex wrapper code.
--- A modified "posn" wrapper.
--------------------------------------------------------------------
-
-data Posn = Pn {-# UNPACK #-} !Int
- {-# UNPACK #-} !Int
-
-alexMove :: Posn -> Char -> Posn
-alexMove (Pn l c) '\n' = Pn (l+1) 1
-alexMove (Pn l c) _ = Pn l (c+1)
-
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI p _ s) =
- case BS.uncons s of
- Nothing -> Nothing
- Just (c,s) ->
- let p' = alexMove p c
- in p' `seq` Just (c, (AI p' c s))
-
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI p c s) = c
-
-data AlexInput = AI {-# UNPACK #-} !Posn -- current position,
- {-# UNPACK #-} !Char -- previous char
- {-# UNPACK #-} !BS.ByteString -- current input string
-
-data ParseResult a
- = POk a
- | PFailed Posn -- The position of the error
- String -- The error message
-
-newtype P a = P { unP :: AlexInput -> ParseResult a }
-
-instance Monad P where
- return a = a `seq` (P $ \s -> POk a)
- (P m) >>= k = P $ \ s -> case m s of
- POk a -> unP (k a) s
- PFailed posn err -> PFailed posn err
- fail msg = P $ \(AI posn _ _) -> PFailed posn msg
-
-runP :: P a -> BS.ByteString -> Either (Posn,String) a
-runP (P f) txt =
- case f (AI (Pn 1 0) ' ' txt) of
- POk x -> Right x
- PFailed pos msg -> Left (pos,msg)
-
-failLoc :: Posn -> String -> P a
-failLoc pos msg = P $ \_ -> PFailed pos msg
-
-lexer :: (Token -> P a) -> P a
-lexer cont = P go
- where
- go inp@(AI pos _ str) =
- case alexScan inp 0 of
- AlexEOF -> unP (cont T_EOF) inp
- AlexError (AI pos _ _) -> PFailed pos "lexical error"
- AlexSkip inp' len -> go inp'
- AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
-
-getPosn :: P Posn
-getPosn = P $ \inp@(AI pos _ _) -> POk pos
-
-
-alex_action_3 = tok (eitherResIdent (T_Ident . identC . rawIdentC))
-alex_action_4 = tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack))
-alex_action_5 = tok (eitherResIdent (T_Ident . identC . rawIdentC))
-alex_action_6 = tok (T_String . unescapeInitTail . BS.unpack)
-alex_action_7 = tok (T_Integer . read . BS.unpack)
-alex_action_8 = tok (T_Double . read . BS.unpack)
-{-# LINE 1 "templates/GenericTemplate.hs" #-}
-{-# LINE 1 "templates/GenericTemplate.hs" #-}
-{-# LINE 1 "<built-in>" #-}
-{-# LINE 1 "<command-line>" #-}
-{-# LINE 1 "templates/GenericTemplate.hs" #-}
--- -----------------------------------------------------------------------------
--- ALEX TEMPLATE
---
--- This code is in the PUBLIC DOMAIN; you may copy it freely and use
--- it for any purpose whatsoever.
-
--- -----------------------------------------------------------------------------
--- INTERNALS and main scanner engine
-
-{-# LINE 37 "templates/GenericTemplate.hs" #-}
-
-{-# LINE 47 "templates/GenericTemplate.hs" #-}
-
-
-data AlexAddr = AlexA# Addr#
-
-#if __GLASGOW_HASKELL__ < 503
-uncheckedShiftL# = shiftL#
-#endif
-
-{-# INLINE alexIndexInt16OffAddr #-}
-alexIndexInt16OffAddr (AlexA# arr) off =
-#ifdef WORDS_BIGENDIAN
- narrow16Int# i
- where
- !i = word2Int# ((high `uncheckedShiftL#` 8#) `or#` low)
- !high = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#)))
- !low = int2Word# (ord# (indexCharOffAddr# arr off'))
- !off' = off *# 2#
-#else
- indexInt16OffAddr# arr off
-#endif
-
-
-
-
-
-{-# INLINE alexIndexInt32OffAddr #-}
-alexIndexInt32OffAddr (AlexA# arr) off =
-#ifdef WORDS_BIGENDIAN
- narrow32Int# i
- where
- !i = word2Int# ((b3 `uncheckedShiftL#` 24#) `or#`
- (b2 `uncheckedShiftL#` 16#) `or#`
- (b1 `uncheckedShiftL#` 8#) `or#` b0)
- !b3 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 3#)))
- !b2 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 2#)))
- !b1 = int2Word# (ord# (indexCharOffAddr# arr (off' +# 1#)))
- !b0 = int2Word# (ord# (indexCharOffAddr# arr off'))
- !off' = off *# 4#
-#else
- indexInt32OffAddr# arr off
-#endif
-
-
-
-
-
-#if __GLASGOW_HASKELL__ < 503
-quickIndex arr i = arr ! i
-#else
--- GHC >= 503, unsafeAt is available from Data.Array.Base.
-quickIndex = unsafeAt
-#endif
-
-
-
-
--- -----------------------------------------------------------------------------
--- Main lexing routines
-
-data AlexReturn a
- = AlexEOF
- | AlexError !AlexInput
- | AlexSkip !AlexInput !Int
- | AlexToken !AlexInput !Int a
-
--- alexScan :: AlexInput -> StartCode -> AlexReturn a
-alexScan input (I# (sc))
- = alexScanUser undefined input (I# (sc))
-
-alexScanUser user input (I# (sc))
- = case alex_scan_tkn user input 0# input sc AlexNone of
- (AlexNone, input') ->
- case alexGetChar input of
- Nothing ->
-
-
-
- AlexEOF
- Just _ ->
-
-
-
- AlexError input'
-
- (AlexLastSkip input'' len, _) ->
-
-
-
- AlexSkip input'' len
-
- (AlexLastAcc k input''' len, _) ->
-
-
-
- AlexToken input''' len k
-
-
--- Push the input through the DFA, remembering the most recent accepting
--- state it encountered.
-
-alex_scan_tkn user orig_input len input s last_acc =
- input `seq` -- strict in the input
- let
- new_acc = check_accs (alex_accept `quickIndex` (I# (s)))
- in
- new_acc `seq`
- case alexGetChar input of
- Nothing -> (new_acc, input)
- Just (c, new_input) ->
-
-
-
- let
- (!(base)) = alexIndexInt32OffAddr alex_base s
- (!((I# (ord_c)))) = ord c
- (!(offset)) = (base +# ord_c)
- (!(check)) = alexIndexInt16OffAddr alex_check offset
-
- (!(new_s)) = if (offset >=# 0#) && (check ==# ord_c)
- then alexIndexInt16OffAddr alex_table offset
- else alexIndexInt16OffAddr alex_deflt s
- in
- case new_s of
- -1# -> (new_acc, input)
- -- on an error, we want to keep the input *before* the
- -- character that failed, not after.
- _ -> alex_scan_tkn user orig_input (len +# 1#)
- new_input new_s new_acc
-
- where
- check_accs [] = last_acc
- check_accs (AlexAcc a : _) = AlexLastAcc a input (I# (len))
- check_accs (AlexAccSkip : _) = AlexLastSkip input (I# (len))
- check_accs (AlexAccPred a predx : rest)
- | predx user orig_input (I# (len)) input
- = AlexLastAcc a input (I# (len))
- check_accs (AlexAccSkipPred predx : rest)
- | predx user orig_input (I# (len)) input
- = AlexLastSkip input (I# (len))
- check_accs (_ : rest) = check_accs rest
-
-data AlexLastAcc a
- = AlexNone
- | AlexLastAcc a !AlexInput !Int
- | AlexLastSkip !AlexInput !Int
-
-data AlexAcc a user
- = AlexAcc a
- | AlexAccSkip
- | AlexAccPred a (AlexAccPred user)
- | AlexAccSkipPred (AlexAccPred user)
-
-type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool
-
--- -----------------------------------------------------------------------------
--- Predicates on a rule
-
-alexAndPred p1 p2 user in1 len in2
- = p1 user in1 len in2 && p2 user in1 len in2
-
---alexPrevCharIsPred :: Char -> AlexAccPred _
-alexPrevCharIs c _ input _ _ = c == alexInputPrevChar input
-
---alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _
-alexPrevCharIsOneOf arr _ input _ _ = arr ! alexInputPrevChar input
-
---alexRightContext :: Int -> AlexAccPred _
-alexRightContext (I# (sc)) user _ _ input =
- case alex_scan_tkn user input 0# input sc AlexNone of
- (AlexNone, _) -> False
- _ -> True
- -- TODO: there's no need to find the longest
- -- match when checking the right context, just
- -- the first match will do.
-
--- used by wrappers
-iUnbox (I# (i)) = i
diff --git a/src/compiler/GF/Grammar/lexer/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index 460e7f452..60c51f814 100644
--- a/src/compiler/GF/Grammar/lexer/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -1,3 +1,4 @@
+-- -*- haskell -*-
{
module GF.Grammar.Lexer
( Token(..), Posn(..)
@@ -6,9 +7,14 @@ module GF.Grammar.Lexer
) where
import GF.Infra.Ident
+--import GF.Data.Operations
import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString as WBS
+import qualified Data.ByteString.Internal as BS(w2c)
+import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map
-
+import Data.Word(Word8)
+--import Debug.Trace(trace)
}
@@ -17,7 +23,7 @@ $c = [A-Z\192-\221] # [\215]
$s = [a-z\222-\255] # [\247]
$d = [0-9] -- digit
$i = [$l $d _ '] -- identifier character
-$u = [\0-\255] -- universal: any character
+$u = [.\n] -- universal: any character
@rsyms = -- symbols and non-identifier-like reserved words
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
@@ -27,17 +33,20 @@ $u = [\0-\255] -- universal: any character
"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
$white+ ;
-@rsyms { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
-\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . BS.unpack)) }
-(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC . rawIdentC)) }
+@rsyms { tok (res (T_Ident . identS)) }
+\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (eitherResIdent (T_Ident . identC . rawIdentS . unescapeInitTail . unpack)) }
+(\_ | $l)($l | $d | \_ | \')* { tok (res (T_Ident . identS)) }
-\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) }
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
-(\-)? $d+ { tok (T_Integer . read . BS.unpack) }
-(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) }
+(\-)? $d+ { tok (T_Integer . read . unpack) }
+(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
{
+--unpack = BS.unpack
+unpack = id
+tok :: (String->Token) -> Posn -> String -> Token
tok f p s = f s
data Token
@@ -114,15 +123,17 @@ data Token
| T_Double Double -- double precision float literals
| T_Ident Ident
| T_EOF
+-- deriving Show -- debug
-eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
+res = eitherResIdent
+eitherResIdent :: (String -> Token) -> String -> Token
eitherResIdent tv s =
case Map.lookup s resWords of
Just t -> t
Nothing -> tv s
isReservedWord :: BS.ByteString -> Bool
-isReservedWord s = Map.member s resWords
+isReservedWord s = Map.member (BS.unpack s) resWords
resWords = Map.fromList
[ b "!" T_exclmark
@@ -194,7 +205,7 @@ resWords = Map.fromList
, b "where" T_where
, b "with" T_with
]
- where b s t = (BS.pack s, t)
+ where b s t = (s, t)
unescapeInitTail :: String -> String
unescapeInitTail = unesc . tail where
@@ -202,7 +213,7 @@ unescapeInitTail = unesc . tail where
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
'\\':'n':cs -> '\n' : unesc cs
'\\':'t':cs -> '\t' : unesc cs
- '"':[] -> []
+ '\"':[] -> []
'\'':[] -> []
c:cs -> c : unesc cs
_ -> []
@@ -219,13 +230,14 @@ alexMove :: Posn -> Char -> Posn
alexMove (Pn l c) '\n' = Pn (l+1) 1
alexMove (Pn l c) _ = Pn l (c+1)
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI p _ s) =
- case BS.uncons s of
+alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
+alexGetByte (AI p _ s) =
+ case WBS.uncons s of
Nothing -> Nothing
- Just (c,s) ->
+ Just (w,s) ->
let p' = alexMove p c
- in p' `seq` Just (c, (AI p' c s))
+ c = BS.w2c w
+ in p' `seq` Just (w, (AI p' c s))
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (AI p c s) = c
@@ -260,12 +272,13 @@ failLoc pos msg = P $ \_ -> PFailed pos msg
lexer :: (Token -> P a) -> P a
lexer cont = P go
where
+ --cont' t = trace (show t) (cont t)
go inp@(AI pos _ str) =
case alexScan inp 0 of
AlexEOF -> unP (cont T_EOF) inp
AlexError (AI pos _ _) -> PFailed pos "lexical error"
- AlexSkip inp' len -> go inp'
- AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
+ AlexSkip inp' len -> {-trace (show len) $-} go inp'
+ AlexToken inp' len act -> unP (cont (act pos (UTF8.toString (UTF8.take len str)))) inp'
getPosn :: P Posn
getPosn = P $ \inp@(AI pos _ _) -> POk pos
diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y
index bc2a394b1..028da18c6 100644
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -17,8 +17,8 @@ import GF.Grammar.Grammar
import GF.Grammar.Macros
import GF.Grammar.Lexer
import GF.Compile.Update (buildAnyTree)
-import Codec.Binary.UTF8.String(decodeString)
-import Data.Char(toLower)
+--import Codec.Binary.UTF8.String(decodeString)
+--import Data.Char(toLower)
}
%name pModDef ModDef
@@ -616,9 +616,9 @@ happyError = fail "syntax error"
-- Quick fix to render error messages from UTF-8-encoded source files correctly.
optDecode opts =
- if map toLower (flag optEncoding opts) `elem` ["utf8","utf-8"]
+ {-if map toLower (getEncoding opts) `elem` ["utf8","utf-8"]
then decodeString
- else id
+ else-} id
mkListId,mkConsId,mkBaseId :: Ident -> Ident
mkListId = prefixIdent "List"
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index 115665419..08f0df18b 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -20,7 +20,7 @@ module GF.Infra.Option
helpMessage,
-- * Checking specific options
flag, cfgTransform, haskellOption, readOutputFormat,
- isLexicalCat, isLiteralCat, renameEncoding,
+ isLexicalCat, isLiteralCat, renameEncoding, getEncoding, defaultEncoding,
-- * Setting specific options
setOptimization, setCFGTransform,
-- * Convenience methods for checking options
@@ -157,7 +157,7 @@ data Flags = Flags {
optRetainResource :: Bool,
optName :: Maybe String,
optPreprocessors :: [String],
- optEncoding :: String,
+ optEncoding :: Maybe String,
optPMCFG :: Bool,
optOptimizations :: Set Optimization,
optOptimizePGF :: Bool,
@@ -213,7 +213,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- | Pretty-print the options that are preserved in .gfo files.
optionsGFO :: Options -> [(String,Literal)]
optionsGFO opts = optionsPGF opts
- ++ [("coding", LStr (flag optEncoding opts))]
+ ++ [("coding", LStr (getEncoding opts))]
-- | Pretty-print the options that are preserved in .pgf files.
optionsPGF :: Options -> [(String,Literal)]
@@ -241,6 +241,10 @@ concatOptions = foldr addOptions noOptions
modifyFlags :: (Flags -> Flags) -> Options
modifyFlags = Options
+getEncoding :: Options -> String
+getEncoding = renameEncoding . maybe defaultEncoding id . flag optEncoding
+defaultEncoding = "UTF-8"
+
-- Default options
defaultFlags :: Flags
@@ -264,7 +268,7 @@ defaultFlags = Flags {
optName = Nothing,
optPreprocessors = [],
- optEncoding = "latin1",
+ optEncoding = Nothing,
optPMCFG = True,
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optOptimizePGF = False,
@@ -419,7 +423,7 @@ optDescr =
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
- coding x = set $ \o -> o { optEncoding = x }
+ coding x = set $ \o -> o { optEncoding = Just x }
startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x }
diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs
index e347730e0..3669733d0 100644
--- a/src/compiler/GF/Text/Coding.hs
+++ b/src/compiler/GF/Text/Coding.hs
@@ -31,7 +31,7 @@ encodeUnicode enc s =
(cbuf,bbuf) <- cod cbuf bbuf
#endif
if isEmptyBuffer bbuf
- then ioe_invalidCharacter
+ then ioe_invalidCharacter1
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
bss <- translate cod cbuf
return (bs:bss)
@@ -41,8 +41,9 @@ encodeUnicode enc s =
w = bufR cbuf
decodeUnicode :: TextEncoding -> ByteString -> String
-decodeUnicode enc (PS fptr l len) =
- unsafePerformIO $ do
+decodeUnicode enc bs = unsafePerformIO $ decodeUnicodeIO enc bs
+
+decodeUnicodeIO enc (PS fptr l len) = do
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
cbuf <- newCharBuffer 128 WriteBuffer
case enc of
@@ -59,7 +60,7 @@ decodeUnicode enc (PS fptr l len) =
(bbuf,cbuf) <- cod bbuf cbuf
#endif
if isEmptyBuffer cbuf
- then ioe_invalidCharacter
+ then ioe_invalidCharacter2
else unpack cod bbuf cbuf
| otherwise = return []
where
@@ -75,6 +76,10 @@ decodeUnicode enc (PS fptr l len) =
i = bufL cbuf
w = bufR cbuf
-ioe_invalidCharacter = ioException
+ioe_invalidCharacter1 = ioException
(IOError Nothing InvalidArgument ""
("invalid byte sequence for this encoding") Nothing Nothing)
+
+ioe_invalidCharacter2 = ioException
+ (IOError Nothing InvalidArgument ""
+ ("invalid byte sequence for this decoding") Nothing Nothing)