summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2010-04-19 09:38:36 +0000
committerkrasimir <krasimir@chalmers.se>2010-04-19 09:38:36 +0000
commit6313244eacf992fb10a5091bee28582e84540809 (patch)
tree8208fb18a5e1ab9447bd060cf08a3d78ed0a8c0a
parent8b5827fc892c2f395ae26f1811da2d4cc3b1669d (diff)
use the native unicode support from GHC 6.12
-rw-r--r--GF.cabal1
-rw-r--r--src/compiler/GF.hs4
-rw-r--r--src/compiler/GF/Command/Commands.hs49
-rw-r--r--src/compiler/GF/Command/Interpreter.hs28
-rw-r--r--src/compiler/GF/Compile.hs6
-rw-r--r--src/compiler/GF/Compile/Coding.hs10
-rw-r--r--src/compiler/GF/Compile/PGFtoHaskell.hs3
-rw-r--r--src/compiler/GF/Compile/PGFtoJS.hs3
-rw-r--r--src/compiler/GF/Compile/PGFtoProlog.hs1
-rw-r--r--src/compiler/GF/Data/XML.hs3
-rw-r--r--src/compiler/GF/Grammar/Parser.y4
-rw-r--r--src/compiler/GF/Infra/Option.hs43
-rw-r--r--src/compiler/GF/Quiz.hs16
-rw-r--r--src/compiler/GF/Text/CP1250.hs91
-rw-r--r--src/compiler/GF/Text/CP1251.hs86
-rw-r--r--src/compiler/GF/Text/CP1252.hs17
-rw-r--r--src/compiler/GF/Text/CP1254.hs84
-rw-r--r--src/compiler/GF/Text/Coding.hs85
-rw-r--r--src/compiler/GF/Text/Lexing.hs6
-rw-r--r--src/compiler/GF/Text/Transliterations.hs2
-rw-r--r--src/compiler/GF/Text/UTF8.hs48
-rw-r--r--src/compiler/GFC.hs11
-rw-r--r--src/compiler/GFI.hs66
23 files changed, 177 insertions, 490 deletions
diff --git a/GF.cabal b/GF.cabal
index 2b3fd725d..7a4c990fc 100644
--- a/GF.cabal
+++ b/GF.cabal
@@ -89,7 +89,6 @@ executable gf
GF.JavaScript.AbsJS
GF.JavaScript.PrintJS
GF.Infra.CompactPrint
- GF.Text.UTF8
GF.Data.TrieMap
GF.Data.Utilities
GF.Data.SortedList
diff --git a/src/compiler/GF.hs b/src/compiler/GF.hs
index 32a95ca1f..503253589 100644
--- a/src/compiler/GF.hs
+++ b/src/compiler/GF.hs
@@ -24,6 +24,10 @@ main = do
codepage <- getACP
setConsoleCP codepage
setConsoleOutputCP codepage
+ enc <- mkTextEncoding ("CP"++show codepage)
+ hSetEncoding stdin enc
+ hSetEncoding stdout enc
+ hSetEncoding stderr enc
#endif
args <- getArgs
case parseOptions args of
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 0ca54839c..00fc8305b 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -24,7 +24,7 @@ import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabiliti
import PGF.Generate (generateRandomFrom) ----
import GF.Compile.Export
import GF.Compile.ExampleBased
-import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
+import GF.Infra.Option (noOptions, readOutputFormat)
import GF.Infra.UseIO
import GF.Data.ErrM ----
import GF.Command.Abstract
@@ -36,7 +36,6 @@ import GF.Quiz
import GF.Command.TreeOperations ---- temporary place for typecheck and compute
import GF.Data.Operations
-import GF.Text.Coding
import Data.List
import Data.Maybe
@@ -77,10 +76,10 @@ emptyCommandInfo = CommandInfo {
lookCommand :: String -> Map.Map String CommandInfo -> Maybe CommandInfo
lookCommand = Map.lookup
-commandHelpAll :: Encoding -> PGFEnv -> [Option] -> String
-commandHelpAll cod pgf opts = unlines
+commandHelpAll :: PGFEnv -> [Option] -> String
+commandHelpAll pgf opts = unlines
[commandHelp (isOpt "full" opts) (co,info)
- | (co,info) <- Map.assocs (allCommands cod pgf)]
+ | (co,info) <- Map.assocs (allCommands pgf)]
commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [
@@ -120,8 +119,8 @@ commandHelpTags full (co,info) = unlines $ [
type PGFEnv = (PGF, Map.Map Language Morpho)
-- this list must no more be kept sorted by the command name
-allCommands :: Encoding -> PGFEnv -> Map.Map String CommandInfo
-allCommands cod env@(pgf, mos) = Map.fromList [
+allCommands :: PGFEnv -> Map.Map String CommandInfo
+allCommands env@(pgf, mos) = Map.fromList [
("!", emptyCommandInfo {
synopsis = "system command: escape to system shell",
syntax = "! SYSTEMCOMMAND",
@@ -156,7 +155,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grph." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
- writeFile (file "dot") (enc grph)
+ writeFile (file "dot") grph
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
@@ -365,10 +364,10 @@ allCommands cod env@(pgf, mos) = Map.fromList [
_ | isOpt "coding" opts -> codingMsg
_ | isOpt "license" opts -> licenseMsg
[t] -> let co = getCommandOp (showExpr [] t) in
- case lookCommand co (allCommands cod env) of ---- new map ??!!
+ case lookCommand co (allCommands env) of ---- new map ??!!
Just info -> commandHelp True (co,info)
_ -> "command not found"
- _ -> commandHelpAll cod env opts
+ _ -> commandHelpAll env opts
in return (fromString msg),
needsTypeCheck = False
}),
@@ -458,7 +457,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let typ = optType opts
mprobs <- optProbs opts pgf
let mt = mexp xs
- morphologyQuiz mt mprobs cod pgf lang typ
+ morphologyQuiz mt mprobs pgf lang typ
return void,
flags = [
("lang","language of the quiz"),
@@ -656,7 +655,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let typ = optType opts
let mt = mexp xs
mprobs <- optProbs opts pgf
- translationQuiz mt mprobs cod pgf from to typ
+ translationQuiz mt mprobs pgf from to typ
return void,
flags = [
("from","translate from this language"),
@@ -687,7 +686,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts arg -> do
let tmpi = "_tmpi" ---
let tmpo = "_tmpo"
- writeFile tmpi $ enc $ toString arg
+ writeFile tmpi $ toString arg
let syst = optComm opts ++ " " ++ tmpi
system $ syst ++ " <" ++ tmpi ++ " >" ++ tmpo
s <- readFile tmpo
@@ -738,7 +737,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grphd." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
- writeFile (file "dot") (enc grphs)
+ writeFile (file "dot") grphs
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
@@ -779,7 +778,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grph." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
- writeFile (file "dot") (enc grph)
+ writeFile (file "dot") grph
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
@@ -819,7 +818,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file s = "_grph." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
- writeFile (file "dot") (enc grph)
+ writeFile (file "dot") grph
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
@@ -844,8 +843,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts arg -> do
let file = valStrOpts "file" "_gftmp" opts
if isOpt "append" opts
- then appendFile file (enc (toString arg))
- else writeFile file (enc (toString arg))
+ then appendFile file (toString arg)
+ else writeFile file (toString arg)
return void,
options = [
("append","append to file, instead of overwriting it")
@@ -889,8 +888,6 @@ allCommands cod env@(pgf, mos) = Map.fromList [
})
]
where
- enc = encodeUnicode cod
-
par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
@@ -1063,17 +1060,17 @@ stringOpOptions = sort $ [
treeOpOptions pgf = [(op,expl) | (op,(expl,Left _)) <- allTreeOps pgf]
treeOpFlags pgf = [(op,expl) | (op,(expl,Right _)) <- allTreeOps pgf]
-translationQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding ->
+translationQuiz :: Maybe Expr -> Maybe Probabilities ->
PGF -> Language -> Language -> Type -> IO ()
-translationQuiz mex mprobs cod pgf ig og typ = do
+translationQuiz mex mprobs pgf ig og typ = do
tts <- translationList mex mprobs pgf ig og typ infinity
- mkQuiz cod "Welcome to GF Translation Quiz." tts
+ mkQuiz "Welcome to GF Translation Quiz." tts
-morphologyQuiz :: Maybe Expr -> Maybe Probabilities -> Encoding ->
+morphologyQuiz :: Maybe Expr -> Maybe Probabilities ->
PGF -> Language -> Type -> IO ()
-morphologyQuiz mex mprobs cod pgf ig typ = do
+morphologyQuiz mex mprobs pgf ig typ = do
tts <- morphologyList mex mprobs pgf ig typ infinity
- mkQuiz cod "Welcome to GF Morphology Quiz." tts
+ mkQuiz "Welcome to GF Morphology Quiz." tts
-- | the maximal number of precompiled quiz problems
infinity :: Int
diff --git a/src/compiler/GF/Command/Interpreter.hs b/src/compiler/GF/Command/Interpreter.hs
index ff84da8a3..4f146bb93 100644
--- a/src/compiler/GF/Command/Interpreter.hs
+++ b/src/compiler/GF/Command/Interpreter.hs
@@ -29,24 +29,24 @@ data CommandEnv = CommandEnv {
expmacros :: Map.Map String Expr
}
-mkCommandEnv :: Encoding -> PGF -> CommandEnv
-mkCommandEnv enc pgf =
+mkCommandEnv :: PGF -> CommandEnv
+mkCommandEnv pgf =
let mos = Map.fromList [(la,buildMorpho pgf la) | la <- languages pgf] in
- CommandEnv pgf mos (allCommands enc (pgf, mos)) Map.empty Map.empty
+ CommandEnv pgf mos (allCommands (pgf, mos)) Map.empty Map.empty
emptyCommandEnv :: CommandEnv
-emptyCommandEnv = mkCommandEnv UTF_8 emptyPGF
+emptyCommandEnv = mkCommandEnv emptyPGF
-interpretCommandLine :: (String -> String) -> CommandEnv -> String -> IO ()
-interpretCommandLine enc env line =
+interpretCommandLine :: CommandEnv -> String -> IO ()
+interpretCommandLine env line =
case readCommandLine line of
Just [] -> return ()
- Just pipes -> mapM_ (interpretPipe enc env) pipes
+ Just pipes -> mapM_ (interpretPipe env) pipes
Nothing -> putStrLnFlush "command not parsed"
-interpretPipe enc env cs = do
+interpretPipe env cs = do
v@(_,s) <- intercs ([],"") cs
- putStrLnFlush $ enc s
+ putStrLnFlush s
return v
where
intercs treess [] = return treess
@@ -57,14 +57,14 @@ interpretPipe enc env cs = do
'%':f -> case Map.lookup f (commandmacros env) of
Just css ->
case getCommandTrees env False arg es of
- Right es -> do mapM_ (interpretPipe enc env) (appLine es css)
+ Right es -> do mapM_ (interpretPipe env) (appLine es css)
return ([],[])
Left msg -> do putStrLn ('\n':msg)
return ([],[])
Nothing -> do
putStrLn $ "command macro " ++ co ++ " not interpreted"
return ([],[])
- _ -> interpret enc env es comm
+ _ -> interpret env es comm
appLine es = map (map (appCommand es))
-- macro definition applications: replace ?i by (exps !! i)
@@ -81,14 +81,14 @@ appCommand xs c@(Command i os arg) = case arg of
EFun x -> EFun x
-- return the trees to be sent in pipe, and the output possibly printed
-interpret :: (String -> String) -> CommandEnv -> [Expr] -> Command -> IO CommandOutput
-interpret enc env trees comm =
+interpret :: CommandEnv -> [Expr] -> Command -> IO CommandOutput
+interpret env trees comm =
case getCommand env trees comm of
Left msg -> do putStrLn ('\n':msg)
return ([],[])
Right (info,opts,trees) -> do tss@(_,s) <- exec info opts trees
if isOpt "tr" opts
- then putStrLn (enc s)
+ then putStrLn s
else return ()
return tss
diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs
index a862f85e2..1aebeaf31 100644
--- a/src/compiler/GF/Compile.hs
+++ b/src/compiler/GF/Compile.hs
@@ -12,7 +12,6 @@ import GF.Compile.Update
import GF.Compile.Refresh
import GF.Compile.Coding
-import GF.Text.UTF8 ----
import GF.Grammar.Grammar
import GF.Grammar.Lookup
@@ -82,7 +81,7 @@ compileSourceGrammar opts gr@(MGrammar ms) = do
-- to output an intermediate stage
intermOut :: Options -> Dump -> Doc -> IOE ()
intermOut opts d doc
- | dump opts d = ioeIO (hPutStrLn stderr (encodeUTF8 (render (text "\n\n--#" <+> text (show d) $$ doc))))
+ | dump opts d = ioeIO (hPutStrLn stderr (render (text "\n\n--#" <+> text (show d) $$ doc)))
| otherwise = return ()
-- | the environment
@@ -162,7 +161,8 @@ compileOne opts env@(_,srcgr,_) file = do
sm00 <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ") $
getSourceModule opts file
- let sm0 = decodeStringsInModule sm00
+ enc <- ioeIO $ mkTextEncoding (renameEncoding (flag optEncoding (flagsModule sm00)))
+ let sm0 = decodeStringsInModule enc sm00
intermOut opts DumpSource (ppModule Qualified sm0)
diff --git a/src/compiler/GF/Compile/Coding.hs b/src/compiler/GF/Compile/Coding.hs
index b909aac7d..01285eef1 100644
--- a/src/compiler/GF/Compile/Coding.hs
+++ b/src/compiler/GF/Compile/Coding.hs
@@ -8,12 +8,14 @@ import GF.Infra.Option
import GF.Data.Operations
import Data.Char
+import System.IO
+import qualified Data.ByteString.Char8 as BS
-encodeStringsInModule :: SourceModule -> SourceModule
-encodeStringsInModule = codeSourceModule (encodeUnicode UTF_8)
+encodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule
+encodeStringsInModule enc = codeSourceModule (BS.unpack . encodeUnicode enc)
-decodeStringsInModule :: SourceModule -> SourceModule
-decodeStringsInModule mo = codeSourceModule (decodeUnicode (flag optEncoding (flagsModule mo))) mo
+decodeStringsInModule :: TextEncoding -> SourceModule -> SourceModule
+decodeStringsInModule enc mo = codeSourceModule (decodeUnicode enc . BS.pack) mo
codeSourceModule :: (String -> String) -> SourceModule -> SourceModule
codeSourceModule co (id,mo) = (id,replaceJudgements mo (mapTree codj (jments mo)))
diff --git a/src/compiler/GF/Compile/PGFtoHaskell.hs b/src/compiler/GF/Compile/PGFtoHaskell.hs
index 252fc95ee..ecc70cb5e 100644
--- a/src/compiler/GF/Compile/PGFtoHaskell.hs
+++ b/src/compiler/GF/Compile/PGFtoHaskell.hs
@@ -22,7 +22,6 @@ import PGF.Macros
import GF.Data.Operations
import GF.Infra.Option
-import GF.Text.UTF8
import Data.List --(isPrefixOf, find, intersperse)
import qualified Data.Map as Map
@@ -34,7 +33,7 @@ grammar2haskell :: Options
-> String -- ^ Module name.
-> PGF
-> String
-grammar2haskell opts name gr = encodeUTF8 $ foldr (++++) [] $
+grammar2haskell opts name gr = foldr (++++) [] $
pragmas ++ haskPreamble name ++ [types, gfinstances gId lexical gr']
where gr' = hSkeleton gr
gadt = haskellOption opts HaskellGADT
diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs
index bb29ff7c5..f6725bf4f 100644
--- a/src/compiler/GF/Compile/PGFtoJS.hs
+++ b/src/compiler/GF/Compile/PGFtoJS.hs
@@ -6,7 +6,6 @@ import qualified PGF.Macros as M
import qualified GF.JavaScript.AbsJS as JS
import qualified GF.JavaScript.PrintJS as JS
-import GF.Text.UTF8
import GF.Data.ErrM
import GF.Infra.Option
@@ -21,7 +20,7 @@ import qualified Data.IntMap as IntMap
pgf2js :: PGF -> String
pgf2js pgf =
- encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]]
+ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]]
where
n = showCId $ absname pgf
as = abstract pgf
diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs
index 3a5df0256..9e390e87b 100644
--- a/src/compiler/GF/Compile/PGFtoProlog.hs
+++ b/src/compiler/GF/Compile/PGFtoProlog.hs
@@ -15,7 +15,6 @@ import PGF.Data
import PGF.Macros
import GF.Data.Operations
-import GF.Text.UTF8
import qualified Data.Map as Map
import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, ord)
diff --git a/src/compiler/GF/Data/XML.hs b/src/compiler/GF/Data/XML.hs
index bdc6f98a1..4d4a3e8ca 100644
--- a/src/compiler/GF/Data/XML.hs
+++ b/src/compiler/GF/Data/XML.hs
@@ -7,7 +7,6 @@
module GF.Data.XML (XML(..), Attr, comments, showXMLDoc, showsXMLDoc, showsXML, bottomUpXML) where
import GF.Data.Utilities
-import GF.Text.UTF8
data XML = Data String | CData String | Tag String [Attr] [XML] | ETag String [Attr] | Comment String | Empty
deriving (Ord,Eq,Show)
@@ -21,7 +20,7 @@ showXMLDoc :: XML -> String
showXMLDoc xml = showsXMLDoc xml ""
showsXMLDoc :: XML -> ShowS
-showsXMLDoc xml = encodeUTF8 . showString header . showsXML xml
+showsXMLDoc xml = showString header . showsXML xml
where header = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
showsXML :: XML -> ShowS
diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y
index 3be1b3519..867776607 100644
--- a/src/compiler/GF/Grammar/Parser.y
+++ b/src/compiler/GF/Grammar/Parser.y
@@ -441,9 +441,7 @@ Exp6
| '?' { Meta 0 }
| '[' ']' { Empty }
| '[' Ident Exps ']' { foldl App (Vr (mkListId $2)) $3 }
- | '[' String ']' { case $2 of
- [] -> Empty
- str -> foldr1 C (map K (words str)) }
+ | '[' String ']' { K $2 }
| '{' ListLocDef '}' {% mkR $2 }
| '<' ListTupleComp '>' { R (tuple2record $2) }
| '<' Exp ':' Exp '>' { Typed $2 $4 }
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index 24b967aff..ee8d76b45 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -3,7 +3,7 @@ module GF.Infra.Option
-- * Option types
Options,
Flags(..),
- Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..),
+ Mode(..), Phase(..), Verbosity(..), OutputFormat(..),
SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
Dump(..), Printer(..), Recomp(..),
-- * Option parsing
@@ -17,7 +17,7 @@ module GF.Infra.Option
helpMessage,
-- * Checking specific options
flag, cfgTransform, haskellOption, readOutputFormat,
- isLexicalCat, encodings,
+ isLexicalCat, renameEncoding,
-- * Setting specific options
setOptimization, setCFGTransform,
-- * Convenience methods for checking options
@@ -25,12 +25,13 @@ module GF.Infra.Option
) where
import Control.Monad
-import Data.Char (toLower)
+import Data.Char (toLower, isDigit)
import Data.List
import Data.Maybe
import GF.Infra.GetOpt
--import System.Console.GetOpt
import System.FilePath
+import System.IO
import GF.Data.ErrM
@@ -77,9 +78,6 @@ data Verbosity = Quiet | Normal | Verbose | Debug
data Phase = Preproc | Convert | Compile | Link
deriving (Show,Eq,Ord)
-data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254
- deriving (Eq,Ord)
-
data OutputFormat = FmtPGFPretty
| FmtJavaScript
| FmtHaskell
@@ -161,7 +159,7 @@ data Flags = Flags {
optCncName :: Maybe String,
optResName :: Maybe String,
optPreprocessors :: [String],
- optEncoding :: Encoding,
+ optEncoding :: String,
optOptimizations :: Set Optimization,
optCFGTransforms :: Set CFGTransform,
optLibraryPath :: [FilePath],
@@ -207,7 +205,7 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- | Pretty-print the options that are preserved in .gfo files.
optionsGFO :: Options -> [(String,String)]
optionsGFO opts = optionsPGF opts
- ++ [("coding", show (flag optEncoding opts))]
+ ++ [("coding", flag optEncoding opts)]
-- | Pretty-print the options that are preserved in .pgf files.
optionsPGF :: Options -> [(String,String)]
@@ -260,7 +258,7 @@ defaultFlags = Flags {
optCncName = Nothing,
optResName = Nothing,
optPreprocessors = [],
- optEncoding = ISO_8859_1,
+ optEncoding = "latin1",
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter,
CFGTopDownFilter, CFGMergeIdentical],
@@ -343,8 +341,7 @@ optDescr =
(unlines ["Use CMD to preprocess input files.",
"Multiple preprocessors can be used by giving this option multiple times."]),
Option [] ["coding"] (ReqArg coding "ENCODING")
- ("Character encoding of the source grammar, ENCODING = "
- ++ concat (intersperse " | " (map fst encodings)) ++ "."),
+ ("Character encoding of the source grammar, ENCODING = utf8, latin1, cp1251, ..."),
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.",
@@ -400,9 +397,7 @@ optDescr =
addLibDir x = set $ \o -> o { optLibraryPath = x:optLibraryPath o }
setLibPath x = set $ \o -> o { optLibraryPath = splitInModuleSearchPath x }
preproc x = set $ \o -> o { optPreprocessors = optPreprocessors o ++ [x] }
- coding x = case lookup x encodings of
- Just c -> set $ \o -> o { optEncoding = c }
- Nothing -> fail $ "Unknown character encoding: " ++ x
+ coding x = set $ \o -> o { optEncoding = x }
startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x }
@@ -483,18 +478,14 @@ haskellOptionNames =
("gadt", HaskellGADT),
("lexical", HaskellLexical)]
-encodings :: [(String,Encoding)]
-encodings =
- [("utf8", UTF_8),
- ("cp1250", CP_1250),
- ("cp1251", CP_1251),
- ("cp1252", CP_1252),
- ("cp1254", CP_1254),
- ("latin1", ISO_8859_1)
- ]
-
-instance Show Encoding where
- show = lookupShow encodings
+-- | This is for bacward compatibility. Since GHC 6.12 we
+-- started using the native Unicode support in GHC but it
+-- uses different names for the code pages.
+renameEncoding :: String -> String
+renameEncoding "utf8" = "UTF-8"
+renameEncoding "latin1" = "CP1252"
+renameEncoding ('c':'p':s) | all isDigit s = 'C':'P':s
+renameEncoding s = s
lookupShow :: Eq a => [(String,a)] -> a -> String
lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs]
diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs
index 9a3540645..4a4caafc8 100644
--- a/src/compiler/GF/Quiz.hs
+++ b/src/compiler/GF/Quiz.hs
@@ -23,7 +23,6 @@ import PGF.Linearize
import GF.Data.Operations
import GF.Infra.UseIO
import GF.Infra.Option
-import GF.Text.Coding
import PGF.Probabilistic
import System.Random
@@ -33,9 +32,9 @@ import Data.List (nub)
-- generic quiz function
-mkQuiz :: Encoding -> String -> [(String,[String])] -> IO ()
-mkQuiz cod msg tts = do
- let qas = [ (encodeUnicode cod q, mkAnswer cod as) | (q,as) <- tts]
+mkQuiz :: String -> [(String,[String])] -> IO ()
+mkQuiz msg tts = do
+ let qas = [(q, mkAnswer as) | (q,as) <- tts]
teachDialogue qas msg
translationList ::
@@ -62,14 +61,13 @@ morphologyList mex mprobs pgf ig typ number = do
(pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss]
-- | compare answer to the list of right answers, increase score and give feedback
-mkAnswer :: Encoding -> [String] -> String -> (Integer, String)
-mkAnswer cod as s =
+mkAnswer :: [String] -> String -> (Integer, String)
+mkAnswer as s =
if (elem (norm s) as)
then (1,"Yes.")
- else (0,"No, not" +++ s ++ ", but" ++++ enc (unlines as))
+ else (0,"No, not" +++ s ++ ", but" ++++ unlines as)
where
- norm = unwords . words . decodeUnicode cod
- enc = encodeUnicode cod
+ norm = unwords . words
norml = unwords . words
diff --git a/src/compiler/GF/Text/CP1250.hs b/src/compiler/GF/Text/CP1250.hs
deleted file mode 100644
index 2ed263877..000000000
--- a/src/compiler/GF/Text/CP1250.hs
+++ /dev/null
@@ -1,91 +0,0 @@
------------------------------------------------------------------------------
--- |
--- Module : GF.Text.CP1250
--- Maintainer : Krasimir Angelov
---
--- cp1250 is a code page used under Microsoft Windows to represent texts
--- in Central European and Eastern European languages that use Latin script,
--- such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian,
--- Serbian (Latin script), Romanian and Albanian. It may also be used with
--- the German language; German-language texts encoded with cp1250 and cp1252
--- are identical.
---
------------------------------------------------------------------------------
-
-module GF.Text.CP1250 where
-
-import Data.Char
-
-decodeCP1250 = map convert where
- convert c
- | c == '\x80' = chr 0x20AC
- | c == '\x82' = chr 0x201A
- | c == '\x84' = chr 0x201E
- | c == '\x85' = chr 0x2026
- | c == '\x86' = chr 0x2020
- | c == '\x87' = chr 0x2021
- | c == '\x89' = chr 0x2030
- | c == '\x8A' = chr 0x0160
- | c == '\x8B' = chr 0x2039
- | c == '\x8C' = chr 0x015A
- | c == '\x8D' = chr 0x0164
- | c == '\x8E' = chr 0x017D
- | c == '\x8F' = chr 0x0179
- | c == '\x91' = chr 0x2018
- | c == '\x92' = chr 0x2019
- | c == '\x93' = chr 0x201C
- | c == '\x94' = chr 0x201D
- | c == '\x95' = chr 0x2022
- | c == '\x96' = chr 0x2013
- | c == '\x97' = chr 0x2014
- | c == '\x99' = chr 0x2122
- | c == '\x9A' = chr 0x0161
- | c == '\x9B' = chr 0x203A
- | c == '\x9C' = chr 0x015B
- | c == '\x9D' = chr 0x0165
- | c == '\x9E' = chr 0x017E
- | c == '\x9F' = chr 0x017A
- | c == '\xA1' = chr 0x02C7
- | c == '\xA5' = chr 0x0104
- | c == '\xB9' = chr 0x0105
- | c == '\xBC' = chr 0x013D
- | c == '\xBE' = chr 0x013E
- | otherwise = c
-
-
-encodeCP1250 = map convert where
- convert c
- | oc == 0x20AC = '\x80'
- | oc == 0x201A = '\x82'
- | oc == 0x201E = '\x84'
- | oc == 0x2026 = '\x85'
- | oc == 0x2020 = '\x86'
- | oc == 0x2021 = '\x87'
- | oc == 0x2030 = '\x89'
- | oc == 0x0160 = '\x8A'
- | oc == 0x2039 = '\x8B'
- | oc == 0x015A = '\x8C'
- | oc == 0x0164 = '\x8D'
- | oc == 0x017D = '\x8E'
- | oc == 0x0179 = '\x8F'
- | oc == 0x2018 = '\x91'
- | oc == 0x2019 = '\x92'
- | oc == 0x201C = '\x93'
- | oc == 0x201D = '\x94'
- | oc == 0x2022 = '\x95'
- | oc == 0x2013 = '\x96'
- | oc == 0x2014 = '\x97'
- | oc == 0x2122 = '\x99'
- | oc == 0x0161 = '\x9A'
- | oc == 0x203A = '\x9B'
- | oc == 0x015B = '\x9C'
- | oc == 0x0165 = '\x9D'
- | oc == 0x017E = '\x9E'
- | oc == 0x017A = '\x9F'
- | oc == 0x02C7 = '\xA1'
- | oc == 0x0104 = '\xA5'
- | oc == 0x0105 = '\xB9'
- | oc == 0x013D = '\xBC'
- | oc == 0x013E = '\xBE'
- | otherwise = c
- where oc = ord c
diff --git a/src/compiler/GF/Text/CP1251.hs b/src/compiler/GF/Text/CP1251.hs
deleted file mode 100644
index 8d8ceebf6..000000000
--- a/src/compiler/GF/Text/CP1251.hs
+++ /dev/null
@@ -1,86 +0,0 @@
------------------------------------------------------------------------------
--- |
--- Module : GF.Text.CP1251
--- Maintainer : Krasimir Angelov
---
--- cp1251 is a popular 8-bit character encoding, designed to cover languages
--- that use the Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic
--- and other languages. It is the most widely used for encoding the Bulgarian,
--- Serbian and Macedonian languages.
---
------------------------------------------------------------------------------
-
-module GF.Text.CP1251 where
-
-import Data.Char
-
-decodeCP1251 = map convert where
- convert c
- | c >= '\xC0' && c <= '\xFF' = chr (ord c + (0x410-0xC0))
- | c == '\xA8' = chr 0x401 -- cyrillic capital letter lo
- | c == '\x80' = chr 0x402
- | c == '\x81' = chr 0x403
- | c == '\xAA' = chr 0x404
- | c == '\xBD' = chr 0x405
- | c == '\xB2' = chr 0x406
- | c == '\xAF' = chr 0x407
- | c == '\xA3' = chr 0x408
- | c == '\x8A' = chr 0x409
- | c == '\x8C' = chr 0x40A
- | c == '\x8E' = chr 0x40B
- | c == '\x8D' = chr 0x40C
- | c == '\xA1' = chr 0x40E
- | c == '\x8F' = chr 0x40F
- | c == '\xB8' = chr 0x451 -- cyrillic small letter lo
- | c == '\x90' = chr 0x452
- | c == '\x83' = chr 0x453
- | c == '\xBA' = chr 0x454
- | c == '\xBE' = chr 0x455
- | c == '\xB3' = chr 0x456
- | c == '\xBF' = chr 0x457
- | c == '\xBC' = chr 0x458
- | c == '\x9A' = chr 0x459
- | c == '\x9C' = chr 0x45A
- | c == '\x9E' = chr 0x45B
- | c == '\x9D' = chr 0x45C
- | c == '\xA2' = chr 0x45E
- | c == '\x9F' = chr 0x45F
- | c == '\xA5' = chr 0x490
- | c == '\xB4' = chr 0x491
- | otherwise = c
-
-encodeCP1251 = map convert where
- convert c
- | oc >= 0x410 && oc <= 0x44F = chr (oc - (0x410-0xC0))
- | oc == 0x401 = '\xA8' -- cyrillic capital letter lo
- | oc == 0x402 = '\x80'
- | oc == 0x403 = '\x81'
- | oc == 0x404 = '\xAA'
- | oc == 0x405 = '\xBD'
- | oc == 0x406 = '\xB2'
- | oc == 0x407 = '\xAF'
- | oc == 0x408 = '\xA3'
- | oc == 0x409 = '\x8A'
- | oc == 0x40A = '\x8C'
- | oc == 0x40B = '\x8E'
- | oc == 0x40C = '\x8D'
- | oc == 0x40E = '\xA1'
- | oc == 0x40F = '\x8F'
- | oc == 0x451 = '\xB8' -- cyrillic small letter lo
- | oc == 0x452 = '\x90'
- | oc == 0x453 = '\x83'
- | oc == 0x454 = '\xBA'
- | oc == 0x455 = '\xBE'
- | oc == 0x456 = '\xB3'
- | oc == 0x457 = '\xBF'
- | oc == 0x458 = '\xBC'
- | oc == 0x459 = '\x9A'
- | oc == 0x45A = '\x9C'
- | oc == 0x45B = '\x9E'
- | oc == 0x45C = '\x9D'
- | oc == 0x45E = '\xA2'
- | oc == 0x45F = '\x9F'
- | oc == 0x490 = '\xA5'
- | oc == 0x491 = '\xB4'
- | otherwise = c
- where oc = ord c
diff --git a/src/compiler/GF/Text/CP1252.hs b/src/compiler/GF/Text/CP1252.hs
deleted file mode 100644
index a1d8ab8f3..000000000
--- a/src/compiler/GF/Text/CP1252.hs
+++ /dev/null
@@ -1,17 +0,0 @@
------------------------------------------------------------------------------
--- |
--- Module : GF.Text.CP1252
--- Maintainer : Krasimir Angelov
---
--- cp1252 is a character encoding of the Latin alphabet, used by default in
--- the legacy components of Microsoft Windows in English and some other
--- Western languages.
---
------------------------------------------------------------------------------
-
-module GF.Text.CP1252 where
-
-import Data.Char
-
-decodeCP1252 = map id
-encodeCP1252 = map (\x -> if x <= '\255' then x else '?')
diff --git a/src/compiler/GF/Text/CP1254.hs b/src/compiler/GF/Text/CP1254.hs
deleted file mode 100644
index 488359d70..000000000
--- a/src/compiler/GF/Text/CP1254.hs
+++ /dev/null
@@ -1,84 +0,0 @@
------------------------------------------------------------------------------
--- |
--- Module : GF.Text.CP1254
--- Maintainer : Krasimir Angelov
---
--- cp1254 is a code page used under Microsoft Windows to write Turkish.
--- Characters with codepoints A0 through FF are compatible with ISO 8859-9.
---
------------------------------------------------------------------------------
-
-module GF.Text.CP1254 where
-
-import Data.Char
-
-decodeCP1254 = map convert where
- convert c
- | c == '\x80' = chr 0x20AC
- | c == '\x82' = chr 0x201A
- | c == '\x83' = chr 0x192
- | c == '\x84' = chr 0x201E
- | c == '\x85' = chr 0x2026
- | c == '\x86' = chr 0x2020
- | c == '\x87' = chr 0x2021
- | c == '\x88' = chr 0x2C6
- | c == '\x89' = chr 0x2030
- | c == '\x8A' = chr 0x160
- | c == '\x8B' = chr 0x2039
- | c == '\x8C' = chr 0x152
- | c == '\x91' = chr 0x2018
- | c == '\x92' = chr 0x2019
- | c == '\x93' = chr 0x201C
- | c == '\x94' = chr 0x201D
- | c == '\x95' = chr 0x2022
- | c == '\x96' = chr 0x2013
- | c == '\x97' = chr 0x2014
- | c == '\x98' = chr 0x2DC
- | c == '\x99' = chr 0x2122
- | c == '\x9A' = chr 0x161
- | c == '\x9B' = chr 0x203A
- | c == '\x9C' = chr 0x153
- | c == '\x9F' = chr 0x178
- | c == '\xD0' = chr 0x11E
- | c == '\xDD' = chr 0x130
- | c == '\xDE' = chr 0x15E
- | c == '\xF0' = chr 0x11F
- | c == '\xFD' = chr 0x131
- | c == '\xFE' = chr 0x15F
- | otherwise = c
-
-encodeCP1254 = map convert where
- convert c
- | oc == 0x20AC = '\x80'
- | oc == 0x201A = '\x82'
- | oc == 0x192 = '\x83'
- | oc == 0x201E = '\x84'
- | oc == 0x2026 = '\x85'
- | oc == 0x2020 = '\x86'
- | oc == 0x2021 = '\x87'
- | oc == 0x2C6 = '\x88'
- | oc == 0x2030 = '\x89'
- | oc == 0x160 = '\x8A'
- | oc == 0x2039 = '\x8B'
- | oc == 0x152 = '\x8C'
- | oc == 0x2018 = '\x91'
- | oc == 0x2019 = '\x92'
- | oc == 0x201C = '\x93'
- | oc == 0x201D = '\x94'
- | oc == 0x2022 = '\x95'
- | oc == 0x2013 = '\x96'
- | oc == 0x2014 = '\x97'
- | oc == 0x2DC = '\x98'
- | oc == 0x2122 = '\x99'
- | oc == 0x161 = '\x9A'
- | oc == 0x203A = '\x9B'
- | oc == 0x153 = '\x9C'
- | oc == 0x178 = '\x9F'
- | oc == 0x11E = '\xD0'
- | oc == 0x130 = '\xDD'
- | oc == 0x15E = '\xDE'
- | oc == 0x11F = '\xF0'
- | oc == 0x131 = '\xFD'
- | oc == 0x15F = '\xFE'
- | otherwise = c
- where oc = ord c
diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs
index 3481b278d..a206bb4d2 100644
--- a/src/compiler/GF/Text/Coding.hs
+++ b/src/compiler/GF/Text/Coding.hs
@@ -1,24 +1,69 @@
module GF.Text.Coding where
-import GF.Infra.Option
-import GF.Text.UTF8
-import GF.Text.CP1250
-import GF.Text.CP1251
-import GF.Text.CP1252
-import GF.Text.CP1254
+import qualified Data.ByteString as BS
+import Data.ByteString.Internal
+import GHC.IO
+import GHC.IO.Buffer
+import GHC.IO.Encoding
+import GHC.IO.Exception
+import Control.Monad
-encodeUnicode e = case e of
- UTF_8 -> encodeUTF8
- CP_1250 -> encodeCP1250
- CP_1251 -> encodeCP1251
- CP_1252 -> encodeCP1252
- CP_1254 -> encodeCP1254
- _ -> id
+encodeUnicode :: TextEncoding -> String -> ByteString
+encodeUnicode enc s =
+ unsafePerformIO $ do
+ let len = length s
+ cbuf0 <- newCharBuffer (len*4) ReadBuffer
+ foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s
+ let cbuf = cbuf0{bufR=len}
+ case enc of
+ TextEncoding {mkTextEncoder=mk} -> do encoder <- mk
+ bss <- translate (encode encoder) cbuf
+ close encoder
+ return (BS.concat bss)
+ where
+ translate cod cbuf
+ | i < w = do bbuf <- newByteBuffer 128 WriteBuffer
+ (cbuf,bbuf) <- cod cbuf bbuf
+ if isEmptyBuffer bbuf
+ then ioe_invalidCharacter
+ else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
+ bss <- translate cod cbuf
+ return (bs:bss)
+ | otherwise = return []
+ where
+ i = bufL cbuf
+ w = bufR cbuf
-decodeUnicode e = case e of
- UTF_8 -> decodeUTF8
- CP_1250 -> decodeCP1250
- CP_1251 -> decodeCP1251
- CP_1252 -> decodeCP1252
- CP_1254 -> decodeCP1254
- _ -> id
+decodeUnicode :: TextEncoding -> ByteString -> String
+decodeUnicode enc (PS fptr l len) =
+ unsafePerformIO $ do
+ let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
+ cbuf <- newCharBuffer 128 WriteBuffer
+ case enc of
+ TextEncoding {mkTextDecoder=mk} -> do decoder <- mk
+ s <- translate (encode decoder) bbuf cbuf
+ close decoder
+ return s
+ where
+ translate cod bbuf cbuf
+ | i < w = do (bbuf,cbuf) <- cod bbuf cbuf
+ if isEmptyBuffer cbuf
+ then ioe_invalidCharacter
+ else unpack cod bbuf cbuf
+ | otherwise = return []
+ where
+ i = bufL bbuf
+ w = bufR bbuf
+
+ unpack cod bbuf cbuf
+ | i < w = do (c,i') <- readCharBuf (bufRaw cbuf) i
+ cs <- unpack cod bbuf cbuf{bufL=i'}
+ return (c:cs)
+ | otherwise = translate cod bbuf cbuf{bufL=0,bufR=0}
+ where
+ i = bufL cbuf
+ w = bufR cbuf
+
+ioe_invalidCharacter = ioException
+ (IOError Nothing InvalidArgument ""
+ ("invalid byte sequence for this encoding") Nothing Nothing)
diff --git a/src/compiler/GF/Text/Lexing.hs b/src/compiler/GF/Text/Lexing.hs
index a5a2c71eb..ec030e158 100644
--- a/src/compiler/GF/Text/Lexing.hs
+++ b/src/compiler/GF/Text/Lexing.hs
@@ -1,8 +1,6 @@
module GF.Text.Lexing (stringOp,opInEnv) where
import GF.Text.Transliterations
-import GF.Text.UTF8
-import GF.Text.CP1251
import Data.Char
import Data.List (intersperse)
@@ -23,10 +21,6 @@ stringOp name = case name of
"unlexmixed" -> Just $ capitInit . appUnlexer (unlexMixed . unquote)
"unwords" -> Just $ appUnlexer unwords
"to_html" -> Just wrapHTML
- "to_utf8" -> Just encodeUTF8
- "from_utf8" -> Just decodeUTF8
- "to_cp1251" -> Just encodeCP1251
- "from_cp1251" -> Just decodeCP1251
_ -> transliterate name
-- perform op in environments beg--end, t.ex. between "--"
diff --git a/src/compiler/GF/Text/Transliterations.hs b/src/compiler/GF/Text/Transliterations.hs
index bd56f5f89..cbe8baf15 100644
--- a/src/compiler/GF/Text/Transliterations.hs
+++ b/src/compiler/GF/Text/Transliterations.hs
@@ -5,8 +5,6 @@ module GF.Text.Transliterations (
transliterationPrintNames
) where
-import GF.Text.UTF8
-
import Data.Char
import Numeric
import qualified Data.Map as Map
diff --git a/src/compiler/GF/Text/UTF8.hs b/src/compiler/GF/Text/UTF8.hs
deleted file mode 100644
index 5e9687684..000000000
--- a/src/compiler/GF/Text/UTF8.hs
+++ /dev/null
@@ -1,48 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : UTF8
--- Maintainer : AR
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/04/21 16:23:42 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.5 $
---
--- From the Char module supplied with HBC.
--- code by Thomas Hallgren (Jul 10 1999)
------------------------------------------------------------------------------
-
-module GF.Text.UTF8 (decodeUTF8, encodeUTF8) where
-
--- | Take a Unicode string and encode it as a string
--- with the UTF8 method.
-decodeUTF8 :: String -> String
-decodeUTF8 "" = ""
-decodeUTF8 (c:cs) | c < '\x80' = c : decodeUTF8 cs
-decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
- '\x80' <= c' && c' <= '\xbf' =
- toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs
-decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
- '\x80' <= c' && c' <= '\xbf' &&
- '\x80' <= c'' && c'' <= '\xbf' =
- toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs
-decodeUTF8 s = s ---- AR workaround 22/6/2006
-----decodeUTF8 _ = error "UniChar.decodeUTF8: bad data"
-
-encodeUTF8 :: String -> String
-encodeUTF8 "" = ""
-encodeUTF8 (c:cs) =
- if c > '\x0000' && c < '\x0080' then
- c : encodeUTF8 cs
- else if c < toEnum 0x0800 then
- let i = fromEnum c
- in toEnum (0xc0 + i `div` 0x40) :
- toEnum (0x80 + i `mod` 0x40) :
- encodeUTF8 cs
- else
- let i = fromEnum c
- in toEnum (0xe0 + i `div` 0x1000) :
- toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
- toEnum (0x80 + i `mod` 0x40) :
- encodeUTF8 cs
diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs
index 8037d4f1a..1f0ac870b 100644
--- a/src/compiler/GFC.hs
+++ b/src/compiler/GFC.hs
@@ -17,6 +17,7 @@ import Data.Maybe
import Data.Binary
import System.FilePath
import System.IO
+import Control.Exception
mainGFC :: Options -> [FilePath] -> IOE ()
@@ -81,8 +82,8 @@ writeOutput opts file str =
do let path = case flag optOutputDir opts of
Nothing -> file
Just dir -> dir </> file
- writeOutputFile opts path str
-
-writeOutputFile :: Options -> FilePath -> String -> IOE ()
-writeOutputFile opts outfile output =
- do putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ writeFile outfile output
+ putPointE Normal opts ("Writing " ++ path ++ "...") $ ioeIO $
+ bracket
+ (openFile path WriteMode)
+ (hClose)
+ (\h -> hSetEncoding h utf8 >> hPutStr h str)
diff --git a/src/compiler/GFI.hs b/src/compiler/GFI.hs
index 9561c407f..a0806ce94 100644
--- a/src/compiler/GFI.hs
+++ b/src/compiler/GFI.hs
@@ -21,7 +21,6 @@ import GF.Infra.Option
import GF.Infra.Modules (greatestResource, modules, emptyModInfo)
import GF.System.Readline
-import GF.Text.Coding
import GF.Compile.Coding
import PGF
@@ -34,6 +33,7 @@ import Data.List(isPrefixOf)
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS
import qualified Text.ParserCombinators.ReadP as RP
+import System.IO
import System.Cmd
import System.CPUTime
import System.Directory
@@ -86,9 +86,7 @@ loop opts gfenv0 = do
s0 <- fetch
let gfenv = gfenv0 {history = s0 : history gfenv0}
let
- enc = encode gfenv
- s = decode gfenv s0
- pwords = case words s of
+ pwords = case words s0 of
w:ws -> getCommandOp w :ws
ws -> ws
@@ -130,8 +128,8 @@ loop opts gfenv0 = do
case runP pExp (BS.pack s) of
Left (_,msg) -> putStrLn msg
Right t -> case checkComputeTerm sgr (codeTerm (decode gfenv) (L (0,0) t)) of
- Ok x -> putStrLn $ enc (showTerm sgr style q x)
- Bad s -> putStrLn $ enc s
+ Ok x -> putStrLn $ showTerm sgr style q x
+ Bad s -> putStrLn $ s
loopNewCPU gfenv
"dg":ws -> do
let stop = case ws of
@@ -141,7 +139,7 @@ loop opts gfenv0 = do
putStrLn "wrote graph in file _gfdepgraph.dot"
loopNewCPU gfenv
"eh":w:_ -> do
- cs <- readFile w >>= return . map (interpretCommandLine enc env) . lines
+ cs <- readFile w >>= return . map (interpretCommandLine env) . lines
loopNewCPU gfenv
"i":args -> do
@@ -179,25 +177,28 @@ loop opts gfenv0 = do
_ -> putStrLn "value definition not parsed" >> loopNewCPU gfenv
"ph":_ ->
- mapM_ (putStrLn . enc) (reverse (history gfenv0)) >> loopNewCPU gfenv
- "se":c:_ ->
- case lookup c encodings of
- Just cod -> do
+ mapM_ putStrLn (reverse (history gfenv0)) >> loopNewCPU gfenv
+ "se":c:_ -> do
+ let cod = renameEncoding c
#ifdef mingw32_HOST_OS
- case c of
- 'c':'p':c -> case reads c of
- [(cp,"")] -> setConsoleCP cp >> setConsoleOutputCP cp
- _ -> return ()
- "utf8" -> setConsoleCP 65001 >> setConsoleOutputCP 65001
- _ -> return ()
+ case cod of
+ 'C':'P':c -> case reads c of
+ [(cp,"")] -> do setConsoleCP cp
+ setConsoleOutputCP cp
+ _ -> return ()
+ "UTF-8" -> do setConsoleCP 65001
+ setConsoleOutputCP 65001
+ _ -> return ()
#endif
- loopNewCPU $ gfenv {coding = cod}
- Nothing -> do putStrLn "unknown encoding"
- loopNewCPU gfenv
+ enc <- mkTextEncoding cod
+ hSetEncoding stdin enc
+ hSetEncoding stdout enc
+ hSetEncoding stderr enc
+ loopNewCPU gfenv
-- ordinary commands, working on CommandEnv
_ -> do
- interpretCommandLine enc env s
+ interpretCommandLine env s0
loopNewCPU gfenv
-- gfenv' <- return $ either (const gfenv) id r
gfenv' <- either (\e -> (print e >> return gfenv)) return r
@@ -215,7 +216,7 @@ importInEnv gfenv opts files
if (verbAtLeast opts Normal)
then putStrLnFlush $ unwords $ "\nLanguages:" : map showCId (languages pgf1)
else return ()
- return $ gfenv { commandenv = mkCommandEnv (coding gfenv) pgf1 }
+ return $ gfenv { commandenv = mkCommandEnv pgf1 }
tryGetLine = do
res <- try getLine
@@ -252,24 +253,16 @@ data GFEnv = GFEnv {
sourcegrammar :: SourceGrammar, -- gfo grammar -retain
commandenv :: CommandEnv,
history :: [String],
- cputime :: Integer,
- coding :: Encoding
+ cputime :: Integer
}
emptyGFEnv :: IO GFEnv
emptyGFEnv = do
-#ifdef mingw32_HOST_OS
- codepage <- getACP
- let coding = fromMaybe UTF_8 (lookup ("cp"++show codepage) encodings)
-#else
- let coding = UTF_8
-#endif
- return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv coding emptyPGF) [] 0 coding
+ return $ GFEnv emptySourceGrammar{modules=[(identW,emptyModInfo)]} (mkCommandEnv emptyPGF) [] 0
-encode = encodeUnicode . coding
-decode = decodeUnicode . coding
+decode _ = id -- decodeUnicode . coding
-wordCompletion gfenv line0 prefix0 p =
+wordCompletion gfenv line prefix p =
case wc_type (take p line) of
CmplCmd pref
-> ret ' ' [name | name <- Map.keys (commands cmdEnv), isPrefixOf pref name]
@@ -280,7 +273,7 @@ wordCompletion gfenv line0 prefix0 p =
in case loop state0 ws of
Nothing -> ret ' ' []
Just state -> let compls = getCompletions state prefix
- in ret ' ' (map (encode gfenv) (Map.keys compls))
+ in ret ' ' (Map.keys compls)
Left (_ :: SomeException) -> ret ' ' []
CmplOpt (Just (Command n _ _)) pref
-> case Map.lookup n (commands cmdEnv) of
@@ -298,9 +291,6 @@ wordCompletion gfenv line0 prefix0 p =
Left (_ :: SomeException) -> ret ' ' []
_ -> ret ' ' []
where
- line = decode gfenv line0
- prefix = decode gfenv prefix0
-
pgf = multigrammar cmdEnv
cmdEnv = commandenv gfenv
optLang opts = valCIdOpts "lang" (head (languages pgf)) opts