diff options
| author | hallgren <hallgren@chalmers.se> | 2014-08-19 14:27:15 +0000 |
|---|---|---|
| committer | hallgren <hallgren@chalmers.se> | 2014-08-19 14:27:15 +0000 |
| commit | 61760e4205d03c5e05dbaa6cf2ff5ffdd1c43662 (patch) | |
| tree | edfe9770607b4c69055a4cea72e42c6601d8b53e /src/compiler | |
| parent | 60b5990792384ef75b0ac5f81b556a1f5ee42ce0 (diff) | |
GF.Compile.ReadFiles: a simpler & faster way to convert from Latin1 to UTF-8
Diffstat (limited to 'src/compiler')
| -rw-r--r-- | src/compiler/GF/Compile/ReadFiles.hs | 15 |
1 file changed, 9 insertions, 6 deletions
diff --git a/src/compiler/GF/Compile/ReadFiles.hs b/src/compiler/GF/Compile/ReadFiles.hs index dbb10b352..ecbd88b54 100644 --- a/src/compiler/GF/Compile/ReadFiles.hs +++ b/src/compiler/GF/Compile/ReadFiles.hs @@ -232,12 +232,15 @@ toUTF8 opts0 raw = coding = getEncoding $ opts0 `addOptions` opts utf8 <- if coding=="UTF-8" then return raw - else lift $ do --ePutStrLn $ "toUTF8 from "++coding - enc <- mkTextEncoding coding - -- decodeUnicodeIO uses a lot of stack space, - -- so we need to split the file into smaller pieces - ls <- mapM (decodeUnicodeIO enc) (BS.lines raw) - return $ UTF8.fromString (unlines ls) + else if coding=="CP1252" -- Latin1 + then return . UTF8.fromString $ BS.unpack raw -- faster + else lift $ + do --ePutStrLn $ "toUTF8 from "++coding + enc <- mkTextEncoding coding + -- decodeUnicodeIO uses a lot of stack space, + -- so we need to split the file into smaller pieces + ls <- mapM (decodeUnicodeIO enc) (BS.lines raw) + return $ UTF8.fromString (unlines ls) return (given,utf8) --lift io = ioe (fmap Ok io `catch` (return . Bad . show)) |
