summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author krasimir <krasimir@chalmers.se> 2008-12-17 23:53:12 +0000
committer krasimir <krasimir@chalmers.se> 2008-12-17 23:53:12 +0000
commit 9d0df9e0b49e06f4e182269f563555e803ff27ac (patch)
tree 82a680d75a23641b30fcf3a980a5b2015ab609ce
parent 124738068a401b872369b8f4fb957bbdcd1bda87 (diff)
Added code page 1250 (Windows Central European). Useful for Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian, Romanian and Albanian.
-rw-r--r-- src/GF/Text/CP1250.hs 77
-rw-r--r-- src/GF/Text/Coding.hs 3
2 files changed, 80 insertions, 0 deletions
diff --git a/src/GF/Text/CP1250.hs b/src/GF/Text/CP1250.hs
new file mode 100644
index 000000000..474c04ace
--- /dev/null
+++ b/src/GF/Text/CP1250.hs
@@ -0,0 +1,77 @@
+module GF.Text.CP1250 where
+
+import Data.Char
+
+-- | Decode CP1250 (Windows Central European) text into Unicode.
+--
+-- Each 'Char' of the input is expected to hold one CP1250 byte value.
+-- Bytes whose CP1250 meaning differs from the Unicode code point with the
+-- same number are replaced by the proper Unicode character. Every other
+-- character (ASCII, the slots CP1250 shares with Latin-1, and the bytes
+-- CP1250 leaves unassigned: 0x81, 0x83, 0x88, 0x90, 0x98) is returned
+-- unchanged.
+decodeCP1250 :: String -> String
+decodeCP1250 = map convert
+  where
+    convert c = case c of
+      -- 0x80-0x9F: punctuation and S/T/Z letters with diacritics
+      '\x80' -> '\x20AC'
+      '\x82' -> '\x201A'
+      '\x84' -> '\x201E'
+      '\x85' -> '\x2026'
+      '\x86' -> '\x2020'
+      '\x87' -> '\x2021'
+      '\x89' -> '\x2030'
+      '\x8A' -> '\x0160'
+      '\x8B' -> '\x2039'
+      '\x8C' -> '\x015A'
+      '\x8D' -> '\x0164'
+      '\x8E' -> '\x017D'
+      '\x8F' -> '\x0179'
+      '\x91' -> '\x2018'
+      '\x92' -> '\x2019'
+      '\x93' -> '\x201C'
+      '\x94' -> '\x201D'
+      '\x95' -> '\x2022'
+      '\x96' -> '\x2013'
+      '\x97' -> '\x2014'
+      '\x99' -> '\x2122'
+      '\x9A' -> '\x0161'
+      '\x9B' -> '\x203A'
+      '\x9C' -> '\x015B'
+      '\x9D' -> '\x0165'
+      '\x9E' -> '\x017E'
+      '\x9F' -> '\x017A'
+      -- 0xA0-0xBF: spacing diacritics and A/L/S/Z letters
+      '\xA1' -> '\x02C7'
+      '\xA2' -> '\x02D8'
+      '\xA3' -> '\x0141'
+      '\xA5' -> '\x0104'
+      '\xAA' -> '\x015E'
+      '\xAF' -> '\x017B'
+      '\xB2' -> '\x02DB'
+      '\xB3' -> '\x0142'
+      '\xB9' -> '\x0105'
+      '\xBA' -> '\x015F'
+      '\xBC' -> '\x013D'
+      '\xBD' -> '\x02DD'
+      '\xBE' -> '\x013E'
+      '\xBF' -> '\x017C'
+      -- 0xC0-0xDF: upper-case letters that differ from Latin-1
+      '\xC0' -> '\x0154'
+      '\xC3' -> '\x0102'
+      '\xC5' -> '\x0139'
+      '\xC6' -> '\x0106'
+      '\xC8' -> '\x010C'
+      '\xCA' -> '\x0118'
+      '\xCC' -> '\x011A'
+      '\xCF' -> '\x010E'
+      '\xD0' -> '\x0110'
+      '\xD1' -> '\x0143'
+      '\xD2' -> '\x0147'
+      '\xD5' -> '\x0150'
+      '\xD8' -> '\x0158'
+      '\xD9' -> '\x016E'
+      '\xDB' -> '\x0170'
+      '\xDE' -> '\x0162'
+      -- 0xE0-0xFF: lower-case letters that differ from Latin-1
+      '\xE0' -> '\x0155'
+      '\xE3' -> '\x0103'
+      '\xE5' -> '\x013A'
+      '\xE6' -> '\x0107'
+      '\xE8' -> '\x010D'
+      '\xEA' -> '\x0119'
+      '\xEC' -> '\x011B'
+      '\xEF' -> '\x010F'
+      '\xF0' -> '\x0111'
+      '\xF1' -> '\x0144'
+      '\xF2' -> '\x0148'
+      '\xF5' -> '\x0151'
+      '\xF8' -> '\x0159'
+      '\xF9' -> '\x016F'
+      '\xFB' -> '\x0171'
+      '\xFE' -> '\x0163'
+      '\xFF' -> '\x02D9'
+      -- Same code point in CP1250 and Unicode, or not a CP1250 byte at all.
+      _ -> c
+
+
+-- | Encode Unicode text as CP1250 (Windows Central European).
+--
+-- Every Unicode character that CP1250 stores at a byte value different
+-- from its own code point is replaced by a 'Char' holding that byte value.
+-- Characters with no entry in the table are returned unchanged, which is
+-- correct for ASCII and for the slots CP1250 shares with Latin-1.
+--
+-- NOTE(review): characters that CP1250 cannot represent are also passed
+-- through unchanged rather than rejected. A Latin-1 character whose slot
+-- CP1250 reassigns (e.g. U+00A3) will therefore read back as the CP1250
+-- character in that slot, and characters above U+00FF without a mapping
+-- stay outside the byte range.
+encodeCP1250 :: String -> String
+encodeCP1250 = map convert
+  where
+    convert c = case c of
+      -- targets 0x80-0x9F: punctuation and S/T/Z letters with diacritics
+      '\x20AC' -> '\x80'
+      '\x201A' -> '\x82'
+      '\x201E' -> '\x84'
+      '\x2026' -> '\x85'
+      '\x2020' -> '\x86'
+      '\x2021' -> '\x87'
+      '\x2030' -> '\x89'
+      '\x0160' -> '\x8A'
+      '\x2039' -> '\x8B'
+      '\x015A' -> '\x8C'
+      '\x0164' -> '\x8D'
+      '\x017D' -> '\x8E'
+      '\x0179' -> '\x8F'
+      '\x2018' -> '\x91'
+      '\x2019' -> '\x92'
+      '\x201C' -> '\x93'
+      '\x201D' -> '\x94'
+      '\x2022' -> '\x95'
+      '\x2013' -> '\x96'
+      '\x2014' -> '\x97'
+      '\x2122' -> '\x99'
+      '\x0161' -> '\x9A'
+      '\x203A' -> '\x9B'
+      '\x015B' -> '\x9C'
+      '\x0165' -> '\x9D'
+      '\x017E' -> '\x9E'
+      '\x017A' -> '\x9F'
+      -- targets 0xA0-0xBF: spacing diacritics and A/L/S/Z letters
+      '\x02C7' -> '\xA1'
+      '\x02D8' -> '\xA2'
+      '\x0141' -> '\xA3'
+      '\x0104' -> '\xA5'
+      '\x015E' -> '\xAA'
+      '\x017B' -> '\xAF'
+      '\x02DB' -> '\xB2'
+      '\x0142' -> '\xB3'
+      '\x0105' -> '\xB9'
+      '\x015F' -> '\xBA'
+      '\x013D' -> '\xBC'
+      '\x02DD' -> '\xBD'
+      '\x013E' -> '\xBE'
+      '\x017C' -> '\xBF'
+      -- targets 0xC0-0xDF: upper-case letters that differ from Latin-1
+      '\x0154' -> '\xC0'
+      '\x0102' -> '\xC3'
+      '\x0139' -> '\xC5'
+      '\x0106' -> '\xC6'
+      '\x010C' -> '\xC8'
+      '\x0118' -> '\xCA'
+      '\x011A' -> '\xCC'
+      '\x010E' -> '\xCF'
+      '\x0110' -> '\xD0'
+      '\x0143' -> '\xD1'
+      '\x0147' -> '\xD2'
+      '\x0150' -> '\xD5'
+      '\x0158' -> '\xD8'
+      '\x016E' -> '\xD9'
+      '\x0170' -> '\xDB'
+      '\x0162' -> '\xDE'
+      -- targets 0xE0-0xFF: lower-case letters that differ from Latin-1
+      '\x0155' -> '\xE0'
+      '\x0103' -> '\xE3'
+      '\x013A' -> '\xE5'
+      '\x0107' -> '\xE6'
+      '\x010D' -> '\xE8'
+      '\x0119' -> '\xEA'
+      '\x011B' -> '\xEC'
+      '\x010F' -> '\xEF'
+      '\x0111' -> '\xF0'
+      '\x0144' -> '\xF1'
+      '\x0148' -> '\xF2'
+      '\x0151' -> '\xF5'
+      '\x0159' -> '\xF8'
+      '\x016F' -> '\xF9'
+      '\x0171' -> '\xFB'
+      '\x0163' -> '\xFE'
+      '\x02D9' -> '\xFF'
+      -- No CP1250-specific slot: keep the character as it is.
+      _ -> c
diff --git a/src/GF/Text/Coding.hs b/src/GF/Text/Coding.hs
index ca0322d06..2860b79d2 100644
--- a/src/GF/Text/Coding.hs
+++ b/src/GF/Text/Coding.hs
@@ -1,17 +1,20 @@
module GF.Text.Coding where
import GF.Text.UTF8
+import GF.Text.CP1250
import GF.Text.CP1251
import GF.Text.CP1252
+-- | Select a string encoder by encoding name.
+--
+-- The names @"utf8"@, @"cp1250"@, @"cp1251"@ and @"cp1252"@ return
+-- 'encodeUTF8', 'encodeCP1250', 'encodeCP1251' and 'encodeCP1252'
+-- respectively. The match is an exact string comparison; any other name
+-- returns 'id', so the text is left unchanged.
encodeUnicode e = case e of
"utf8" -> encodeUTF8
+ "cp1250" -> encodeCP1250
"cp1251" -> encodeCP1251
"cp1252" -> encodeCP1252
_ -> id
+-- | Select a string decoder by encoding name.
+--
+-- The names @"utf8"@, @"cp1250"@, @"cp1251"@ and @"cp1252"@ return
+-- 'decodeUTF8', 'decodeCP1250', 'decodeCP1251' and 'decodeCP1252'
+-- respectively. The match is an exact string comparison; any other name
+-- returns 'id', so the text is left unchanged.
decodeUnicode e = case e of
"utf8" -> decodeUTF8
+ "cp1250" -> decodeCP1250
"cp1251" -> decodeCP1251
"cp1252" -> decodeCP1252
_ -> id