diff options
| author | krasimir <krasimir@chalmers.se> | 2008-12-17 23:53:12 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2008-12-17 23:53:12 +0000 |
| commit | 9d0df9e0b49e06f4e182269f563555e803ff27ac (patch) | |
| tree | 82a680d75a23641b30fcf3a980a5b2015ab609ce /src | |
| parent | 124738068a401b872369b8f4fb957bbdcd1bda87 (diff) | |
Added codepage 1250 (Central European). Useful for Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian, Romanian and Albanian.
Diffstat (limited to 'src')
| -rw-r--r-- | src/GF/Text/CP1250.hs | 77 | ||||
| -rw-r--r-- | src/GF/Text/Coding.hs | 3 |
2 files changed, 80 insertions, 0 deletions
diff --git a/src/GF/Text/CP1250.hs b/src/GF/Text/CP1250.hs
new file mode 100644
index 000000000..474c04ace
--- /dev/null
+++ b/src/GF/Text/CP1250.hs
@@ -0,0 +1,77 @@
+module GF.Text.CP1250 where
+
+import Data.Char
+
+-- | Unicode code points for the CP1250 (Windows-1250) bytes 0x80..0xFF,
+-- listed in byte order: entry @i@ is the character for byte @0x80 + i@.
+-- The five bytes that Windows-1250 leaves undefined (0x81, 0x83, 0x88,
+-- 0x90, 0x98) map to themselves, so they pass through both conversions
+-- unchanged.
+cp1250HighHalf :: [Char]
+cp1250HighHalf = map chr
+  [ 0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021  -- 0x80
+  , 0x0088, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179  -- 0x88
+  , 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014  -- 0x90
+  , 0x0098, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A  -- 0x98
+  , 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7  -- 0xA0
+  , 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B  -- 0xA8
+  , 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7  -- 0xB0
+  , 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C  -- 0xB8
+  , 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7  -- 0xC0
+  , 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E  -- 0xC8
+  , 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7  -- 0xD0
+  , 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF  -- 0xD8
+  , 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7  -- 0xE0
+  , 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F  -- 0xE8
+  , 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7  -- 0xF0
+  , 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9  -- 0xF8
+  ]
+
+-- | Decode a string whose characters each hold one CP1250 byte
+-- (values 0x00..0xFF) into the corresponding Unicode characters.
+-- Bytes below 0x80 are ASCII and are returned unchanged, as is any
+-- character above 0xFF (it cannot be a CP1250 byte).
+decodeCP1250 :: String -> String
+decodeCP1250 = map convert where
+  convert c
+    | c >= '\x80' && c <= '\xFF' = cp1250HighHalf !! (ord c - 0x80)
+    | otherwise                  = c
+
+
+-- | Encode Unicode characters as CP1250 bytes, one byte per 'Char'.
+-- This is the exact inverse of 'decodeCP1250' on every character that
+-- CP1250 can represent.  Characters outside the code page are passed
+-- through unchanged rather than rejected.
+encodeCP1250 :: String -> String
+encodeCP1250 = map convert where
+  convert c
+    | c < '\x80' = c   -- ASCII is shared with CP1250; skip the table search
+    | otherwise  = maybe c id (lookup c byteOf)
+  -- Reverse view of the table: Unicode character -> CP1250 byte.
+  byteOf = zip cp1250HighHalf ['\x80'..'\xFF']
diff --git a/src/GF/Text/Coding.hs b/src/GF/Text/Coding.hs
index ca0322d06..2860b79d2 100644
--- a/src/GF/Text/Coding.hs
+++ b/src/GF/Text/Coding.hs
@@ -1,17 +1,20 @@
 module GF.Text.Coding where
 
 import GF.Text.UTF8
+import GF.Text.CP1250
 import GF.Text.CP1251
 import GF.Text.CP1252
 
 encodeUnicode e = case e of
   "utf8" -> encodeUTF8
+  "cp1250" -> encodeCP1250
   "cp1251" -> encodeCP1251
   "cp1252" -> encodeCP1252
   _ -> id
 
 decodeUnicode e = case e of
   "utf8" -> decodeUTF8
+  "cp1250" -> decodeCP1250
   "cp1251" -> decodeCP1251
   "cp1252" -> decodeCP1252
   _ -> id
