diff options
| author | krasimir <krasimir@chalmers.se> | 2010-03-23 13:44:17 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2010-03-23 13:44:17 +0000 |
| commit | 1e51690b71f13c877b19230e70b3be95a154e3ac (patch) | |
| tree | 5a17a0b58a9c35958be427abc4fb89b979b5931e /src/compiler/GF/Text | |
| parent | 850b897f088d7d94ee49f571f7b32494fbe53e4a (diff) | |
added codepage for Turkish
Diffstat (limited to 'src/compiler/GF/Text')
| -rw-r--r-- | src/compiler/GF/Text/CP1254.hs | 84 | ||||
| -rw-r--r-- | src/compiler/GF/Text/Coding.hs | 3 |
2 files changed, 87 insertions, 0 deletions
diff --git a/src/compiler/GF/Text/CP1254.hs b/src/compiler/GF/Text/CP1254.hs new file mode 100644 index 000000000..488359d70 --- /dev/null +++ b/src/compiler/GF/Text/CP1254.hs @@ -0,0 +1,84 @@ +-----------------------------------------------------------------------------
+-- |
+-- Module : GF.Text.CP1254
+-- Maintainer : Krasimir Angelov
+--
+-- CP1254 (Windows-1254) is the code page used under Microsoft Windows to write Turkish.
+-- Code points A0 through FF coincide with ISO 8859-9; code points 80 through 9F hold additional printable characters.
+--
+-----------------------------------------------------------------------------
+
+module GF.Text.CP1254 where
+
+import Data.Char
+
+decodeCP1254 = map convert where
+ convert c
+ | c == '\x80' = chr 0x20AC
+ | c == '\x82' = chr 0x201A
+ | c == '\x83' = chr 0x192
+ | c == '\x84' = chr 0x201E
+ | c == '\x85' = chr 0x2026
+ | c == '\x86' = chr 0x2020
+ | c == '\x87' = chr 0x2021
+ | c == '\x88' = chr 0x2C6
+ | c == '\x89' = chr 0x2030
+ | c == '\x8A' = chr 0x160
+ | c == '\x8B' = chr 0x2039
+ | c == '\x8C' = chr 0x152
+ | c == '\x91' = chr 0x2018
+ | c == '\x92' = chr 0x2019
+ | c == '\x93' = chr 0x201C
+ | c == '\x94' = chr 0x201D
+ | c == '\x95' = chr 0x2022
+ | c == '\x96' = chr 0x2013
+ | c == '\x97' = chr 0x2014
+ | c == '\x98' = chr 0x2DC
+ | c == '\x99' = chr 0x2122
+ | c == '\x9A' = chr 0x161
+ | c == '\x9B' = chr 0x203A
+ | c == '\x9C' = chr 0x153
+ | c == '\x9F' = chr 0x178
+ | c == '\xD0' = chr 0x11E
+ | c == '\xDD' = chr 0x130
+ | c == '\xDE' = chr 0x15E
+ | c == '\xF0' = chr 0x11F
+ | c == '\xFD' = chr 0x131
+ | c == '\xFE' = chr 0x15F
+ | otherwise = c
+
+encodeCP1254 = map convert where
+ convert c
+ | oc == 0x20AC = '\x80'
+ | oc == 0x201A = '\x82'
+ | oc == 0x192 = '\x83'
+ | oc == 0x201E = '\x84'
+ | oc == 0x2026 = '\x85'
+ | oc == 0x2020 = '\x86'
+ | oc == 0x2021 = '\x87'
+ | oc == 0x2C6 = '\x88'
+ | oc == 0x2030 = '\x89'
+ | oc == 0x160 = '\x8A'
+ | oc == 0x2039 = '\x8B'
+ | oc == 0x152 = '\x8C'
+ | oc == 0x2018 = '\x91'
+ | oc == 0x2019 = '\x92'
+ | oc == 0x201C = '\x93'
+ | oc == 0x201D = '\x94'
+ | oc == 0x2022 = '\x95'
+ | oc == 0x2013 = '\x96'
+ | oc == 0x2014 = '\x97'
+ | oc == 0x2DC = '\x98'
+ | oc == 0x2122 = '\x99'
+ | oc == 0x161 = '\x9A'
+ | oc == 0x203A = '\x9B'
+ | oc == 0x153 = '\x9C'
+ | oc == 0x178 = '\x9F'
+ | oc == 0x11E = '\xD0'
+ | oc == 0x130 = '\xDD'
+ | oc == 0x15E = '\xDE'
+ | oc == 0x11F = '\xF0'
+ | oc == 0x131 = '\xFD'
+ | oc == 0x15F = '\xFE'
+ | otherwise = c
+ where oc = ord c
diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs index e3cd7b0ea..3481b278d 100644 --- a/src/compiler/GF/Text/Coding.hs +++ b/src/compiler/GF/Text/Coding.hs @@ -5,12 +5,14 @@ import GF.Text.UTF8 import GF.Text.CP1250 import GF.Text.CP1251 import GF.Text.CP1252 +import GF.Text.CP1254 encodeUnicode e = case e of UTF_8 -> encodeUTF8 CP_1250 -> encodeCP1250 CP_1251 -> encodeCP1251 CP_1252 -> encodeCP1252 + CP_1254 -> encodeCP1254 _ -> id decodeUnicode e = case e of @@ -18,4 +20,5 @@ decodeUnicode e = case e of CP_1250 -> decodeCP1250 CP_1251 -> decodeCP1251 CP_1252 -> decodeCP1252 + CP_1254 -> decodeCP1254 _ -> id |
