summaryrefslogtreecommitdiff
path: root/src/compiler/GF
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2010-03-23 13:44:17 +0000
committerkrasimir <krasimir@chalmers.se>2010-03-23 13:44:17 +0000
commit1e51690b71f13c877b19230e70b3be95a154e3ac (patch)
tree5a17a0b58a9c35958be427abc4fb89b979b5931e /src/compiler/GF
parent850b897f088d7d94ee49f571f7b32494fbe53e4a (diff)
added codepage for Turkish
Diffstat (limited to 'src/compiler/GF')
-rw-r--r--src/compiler/GF/Infra/Option.hs3
-rw-r--r--src/compiler/GF/Text/CP1254.hs84
-rw-r--r--src/compiler/GF/Text/Coding.hs3
3 files changed, 89 insertions, 1 deletions
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index 3c35fef00..24b967aff 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -77,7 +77,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug
data Phase = Preproc | Convert | Compile | Link
deriving (Show,Eq,Ord)
-data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252
+data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254
deriving (Eq,Ord)
data OutputFormat = FmtPGFPretty
@@ -489,6 +489,7 @@ encodings =
("cp1250", CP_1250),
("cp1251", CP_1251),
("cp1252", CP_1252),
+ ("cp1254", CP_1254),
("latin1", ISO_8859_1)
]
diff --git a/src/compiler/GF/Text/CP1254.hs b/src/compiler/GF/Text/CP1254.hs
new file mode 100644
index 000000000..488359d70
--- /dev/null
+++ b/src/compiler/GF/Text/CP1254.hs
@@ -0,0 +1,84 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module : GF.Text.CP1254
+-- Maintainer : Krasimir Angelov
+--
+-- cp1254 is a code page used under Microsoft Windows to write Turkish.
+-- Characters with codepoints A0 through FF are compatible with ISO 8859-9.
+--
+-----------------------------------------------------------------------------
+
+module GF.Text.CP1254 where
+
+import Data.Char
+
+-- | Map each character from its cp1254 code to the matching Unicode character; unlisted characters are kept as-is.
+decodeCP1254 = map $ \b -> case b of
+  '\x80' -> '\x20AC'
+  '\x82' -> '\x201A'
+  '\x83' -> '\x192'
+  '\x84' -> '\x201E'
+  '\x85' -> '\x2026'
+  '\x86' -> '\x2020'
+  '\x87' -> '\x2021'
+  '\x88' -> '\x2C6'
+  '\x89' -> '\x2030'
+  '\x8A' -> '\x160'
+  '\x8B' -> '\x2039'
+  '\x8C' -> '\x152'
+  '\x91' -> '\x2018'
+  '\x92' -> '\x2019'
+  '\x93' -> '\x201C'
+  '\x94' -> '\x201D'
+  '\x95' -> '\x2022'
+  '\x96' -> '\x2013'
+  '\x97' -> '\x2014'
+  '\x98' -> '\x2DC'
+  '\x99' -> '\x2122'
+  '\x9A' -> '\x161'
+  '\x9B' -> '\x203A'
+  '\x9C' -> '\x153'
+  '\x9F' -> '\x178'
+  '\xD0' -> '\x11E'
+  '\xDD' -> '\x130'
+  '\xDE' -> '\x15E'
+  '\xF0' -> '\x11F'
+  '\xFD' -> '\x131'
+  '\xFE' -> '\x15F'
+  _      -> b
+
+-- | Map each Unicode character to its cp1254 code, using the inverse of the decoding table.
+-- Characters whose code point is not listed below are passed through unchanged.
+encodeCP1254 = map $ \u -> case ord u of
+  0x20AC -> '\x80'
+  0x201A -> '\x82'
+  0x192  -> '\x83'
+  0x201E -> '\x84'
+  0x2026 -> '\x85'
+  0x2020 -> '\x86'
+  0x2021 -> '\x87'
+  0x2C6  -> '\x88'
+  0x2030 -> '\x89'
+  0x160  -> '\x8A'
+  0x2039 -> '\x8B'
+  0x152  -> '\x8C'
+  0x2018 -> '\x91'
+  0x2019 -> '\x92'
+  0x201C -> '\x93'
+  0x201D -> '\x94'
+  0x2022 -> '\x95'
+  0x2013 -> '\x96'
+  0x2014 -> '\x97'
+  0x2DC  -> '\x98'
+  0x2122 -> '\x99'
+  0x161  -> '\x9A'
+  0x203A -> '\x9B'
+  0x153  -> '\x9C'
+  0x178  -> '\x9F'
+  0x11E  -> '\xD0'
+  0x130  -> '\xDD'
+  0x15E  -> '\xDE'
+  0x11F  -> '\xF0'
+  0x131  -> '\xFD'
+  0x15F  -> '\xFE'
+  _      -> u
diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs
index e3cd7b0ea..3481b278d 100644
--- a/src/compiler/GF/Text/Coding.hs
+++ b/src/compiler/GF/Text/Coding.hs
@@ -5,12 +5,14 @@ import GF.Text.UTF8
import GF.Text.CP1250
import GF.Text.CP1251
import GF.Text.CP1252
+import GF.Text.CP1254
encodeUnicode e = case e of
UTF_8 -> encodeUTF8
CP_1250 -> encodeCP1250
CP_1251 -> encodeCP1251
CP_1252 -> encodeCP1252
+ CP_1254 -> encodeCP1254
_ -> id
decodeUnicode e = case e of
@@ -18,4 +20,5 @@ decodeUnicode e = case e of
CP_1250 -> decodeCP1250
CP_1251 -> decodeCP1251
CP_1252 -> decodeCP1252
+ CP_1254 -> decodeCP1254
_ -> id