added codepage for Turkish

author: krasimir <krasimir@chalmers.se> 2010-03-23 13:44:17 +0000
committer: krasimir <krasimir@chalmers.se> 2010-03-23 13:44:17 +0000
commit: 1e51690b71f13c877b19230e70b3be95a154e3ac (patch)
tree: 5a17a0b58a9c35958be427abc4fb89b979b5931e /src/compiler/GF
parent: 850b897f088d7d94ee49f571f7b32494fbe53e4a (diff)
3 files changed, 89 insertions, 1 deletion
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index 3c35fef00..24b967aff 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -77,7 +77,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug
 data Phase = Preproc | Convert | Compile | Link
   deriving (Show,Eq,Ord)
 
-data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252
+data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254
   deriving (Eq,Ord)
 
 data OutputFormat = FmtPGFPretty
@@ -489,6 +489,7 @@ encodings =
      ("cp1250", CP_1250),
      ("cp1251", CP_1251),
      ("cp1252", CP_1252),
+     ("cp1254", CP_1254),
      ("latin1", ISO_8859_1)
     ]
 
diff --git a/src/compiler/GF/Text/CP1254.hs b/src/compiler/GF/Text/CP1254.hs
new file mode 100644
index 000000000..488359d70
--- /dev/null
+++ b/src/compiler/GF/Text/CP1254.hs
@@ -0,0 +1,84 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      : GF.Text.CP1254
+-- Maintainer  : Krasimir Angelov
+--
+-- cp1254 is a code page used under Microsoft Windows to write Turkish.
+-- Characters with codepoints A0 through FF are compatible with ISO 8859-9.
+--
+-----------------------------------------------------------------------------
+
+module GF.Text.CP1254 where
+
+import Data.Char
+
+decodeCP1254 = map convert where  -- translate a CP1254 string (one Char per byte value) to Unicode, Char by Char
+  convert c
+   | c == '\x80'                = chr 0x20AC  -- U+20AC EURO SIGN; the 0x80-0x9F rows cover punctuation and symbols
+   | c == '\x82'                = chr 0x201A
+   | c == '\x83'                = chr 0x192
+   | c == '\x84'                = chr 0x201E
+   | c == '\x85'                = chr 0x2026
+   | c == '\x86'                = chr 0x2020
+   | c == '\x87'                = chr 0x2021
+   | c == '\x88'                = chr 0x2C6
+   | c == '\x89'                = chr 0x2030
+   | c == '\x8A'                = chr 0x160
+   | c == '\x8B'                = chr 0x2039
+   | c == '\x8C'                = chr 0x152
+   | c == '\x91'                = chr 0x2018
+   | c == '\x92'                = chr 0x2019
+   | c == '\x93'                = chr 0x201C
+   | c == '\x94'                = chr 0x201D
+   | c == '\x95'                = chr 0x2022
+   | c == '\x96'                = chr 0x2013
+   | c == '\x97'                = chr 0x2014
+   | c == '\x98'                = chr 0x2DC
+   | c == '\x99'                = chr 0x2122
+   | c == '\x9A'                = chr 0x161
+   | c == '\x9B'                = chr 0x203A
+   | c == '\x9C'                = chr 0x153
+   | c == '\x9F'                = chr 0x178
+   | c == '\xD0'                = chr 0x11E  -- U+011E LATIN CAPITAL LETTER G WITH BREVE
+   | c == '\xDD'                = chr 0x130  -- U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+   | c == '\xDE'                = chr 0x15E  -- U+015E LATIN CAPITAL LETTER S WITH CEDILLA
+   | c == '\xF0'                = chr 0x11F  -- U+011F LATIN SMALL LETTER G WITH BREVE
+   | c == '\xFD'                = chr 0x131  -- U+0131 LATIN SMALL LETTER DOTLESS I
+   | c == '\xFE'                = chr 0x15F  -- U+015F LATIN SMALL LETTER S WITH CEDILLA
+   | otherwise                  = c  -- every other Char is returned unchanged
+
+encodeCP1254 = map convert where  -- translate a Unicode string to CP1254 Char by Char (inverse table of decodeCP1254)
+  convert c
+   | oc == 0x20AC               = '\x80'  -- U+20AC EURO SIGN; the rows down to 0x9F cover punctuation and symbols
+   | oc == 0x201A               = '\x82'
+   | oc == 0x192                = '\x83'
+   | oc == 0x201E               = '\x84'
+   | oc == 0x2026               = '\x85'
+   | oc == 0x2020               = '\x86'
+   | oc == 0x2021               = '\x87'
+   | oc == 0x2C6                = '\x88'
+   | oc == 0x2030               = '\x89'
+   | oc == 0x160                = '\x8A'
+   | oc == 0x2039               = '\x8B'
+   | oc == 0x152                = '\x8C'
+   | oc == 0x2018               = '\x91'
+   | oc == 0x2019               = '\x92'
+   | oc == 0x201C               = '\x93'
+   | oc == 0x201D               = '\x94'
+   | oc == 0x2022               = '\x95'
+   | oc == 0x2013               = '\x96'
+   | oc == 0x2014               = '\x97'
+   | oc == 0x2DC                = '\x98'
+   | oc == 0x2122               = '\x99'
+   | oc == 0x161                = '\x9A'
+   | oc == 0x203A               = '\x9B'
+   | oc == 0x153                = '\x9C'
+   | oc == 0x178                = '\x9F'
+   | oc == 0x11E                = '\xD0'  -- U+011E LATIN CAPITAL LETTER G WITH BREVE
+   | oc == 0x130                = '\xDD'  -- U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+   | oc == 0x15E                = '\xDE'  -- U+015E LATIN CAPITAL LETTER S WITH CEDILLA
+   | oc == 0x11F                = '\xF0'  -- U+011F LATIN SMALL LETTER G WITH BREVE
+   | oc == 0x131                = '\xFD'  -- U+0131 LATIN SMALL LETTER DOTLESS I
+   | oc == 0x15F                = '\xFE'  -- U+015F LATIN SMALL LETTER S WITH CEDILLA
+   | otherwise                  = c  -- any Char not listed above is emitted unchanged; no error or substitution is produced
+   where oc = ord c  -- Unicode code point of the input Char, compared against the table above
diff --git a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs
index e3cd7b0ea..3481b278d 100644
--- a/src/compiler/GF/Text/Coding.hs
+++ b/src/compiler/GF/Text/Coding.hs
@@ -5,12 +5,14 @@ import GF.Text.UTF8
 import GF.Text.CP1250
 import GF.Text.CP1251
 import GF.Text.CP1252
+import GF.Text.CP1254
 
 encodeUnicode e = case e of
   UTF_8   -> encodeUTF8
   CP_1250 -> encodeCP1250
   CP_1251 -> encodeCP1251
   CP_1252 -> encodeCP1252
+  CP_1254 -> encodeCP1254
   _       -> id
 
 decodeUnicode e = case e of
@@ -18,4 +20,5 @@ decodeUnicode e = case e of
   CP_1250 -> decodeCP1250
   CP_1251 -> decodeCP1251
   CP_1252 -> decodeCP1252
+  CP_1254 -> decodeCP1254
   _       -> id
author	krasimir <krasimir@chalmers.se>	2010-03-23 13:44:17 +0000
committer	krasimir <krasimir@chalmers.se>	2010-03-23 13:44:17 +0000
commit	1e51690b71f13c877b19230e70b3be95a154e3ac (patch)
tree	5a17a0b58a9c35958be427abc4fb89b979b5931e /src/compiler/GF
parent	850b897f088d7d94ee49f571f7b32494fbe53e4a (diff)