summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text/Russian.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/Russian.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/Russian.hs')
-rw-r--r--src-3.0/GF/Text/Russian.hs56
1 files changed, 56 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Russian.hs b/src-3.0/GF/Text/Russian.hs
new file mode 100644
index 000000000..c4f1bfd89
--- /dev/null
+++ b/src-3.0/GF/Text/Russian.hs
@@ -0,0 +1,56 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Russian
+-- Maintainer : (Maintainer)
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:23:40 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.5 $
+--
+-- (Description of the module)
+-----------------------------------------------------------------------------
+
+module GF.Text.Russian (mkRussian, mkRusKOI8) where
+
+-- | an ad hoc ASCII encoding. Delimiters: @\/_ _\/@
+mkRussian :: String -> String
+mkRussian = unwords . (map mkRussianWord) . words
+
+-- | the KOI8 encoding, incomplete. Delimiters: @\/* *\/@
+mkRusKOI8 :: String -> String
+mkRusKOI8 = unwords . (map mkRussianKOI8) . words
+
+type RussianChar = Char
+
+mkRussianWord :: String -> [RussianChar]
+mkRussianWord = map (mkRussianChar allRussianCodes)
+
+mkRussianKOI8 :: String -> [RussianChar]
+mkRussianKOI8 = map (mkRussianChar allRussianKOI8)
+
+mkRussianChar chars c = case lookup c cc of Just c' -> c' ; _ -> c
+ where
+ cc = zip chars allRussian
+
+allRussianCodes :: [Char]
+allRussianCodes =
+ -- changed to Ints to work with Haskell compilers e.g. GHC 6.5 CVS
+ -- which expect source files to be in UTF-8
+ -- /bringert 2006-05-19
+ -- "ÅåABVGDEXZIJKLMNOPRSTUFHCQW£}!*ÖYÄabvgdexzijklmnoprstufhcqw#01'öyä"
+ map toEnum [197,229,65,66,86,71,68,69,88,90,73,74,75,76,77,78,79,80,82,83,84,85,70,72,67,81,87,163,125,33,42,214,89,196,97,98,118,103,100,101,120,122,105,106,107,108,109,110,111,112,114,115,116,117,102,104,99,113,119,35,48,49,39,246,121,228]
+
+allRussianKOI8 :: [Char]
+allRussianKOI8 =
+ -- changed to Ints to work with Haskell compilers e.g. GHC 6.5 CVS
+ -- which expect source files to be in UTF-8
+ -- /bringert 2006-05-19
+ -- "^@áâ÷çäåöúéêëìíîïðòóôõæèãþûýøùÿüàñÁÂ×ÇÄÅÖÚÉÊËÌÍÎÏÐÒÓÔÕÆÈÃÞÛÝØÙßÜÀÑ"
+ map toEnum [94,64,225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,242,243,244,245,230,232,227,254,251,253,248,249,255,252,224,241,193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,210,211,212,213,198,200,195,222,219,221,216,217,223,220,192,209]
+
+allRussian :: String
+allRussian = (map toEnum (0x0401:0x0451:[0x0410 .. 0x044f])) -- Ëë in odd places
+
+