summaryrefslogtreecommitdiff
path: root/src/GF/Text/Ethiopic.hs
blob: 81abbf719554c966068256a3e74447bab3b2ac2b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
----------------------------------------------------------------------
-- |
-- Module      : Ethiopic
-- Maintainer  : HH
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:35 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- Ascii-Unicode decoding for Ethiopian.
-- Copyright (c) Harald Hammarström 2003 under Gnu General Public License
-----------------------------------------------------------------------------

module GF.Text.Ethiopic (mkEthiopic) where

mkEthiopic :: String -> String
mkEthiopic = digraphWordToUnicode . adHocToDigraphWord

-- mkEthiopic :: String -> String
-- mkEthiopic = reverse . unwords . (map (digraphWordToUnicode . adHocToDigraphWord)) . words
--- reverse : assumes everything's on same line

adHocToDigraphWord :: String -> [(Char, Int)]
adHocToDigraphWord str = case str of
  [] -> []
  '<' : cs -> ('<', -1) : spoolMarkup cs
  c1 : cs | isVowel c1 -> (')', vowelOrder c1) : adHocToDigraphWord cs
  -- c1 isn't a vowel
  c1 : cs | not (elem c1 allEthiopicCodes) -> (c1, -1) : adHocToDigraphWord cs
  c1 : c2 : cs | isVowel c2 -> (c1, vowelOrder c2) : adHocToDigraphWord cs
  c1 : cs -> (c1, 5) : adHocToDigraphWord cs 
  
spoolMarkup :: String -> [(Char, Int)]
spoolMarkup s = case s of
  -- [] -> [] -- Shouldn't happen
  '>' : cs -> ('>', -1) : adHocToDigraphWord cs  
  c1 : cs -> (c1, -1) : spoolMarkup cs
    
isVowel x = elem x "A\228ui\239aeoI"

vowelOrder :: Char -> Int
vowelOrder x = case x of 
  'A' -> 0
  '\228' -> 0 -- ä
  'u' -> 1
  'i' -> 2
  'a' -> 3
  'e' -> 4
  'I' -> 5
  '\239' -> 5 -- ï
  'o' -> 6
  c   -> 5 -- vowelless 

digraphWordToUnicode = map digraphToUnicode

digraphToUnicode :: (Char, Int) -> Char
-- digraphToUnicode (c1, c2) = c1
 
digraphToUnicode (c1, -1) = c1 
digraphToUnicode (c1, c2) = toEnum (0x1200 + c2 + 8*case lookup c1 cc of Just c' -> c') 
    where 
      cc = zip allEthiopicCodes allEthiopic

allEthiopic :: [Int]
allEthiopic = [0 .. 44] -- x 8

allEthiopicCodes = "hlHmLrs$KQ__bBtcxXnN)kW__w(zZyd_jgG_TCPSLfp"

-- Q = kW, X = xW, W = kW, G = gW