summaryrefslogtreecommitdiff
path: root/src/GF/Text/Thai.hs
blob: e6c1f8003e9375a9f9cf246ae39d63dbb85ac22f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
----------------------------------------------------------------------
-- |
-- Module      : Thai
-- Maintainer  : (Maintainer)
-- Stability   : (experimental)
-- Portability : (portable)
--
--
-- Thai transliteration and other alphabet information.
-----------------------------------------------------------------------------

-- AR 27/12/2006. Execute test2 to see the transliteration table.

module GF.Text.Thai (mkThai) where

import qualified Data.Map as Map
import Data.Char

-- for testing
import GF.Text.UTF8 
import Data.List


mkThai :: String -> String
mkThai = concat . map mkThaiWord . words

type ThaiChar = Char

mkThaiWord :: String -> [ThaiChar]
mkThaiWord = map (toEnum . mkThaiChar) . unchar

mkThaiChar :: String -> Int
mkThaiChar c = maybe 0 id $ Map.lookup c thaiMap

thaiMap :: Map.Map String Int
thaiMap = Map.fromList $ zip allThaiTrans allThaiCodes

-- convert all string literals in a text

thaiStrings :: String -> String
thaiStrings s = case s of
  '"':cs -> let (t,_:r) = span (/='"') cs in
            '"':mkThai t ++ "\"" ++ thaiStrings r
  c:cs -> c:thaiStrings cs
  _ -> s


-- each character is either [letter] or [letter+nonletter]

unchar :: String -> [String]
unchar s = case s of
  c:d:cs 
   | isAlpha d -> [c]   : unchar (d:cs)
   | d == '?'  ->         unchar cs -- use "o?" to represent implicit 'o'
   | otherwise -> [c,d] : unchar cs
  [_]          -> [s]
  _            -> []

allThaiTrans :: [String]
allThaiTrans = words $
  "-  k  k1 -  k2 -  k3 g  c  c1 c2 s  c3 y  d  t  " ++
  "t1 t2 t3 n  d' t' t4 t5 t6 n  b  p  p1 f  p2 f' " ++
  "p3 m  y' r  -  l  -  w  s' r' s- h  l' O  h' -  " ++
  "a  a. a: a+ i  i: v  v: u  u: -  -  -  -  -  -  " ++
  "e  e: o: a% a& L  R  M  E  T  -  -  -  -  -  -  " ++
  "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 -  -  -  -  -  -  "


allThaiCodes :: [Int]
allThaiCodes = [0x0e00 .. 0x0e7f]


-- to test

test1 = testThai "k2wa:mrak"
test2 = putStrLn $ thaiTable
test3 = do
  writeFile  "thai.html" "<html><body><pre>"
  appendFile "thai.html" thaiTable
  appendFile "thai.html" "</pre></body></html>"


testThai :: String -> IO ()
testThai s = do
  putStrLn $ encodeUTF8 $ mkThai s
  putStrLn $ unwords $ map mkThaiPron $ words s

thaiFile :: FilePath -> Maybe FilePath -> IO ()
thaiFile f mo = do
  s <- readFile f
  let put = maybe putStr writeFile mo
  put $ encodeUTF8 $ thaiStrings s

mkThaiPron = concat . render . unchar where
  render s = case s of
    [c]  -> maybe c return (Map.lookup c thaiFinalMap): []
    c:cs -> pronThai c : render cs
    _ -> []

thaiFinalMap = Map.fromList $ zip allThaiTrans finals


thaiTable :: String
thaiTable = unlines [
  "\t" ++ 
  hex c ++ "\t" ++ 
  encodeUTF8 (showThai s) ++ "\t" ++ 
  s ++ "\t" ++ 
  pronThai s ++ "\t" ++
  [f] ++ "\t" ++
  [q] ++ "\t"
    |
      (c,q,f,s) <- zip4 allThaiCodes heights finals allThaiTrans
  ]

showThai s = case s of
  "-" -> "-"
---  v:_ | elem v "ivu" -> map (toEnum . mkThaiChar) ["O",s] 
  _   -> [toEnum $ mkThaiChar s]

pronThai s = case s of
  [c,p]
    | isUpper c && isDigit p -> [p]
    | isDigit p   -> c:"h"
    | p==':'      -> c:[c]
    | elem p "%&" -> c:"y"
    | p=='+'      -> c:"m"
    | otherwise   -> [c]
  [c] | isUpper c -> "" --- O
  _ -> s

hex = map hx . reverse . digs where
  digs 0 = [0]
  digs n = n `mod` 16 : digs (n `div` 16)
  hx d = "0123456789ABCDEF" !! d

heights :: String
finals  :: String
heights = 
  " MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML" ++ replicate 99 ' '
finals  = 
  " kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n--" ++ replicate 99 ' '