blob: e6c1f8003e9375a9f9cf246ae39d63dbb85ac22f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
----------------------------------------------------------------------
-- |
-- Module : Thai
-- Maintainer : (Maintainer)
-- Stability : (experimental)
-- Portability : (portable)
--
--
-- Thai transliteration and other alphabet information.
-----------------------------------------------------------------------------
-- AR 27/12/2006. Execute test2 to see the transliteration table.
module GF.Text.Thai (mkThai) where
import qualified Data.Map as Map
import Data.Char
-- for testing
import GF.Text.UTF8
import Data.List
mkThai :: String -> String
mkThai = concat . map mkThaiWord . words
type ThaiChar = Char
mkThaiWord :: String -> [ThaiChar]
mkThaiWord = map (toEnum . mkThaiChar) . unchar
mkThaiChar :: String -> Int
mkThaiChar c = maybe 0 id $ Map.lookup c thaiMap
thaiMap :: Map.Map String Int
thaiMap = Map.fromList $ zip allThaiTrans allThaiCodes
-- convert all string literals in a text
thaiStrings :: String -> String
thaiStrings s = case s of
'"':cs -> let (t,_:r) = span (/='"') cs in
'"':mkThai t ++ "\"" ++ thaiStrings r
c:cs -> c:thaiStrings cs
_ -> s
-- each character is either [letter] or [letter+nonletter]
unchar :: String -> [String]
unchar s = case s of
c:d:cs
| isAlpha d -> [c] : unchar (d:cs)
| d == '?' -> unchar cs -- use "o?" to represent implicit 'o'
| otherwise -> [c,d] : unchar cs
[_] -> [s]
_ -> []
allThaiTrans :: [String]
allThaiTrans = words $
"- k k1 - k2 - k3 g c c1 c2 s c3 y d t " ++
"t1 t2 t3 n d' t' t4 t5 t6 n b p p1 f p2 f' " ++
"p3 m y' r - l - w s' r' s- h l' O h' - " ++
"a a. a: a+ i i: v v: u u: - - - - - - " ++
"e e: o: a% a& L R M E T - - - - - - " ++
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
allThaiCodes :: [Int]
allThaiCodes = [0x0e00 .. 0x0e7f]
-- to test
test1 = testThai "k2wa:mrak"
test2 = putStrLn $ thaiTable
test3 = do
writeFile "thai.html" "<html><body><pre>"
appendFile "thai.html" thaiTable
appendFile "thai.html" "</pre></body></html>"
testThai :: String -> IO ()
testThai s = do
putStrLn $ encodeUTF8 $ mkThai s
putStrLn $ unwords $ map mkThaiPron $ words s
thaiFile :: FilePath -> Maybe FilePath -> IO ()
thaiFile f mo = do
s <- readFile f
let put = maybe putStr writeFile mo
put $ encodeUTF8 $ thaiStrings s
mkThaiPron = concat . render . unchar where
render s = case s of
[c] -> maybe c return (Map.lookup c thaiFinalMap): []
c:cs -> pronThai c : render cs
_ -> []
thaiFinalMap = Map.fromList $ zip allThaiTrans finals
thaiTable :: String
thaiTable = unlines [
"\t" ++
hex c ++ "\t" ++
encodeUTF8 (showThai s) ++ "\t" ++
s ++ "\t" ++
pronThai s ++ "\t" ++
[f] ++ "\t" ++
[q] ++ "\t"
|
(c,q,f,s) <- zip4 allThaiCodes heights finals allThaiTrans
]
showThai s = case s of
"-" -> "-"
--- v:_ | elem v "ivu" -> map (toEnum . mkThaiChar) ["O",s]
_ -> [toEnum $ mkThaiChar s]
pronThai s = case s of
[c,p]
| isUpper c && isDigit p -> [p]
| isDigit p -> c:"h"
| p==':' -> c:[c]
| elem p "%&" -> c:"y"
| p=='+' -> c:"m"
| otherwise -> [c]
[c] | isUpper c -> "" --- O
_ -> s
hex = map hx . reverse . digs where
digs 0 = [0]
digs n = n `mod` 16 : digs (n `div` 16)
hx d = "0123456789ABCDEF" !! d
heights :: String
finals :: String
heights =
" MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML" ++ replicate 99 ' '
finals =
" kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n--" ++ replicate 99 ' '
|