summary refs log tree commit diff
path: root/src/GF/Text
diff options
context:
space:
mode:
author aarne <aarne@cs.chalmers.se> 2006-12-28 13:16:05 +0000
committer aarne <aarne@cs.chalmers.se> 2006-12-28 13:16:05 +0000
commit cef20174f620357c932b59d544034e3c38054295 (patch)
tree 542c2122fd3981013bc5f7bb4b3e9da0da41cb7f /src/GF/Text
parent f74d2e3928b683d25da19d0bb868ef97c6466295 (diff)
more thai transliteration; numeral thai grammars
Diffstat (limited to 'src/GF/Text')
-rw-r--r-- src/GF/Text/Thai.hs 31
1 files changed, 26 insertions, 5 deletions
diff --git a/src/GF/Text/Thai.hs b/src/GF/Text/Thai.hs
index 8e344cc06..7fede0676 100644
--- a/src/GF/Text/Thai.hs
+++ b/src/GF/Text/Thai.hs
@@ -22,7 +22,7 @@ import Data.List
+-- split the input on whitespace, convert each word with mkThaiWord,
+-- and join the converted words with no separator between them
mkThai :: String -> String
-mkThai = unwords . map mkThaiWord . words
+mkThai = concatMap mkThaiWord . words
type ThaiChar = Char
@@ -35,6 +35,15 @@ mkThaiChar c = maybe 0 id $ Map.lookup c thaiMap
thaiMap :: Map.Map String Int
thaiMap = Map.fromList $ zip allThaiTrans allThaiCodes
+-- convert all string literals in a text: the contents of every
+-- double-quoted span are passed through mkThai, everything else is
+-- copied unchanged. The literal ends at the first '"' after the opening
+-- one; backslash-escaped quotes are not recognised.
+
+thaiStrings :: String -> String
+thaiStrings s = case s of
+  '"':cs -> case span (/= '"') cs of
+    -- closed literal: convert the contents and keep both quotes
+    (t, _:r) -> '"' : mkThai t ++ "\"" ++ thaiStrings r
+    -- no closing quote: not a complete literal, so copy the rest through
+    -- unchanged instead of failing on the missing quote
+    (_, [])  -> s
+  c:cs -> c : thaiStrings cs
+  _ -> s
+
-- each character is either [letter] or [letter+nonletter]
@@ -42,6 +51,7 @@ unchar :: String -> [String]
+-- Split a string into one- or two-character pieces: a character is
+-- emitted alone when the next character is a letter, otherwise it is
+-- paired with the following non-letter. A character followed by '?' is
+-- dropped together with the '?'. A single trailing character is emitted
+-- alone; the empty string gives no pieces.
unchar s = case s of
c:d:cs
| isAlpha d -> [c] : unchar (d:cs)
+ | d == '?' -> unchar cs -- use "o?" to represent implicit 'o'
| otherwise -> [c,d] : unchar cs
[_] -> [s]
_ -> []
@@ -52,7 +62,9 @@ allThaiTrans = words $
"t1 t2 t3 n d' t' t4 t5 t6 n b p p1 f p2 f' " ++
"p3 m y' r - l - w s' r' s- h l' O h' - " ++
"a a. a: a+ i i: v v: u u: - - - - - - " ++
- "e e: o: a% a& "
+ "e e: o: a% a& L R M E T - - - - - - " ++
+ "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
+
allThaiCodes :: [Int]
allThaiCodes = [0x0e00 .. 0x0e7f]
@@ -73,6 +85,12 @@ testThai s = do
putStrLn $ encodeUTF8 $ mkThai s
putStrLn $ unwords $ map mkThaiPron $ words s
+-- Read file f, run thaiStrings over its contents, pass the result through
+-- encodeUTF8, and write it to the file named by mo, or to standard output
+-- when mo is Nothing.
+thaiFile :: FilePath -> Maybe FilePath -> IO ()
+thaiFile f mo = readFile f >>= emit . encodeUTF8 . thaiStrings
+  where
+    -- destination is chosen once from the optional output path
+    emit = maybe putStr writeFile mo
+
mkThaiPron = concat . render . unchar where
render s = case s of
[c] -> maybe c return (Map.lookup c thaiFinalMap): []
@@ -101,12 +119,13 @@ showThai s = case s of
+-- Pronunciation string for a one- or two-character code; any other input
+-- is returned unchanged.
pronThai s = case s of
[c,p]
+-- upper-case letter followed by a digit (e.g. "N5"): emit the digit
+-- itself; show p would wrap it in single quotes ('5')
+ | isUpper c && isDigit p -> [p]
| isDigit p -> c:"h"
| p==':' -> c:[c]
| elem p "%&" -> c:"y"
| p=='+' -> c:"m"
| otherwise -> [c]
- "O" -> ""
+-- a lone upper-case letter is silent
+ [c] | isUpper c -> "" --- O
_ -> s
hex = map hx . reverse . digs where
@@ -116,5 +135,7 @@ hex = map hx . reverse . digs where
heights :: String
finals :: String
-heights = " MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML "
-finals = " kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n-- "
+-- Both tables are padded with 99 blanks after the listed entries.
+-- NOTE(review): presumably so that positions beyond the listed letters
+-- look up as ' ' instead of running past the end of the string; confirm
+-- against the code that indexes these tables.
+heights =
+ " MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML" ++ replicate 99 ' '
+finals =
+ " kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n--" ++ replicate 99 ' '