summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2007-01-07 22:49:55 +0000
committeraarne <aarne@cs.chalmers.se>2007-01-07 22:49:55 +0000
commitd18ccbf02ef5a7d9ea98775f3e64c10e0105c7f0 (patch)
treef7b4585d0f22f26a07e639c41ace923a5d0d9592
parent090bb304666457e8c13aadbd45924a7f80459ae7 (diff)
thai pron heuristic (not finished)
-rw-r--r--examples/numerals/thai.gf20
-rw-r--r--src/GF/Text/Thai.hs88
2 files changed, 93 insertions, 15 deletions
diff --git a/examples/numerals/thai.gf b/examples/numerals/thai.gf
index fcb2c7bcb..1ef77a8a7 100644
--- a/examples/numerals/thai.gf
+++ b/examples/numerals/thai.gf
@@ -18,13 +18,13 @@ lin
pot01 = mkNum "hnvg" "hnvg" "eOMd'" ;
- n2 = mkNum "s-Og" "y'i:E" "s-Og" ;
+ n2 = mkNum "s-Og" "y'i:T1" "s-Og" ;
n3 = regNum "s-a:m" ;
- n4 = regNum "s-i:E" ; -- E = E48 '
- n5 = regNum "hTa:" ; -- T = E49 9
+ n4 = regNum "s-i:T1" ; -- T1 = E48 '
+ n5 = regNum "hT2a:" ; -- T2 = E49 9
n6 = regNum "ho?k" ;
- n7 = regNum "ecMd'" ; -- M = E47 w
- n8 = regNum "e:pd'" ;
+ n7 = regNum "ecMd'" ; -- M = E47 w
+ n8 = regNum "e'pd'" ;
n9 = regNum "eka:" ;
@@ -33,12 +33,12 @@ lin
pot110 = {s = sip} ;
pot111 = {s = table {
Unit => ["s'ib et"] ;
- Thousand => ["hnvg hmv:En hnvg p2an"]
+ Thousand => ["hnvg hmv:T1n hnvg p2an"]
}
} ;
pot1to19 d = {s = table {
Unit => "s'ib" ++ d.s ! After ;
- Thousand => ["hnvg hmv:En"] ++ d.s ! Indep ++ "p2an"
+ Thousand => ["hnvg hmv:T1n"] ++ d.s ! Indep ++ "p2an"
}
} ;
pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;
@@ -64,6 +64,6 @@ oper
mkNum x x x ;
- sip = table {Unit => "s'ib" ; Thousand => "hmv:En"} ;
- roy = table {Unit => "rTOy'" ; Thousand => "se:n"} ;
- phan = table {Unit => [] ; Thousand => "p2an"} ;
+ sip = table {Unit => "s'ib" ; Thousand => "hmv:T1n"} ;
+ roy = table {Unit => "rT2Oy'" ; Thousand => "e'sn"} ;
+ phan = table {Unit => [] ; Thousand => "p2an"} ;
diff --git a/src/GF/Text/Thai.hs b/src/GF/Text/Thai.hs
index e6c1f8003..5f4b0cc3f 100644
--- a/src/GF/Text/Thai.hs
+++ b/src/GF/Text/Thai.hs
@@ -27,7 +27,7 @@ mkThai = concat . map mkThaiWord . words
type ThaiChar = Char
mkThaiWord :: String -> [ThaiChar]
-mkThaiWord = map (toEnum . mkThaiChar) . unchar
+mkThaiWord = map (toEnum . mkThaiChar) . unchar . snd . pronAndOrth
mkThaiChar :: String -> Int
mkThaiChar c = maybe 0 id $ Map.lookup c thaiMap
@@ -56,13 +56,22 @@ unchar s = case s of
[_] -> [s]
_ -> []
+-- you can prefix transliteration by irregular phonology in []
+
+pronAndOrth :: String -> (Maybe String, String)
+pronAndOrth s = case s of
+ '[':cs -> case span (/=']') cs of
+ (p,_:o) -> (Just p,o)
+ _ -> (Nothing,s)
+ _ -> (Nothing,s)
+
allThaiTrans :: [String]
allThaiTrans = words $
"- k k1 - k2 - k3 g c c1 c2 s c3 y d t " ++
"t1 t2 t3 n d' t' t4 t5 t6 n b p p1 f p2 f' " ++
"p3 m y' r - l - w s' r' s- h l' O h' - " ++
"a a. a: a+ i i: v v: u u: - - - - - - " ++
- "e e: o: a% a& L R M E T - - - - - - " ++
+ "e e' o: a% a& L R M T1 T2 T3 T4 - - - - " ++
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
@@ -70,6 +79,61 @@ allThaiCodes :: [Int]
allThaiCodes = [0x0e00 .. 0x0e7f]
+-- derive the pronunciation of a syllable
+
+pronSyll :: [Int] -> String
+pronSyll s = cons1 ++ voc ++ cons2 where
+ voc = toned tone $ pronThaiChar vo
+
+ cons1 = concatMap pronThaiChar co1 ----
+ cons2 = mkThaiPron $ unwords $ map recodeThai co2 -- takes care of final ----
+
+ (vo,cc@(co1,co2)) = case s of
+ c:cs | initVowel c -> (c,getCons cs)
+ c1:c:c2 | internVowel c -> (c,([c1],getFinal c2))
+ c1:0x0e2d:c2 -> (0x0e42,([c1],getFinal c2))
+ c0:c1:c:c2 | cluster c0 c1 && internVowel c -> (c,([c0,c1],getFinal c2))
+ c0:c1:0x0e2d:c2 | cluster c0 c1 -> (0x0e42,([c0,c1],getFinal c2))
+ _ -> (0x0e42,getCons s) ---- "o"
+
+ getCons cs = case cs of
+ c0:c1:c2 | cluster c1 c1 -> ([c0,c1],getFinal c2)
+ c:c2 -> ([c],getFinal c2)
+
+ getFinal = snd . getToneFinal
+ toneMark = fst . getToneFinal
+
+ getToneFinal c = case c of
+ [ _,0x0e4c] -> ([], []) -- killer
+ [t,_,0x0e4c] -> ([t],[]) -- killer
+ _ -> splitAt (length c - 1) c
+
+ initVowel c = 0x0e40 <= c && c <= 0x0e44
+ internVowel c = 0x0e30 <= c && c <= 0x0e39
+
+ cluster c0 c1 =
+ c0 == 0x0e2b -- h
+ || c1 == 0x0e23 -- r
+ || c1 == 0x0e25 -- l
+ || c1 == 0x0e27 -- w
+
+ classC = case co1 of
+ _ -> "L" ----
+
+ lengthV = case vo of
+ _ -> False ----
+
+ liveness = case co2 of
+ _ -> False ----
+
+ tone = case (classC,lengthV,liveness,toneMark) of
+ _ -> ""
+
+ toned t v = t ++ v ----
+
+-- [0x0e00 .. 0x0e7f]
+
+
-- to test
test1 = testThai "k2wa:mrak"
@@ -91,14 +155,21 @@ thaiFile f mo = do
let put = maybe putStr writeFile mo
put $ encodeUTF8 $ thaiStrings s
-mkThaiPron = concat . render . unchar where
+mkThaiPron s = case fst $ pronAndOrth s of
+ Just p -> p
+ _ -> concat $ render $ unchar s
+ where
render s = case s of
- [c] -> maybe c return (Map.lookup c thaiFinalMap): []
+ [c] -> finalThai c : []
c:cs -> pronThai c : render cs
_ -> []
+finalThai c = maybe c return (Map.lookup c thaiFinalMap)
thaiFinalMap = Map.fromList $ zip allThaiTrans finals
+classThai c = maybe c return (Map.lookup c thaiClassMap)
+thaiClassMap = Map.fromList $ zip allThaiTrans heights
+
thaiTable :: String
thaiTable = unlines [
@@ -118,13 +189,20 @@ showThai s = case s of
--- v:_ | elem v "ivu" -> map (toEnum . mkThaiChar) ["O",s]
_ -> [toEnum $ mkThaiChar s]
+
+pronThaiChar = pronThai . recodeThai
+
+recodeThai c = allThaiTrans !! (c - 0x0e00)
+
pronThai s = case s of
[c,p]
- | isUpper c && isDigit p -> [p]
+ | c == 'N' && isDigit p -> [p]
+ | c == 'T' && isDigit p -> ['\'',p]
| isDigit p -> c:"h"
| p==':' -> c:[c]
| elem p "%&" -> c:"y"
| p=='+' -> c:"m"
+ | s == "e'" -> "รค"
| otherwise -> [c]
[c] | isUpper c -> "" --- O
_ -> s