summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2007-01-08 16:12:27 +0000
committeraarne <aarne@cs.chalmers.se>2007-01-08 16:12:27 +0000
commit5ac9974085393d25cec8c0e1aab2ee537e0d971d (patch)
tree8afd97925e518d35ff7354aeffe9f81d2dc0bea8
parent998c20d09850a30a5b9142202645b6f6995a1818 (diff)
thai pronunciation better now
-rw-r--r--src/GF/Text/Thai.hs161
1 files changed, 103 insertions, 58 deletions
diff --git a/src/GF/Text/Thai.hs b/src/GF/Text/Thai.hs
index 5f4b0cc3f..b5e1f6b98 100644
--- a/src/GF/Text/Thai.hs
+++ b/src/GF/Text/Thai.hs
@@ -20,6 +20,8 @@ import Data.Char
import GF.Text.UTF8
import Data.List
+import Debug.Trace
+
mkThai :: String -> String
mkThai = concat . map mkThaiWord . words
@@ -79,59 +81,101 @@ allThaiCodes :: [Int]
allThaiCodes = [0x0e00 .. 0x0e7f]
--- derive the pronunciation of a syllable
-
-pronSyll :: [Int] -> String
-pronSyll s = cons1 ++ voc ++ cons2 where
- voc = toned tone $ pronThaiChar vo
-
- cons1 = concatMap pronThaiChar co1 ----
- cons2 = mkThaiPron $ unwords $ map recodeThai co2 -- takes care of final ----
-
- (vo,cc@(co1,co2)) = case s of
- c:cs | initVowel c -> (c,getCons cs)
- c1:c:c2 | internVowel c -> (c,([c1],getFinal c2))
- c1:0x0e2d:c2 -> (0x0e42,([c1],getFinal c2))
- c0:c1:c:c2 | cluster c0 c1 && internVowel c -> (c,([c0,c1],getFinal c2))
- c0:c1:0x0e2d:c2 | cluster c0 c1 -> (0x0e42,([c0,c1],getFinal c2))
- _ -> (0x0e42,getCons s) ---- "o"
-
- getCons cs = case cs of
- c0:c1:c2 | cluster c1 c1 -> ([c0,c1],getFinal c2)
- c:c2 -> ([c],getFinal c2)
+---------------------
+-- heuristic pronunciation of codes
+---------------------
- getFinal = snd . getToneFinal
- toneMark = fst . getToneFinal
+-- this works for one syllable
- getToneFinal c = case c of
- [ _,0x0e4c] -> ([], []) -- killer
- [t,_,0x0e4c] -> ([t],[]) -- killer
- _ -> splitAt (length c - 1) c
+mkPronSyllable s = pronSyllable $ getSyllable $ map mkThaiChar $ unchar s
- initVowel c = 0x0e40 <= c && c <= 0x0e44
- internVowel c = 0x0e30 <= c && c <= 0x0e39
+data Syllable = Syll {
+ initv :: [Int],
+ initc :: [Int],
+ midv :: [Int],
+ finalc :: [Int],
+ finalv :: [Int],
+ tone :: [Int],
+ shorten :: Bool,
+ kill :: Bool
+ }
+ deriving Show
- cluster c0 c1 =
- c0 == 0x0e2b -- h
- || c1 == 0x0e23 -- r
- || c1 == 0x0e25 -- l
- || c1 == 0x0e27 -- w
+data Tone = TMid | TLow | THigh | TRise | TFall
+ deriving Show
- classC = case co1 of
- _ -> "L" ----
+data CClass = CLow | CMid | CHigh
+ deriving Show
- lengthV = case vo of
- _ -> False ----
+pronSyllable :: Syllable -> String
+pronSyllable s =
+ concatMap pronThaiChar (reverse $ initc s) ++
+ tonem ++
+ vowel ++
+ finalCons
+ -- concatMap pronThaiChar (reverse $ finalc s)
- liveness = case co2 of
- _ -> False ----
-
- tone = case (classC,lengthV,liveness,toneMark) of
- _ -> ""
-
- toned t v = t ++ v ----
+ where
--- [0x0e00 .. 0x0e7f]
+ vowel = case (initv s, midv s, finalv s, shorten s, tone s) of
+ (i,m,f,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
+
+ finalCons =
+ let (c,cs) = splitAt 1 $ finalc s
+ in
+ case c of
+ [] -> []
+ [k] -> concatMap pronThaiChar (reverse cs) ++ finalThai k
+
+ iclass = case take 1 (reverse $ initc s) of
+ [c] -> classThai c
+ [] -> CMid -- O
+
+ isLong = not (shorten s) && case vowel of
+ _:_:_ -> True ----
+ _ -> False
+
+ isLive = case finalCons of
+ c | elem c ["n","m","g"] -> True
+ "" -> isLong
+ _ -> False
+
+ tonem = case (iclass,isLive,isLong,tone s) of
+ (_,_,_, [0x0e4a]) -> tHigh
+ (_,_,_, [0x0e4b]) -> tRise
+ (CLow,_,_,[0x0e49]) -> tRise
+ (_,_,_, [0x0e49]) -> tFall
+ (CLow,_,_,[0x0e48]) -> tFall
+ (_, _,_,[0x0e48]) -> tLow
+ (CHigh,True,_,_) -> tRise
+ (_, True,_,_) -> tMid
+ (CLow,False,False,_) -> tHigh
+ (CLow,False,_,_) -> tFall
+ _ -> tLow
+
+(tMid,tHigh,tLow,tRise,tFall) = ("-","'","`","~","^")
+
+isVowel c = 0x0e30 <= c && c <= 0x0e44 ----
+isCons c = 0x0e01 <= c && c <= 0x0e2f ----
+isTone c = 0x0e48 <= c && c <= 0x0e4b
+
+getSyllable :: [Int] -> Syllable
+getSyllable = foldl get (Syll [] [] [] [] [] [] False False) where
+ get syll c = case c of
+ 0x0e47 -> syll {shorten = True}
+ 0x0e4c -> syll {kill = True, finalc = tail (finalc syll)} --- always last
+ 0x0e2d
+ | null (initc syll) -> syll {initc = [c]} -- "O"
+ | otherwise -> syll {midv = c : midv syll}
+ _
+ | isVowel c -> if null (initc syll)
+ then syll {initv = c : initv syll}
+ else syll {midv = c : midv syll}
+ | isCons c -> if null (midv syll)
+ then syll {initc = c : initc syll}
+ else syll {finalc = c : finalc syll}
+ | isTone c -> syll {tone = [c]}
+ _ -> syll ---- check this
-- to test
@@ -147,7 +191,12 @@ test3 = do
testThai :: String -> IO ()
testThai s = do
putStrLn $ encodeUTF8 $ mkThai s
- putStrLn $ unwords $ map mkThaiPron $ words s
+ putStrLn $ unwords $ map mkPronSyllable $ words s
+
+testSyllable s =
+ let y = getSyllable $ map mkThaiChar $ unchar s
+ in
+ putStrLn $ pronSyllable $ trace (show y) y
thaiFile :: FilePath -> Maybe FilePath -> IO ()
thaiFile f mo = do
@@ -155,20 +204,16 @@ thaiFile f mo = do
let put = maybe putStr writeFile mo
put $ encodeUTF8 $ thaiStrings s
-mkThaiPron s = case fst $ pronAndOrth s of
- Just p -> p
- _ -> concat $ render $ unchar s
- where
- render s = case s of
- [c] -> finalThai c : []
- c:cs -> pronThai c : render cs
- _ -> []
+finalThai c = maybe "" return (Map.lookup c thaiFinalMap)
+thaiFinalMap = Map.fromList $ zip allThaiCodes finals
-finalThai c = maybe c return (Map.lookup c thaiFinalMap)
-thaiFinalMap = Map.fromList $ zip allThaiTrans finals
+classThai c = maybe CLow readClass (Map.lookup c thaiClassMap)
+thaiClassMap = Map.fromList $ zip allThaiCodes heights
-classThai c = maybe c return (Map.lookup c thaiClassMap)
-thaiClassMap = Map.fromList $ zip allThaiTrans heights
+readClass s = case s of
+ 'L' -> CLow
+ 'M' -> CMid
+ 'H' -> CHigh
thaiTable :: String