From c2bbdc8a162adb6e50c66a681b7df1c8fbed3f1e Mon Sep 17 00:00:00 2001 From: aarne Date: Sun, 15 Jun 2008 15:24:11 +0000 Subject: commands for displaying transliteration tables --- src-3.0/GF/Text/Lexing.hs | 3 +- src-3.0/GF/Text/Transliterations.hs | 87 +++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 src-3.0/GF/Text/Transliterations.hs (limited to 'src-3.0/GF/Text') diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs index 5ad2a69b7..16391d183 100644 --- a/src-3.0/GF/Text/Lexing.hs +++ b/src-3.0/GF/Text/Lexing.hs @@ -1,5 +1,6 @@ module GF.Text.Lexing (stringOp) where +import GF.Text.Transliterations import GF.Text.UTF8 import Data.Char @@ -19,7 +20,7 @@ stringOp name = case name of "unwords" -> Just $ appUnlexer unwords "to_utf8" -> Just encodeUTF8 "from_utf8" -> Just decodeUTF8 - _ -> Nothing + _ -> transliterate name appLexer :: (String -> [String]) -> String -> String appLexer f = unwords . filter (not . null) . f diff --git a/src-3.0/GF/Text/Transliterations.hs b/src-3.0/GF/Text/Transliterations.hs new file mode 100644 index 000000000..28f653dcf --- /dev/null +++ b/src-3.0/GF/Text/Transliterations.hs @@ -0,0 +1,87 @@ +module GF.Text.Transliterations (transliterate,transliteration,characterTable) where + +import GF.Text.UTF8 + +import Data.Char +import qualified Data.Map as Map + +transliterate :: String -> Maybe (String -> String) +transliterate s = case s of + 'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t + 't':'o':'_':t -> fmap appTransToUnicode $ transliteration t + _ -> Nothing + +transliteration :: String -> Maybe Transliteration +transliteration s = case s of + "devanagari" -> Just transDevanagari + "thai" -> Just transThai + _ -> Nothing + +characterTable :: Transliteration -> String +characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where + prOne (i,s) = unwords ["|", show i, "|", encodeUTF8 [toEnum i], "|", s, "|"] + +data Transliteration = Trans { + trans_to_unicode :: Map.Map String Int, + trans_from_unicode :: Map.Map Int String + } + +appTransToUnicode :: Transliteration -> String -> String +appTransToUnicode trans = + concat . + map (\c -> maybe c (return . toEnum) $ + Map.lookup c (trans_to_unicode trans) + ) . + unchar + +appTransFromUnicode :: Transliteration -> String -> String +appTransFromUnicode trans = + concat . + map (maybe "?" id . + flip Map.lookup (trans_from_unicode trans) + ) . + map fromEnum + + +-- conventions: +-- each character is either [letter] or [letter+nonletter] +-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations + +mkTransliteration :: [String] -> [Int] -> Transliteration +mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) + where + tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"] + uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"] + + +unchar :: String -> [String] +unchar s = case s of + c:d:cs + | isAlpha d -> [c] : unchar (d:cs) + | isSpace d -> [c] : unchar cs + | otherwise -> [c,d] : unchar cs + [_] -> [s] + _ -> [] + +transThai :: Transliteration +transThai = mkTransliteration allTrans allCodes where + allTrans = words $ + "- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' " ++ + "t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' " ++ + "p3 m y r - l - w s- s. s h l' O h' - " ++ + "a. a a: a+ i i: v v: u u: - - - - - - " ++ + "e e' o: a% a& L R S T1 T2 T3 T4 K - - - " ++ + "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - " + allCodes = [0x0e00 .. 0x0e7f] + +transDevanagari :: Transliteration +transDevanagari = mkTransliteration allTrans allCodes where + allTrans = words $ + "~ * - - " ++ + "a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++ + "k K g G N: c C j J n: t. T. d. D. n. t " ++ + "T d D n - p P b B m y r - l - - v " ++ + "S s. s h - - r. - A i I u U R - - " ++ + "- e E o O " + allCodes = [0x0901 .. 0x094c] + -- cgit v1.2.3