summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-15 15:24:11 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-15 15:24:11 +0000
commitc2bbdc8a162adb6e50c66a681b7df1c8fbed3f1e (patch)
tree5da1d5e3b7e7ee00619fd1bd9585f99ad06a84e1 /src-3.0/GF/Text
parent8c3111e36a039b2070e796821216d2ff59e09ee6 (diff)
commands for displaying transliteration tables
Diffstat (limited to 'src-3.0/GF/Text')
-rw-r--r--src-3.0/GF/Text/Lexing.hs3
-rw-r--r--src-3.0/GF/Text/Transliterations.hs87
2 files changed, 89 insertions, 1 deletions
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 5ad2a69b7..16391d183 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -1,5 +1,6 @@
module GF.Text.Lexing (stringOp) where
+import GF.Text.Transliterations
import GF.Text.UTF8
import Data.Char
@@ -19,7 +20,7 @@ stringOp name = case name of
"unwords" -> Just $ appUnlexer unwords
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
- _ -> Nothing
+ _ -> transliterate name
appLexer :: (String -> [String]) -> String -> String
appLexer f = unwords . filter (not . null) . f
diff --git a/src-3.0/GF/Text/Transliterations.hs b/src-3.0/GF/Text/Transliterations.hs
new file mode 100644
index 000000000..28f653dcf
--- /dev/null
+++ b/src-3.0/GF/Text/Transliterations.hs
@@ -0,0 +1,87 @@
+module GF.Text.Transliterations (transliterate,transliteration,characterTable) where
+
+import GF.Text.UTF8
+
+import Data.Char
+import qualified Data.Map as Map
+
+transliterate :: String -> Maybe (String -> String)
+transliterate s = case s of
+ 'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t
+ 't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
+ _ -> Nothing
+
+transliteration :: String -> Maybe Transliteration
+transliteration s = case s of
+ "devanagari" -> Just transDevanagari
+ "thai" -> Just transThai
+ _ -> Nothing
+
+characterTable :: Transliteration -> String
+characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where
+ prOne (i,s) = unwords ["|", show i, "|", encodeUTF8 [toEnum i], "|", s, "|"]
+
+data Transliteration = Trans {
+ trans_to_unicode :: Map.Map String Int,
+ trans_from_unicode :: Map.Map Int String
+ }
+
+appTransToUnicode :: Transliteration -> String -> String
+appTransToUnicode trans =
+ concat .
+ map (\c -> maybe c (return . toEnum) $
+ Map.lookup c (trans_to_unicode trans)
+ ) .
+ unchar
+
+appTransFromUnicode :: Transliteration -> String -> String
+appTransFromUnicode trans =
+ concat .
+ map (maybe "?" id .
+ flip Map.lookup (trans_from_unicode trans)
+ ) .
+ map fromEnum
+
+
+-- conventions:
+-- each character is either [letter] or [letter+nonletter]
+-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
+
+mkTransliteration :: [String] -> [Int] -> Transliteration
+mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts))
+ where
+ tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
+ uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
+
+
+unchar :: String -> [String]
+unchar s = case s of
+ c:d:cs
+ | isAlpha d -> [c] : unchar (d:cs)
+ | isSpace d -> [c] : unchar cs
+ | otherwise -> [c,d] : unchar cs
+ [_] -> [s]
+ _ -> []
+
+transThai :: Transliteration
+transThai = mkTransliteration allTrans allCodes where
+ allTrans = words $
+ "- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' " ++
+ "t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' " ++
+ "p3 m y r - l - w s- s. s h l' O h' - " ++
+ "a. a a: a+ i i: v v: u u: - - - - - - " ++
+ "e e' o: a% a& L R S T1 T2 T3 T4 K - - - " ++
+ "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
+ allCodes = [0x0e00 .. 0x0e7f]
+
+transDevanagari :: Transliteration
+transDevanagari = mkTransliteration allTrans allCodes where
+ allTrans = words $
+ "~ * - - " ++
+ "a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++
+ "k K g G N: c C j J n: t. T. d. D. n. t " ++
+ "T d D n - p P b B m y r - l - - v " ++
+ "S s. s h - - r. - A i I u U R - - " ++
+ "- e E o O "
+ allCodes = [0x0901 .. 0x094c]
+