diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
| commit | e9e80fc389365e24d4300d7d5390c7d833a96c50 (patch) | |
| tree | f0b58473adaa670bd8fc52ada419d8cad470ee03 /src-3.0/GF/Text/Transliterations.hs | |
| parent | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff) | |
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src-3.0/GF/Text/Transliterations.hs')
| -rw-r--r-- | src-3.0/GF/Text/Transliterations.hs | 97 |
1 files changed, 0 insertions, 97 deletions
diff --git a/src-3.0/GF/Text/Transliterations.hs b/src-3.0/GF/Text/Transliterations.hs deleted file mode 100644 index 30c098df8..000000000 --- a/src-3.0/GF/Text/Transliterations.hs +++ /dev/null @@ -1,97 +0,0 @@ -module GF.Text.Transliterations (transliterate,transliteration,characterTable) where - -import GF.Text.UTF8 - -import Data.Char -import qualified Data.Map as Map - --- transliterations between ASCII and a Unicode character set - --- current transliterations: devanagari, thai - --- to add a new one: define the Unicode range and the corresponding ASCII strings, --- which may be one or two characters long - --- conventions to be followed: --- each character is either [letter] or [letter+nonletter] --- when using a sparse range of unicodes, mark missing codes as "-" in transliterations --- characters can be invisible: ignored in translation to unicode - -transliterate :: String -> Maybe (String -> String) -transliterate s = case s of - 'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t - 't':'o':'_':t -> fmap appTransToUnicode $ transliteration t - _ -> Nothing - -transliteration :: String -> Maybe Transliteration -transliteration s = case s of - "devanagari" -> Just transDevanagari - "thai" -> Just transThai - _ -> Nothing - -characterTable :: Transliteration -> String -characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where - prOne (i,s) = unwords ["|", show i, "|", encodeUTF8 [toEnum i], "|", s, "|"] - -data Transliteration = Trans { - trans_to_unicode :: Map.Map String Int, - trans_from_unicode :: Map.Map Int String, - invisible_chars :: [String] - } - -appTransToUnicode :: Transliteration -> String -> String -appTransToUnicode trans = - concat . - map (\c -> maybe c (return . toEnum) $ - Map.lookup c (trans_to_unicode trans) - ) . - filter (flip notElem (invisible_chars trans)) . - unchar - -appTransFromUnicode :: Transliteration -> String -> String -appTransFromUnicode trans = - concat . - map (maybe "?" id . - flip Map.lookup (trans_from_unicode trans) - ) . - map fromEnum - - -mkTransliteration :: [String] -> [Int] -> Transliteration -mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) [] - where - tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"] - uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"] - - -unchar :: String -> [String] -unchar s = case s of - c:d:cs - | isAlpha d -> [c] : unchar (d:cs) - | isSpace d -> [c]:[d]: unchar cs - | otherwise -> [c,d] : unchar cs - [_] -> [s] - _ -> [] - -transThai :: Transliteration -transThai = mkTransliteration allTrans allCodes where - allTrans = words $ - "- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' " ++ - "t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' " ++ - "p3 m y r - l - w s- s. s h l' O h' - " ++ - "a. a a: a+ i i: v v: u u: - - - - - - " ++ - "e e' o: a% a& L R S T1 T2 T3 T4 K - - - " ++ - "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - " - allCodes = [0x0e00 .. 0x0e7f] - -transDevanagari :: Transliteration -transDevanagari = (mkTransliteration allTrans allCodes){invisible_chars = ["a"]} where - allTrans = words $ - "M N - - " ++ - "a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++ - "k K g G N: c C j J n: t. T. d. D. n. t " ++ - "T d D n - p P b B m y r - l - - v " ++ - "S s. s h - - r. - A i I u U R - - " ++ - "- e E o O " - allCodes = [0x0901 .. 0x094c] - |
