summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text/Transliterations.hs
diff options
context:
space:
mode:
author aarne <aarne@cs.chalmers.se> 2008-06-25 16:54:35 +0000
committer aarne <aarne@cs.chalmers.se> 2008-06-25 16:54:35 +0000
commit e9e80fc389365e24d4300d7d5390c7d833a96c50 (patch)
tree f0b58473adaa670bd8fc52ada419d8cad470ee03 /src-3.0/GF/Text/Transliterations.hs
parent b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff)
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src-3.0/GF/Text/Transliterations.hs')
-rw-r--r-- src-3.0/GF/Text/Transliterations.hs 97
1 files changed, 0 insertions, 97 deletions
diff --git a/src-3.0/GF/Text/Transliterations.hs b/src-3.0/GF/Text/Transliterations.hs
deleted file mode 100644
index 30c098df8..000000000
--- a/src-3.0/GF/Text/Transliterations.hs
+++ /dev/null
@@ -1,97 +0,0 @@
-module GF.Text.Transliterations (transliterate,transliteration,characterTable) where
-
-import GF.Text.UTF8
-
-import Data.Char
-import qualified Data.Map as Map
-
--- transliterations between ASCII and a Unicode character set
-
--- current transliterations: devanagari, thai
-
--- to add a new one: define the Unicode range and the corresponding ASCII strings,
--- which may be one or two characters long
-
--- conventions to be followed:
--- each transliterated character is either [letter] or [letter+nonletter]
--- when the Unicode range is sparse, mark each missing code as "-" in the transliteration list
--- characters can be invisible: they are dropped when translating to Unicode
-
-transliterate :: String -> Maybe (String -> String)
--- "from_<name>" gives Unicode->ASCII, "to_<name>" gives ASCII->Unicode; Nothing if prefix or name is unknown
-transliterate ('f':'r':'o':'m':'_':name) = appTransFromUnicode `fmap` transliteration name
-transliterate ('t':'o':'_':name) = appTransToUnicode `fmap` transliteration name
-transliterate _ = Nothing
-
-transliteration :: String -> Maybe Transliteration
--- finds a scheme by its exact name; Nothing when the name is not in the table
-transliteration name = lookup name schemes where
-  schemes = [("devanagari", transDevanagari),
-             ("thai", transThai)]
-
-characterTable :: Transliteration -> String -- one "| code | UTF-8 glyph | token |" line per mapped code
-characterTable tr = unlines [unwords ["|", show code, "|", encodeUTF8 [toEnum code], "|", tok, "|"]
-                            | (code,tok) <- Map.assocs (trans_from_unicode tr)]
-
-data Transliteration = Trans
-  { trans_to_unicode   :: Map.Map String Int   -- ASCII token -> Unicode code point
-  , trans_from_unicode :: Map.Map Int String   -- Unicode code point -> ASCII token
-  , invisible_chars    :: [String]             -- tokens discarded before conversion to Unicode
-  }
-
-appTransToUnicode :: Transliteration -> String -> String
--- ASCII -> Unicode: split the input into tokens, drop the invisible ones, convert the rest
-appTransToUnicode trans = concatMap toUni . filter visible . unchar
- where
-  -- a token found in the table becomes its one code point; any other token is kept as typed
-  toUni tok = maybe tok (\code -> [toEnum code]) (Map.lookup tok (trans_to_unicode trans))
-  -- invisible tokens never reach the lookup
-  visible tok = tok `notElem` invisible_chars trans
-
-appTransFromUnicode :: Transliteration -> String -> String
--- Unicode -> ASCII: each character that has a table entry is replaced by its token.
--- A character without an entry (space, punctuation, text that is already ASCII) is
--- copied unchanged, mirroring how appTransToUnicode treats unknown tokens; mapping
--- it to "?" would erase word boundaries in the output.
-appTransFromUnicode trans = concatMap fromUni
- where fromUni ch = Map.findWithDefault [ch] (fromEnum ch) (trans_from_unicode trans)
-
-
-mkTransliteration :: [String] -> [Int] -> Transliteration
-mkTransliteration ts us = Trans (Map.fromList used) (Map.fromList [(u,t) | (t,u) <- used]) []
- where
-  -- tokens and codes are paired by position (zip stops at the shorter list); "-" marks an unused code
-  used = filter ((/= "-") . fst) (zip ts us)
-
-
-unchar :: String -> [String]
--- tokenizer: a character joins a following non-letter, non-space; a following space is its own token
-unchar (c:rest@(d:cs))
-  | isAlpha d = [c] : unchar rest
-  | isSpace d = [c] : [d] : unchar cs
-  | otherwise = [c,d] : unchar cs
-unchar [c] = [[c]]
-unchar [] = []
-
-transThai :: Transliteration
-transThai = mkTransliteration (concatMap words rows) [0x0e00 .. 0x0e7f] where
-  rows = -- 16 tokens per row, first row starting at U+0E00; "-" leaves that code unmapped
-    [ "- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' "
-    , "t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' "
-    , "p3 m y r - l - w s- s. s h l' O h' - "
-    , "a. a a: a+ i i: v v: u u: - - - - - - "
-    , "e e' o: a% a& L R S T1 T2 T3 T4 K - - - "
-    , "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
-    ]
-
-transDevanagari :: Transliteration
-transDevanagari = (mkTransliteration allTrans allCodes){invisible_chars = ["a"]} where -- a bare "a" is dropped going to Unicode
-  allTrans = words $ -- tokens in code-point order from U+0901; "-" leaves that code unmapped
-    "M N - - " ++
-    "a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++
-    "k K g G N: c C j J n: t. T. d. D. n. t " ++
-    "T d D n - p P b B m y r - l - - v " ++
-    "S s. s h - - r. - A i I u U R - - " ++
-    "- e E - - o O " -- U+0949/U+094A are skipped so that o, O land on U+094B/U+094C
-  allCodes = [0x0901 .. 0x094c]
-