diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-15 19:40:53 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-15 19:40:53 +0000 |
| commit | ee4db2ee7dc06b31584a8b3eec73e6ff98e516fb (patch) | |
| tree | 811d80e32dcd8604994ab91a25161f1439caa170 /src-3.0 | |
| parent | 72b4ba963971f4a815f434b55a5fea0d82c2d4e3 (diff) | |
Thai in 1.4
Diffstat (limited to 'src-3.0')
| -rw-r--r-- | src-3.0/GF/Text/Transliterations.hs | 23 |
1 files changed, 15 insertions, 8 deletions
```diff
diff --git a/src-3.0/GF/Text/Transliterations.hs b/src-3.0/GF/Text/Transliterations.hs
index 05e10dc98..30c098df8 100644
--- a/src-3.0/GF/Text/Transliterations.hs
+++ b/src-3.0/GF/Text/Transliterations.hs
@@ -5,6 +5,18 @@ import GF.Text.UTF8
 import Data.Char
 import qualified Data.Map as Map
 
+-- transliterations between ASCII and a Unicode character set
+
+-- current transliterations: devanagari, thai
+
+-- to add a new one: define the Unicode range and the corresponding ASCII strings,
+-- which may be one or two characters long
+
+-- conventions to be followed:
+-- each character is either [letter] or [letter+nonletter]
+-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
+-- characters can be invisible: ignored in translation to unicode
+
 transliterate :: String -> Maybe (String -> String)
 transliterate s = case s of
   'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t
@@ -45,11 +57,6 @@ appTransFromUnicode trans =
   map fromEnum
 
 
--- conventions:
--- each character is either [letter] or [letter+nonletter]
--- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
--- characters can be invisible: ignored in translation to unicode
-
 mkTransliteration :: [String] -> [Int] -> Transliteration
 mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) []
  where
@@ -60,9 +67,9 @@ mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip
 unchar :: String -> [String]
 unchar s = case s of
   c:d:cs
-    | isAlpha d -> [c] : unchar (d:cs)
-    | isSpace d -> [c] : unchar cs
-    | otherwise -> [c,d] : unchar cs
+    | isAlpha d -> [c] : unchar (d:cs)
+    | isSpace d -> [c]:[d]: unchar cs
+    | otherwise -> [c,d] : unchar cs
   [_] -> [s]
   _ -> []
 
```
