diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/Tamil.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/Tamil.hs')
| -rw-r--r-- | src-3.0/GF/Text/Tamil.hs | 77 |
1 files changed, 77 insertions, 0 deletions
----------------------------------------------------------------------
-- |
-- Module      : Tamil
-- Maintainer  : (Maintainer)
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:40 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.5 $
--
-- Converts an ad hoc ASCII transliteration into Unicode text by way of
-- an intermediate list of two-character digraph codes.
-----------------------------------------------------------------------------

module GF.Text.Tamil (mkTamil) where

-- | Transliterate an ad hoc ASCII string: first split it into digraph
-- codes, then map every code to its output character.
mkTamil :: String -> String
mkTamil = digraphWordToUnicode . adHocToDigraphWord

-- | Split an ad hoc transliteration into two-character digraph codes.
--
-- Pairs whose first component is a backslash carry a character that is
-- meant to be passed through unchanged (spaces and @\<...\>@ markup).
-- The clause order matters: earlier clauses take priority.
adHocToDigraphWord :: String -> [(Char, Char)]
adHocToDigraphWord [] = []
-- Markup: hand over to 'spoolMarkup' until the closing '>'.
adHocToDigraphWord ('<' : rest) = ('\\', '<') : spoolMarkup rest
-- A space is passed through as a literal.
adHocToDigraphWord (' ' : rest) = ('\\', ' ') : adHocToDigraphWord rest
-- The input starts with a vowel.
adHocToDigraphWord (v1 : v2 : rest)
  -- Two of the same vowel => lengthening.
  | v1 == v2 && isVowel v1 = (cap v1, ':') : adHocToDigraphWord rest
  -- Digraphed or long vowel.
  | isVowel v1 && isVowel v2 = (cap v1, cap v2) : adHocToDigraphWord rest
adHocToDigraphWord (v : rest)
  | isVowel v = (' ', cap v) : adHocToDigraphWord rest
-- The input starts with a non-vowel; a following vowel keeps its
-- lowercase form.
adHocToDigraphWord (c : v1 : v2 : rest)
  | v1 == v2 && isVowel v1 = (' ', c) : (v1, ':') : adHocToDigraphWord rest
  | isVowel v1 && isVowel v2 = (' ', c) : (v1, v2) : adHocToDigraphWord rest
-- A single 'a' after a non-vowel is inherent and produces no code.
adHocToDigraphWord (c : 'a' : rest) = (' ', c) : adHocToDigraphWord rest
adHocToDigraphWord (c : v : rest)
  | isVowel v = (' ', c) : (' ', v) : adHocToDigraphWord rest
-- Vowelless: the character is followed by the (' ', '.') code.
adHocToDigraphWord (c : rest) = (' ', c) : (' ', '.') : adHocToDigraphWord rest

-- | True for the characters treated as vowels by the transliteration:
-- @a@, @e@, @i@, @o@, @u@ and the length mark @:@.
isVowel :: Char -> Bool
isVowel x = x `elem` "aeiou:"

-- | Upper-case the five ASCII vowels; every other character is
-- returned unchanged.
cap :: Char -> Char
cap x = case x of
  'a' -> 'A'
  'e' -> 'E'
  'i' -> 'I'
  'o' -> 'O'
  'u' -> 'U'
  c -> c

-- | Copy markup characters through as backslash-tagged pairs up to and
-- including the closing @>@, then resume transliteration.
--
-- Markup that is never closed simply ends the output instead of
-- failing with a pattern-match error.
spoolMarkup :: String -> [(Char, Char)]
spoolMarkup s = case s of
  [] -> []
  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
  c1 : cs -> ('\\', c1) : spoolMarkup cs
-- | Map every digraph code of a word to its output character.
digraphWordToUnicode :: [(Char, Char)] -> String
digraphWordToUnicode = map digraphToUnicode

-- | Look a digraph code up in the table built from 'allTamilCodes' and
-- 'allTamil'. A code that is not in the table yields its second
-- component unchanged.
digraphToUnicode :: (Char, Char) -> Char
digraphToUnicode code@(_, fallback) =
  case lookup code table of
    Just uni -> uni
    Nothing -> fallback
  where
    table = zip allTamilCodes allTamil

-- | Group a string into consecutive pairs of characters. A trailing
-- unpaired character is dropped rather than causing a pattern-match
-- failure.
mkPairs :: String -> [(Char, Char)]
mkPairs (c1 : c2 : rest) = (c1, c2) : mkPairs rest
mkPairs _ = []

-- | The digraph codes, in the same order as the characters of
-- 'allTamil'.
allTamilCodes :: [(Char, Char)]
allTamilCodes = mkPairs digraphedTamil

-- | The output characters: the code points U+0B85 through U+0BFA.
allTamil :: String
allTamil = ['\x0b85' .. '\x0bfa']

-- | The code table as one string, two characters per code, matched
-- positionally against 'allTamil'.
-- NOTE(review): the @__@ entries look like placeholders for code points
-- without a transliteration -- confirm against the Unicode chart.
digraphedTamil :: String
digraphedTamil = " AA: II: UU:______ EE:AI__ OO:AU k______ G c__ j__ \241 T______ N t______ V n p______ m y r l L M v__ s S h________a: ii: uu:______ ee:ai__ oo:au .__________________ :______________________________#1#2#3#4#5#6#7#8#9^1^2^3=d=m=y=d=c==ru##"
