summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text/Devanagari.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/Devanagari.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/Devanagari.hs')
-rw-r--r--src-3.0/GF/Text/Devanagari.hs97
1 files changed, 97 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Devanagari.hs b/src-3.0/GF/Text/Devanagari.hs
new file mode 100644
index 000000000..bf4343cd0
--- /dev/null
+++ b/src-3.0/GF/Text/Devanagari.hs
@@ -0,0 +1,97 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Text.Devanagari
+-- Maintainer : (Maintainer)
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:23:34 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.5 $
+--
+-- Transliteration of an ad hoc ASCII notation into Devanagari Unicode text.
+-----------------------------------------------------------------------------
+
+module GF.Text.Devanagari (mkDevanagari) where
+
+-- | Transliterate a string written in the ad hoc ASCII notation.
+--
+-- The input is first broken into two-character digraph codes by
+-- 'adHocToDigraphWord'; 'digraphWordToUnicode' then replaces every code
+-- by the single character it stands for.
+mkDevanagari :: String -> String
+mkDevanagari input = digraphWordToUnicode (adHocToDigraphWord input)
+
+-- | Break a string in the ad hoc notation into two-character digraph codes.
+--
+-- Each resulting pair is later looked up by 'digraphToUnicode'.  Pairs whose
+-- first component is a backslash carry a character that is meant to pass
+-- through literally (spaces and the contents of @<...>@ markup).
+--
+-- The alternatives overlap heavily and are tried top to bottom, so their
+-- order is significant: vowel-initial input is handled first, then
+-- consonants followed by @'H'@, then all remaining consonants.
+adHocToDigraphWord :: String -> [(Char, Char)]
+adHocToDigraphWord str = case str of
+ [] -> []
+ '<' : cs -> ('\\', '<') : spoolMarkup cs
+ ' ' : cs -> ('\\', ' ') : adHocToDigraphWord cs -- space is not dropped: it is emitted as an escaped pair
+
+-- c1 is a vowel (the code uses the capitalised form given by 'cap')
+ -- Two of the same vowel => lengthening, coded as (vowel, ':')
+ c1 : c2 : cs | c1 == c2 && isVowel c1 -> (cap c1, ':') : adHocToDigraphWord cs
+ -- two different vowels in a row form one code (digraphed or long vowel)
+ c1 : c2 : cs | isVowel c1 && isVowel c2 -> (cap c1, cap c2) : adHocToDigraphWord cs
+ c1 : cs | isVowel c1 -> (' ', cap c1) : adHocToDigraphWord cs
+
+-- c1 isn't a vowel
+ -- (disabled alternatives, kept from the original source)
+ -- c1 : 'a' : [] -> [(' ', c1)] -- a inherent
+ -- c1 : c2 : [] | isVowel c2 -> (' ', c1) : [(' ', c2)]
+
+ -- c1 is aspirated, i.e. followed by 'H'; the vowels after it keep their
+ -- lower-case form, unlike the vowel-initial alternatives above
+ c1 : 'H' : c2 : c3 : cs | c2 == c3 && isVowel c2 ->
+ (c1, 'H') : (c2, ':') : adHocToDigraphWord cs
+ c1 : 'H' : c2 : c3 : cs | isVowel c2 && isVowel c3 ->
+ (c1, 'H') : (c2, c3) : adHocToDigraphWord cs
+ c1 : 'H' : 'a' : cs -> (c1, 'H') : adHocToDigraphWord cs -- a inherent: no code is emitted for it
+ c1 : 'H' : c2 : cs | isVowel c2 -> (c1, 'H') : (' ', c2) : adHocToDigraphWord cs
+ -- not vowelless at EOW: before a space or at end of input no (' ', '^') is added
+ c1 : 'H' : ' ' : cs -> (c1, 'H') : ('\\', ' ') : adHocToDigraphWord cs
+ c1 : 'H' : [] -> [(c1, 'H')]
+ c1 : 'H' : cs -> (c1, 'H') : (' ', '^') : adHocToDigraphWord cs -- vowelless: followed by the (' ', '^') code
+
+ -- c1 unasp.: same sequence of cases as above, with c1 coded as (' ', c1)
+ c1 : c2 : c3 : cs | c2 == c3 && isVowel c2 -> (' ', c1) : (c2, ':') : adHocToDigraphWord cs
+ c1 : c2 : c3 : cs | isVowel c2 && isVowel c3 -> (' ', c1) : (c2, c3) : adHocToDigraphWord cs
+ c1 : 'a' : cs -> (' ', c1) : adHocToDigraphWord cs -- a inherent: no code is emitted for it
+ c1 : c2 : cs | isVowel c2 -> (' ', c1) : (' ', c2) : adHocToDigraphWord cs
+ -- not vowelless at EOW: before a space or at end of input no (' ', '^') is added
+ c1 : ' ' : cs -> (' ', c1) : ('\\', ' '): adHocToDigraphWord cs
+ c1 : [] -> [(' ', c1)]
+ 'M' : cs -> (' ', 'M') : adHocToDigraphWord cs -- vowelless but no vowelless sign for anusvara
+ c1 : cs -> (' ', c1) : (' ', '^') : adHocToDigraphWord cs -- vowelless: followed by the (' ', '^') code
+
+-- | Whether a character is treated as a vowel by 'adHocToDigraphWord'.
+--
+-- The set is the five lower-case Latin vowels plus @':'@.
+-- NOTE(review): ':' is presumably included because it is the length mark
+-- used in the digraph codes; confirm before changing the set.
+isVowel :: Char -> Bool
+isVowel x = x `elem` "aeiou:"
+-- | Upper-case one of the five lower-case Latin vowels.
+--
+-- Any other character, including @':'@ and letters that are already
+-- upper case, is returned unchanged.
+cap :: Char -> Char
+cap x = maybe x id (lookup x vowelCaps)
+  where
+    vowelCaps = zip "aeiou" "AEIOU"
+
+-- | Copy the inside of a @<...>@ markup tag through untransliterated.
+--
+-- Every character up to and including the closing @'>'@ is emitted as an
+-- escaped pair @('\\', c)@.  After the @'>'@, normal processing resumes
+-- with 'adHocToDigraphWord'.
+spoolMarkup :: String -> [(Char, Char)]
+spoolMarkup s = case s of
+  -- The input ended inside an unterminated tag: stop cleanly instead of
+  -- failing with a non-exhaustive pattern match.
+  []       -> []
+  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
+  c1 : cs  -> ('\\', c1) : spoolMarkup cs
+
+
+-- | Replace every digraph code in the list by the character that
+-- 'digraphToUnicode' assigns to it, preserving order and length.
+digraphWordToUnicode :: [(Char, Char)] -> String
+digraphWordToUnicode codes = [digraphToUnicode code | code <- codes]
+
+-- | Translate a single digraph code to one character.
+--
+-- The code is looked up in the table that pairs 'allDevanagariCodes' with
+-- 'allDevanagari'.  A code that is not in the table yields its own second
+-- component, which is how escaped pairs such as @('\\', ' ')@ come out as
+-- the plain character.
+digraphToUnicode :: (Char, Char) -> Char
+digraphToUnicode code@(_, fallback) = maybe fallback id (lookup code table)
+  where
+    table = zip allDevanagariCodes allDevanagari
+
+-- | The digraph codes, concatenated without separators.
+--
+-- The string is read two characters at a time (see 'mkPairs' and
+-- 'allDevanagariCodes'), and the n-th pair is matched with the n-th
+-- character of 'allDevanagari', i.e. with code points from U+0901 upwards.
+-- Position therefore matters: inserting or deleting a single character
+-- shifts every later code.
+-- NOTE(review): the "__" pairs look like placeholders for code points that
+-- have no ad hoc code -- confirm against the Unicode chart.
+digraphedDevanagari = " ~ M ;__ AA: II: UU:RoLoEvE~ EE:AvA~ OAU kkH ggHNG ccH jjH \241 TTH DDH N ttH ddH nn. ppH bbH m y rr. l LL. v \231 S s h____ .-Sa: ii: uu:ror:eve~ eaiava~ oau ^____OM | -dddu______ Q X G zD.RH fy.R:L:mrmR#I#d#0#1#2#3#4#5#6#7#8#9#o"
+
+-- | The digraph table 'digraphedDevanagari' split into its individual
+-- two-character codes, in table order.
+allDevanagariCodes :: [(Char, Char)]
+allDevanagariCodes = mkPairs digraphedDevanagari
+
+-- | All characters from U+0901 to U+0970 inclusive, in code point order.
+--
+-- These are the target characters that 'digraphToUnicode' pairs with the
+-- entries of 'allDevanagariCodes'.
+allDevanagari :: String
+allDevanagari = ['\x0901' .. '\x0970']
+
+-- | Split a string into consecutive, non-overlapping pairs of characters.
+--
+-- @mkPairs "abcd" == [('a','b'),('c','d')]@.  If the string has odd
+-- length, the final unpaired character is dropped rather than causing a
+-- pattern-match failure.
+mkPairs :: String -> [(Char, Char)]
+mkPairs str = case str of
+  c1 : c2 : cs -> (c1, c2) : mkPairs cs
+  -- empty input, or a single leftover character with no partner
+  _            -> []
+