summaryrefslogtreecommitdiff
path: root/src/GF/Text/ExtendedArabic.hs
diff options
context:
space:
mode:
authoraarne <unknown>2003-11-14 12:37:54 +0000
committeraarne <unknown>2003-11-14 12:37:54 +0000
commit33c6309c43734f112e1bda2a960d3263fd3dfa40 (patch)
tree722f93b6ce26d1017f4fee7b6c3408a947a2c896 /src/GF/Text/ExtendedArabic.hs
parent5a7d6e542d7fc0c01bec9163e4be732ac1c6d217 (diff)
Harald's new codings.
Diffstat (limited to 'src/GF/Text/ExtendedArabic.hs')
-rw-r--r--src/GF/Text/ExtendedArabic.hs58
1 files changed, 58 insertions, 0 deletions
diff --git a/src/GF/Text/ExtendedArabic.hs b/src/GF/Text/ExtendedArabic.hs
new file mode 100644
index 000000000..d766abc67
--- /dev/null
+++ b/src/GF/Text/ExtendedArabic.hs
@@ -0,0 +1,58 @@
+module ExtendedArabic where
+
+mkExtendedArabic :: String -> String
+mkExtendedArabic = digraphWordToUnicode . adHocToDigraphWord
+
+adHocToDigraphWord :: String -> [(Char, Char)]
+adHocToDigraphWord str = case str of
+ [] -> []
+ '<' : cs -> ('\\', '<') : spoolMarkup cs
+ -- Sorani
+ 'W' : cs -> (':', 'w') : adHocToDigraphWord cs -- ?? Will do
+ 'E' : cs -> (' ', 'i') : adHocToDigraphWord cs -- ?? Letter missing!
+ 'j' : cs -> ('d', 'j') : adHocToDigraphWord cs
+ 'O' : cs -> ('v', 'w') : adHocToDigraphWord cs
+ 'F' : cs -> (' ', 'v') : adHocToDigraphWord cs
+ 'Z' : cs -> ('z', 'h') : adHocToDigraphWord cs
+ 'I' : cs -> (' ', 'i') : adHocToDigraphWord cs -- ?? Letter missing!
+ 'C' : cs -> ('c', 'h') : adHocToDigraphWord cs
+ -- Pashto
+ 'e' : cs -> (':', 'y') : adHocToDigraphWord cs
+ '$' : cs -> ('3', 'H') : adHocToDigraphWord cs
+ 'X' : cs -> ('s', '.') : adHocToDigraphWord cs
+ 'G' : cs -> ('z', '.') : adHocToDigraphWord cs
+ 'a' : cs -> (' ', 'A') : adHocToDigraphWord cs
+ 'P' : cs -> ('\'', 'H') : adHocToDigraphWord cs
+ 'R' : cs -> ('o', 'r') : adHocToDigraphWord cs
+ -- Shared
+ 'A' : cs -> (' ', 'h') : adHocToDigraphWord cs -- ?? Maybe to "t or 0x06d5
+ 'c' : cs -> ('s', 'h') : adHocToDigraphWord cs
+ c : cs -> (' ', c) : adHocToDigraphWord cs
+
+
+-- Beginning 0x621 up and including 0x06d1
+digraphedExtendedArabic = " '~A'A'w,A'i A b\"t tTHdj H X dDH r z ssh S D T Z 3GH__________ - f q k l m n h w i y&a&w&i/a/w/i/W/o/~/'/,/|/6/v_____________#0#1#2#3#4#5#6#7#8#9#%#,#'#*>b>q$|> A2'2,3'A'w'w&y'Tb:b:BoT3b p4b4B'H:H2H\"H3Hch4HTdod.dTD:d:D3d3D4dTrvror.rvRz.:rzh4zs.+s*S:S3S3T33>ff.f: v4f.q3q-k~kok.k3k3K gog:g:G3Gvl.l3l3L:n>nTnon3n?h4H't>Y\"Yow-wvwww|w^w:w3w>y/yvy.w:y3y____ -ae"
+
+digraphWordToUnicode = map digraphToUnicode
+
+digraphToUnicode :: (Char, Char) -> Char
+digraphToUnicode (c1, c2) = case lookup (c1, c2) cc of Just c' -> c' ; _ -> c2
+ where
+ cc = zip allExtendedArabicCodes allExtendedArabic
+
+allExtendedArabicCodes :: [(Char, Char)]
+allExtendedArabicCodes = mkPairs digraphedExtendedArabic
+
+allExtendedArabic :: String
+allExtendedArabic = (map toEnum [0x0621 .. 0x06d1])
+
+mkPairs :: String -> [(Char, Char)]
+mkPairs str = case str of
+ [] -> []
+ c1 : c2 : cs -> (c1, c2) : mkPairs cs
+
+spoolMarkup :: String -> [(Char, Char)]
+spoolMarkup s = case s of
+ [] -> [] -- Shouldn't happen
+ '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
+ c1 : cs -> ('\\', c1) : spoolMarkup cs