diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/ExtendedArabic.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/ExtendedArabic.hs')
| -rw-r--r-- | src-3.0/GF/Text/ExtendedArabic.hs | 99 |
1 files changed, 99 insertions, 0 deletions
----------------------------------------------------------------------
-- |
-- Module      : ExtendedArabic
-- Maintainer  : (Maintainer)
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:36 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- ASCII transliteration into the Unicode code points 0x0621 .. 0x06d1.
--
-- Each input character is first turned into a two-character key (a
-- \"digraph\"); the key is then looked up in a table that lists one
-- digraph per code point.  Text between @\<@ and @>@ is treated as
-- markup and copied through unchanged.
-----------------------------------------------------------------------------

module GF.Text.ExtendedArabic (mkArabic0600, mkExtendedArabic) where

-- | A two-character key identifying one target character.
type Digraph = (Char, Char)

-- | Transliterate a string using the plain Arabic letter mapping
-- ('aarnesToDigraphWord').  The result has exactly one character per
-- input character.
mkArabic0600 :: String -> String
mkArabic0600 = digraphWordToUnicode . aarnesToDigraphWord

-- | Turn a string into digraph keys using the plain Arabic mapping.
-- Characters without a special rule become @(' ', c)@; markup between
-- @\<@ and @>@ is escaped so that it survives the table lookup.
aarnesToDigraphWord :: String -> [Digraph]
aarnesToDigraphWord = toDigraphWord arabicLetter
  where
    arabicLetter :: Char -> Digraph
    arabicLetter c = case c of
      'v' -> ('T', 'H')
      'a' -> (' ', 'A')
      'o' -> (' ', '3')
      'O' -> ('\'', 'i')
      'u' -> ('\'', 'A')
      'C' -> (' ', 'X')
      'U' -> ('~', 'A')
      'A' -> ('"', 't')
      'c' -> ('s', 'h')
      _   -> (' ', c)

-- | Transliterate a string using the extended (Sorani / Pashto) letter
-- mapping ('adHocToDigraphWord').  The result has exactly one character
-- per input character.
mkExtendedArabic :: String -> String
mkExtendedArabic = digraphWordToUnicode . adHocToDigraphWord

-- | Turn a string into digraph keys using the extended mapping.
-- Characters without a special rule become @(' ', c)@; markup between
-- @\<@ and @>@ is escaped so that it survives the table lookup.
adHocToDigraphWord :: String -> [Digraph]
adHocToDigraphWord = toDigraphWord extendedLetter
  where
    extendedLetter :: Char -> Digraph
    extendedLetter c = case c of
      -- Sorani
      'W' -> (':', 'w')   -- ?? Will do
      'E' -> (' ', 'i')   -- ?? Letter missing!
      'j' -> ('d', 'j')
      'O' -> ('v', 'w')
      'F' -> (' ', 'v')
      'Z' -> ('z', 'h')
      'I' -> (' ', 'i')   -- ?? Letter missing!
      'C' -> ('c', 'h')
      -- Pashto
      'e' -> (':', 'y')
      '$' -> ('3', 'H')
      'X' -> ('s', '.')
      'G' -> ('z', '.')
      'a' -> (' ', 'A')
      'P' -> ('\'', 'H')
      'R' -> ('o', 'r')
      -- Shared
      'A' -> (' ', 'h')   -- ?? Maybe to "t or 0x06d5
      'c' -> ('s', 'h')
      _   -> (' ', c)

-- | Shared driver for both mappings.
--
-- Outside markup every character goes through the supplied letter
-- mapping.  A @\<@ switches to markup mode, in which every character
-- (including the closing @>@) is emitted as @('\\\\', c)@.  No table
-- entry starts with a backslash, so the lookup falls back to @c@ and the
-- markup is reproduced verbatim.  Markup left unterminated at the end of
-- the input simply ends the output.
toDigraphWord :: (Char -> Digraph) -> String -> [Digraph]
toDigraphWord letter = outside
  where
    outside []         = []
    outside ('<' : cs) = ('\\', '<') : inside cs
    outside (c : cs)   = letter c : outside cs

    inside []         = []   -- Shouldn't happen
    inside ('>' : cs) = ('\\', '>') : outside cs
    inside (c : cs)   = ('\\', c) : inside cs

-- | Digraph keys for the code points beginning at 0x621 up to and
-- including 0x06d1, two characters per code point, in code-point order.
--
-- NOTE(review): this literal is reproduced verbatim and its pair
-- alignment has not been audited; the runs of underscores look like
-- placeholders for unused code points -- confirm against the Unicode
-- chart before relying on entries beyond the basic letters.
digraphedExtendedArabic :: String
digraphedExtendedArabic = " '~A'A'w,A'i A b\"t tTHdj H X dDH r z ssh S D T Z 3GH__________ - f q k l m n h w i y&a&w&i/a/w/i/W/o/~/'/,/|/6/v_____________#0#1#2#3#4#5#6#7#8#9#%#,#'#*>b>q$|> A2'2,3'A'w'w&y'Tb:b:BoT3b p4b4B'H:H2H\"H3Hch4HTdod.dTD:d:D3d3D4dTrvror.rvRz.:rzh4zs.+s*S:S3S3T33>ff.f: v4f.q3q-k~kok.k3k3K gog:g:G3Gvl.l3l3L:n>nTnon3n?h4H't>Y\"Yow-wvwww|w^w:w3w>y/yvy.w:y3y____ -ae"

-- | Map every digraph of a word to its target character.
digraphWordToUnicode :: [Digraph] -> String
digraphWordToUnicode = map digraphToUnicode

-- | Look a digraph up in 'digraphTable'.  When several entries share a
-- key the first one wins; when there is no entry the second character of
-- the digraph is returned unchanged.
digraphToUnicode :: Digraph -> Char
digraphToUnicode key@(_, c2) = maybe c2 id (lookup key digraphTable)

-- | Association list from digraph to code point.  Kept at top level so
-- it is built once rather than per looked-up character.  'zip' truncates
-- to the shorter of the two lists.
digraphTable :: [(Digraph, Char)]
digraphTable = zip allExtendedArabicCodes allExtendedArabic

-- | The digraph keys, in table order.
allExtendedArabicCodes :: [Digraph]
allExtendedArabicCodes = mkPairs digraphedExtendedArabic

-- | The target characters 0x0621 .. 0x06d1, in code-point order.
allExtendedArabic :: String
allExtendedArabic = map toEnum [0x0621 .. 0x06d1]

-- | Split a string into consecutive pairs of characters.  A trailing
-- unpaired character is dropped instead of raising a pattern-match
-- failure.
mkPairs :: String -> [Digraph]
mkPairs (c1 : c2 : cs) = (c1, c2) : mkPairs cs
mkPairs _              = []
