summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text/Arabic.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/Arabic.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/Arabic.hs')
-rw-r--r--src-3.0/GF/Text/Arabic.hs63
1 files changed, 63 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Arabic.hs b/src-3.0/GF/Text/Arabic.hs
new file mode 100644
index 000000000..c482b1172
--- /dev/null
+++ b/src-3.0/GF/Text/Arabic.hs
@@ -0,0 +1,63 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Arabic
+-- Maintainer : (Maintainer)
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:23:34 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.6 $
+--
+-- (Description of the module)
+-----------------------------------------------------------------------------
+
+module GF.Text.Arabic (mkArabic) where
+
+mkArabic :: String -> String
+mkArabic = unwords . (map mkArabicWord) . words
+----mkArabic = reverse . unwords . (map mkArabicWord) . words
+--- reverse : assumes everything's on same line
+
+type ArabicChar = Char
+
+mkArabicWord :: String -> [ArabicChar]
+mkArabicWord = map mkArabicChar . getLetterPos
+
+getLetterPos :: String -> [(Char,Int)]
+getLetterPos [] = []
+getLetterPos ('I':cs) = ('*',7) : getLetterPos cs -- 0xfe80
+getLetterPos ('O':cs) = ('*',8) : getIn cs -- 0xfe8b
+getLetterPos ('l':'a':cs) = ('*',5) : getLetterPos cs -- 0xfefb
+getLetterPos [c] = [(c,1)] -- 1=isolated
+getLetterPos (c:cs) | isReduced c = (c,1) : getLetterPos cs
+getLetterPos (c:cs) = (c,3) : getIn cs -- 3=initial
+
+
+getIn [] = []
+getIn ('I':cs) = ('*',7) : getLetterPos cs -- 0xfe80
+getIn ('O':cs) = ('*',9) : getIn cs -- 0xfe8c
+getIn ('l':'a':cs) = ('*',6) : getLetterPos cs -- 0xfefc
+getIn [c] = [(c,2)] -- 2=final
+getIn (c:cs) | isReduced c = (c,2) : getLetterPos cs
+getIn (c:cs) = (c,4) : getIn cs -- 4=medial
+
+isReduced :: Char -> Bool
+isReduced c = c `elem` "UuWiYOaAdVrzwj"
+
+mkArabicChar ('*',p) | p > 4 && p < 10 =
+ (map toEnum [0xfefb,0xfefc,0xfe80,0xfe8b,0xfe8c]) !! (p-5)
+mkArabicChar cp@(c,p) = case lookup c cc of Just c' -> (c' !! (p-1)) ; _ -> c
+ where
+ cc = mkArabicTab allArabicCodes allArabic
+
+mkArabicTab (c:cs) as = (c,as1) : mkArabicTab cs as2 where
+ (as1,as2) = if isReduced c then splitAt 2 as else splitAt 4 as
+mkArabicTab [] _ = []
+
+allArabicCodes = "UuWiYOabAtvgHCdVrzscSDTZoxfqklmnhwjy"
+
+allArabic :: String
+allArabic = (map toEnum [0xfe81 .. 0xfef4]) -- I=0xfe80
+
+