diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/Arabic.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/Arabic.hs')
| -rw-r--r-- | src-3.0/GF/Text/Arabic.hs | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Arabic.hs b/src-3.0/GF/Text/Arabic.hs new file mode 100644 index 000000000..c482b1172 --- /dev/null +++ b/src-3.0/GF/Text/Arabic.hs @@ -0,0 +1,63 @@ +---------------------------------------------------------------------- +-- | +-- Module : Arabic +-- Maintainer : (Maintainer) +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:34 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Text.Arabic (mkArabic) where + +mkArabic :: String -> String +mkArabic = unwords . (map mkArabicWord) . words +----mkArabic = reverse . unwords . (map mkArabicWord) . words +--- reverse : assumes everything's on same line + +type ArabicChar = Char + +mkArabicWord :: String -> [ArabicChar] +mkArabicWord = map mkArabicChar . getLetterPos + +getLetterPos :: String -> [(Char,Int)] +getLetterPos [] = [] +getLetterPos ('I':cs) = ('*',7) : getLetterPos cs -- 0xfe80 +getLetterPos ('O':cs) = ('*',8) : getIn cs -- 0xfe8b +getLetterPos ('l':'a':cs) = ('*',5) : getLetterPos cs -- 0xfefb +getLetterPos [c] = [(c,1)] -- 1=isolated +getLetterPos (c:cs) | isReduced c = (c,1) : getLetterPos cs +getLetterPos (c:cs) = (c,3) : getIn cs -- 3=initial + + +getIn [] = [] +getIn ('I':cs) = ('*',7) : getLetterPos cs -- 0xfe80 +getIn ('O':cs) = ('*',9) : getIn cs -- 0xfe8c +getIn ('l':'a':cs) = ('*',6) : getLetterPos cs -- 0xfefc +getIn [c] = [(c,2)] -- 2=final +getIn (c:cs) | isReduced c = (c,2) : getLetterPos cs +getIn (c:cs) = (c,4) : getIn cs -- 4=medial + +isReduced :: Char -> Bool +isReduced c = c `elem` "UuWiYOaAdVrzwj" + +mkArabicChar ('*',p) | p > 4 && p < 10 = + (map toEnum [0xfefb,0xfefc,0xfe80,0xfe8b,0xfe8c]) !! (p-5) +mkArabicChar cp@(c,p) = case lookup c cc of Just c' -> (c' !! (p-1)) ; _ -> c + where + cc = mkArabicTab allArabicCodes allArabic + +mkArabicTab (c:cs) as = (c,as1) : mkArabicTab cs as2 where + (as1,as2) = if isReduced c then splitAt 2 as else splitAt 4 as +mkArabicTab [] _ = [] + +allArabicCodes = "UuWiYOabAtvgHCdVrzscSDTZoxfqklmnhwjy" + +allArabic :: String +allArabic = (map toEnum [0xfe81 .. 0xfef4]) -- I=0xfe80 + + |
