diff options
| author | aarne <unknown> | 2003-09-22 13:16:55 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2003-09-22 13:16:55 +0000 |
| commit | b1402e8bd6a68a891b00a214d6cf184d66defe19 (patch) | |
| tree | 90372ac4e53dce91cf949dbf8e93be06f1d9e8bd /src/GF/Text/Arabic.hs | |
Founding the newly structured GF2.0 cvs archive.
Diffstat (limited to 'src/GF/Text/Arabic.hs')
| -rw-r--r-- | src/GF/Text/Arabic.hs | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/src/GF/Text/Arabic.hs b/src/GF/Text/Arabic.hs new file mode 100644 index 000000000..6df79c4a9 --- /dev/null +++ b/src/GF/Text/Arabic.hs @@ -0,0 +1,48 @@ +module Arabic where + +mkArabic :: String -> String +mkArabic = reverse . unwords . (map mkArabicWord) . words +--- reverse : assumes everything's on same line + +type ArabicChar = Char + +mkArabicWord :: String -> [ArabicChar] +mkArabicWord = map mkArabicChar . getLetterPos + +getLetterPos :: String -> [(Char,Int)] +getLetterPos [] = [] +getLetterPos ('I':cs) = ('*',7) : getLetterPos cs -- 0xfe80 +getLetterPos ('O':cs) = ('*',8) : getIn cs -- 0xfe8b +getLetterPos ('l':'a':cs) = ('*',5) : getLetterPos cs -- 0xfefb +getLetterPos [c] = [(c,1)] -- 1=isolated +getLetterPos (c:cs) | isReduced c = (c,1) : getLetterPos cs +getLetterPos (c:cs) = (c,3) : getIn cs -- 3=initial + + +getIn [] = [] +getIn ('I':cs) = ('*',7) : getLetterPos cs -- 0xfe80 +getIn ('O':cs) = ('*',9) : getIn cs -- 0xfe8c +getIn ('l':'a':cs) = ('*',6) : getLetterPos cs -- 0xfefc +getIn [c] = [(c,2)] -- 2=final +getIn (c:cs) | isReduced c = (c,2) : getLetterPos cs +getIn (c:cs) = (c,4) : getIn cs -- 4=medial + +isReduced :: Char -> Bool +isReduced c = c `elem` "UuWiYOaAdVrzwj" + +mkArabicChar ('*',p) | p > 4 && p < 10 = + (map toEnum [0xfefb,0xfefc,0xfe80,0xfe8b,0xfe8c]) !! (p-5) +mkArabicChar cp@(c,p) = case lookup c cc of Just c' -> (c' !! (p-1)) ; _ -> c + where + cc = mkArabicTab allArabicCodes allArabic + +mkArabicTab (c:cs) as = (c,as1) : mkArabicTab cs as2 where + (as1,as2) = if isReduced c then splitAt 2 as else splitAt 4 as +mkArabicTab [] _ = [] + +allArabicCodes = "UuWiYOabAtvgHCdVrzscSDTZoxfqklmnhwjy" + +allArabic :: String +allArabic = (map toEnum [0xfe81 .. 0xfef4]) -- I=0xfe80 + + |
