summary | refs | log | tree | commit | diff
path: root/src/GF/Text
diff options
context:
space:
mode:
author aarne <unknown> 2004-01-28 12:42:20 +0000
committer aarne <unknown> 2004-01-28 12:42:20 +0000
commit 1ca54cca208876f83014119da55e747d5342b434 (patch)
tree 63413b08d9e26f3a55f82c2da9dcb0c5eef1d06d /src/GF/Text
parent 1b002dec90ce2b080301f54a8f84a91ca6669879 (diff)
Unicode. Batch transl into HTML.
Diffstat (limited to 'src/GF/Text')
-rw-r--r-- src/GF/Text/Arabic.hs 3
-rw-r--r-- src/GF/Text/Hebrew.hs 3
-rw-r--r-- src/GF/Text/Unicode.hs 19
3 files changed, 20 insertions, 5 deletions
diff --git a/src/GF/Text/Arabic.hs b/src/GF/Text/Arabic.hs
index 6df79c4a9..6882176eb 100644
--- a/src/GF/Text/Arabic.hs
+++ b/src/GF/Text/Arabic.hs
@@ -1,7 +1,8 @@
module Arabic where
mkArabic :: String -> String
-mkArabic = reverse . unwords . (map mkArabicWord) . words
+mkArabic = unwords . (map mkArabicWord) . words
+----mkArabic = reverse . unwords . (map mkArabicWord) . words
--- reverse : assumes everything's on same line
type ArabicChar = Char
diff --git a/src/GF/Text/Hebrew.hs b/src/GF/Text/Hebrew.hs
index b5a827518..5c163fbb8 100644
--- a/src/GF/Text/Hebrew.hs
+++ b/src/GF/Text/Hebrew.hs
@@ -1,7 +1,8 @@
module Hebrew where
mkHebrew :: String -> String
-mkHebrew = reverse . mkHebrewWord
+mkHebrew = mkHebrewWord
+----mkHebrew = reverse . mkHebrewWord
--- reverse : assumes everything's on same line
type HebrewChar = Char
diff --git a/src/GF/Text/Unicode.hs b/src/GF/Text/Unicode.hs
index 197759213..4d7da0c26 100644
--- a/src/GF/Text/Unicode.hs
+++ b/src/GF/Text/Unicode.hs
@@ -14,15 +14,17 @@ import ExtendedArabic (mkArabic0600)
import ExtendedArabic (mkExtendedArabic)
import ExtraDiacritics (mkExtraDiacritics)
+import Char
+
-- ad hoc Unicode conversions from different alphabets
-- AR 12/4/2000, 18/9/2001, 30/5/2002, 26/1/2004
mkUnicode s = case s of
- '/':'/':cs -> mkGreek unic ++ mkUnicode rest
+ '/':'/':cs -> treat [] mkGreek unic ++ mkUnicode rest
'/':'+':cs -> mkHebrew unic ++ mkUnicode rest
'/':'-':cs -> mkArabic unic ++ mkUnicode rest
- '/':'_':cs -> mkRussian unic ++ mkUnicode rest
+ '/':'_':cs -> treat [] mkRussian unic ++ mkUnicode rest
'/':'*':cs -> mkRusKOI8 unic ++ mkUnicode rest
'/':'E':cs -> mkEthiopic unic ++ mkUnicode rest
'/':'T':cs -> mkTamil unic ++ mkUnicode rest
@@ -36,8 +38,19 @@ mkUnicode s = case s of
c:cs -> c:mkUnicode cs
_ -> s
where
- (unic,rest) = remClosing [] $ drop 2 s
+ (unic,rest) = remClosing [] $ dropWhile isSpace $ drop 2 s
remClosing u s = case s of
c:'/':s | elem c "/+-_*ETC&LJ6AX" -> (reverse u, s) --- end need not match
c:cs -> remClosing (c:u) cs
_ -> (reverse u,[]) -- forgiving missing end
+
+ -- don't convert XML tags --- assumes <> always means XML tags
+ treat old mk s = case s of
+ '<':cs -> mk (reverse old) ++ '<':noTreat cs
+ c:cs -> treat (c:old) mk cs
+ _ -> mk (reverse old)
+ where
+ noTreat s = case s of
+ '>':cs -> '>' : treat [] mk cs
+ c:cs -> c : noTreat cs
+ _ -> s