diff options
| author | aarne <unknown> | 2004-01-28 12:42:20 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2004-01-28 12:42:20 +0000 |
| commit | 1ca54cca208876f83014119da55e747d5342b434 (patch) | |
| tree | 63413b08d9e26f3a55f82c2da9dcb0c5eef1d06d /src/GF/Text | |
| parent | 1b002dec90ce2b080301f54a8f84a91ca6669879 (diff) | |
Unicode. Batch transl into HTML.
Diffstat (limited to 'src/GF/Text')
| -rw-r--r-- | src/GF/Text/Arabic.hs | 3 | ||||
| -rw-r--r-- | src/GF/Text/Hebrew.hs | 3 | ||||
| -rw-r--r-- | src/GF/Text/Unicode.hs | 19 |
3 files changed, 20 insertions, 5 deletions
```diff
diff --git a/src/GF/Text/Arabic.hs b/src/GF/Text/Arabic.hs
index 6df79c4a9..6882176eb 100644
--- a/src/GF/Text/Arabic.hs
+++ b/src/GF/Text/Arabic.hs
@@ -1,7 +1,8 @@
 module Arabic where
 
 mkArabic :: String -> String
-mkArabic = reverse . unwords . (map mkArabicWord) . words
+mkArabic = unwords . (map mkArabicWord) . words
+----mkArabic = reverse . unwords . (map mkArabicWord) . words
 --- reverse : assumes everything's on same line
 
 type ArabicChar = Char
diff --git a/src/GF/Text/Hebrew.hs b/src/GF/Text/Hebrew.hs
index b5a827518..5c163fbb8 100644
--- a/src/GF/Text/Hebrew.hs
+++ b/src/GF/Text/Hebrew.hs
@@ -1,7 +1,8 @@
 module Hebrew where
 
 mkHebrew :: String -> String
-mkHebrew = reverse . mkHebrewWord
+mkHebrew = mkHebrewWord
+----mkHebrew = reverse . mkHebrewWord
 --- reverse : assumes everything's on same line
 
 type HebrewChar = Char
diff --git a/src/GF/Text/Unicode.hs b/src/GF/Text/Unicode.hs
index 197759213..4d7da0c26 100644
--- a/src/GF/Text/Unicode.hs
+++ b/src/GF/Text/Unicode.hs
@@ -14,15 +14,17 @@ import ExtendedArabic (mkArabic0600)
 import ExtendedArabic (mkExtendedArabic)
 import ExtraDiacritics (mkExtraDiacritics)
 
+import Char
+
 -- ad hoc Unicode conversions from different alphabets
 
 -- AR 12/4/2000, 18/9/2001, 30/5/2002, 26/1/2004
 
 mkUnicode s = case s of
-  '/':'/':cs -> mkGreek unic ++ mkUnicode rest
+  '/':'/':cs -> treat [] mkGreek unic ++ mkUnicode rest
   '/':'+':cs -> mkHebrew unic ++ mkUnicode rest
   '/':'-':cs -> mkArabic unic ++ mkUnicode rest
-  '/':'_':cs -> mkRussian unic ++ mkUnicode rest
+  '/':'_':cs -> treat [] mkRussian unic ++ mkUnicode rest
   '/':'*':cs -> mkRusKOI8 unic ++ mkUnicode rest
   '/':'E':cs -> mkEthiopic unic ++ mkUnicode rest
   '/':'T':cs -> mkTamil unic ++ mkUnicode rest
@@ -36,8 +38,19 @@ mkUnicode s = case s of
   c:cs -> c:mkUnicode cs
   _ -> s
  where
-   (unic,rest) = remClosing [] $ drop 2 s
+   (unic,rest) = remClosing [] $ dropWhile isSpace $ drop 2 s
    remClosing u s = case s of
      c:'/':s | elem c "/+-_*ETC&LJ6AX" -> (reverse u, s) --- end need not match
      c:cs -> remClosing (c:u) cs
      _ -> (reverse u,[]) -- forgiving missing end
+
+   -- don't convert XML tags --- assumes <> always means XML tags
+   treat old mk s = case s of
+     '<':cs -> mk (reverse old) ++ '<':noTreat cs
+     c:cs -> treat (c:old) mk cs
+     _ -> mk (reverse old)
+    where
+      noTreat s = case s of
+        '>':cs -> '>' : treat [] mk cs
+        c:cs -> c : noTreat cs
+        _ -> s
```
