diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Text/Unicode.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Text/Unicode.hs')
| -rw-r--r-- | src-3.0/GF/Text/Unicode.hs | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Unicode.hs b/src-3.0/GF/Text/Unicode.hs new file mode 100644 index 000000000..9d0b9d1a8 --- /dev/null +++ b/src-3.0/GF/Text/Unicode.hs @@ -0,0 +1,69 @@ +---------------------------------------------------------------------- +-- | +-- Module : Unicode +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:23:42 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.12 $ +-- +-- ad hoc Unicode conversions from different alphabets. +-- AR 12\/4\/2000, 18\/9\/2001, 30\/5\/2002, 26\/1\/2004 +----------------------------------------------------------------------------- + +module GF.Text.Unicode (mkUnicode, treat) where + +import GF.Text.Greek (mkGreek) +import GF.Text.Arabic (mkArabic) +import GF.Text.Hebrew (mkHebrew) +import GF.Text.Russian (mkRussian, mkRusKOI8) +import GF.Text.Ethiopic (mkEthiopic) +import GF.Text.Tamil (mkTamil) +import GF.Text.OCSCyrillic (mkOCSCyrillic) +import GF.Text.LatinASupplement (mkLatinASupplement) +import GF.Text.Devanagari (mkDevanagari) +import GF.Text.Hiragana (mkJapanese) +import GF.Text.ExtendedArabic (mkArabic0600) +import GF.Text.ExtendedArabic (mkExtendedArabic) +import GF.Text.ExtraDiacritics (mkExtraDiacritics) + +import Data.Char + +mkUnicode :: String -> String +mkUnicode s = case s of + '/':'/':cs -> treat [] mkGreek unic ++ mkUnicode rest + '/':'+':cs -> mkHebrew unic ++ mkUnicode rest + '/':'-':cs -> mkArabic unic ++ mkUnicode rest + '/':'_':cs -> treat [] mkRussian unic ++ mkUnicode rest + '/':'*':cs -> mkRusKOI8 unic ++ mkUnicode rest + '/':'E':cs -> mkEthiopic unic ++ mkUnicode rest + '/':'T':cs -> mkTamil unic ++ mkUnicode rest + '/':'C':cs -> mkOCSCyrillic unic ++ mkUnicode rest + '/':'&':cs -> mkDevanagari unic ++ mkUnicode rest + '/':'L':cs -> mkLatinASupplement unic ++ mkUnicode rest + '/':'J':cs -> mkJapanese unic ++ mkUnicode rest + '/':'6':cs -> mkArabic0600 unic ++ mkUnicode rest + '/':'A':cs -> mkExtendedArabic unic ++ mkUnicode rest + '/':'X':cs -> mkExtraDiacritics unic ++ mkUnicode rest + c:cs -> c:mkUnicode cs + _ -> s + where + (unic,rest) = remClosing [] $ dropWhile isSpace $ drop 2 s + remClosing u s = case s of + c:'/':s | elem c "/+-_*ETC&LJ6AX" -> (reverse u, s) --- end need not match + c:cs -> remClosing (c:u) cs + _ -> (reverse u,[]) -- forgiving missing end + +-- | don't convert XML tags --- assumes \<\> always means XML tags +treat :: String -> (String -> String) -> String -> String +treat old mk s = case s of + '<':cs -> mk (reverse old) ++ '<':noTreat cs + c:cs -> treat (c:old) mk cs + _ -> mk (reverse old) + where + noTreat s = case s of + '>':cs -> '>' : treat [] mk cs + c:cs -> c : noTreat cs + _ -> s |
