diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-08-16 18:55:28 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-08-16 18:55:28 +0000 |
| commit | 66c927937ad8ba4055a30c87c4455186535804dd (patch) | |
| tree | c0008ba3a3fe2e1f494e7b4b0570a459f593c67b /src | |
| parent | ddbeff3028452751e4e840331d4ec425d83d552c (diff) | |
bronzeage grammar restored, except for three with issues; Arabic transliteration added
Diffstat (limited to 'src')
| -rw-r--r-- | src/GF/Command/Commands.hs | 3 | ||||
| -rw-r--r-- | src/GF/Text/Transliterations.hs | 15 |
2 files changed, 16 insertions, 2 deletions
diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs index 29f0bd61a..fe3532354 100644 --- a/src/GF/Command/Commands.hs +++ b/src/GF/Command/Commands.hs @@ -445,6 +445,7 @@ allCommands enc pgf = Map.fromList [ let out = maybe "no such transliteration" characterTable $ transliteration t return $ fromString out, options = [ + ("arabic", "Arabic"), ("devanagari","Devanagari"), ("thai", "Thai") ] @@ -584,6 +585,7 @@ stringOpOptions = [ ("bind","bind tokens separated by Prelude.BIND, i.e. &+"), ("chars","lexer that makes every non-space character a token"), ("from_cp1251","decode from cp1251 (Cyrillic used in Bulgarian resource)"), + ("from_arabic","from unicode to GF Arabic transliteration"), ("from_devanagari","from unicode to GF Devanagari transliteration"), ("from_thai","from unicode to GF Thai transliteration"), ("from_utf8","decode from utf8"), @@ -591,6 +593,7 @@ stringOpOptions = [ ("lexcode","code-like lexer"), ("lexmixed","mixture of text and code (code between $...$)"), ("to_cp1251","encode to cp1251 (Cyrillic used in Bulgarian resource)"), + ("to_arabic","from GF Arabic transliteration to unicode"), ("to_devanagari","from GF Devanagari transliteration to unicode"), ("to_html","wrap in a html file with linebreaks"), ("to_thai","from GF Thai transliteration to unicode"), diff --git a/src/GF/Text/Transliterations.hs b/src/GF/Text/Transliterations.hs index 19adf68fa..f780de773 100644 --- a/src/GF/Text/Transliterations.hs +++ b/src/GF/Text/Transliterations.hs @@ -25,9 +25,10 @@ transliterate s = case s of transliteration :: String -> Maybe Transliteration transliteration s = case s of + "arabic" -> Just transArabic "devanagari" -> Just transDevanagari "thai" -> Just transThai - "urdu" -> Just transUrdu +---- "urdu" -> Just transUrdu _ -> Nothing characterTable :: Transliteration -> String @@ -101,5 +102,15 @@ allTransUrduHindi = words $ transUrdu :: Transliteration transUrdu = (mkTransliteration allTransUrduHindi allCodes){invisible_chars = ["a"]} 
where - allCodes = [0x0901 .. 0x094c] + allCodes = [0x0901 .. 0x094c] ---- TODO: this is devanagari + +transArabic :: Transliteration +transArabic = mkTransliteration allTrans allCodes where + allTrans = words $ + " V A: A? w? A- y? A b t. t v g H K d " ++ -- 0621 - 062f + "W r z s C S D T Z c G " ++ -- 0630 - 063a + " f q k l m n h w y. y a. u. i. a u " ++ -- 0641 - 064f + "i v2 o a: V+ V- i: a+" -- 0650 - 0657 + allCodes = [0x0621..0x062f] ++ [0x0630..0x063a] ++ + [0x0641..0x064f] ++ [0x0650..0x0657] |
