summary | refs | log | tree | commit | diff
path: root/src/compiler/GF
diff options
context:
space:
mode:
author: aarne <aarne@chalmers.se> 2011-05-02 14:53:46 +0000
committer: aarne <aarne@chalmers.se> 2011-05-02 14:53:46 +0000
commit: 4ec34bdbb69a4c3f2238d9a42dd995f1bae13848 (patch)
tree: 8ac94e5139677a3ba0fad9cef7c15e4ad5a32c7a /src/compiler/GF
parent: fd0fb48493063c849c8a3f7d359d0d2c6c66f4ef (diff)
transliteration via configuration file: ps -to=file or ps -from=file
Diffstat (limited to 'src/compiler/GF')
-rw-r--r-- src/compiler/GF/Command/Commands.hs 23
-rw-r--r-- src/compiler/GF/Text/Transliterations.hs 13
2 files changed, 31 insertions, 5 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index cef46516b..e7beec23e 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -628,14 +628,18 @@ allCommands env@(pgf, mos) = Map.fromList [
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal",
"rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8",
- "rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration"
+ "rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration",
+ "ps -to=chinese.trans \"abc\" -- apply transliteration defined in file chinese.trans"
],
- exec = \opts ->
- let (os,fs) = optsAndFlags opts in
- return . fromString . stringOps (envFlag fs) (map prOpt os) . toString,
+ exec = \opts x -> do
+ let (os,fs) = optsAndFlags opts
+ trans <- optTranslit opts
+ return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x),
options = stringOpOptions,
flags = [
- ("env","apply in this environment only")
+ ("env","apply in this environment only"),
+ ("from","backward-apply transliteration defined in this file (format 'unicode translit' per line)"),
+ ("to", "forward-apply transliteration defined in this file")
]
}),
("pt", emptyCommandInfo {
@@ -1100,6 +1104,15 @@ allCommands env@(pgf, mos) = Map.fromList [
probs <- readProbabilitiesFromFile file pgf
return (setProbabilities probs pgf)
+ optTranslit opts = case (valStrOpts "to" "" opts, valStrOpts "from" "" opts) of -- choose a string transformation from the -to / -from file flags
+ ("","") -> return id -- neither flag given: leave the string unchanged
+ (file,"") -> do -- only -to given
+ src <- readFile file
+ return $ transliterateWithFile file src False -- False = isFrom off (the "to" direction)
+ (_,file) -> do -- -from given; a -to value, if also present, is ignored here
+ src <- readFile file
+ return $ transliterateWithFile file src True -- True = isFrom on (the "from" direction)
+
optFile opts = valStrOpts "file" "_gftmp" opts
optType opts =
diff --git a/src/compiler/GF/Text/Transliterations.hs b/src/compiler/GF/Text/Transliterations.hs
index 0cdd4fb0a..1ce101587 100644
--- a/src/compiler/GF/Text/Transliterations.hs
+++ b/src/compiler/GF/Text/Transliterations.hs
@@ -1,5 +1,6 @@
module GF.Text.Transliterations (
transliterate,
+ transliterateWithFile,
transliteration,
characterTable,
transliterationPrintNames
@@ -27,6 +28,10 @@ transliterate s = case s of
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
_ -> Nothing
+transliterateWithFile :: String -> String -> Bool -> (String -> String) -- arguments: table name, file contents, isFrom flag
+transliterateWithFile name src isFrom =
+ (if isFrom then appTransFromUnicode else appTransToUnicode) (getTransliterationFile name src) -- table is parsed from src; isFrom selects which of the two appliers is used
+
transliteration :: String -> Maybe Transliteration
transliteration s = Map.lookup s allTransliterations
@@ -82,6 +87,14 @@ mkTransliteration name ts us =
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
+getTransliterationFile :: String -> String -> Transliteration -- arguments: table name, file contents
+getTransliterationFile name = uncurry (mkTransliteration name) . codes
+ where
+ codes = unzip . map (mkOne . words) . lines -- one (translit, code) pair per input line, split into two parallel lists
+ mkOne ws = case ws of
+ [c]:t:_ -> (t,fromEnum c) -- ä a:  (first word is a single character: use its code; this also catches one-digit numbers)
+ u:t:_ -> (t,read u) -- 228 a: OR 0xe4  (longer first word is parsed as a number; read fails on non-numeric text)
+ _ -> error $ "not a valid transliteration:" ++ unwords ws -- fewer than two words on the line, blank lines included
unchar :: String -> [String]
unchar s = case s of