summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src-3.0/GF/Command/Abstract.hs 4
-rw-r--r-- src-3.0/GF/Command/Commands.hs 23
-rw-r--r-- src-3.0/GF/Infra/Option.hs 8
-rw-r--r-- src-3.0/GF/Text/Lexing.hs 16
4 files changed, 41 insertions, 10 deletions
diff --git a/src-3.0/GF/Command/Abstract.hs b/src-3.0/GF/Command/Abstract.hs
index 23f76fa82..16905c2f9 100644
--- a/src-3.0/GF/Command/Abstract.hs
+++ b/src-3.0/GF/Command/Abstract.hs
@@ -60,4 +60,8 @@ isFlag o opts = elem o [x | OFlag x _ <- opts]
prOpt :: Option -> String
prOpt (OOpt i) = i ----
+-- | Wrap an option name in the 'OOpt' constructor; 'prOpt' applied to the
+-- result gives the name back.
+mkOpt :: String -> Option
+mkOpt name = OOpt name
+
+
diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs
index 68e2c5526..b5ba99f6f 100644
--- a/src-3.0/GF/Command/Commands.hs
+++ b/src-3.0/GF/Command/Commands.hs
@@ -32,6 +32,8 @@ import Data.Maybe
import qualified Data.Map as Map
import System.Cmd
+import Debug.Trace
+
type CommandOutput = ([Tree],String) ---- errors, etc
data CommandInfo = CommandInfo {
@@ -343,7 +345,7 @@ allCommands pgf = Map.fromList [
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal",
"ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
],
- exec = \opts -> return . fromString . stringOps opts . toString,
+ exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString,
options = stringOpOptions
}),
("q", emptyCommandInfo {
@@ -497,11 +499,18 @@ allCommands pgf = Map.fromList [
(abstractName pgf ++ ": " ++ showTree t) :
[lang ++ ": " ++ linear opts lang t | lang <- optLangs opts]
- unlex opts lang = stringOps (exceptUTF8 opts) where
- exceptUTF8 = if isUTF8 then filter ((/="to_UTF8") . prOpt) else id
- isUTF8 = case lookFlag pgf lang "coding" of
- Just "utf8" -> True
- _ -> False
+-- Logic of coding in unlexing:
+-- - If lang has no coding flag, or -to_utf8 is not in opts, the opts are used unchanged.
+-- - If lang has flag coding=utf8, -to_utf8 is ignored (it is filtered out of the opts).
+-- - If lang has coding=other and -to_utf8 is in opts, from_other is applied first;
+--   an occurrence of from_other already in opts is dropped so it is not applied twice.
+
+ -- Apply the string operations named in opts, adjusted by the "coding" flag
+ -- that lookFlag reports for lang:
+ --   * coding=utf8: every "to_utf8" operation is removed;
+ --   * any other coding, with -to_utf8 among opts: "from_<coding>" is put at
+ --     the front of the list, and other occurrences of it are removed;
+ --   * otherwise the operation names are passed through unchanged.
+ unlex opts lang = stringOps effectiveOps
+   where
+     -- operation names exactly as given on the command line
+     requested = map prOpt opts
+     effectiveOps = case lookFlag pgf lang "coding" of
+       Just "utf8" -> filter (/= "to_utf8") requested
+       Just enc
+         | isOpt "to_utf8" opts -> decoder : filter (/= decoder) requested
+         where decoder = "from_" ++ enc
+       _ -> requested
optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf
@@ -536,7 +545,7 @@ allCommands pgf = Map.fromList [
[lookupMorpho (buildMorpho pgf (mkCId la)) s | la <- optLangs opts]
-- ps -f -g s returns g (f s)
- stringOps opts s = foldr app s (reverse (map prOpt opts)) where
+ stringOps opts s = foldr app s (reverse opts) where
app f = maybe id id (stringOp f)
stringOpOptions = [
diff --git a/src-3.0/GF/Infra/Option.hs b/src-3.0/GF/Infra/Option.hs
index 44d4adfa5..6c9d3550b 100644
--- a/src-3.0/GF/Infra/Option.hs
+++ b/src-3.0/GF/Infra/Option.hs
@@ -76,7 +76,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug
data Phase = Preproc | Convert | Compile | Link
deriving (Show,Eq,Ord)
-data Encoding = UTF_8 | ISO_8859_1
+data Encoding = UTF_8 | ISO_8859_1 | CP_1251
deriving (Show,Eq,Ord)
data OutputFormat = FmtPGF
@@ -469,7 +469,9 @@ optimizationPackages =
encodings :: [(String,Encoding)]
encodings =
[("utf8", UTF_8),
- ("latin1", ISO_8859_1)]
+ ("cp1251", CP_1251),
+ ("latin1", ISO_8859_1)
+ ]
lookupShow :: Eq a => [(String,a)] -> a -> String
lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs]
@@ -542,4 +544,4 @@ instance Functor OptDescr where
instance Functor ArgDescr where
fmap f (NoArg x) = NoArg (f x)
fmap f (ReqArg g s) = ReqArg (f . g) s
- fmap f (OptArg g s) = OptArg (f . g) s \ No newline at end of file
+ fmap f (OptArg g s) = OptArg (f . g) s
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 1ac2eb498..2c6b417b8 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -24,6 +24,8 @@ stringOp name = case name of
"to_html" -> Just wrapHTML
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
+ "to_cp1251" -> Just encodeCP1251
+ "from_cp1251" -> Just decodeCP1251
_ -> transliterate name
appLexer :: (String -> [String]) -> String -> String
@@ -97,3 +99,17 @@ isPunct = flip elem ".?!,:;"
isParen = flip elem "()[]{}"
isClosing = flip elem ")]}"
+
+-- Windows Cyrillic (CP1251) conversions, used in the Bulgarian resource;
+-- these could be moved to a file of their own.
+
+-- | Convert CP1251-coded characters to Unicode.
+--
+-- Characters in the range @'\\192'@..@'\\255'@ are moved up by 848 code
+-- points, landing on 1040..1103 (the basic Cyrillic letters); every other
+-- character is returned unchanged.
+decodeCP1251 :: String -> String
+decodeCP1251 str = [ toUnicode ch | ch <- str ]
+  where
+    -- distance between a CP1251 letter code and its Unicode code point
+    offset = 848
+    toUnicode ch
+      | '\192' <= ch && ch <= '\255' = chr (ord ch + offset)
+      | otherwise                    = ch
+
+-- | Convert Unicode characters to their CP1251 codes.
+--
+-- Characters with code points 1040..1103 (the basic Cyrillic letters) are
+-- moved down by 848, landing on 192..255; every other character is
+-- returned unchanged.
+encodeCP1251 :: String -> String
+encodeCP1251 str = [ toByte ch | ch <- str ]
+  where
+    -- distance between a Unicode code point and its CP1251 letter code
+    offset = 848
+    toByte ch =
+      let code = ord ch
+      in if code >= 1040 && code <= 1103
+           then chr (code - offset)
+           else ch
+