From 239f310eb560763c215400a41af7e21fa0f2d51f Mon Sep 17 00:00:00 2001
From: aarne
Date: Tue, 24 Jun 2008 13:58:04 +0000
Subject: cp1251 coding ; trying to recognize the coding flag in grammar

---
 src-3.0/GF/Text/Lexing.hs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src-3.0/GF/Text')

Reviewer notes (not part of the commit; placed before the first
"diff --git" header, where patch tools skip free text):

  * Line breaks and indentation were reconstructed from a copy in which
    all whitespace had been collapsed. The original column alignment is
    not recoverable, and the position of the blank context line in the
    second hunk is inferred from the "-97,3" line count -- verify against
    the repository before applying (or apply with --ignore-whitespace).
  * decodeCP1251 shifts every character in '\192'..'\255' up by 848,
    i.e. onto code points 1040..1103; encodeCP1251 applies the inverse
    shift to code points 1040..1103. Every other character passes
    through both functions unchanged.
  * The added code calls chr and ord but this patch adds no import;
    presumably they are already in scope in Lexing.hs -- TODO confirm.

diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 1ac2eb498..2c6b417b8 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -24,6 +24,8 @@ stringOp name = case name of
   "to_html" -> Just wrapHTML
   "to_utf8" -> Just encodeUTF8
   "from_utf8" -> Just decodeUTF8
+  "to_cp1251" -> Just encodeCP1251
+  "from_cp1251" -> Just decodeCP1251
   _ -> transliterate name
 
 appLexer :: (String -> [String]) -> String -> String
@@ -97,3 +99,17 @@ isPunct = flip elem ".?!,:;"
 isParen = flip elem "()[]{}"
 isClosing = flip elem ")]}"
 
+
+-- might be in a file of its own: Windows Cyrillic, used in Bulgarian resource
+
+decodeCP1251 = map convert where
+  convert c
+    | c >= '\192' && c <= '\255' = chr (ord c + 848)
+    | otherwise = c
+
+encodeCP1251 = map convert where
+  convert c
+    | oc >= 1040 && oc <= 1103 = chr (oc - 848)
+    | otherwise = c
+    where oc = ord c
+
-- 
cgit v1.2.3