summary | refs | log | tree | commit | diff
path: root/src-3.0/GF/Text
diff options
context:
space:
mode:
author: aarne <aarne@cs.chalmers.se> 2008-06-24 13:58:04 +0000
committer: aarne <aarne@cs.chalmers.se> 2008-06-24 13:58:04 +0000
commit: 239f310eb560763c215400a41af7e21fa0f2d51f (patch)
tree: afad2f258975e34b40e2e60ead2436ef98c38411 /src-3.0/GF/Text
parent: 223480bb77d5a86f3a9dcb2f02fcafbd89de453a (diff)
cp1251 coding ; trying to recognize the coding flag in grammar
Diffstat (limited to 'src-3.0/GF/Text')
-rw-r--r-- src-3.0/GF/Text/Lexing.hs 16
1 files changed, 16 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 1ac2eb498..2c6b417b8 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -24,6 +24,8 @@ stringOp name = case name of
"to_html" -> Just wrapHTML
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
+ "to_cp1251" -> Just encodeCP1251
+ "from_cp1251" -> Just decodeCP1251
_ -> transliterate name
appLexer :: (String -> [String]) -> String -> String
@@ -97,3 +99,17 @@ isPunct = flip elem ".?!,:;"
isParen = flip elem "()[]{}"
isClosing = flip elem ")]}"
+
+-- Windows Cyrillic (CP1251) coding, used in the Bulgarian resource; might be in a file of its own
+
+decodeCP1251 = map convert where -- converts character by character, so the output has the same length as the input
+ convert c
+ | c >= '\192' && c <= '\255' = chr (ord c + 848) -- code points 192..255 are shifted up by 848, landing on 1040..1103
+ | otherwise = c -- every character outside 192..255 is passed through unchanged
+
+encodeCP1251 = map convert where -- converts character by character; undoes the 192..255 -> 1040..1103 shift of decodeCP1251
+ convert c
+ | oc >= 1040 && oc <= 1103 = chr (oc - 848) -- code points 1040..1103 are shifted down by 848, landing on 192..255
+ | otherwise = c -- anything outside 1040..1103 is left as is; it is neither replaced nor rejected
+ where oc = ord c -- numeric code point of the character being converted
+