summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text
diff options
context:
space:
mode:
author: aarne <aarne@cs.chalmers.se> 2008-06-15 13:25:22 +0000
committer: aarne <aarne@cs.chalmers.se> 2008-06-15 13:25:22 +0000
commit: 8c3111e36a039b2070e796821216d2ff59e09ee6 (patch)
tree: b261c5939892a8287c7ccee011899776efdac921 /src-3.0/GF/Text
parent: 486d21cd7a2364d187483c21fa615e71b0352f7e (diff)
UTF8 coding as ps command options, also -bind
Diffstat (limited to 'src-3.0/GF/Text')
-rw-r--r-- src-3.0/GF/Text/Lexing.hs 16
1 files changed, 15 insertions, 1 deletions
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 20dd7bd5e..5ad2a69b7 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -1,5 +1,7 @@
module GF.Text.Lexing (stringOp) where
+import GF.Text.UTF8
+
import Data.Char
-- lexers and unlexers - they work on space-separated word strings
@@ -9,16 +11,21 @@ stringOp name = case name of
"lextext" -> Just $ appLexer lexText
"lexcode" -> Just $ appLexer lexText
"lexmixed" -> Just $ appLexer lexMixed
+ "words" -> Just $ appLexer words
+ "bind" -> Just $ appUnlexer bindTok
"unlextext" -> Just $ appUnlexer unlexText
"unlexcode" -> Just $ appUnlexer unlexCode
"unlexmixed" -> Just $ appUnlexer unlexMixed
+ "unwords" -> Just $ appUnlexer unwords
+ "to_utf8" -> Just encodeUTF8
+ "from_utf8" -> Just decodeUTF8
_ -> Nothing
appLexer :: (String -> [String]) -> String -> String
appLexer f = unwords . filter (not . null) . f
appUnlexer :: ([String] -> String) -> String -> String
-appUnlexer f = f . words
+appUnlexer f = unlines . map (f . words) . lines
lexText :: String -> [String]
lexText s = case s of
@@ -43,6 +50,13 @@ lexMixed = concat . alternate False where
_ -> []
lex env = if env then lexCode else lexText
+bindTok :: [String] -> String
+bindTok ws = case ws of
+ w:"&+":ws2 -> w ++ bindTok ws2
+ w:[] -> w
+ w:ws2 -> w ++ " " ++ bindTok ws2
+ [] -> ""
+
unlexText :: [String] -> String
unlexText s = case s of
w:[] -> w