summaryrefslogtreecommitdiff
path: root/src-3.0
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-15 13:25:22 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-15 13:25:22 +0000
commit8c3111e36a039b2070e796821216d2ff59e09ee6 (patch)
treeb261c5939892a8287c7ccee011899776efdac921 /src-3.0
parent486d21cd7a2364d187483c21fa615e71b0352f7e (diff)
UTF8 coding as ps command options, also -bind
Diffstat (limited to 'src-3.0')
-rw-r--r--src-3.0/GF/Command/Commands.hs17
-rw-r--r--src-3.0/GF/Text/Lexing.hs16
2 files changed, 30 insertions, 3 deletions
diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs
index 859e326ea..cb002f5d7 100644
--- a/src-3.0/GF/Command/Commands.hs
+++ b/src-3.0/GF/Command/Commands.hs
@@ -65,7 +65,9 @@ commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [
co ++ ", " ++ longname info,
synopsis info] ++ if full then [
+ "",
"syntax:" ++++ " " ++ syntax info,
+ "",
explanation info,
"options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info],
"flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info],
@@ -292,18 +294,29 @@ allCommands pgf = Map.fromList [
}),
("ps", emptyCommandInfo {
longname = "put_string",
+ syntax = "ps OPT? STRING",
synopsis = "return a string, possibly processed with a function",
explanation = unlines [
- "Returns a string obtained by its argument string by applying",
+ "Returns a string obtained from its argument string by applying",
"string processing functions in the order given in the command line",
"option list. Thus 'ps -f -g s' returns g (f s). Typical string processors",
- "are lexers and unlexers."
+ "are lexers and unlexers, but also character encoding conversions are possible.",
+ "The unlexers preserve the division of their input to lines."
],
+ examples = [
+ "l (EAdd 3 4) | ps -code -- linearize code-like output",
+ "ps -lexer=code | p -cat=Exp -- parse code-like input",
+ "gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
+ "ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UYF8 terminal"
+ ],
exec = \opts -> return . fromString . stringOps opts . toString,
options = [
+ ("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
+ ("from_utf8","decode from utf8"),
("lextext","text-like lexer"),
("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"),
+ ("to_utf8","encode to utf8"),
("unlextext","text-like unlexer"),
("unlexcode","code-like unlexer"),
("unlexmixed","mixture of text and code (code between $...$)"),
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 20dd7bd5e..5ad2a69b7 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -1,5 +1,7 @@
module GF.Text.Lexing (stringOp) where
+import GF.Text.UTF8
+
import Data.Char
-- lexers and unlexers - they work on space-separated word strings
@@ -9,16 +11,21 @@ stringOp name = case name of
"lextext" -> Just $ appLexer lexText
"lexcode" -> Just $ appLexer lexText
"lexmixed" -> Just $ appLexer lexMixed
+ "words" -> Just $ appLexer words
+ "bind" -> Just $ appUnlexer bindTok
"unlextext" -> Just $ appUnlexer unlexText
"unlexcode" -> Just $ appUnlexer unlexCode
"unlexmixed" -> Just $ appUnlexer unlexMixed
+ "unwords" -> Just $ appUnlexer unwords
+ "to_utf8" -> Just encodeUTF8
+ "from_utf8" -> Just decodeUTF8
_ -> Nothing
appLexer :: (String -> [String]) -> String -> String
appLexer f = unwords . filter (not . null) . f
appUnlexer :: ([String] -> String) -> String -> String
-appUnlexer f = f . words
+appUnlexer f = unlines . map (f . words) . lines
lexText :: String -> [String]
lexText s = case s of
@@ -43,6 +50,13 @@ lexMixed = concat . alternate False where
_ -> []
lex env = if env then lexCode else lexText
+bindTok :: [String] -> String
+bindTok ws = case ws of
+ w:"&+":ws2 -> w ++ bindTok ws2
+ w:[] -> w
+ w:ws2 -> w ++ " " ++ bindTok ws2
+ [] -> ""
+
unlexText :: [String] -> String
unlexText s = case s of
w:[] -> w