summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-11-10 15:53:38 +0000
committeraarne <aarne@cs.chalmers.se>2008-11-10 15:53:38 +0000
commitd9ff5aa48cbbc7bc4388cd743aa354f60ab125a5 (patch)
treeaf61d385c0273a1ff1c8f3eba6335e64ce346c1d
parent7e82e4a71036d65e955bb160e8ba3cadbfba31d9 (diff)
lexer documentation in help and tutorial updated
-rw-r--r--doc/gf-tutorial.txt24
-rw-r--r--src/GF/Command/Commands.hs13
2 files changed, 12 insertions, 25 deletions
diff --git a/doc/gf-tutorial.txt b/doc/gf-tutorial.txt
index cf9518cff..79d924a16 100644
--- a/doc/gf-tutorial.txt
+++ b/doc/gf-tutorial.txt
@@ -4277,24 +4277,12 @@ In linearization, we use a corresponding **unlexer**:
===Most common lexers and unlexers===
- || lexer | description ||
- | ``words`` | (default) tokens are separated by spaces or newlines
- | ``literals`` | like words, but integer and string literals recognized
- | ``chars`` | each character is a token
- | ``code`` | program code conventions (uses Haskell's lex)
- | ``text`` | with conventions on punctuation and capital letters
- | ``codelit`` | like code, but recognize literals (unknown words as strings)
- | ``textlit`` | like text, but recognize literals (unknown words as strings)
-
- || unlexer | description ||
- | ``unwords`` | (default) space-separated token list
- | ``text`` | format as text: punctuation, capitals, paragraph <p>
- | ``code`` | format as code (spacing, indentation)
- | ``textlit`` | like text, but remove string literal quotes
- | ``codelit`` | like code, but remove string literal quotes
- | ``concat`` | remove all spaces
-
-%TODO: update the names
+ || lexer | unlexer | description ||
+ | ``chars`` | ``unchars`` | each character is a token
+ | ``lexcode`` | ``unlexcode`` | program code conventions (uses Haskell's lex)
+ | ``lexmixed`` | ``unlexmixed`` | like text, but between $ signs like code
+ | ``lextext`` | ``unlextext`` | with conventions on punctuation and capitals
+ | ``words`` | ``unwords`` | (default) tokens separated by space characters
%TODO: also on alphabet encodings - although somewhere else
diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs
index 4d6a29ce7..63d7ff06a 100644
--- a/src/GF/Command/Commands.hs
+++ b/src/GF/Command/Commands.hs
@@ -360,11 +360,10 @@ allCommands cod pgf = Map.fromList [
"To see transliteration tables, use command ut."
],
examples = [
- "l (EAdd 3 4) | ps -code -- linearize code-like output",
- "ps -lexer=code | p -cat=Exp -- parse code-like input",
- "gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
- "ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal",
- "ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
+ "l (EAdd 3 4) | ps -code -- linearize code-like output",
+ "ps -lexer=code | p -cat=Exp -- parse code-like input",
+ "gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
+ "ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal"
],
exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString,
options = stringOpOptions
@@ -626,7 +625,7 @@ stringOpOptions = [
("from_devanagari","from unicode to GF Devanagari transliteration"),
("from_thai","from unicode to GF Telugu transliteration"),
("from_thai","from unicode to GF Thai transliteration"),
- ("from_utf8","decode from utf8"),
+ ("from_utf8","decode from utf8 (default)"),
("lextext","text-like lexer"),
("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"),
@@ -636,7 +635,7 @@ stringOpOptions = [
("to_html","wrap in a html file with linebreaks"),
("to_telugu","from GF Telugu transliteration to unicode"),
("to_thai","from GF Thai transliteration to unicode"),
- ("to_utf8","encode to utf8"),
+ ("to_utf8","encode to utf8 (default)"),
("unlextext","text-like unlexer"),
("unlexcode","code-like unlexer"),
("unlexmixed","mixture of text and code (code between $...$)"),