summary refs log tree commit diff
path: root/src/GF/UseGrammar/Custom.hs
diff options
context:
space:
mode:
author aarne <aarne@cs.chalmers.se> 2005-11-17 23:17:42 +0000
committer aarne <aarne@cs.chalmers.se> 2005-11-17 23:17:42 +0000
commit 524c4829f9cc5720c18b8d43bd430d0627edcb89 (patch)
tree c10cc4dbb4b6f0bb5464369b1ed3d028c29fec18 /src/GF/UseGrammar/Custom.hs
parent e29a1430bf76b00c3714b72b7763190df6716081 (diff)
nondeterministic lexer, e.g. subseqs
Diffstat (limited to 'src/GF/UseGrammar/Custom.hs')
-rw-r--r-- src/GF/UseGrammar/Custom.hs 32
1 files changed, 17 insertions, 15 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 75294ff4b..26bad1ee9 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -161,7 +161,7 @@ customStringCommand :: CustomData (StateGrammar -> String -> String)
customParser :: CustomData (StateGrammar -> CFCat -> CFParser)
-- | useTokenizer, \"-lexer=x\"
-customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
+customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]])
-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
customUntokenizer :: CustomData (StateGrammar -> String -> String)
@@ -416,22 +416,24 @@ customParser =
-- add your own parsers here
]
-customTokenizer =
+customTokenizer =
+ let sg = singleton in
customData "Tokenizers, selected by option -lexer=x" $
[
- (strCI "words", const $ tokWords)
- ,(strCI "literals", const $ tokLits)
- ,(strCI "vars", const $ tokVars)
- ,(strCI "chars", const $ map (tS . singleton))
- ,(strCI "code", const $ lexHaskell)
- ,(strCI "codevars", lexHaskellVar . stateIsWord)
- ,(strCI "text", const $ lexText)
- ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
- ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
- ,(strCI "textlit", lexTextLiteral . stateIsWord)
- ,(strCI "codeC", const $ lexC2M)
- ,(strCI "ignore", \gr -> lexIgnore (stateIsWord gr) . tokLits)
- ,(strCI "codeCHigh", const $ lexC2M' True)
+ (strCI "words", const $ sg . tokWords)
+ ,(strCI "literals", const $ sg . tokLits)
+ ,(strCI "vars", const $ sg . tokVars)
+ ,(strCI "chars", const $ sg . map (tS . singleton))
+ ,(strCI "code", const $ sg . lexHaskell)
+ ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr))
+ ,(strCI "text", const $ sg . lexText)
+ ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr))
+ ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
+ ,(strCI "textlit", \gr -> sg . (lexTextLiteral $ stateIsWord gr))
+ ,(strCI "codeC", const $ sg . lexC2M)
+ ,(strCI "ignore", \gr -> sg . lexIgnore (stateIsWord gr) . tokLits)
+ ,(strCI "subseqs", \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits)
+ ,(strCI "codeCHigh", const $ sg . lexC2M' True)
-- add your own tokenizers here
]