Make the lexer nondeterministic: tokenizers now return a list of alternative token sequences ([[CFTok]]), e.g. the new "subseqs" lexer

author: aarne <aarne@cs.chalmers.se> 2005-11-17 23:17:42 +0000
committer: aarne <aarne@cs.chalmers.se> 2005-11-17 23:17:42 +0000
commit: 524c4829f9cc5720c18b8d43bd430d0627edcb89 (patch)
tree: c10cc4dbb4b6f0bb5464369b1ed3d028c29fec18 /src/GF/UseGrammar/Custom.hs
parent: e29a1430bf76b00c3714b72b7763190df6716081 (diff)
1 file changed, 17 insertions, 15 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 75294ff4b..26bad1ee9 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -161,7 +161,7 @@ customStringCommand  :: CustomData (StateGrammar -> String -> String)
 customParser         :: CustomData (StateGrammar -> CFCat -> CFParser)
 
 -- | useTokenizer, \"-lexer=x\"
-customTokenizer      :: CustomData (StateGrammar -> String -> [CFTok])  
+customTokenizer      :: CustomData (StateGrammar -> String -> [[CFTok]])  
 
 -- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
 customUntokenizer    :: CustomData (StateGrammar -> String -> String)  
@@ -416,22 +416,24 @@ customParser =
 -- add your own parsers here
   ]
 
-customTokenizer = 
+customTokenizer =
+  let sg = singleton in 
   customData "Tokenizers, selected by option -lexer=x" $
   [
-   (strCI "words",     const $ tokWords)
-  ,(strCI "literals",  const $ tokLits)
-  ,(strCI "vars",      const $ tokVars)
-  ,(strCI "chars",     const $ map (tS . singleton))
-  ,(strCI "code",      const $ lexHaskell)
-  ,(strCI "codevars",  lexHaskellVar . stateIsWord)
-  ,(strCI "text",      const $ lexText)
-  ,(strCI "unglue",    \gr -> map tS . decomposeWords (stateMorpho gr))
-  ,(strCI "codelit",   lexHaskellLiteral . stateIsWord)
-  ,(strCI "textlit",   lexTextLiteral . stateIsWord)
-  ,(strCI "codeC",     const $ lexC2M)
-  ,(strCI "ignore",    \gr -> lexIgnore (stateIsWord gr) . tokLits)
-  ,(strCI "codeCHigh", const $ lexC2M' True)
+   (strCI "words",     const $ sg . tokWords)
+  ,(strCI "literals",  const $ sg . tokLits)
+  ,(strCI "vars",      const $ sg . tokVars)
+  ,(strCI "chars",     const $ sg . map (tS . singleton))
+  ,(strCI "code",      const $ sg . lexHaskell)
+  ,(strCI "codevars",  \gr -> sg . (lexHaskellVar $ stateIsWord gr))
+  ,(strCI "text",      const $ sg . lexText)
+  ,(strCI "unglue",    \gr -> sg . map tS . decomposeWords (stateMorpho gr))
+  ,(strCI "codelit",   \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
+  ,(strCI "textlit",   \gr -> sg . (lexTextLiteral $ stateIsWord gr))
+  ,(strCI "codeC",     const $ sg . lexC2M)
+  ,(strCI "ignore",    \gr -> sg . lexIgnore (stateIsWord gr) . tokLits)
+  ,(strCI "subseqs",   \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits)
+  ,(strCI "codeCHigh", const $ sg . lexC2M' True)
 -- add your own tokenizers here
   ]
author	aarne <aarne@cs.chalmers.se>	2005-11-17 23:17:42 +0000
committer	aarne <aarne@cs.chalmers.se>	2005-11-17 23:17:42 +0000
commit	524c4829f9cc5720c18b8d43bd430d0627edcb89 (patch)
tree	c10cc4dbb4b6f0bb5464369b1ed3d028c29fec18 /src/GF/UseGrammar/Custom.hs
parent	e29a1430bf76b00c3714b72b7763190df6716081 (diff)