diff options
| author | aarne <aarne@cs.chalmers.se> | 2005-11-17 23:17:42 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2005-11-17 23:17:42 +0000 |
| commit | 524c4829f9cc5720c18b8d43bd430d0627edcb89 (patch) | |
| tree | c10cc4dbb4b6f0bb5464369b1ed3d028c29fec18 /src/GF/UseGrammar/Custom.hs | |
| parent | e29a1430bf76b00c3714b72b7763190df6716081 (diff) | |
nondeterministic lexer, e.g. subseqs
Diffstat (limited to 'src/GF/UseGrammar/Custom.hs')
| -rw-r--r-- | src/GF/UseGrammar/Custom.hs | 32 |
1 file changed, 17 insertions, 15 deletions
```diff
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 75294ff4b..26bad1ee9 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -161,7 +161,7 @@ customStringCommand :: CustomData (StateGrammar -> String -> String)
 customParser :: CustomData (StateGrammar -> CFCat -> CFParser)
 
 -- | useTokenizer, \"-lexer=x\"
-customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
+customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]])
 
 -- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
 customUntokenizer :: CustomData (StateGrammar -> String -> String)
@@ -416,22 +416,24 @@ customParser =
 -- add your own parsers here
 ]
 
-customTokenizer =
+customTokenizer =
+ let sg = singleton in
 customData "Tokenizers, selected by option -lexer=x" $
 [
- (strCI "words", const $ tokWords)
- ,(strCI "literals", const $ tokLits)
- ,(strCI "vars", const $ tokVars)
- ,(strCI "chars", const $ map (tS . singleton))
- ,(strCI "code", const $ lexHaskell)
- ,(strCI "codevars", lexHaskellVar . stateIsWord)
- ,(strCI "text", const $ lexText)
- ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
- ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
- ,(strCI "textlit", lexTextLiteral . stateIsWord)
- ,(strCI "codeC", const $ lexC2M)
- ,(strCI "ignore", \gr -> lexIgnore (stateIsWord gr) . tokLits)
- ,(strCI "codeCHigh", const $ lexC2M' True)
+ (strCI "words", const $ sg . tokWords)
+ ,(strCI "literals", const $ sg . tokLits)
+ ,(strCI "vars", const $ sg . tokVars)
+ ,(strCI "chars", const $ sg . map (tS . singleton))
+ ,(strCI "code", const $ sg . lexHaskell)
+ ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr))
+ ,(strCI "text", const $ sg . lexText)
+ ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr))
+ ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
+ ,(strCI "textlit", \gr -> sg . (lexTextLiteral $ stateIsWord gr))
+ ,(strCI "codeC", const $ sg . lexC2M)
+ ,(strCI "ignore", \gr -> sg . lexIgnore (stateIsWord gr) . tokLits)
+ ,(strCI "subseqs", \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits)
+ ,(strCI "codeCHigh", const $ sg . lexC2M' True)
 -- add your own tokenizers here
 ]
 
```
