summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: aarne <unknown> 2005-09-29 12:20:08 +0000
committer: aarne <unknown> 2005-09-29 12:20:08 +0000
commit: 1db8b90811b280a61acb3dc2961fe1ef45c6a2fb (patch)
tree: ec4b1524f493cb2b8dc50c253018c8f4b1aa84bb
parent: 34c0e7925922a353397802987ef61511504589f0 (diff)
lexer=ignore
-rw-r--r-- src/GF/UseGrammar/Custom.hs 5
-rw-r--r-- src/GF/UseGrammar/Tokenize.hs 15
2 files changed, 15 insertions, 5 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index c7c68362b..15e909004 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/18 22:55:46 $
+-- > CVS $Date: 2005/09/29 13:20:08 $
-- > CVS $Author: aarne $
--- > CVS $Revision: 1.74 $
+-- > CVS $Revision: 1.75 $
--
-- A database for customizable GF shell commands.
--
@@ -410,6 +410,7 @@ customTokenizer =
,(strCI "codelit", lexHaskellLiteral . stateIsWord)
,(strCI "textlit", lexTextLiteral . stateIsWord)
,(strCI "codeC", const $ lexC2M)
+ ,(strCI "ignore", \gr -> lexIgnore (stateIsWord gr) . tokLits)
,(strCI "codeCHigh", const $ lexC2M' True)
-- add your own tokenizers here
]
diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs
index 6a8119ac0..bfc0e53bb 100644
--- a/src/GF/UseGrammar/Tokenize.hs
+++ b/src/GF/UseGrammar/Tokenize.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/04/21 16:23:52 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.13 $
+-- > CVS $Date: 2005/09/29 13:20:08 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.14 $
--
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
-- an entry for each is included in 'Custom.customTokenizer'
@@ -22,6 +22,7 @@ module GF.UseGrammar.Tokenize ( tokWords,
lexText,
lexC2M, lexC2M',
lexTextLiteral,
+ lexIgnore
) where
import GF.Data.Operations
@@ -184,3 +185,11 @@ lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
eitherUpper isKnown w = isKnown w
+-- Drop every string token (TS) that the predicate isKnown rejects;
+-- all other tokens are kept unchanged (e.g. for keyword spotting).
+lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
+lexIgnore isKnown = filter keep
+  where
+    -- Only TS tokens are tested; any other token always passes.
+    keep (TS s) = isKnown s
+    keep _      = True