summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Text
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-12 14:36:03 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-12 14:36:03 +0000
commit62b04f399cd271f2e1eed783f63e31727f4ddc65 (patch)
treeae0e5f6abe1c228082b56141b7698043bc9d2c30 /src-3.0/GF/Text
parent96230f84f5aa6ee4775bdf469010ace7b8a902d5 (diff)
added command ps -stringop, with stringop being (un)lexer defined in Lexing
Diffstat (limited to 'src-3.0/GF/Text')
-rw-r--r--src-3.0/GF/Text/Lexing.hs76
1 files changed, 76 insertions, 0 deletions
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
new file mode 100644
index 000000000..20dd7bd5e
--- /dev/null
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -0,0 +1,76 @@
+module GF.Text.Lexing (stringOp) where
+
+import Data.Char
+
+-- lexers and unlexers - they work on space-separated word strings
+
+stringOp :: String -> Maybe (String -> String)
+stringOp name = case name of
+ "lextext" -> Just $ appLexer lexText
+ "lexcode" -> Just $ appLexer lexText
+ "lexmixed" -> Just $ appLexer lexMixed
+ "unlextext" -> Just $ appUnlexer unlexText
+ "unlexcode" -> Just $ appUnlexer unlexCode
+ "unlexmixed" -> Just $ appUnlexer unlexMixed
+ _ -> Nothing
+
+appLexer :: (String -> [String]) -> String -> String
+appLexer f = unwords . filter (not . null) . f
+
+appUnlexer :: ([String] -> String) -> String -> String
+appUnlexer f = f . words
+
+lexText :: String -> [String]
+lexText s = case s of
+ c:cs | isPunct c -> [c] : lexText cs
+ c:cs | isSpace c -> lexText cs
+ _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lexText cs
+ _ -> [s]
+
+-- | Haskell lexer, usable for much code
+lexCode :: String -> [String]
+lexCode ss = case lex ss of
+ [(w@(_:_),ws)] -> w : lexCode ws
+ _ -> []
+
+-- | LaTeX style lexer, with "math" environment using Code between $...$
+lexMixed :: String -> [String]
+lexMixed = concat . alternate False where
+ alternate env s = case s of
+ _:_ -> case break (=='$') s of
+ (t,[]) -> lex env t : []
+ (t,c:m) -> lex env t : [[c]] : alternate (not env) m
+ _ -> []
+ lex env = if env then lexCode else lexText
+
+unlexText :: [String] -> String
+unlexText s = case s of
+ w:[] -> w
+ w:[c]:[] | isPunct c -> w ++ [c]
+ w:[c]:cs | isPunct c -> w ++ [c] ++ " " ++ unlexText cs
+ w:ws -> w ++ " " ++ unlexText ws
+ _ -> []
+
+unlexCode :: [String] -> String
+unlexCode s = case s of
+ w:[] -> w
+ [c]:cs | isParen c -> [c] ++ unlexCode cs
+ w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
+ w:ws -> w ++ " " ++ unlexCode ws
+ _ -> []
+
+
+unlexMixed :: [String] -> String
+unlexMixed = concat . alternate False where
+ alternate env s = case s of
+ _:_ -> case break (=="$") s of
+ (t,[]) -> unlex env t : []
+ (t,c:m) -> unlex env t : sep env c : alternate (not env) m
+ _ -> []
+ unlex env = if env then unlexCode else unlexText
+ sep env c = if env then c ++ " " else " " ++ c
+
+isPunct = flip elem ".?!,:;"
+isParen = flip elem "()[]{}"
+isClosing = flip elem ")]}"
+