summaryrefslogtreecommitdiff
path: root/src/runtime/haskell/PGF/Lexing.hs
diff options
context:
space:
mode:
authorKrasimir Angelov <kr.angelov@gmail.com>2017-09-04 11:43:37 +0200
committerKrasimir Angelov <kr.angelov@gmail.com>2017-09-04 11:43:37 +0200
commit1f908fa7bf65f51540ccb2b70ca2bd00d9b3dedf (patch)
tree6211e867d908c3e40bf29dfa0c5d5ab0cbaf2c38 /src/runtime/haskell/PGF/Lexing.hs
parentcae52bb9af3f2735a31a4a64cb3b9d7750d0b2a9 (diff)
eliminate modules PGF.Lexing, PGF.LexingAGreek. Make PGF.Utilities an internal module in the runtime. These are not really part of the core runtime.
Diffstat (limited to 'src/runtime/haskell/PGF/Lexing.hs')
-rw-r--r--src/runtime/haskell/PGF/Lexing.hs115
1 files changed, 0 insertions, 115 deletions
diff --git a/src/runtime/haskell/PGF/Lexing.hs b/src/runtime/haskell/PGF/Lexing.hs
deleted file mode 100644
index 4dc352792..000000000
--- a/src/runtime/haskell/PGF/Lexing.hs
+++ /dev/null
@@ -1,115 +0,0 @@
-module PGF.Lexing where
-import Data.Char(isSpace,toLower,toUpper)
-
--- * Text lexing
--- | Text lexing with standard word capitalization of the first word of every sentence
-lexText :: String -> [String]
-lexText = lexText' uncapitInit
-
--- | Text lexing with custom treatment of the first word of every sentence.
-lexText' :: (String->String) -> String -> [String]
-lexText' uncap1 = uncap . lext where
- lext s = case s of
- c:cs | isMajorPunct c -> [c] : uncap (lext cs)
- c:cs | isMinorPunct c -> [c] : lext cs
- c:cs | isSpace c -> lext cs
- _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
- _ -> [s]
- uncap s = case s of
- w:ws -> uncap1 w:ws
- _ -> s
-
-unlexText :: [String] -> String
-unlexText = capitInit . unlext where
- unlext s = case s of
- w:[] -> w
- w:[c]:[] | isPunct c -> w ++ [c]
- w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs)
- w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
- w:ws -> w ++ " " ++ unlext ws
- _ -> []
-
--- | Bind tokens separated by Prelude.BIND, i.e. &+
-bindTok :: [String] -> [String]
-bindTok ws = case ws of
- w1:"&+":w2:ws -> bindTok ((w1++w2):ws)
- "&+":ws -> bindTok ws
- "&|":(c:cs):ws-> bindTok ((toUpper c:cs) : ws)
- "&|":ws -> bindTok ws
- w:ws -> w:bindTok ws
- [] -> []
-
--- * Code lexing
-
--- | Haskell lexer, usable for much code
-lexCode :: String -> [String]
-lexCode ss = case lex ss of
- [(w@(_:_),ws)] -> w : lexCode ws
- _ -> []
-
-unlexCode :: [String] -> String
-unlexCode s = case s of
- w:[] -> w
- [c]:cs | isParen c -> [c] ++ unlexCode cs
- w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
- w:ws -> w ++ " " ++ unlexCode ws
- _ -> []
-
-
--- | LaTeX lexer in the math mode: \ should not be separated from the next word
-
-lexLatexCode :: String -> [String]
-lexLatexCode = restoreBackslash . lexCode where --- quick hack: postprocess Haskell's lex
- restoreBackslash ws = case ws of
- "\\":w:ww -> ("\\" ++ w) : restoreBackslash ww
- w:ww -> w:restoreBackslash ww
- _ -> ws
-
--- * Mixed lexing
-
--- | LaTeX style lexer, with "math" environment using Code between $...$
-lexMixed :: String -> [String]
-lexMixed = concat . alternate False [] where
- alternate env t s = case s of
- '$':cs -> lex env (reverse t) : ["$"] : alternate (not env) [] cs
- '\\':c:cs | elem c "()[]" -> lex env (reverse t) : [['\\',c]] : alternate (not env) [] cs
- c:cs -> alternate env (c:t) cs
- _ -> [lex env (reverse t)]
- lex env = if env then lexLatexCode else lexText
-
-unlexMixed :: [String] -> String
-unlexMixed = capitInit . concat . alternate False where
- alternate env s = case s of
- _:_ -> case break (flip elem ["$","\\[","\\]","\\(","\\)"]) s of
- (t,[]) -> unlex env t : []
- (t,c:m) -> unlex env t : sep env c m : alternate (not env) m
- _ -> []
- unlex env = if env then unlexCode else (uncapitInit . unlexText)
- sep env c m = case (m,env) of
- ([p]:_,True) | isPunct p -> c -- closing $ glued to next punct
- (_, True) -> c ++ " " -- closing $ otherwise separated by space from what follows
- _ -> " " ++ c -- put space before opening $
-
--- * Additional lexing uitilties
-
--- | Capitalize first letter
-capitInit s = case s of
- c:cs -> toUpper c : cs
- _ -> s
-
--- | Uncapitalize first letter
-uncapitInit s = case s of
- c:cs -> toLower c : cs
- _ -> s
-
--- | Unquote each string wrapped in double quotes
-unquote = map unq where
- unq s = case s of
- '"':cs@(_:_) | last cs == '"' -> init cs
- _ -> s
-
-isPunct = flip elem ".?!,:;"
-isMajorPunct = flip elem ".?!"
-isMinorPunct = flip elem ",:;"
-isParen = flip elem "()[]{}"
-isClosing = flip elem ")]}"