summaryrefslogtreecommitdiff
path: root/src/compiler/GF
diff options
context:
space:
mode:
author: hallgren <hallgren@chalmers.se>, 2014-04-08 14:07:49 +0000
committer: hallgren <hallgren@chalmers.se>, 2014-04-08 14:07:49 +0000
commit: 9cac98a3561386a9e190881ba64292f672f86500 (patch)
tree: 32ce1cb356fd53db4203d27e7efe6b0f644bff91 /src/compiler/GF
parent: 936f3d8297fac9e4f4de6a4b0f37305bfec0db91 (diff)
Move basic lexing functions from GF.Text.Lexing to the new module PGF.Lexing
They are thus part of the PGF Run-Time Library, making it possible to add lexing functionality to the PGF service in a natural way.
Diffstat (limited to 'src/compiler/GF')
-rw-r--r-- src/compiler/GF/Text/Lexing.hs 87
1 files changed, 3 insertions, 84 deletions
diff --git a/src/compiler/GF/Text/Lexing.hs b/src/compiler/GF/Text/Lexing.hs
index 87d6ba4f7..29647a786 100644
--- a/src/compiler/GF/Text/Lexing.hs
+++ b/src/compiler/GF/Text/Lexing.hs
@@ -1,12 +1,12 @@
+-- | Lexers and unlexers - they work on space-separated word strings
module GF.Text.Lexing (stringOp,opInEnv) where
import GF.Text.Transliterations
+import PGF.Lexing
-import Data.Char
+import Data.Char (isSpace)
import Data.List (intersperse)
--- lexers and unlexers - they work on space-separated word strings
-
stringOp :: String -> Maybe (String -> String)
stringOp name = case name of
"chars" -> Just $ appLexer (filter (not . all isSpace) . map return)
@@ -51,84 +51,3 @@ appUnlexer f = f . words
wrapHTML :: String -> String
wrapHTML = unlines . tag . intersperse "<br>" . lines where
tag ss = "<html>":"<head>":"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />":"</head>":"<body>" : ss ++ ["</body>","</html>"]
-
-lexText :: String -> [String]
-lexText = uncap . lext where
- lext s = case s of
- c:cs | isMajorPunct c -> [c] : uncap (lext cs)
- c:cs | isMinorPunct c -> [c] : lext cs
- c:cs | isSpace c -> lext cs
- _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
- _ -> [s]
- uncap s = case s of
- (c:cs):ws -> (toLower c : cs):ws
- _ -> s
-
--- | Haskell lexer, usable for much code
-lexCode :: String -> [String]
-lexCode ss = case lex ss of
- [(w@(_:_),ws)] -> w : lexCode ws
- _ -> []
-
--- | LaTeX style lexer, with "math" environment using Code between $...$
-lexMixed :: String -> [String]
-lexMixed = concat . alternate False where
- alternate env s = case s of
- _:_ -> case break (=='$') s of
- (t,[]) -> lex env t : []
- (t,c:m) -> lex env t : [[c]] : alternate (not env) m
- _ -> []
- lex env = if env then lexCode else lexText
-
-bindTok :: [String] -> String
-bindTok ws = case ws of
- w:"&+":ws2 -> w ++ bindTok ws2
- w:[] -> w
- w:ws2 -> w ++ " " ++ bindTok ws2
- [] -> ""
-
-unlexText :: [String] -> String
-unlexText = unlext where
- unlext s = case s of
- w:[] -> w
- w:[c]:[] | isPunct c -> w ++ [c]
- w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs)
- w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
- w:ws -> w ++ " " ++ unlext ws
- _ -> []
-
--- capitalize first letter
-capitInit s = case s of
- c:cs -> toUpper c : cs
- _ -> s
-
--- unquote each string of form "foo"
-unquote = map unq where
- unq s = case s of
- '"':cs@(_:_) | last cs == '"' -> init cs
- _ -> s
-
-unlexCode :: [String] -> String
-unlexCode s = case s of
- w:[] -> w
- [c]:cs | isParen c -> [c] ++ unlexCode cs
- w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
- w:ws -> w ++ " " ++ unlexCode ws
- _ -> []
-
-
-unlexMixed :: [String] -> String
-unlexMixed = concat . alternate False where
- alternate env s = case s of
- _:_ -> case break (=="$") s of
- (t,[]) -> unlex env t : []
- (t,c:m) -> unlex env t : sep env c : alternate (not env) m
- _ -> []
- unlex env = if env then unlexCode else unlexText
- sep env c = if env then c ++ " " else " " ++ c
-
-isPunct = flip elem ".?!,:;"
-isMajorPunct = flip elem ".?!"
-isMinorPunct = flip elem ",:;"
-isParen = flip elem "()[]{}"
-isClosing = flip elem ")]}"