diff options
| author | hallgren <hallgren@chalmers.se> | 2014-04-08 14:07:49 +0000 |
|---|---|---|
| committer | hallgren <hallgren@chalmers.se> | 2014-04-08 14:07:49 +0000 |
| commit | 9cac98a3561386a9e190881ba64292f672f86500 (patch) | |
| tree | 32ce1cb356fd53db4203d27e7efe6b0f644bff91 /src/compiler/GF | |
| parent | 936f3d8297fac9e4f4de6a4b0f37305bfec0db91 (diff) | |
Move basic lexing functions from GF.Text.Lexing to the new module PGF.Lexing
They are thus part of the PGF Run-Time Library, making it possible to add
lexing functionality in PGF service in a natural way.
Diffstat (limited to 'src/compiler/GF')
| -rw-r--r-- | src/compiler/GF/Text/Lexing.hs | 87 |
1 files changed, 3 insertions, 84 deletions
diff --git a/src/compiler/GF/Text/Lexing.hs b/src/compiler/GF/Text/Lexing.hs index 87d6ba4f7..29647a786 100644 --- a/src/compiler/GF/Text/Lexing.hs +++ b/src/compiler/GF/Text/Lexing.hs @@ -1,12 +1,12 @@ +-- | Lexers and unlexers - they work on space-separated word strings module GF.Text.Lexing (stringOp,opInEnv) where import GF.Text.Transliterations +import PGF.Lexing -import Data.Char +import Data.Char (isSpace) import Data.List (intersperse) --- lexers and unlexers - they work on space-separated word strings - stringOp :: String -> Maybe (String -> String) stringOp name = case name of "chars" -> Just $ appLexer (filter (not . all isSpace) . map return) @@ -51,84 +51,3 @@ appUnlexer f = f . words wrapHTML :: String -> String wrapHTML = unlines . tag . intersperse "<br>" . lines where tag ss = "<html>":"<head>":"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />":"</head>":"<body>" : ss ++ ["</body>","</html>"] - -lexText :: String -> [String] -lexText = uncap . lext where - lext s = case s of - c:cs | isMajorPunct c -> [c] : uncap (lext cs) - c:cs | isMinorPunct c -> [c] : lext cs - c:cs | isSpace c -> lext cs - _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs - _ -> [s] - uncap s = case s of - (c:cs):ws -> (toLower c : cs):ws - _ -> s - --- | Haskell lexer, usable for much code -lexCode :: String -> [String] -lexCode ss = case lex ss of - [(w@(_:_),ws)] -> w : lexCode ws - _ -> [] - --- | LaTeX style lexer, with "math" environment using Code between $...$ -lexMixed :: String -> [String] -lexMixed = concat . alternate False where - alternate env s = case s of - _:_ -> case break (=='$') s of - (t,[]) -> lex env t : [] - (t,c:m) -> lex env t : [[c]] : alternate (not env) m - _ -> [] - lex env = if env then lexCode else lexText - -bindTok :: [String] -> String -bindTok ws = case ws of - w:"&+":ws2 -> w ++ bindTok ws2 - w:[] -> w - w:ws2 -> w ++ " " ++ bindTok ws2 - [] -> "" - -unlexText :: [String] -> String -unlexText = unlext where - unlext s = case s of - w:[] -> w - w:[c]:[] | isPunct c -> w ++ [c] - w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs) - w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs - w:ws -> w ++ " " ++ unlext ws - _ -> [] - --- capitalize first letter -capitInit s = case s of - c:cs -> toUpper c : cs - _ -> s - --- unquote each string of form "foo" -unquote = map unq where - unq s = case s of - '"':cs@(_:_) | last cs == '"' -> init cs - _ -> s - -unlexCode :: [String] -> String -unlexCode s = case s of - w:[] -> w - [c]:cs | isParen c -> [c] ++ unlexCode cs - w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs - w:ws -> w ++ " " ++ unlexCode ws - _ -> [] - - -unlexMixed :: [String] -> String -unlexMixed = concat . alternate False where - alternate env s = case s of - _:_ -> case break (=="$") s of - (t,[]) -> unlex env t : [] - (t,c:m) -> unlex env t : sep env c : alternate (not env) m - _ -> [] - unlex env = if env then unlexCode else unlexText - sep env c = if env then c ++ " " else " " ++ c - -isPunct = flip elem ".?!,:;" -isMajorPunct = flip elem ".?!" -isMinorPunct = flip elem ",:;" -isParen = flip elem "()[]{}" -isClosing = flip elem ")]}" |
