diff options
| author | hallgren <hallgren@chalmers.se> | 2015-09-29 12:18:35 +0000 |
|---|---|---|
| committer | hallgren <hallgren@chalmers.se> | 2015-09-29 12:18:35 +0000 |
| commit | 1ccdd0d9fdea3a4f457101ba205c37f28abb8e70 (patch) | |
| tree | f3fd5c385c1691ca79009d20ea7a47fdb49ab2ab /src/compiler/GF | |
| parent | 35be1828241bb8dacdf326810af388b7b349e591 (diff) | |
GF source lexer: allow numeric character escapes in string literals

This makes the output from PGF.showExpr (and other Haskell code that uses
the Prelude.show function to show strings) parsable as GF source code in
more cases.

This is a workaround for the problem that GHC's implementation of the show
function uses numeric escapes for printable non-ASCII characters, e.g.
show "dålig" = "d\229lig"...

Diffstat (limited to 'src/compiler/GF')
| -rw-r--r-- | src/compiler/GF/Grammar/Lexer.x | 11 |
1 files changed, 5 insertions, 6 deletions
```diff
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index c2cbb4c47..f073bcdfc 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -16,6 +16,7 @@ import qualified Data.ByteString.Internal as BS(w2c)
 import qualified Data.ByteString.UTF8 as UTF8
 import qualified Data.Map as Map
 import Data.Word(Word8)
+import Data.Char(readLitChar)
 --import Debug.Trace(trace)
 }
 
@@ -39,7 +40,7 @@ $white+ ;
 \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
 (\_ | $l)($l | $d | \_ | \')* { tok ident }
 
-\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | $d+)))* \" { tok (T_String . unescapeInitTail . unpack) }
 
 (\-)? $d+ { tok (T_Integer . read . unpack) }
 (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
@@ -217,13 +218,11 @@ resWords = Map.fromList
 unescapeInitTail :: String -> String
 unescapeInitTail = unesc . tail where
   unesc s = case s of
-    '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
-    '\\':'n':cs -> '\n' : unesc cs
-    '\\':'t':cs -> '\t' : unesc cs
+    [] -> []
     '\"':[] -> []
     '\'':[] -> []
-    c:cs -> c : unesc cs
-    _ -> []
+    _ -> case readLitChar s of
+           [(c,cs)] -> c:unesc cs
 
 -------------------------------------------------------------------
 -- Alex wrapper code.
```
