summary | refs | log | tree | commit | diff
path: root/src/compiler/GF
diff options
context:
space:
mode:
author: hallgren <hallgren@chalmers.se> 2015-09-29 12:18:35 +0000
committer: hallgren <hallgren@chalmers.se> 2015-09-29 12:18:35 +0000
commit: 1ccdd0d9fdea3a4f457101ba205c37f28abb8e70 (patch)
tree: f3fd5c385c1691ca79009d20ea7a47fdb49ab2ab /src/compiler/GF
parent: 35be1828241bb8dacdf326810af388b7b349e591 (diff)
GF source lexer: allow numeric character escapes in string literals
This makes the output from PGF.showExpr (and other Haskell code that uses the Prelude.show function to show strings) parsable as GF source code in more cases. This is a workaround for the problem that GHC's implementation of the show function uses numeric escapes for printable non-ASCII characters, e.g. show "dålig" = "d\229lig"...
Diffstat (limited to 'src/compiler/GF')
-rw-r--r-- src/compiler/GF/Grammar/Lexer.x | 11
1 files changed, 5 insertions, 6 deletions
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index c2cbb4c47..f073bcdfc 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -16,6 +16,7 @@ import qualified Data.ByteString.Internal as BS(w2c)
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map
import Data.Word(Word8)
+import Data.Char(readLitChar)
--import Debug.Trace(trace)
}
@@ -39,7 +40,7 @@ $white+ ;
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
(\_ | $l)($l | $d | \_ | \')* { tok ident }
-\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | $d+)))* \" { tok (T_String . unescapeInitTail . unpack) }
(\-)? $d+ { tok (T_Integer . read . unpack) }
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
@@ -217,13 +218,11 @@ resWords = Map.fromList
unescapeInitTail :: String -> String
unescapeInitTail = unesc . tail where
unesc s = case s of
- '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
- '\\':'n':cs -> '\n' : unesc cs
- '\\':'t':cs -> '\t' : unesc cs
+ [] -> []
'\"':[] -> []
'\'':[] -> []
- c:cs -> c : unesc cs
- _ -> []
+ _ -> case readLitChar s of
+ [(c,cs)] -> c:unesc cs
-------------------------------------------------------------------
-- Alex wrapper code.