GF source lexer: allow numeric character escapes in string literals

This makes the output from PGF.showExpr (and other Haskell code that uses the Prelude.show function to show strings) parsable as GF source code in more cases. This is a workaround for the problem that GHC's implementation of the show function uses numeric escapes for printable non-ASCII characters, e.g. show "dålig" = "d\229lig"...
author: hallgren <hallgren@chalmers.se> 2015-09-29 12:18:35 +0000
committer: hallgren <hallgren@chalmers.se> 2015-09-29 12:18:35 +0000
commit: 1ccdd0d9fdea3a4f457101ba205c37f28abb8e70 (patch)
tree: f3fd5c385c1691ca79009d20ea7a47fdb49ab2ab /src/compiler/GF
parent: 35be1828241bb8dacdf326810af388b7b349e591 (diff)
1 file changed, 5 insertions, 6 deletions
diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x
index c2cbb4c47..f073bcdfc 100644
--- a/src/compiler/GF/Grammar/Lexer.x
+++ b/src/compiler/GF/Grammar/Lexer.x
@@ -16,6 +16,7 @@ import qualified Data.ByteString.Internal as BS(w2c)
 import qualified Data.ByteString.UTF8 as UTF8
 import qualified Data.Map as Map
 import Data.Word(Word8)
+import Data.Char(readLitChar)
 --import Debug.Trace(trace)
 }
 
@@ -39,7 +40,7 @@ $white+ ;
 \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
 (\_ | $l)($l | $d | \_ | \')*   { tok ident }
 
-\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . unpack) }
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | $d+)))* \" { tok (T_String . unescapeInitTail . unpack) }
 
 (\-)? $d+                       { tok (T_Integer . read . unpack) }
 (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double  . read . unpack) }
@@ -217,13 +218,11 @@ resWords = Map.fromList
 unescapeInitTail :: String -> String
 unescapeInitTail = unesc . tail where
   unesc s = case s of
-    '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
-    '\\':'n':cs  -> '\n' : unesc cs
-    '\\':'t':cs  -> '\t' : unesc cs
+    []         -> []
     '\"':[]    -> []
     '\'':[]    -> []
-    c:cs      -> c : unesc cs
-    _         -> []
+    _          -> case readLitChar s of
+                    [(c,cs)] -> c:unesc cs
 
 -------------------------------------------------------------------
 -- Alex wrapper code.
author	hallgren <hallgren@chalmers.se>	2015-09-29 12:18:35 +0000
committer	hallgren <hallgren@chalmers.se>	2015-09-29 12:18:35 +0000
commit	1ccdd0d9fdea3a4f457101ba205c37f28abb8e70 (patch)
tree	f3fd5c385c1691ca79009d20ea7a47fdb49ab2ab /src/compiler/GF
parent	35be1828241bb8dacdf326810af388b7b349e591 (diff)