From 3f57151cc346be0fbf0726d4953f3529ea45e7f4 Mon Sep 17 00:00:00 2001 From: hallgren Date: Tue, 26 Nov 2013 16:12:03 +0000 Subject: Represent identifiers as UTF-8-encoded ByteStrings This was a fairly simple change thanks to previous work on making the Ident type abstract and the fact that PGF.CId already uses UTF-8-encoded ByteStrings. One potential pitfall is that Data.ByteString.UTF8 uses the same type for ByteStrings as Data.ByteString. I renamed ident2bs to ident2utf8 and bsCId to utf8CId, to make it clearer that they work with UTF-8-encoded ByteStrings. Since both the compiler input and identifiers are now UTF-8-encoded ByteStrings, the lexer now creates identifiers without copying any characters. ***END OF DESCRIPTION*** Place the long patch description above the ***END OF DESCRIPTION*** marker. The first line of this file will be the patch name. This patch contains the following changes: M ./src/compiler/GF/Compile/CheckGrammar.hs -3 +3 M ./src/compiler/GF/Compile/GrammarToPGF.hs -2 +2 M ./src/compiler/GF/Grammar/Binary.hs -5 +1 M ./src/compiler/GF/Grammar/Lexer.x -11 +13 M ./src/compiler/GF/Infra/Ident.hs -19 +36 M ./src/runtime/haskell/PGF.hs -1 +1 M ./src/runtime/haskell/PGF/CId.hs -2 +3 --- src/compiler/GF/Grammar/Binary.hs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/compiler/GF/Grammar/Binary.hs') diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 6cd3832c0..7b4540ce5 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -30,7 +30,7 @@ import PGF.Data(Literal(..)) gfoVersion = "GF03" instance Binary Ident where - put id = put (ident2bs id) + put id = put (ident2utf8 id) get = do bs <- get if bs == BS.pack "_" then return identW @@ -295,10 +295,6 @@ instance Binary Label where 1 -> fmap LVar get _ -> decodingError -instance Binary RawIdent where - put = put . rawId2bs - get = fmap rawIdentC get - --putGFOVersion = mapM_ (putWord8 . fromIntegral . 
ord) gfoVersion --getGFOVersion = replicateM (length gfoVersion) (fmap (chr . fromIntegral) getWord8) --putGFOVersion = put gfoVersion -- cgit v1.2.3