Represent identifiers as UTF-8-encoded ByteStrings

This was a fairly simple change thanks to previous work on making the Ident type abstract and the fact that PGF.CId already uses UTF-8-encoded ByteStrings. One potential pitfall is that Data.ByteString.UTF8 uses the same type for ByteStrings as Data.ByteString, so the type checker cannot tell UTF-8-encoded ByteStrings apart from other ByteStrings. I renamed ident2bs to ident2utf8 and bsCId to utf8CId, to make it clearer that they work with UTF-8-encoded ByteStrings. Since both the compiler input and identifiers are now UTF-8-encoded ByteStrings, the lexer now creates identifiers without copying any characters.

***END OF DESCRIPTION***

Place the long patch description above the ***END OF DESCRIPTION*** marker. The first line of this file will be the patch name.

This patch contains the following changes:

M ./src/compiler/GF/Compile/CheckGrammar.hs -3 +3
M ./src/compiler/GF/Compile/GrammarToPGF.hs -2 +2
M ./src/compiler/GF/Grammar/Binary.hs -5 +1
M ./src/compiler/GF/Grammar/Lexer.x -11 +13
M ./src/compiler/GF/Infra/Ident.hs -19 +36
M ./src/runtime/haskell/PGF.hs -1 +1
M ./src/runtime/haskell/PGF/CId.hs -2 +3
author: hallgren <hallgren@chalmers.se> 2013-11-26 16:12:03 +0000
committer: hallgren <hallgren@chalmers.se> 2013-11-26 16:12:03 +0000
commit: 3f57151cc346be0fbf0726d4953f3529ea45e7f4 (patch)
tree: 6106269ff6ea7abb0c27d15cafdd818bb91b6bf7 /src/compiler/GF/Compile/GrammarToPGF.hs
parent: 9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (diff)
1 file changed, 2 insertions, 2 deletions
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index 25db4bac7..5917b709c 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -5,7 +5,7 @@ module GF.Compile.GrammarToPGF (mkCanon2pgf) where
 import GF.Compile.GeneratePMCFG
 import GF.Compile.GenerateBC
 
-import PGF(CId,mkCId,bsCId)
+import PGF(CId,mkCId,utf8CId)
 import PGF.Data(fidInt,fidFloat,fidString,fidVar)
 import PGF.Optimize(updateProductionIndices)
 --import qualified PGF.Macros as CM
@@ -103,7 +103,7 @@ mkCanon2pgf opts gr am = do
           return (seqs, ((m,id), info) : is)
 
 i2i :: Ident -> CId
-i2i = bsCId . ident2bs
+i2i = utf8CId . ident2utf8
 
 mkType :: [Ident] -> A.Type -> C.Type
 mkType scope t =
author	hallgren <hallgren@chalmers.se>	2013-11-26 16:12:03 +0000
committer	hallgren <hallgren@chalmers.se>	2013-11-26 16:12:03 +0000
commit	3f57151cc346be0fbf0726d4953f3529ea45e7f4 (patch)
tree	6106269ff6ea7abb0c27d15cafdd818bb91b6bf7 /src/compiler/GF/Compile/GrammarToPGF.hs
parent	9d7fdf7c9a525a3b5659a566f76d26d151dcd664 (diff)