diff options
Diffstat (limited to 'src/binary/Data/Binary/Builder.hs')
| -rw-r--r-- | src/binary/Data/Binary/Builder.hs | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/src/binary/Data/Binary/Builder.hs b/src/binary/Data/Binary/Builder.hs new file mode 100644 index 000000000..20e287237 --- /dev/null +++ b/src/binary/Data/Binary/Builder.hs @@ -0,0 +1,425 @@ +{-# LANGUAGE CPP, MagicHash #-} +-- for unboxed shifts + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Binary.Builder +-- Copyright : Lennart Kolmodin, Ross Paterson +-- License : BSD3-style (see LICENSE) +-- +-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se> +-- Stability : experimental +-- Portability : portable to Hugs and GHC +-- +-- Efficient construction of lazy bytestrings. +-- +----------------------------------------------------------------------------- + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +#include "MachDeps.h" +#endif + +module Data.Binary.Builder ( + + -- * The Builder type + Builder + , toLazyByteString + + -- * Constructing Builders + , empty + , singleton + , append + , fromByteString -- :: S.ByteString -> Builder + , fromLazyByteString -- :: L.ByteString -> Builder + + -- * Flushing the buffer state + , flush + + -- * Derived Builders + -- ** Big-endian writes + , putWord16be -- :: Word16 -> Builder + , putWord32be -- :: Word32 -> Builder + , putWord64be -- :: Word64 -> Builder + + -- ** Little-endian writes + , putWord16le -- :: Word16 -> Builder + , putWord32le -- :: Word32 -> Builder + , putWord64le -- :: Word64 -> Builder + + -- ** Host-endian, unaligned writes + , putWordhost -- :: Word -> Builder + , putWord16host -- :: Word16 -> Builder + , putWord32host -- :: Word32 -> Builder + , putWord64host -- :: Word64 -> Builder + + ) where + +import Foreign +import Data.Monoid +import Data.Word +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L + +#ifdef BYTESTRING_IN_BASE +import Data.ByteString.Base (inlinePerformIO) +import qualified Data.ByteString.Base as S +#else +import Data.ByteString.Internal (inlinePerformIO) +import qualified Data.ByteString.Internal as S +import qualified Data.ByteString.Lazy.Internal as L +#endif + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +import GHC.Base +import GHC.Word (Word32(..),Word16(..),Word64(..)) + +#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608 +import GHC.Word (uncheckedShiftRL64#) +#endif +#endif + +------------------------------------------------------------------------ + +-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's. +-- There are several functions for constructing 'Builder's, but only one +-- to inspect them: to extract any data, you have to turn them into lazy +-- 'L.ByteString's using 'toLazyByteString'. +-- +-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte +-- arrays piece by piece. As each buffer is filled, it is \'popped\' +-- off, to become a new chunk of the resulting lazy 'L.ByteString'. +-- All this is hidden from the user of the 'Builder'. + +newtype Builder = Builder { + -- Invariant (from Data.ByteString.Lazy): + -- The lists include no null ByteStrings. + runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString] + } + +instance Monoid Builder where + mempty = empty + {-# INLINE mempty #-} + mappend = append + {-# INLINE mappend #-} + +------------------------------------------------------------------------ + +-- | /O(1)./ The empty Builder, satisfying +-- +-- * @'toLazyByteString' 'empty' = 'L.empty'@ +-- +empty :: Builder +empty = Builder id +{-# INLINE empty #-} + +-- | /O(1)./ A Builder taking a single byte, satisfying +-- +-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@ +-- +singleton :: Word8 -> Builder +singleton = writeN 1 . flip poke +{-# INLINE singleton #-} + +------------------------------------------------------------------------ + +-- | /O(1)./ The concatenation of two Builders, an associative operation +-- with identity 'empty', satisfying +-- +-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@ +-- +append :: Builder -> Builder -> Builder +append (Builder f) (Builder g) = Builder (f . g) +{-# INLINE append #-} + +-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying +-- +-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@ +-- +fromByteString :: S.ByteString -> Builder +fromByteString bs + | S.null bs = empty + | otherwise = flush `append` mapBuilder (bs :) +{-# INLINE fromByteString #-} + +-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying +-- +-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@ +-- +fromLazyByteString :: L.ByteString -> Builder +fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++) +{-# INLINE fromLazyByteString #-} + +------------------------------------------------------------------------ + +-- Our internal buffer type +data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- used bytes + {-# UNPACK #-} !Int -- length left + +------------------------------------------------------------------------ + +-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'. +-- The construction work takes place if and when the relevant part of +-- the lazy 'L.ByteString' is demanded. +-- +toLazyByteString :: Builder -> L.ByteString +toLazyByteString m = L.fromChunks $ unsafePerformIO $ do + buf <- newBuffer defaultSize + return (runBuilder (m `append` flush) (const []) buf) + +-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any, +-- yielding a new chunk in the result lazy 'L.ByteString'. +flush :: Builder +flush = Builder $ \ k buf@(Buffer p o u l) -> + if u == 0 + then k buf + else S.PS p o u : k (Buffer p (o+u) 0 l) + +------------------------------------------------------------------------ + +-- +-- copied from Data.ByteString.Lazy +-- +defaultSize :: Int +defaultSize = 32 * k - overhead + where k = 1024 + overhead = 2 * sizeOf (undefined :: Int) + +------------------------------------------------------------------------ + +-- | Sequence an IO operation on the buffer +unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder +unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do + buf' <- f buf + return (k buf') +{-# INLINE unsafeLiftIO #-} + +-- | Get the size of the buffer +withSize :: (Int -> Builder) -> Builder +withSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> + runBuilder (f l) k buf + +-- | Map the resulting list of bytestrings. +mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder +mapBuilder f = Builder (f .) + +------------------------------------------------------------------------ + +-- | Ensure that there are at least @n@ many bytes available. +ensureFree :: Int -> Builder +ensureFree n = n `seq` withSize $ \ l -> + if n <= l then empty else + flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize))) +{-# INLINE ensureFree #-} + +-- | Ensure that @n@ many bytes are available, and then use @f@ to write some +-- bytes into the memory. +writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder +writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f) +{-# INLINE writeN #-} + +writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer +writeNBuffer n f (Buffer fp o u l) = do + withForeignPtr fp (\p -> f (p `plusPtr` (o+u))) + return (Buffer fp o (u+n) (l-n)) +{-# INLINE writeNBuffer #-} + +newBuffer :: Int -> IO Buffer +newBuffer size = do + fp <- S.mallocByteString size + return $! Buffer fp 0 0 size +{-# INLINE newBuffer #-} + +------------------------------------------------------------------------ +-- Aligned, host order writes of storable values + +-- | Ensure that @n@ many bytes are available, and then use @f@ to write some +-- storable values into the memory. +writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder +writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f) +{-# INLINE writeNbytes #-} + +writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer +writeNBufferBytes n f (Buffer fp o u l) = do + withForeignPtr fp (\p -> f (p `plusPtr` (o+u))) + return (Buffer fp o (u+n) (l-n)) +{-# INLINE writeNBufferBytes #-} + +------------------------------------------------------------------------ + +-- +-- We rely on the fromIntegral to do the right masking for us. +-- The inlining here is critical, and can be worth 4x performance +-- + +-- | Write a Word16 in big endian format +putWord16be :: Word16 -> Builder +putWord16be w = writeN 2 $ \p -> do + poke p (fromIntegral (shiftr_w16 w 8) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (w) :: Word8) +{-# INLINE putWord16be #-} + +-- | Write a Word16 in little endian format +putWord16le :: Word16 -> Builder +putWord16le w = writeN 2 $ \p -> do + poke p (fromIntegral (w) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8) +{-# INLINE putWord16le #-} + +-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16) + +-- | Write a Word32 in big endian format +putWord32be :: Word32 -> Builder +putWord32be w = writeN 4 $ \p -> do + poke p (fromIntegral (shiftr_w32 w 24) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (w) :: Word8) +{-# INLINE putWord32be #-} + +-- +-- a data type to tag Put/Check. writes construct these which are then +-- inlined and flattened. matching Checks will be more robust with rules. +-- + +-- | Write a Word32 in little endian format +putWord32le :: Word32 -> Builder +putWord32le w = writeN 4 $ \p -> do + poke p (fromIntegral (w) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8) +{-# INLINE putWord32le #-} + +-- on a little endian machine: +-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32) + +-- | Write a Word64 in big endian format +putWord64be :: Word64 -> Builder +#if WORD_SIZE_IN_BITS < 64 +-- +-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to +-- Word32, and write that +-- +putWord64be w = + let a = fromIntegral (shiftr_w64 w 32) :: Word32 + b = fromIntegral w :: Word32 + in writeN 8 $ \p -> do + poke p (fromIntegral (shiftr_w32 a 24) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (a) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (b) :: Word8) +#else +putWord64be w = writeN 8 $ \p -> do + poke p (fromIntegral (shiftr_w64 w 56) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (w) :: Word8) +#endif +{-# INLINE putWord64be #-} + +-- | Write a Word64 in little endian format +putWord64le :: Word64 -> Builder + +#if WORD_SIZE_IN_BITS < 64 +putWord64le w = + let b = fromIntegral (shiftr_w64 w 32) :: Word32 + a = fromIntegral w :: Word32 + in writeN 8 $ \p -> do + poke (p) (fromIntegral (a) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (b) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8) +#else +putWord64le w = writeN 8 $ \p -> do + poke p (fromIntegral (w) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8) +#endif +{-# INLINE putWord64le #-} + +-- on a little endian machine: +-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64) + +------------------------------------------------------------------------ +-- Unaligned, word size ops + +-- | /O(1)./ A Builder taking a single native machine word. The word is +-- written in host order, host endian form, for the machine you're on. +-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine, +-- 4 bytes. Values written this way are not portable to +-- different endian or word sized machines, without conversion. +-- +putWordhost :: Word -> Builder +putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w) +{-# INLINE putWordhost #-} + +-- | Write a Word16 in native host order and host endianness. +-- 2 bytes will be written, unaligned. +putWord16host :: Word16 -> Builder +putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16) +{-# INLINE putWord16host #-} + +-- | Write a Word32 in native host order and host endianness. +-- 4 bytes will be written, unaligned. +putWord32host :: Word32 -> Builder +putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32) +{-# INLINE putWord32host #-} + +-- | Write a Word64 in native host order. +-- On a 32 bit machine we write two host order Word32s, in big endian form. +-- 8 bytes will be written, unaligned. +putWord64host :: Word64 -> Builder +putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w) +{-# INLINE putWord64host #-} + +------------------------------------------------------------------------ +-- Unchecked shifts + +{-# INLINE shiftr_w16 #-} +shiftr_w16 :: Word16 -> Int -> Word16 +{-# INLINE shiftr_w32 #-} +shiftr_w32 :: Word32 -> Int -> Word32 +{-# INLINE shiftr_w64 #-} +shiftr_w64 :: Word64 -> Int -> Word64 + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i) +shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i) + +#if WORD_SIZE_IN_BITS < 64 +shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i) + +#if __GLASGOW_HASKELL__ <= 606 +-- Exported by GHC.Word in GHC 6.8 and higher +foreign import ccall unsafe "stg_uncheckedShiftRL64" + uncheckedShiftRL64# :: Word64# -> Int# -> Word64# +#endif + +#else +shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i) +#endif + +#else +shiftr_w16 = shiftR +shiftr_w32 = shiftR +shiftr_w64 = shiftR +#endif |
