diff options
Diffstat (limited to 'src/binary/Data/Binary')
| -rw-r--r-- | src/binary/Data/Binary/Builder.hs | 425 | ||||
| -rw-r--r-- | src/binary/Data/Binary/Get.hs | 543 | ||||
| -rw-r--r-- | src/binary/Data/Binary/IEEE754.lhs | 402 | ||||
| -rw-r--r-- | src/binary/Data/Binary/Put.hs | 216 |
4 files changed, 1586 insertions, 0 deletions
diff --git a/src/binary/Data/Binary/Builder.hs b/src/binary/Data/Binary/Builder.hs new file mode 100644 index 000000000..20e287237 --- /dev/null +++ b/src/binary/Data/Binary/Builder.hs @@ -0,0 +1,425 @@ +{-# LANGUAGE CPP, MagicHash #-} +-- for unboxed shifts + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Binary.Builder +-- Copyright : Lennart Kolmodin, Ross Paterson +-- License : BSD3-style (see LICENSE) +-- +-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se> +-- Stability : experimental +-- Portability : portable to Hugs and GHC +-- +-- Efficient construction of lazy bytestrings. +-- +----------------------------------------------------------------------------- + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +#include "MachDeps.h" +#endif + +module Data.Binary.Builder ( + + -- * The Builder type + Builder + , toLazyByteString + + -- * Constructing Builders + , empty + , singleton + , append + , fromByteString -- :: S.ByteString -> Builder + , fromLazyByteString -- :: L.ByteString -> Builder + + -- * Flushing the buffer state + , flush + + -- * Derived Builders + -- ** Big-endian writes + , putWord16be -- :: Word16 -> Builder + , putWord32be -- :: Word32 -> Builder + , putWord64be -- :: Word64 -> Builder + + -- ** Little-endian writes + , putWord16le -- :: Word16 -> Builder + , putWord32le -- :: Word32 -> Builder + , putWord64le -- :: Word64 -> Builder + + -- ** Host-endian, unaligned writes + , putWordhost -- :: Word -> Builder + , putWord16host -- :: Word16 -> Builder + , putWord32host -- :: Word32 -> Builder + , putWord64host -- :: Word64 -> Builder + + ) where + +import Foreign +import Data.Monoid +import Data.Word +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L + +#ifdef BYTESTRING_IN_BASE +import Data.ByteString.Base (inlinePerformIO) +import qualified Data.ByteString.Base as S +#else +import Data.ByteString.Internal (inlinePerformIO) +import qualified Data.ByteString.Internal as S +import qualified Data.ByteString.Lazy.Internal as L +#endif + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +import GHC.Base +import GHC.Word (Word32(..),Word16(..),Word64(..)) + +#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608 +import GHC.Word (uncheckedShiftRL64#) +#endif +#endif + +------------------------------------------------------------------------ + +-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's. +-- There are several functions for constructing 'Builder's, but only one +-- to inspect them: to extract any data, you have to turn them into lazy +-- 'L.ByteString's using 'toLazyByteString'. +-- +-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte +-- arrays piece by piece. As each buffer is filled, it is \'popped\' +-- off, to become a new chunk of the resulting lazy 'L.ByteString'. +-- All this is hidden from the user of the 'Builder'. + +newtype Builder = Builder { + -- Invariant (from Data.ByteString.Lazy): + -- The lists include no null ByteStrings. + runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString] + } + +instance Monoid Builder where + mempty = empty + {-# INLINE mempty #-} + mappend = append + {-# INLINE mappend #-} + +------------------------------------------------------------------------ + +-- | /O(1)./ The empty Builder, satisfying +-- +-- * @'toLazyByteString' 'empty' = 'L.empty'@ +-- +empty :: Builder +empty = Builder id +{-# INLINE empty #-} + +-- | /O(1)./ A Builder taking a single byte, satisfying +-- +-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@ +-- +singleton :: Word8 -> Builder +singleton = writeN 1 . flip poke +{-# INLINE singleton #-} + +------------------------------------------------------------------------ + +-- | /O(1)./ The concatenation of two Builders, an associative operation +-- with identity 'empty', satisfying +-- +-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@ +-- +append :: Builder -> Builder -> Builder +append (Builder f) (Builder g) = Builder (f . g) +{-# INLINE append #-} + +-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying +-- +-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@ +-- +fromByteString :: S.ByteString -> Builder +fromByteString bs + | S.null bs = empty + | otherwise = flush `append` mapBuilder (bs :) +{-# INLINE fromByteString #-} + +-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying +-- +-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@ +-- +fromLazyByteString :: L.ByteString -> Builder +fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++) +{-# INLINE fromLazyByteString #-} + +------------------------------------------------------------------------ + +-- Our internal buffer type +data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- used bytes + {-# UNPACK #-} !Int -- length left + +------------------------------------------------------------------------ + +-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'. +-- The construction work takes place if and when the relevant part of +-- the lazy 'L.ByteString' is demanded. +-- +toLazyByteString :: Builder -> L.ByteString +toLazyByteString m = L.fromChunks $ unsafePerformIO $ do + buf <- newBuffer defaultSize + return (runBuilder (m `append` flush) (const []) buf) + +-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any, +-- yielding a new chunk in the result lazy 'L.ByteString'. +flush :: Builder +flush = Builder $ \ k buf@(Buffer p o u l) -> + if u == 0 + then k buf + else S.PS p o u : k (Buffer p (o+u) 0 l) + +------------------------------------------------------------------------ + +-- +-- copied from Data.ByteString.Lazy +-- +defaultSize :: Int +defaultSize = 32 * k - overhead + where k = 1024 + overhead = 2 * sizeOf (undefined :: Int) + +------------------------------------------------------------------------ + +-- | Sequence an IO operation on the buffer +unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder +unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do + buf' <- f buf + return (k buf') +{-# INLINE unsafeLiftIO #-} + +-- | Get the size of the buffer +withSize :: (Int -> Builder) -> Builder +withSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> + runBuilder (f l) k buf + +-- | Map the resulting list of bytestrings. +mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder +mapBuilder f = Builder (f .) + +------------------------------------------------------------------------ + +-- | Ensure that there are at least @n@ many bytes available. +ensureFree :: Int -> Builder +ensureFree n = n `seq` withSize $ \ l -> + if n <= l then empty else + flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize))) +{-# INLINE ensureFree #-} + +-- | Ensure that @n@ many bytes are available, and then use @f@ to write some +-- bytes into the memory. +writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder +writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f) +{-# INLINE writeN #-} + +writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer +writeNBuffer n f (Buffer fp o u l) = do + withForeignPtr fp (\p -> f (p `plusPtr` (o+u))) + return (Buffer fp o (u+n) (l-n)) +{-# INLINE writeNBuffer #-} + +newBuffer :: Int -> IO Buffer +newBuffer size = do + fp <- S.mallocByteString size + return $! Buffer fp 0 0 size +{-# INLINE newBuffer #-} + +------------------------------------------------------------------------ +-- Aligned, host order writes of storable values + +-- | Ensure that @n@ many bytes are available, and then use @f@ to write some +-- storable values into the memory. +writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder +writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f) +{-# INLINE writeNbytes #-} + +writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer +writeNBufferBytes n f (Buffer fp o u l) = do + withForeignPtr fp (\p -> f (p `plusPtr` (o+u))) + return (Buffer fp o (u+n) (l-n)) +{-# INLINE writeNBufferBytes #-} + +------------------------------------------------------------------------ + +-- +-- We rely on the fromIntegral to do the right masking for us. +-- The inlining here is critical, and can be worth 4x performance +-- + +-- | Write a Word16 in big endian format +putWord16be :: Word16 -> Builder +putWord16be w = writeN 2 $ \p -> do + poke p (fromIntegral (shiftr_w16 w 8) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (w) :: Word8) +{-# INLINE putWord16be #-} + +-- | Write a Word16 in little endian format +putWord16le :: Word16 -> Builder +putWord16le w = writeN 2 $ \p -> do + poke p (fromIntegral (w) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8) +{-# INLINE putWord16le #-} + +-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16) + +-- | Write a Word32 in big endian format +putWord32be :: Word32 -> Builder +putWord32be w = writeN 4 $ \p -> do + poke p (fromIntegral (shiftr_w32 w 24) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (w) :: Word8) +{-# INLINE putWord32be #-} + +-- +-- a data type to tag Put/Check. writes construct these which are then +-- inlined and flattened. matching Checks will be more robust with rules. +-- + +-- | Write a Word32 in little endian format +putWord32le :: Word32 -> Builder +putWord32le w = writeN 4 $ \p -> do + poke p (fromIntegral (w) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8) +{-# INLINE putWord32le #-} + +-- on a little endian machine: +-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32) + +-- | Write a Word64 in big endian format +putWord64be :: Word64 -> Builder +#if WORD_SIZE_IN_BITS < 64 +-- +-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to +-- Word32, and write that +-- +putWord64be w = + let a = fromIntegral (shiftr_w64 w 32) :: Word32 + b = fromIntegral w :: Word32 + in writeN 8 $ \p -> do + poke p (fromIntegral (shiftr_w32 a 24) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (a) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (b) :: Word8) +#else +putWord64be w = writeN 8 $ \p -> do + poke p (fromIntegral (shiftr_w64 w 56) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (w) :: Word8) +#endif +{-# INLINE putWord64be #-} + +-- | Write a Word64 in little endian format +putWord64le :: Word64 -> Builder + +#if WORD_SIZE_IN_BITS < 64 +putWord64le w = + let b = fromIntegral (shiftr_w64 w 32) :: Word32 + a = fromIntegral w :: Word32 + in writeN 8 $ \p -> do + poke (p) (fromIntegral (a) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (b) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8) +#else +putWord64le w = writeN 8 $ \p -> do + poke p (fromIntegral (w) :: Word8) + poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8) + poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8) + poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8) + poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8) + poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8) + poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8) + poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8) +#endif +{-# INLINE putWord64le #-} + +-- on a little endian machine: +-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64) + +------------------------------------------------------------------------ +-- Unaligned, word size ops + +-- | /O(1)./ A Builder taking a single native machine word. The word is +-- written in host order, host endian form, for the machine you're on. +-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine, +-- 4 bytes. Values written this way are not portable to +-- different endian or word sized machines, without conversion. +-- +putWordhost :: Word -> Builder +putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w) +{-# INLINE putWordhost #-} + +-- | Write a Word16 in native host order and host endianness. +-- 2 bytes will be written, unaligned. +putWord16host :: Word16 -> Builder +putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16) +{-# INLINE putWord16host #-} + +-- | Write a Word32 in native host order and host endianness. +-- 4 bytes will be written, unaligned. +putWord32host :: Word32 -> Builder +putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32) +{-# INLINE putWord32host #-} + +-- | Write a Word64 in native host order. +-- On a 32 bit machine we write two host order Word32s, in big endian form. +-- 8 bytes will be written, unaligned. +putWord64host :: Word64 -> Builder +putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w) +{-# INLINE putWord64host #-} + +------------------------------------------------------------------------ +-- Unchecked shifts + +{-# INLINE shiftr_w16 #-} +shiftr_w16 :: Word16 -> Int -> Word16 +{-# INLINE shiftr_w32 #-} +shiftr_w32 :: Word32 -> Int -> Word32 +{-# INLINE shiftr_w64 #-} +shiftr_w64 :: Word64 -> Int -> Word64 + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i) +shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i) + +#if WORD_SIZE_IN_BITS < 64 +shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i) + +#if __GLASGOW_HASKELL__ <= 606 +-- Exported by GHC.Word in GHC 6.8 and higher +foreign import ccall unsafe "stg_uncheckedShiftRL64" + uncheckedShiftRL64# :: Word64# -> Int# -> Word64# +#endif + +#else +shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i) +#endif + +#else +shiftr_w16 = shiftR +shiftr_w32 = shiftR +shiftr_w64 = shiftR +#endif diff --git a/src/binary/Data/Binary/Get.hs b/src/binary/Data/Binary/Get.hs new file mode 100644 index 000000000..728720b3e --- /dev/null +++ b/src/binary/Data/Binary/Get.hs @@ -0,0 +1,543 @@ +{-# LANGUAGE CPP, MagicHash #-} +-- for unboxed shifts + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Binary.Get +-- Copyright : Lennart Kolmodin +-- License : BSD3-style (see LICENSE) +-- +-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se> +-- Stability : experimental +-- Portability : portable to Hugs and GHC. +-- +-- The Get monad. A monad for efficiently building structures from +-- encoded lazy ByteStrings +-- +----------------------------------------------------------------------------- + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +#include "MachDeps.h" +#endif + +module Data.Binary.Get ( + + -- * The Get type + Get + , runGet + , runGetState + + -- * Parsing + , skip + , uncheckedSkip + , lookAhead + , lookAheadM + , lookAheadE + , uncheckedLookAhead + + -- * Utility + , bytesRead + , getBytes + , remaining + , isEmpty + + -- * Parsing particular types + , getWord8 + + -- ** ByteStrings + , getByteString + , getLazyByteString + , getLazyByteStringNul + , getRemainingLazyByteString + + -- ** Big-endian reads + , getWord16be + , getWord32be + , getWord64be + + -- ** Little-endian reads + , getWord16le + , getWord32le + , getWord64le + + -- ** Host-endian, unaligned reads + , getWordhost + , getWord16host + , getWord32host + , getWord64host + + ) where + +import Control.Monad (when,liftM,ap) +import Control.Monad.Fix +import Data.Maybe (isNothing) + +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as L + +#ifdef BYTESTRING_IN_BASE +import qualified Data.ByteString.Base as B +#else +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy.Internal as L +#endif + +#ifdef APPLICATIVE_IN_BASE +import Control.Applicative (Applicative(..)) +#endif + +import Foreign + +-- used by splitAtST +import Control.Monad.ST +import Data.STRef + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +import GHC.Base +import GHC.Word +import GHC.Int +#endif + +-- | The parse state +data S = S {-# UNPACK #-} !B.ByteString -- current chunk + L.ByteString -- the rest of the input + {-# UNPACK #-} !Int64 -- bytes read + +-- | The Get monad is just a State monad carrying around the input ByteString +newtype Get a = Get { unGet :: S -> (a, S) } + +instance Functor Get where + fmap f m = Get (\s -> case unGet m s of + (a, s') -> (f a, s')) + {-# INLINE fmap #-} + +#ifdef APPLICATIVE_IN_BASE +instance Applicative Get where + pure = return + (<*>) = ap +#endif + +instance Monad Get where + return a = Get (\s -> (a, s)) + {-# INLINE return #-} + + m >>= k = Get (\s -> case unGet m s of + (a, s') -> unGet (k a) s') + {-# INLINE (>>=) #-} + + fail = failDesc + +instance MonadFix Get where + mfix f = Get (\s -> let (a,s') = unGet (f a) s + in (a,s')) + +------------------------------------------------------------------------ + +get :: Get S +get = Get (\s -> (s, s)) + +put :: S -> Get () +put s = Get (\_ -> ((), s)) + +------------------------------------------------------------------------ +-- +-- dons, GHC 6.10: explicit inlining disabled, was killing performance. +-- Without it, GHC seems to do just fine. And we get similar +-- performance with 6.8.2 anyway. +-- + +initState :: L.ByteString -> S +initState xs = mkState xs 0 +{- INLINE initState -} + +{- +initState (B.LPS xs) = + case xs of + [] -> S B.empty L.empty 0 + (x:xs') -> S x (B.LPS xs') 0 +-} + +#ifndef BYTESTRING_IN_BASE +mkState :: L.ByteString -> Int64 -> S +mkState l = case l of + L.Empty -> S B.empty L.empty + L.Chunk x xs -> S x xs +{- INLINE mkState -} + +#else +mkState :: L.ByteString -> Int64 -> S +mkState (B.LPS xs) = + case xs of + [] -> S B.empty L.empty + (x:xs') -> S x (B.LPS xs') +#endif + +-- | Run the Get monad applies a 'get'-based parser on the input ByteString +runGet :: Get a -> L.ByteString -> a +runGet m str = case unGet m (initState str) of (a, _) -> a + +-- | Run the Get monad applies a 'get'-based parser on the input +-- ByteString. Additional to the result of get it returns the number of +-- consumed bytes and the rest of the input. +runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64) +runGetState m str off = + case unGet m (mkState str off) of + (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff) + +------------------------------------------------------------------------ + +failDesc :: String -> Get a +failDesc err = do + S _ _ bytes <- get + Get (error (err ++ ". Failed reading at byte position " ++ show bytes)) + +-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available. +skip :: Int -> Get () +skip n = readN (fromIntegral n) (const ()) + +-- | Skip ahead @n@ bytes. No error if there isn't enough bytes. +uncheckedSkip :: Int64 -> Get () +uncheckedSkip n = do + S s ss bytes <- get + if fromIntegral (B.length s) >= n + then put (S (B.drop (fromIntegral n) s) ss (bytes + n)) + else do + let rest = L.drop (n - fromIntegral (B.length s)) ss + put $! mkState rest (bytes + n) + +-- | Run @ga@, but return without consuming its input. +-- Fails if @ga@ fails. +lookAhead :: Get a -> Get a +lookAhead ga = do + s <- get + a <- ga + put s + return a + +-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'. +-- Fails if @gma@ fails. +lookAheadM :: Get (Maybe a) -> Get (Maybe a) +lookAheadM gma = do + s <- get + ma <- gma + when (isNothing ma) $ + put s + return ma + +-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'. +-- Fails if @gea@ fails. +lookAheadE :: Get (Either a b) -> Get (Either a b) +lookAheadE gea = do + s <- get + ea <- gea + case ea of + Left _ -> put s + _ -> return () + return ea + +-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them. +uncheckedLookAhead :: Int64 -> Get L.ByteString +uncheckedLookAhead n = do + S s ss _ <- get + if n <= fromIntegral (B.length s) + then return (L.fromChunks [B.take (fromIntegral n) s]) + else return $ L.take n (s `join` ss) + +------------------------------------------------------------------------ +-- Utility + +-- | Get the total number of bytes read to this point. +bytesRead :: Get Int64 +bytesRead = do + S _ _ b <- get + return b + +-- | Get the number of remaining unparsed bytes. +-- Useful for checking whether all input has been consumed. +-- Note that this forces the rest of the input. +remaining :: Get Int64 +remaining = do + S s ss _ <- get + return (fromIntegral (B.length s) + L.length ss) + +-- | Test whether all input has been consumed, +-- i.e. there are no remaining unparsed bytes. +isEmpty :: Get Bool +isEmpty = do + S s ss _ <- get + return (B.null s && L.null ss) + +------------------------------------------------------------------------ +-- Utility with ByteStrings + +-- | An efficient 'get' method for strict ByteStrings. Fails if fewer +-- than @n@ bytes are left in the input. +getByteString :: Int -> Get B.ByteString +getByteString n = readN n id +{-# INLINE getByteString #-} + +-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than +-- @n@ bytes are left in the input. +getLazyByteString :: Int64 -> Get L.ByteString +getLazyByteString n = do + S s ss bytes <- get + let big = s `join` ss + case splitAtST n big of + (consume, rest) -> do put $ mkState rest (bytes + n) + return consume +{-# INLINE getLazyByteString #-} + +-- | Get a lazy ByteString that is terminated with a NUL byte. Fails +-- if it reaches the end of input without hitting a NUL. +getLazyByteStringNul :: Get L.ByteString +getLazyByteStringNul = do + S s ss bytes <- get + let big = s `join` ss + (consume, t) = L.break (== 0) big + (h, rest) = L.splitAt 1 t + if L.null h + then fail "too few bytes" + else do + put $ mkState rest (bytes + L.length consume + 1) + return consume +{-# INLINE getLazyByteStringNul #-} + +-- | Get the remaining bytes as a lazy ByteString +getRemainingLazyByteString :: Get L.ByteString +getRemainingLazyByteString = do + S s ss _ <- get + return (s `join` ss) + +------------------------------------------------------------------------ +-- Helpers + +-- | Pull @n@ bytes from the input, as a strict ByteString. +getBytes :: Int -> Get B.ByteString +getBytes n = do + S s ss bytes <- get + if n <= B.length s + then do let (consume,rest) = B.splitAt n s + put $! S rest ss (bytes + fromIntegral n) + return $! consume + else + case L.splitAt (fromIntegral n) (s `join` ss) of + (consuming, rest) -> + do let now = B.concat . L.toChunks $ consuming + put $! mkState rest (bytes + fromIntegral n) + -- forces the next chunk before this one is returned + if (B.length now < n) + then + fail "too few bytes" + else + return now +{- INLINE getBytes -} +-- ^ important + +#ifndef BYTESTRING_IN_BASE +join :: B.ByteString -> L.ByteString -> L.ByteString +join bb lb + | B.null bb = lb + | otherwise = L.Chunk bb lb + +#else +join :: B.ByteString -> L.ByteString -> L.ByteString +join bb (B.LPS lb) + | B.null bb = B.LPS lb + | otherwise = B.LPS (bb:lb) +#endif + -- don't use L.append, it's strict in it's second argument :/ +{- INLINE join -} + +-- | Split a ByteString. If the first result is consumed before the -- +-- second, this runs in constant heap space. +-- +-- You must force the returned tuple for that to work, e.g. +-- +-- > case splitAtST n xs of +-- > (ys,zs) -> consume ys ... consume zs +-- +splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString) +splitAtST i ps | i <= 0 = (L.empty, ps) +#ifndef BYTESTRING_IN_BASE +splitAtST i ps = runST ( + do r <- newSTRef undefined + xs <- first r i ps + ys <- unsafeInterleaveST (readSTRef r) + return (xs, ys)) + + where + first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty + first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty + + first r n (L.Chunk x xs) + | n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs) + return $ L.Chunk (B.take (fromIntegral n) x) L.Empty + | otherwise = do writeSTRef r (L.drop (n - l) xs) + liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs) + + where l = fromIntegral (B.length x) +#else +splitAtST i (B.LPS ps) = runST ( + do r <- newSTRef undefined + xs <- first r i ps + ys <- unsafeInterleaveST (readSTRef r) + return (B.LPS xs, B.LPS ys)) + + where first r 0 xs = writeSTRef r xs >> return [] + first r _ [] = writeSTRef r [] >> return [] + first r n (x:xs) + | n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs) + return [B.take (fromIntegral n) x] + | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs))) + fmap (x:) $ unsafeInterleaveST (first r (n - l) xs) + + where l = fromIntegral (B.length x) +#endif +{- INLINE splitAtST -} + +-- Pull n bytes from the input, and apply a parser to those bytes, +-- yielding a value. If less than @n@ bytes are available, fail with an +-- error. This wraps @getBytes@. +readN :: Int -> (B.ByteString -> a) -> Get a +readN n f = fmap f $ getBytes n +{- INLINE readN -} +-- ^ important + +------------------------------------------------------------------------ +-- Primtives + +-- helper, get a raw Ptr onto a strict ByteString copied out of the +-- underlying lazy byteString. So many indirections from the raw parser +-- state that my head hurts... + +getPtr :: Storable a => Int -> Get a +getPtr n = do + (fp,o,_) <- readN n B.toForeignPtr + return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o) +{- INLINE getPtr -} + +------------------------------------------------------------------------ + +-- | Read a Word8 from the monad state +getWord8 :: Get Word8 +getWord8 = getPtr (sizeOf (undefined :: Word8)) +{- INLINE getWord8 -} + +-- | Read a Word16 in big endian format +getWord16be :: Get Word16 +getWord16be = do + s <- readN 2 id + return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|. + (fromIntegral (s `B.index` 1)) +{- INLINE getWord16be -} + +-- | Read a Word16 in little endian format +getWord16le :: Get Word16 +getWord16le = do + s <- readN 2 id + return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|. + (fromIntegral (s `B.index` 0) ) +{- INLINE getWord16le -} + +-- | Read a Word32 in big endian format +getWord32be :: Get Word32 +getWord32be = do + s <- readN 4 id + return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|. + (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|. + (fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|. + (fromIntegral (s `B.index` 3) ) +{- INLINE getWord32be -} + +-- | Read a Word32 in little endian format +getWord32le :: Get Word32 +getWord32le = do + s <- readN 4 id + return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|. + (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|. + (fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|. + (fromIntegral (s `B.index` 0) ) +{- INLINE getWord32le -} + +-- | Read a Word64 in big endian format +getWord64be :: Get Word64 +getWord64be = do + s <- readN 8 id + return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|. + (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|. + (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|. + (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|. + (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|. + (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|. + (fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|. + (fromIntegral (s `B.index` 7) ) +{- INLINE getWord64be -} + +-- | Read a Word64 in little endian format +getWord64le :: Get Word64 +getWord64le = do + s <- readN 8 id + return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|. + (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|. + (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|. + (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|. + (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|. + (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|. + (fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|. + (fromIntegral (s `B.index` 0) ) +{- INLINE getWord64le -} + +------------------------------------------------------------------------ +-- Host-endian reads + +-- | /O(1)./ Read a single native machine word. The word is read in +-- host order, host endian form, for the machine you're on. On a 64 bit +-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes. +getWordhost :: Get Word +getWordhost = getPtr (sizeOf (undefined :: Word)) +{- INLINE getWordhost -} + +-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness. +getWord16host :: Get Word16 +getWord16host = getPtr (sizeOf (undefined :: Word16)) +{- INLINE getWord16host -} + +-- | /O(1)./ Read a Word32 in native host order and host endianness. +getWord32host :: Get Word32 +getWord32host = getPtr (sizeOf (undefined :: Word32)) +{- INLINE getWord32host -} + +-- | /O(1)./ Read a Word64 in native host order and host endianess. +getWord64host :: Get Word64 +getWord64host = getPtr (sizeOf (undefined :: Word64)) +{- INLINE getWord64host -} + +------------------------------------------------------------------------ +-- Unchecked shifts + +shiftl_w16 :: Word16 -> Int -> Word16 +shiftl_w32 :: Word32 -> Int -> Word32 +shiftl_w64 :: Word64 -> Int -> Word64 + +#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) +shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i) +shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i) + +#if WORD_SIZE_IN_BITS < 64 +shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i) + +#if __GLASGOW_HASKELL__ <= 606 +-- Exported by GHC.Word in GHC 6.8 and higher +foreign import ccall unsafe "stg_uncheckedShiftL64" + uncheckedShiftL64# :: Word64# -> Int# -> Word64# +#endif + +#else +shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i) +#endif + +#else +shiftl_w16 = shiftL +shiftl_w32 = shiftL +shiftl_w64 = shiftL +#endif diff --git a/src/binary/Data/Binary/IEEE754.lhs b/src/binary/Data/Binary/IEEE754.lhs new file mode 100644 index 000000000..96cbefc5a --- /dev/null +++ b/src/binary/Data/Binary/IEEE754.lhs @@ -0,0 +1,402 @@ +% Copyright (C) 2009 John Millikin <jmillikin@gmail.com> +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see <http://www.gnu.org/licenses/>. + +\ignore{ +\begin{code} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +module Data.Binary.IEEE754 ( + -- * Parsing + getFloat16be, getFloat16le + , getFloat32be, getFloat32le + , getFloat64be, getFloat64le + + -- * Serializing + , putFloat32be, putFloat32le + , putFloat64be, putFloat64le +) where + +import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits) +import Data.Word (Word8) +import Data.List (foldl') + +import qualified Data.ByteString as B +import Data.Binary.Get (Get, getByteString) +import Data.Binary.Put (Put, putByteString) +\end{code} +} + +\section{Parsing} + +\subsection{Public interface} + +\begin{code} +getFloat16be :: Get Float +getFloat16be = getFloat (ByteCount 2) splitBytes +\end{code} + +\begin{code} +getFloat16le :: Get Float +getFloat16le = getFloat (ByteCount 2) $ splitBytes . reverse +\end{code} + +\begin{code} +getFloat32be :: Get Float +getFloat32be = getFloat (ByteCount 4) splitBytes +\end{code} + +\begin{code} +getFloat32le :: Get Float +getFloat32le = getFloat (ByteCount 4) $ splitBytes . reverse +\end{code} + +\begin{code} +getFloat64be :: Get Double +getFloat64be = getFloat (ByteCount 8) splitBytes +\end{code} + +\begin{code} +getFloat64le :: Get Double +getFloat64le = getFloat (ByteCount 8) $ splitBytes . reverse +\end{code} + +\subsection{Implementation} + +Split the raw byte array into (sign, exponent, significand) components. +The exponent and signifcand are drawn directly from the bits in the +original float, and have not been unbiased or otherwise modified. + +\begin{code} +splitBytes :: [Word8] -> RawFloat +splitBytes bs = RawFloat width sign exp' sig expWidth sigWidth where + width = ByteCount (length bs) + nBits = bitsInWord8 bs + sign = if head bs .&. 0x80 == 0x80 + then Negative + else Positive + + expStart = 1 + expWidth = exponentWidth nBits + expEnd = expStart + expWidth + exp' = Exponent . fromIntegral $ bitSlice bs expStart expEnd + + sigWidth = nBits - expEnd + sig = Significand $ bitSlice bs expEnd nBits +\end{code} + +\subsubsection{Encodings and special values} + +The next step depends on the value of the exponent $e$, size of the +exponent field in bits $w$, and value of the significand. + +\begin{table}[h] +\begin{center} +\begin{tabular}{lrl} +\toprule +Exponent & Significand & Format \\ +\midrule +$0$ & $0$ & Zero \\ +$0$ & $> 0$ & Denormalised \\ +$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\ +$2^w-1$ & $0$ & Infinity \\ +$2^w-1$ & $> 0$ & NaN \\ +\bottomrule +\end{tabular} +\end{center} +\end{table} + +There's no built-in literals for Infinity or NaN, so they +are constructed using the {\tt Read} instances for {\tt Double} and +{\tt Float}. + +\begin{code} +merge :: (Read a, RealFloat a) => RawFloat -> a +merge f@(RawFloat _ _ e sig eWidth _) + | e == 0 = if sig == 0 + then 0.0 + else denormalised f + | e == eMax - 1 = if sig == 0 + then read "Infinity" + else read "NaN" + | otherwise = normalised f + where eMax = 2 `pow` eWidth +\end{code} + +If a value is normalised, its significand has an implied {\tt 1} bit +in its most-significant bit. The significand must be adjusted by +this value before being passed to {\tt encodeField}. + +\begin{code} +normalised :: RealFloat a => RawFloat -> a +normalised f = encodeFloat fraction exp' where + Significand sig = rawSignificand f + Exponent exp' = unbiased - sigWidth + + fraction = sig + (1 `bitShiftL` rawSignificandWidth f) + + sigWidth = fromIntegral $ rawSignificandWidth f + unbiased = unbias (rawExponent f) (rawExponentWidth f) +\end{code} + +For denormalised values, the implied {\tt 1} bit is the least-significant +bit of the exponent. + +\begin{code} +denormalised :: RealFloat a => RawFloat -> a +denormalised f = encodeFloat sig exp' where + Significand sig = rawSignificand f + Exponent exp' = unbiased - sigWidth + 1 + + sigWidth = fromIntegral $ rawSignificandWidth f + unbiased = unbias (rawExponent f) (rawExponentWidth f) +\end{code} + +By composing {\tt splitBytes} and {\tt merge}, the absolute value of the +float is calculated. Before being returned to the calling function, it +must be signed appropriately. + +\begin{code} +getFloat :: (Read a, RealFloat a) => ByteCount + -> ([Word8] -> RawFloat) -> Get a +getFloat (ByteCount width) parser = do + raw <- fmap (parser . B.unpack) $ getByteString width + let absFloat = merge raw + return $ case rawSign raw of + Positive -> absFloat + Negative -> -absFloat +\end{code} + +\section{Serialising} + +\subsection{Public interface} + +\begin{code} +putFloat32be :: Float -> Put +putFloat32be = putFloat (ByteCount 4) id +\end{code} + +\begin{code} +putFloat32le :: Float -> Put +putFloat32le = putFloat (ByteCount 4) reverse +\end{code} + +\begin{code} +putFloat64be :: Double -> Put +putFloat64be = putFloat (ByteCount 8) id +\end{code} + +\begin{code} +putFloat64le :: Double -> Put +putFloat64le = putFloat (ByteCount 8) reverse +\end{code} + +\subsection{Implementation} + +Serialisation is similar to parsing. First, the float is converted to +a structure representing raw bitfields. The values returned from +{\tt decodeFloat} are clamped to their expected lengths before being +stored in the {\tt RawFloat}. + +\begin{code} +splitFloat :: RealFloat a => ByteCount -> a -> RawFloat +splitFloat width x = raw where + raw = RawFloat width sign clampedExp clampedSig expWidth sigWidth + sign = if isNegativeNaN x || isNegativeZero x || x < 0 + then Negative + else Positive + clampedExp = clamp expWidth exp' + clampedSig = clamp sigWidth sig + (exp', sig) = case (dFraction, dExponent, biasedExp) of + (0, 0, _) -> (0, 0) + (_, _, 0) -> (0, Significand $ truncatedSig + 1) + _ -> (biasedExp, Significand truncatedSig) + expWidth = exponentWidth $ bitCount width + sigWidth = bitCount width - expWidth - 1 -- 1 for sign bit + + (dFraction, dExponent) = decodeFloat x + + rawExp = Exponent $ dExponent + fromIntegral sigWidth + biasedExp = bias rawExp expWidth + truncatedSig = abs dFraction - (1 `bitShiftL` sigWidth) +\end{code} + +Then, the {\tt RawFloat} is converted to a list of bytes by mashing all +the fields together into an {\tt Integer}, and chopping up that integer +in 8-bit blocks. + +\begin{code} +rawToBytes :: RawFloat -> [Word8] +rawToBytes raw = integerToBytes mashed width where + RawFloat width sign exp' sig expWidth sigWidth = raw + sign' :: Word8 + sign' = case sign of + Positive -> 0 + Negative -> 1 + mashed = mashBits sig sigWidth . + mashBits exp' expWidth . + mashBits sign' 1 $ 0 +\end{code} + +{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits +in positions above the count. + +\begin{code} +clamp :: (Num a, Bits a) => BitCount -> a -> a +clamp = (.&.) . mask where + mask 1 = 1 + mask n | n > 1 = (mask (n - 1) `shiftL` 1) + 1 + mask _ = undefined +\end{code} + +For merging the fields, just shift the starting integer over a bit and +then \textsc{or} it with the next value. The weird parameter order allows +easy composition. + +\begin{code} +mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer +mashBits _ 0 x = x +mashBits y n x = (x `bitShiftL` n) .|. fromIntegral y +\end{code} + +Given an integer, read it in 255-block increments starting from the LSB. +Each increment is converted to a byte and added to the final list. + +\begin{code} +integerToBytes :: Integer -> ByteCount -> [Word8] +integerToBytes _ 0 = [] +integerToBytes x n = bytes where + bytes = integerToBytes (x `shiftR` 8) (n - 1) ++ [step] + step = fromIntegral x .&. 0xFF +\end{code} + +Finally, the raw parsing is wrapped up in {\tt Put}. The second parameter +allows the same code paths to be used for little- and big-endian +serialisation. + +\begin{code} +putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put +putFloat width f x = putByteString $ B.pack bytes where + bytes = f . rawToBytes . splitFloat width $ x +\end{code} + +\section{Raw float components} + +Information about the raw bit patterns in the float is stored in +{\tt RawFloat}, so they don't have to be passed around to the various +format cases. The exponent should be biased, and the significand +shouldn't have it's implied MSB (if applicable). + +\begin{code} +data RawFloat = RawFloat + { rawWidth :: ByteCount + , rawSign :: Sign + , rawExponent :: Exponent + , rawSignificand :: Significand + , rawExponentWidth :: BitCount + , rawSignificandWidth :: BitCount + } + deriving (Show) +\end{code} + +\section{Exponents} + +Calculate the proper size of the exponent field, in bits, given the +size of the full structure. + +\begin{code} +exponentWidth :: BitCount -> BitCount +exponentWidth k + | k == 16 = 5 + | k == 32 = 8 + | k `mod` 32 == 0 = ceiling (4 * logBase 2 (fromIntegral k)) - 13 + | otherwise = error "Invalid length of floating-point value" +\end{code} + +\begin{code} +bias :: Exponent -> BitCount -> Exponent +bias e eWidth = e - (1 - (2 `pow` (eWidth - 1))) +\end{code} + +\begin{code} +unbias :: Exponent -> BitCount -> Exponent +unbias e eWidth = e + 1 - (2 `pow` (eWidth - 1)) +\end{code} + +\section{Byte and bit counting} + +\begin{code} +data Sign = Positive | Negative + deriving (Show) + +newtype Exponent = Exponent Int + deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits) + +newtype Significand = Significand Integer + deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits) + +newtype BitCount = BitCount Int + deriving (Show, Eq, Num, Ord, Real, Enum, Integral) + +newtype ByteCount = ByteCount Int + deriving (Show, Eq, Num, Ord, Real, Enum, Integral) + +bitCount :: ByteCount -> BitCount +bitCount (ByteCount x) = BitCount (x * 8) + +bitsInWord8 :: [Word8] -> BitCount +bitsInWord8 = bitCount . ByteCount . length + +bitShiftL :: (Bits a) => a -> BitCount -> a +bitShiftL x (BitCount n) = shiftL x n + +bitShiftR :: (Bits a) => a -> BitCount -> a +bitShiftR x (BitCount n) = shiftR x n +\end{code} + +\section{Utility} + +Considering a byte list as a sequence of bits, slice it from start +inclusive to end exclusive, and return the resulting bit sequence as an +integer. + +\begin{code} +bitSlice :: [Word8] -> BitCount -> BitCount -> Integer +bitSlice bs = sliceInt (foldl' step 0 bs) bitCount' where + step acc w = shiftL acc 8 + fromIntegral w + bitCount' = bitsInWord8 bs +\end{code} + +Slice a single integer by start and end bit location + +\begin{code} +sliceInt :: Integer -> BitCount -> BitCount -> BitCount -> Integer +sliceInt x xBitCount s e = fromIntegral sliced where + sliced = (x .&. startMask) `bitShiftR` (xBitCount - e) + startMask = n1Bits (xBitCount - s) + n1Bits n = (2 `pow` n) - 1 +\end{code} + +Integral version of {\tt (**)} + +\begin{code} +pow :: (Integral a, Integral b, Integral c) => a -> b -> c +pow b e = floor $ fromIntegral b ** fromIntegral e +\end{code} + +Detect whether a float is {\tt $-$NaN} + +\begin{code} +isNegativeNaN :: RealFloat a => a -> Bool +isNegativeNaN x = isNaN x && (floor x > 0) +\end{code} diff --git a/src/binary/Data/Binary/Put.hs b/src/binary/Data/Binary/Put.hs new file mode 100644 index 000000000..a1f78dfba --- /dev/null +++ b/src/binary/Data/Binary/Put.hs @@ -0,0 +1,216 @@ +{-# LANGUAGE CPP #-} +----------------------------------------------------------------------------- +-- | +-- Module : Data.Binary.Put +-- Copyright : Lennart Kolmodin +-- License : BSD3-style (see LICENSE) +-- +-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se> +-- Stability : stable +-- Portability : Portable to Hugs and GHC. Requires MPTCs +-- +-- The Put monad. A monad for efficiently constructing lazy bytestrings. +-- +----------------------------------------------------------------------------- + +module Data.Binary.Put ( + + -- * The Put type + Put + , PutM(..) + , runPut + , runPutM + , putBuilder + , execPut + + -- * Flushing the implicit parse state + , flush + + -- * Primitives + , putWord8 + , putByteString + , putLazyByteString + + -- * Big-endian primitives + , putWord16be + , putWord32be + , putWord64be + + -- * Little-endian primitives + , putWord16le + , putWord32le + , putWord64le + + -- * Host-endian, unaligned writes + , putWordhost -- :: Word -> Put + , putWord16host -- :: Word16 -> Put + , putWord32host -- :: Word32 -> Put + , putWord64host -- :: Word64 -> Put + + ) where + +import Data.Monoid +import Data.Binary.Builder (Builder, toLazyByteString) +import qualified Data.Binary.Builder as B + +import Data.Word +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L + +#ifdef APPLICATIVE_IN_BASE +import Control.Applicative +#endif + + +------------------------------------------------------------------------ + +-- XXX Strict in buffer only. +data PairS a = PairS a {-# UNPACK #-}!Builder + +sndS :: PairS a -> Builder +sndS (PairS _ b) = b + +-- | The PutM type. A Writer monad over the efficient Builder monoid. +newtype PutM a = Put { unPut :: PairS a } + +-- | Put merely lifts Builder into a Writer monad, applied to (). +type Put = PutM () + +instance Functor PutM where + fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w + {-# INLINE fmap #-} + +#ifdef APPLICATIVE_IN_BASE +instance Applicative PutM where + pure = return + m <*> k = Put $ + let PairS f w = unPut m + PairS x w' = unPut k + in PairS (f x) (w `mappend` w') +#endif + +-- Standard Writer monad, with aggressive inlining +instance Monad PutM where + return a = Put $ PairS a mempty + {-# INLINE return #-} + + m >>= k = Put $ + let PairS a w = unPut m + PairS b w' = unPut (k a) + in PairS b (w `mappend` w') + {-# INLINE (>>=) #-} + + m >> k = Put $ + let PairS _ w = unPut m + PairS b w' = unPut k + in PairS b (w `mappend` w') + {-# INLINE (>>) #-} + +tell :: Builder -> Put +tell b = Put $ PairS () b +{-# INLINE tell #-} + +putBuilder :: Builder -> Put +putBuilder = tell +{-# INLINE putBuilder #-} + +-- | Run the 'Put' monad +execPut :: PutM a -> Builder +execPut = sndS . unPut +{-# INLINE execPut #-} + +-- | Run the 'Put' monad with a serialiser +runPut :: Put -> L.ByteString +runPut = toLazyByteString . sndS . unPut +{-# INLINE runPut #-} + +-- | Run the 'Put' monad with a serialiser and get its result +runPutM :: PutM a -> (a, L.ByteString) +runPutM (Put (PairS f s)) = (f, toLazyByteString s) +{-# INLINE runPutM #-} + +------------------------------------------------------------------------ + +-- | Pop the ByteString we have constructed so far, if any, yielding a +-- new chunk in the result ByteString. +flush :: Put +flush = tell B.flush +{-# INLINE flush #-} + +-- | Efficiently write a byte into the output buffer +putWord8 :: Word8 -> Put +putWord8 = tell . B.singleton +{-# INLINE putWord8 #-} + +-- | An efficient primitive to write a strict ByteString into the output buffer. +-- It flushes the current buffer, and writes the argument into a new chunk. +putByteString :: S.ByteString -> Put +putByteString = tell . B.fromByteString +{-# INLINE putByteString #-} + +-- | Write a lazy ByteString efficiently, simply appending the lazy +-- ByteString chunks to the output buffer +putLazyByteString :: L.ByteString -> Put +putLazyByteString = tell . B.fromLazyByteString +{-# INLINE putLazyByteString #-} + +-- | Write a Word16 in big endian format +putWord16be :: Word16 -> Put +putWord16be = tell . B.putWord16be +{-# INLINE putWord16be #-} + +-- | Write a Word16 in little endian format +putWord16le :: Word16 -> Put +putWord16le = tell . B.putWord16le +{-# INLINE putWord16le #-} + +-- | Write a Word32 in big endian format +putWord32be :: Word32 -> Put +putWord32be = tell . B.putWord32be +{-# INLINE putWord32be #-} + +-- | Write a Word32 in little endian format +putWord32le :: Word32 -> Put +putWord32le = tell . B.putWord32le +{-# INLINE putWord32le #-} + +-- | Write a Word64 in big endian format +putWord64be :: Word64 -> Put +putWord64be = tell . B.putWord64be +{-# INLINE putWord64be #-} + +-- | Write a Word64 in little endian format +putWord64le :: Word64 -> Put +putWord64le = tell . B.putWord64le +{-# INLINE putWord64le #-} + +------------------------------------------------------------------------ + +-- | /O(1)./ Write a single native machine word. The word is +-- written in host order, host endian form, for the machine you're on. +-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine, +-- 4 bytes. Values written this way are not portable to +-- different endian or word sized machines, without conversion. +-- +putWordhost :: Word -> Put +putWordhost = tell . B.putWordhost +{-# INLINE putWordhost #-} + +-- | /O(1)./ Write a Word16 in native host order and host endianness. +-- For portability issues see @putWordhost@. +putWord16host :: Word16 -> Put +putWord16host = tell . B.putWord16host +{-# INLINE putWord16host #-} + +-- | /O(1)./ Write a Word32 in native host order and host endianness. +-- For portability issues see @putWordhost@. +putWord32host :: Word32 -> Put +putWord32host = tell . B.putWord32host +{-# INLINE putWord32host #-} + +-- | /O(1)./ Write a Word64 in native host order +-- On a 32 bit machine we write two host order Word32s, in big endian form. +-- For portability issues see @putWordhost@. +putWord64host :: Word64 -> Put +putWord64host = tell . B.putWord64host +{-# INLINE putWord64host #-} |
