diff options
Diffstat (limited to 'src/binary/Data/Binary/IEEE754.lhs')
| -rw-r--r-- | src/binary/Data/Binary/IEEE754.lhs | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/src/binary/Data/Binary/IEEE754.lhs b/src/binary/Data/Binary/IEEE754.lhs new file mode 100644 index 000000000..96cbefc5a --- /dev/null +++ b/src/binary/Data/Binary/IEEE754.lhs @@ -0,0 +1,402 @@ +% Copyright (C) 2009 John Millikin <jmillikin@gmail.com> +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see <http://www.gnu.org/licenses/>. + +\ignore{ +\begin{code} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +module Data.Binary.IEEE754 ( + -- * Parsing + getFloat16be, getFloat16le + , getFloat32be, getFloat32le + , getFloat64be, getFloat64le + + -- * Serializing + , putFloat32be, putFloat32le + , putFloat64be, putFloat64le +) where + +import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits) +import Data.Word (Word8) +import Data.List (foldl') + +import qualified Data.ByteString as B +import Data.Binary.Get (Get, getByteString) +import Data.Binary.Put (Put, putByteString) +\end{code} +} + +\section{Parsing} + +\subsection{Public interface} + +\begin{code} +getFloat16be :: Get Float +getFloat16be = getFloat (ByteCount 2) splitBytes +\end{code} + +\begin{code} +getFloat16le :: Get Float +getFloat16le = getFloat (ByteCount 2) $ splitBytes . reverse +\end{code} + +\begin{code} +getFloat32be :: Get Float +getFloat32be = getFloat (ByteCount 4) splitBytes +\end{code} + +\begin{code} +getFloat32le :: Get Float +getFloat32le = getFloat (ByteCount 4) $ splitBytes . reverse +\end{code} + +\begin{code} +getFloat64be :: Get Double +getFloat64be = getFloat (ByteCount 8) splitBytes +\end{code} + +\begin{code} +getFloat64le :: Get Double +getFloat64le = getFloat (ByteCount 8) $ splitBytes . reverse +\end{code} + +\subsection{Implementation} + +Split the raw byte array into (sign, exponent, significand) components. +The exponent and signifcand are drawn directly from the bits in the +original float, and have not been unbiased or otherwise modified. + +\begin{code} +splitBytes :: [Word8] -> RawFloat +splitBytes bs = RawFloat width sign exp' sig expWidth sigWidth where + width = ByteCount (length bs) + nBits = bitsInWord8 bs + sign = if head bs .&. 0x80 == 0x80 + then Negative + else Positive + + expStart = 1 + expWidth = exponentWidth nBits + expEnd = expStart + expWidth + exp' = Exponent . fromIntegral $ bitSlice bs expStart expEnd + + sigWidth = nBits - expEnd + sig = Significand $ bitSlice bs expEnd nBits +\end{code} + +\subsubsection{Encodings and special values} + +The next step depends on the value of the exponent $e$, size of the +exponent field in bits $w$, and value of the significand. + +\begin{table}[h] +\begin{center} +\begin{tabular}{lrl} +\toprule +Exponent & Significand & Format \\ +\midrule +$0$ & $0$ & Zero \\ +$0$ & $> 0$ & Denormalised \\ +$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\ +$2^w-1$ & $0$ & Infinity \\ +$2^w-1$ & $> 0$ & NaN \\ +\bottomrule +\end{tabular} +\end{center} +\end{table} + +There's no built-in literals for Infinity or NaN, so they +are constructed using the {\tt Read} instances for {\tt Double} and +{\tt Float}. + +\begin{code} +merge :: (Read a, RealFloat a) => RawFloat -> a +merge f@(RawFloat _ _ e sig eWidth _) + | e == 0 = if sig == 0 + then 0.0 + else denormalised f + | e == eMax - 1 = if sig == 0 + then read "Infinity" + else read "NaN" + | otherwise = normalised f + where eMax = 2 `pow` eWidth +\end{code} + +If a value is normalised, its significand has an implied {\tt 1} bit +in its most-significant bit. The significand must be adjusted by +this value before being passed to {\tt encodeField}. + +\begin{code} +normalised :: RealFloat a => RawFloat -> a +normalised f = encodeFloat fraction exp' where + Significand sig = rawSignificand f + Exponent exp' = unbiased - sigWidth + + fraction = sig + (1 `bitShiftL` rawSignificandWidth f) + + sigWidth = fromIntegral $ rawSignificandWidth f + unbiased = unbias (rawExponent f) (rawExponentWidth f) +\end{code} + +For denormalised values, the implied {\tt 1} bit is the least-significant +bit of the exponent. + +\begin{code} +denormalised :: RealFloat a => RawFloat -> a +denormalised f = encodeFloat sig exp' where + Significand sig = rawSignificand f + Exponent exp' = unbiased - sigWidth + 1 + + sigWidth = fromIntegral $ rawSignificandWidth f + unbiased = unbias (rawExponent f) (rawExponentWidth f) +\end{code} + +By composing {\tt splitBytes} and {\tt merge}, the absolute value of the +float is calculated. Before being returned to the calling function, it +must be signed appropriately. + +\begin{code} +getFloat :: (Read a, RealFloat a) => ByteCount + -> ([Word8] -> RawFloat) -> Get a +getFloat (ByteCount width) parser = do + raw <- fmap (parser . B.unpack) $ getByteString width + let absFloat = merge raw + return $ case rawSign raw of + Positive -> absFloat + Negative -> -absFloat +\end{code} + +\section{Serialising} + +\subsection{Public interface} + +\begin{code} +putFloat32be :: Float -> Put +putFloat32be = putFloat (ByteCount 4) id +\end{code} + +\begin{code} +putFloat32le :: Float -> Put +putFloat32le = putFloat (ByteCount 4) reverse +\end{code} + +\begin{code} +putFloat64be :: Double -> Put +putFloat64be = putFloat (ByteCount 8) id +\end{code} + +\begin{code} +putFloat64le :: Double -> Put +putFloat64le = putFloat (ByteCount 8) reverse +\end{code} + +\subsection{Implementation} + +Serialisation is similar to parsing. First, the float is converted to +a structure representing raw bitfields. The values returned from +{\tt decodeFloat} are clamped to their expected lengths before being +stored in the {\tt RawFloat}. + +\begin{code} +splitFloat :: RealFloat a => ByteCount -> a -> RawFloat +splitFloat width x = raw where + raw = RawFloat width sign clampedExp clampedSig expWidth sigWidth + sign = if isNegativeNaN x || isNegativeZero x || x < 0 + then Negative + else Positive + clampedExp = clamp expWidth exp' + clampedSig = clamp sigWidth sig + (exp', sig) = case (dFraction, dExponent, biasedExp) of + (0, 0, _) -> (0, 0) + (_, _, 0) -> (0, Significand $ truncatedSig + 1) + _ -> (biasedExp, Significand truncatedSig) + expWidth = exponentWidth $ bitCount width + sigWidth = bitCount width - expWidth - 1 -- 1 for sign bit + + (dFraction, dExponent) = decodeFloat x + + rawExp = Exponent $ dExponent + fromIntegral sigWidth + biasedExp = bias rawExp expWidth + truncatedSig = abs dFraction - (1 `bitShiftL` sigWidth) +\end{code} + +Then, the {\tt RawFloat} is converted to a list of bytes by mashing all +the fields together into an {\tt Integer}, and chopping up that integer +in 8-bit blocks. + +\begin{code} +rawToBytes :: RawFloat -> [Word8] +rawToBytes raw = integerToBytes mashed width where + RawFloat width sign exp' sig expWidth sigWidth = raw + sign' :: Word8 + sign' = case sign of + Positive -> 0 + Negative -> 1 + mashed = mashBits sig sigWidth . + mashBits exp' expWidth . + mashBits sign' 1 $ 0 +\end{code} + +{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits +in positions above the count. + +\begin{code} +clamp :: (Num a, Bits a) => BitCount -> a -> a +clamp = (.&.) . mask where + mask 1 = 1 + mask n | n > 1 = (mask (n - 1) `shiftL` 1) + 1 + mask _ = undefined +\end{code} + +For merging the fields, just shift the starting integer over a bit and +then \textsc{or} it with the next value. The weird parameter order allows +easy composition. + +\begin{code} +mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer +mashBits _ 0 x = x +mashBits y n x = (x `bitShiftL` n) .|. fromIntegral y +\end{code} + +Given an integer, read it in 255-block increments starting from the LSB. +Each increment is converted to a byte and added to the final list. + +\begin{code} +integerToBytes :: Integer -> ByteCount -> [Word8] +integerToBytes _ 0 = [] +integerToBytes x n = bytes where + bytes = integerToBytes (x `shiftR` 8) (n - 1) ++ [step] + step = fromIntegral x .&. 0xFF +\end{code} + +Finally, the raw parsing is wrapped up in {\tt Put}. The second parameter +allows the same code paths to be used for little- and big-endian +serialisation. + +\begin{code} +putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put +putFloat width f x = putByteString $ B.pack bytes where + bytes = f . rawToBytes . splitFloat width $ x +\end{code} + +\section{Raw float components} + +Information about the raw bit patterns in the float is stored in +{\tt RawFloat}, so they don't have to be passed around to the various +format cases. The exponent should be biased, and the significand +shouldn't have it's implied MSB (if applicable). + +\begin{code} +data RawFloat = RawFloat + { rawWidth :: ByteCount + , rawSign :: Sign + , rawExponent :: Exponent + , rawSignificand :: Significand + , rawExponentWidth :: BitCount + , rawSignificandWidth :: BitCount + } + deriving (Show) +\end{code} + +\section{Exponents} + +Calculate the proper size of the exponent field, in bits, given the +size of the full structure. + +\begin{code} +exponentWidth :: BitCount -> BitCount +exponentWidth k + | k == 16 = 5 + | k == 32 = 8 + | k `mod` 32 == 0 = ceiling (4 * logBase 2 (fromIntegral k)) - 13 + | otherwise = error "Invalid length of floating-point value" +\end{code} + +\begin{code} +bias :: Exponent -> BitCount -> Exponent +bias e eWidth = e - (1 - (2 `pow` (eWidth - 1))) +\end{code} + +\begin{code} +unbias :: Exponent -> BitCount -> Exponent +unbias e eWidth = e + 1 - (2 `pow` (eWidth - 1)) +\end{code} + +\section{Byte and bit counting} + +\begin{code} +data Sign = Positive | Negative + deriving (Show) + +newtype Exponent = Exponent Int + deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits) + +newtype Significand = Significand Integer + deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits) + +newtype BitCount = BitCount Int + deriving (Show, Eq, Num, Ord, Real, Enum, Integral) + +newtype ByteCount = ByteCount Int + deriving (Show, Eq, Num, Ord, Real, Enum, Integral) + +bitCount :: ByteCount -> BitCount +bitCount (ByteCount x) = BitCount (x * 8) + +bitsInWord8 :: [Word8] -> BitCount +bitsInWord8 = bitCount . ByteCount . length + +bitShiftL :: (Bits a) => a -> BitCount -> a +bitShiftL x (BitCount n) = shiftL x n + +bitShiftR :: (Bits a) => a -> BitCount -> a +bitShiftR x (BitCount n) = shiftR x n +\end{code} + +\section{Utility} + +Considering a byte list as a sequence of bits, slice it from start +inclusive to end exclusive, and return the resulting bit sequence as an +integer. + +\begin{code} +bitSlice :: [Word8] -> BitCount -> BitCount -> Integer +bitSlice bs = sliceInt (foldl' step 0 bs) bitCount' where + step acc w = shiftL acc 8 + fromIntegral w + bitCount' = bitsInWord8 bs +\end{code} + +Slice a single integer by start and end bit location + +\begin{code} +sliceInt :: Integer -> BitCount -> BitCount -> BitCount -> Integer +sliceInt x xBitCount s e = fromIntegral sliced where + sliced = (x .&. startMask) `bitShiftR` (xBitCount - e) + startMask = n1Bits (xBitCount - s) + n1Bits n = (2 `pow` n) - 1 +\end{code} + +Integral version of {\tt (**)} + +\begin{code} +pow :: (Integral a, Integral b, Integral c) => a -> b -> c +pow b e = floor $ fromIntegral b ** fromIntegral e +\end{code} + +Detect whether a float is {\tt $-$NaN} + +\begin{code} +isNegativeNaN :: RealFloat a => a -> Bool +isNegativeNaN x = isNaN x && (floor x > 0) +\end{code} |
