summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2008-10-28 13:57:10 +0000
committerkrasimir <krasimir@chalmers.se>2008-10-28 13:57:10 +0000
commitebd98056ce9d478f0aca68d752a49d87f7431ec9 (patch)
tree8174b823fe84309b81f6b1b04c3353a44cfa357c /src
parent8e43cfb8a8ce4a6c4c608678633c0c5ec67adfff (diff)
binary serialization for PGF
Diffstat (limited to 'src')
-rw-r--r--src/Data/Binary.hs792
-rw-r--r--src/Data/Binary/Builder.hs426
-rw-r--r--src/Data/Binary/Get.hs539
-rw-r--r--src/Data/Binary/Put.hs199
-rw-r--r--src/GF/Compile/Export.hs7
-rw-r--r--src/GF/Compile/GrammarToGFCC.hs7
-rw-r--r--src/GF/Infra/Option.hs8
-rw-r--r--src/GFC.hs18
-rw-r--r--src/PGF.hs10
-rw-r--r--src/PGF/Raw/Abstract.hs14
-rw-r--r--src/PGF/Raw/Convert.hs273
-rw-r--r--src/PGF/Raw/Parse.hs101
-rw-r--r--src/PGF/Raw/Print.hs35
13 files changed, 1976 insertions, 453 deletions
diff --git a/src/Data/Binary.hs b/src/Data/Binary.hs
new file mode 100644
index 000000000..fd0ca6c98
--- /dev/null
+++ b/src/Data/Binary.hs
@@ -0,0 +1,792 @@
+{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary
+-- Copyright : Lennart Kolmodin
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : unstable
+-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
+--
+-- Binary serialisation of Haskell values to and from lazy ByteStrings.
+-- The Binary library provides methods for encoding Haskell values as
+-- streams of bytes directly in memory. The resulting @ByteString@ can
+-- then be written to disk, sent over the network, or futher processed
+-- (for example, compressed with gzip).
+--
+-- The 'Binary' package is notable in that it provides both pure, and
+-- high performance serialisation.
+--
+-- Values are always encoded in network order (big endian) form, and
+-- encoded data should be portable across machine endianess, word size,
+-- or compiler version. For example, data encoded using the Binary class
+-- could be written from GHC, and read back in Hugs.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary (
+
+ -- * The Binary class
+ Binary(..)
+
+ -- $example
+
+ -- * The Get and Put monads
+ , Get
+ , Put
+
+ -- * Useful helpers for writing instances
+ , putWord8
+ , getWord8
+
+ -- * Binary serialisation
+ , encode -- :: Binary a => a -> ByteString
+ , decode -- :: Binary a => ByteString -> a
+
+ -- * IO functions for serialisation
+ , encodeFile -- :: Binary a => FilePath -> a -> IO ()
+ , decodeFile -- :: Binary a => FilePath -> IO a
+
+-- Lazy put and get
+-- , lazyPut
+-- , lazyGet
+
+ , module Data.Word -- useful
+
+ ) where
+
+import Data.Word
+
+import Data.Binary.Put
+import Data.Binary.Get
+
+import Control.Monad
+import Foreign
+import System.IO
+
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as L
+
+import Data.Char (chr,ord)
+import Data.List (unfoldr)
+
+-- And needed for the instances:
+import qualified Data.ByteString as B
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import qualified Data.IntSet as IntSet
+import qualified Data.Ratio as R
+
+import qualified Data.Tree as T
+
+import Data.Array.Unboxed
+
+--
+-- This isn't available in older Hugs or older GHC
+--
+#if __GLASGOW_HASKELL__ >= 606
+import qualified Data.Sequence as Seq
+import qualified Data.Foldable as Fold
+#endif
+
+------------------------------------------------------------------------
+
+-- | The @Binary@ class provides 'put' and 'get', methods to encode and
+-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
+-- Show classes for textual representation of Haskell types, and is
+-- suitable for serialising Haskell values to disk, over the network.
+--
+-- For parsing and generating simple external binary formats (e.g. C
+-- structures), Binary may be used, but in general is not suitable
+-- for complex protocols. Instead use the Put and Get primitives
+-- directly.
+--
+-- Instances of Binary should satisfy the following property:
+--
+-- > decode . encode == id
+--
+-- That is, the 'get' and 'put' methods should be the inverse of each
+-- other. A range of instances are provided for basic Haskell types.
+--
+class Binary t where
+ -- | Encode a value in the Put monad.
+ put :: t -> Put
+ -- | Decode a value in the Get monad
+ get :: Get t
+
+-- $example
+-- To serialise a custom type, an instance of Binary for that type is
+-- required. For example, suppose we have a data structure:
+--
+-- > data Exp = IntE Int
+-- > | OpE String Exp Exp
+-- > deriving Show
+--
+-- We can encode values of this type into bytestrings using the
+-- following instance, which proceeds by recursively breaking down the
+-- structure to serialise:
+--
+-- > instance Binary Exp where
+-- > put (IntE i) = do put (0 :: Word8)
+-- > put i
+-- > put (OpE s e1 e2) = do put (1 :: Word8)
+-- > put s
+-- > put e1
+-- > put e2
+-- >
+-- > get = do t <- get :: Get Word8
+-- > case t of
+-- > 0 -> do i <- get
+-- > return (IntE i)
+-- > 1 -> do s <- get
+-- > e1 <- get
+-- > e2 <- get
+-- > return (OpE s e1 e2)
+--
+-- Note how we write an initial tag byte to indicate each variant of the
+-- data type.
+--
+-- We can simplify the writing of 'get' instances using monadic
+-- combinators:
+--
+-- > get = do tag <- getWord8
+-- > case tag of
+-- > 0 -> liftM IntE get
+-- > 1 -> liftM3 OpE get get get
+--
+-- The generation of Binary instances has been automated by a script
+-- using Scrap Your Boilerplate generics. Use the script here:
+-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
+--
+-- To derive the instance for a type, load this script into GHCi, and
+-- bring your type into scope. Your type can then have its Binary
+-- instances derived as follows:
+--
+-- > $ ghci -fglasgow-exts BinaryDerive.hs
+-- > *BinaryDerive> :l Example.hs
+-- > *Main> deriveM (undefined :: Drinks)
+-- >
+-- > instance Binary Main.Drinks where
+-- > put (Beer a) = putWord8 0 >> put a
+-- > put Coffee = putWord8 1
+-- > put Tea = putWord8 2
+-- > put EnergyDrink = putWord8 3
+-- > put Water = putWord8 4
+-- > put Wine = putWord8 5
+-- > put Whisky = putWord8 6
+-- > get = do
+-- > tag_ <- getWord8
+-- > case tag_ of
+-- > 0 -> get >>= \a -> return (Beer a)
+-- > 1 -> return Coffee
+-- > 2 -> return Tea
+-- > 3 -> return EnergyDrink
+-- > 4 -> return Water
+-- > 5 -> return Wine
+-- > 6 -> return Whisky
+-- >
+--
+-- To serialise this to a bytestring, we use 'encode', which packs the
+-- data structure into a binary format, in a lazy bytestring
+--
+-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+-- > > let v = encode e
+--
+-- Where 'v' is a binary encoded data structure. To reconstruct the
+-- original data, we use 'decode'
+--
+-- > > decode v :: Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- The lazy ByteString that results from 'encode' can be written to
+-- disk, and read from disk using Data.ByteString.Lazy IO functions,
+-- such as hPutStr or writeFile:
+--
+-- > > writeFile "/tmp/exp.txt" (encode e)
+--
+-- And read back with:
+--
+-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- We can also directly serialise a value to and from a Handle, or a file:
+--
+-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- And write a value to disk
+--
+-- > > encodeFile "/tmp/a.txt" v
+--
+
+------------------------------------------------------------------------
+-- Wrappers to run the underlying monad
+
+-- | Encode a value using binary serialisation to a lazy ByteString.
+--
+encode :: Binary a => a -> ByteString
+encode = runPut . put
+{-# INLINE encode #-}
+
+-- | Decode a value from a lazy ByteString, reconstructing the original structure.
+--
+decode :: Binary a => ByteString -> a
+decode = runGet get
+
+------------------------------------------------------------------------
+-- Convenience IO operations
+
+-- | Lazily serialise a value to a file
+--
+-- This is just a convenience function, it's defined simply as:
+--
+-- > encodeFile f = B.writeFile f . encode
+--
+-- So for example if you wanted to compress as well, you could use:
+--
+-- > B.writeFile f . compress . encode
+--
+encodeFile :: Binary a => FilePath -> a -> IO ()
+encodeFile f v = L.writeFile f (encode v)
+
+-- | Lazily reconstruct a value previously written to a file.
+--
+-- This is just a convenience function, it's defined simply as:
+--
+-- > decodeFile f = return . decode =<< B.readFile f
+--
+-- So for example if you wanted to decompress as well, you could use:
+--
+-- > return . decode . decompress =<< B.readFile f
+--
+-- After contructing the data from the input file, 'decodeFile' checks
+-- if the file is empty, and in doing so will force the associated file
+-- handle closed, if it is indeed empty. If the file is not empty,
+-- it is up to the decoding instance to consume the rest of the data,
+-- or otherwise finalise the resource.
+--
+decodeFile :: Binary a => FilePath -> IO a
+decodeFile f = do
+ s <- L.readFile f
+ return $ runGet (do v <- get
+ m <- isEmpty
+ m `seq` return v) s
+
+-- needs bytestring 0.9.1.x to work
+
+------------------------------------------------------------------------
+-- Lazy put and get
+
+-- lazyPut :: (Binary a) => a -> Put
+-- lazyPut a = put (encode a)
+
+-- lazyGet :: (Binary a) => Get a
+-- lazyGet = fmap decode get
+
+------------------------------------------------------------------------
+-- Simple instances
+
+-- The () type need never be written to disk: values of singleton type
+-- can be reconstructed from the type alone
+instance Binary () where
+ put () = return ()
+ get = return ()
+
+-- Bools are encoded as a byte in the range 0 .. 1
+instance Binary Bool where
+ put = putWord8 . fromIntegral . fromEnum
+ get = liftM (toEnum . fromIntegral) getWord8
+
+-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
+instance Binary Ordering where
+ put = putWord8 . fromIntegral . fromEnum
+ get = liftM (toEnum . fromIntegral) getWord8
+
+------------------------------------------------------------------------
+-- Words and Ints
+
+-- Words8s are written as bytes
+instance Binary Word8 where
+ put = putWord8
+ get = getWord8
+
+-- Words16s are written as 2 bytes in big-endian (network) order
+instance Binary Word16 where
+ put = putWord16be
+ get = getWord16be
+
+-- Words32s are written as 4 bytes in big-endian (network) order
+instance Binary Word32 where
+ put = putWord32be
+ get = getWord32be
+
+-- Words64s are written as 8 bytes in big-endian (network) order
+instance Binary Word64 where
+ put = putWord64be
+ get = getWord64be
+
+-- Int8s are written as a single byte.
+instance Binary Int8 where
+ put i = put (fromIntegral i :: Word8)
+ get = liftM fromIntegral (get :: Get Word8)
+
+-- Int16s are written as a 2 bytes in big endian format
+instance Binary Int16 where
+ put i = put (fromIntegral i :: Word16)
+ get = liftM fromIntegral (get :: Get Word16)
+
+-- Int32s are written as a 4 bytes in big endian format
+instance Binary Int32 where
+ put i = put (fromIntegral i :: Word32)
+ get = liftM fromIntegral (get :: Get Word32)
+
+-- Int64s are written as a 4 bytes in big endian format
+instance Binary Int64 where
+ put i = put (fromIntegral i :: Word64)
+ get = liftM fromIntegral (get :: Get Word64)
+
+------------------------------------------------------------------------
+
+-- Words are written as sequence of bytes. The last bit of each
+-- byte indicates whether there are more bytes to be read
+instance Binary Word where
+ put i | i <= 0x7f = do put a
+ | i <= 0x3fff = do put (a .|. 0x80)
+ put b
+ | i <= 0x1fffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put c
+ | i <= 0xfffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put d
+#if WORD_SIZE_IN_BITS < 64
+ | otherwise = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put e
+#else
+ | i <= 0x7ffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put e
+ | i <= 0x3ffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put f
+ | i <= 0x1ffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put g
+ | i <= 0xffffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put h
+ | i <= 0xffffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put h
+ | i <= 0x7fffffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put (h .|. 0x80)
+ put j
+ | otherwise = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put (h .|. 0x80)
+ put (j .|. 0x80)
+ put k
+#endif
+ where
+ a = fromIntegral ( i .&. 0x7f) :: Word8
+ b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
+ c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
+ d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
+ e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
+ f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
+ g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
+ h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
+ j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
+ k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
+
+ get = do i <- getWord8
+ (if i <= 0x7f
+ then return (fromIntegral i)
+ else do n <- get
+ return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
+
+-- Int has the same representation as Word
+instance Binary Int where
+ put i = put (fromIntegral i :: Word)
+ get = liftM fromIntegral (get :: Get Word)
+
+------------------------------------------------------------------------
+--
+-- Portable, and pretty efficient, serialisation of Integer
+--
+
+-- Fixed-size type for a subset of Integer
+type SmallInt = Int32
+
+-- Integers are encoded in two ways: if they fit inside a SmallInt,
+-- they're written as a byte tag, and that value. If the Integer value
+-- is too large to fit in a SmallInt, it is written as a byte array,
+-- along with a sign and length field.
+
+instance Binary Integer where
+
+ {-# INLINE put #-}
+ put n | n >= lo && n <= hi = do
+ putWord8 0
+ put (fromIntegral n :: SmallInt) -- fast path
+ where
+ lo = fromIntegral (minBound :: SmallInt) :: Integer
+ hi = fromIntegral (maxBound :: SmallInt) :: Integer
+
+ put n = do
+ putWord8 1
+ put sign
+ put (unroll (abs n)) -- unroll the bytes
+ where
+ sign = fromIntegral (signum n) :: Word8
+
+ {-# INLINE get #-}
+ get = do
+ tag <- get :: Get Word8
+ case tag of
+ 0 -> liftM fromIntegral (get :: Get SmallInt)
+ _ -> do sign <- get
+ bytes <- get
+ let v = roll bytes
+ return $! if sign == (1 :: Word8) then v else - v
+
+--
+-- Fold and unfold an Integer to and from a list of its bytes
+--
+unroll :: Integer -> [Word8]
+unroll = unfoldr step
+ where
+ step 0 = Nothing
+ step i = Just (fromIntegral i, i `shiftR` 8)
+
+roll :: [Word8] -> Integer
+roll = foldr unstep 0
+ where
+ unstep b a = a `shiftL` 8 .|. fromIntegral b
+
+{-
+
+--
+-- An efficient, raw serialisation for Integer (GHC only)
+--
+
+-- TODO This instance is not architecture portable. GMP stores numbers as
+-- arrays of machine sized words, so the byte format is not portable across
+-- architectures with different endianess and word size.
+
+import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
+import GHC.Base hiding (ord, chr)
+import GHC.Prim
+import GHC.Ptr (Ptr(..))
+import GHC.IOBase (IO(..))
+
+instance Binary Integer where
+ put (S# i) = putWord8 0 >> put (I# i)
+ put (J# s ba) = do
+ putWord8 1
+ put (I# s)
+ put (BA ba)
+
+ get = do
+ b <- getWord8
+ case b of
+ 0 -> do (I# i#) <- get
+ return (S# i#)
+ _ -> do (I# s#) <- get
+ (BA a#) <- get
+ return (J# s# a#)
+
+instance Binary ByteArray where
+
+ -- Pretty safe.
+ put (BA ba) =
+ let sz = sizeofByteArray# ba -- (primitive) in *bytes*
+ addr = byteArrayContents# ba
+ bs = unsafePackAddress (I# sz) addr
+ in put bs -- write as a ByteString. easy, yay!
+
+ -- Pretty scary. Should be quick though
+ get = do
+ (fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
+ assert (off == 0) $ return $ unsafePerformIO $ do
+ (MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
+ let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
+ withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
+ freezeByteArray arr
+
+-- wrapper for ByteArray#
+data ByteArray = BA {-# UNPACK #-} !ByteArray#
+data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
+
+newByteArray :: Int# -> IO MBA
+newByteArray sz = IO $ \s ->
+ case newPinnedByteArray# sz s of { (# s', arr #) ->
+ (# s', MBA arr #) }
+
+freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
+freezeByteArray arr = IO $ \s ->
+ case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
+ (# s', BA arr' #) }
+
+-}
+
+instance (Binary a,Integral a) => Binary (R.Ratio a) where
+ put r = put (R.numerator r) >> put (R.denominator r)
+ get = liftM2 (R.%) get get
+
+------------------------------------------------------------------------
+
+-- Char is serialised as UTF-8
+instance Binary Char where
+ put a | c <= 0x7f = put (fromIntegral c :: Word8)
+ | c <= 0x7ff = do put (0xc0 .|. y)
+ put (0x80 .|. z)
+ | c <= 0xffff = do put (0xe0 .|. x)
+ put (0x80 .|. y)
+ put (0x80 .|. z)
+ | c <= 0x10ffff = do put (0xf0 .|. w)
+ put (0x80 .|. x)
+ put (0x80 .|. y)
+ put (0x80 .|. z)
+ | otherwise = error "Not a valid Unicode code point"
+ where
+ c = ord a
+ z, y, x, w :: Word8
+ z = fromIntegral (c .&. 0x3f)
+ y = fromIntegral (shiftR c 6 .&. 0x3f)
+ x = fromIntegral (shiftR c 12 .&. 0x3f)
+ w = fromIntegral (shiftR c 18 .&. 0x7)
+
+ get = do
+ let getByte = liftM (fromIntegral :: Word8 -> Int) get
+ shiftL6 = flip shiftL 6 :: Int -> Int
+ w <- getByte
+ r <- case () of
+ _ | w < 0x80 -> return w
+ | w < 0xe0 -> do
+ x <- liftM (xor 0x80) getByte
+ return (x .|. shiftL6 (xor 0xc0 w))
+ | w < 0xf0 -> do
+ x <- liftM (xor 0x80) getByte
+ y <- liftM (xor 0x80) getByte
+ return (y .|. shiftL6 (x .|. shiftL6
+ (xor 0xe0 w)))
+ | otherwise -> do
+ x <- liftM (xor 0x80) getByte
+ y <- liftM (xor 0x80) getByte
+ z <- liftM (xor 0x80) getByte
+ return (z .|. shiftL6 (y .|. shiftL6
+ (x .|. shiftL6 (xor 0xf0 w))))
+ return $! chr r
+
+------------------------------------------------------------------------
+-- Instances for the first few tuples
+
+instance (Binary a, Binary b) => Binary (a,b) where
+ put (a,b) = put a >> put b
+ get = liftM2 (,) get get
+
+instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
+ put (a,b,c) = put a >> put b >> put c
+ get = liftM3 (,,) get get get
+
+instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
+ put (a,b,c,d) = put a >> put b >> put c >> put d
+ get = liftM4 (,,,) get get get get
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
+ put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
+ get = liftM5 (,,,,) get get get get get
+
+--
+-- and now just recurse:
+--
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
+ => Binary (a,b,c,d,e,f) where
+ put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
+ get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
+ => Binary (a,b,c,d,e,f,g) where
+ put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
+ get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+ Binary f, Binary g, Binary h)
+ => Binary (a,b,c,d,e,f,g,h) where
+ put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
+ get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+ Binary f, Binary g, Binary h, Binary i)
+ => Binary (a,b,c,d,e,f,g,h,i) where
+ put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
+ get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+ Binary f, Binary g, Binary h, Binary i, Binary j)
+ => Binary (a,b,c,d,e,f,g,h,i,j) where
+ put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
+ get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
+
+------------------------------------------------------------------------
+-- Container types
+
+instance Binary a => Binary [a] where
+ put l = put (length l) >> mapM_ put l
+ get = do n <- get :: Get Int
+ xs <- replicateM n get
+ return xs
+
+instance (Binary a) => Binary (Maybe a) where
+ put Nothing = putWord8 0
+ put (Just x) = putWord8 1 >> put x
+ get = do
+ w <- getWord8
+ case w of
+ 0 -> return Nothing
+ _ -> liftM Just get
+
+instance (Binary a, Binary b) => Binary (Either a b) where
+ put (Left a) = putWord8 0 >> put a
+ put (Right b) = putWord8 1 >> put b
+ get = do
+ w <- getWord8
+ case w of
+ 0 -> liftM Left get
+ _ -> liftM Right get
+
+------------------------------------------------------------------------
+-- ByteStrings (have specially efficient instances)
+
+instance Binary B.ByteString where
+ put bs = do put (B.length bs)
+ putByteString bs
+ get = get >>= getByteString
+
+--
+-- Using old versions of fps, this is a type synonym, and non portable
+--
+-- Requires 'flexible instances'
+--
+instance Binary ByteString where
+ put bs = do put (fromIntegral (L.length bs) :: Int)
+ putLazyByteString bs
+ get = get >>= getLazyByteString
+
+------------------------------------------------------------------------
+-- Maps and Sets
+
+instance (Ord a, Binary a) => Binary (Set.Set a) where
+ put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
+ get = liftM Set.fromDistinctAscList get
+
+instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
+ put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
+ get = liftM Map.fromDistinctAscList get
+
+instance Binary IntSet.IntSet where
+ put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
+ get = liftM IntSet.fromDistinctAscList get
+
+instance (Binary e) => Binary (IntMap.IntMap e) where
+ put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
+ get = liftM IntMap.fromDistinctAscList get
+
+------------------------------------------------------------------------
+-- Queues and Sequences
+
+#if __GLASGOW_HASKELL__ >= 606
+--
+-- This is valid Hugs, but you need the most recent Hugs
+--
+
+instance (Binary e) => Binary (Seq.Seq e) where
+ -- any better way to do this?
+ put = put . Fold.toList
+ get = fmap Seq.fromList get
+
+#endif
+
+------------------------------------------------------------------------
+-- Floating point
+
+instance Binary Double where
+ put d = put (decodeFloat d)
+ get = liftM2 encodeFloat get get
+
+instance Binary Float where
+ put f = put (decodeFloat f)
+ get = liftM2 encodeFloat get get
+
+------------------------------------------------------------------------
+-- Trees
+
+instance (Binary e) => Binary (T.Tree e) where
+ put (T.Node r s) = put r >> put s
+ get = liftM2 T.Node get get
+
+------------------------------------------------------------------------
+-- Arrays
+
+instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
+ put a = do
+ put (bounds a)
+ put (rangeSize $ bounds a) -- write the length
+ mapM_ put (elems a) -- now the elems.
+ get = do
+ bs <- get
+ n <- get -- read the length
+ xs <- replicateM n get -- now the elems.
+ return (listArray bs xs)
+
+--
+-- The IArray UArray e constraint is non portable. Requires flexible instances
+--
+instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
+ put a = do
+ put (bounds a)
+ put (rangeSize $ bounds a) -- now write the length
+ mapM_ put (elems a)
+ get = do
+ bs <- get
+ n <- get
+ xs <- replicateM n get
+ return (listArray bs xs)
diff --git a/src/Data/Binary/Builder.hs b/src/Data/Binary/Builder.hs
new file mode 100644
index 000000000..cccbe6fa4
--- /dev/null
+++ b/src/Data/Binary/Builder.hs
@@ -0,0 +1,426 @@
+{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fglasgow-exts #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary.Builder
+-- Copyright : Lennart Kolmodin, Ross Paterson
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Builder (
+
+ -- * The Builder type
+ Builder
+ , toLazyByteString
+
+ -- * Constructing Builders
+ , empty
+ , singleton
+ , append
+ , fromByteString -- :: S.ByteString -> Builder
+ , fromLazyByteString -- :: L.ByteString -> Builder
+
+ -- * Flushing the buffer state
+ , flush
+
+ -- * Derived Builders
+ -- ** Big-endian writes
+ , putWord16be -- :: Word16 -> Builder
+ , putWord32be -- :: Word32 -> Builder
+ , putWord64be -- :: Word64 -> Builder
+
+ -- ** Little-endian writes
+ , putWord16le -- :: Word16 -> Builder
+ , putWord32le -- :: Word32 -> Builder
+ , putWord64le -- :: Word64 -> Builder
+
+ -- ** Host-endian, unaligned writes
+ , putWordhost -- :: Word -> Builder
+ , putWord16host -- :: Word16 -> Builder
+ , putWord32host -- :: Word32 -> Builder
+ , putWord64host -- :: Word64 -> Builder
+
+ ) where
+
+import Foreign
+import Data.Monoid
+import Data.Word
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import Data.ByteString.Base (inlinePerformIO)
+import qualified Data.ByteString.Base as S
+#else
+import Data.ByteString.Internal (inlinePerformIO)
+import qualified Data.ByteString.Internal as S
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word (Word32(..),Word16(..),Word64(..))
+
+#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
+import GHC.Word (uncheckedShiftRL64#)
+#endif
+#endif
+
+------------------------------------------------------------------------
+
+-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
+-- There are several functions for constructing 'Builder's, but only one
+-- to inspect them: to extract any data, you have to turn them into lazy
+-- 'L.ByteString's using 'toLazyByteString'.
+--
+-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
+-- arrays piece by piece. As each buffer is filled, it is \'popped\'
+-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
+-- All this is hidden from the user of the 'Builder'.
+
+newtype Builder = Builder {
+ -- Invariant (from Data.ByteString.Lazy):
+ -- The lists include no null ByteStrings.
+ runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
+ }
+
+instance Monoid Builder where
+ mempty = empty
+ {-# INLINE mempty #-}
+ mappend = append
+ {-# INLINE mappend #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The empty Builder, satisfying
+--
+-- * @'toLazyByteString' 'empty' = 'L.empty'@
+--
+empty :: Builder
+empty = Builder id
+{-# INLINE empty #-}
+
+-- | /O(1)./ A Builder taking a single byte, satisfying
+--
+-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
+--
+singleton :: Word8 -> Builder
+singleton = writeN 1 . flip poke
+{-# INLINE singleton #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The concatenation of two Builders, an associative operation
+-- with identity 'empty', satisfying
+--
+-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
+--
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE append #-}
+
+-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
+--
+-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
+--
+fromByteString :: S.ByteString -> Builder
+fromByteString bs
+ | S.null bs = empty
+ | otherwise = flush `append` mapBuilder (bs :)
+{-# INLINE fromByteString #-}
+
+-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
+--
+-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
+--
+fromLazyByteString :: L.ByteString -> Builder
+fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
+{-# INLINE fromLazyByteString #-}
+
+------------------------------------------------------------------------
+
+-- Our internal buffer type
+data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
+ {-# UNPACK #-} !Int -- offset
+ {-# UNPACK #-} !Int -- used bytes
+ {-# UNPACK #-} !Int -- length left
+
+------------------------------------------------------------------------
+
+-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
+-- The construction work takes place if and when the relevant part of
+-- the lazy 'L.ByteString' is demanded.
+--
+toLazyByteString :: Builder -> L.ByteString
+toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
+ buf <- newBuffer defaultSize
+ return (runBuilder (m `append` flush) (const []) buf)
+
+-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
+-- yielding a new chunk in the result lazy 'L.ByteString'.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+ if u == 0
+ then k buf
+ else S.PS p o u : k (Buffer p (o+u) 0 l)
+
+------------------------------------------------------------------------
+
+--
+-- copied from Data.ByteString.Lazy
+--
+defaultSize :: Int
+defaultSize = 32 * k - overhead
+ where k = 1024
+ overhead = 2 * sizeOf (undefined :: Int)
+
+------------------------------------------------------------------------
+
+-- | Sequence an IO operation on the buffer
+unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
+unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
+ buf' <- f buf
+ return (k buf')
+{-# INLINE unsafeLiftIO #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
+ runBuilder (f l) k buf
+
+-- | Map the resulting list of bytestrings.
+mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
+mapBuilder f = Builder (f .)
+
+------------------------------------------------------------------------
+
+-- | Ensure that there are at least @n@ many bytes available.
+ensureFree :: Int -> Builder
+ensureFree n = n `seq` withSize $ \ l ->
+ if n <= l then empty else
+ flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
+{-# INLINE ensureFree #-}
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- bytes into the memory.
+writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
+writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
+{-# INLINE writeN #-}
+
+writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
+writeNBuffer n f (Buffer fp o u l) = do
+ withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+ return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBuffer #-}
+
+newBuffer :: Int -> IO Buffer
+newBuffer size = do
+ fp <- S.mallocByteString size
+ return $! Buffer fp 0 0 size
+{-# INLINE newBuffer #-}
+
+------------------------------------------------------------------------
+-- Aligned, host order writes of storable values
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- storable values into the memory.
+writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
+writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
+{-# INLINE writeNbytes #-}
+
+writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
+writeNBufferBytes n f (Buffer fp o u l) = do
+ withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+ return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBufferBytes #-}
+
+------------------------------------------------------------------------
+
+--
+-- We rely on the fromIntegral to do the right masking for us.
+-- The inlining here is critical, and can be worth 4x performance
+--
+
+-- | Write a Word16 in big endian format
+putWord16be :: Word16 -> Builder
+putWord16be w = writeN 2 $ \p -> do
+ poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le :: Word16 -> Builder
+putWord16le w = writeN 2 $ \p -> do
+ poke p (fromIntegral (w) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
+{-# INLINE putWord16le #-}
+
+-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
+
+-- | Write a Word32 in big endian format
+putWord32be :: Word32 -> Builder
+putWord32be w = writeN 4 $ \p -> do
+ poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
+{-# INLINE putWord32be #-}
+
+--
+-- a data type to tag Put/Check. writes construct these which are then
+-- inlined and flattened. matching Checks will be more robust with rules.
+--
+
+-- | Write a Word32 in little endian format
+putWord32le :: Word32 -> Builder
+putWord32le w = writeN 4 $ \p -> do
+ poke p (fromIntegral (w) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
+{-# INLINE putWord32le #-}
+
+-- on a little endian machine:
+-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
+
+-- | Write a Word64 in big endian format
+putWord64be :: Word64 -> Builder
+#if WORD_SIZE_IN_BITS < 64
+--
+-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
+-- Word32, and write that
+--
+putWord64be w =
+ let a = fromIntegral (shiftr_w64 w 32) :: Word32
+ b = fromIntegral w :: Word32
+ in writeN 8 $ \p -> do
+ poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
+#else
+putWord64be w = writeN 8 $ \p -> do
+ poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
+#endif
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le :: Word64 -> Builder
+
+#if WORD_SIZE_IN_BITS < 64
+putWord64le w =
+ let b = fromIntegral (shiftr_w64 w 32) :: Word32
+ a = fromIntegral w :: Word32
+ in writeN 8 $ \p -> do
+ poke (p) (fromIntegral (a) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
+#else
+putWord64le w = writeN 8 $ \p -> do
+ poke p (fromIntegral (w) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
+#endif
+{-# INLINE putWord64le #-}
+
+-- on a little endian machine:
+-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
+
+------------------------------------------------------------------------
+-- Unaligned, word size ops
+
+-- | /O(1)./ A Builder taking a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost :: Word -> Builder
+putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
+{-# INLINE putWordhost #-}
+
+-- | Write a Word16 in native host order and host endianness.
+-- 2 bytes will be written, unaligned.
+putWord16host :: Word16 -> Builder
+putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
+{-# INLINE putWord16host #-}
+
+-- | Write a Word32 in native host order and host endianness.
+-- 4 bytes will be written, unaligned.
+putWord32host :: Word32 -> Builder
+putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
+{-# INLINE putWord32host #-}
+
+-- | Write a Word64 in native host order.
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- 8 bytes will be written, unaligned.
+putWord64host :: Word64 -> Builder
+putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
+{-# INLINE putWord64host #-}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+{-# INLINE shiftr_w16 #-}
+shiftr_w16 :: Word16 -> Int -> Word16
+{-# INLINE shiftr_w32 #-}
+shiftr_w32 :: Word32 -> Int -> Word32
+{-# INLINE shiftr_w64 #-}
+shiftr_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
+shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftRL64"
+ uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
+#endif
+
+#else
+shiftr_w16 = shiftR
+shiftr_w32 = shiftR
+shiftr_w64 = shiftR
+#endif
diff --git a/src/Data/Binary/Get.hs b/src/Data/Binary/Get.hs
new file mode 100644
index 000000000..d92567e45
--- /dev/null
+++ b/src/Data/Binary/Get.hs
@@ -0,0 +1,539 @@
+{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fglasgow-exts #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary.Get
+-- Copyright : Lennart Kolmodin
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : experimental
+-- Portability : portable to Hugs and GHC.
+--
+-- The Get monad. A monad for efficiently building structures from
+-- encoded lazy ByteStrings
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Get (
+
+ -- * The Get type
+ Get
+ , runGet
+ , runGetState
+
+ -- * Parsing
+ , skip
+ , uncheckedSkip
+ , lookAhead
+ , lookAheadM
+ , lookAheadE
+ , uncheckedLookAhead
+
+ -- * Utility
+ , bytesRead
+ , getBytes
+ , remaining
+ , isEmpty
+
+ -- * Parsing particular types
+ , getWord8
+
+ -- ** ByteStrings
+ , getByteString
+ , getLazyByteString
+ , getLazyByteStringNul
+ , getRemainingLazyByteString
+
+ -- ** Big-endian reads
+ , getWord16be
+ , getWord32be
+ , getWord64be
+
+ -- ** Little-endian reads
+ , getWord16le
+ , getWord32le
+ , getWord64le
+
+ -- ** Host-endian, unaligned reads
+ , getWordhost
+ , getWord16host
+ , getWord32host
+ , getWord64host
+
+ ) where
+
+import Control.Monad (when,liftM,ap)
+import Control.Monad.Fix
+import Data.Maybe (isNothing)
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import qualified Data.ByteString.Base as B
+#else
+import qualified Data.ByteString.Internal as B
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative (Applicative(..))
+#endif
+
+import Foreign
+
+-- used by splitAtST
+import Control.Monad.ST
+import Data.STRef
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word
+import GHC.Int
+#endif
+
+-- | The parse state
+data S = S {-# UNPACK #-} !B.ByteString -- current chunk
+ L.ByteString -- the rest of the input
+ {-# UNPACK #-} !Int64 -- bytes read
+
+-- | The Get monad is just a State monad carrying around the input ByteString
+newtype Get a = Get { unGet :: S -> (a, S) }
+
+instance Functor Get where
+ fmap f m = Get (\s -> let (a, s') = unGet m s
+ in (f a, s'))
+ {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative Get where
+ pure = return
+ (<*>) = ap
+#endif
+
+instance Monad Get where
+ return a = Get (\s -> (a, s))
+ {-# INLINE return #-}
+
+ m >>= k = Get (\s -> case unGet m s of
+ (a, s') -> unGet (k a) s')
+ {-# INLINE (>>=) #-}
+
+ fail = failDesc
+
+instance MonadFix Get where
+ mfix f = Get (\s -> let (a,s') = unGet (f a) s
+ in (a,s'))
+
+------------------------------------------------------------------------
+
+get :: Get S
+get = Get (\s -> (s, s))
+
+put :: S -> Get ()
+put s = Get (\_ -> ((), s))
+
+------------------------------------------------------------------------
+
+initState :: L.ByteString -> S
+initState xs = mkState xs 0
+{-# INLINE initState #-}
+
+{-
+initState (B.LPS xs) =
+ case xs of
+ [] -> S B.empty L.empty 0
+ (x:xs') -> S x (B.LPS xs') 0
+-}
+
+#ifndef BYTESTRING_IN_BASE
+mkState :: L.ByteString -> Int64 -> S
+mkState l = case l of
+ L.Empty -> S B.empty L.empty
+ L.Chunk x xs -> S x xs
+{-# INLINE mkState #-}
+
+#else
+mkState :: L.ByteString -> Int64 -> S
+mkState (B.LPS xs) =
+ case xs of
+ [] -> S B.empty L.empty
+ (x:xs') -> S x (B.LPS xs')
+#endif
+
+-- | Run the Get monad applies a 'get'-based parser on the input ByteString
+runGet :: Get a -> L.ByteString -> a
+runGet m str = case unGet m (initState str) of (a, _) -> a
+
+-- | Run the Get monad applies a 'get'-based parser on the input
+-- ByteString. Additional to the result of get it returns the number of
+-- consumed bytes and the rest of the input.
+runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
+runGetState m str off =
+ case unGet m (mkState str off) of
+ (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
+
+------------------------------------------------------------------------
+
+failDesc :: String -> Get a
+failDesc err = do
+ S _ _ bytes <- get
+ Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
+
+-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
+skip :: Int -> Get ()
+skip n = readN (fromIntegral n) (const ())
+
+-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
+uncheckedSkip :: Int64 -> Get ()
+uncheckedSkip n = do
+ S s ss bytes <- get
+ if fromIntegral (B.length s) >= n
+ then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
+ else do
+ let rest = L.drop (n - fromIntegral (B.length s)) ss
+ put $! mkState rest (bytes + n)
+
+-- | Run @ga@, but return without consuming its input.
+-- Fails if @ga@ fails.
+lookAhead :: Get a -> Get a
+lookAhead ga = do
+ s <- get
+ a <- ga
+ put s
+ return a
+
+-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
+-- Fails if @gma@ fails.
+lookAheadM :: Get (Maybe a) -> Get (Maybe a)
+lookAheadM gma = do
+ s <- get
+ ma <- gma
+ when (isNothing ma) $
+ put s
+ return ma
+
+-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
+-- Fails if @gea@ fails.
+lookAheadE :: Get (Either a b) -> Get (Either a b)
+lookAheadE gea = do
+ s <- get
+ ea <- gea
+ case ea of
+ Left _ -> put s
+ _ -> return ()
+ return ea
+
+-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
+uncheckedLookAhead :: Int64 -> Get L.ByteString
+uncheckedLookAhead n = do
+ S s ss _ <- get
+ if n <= fromIntegral (B.length s)
+ then return (L.fromChunks [B.take (fromIntegral n) s])
+ else return $ L.take n (s `join` ss)
+
+------------------------------------------------------------------------
+-- Utility
+
+-- | Get the total number of bytes read to this point.
+bytesRead :: Get Int64
+bytesRead = do
+ S _ _ b <- get
+ return b
+
+-- | Get the number of remaining unparsed bytes.
+-- Useful for checking whether all input has been consumed.
+-- Note that this forces the rest of the input.
+remaining :: Get Int64
+remaining = do
+ S s ss _ <- get
+ return (fromIntegral (B.length s) + L.length ss)
+
+-- | Test whether all input has been consumed,
+-- i.e. there are no remaining unparsed bytes.
+isEmpty :: Get Bool
+isEmpty = do
+ S s ss _ <- get
+ return (B.null s && L.null ss)
+
+------------------------------------------------------------------------
+-- Utility with ByteStrings
+
+-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
+-- than @n@ bytes are left in the input.
+getByteString :: Int -> Get B.ByteString
+getByteString n = readN n id
+{-# INLINE getByteString #-}
+
+-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
+-- @n@ bytes are left in the input.
+getLazyByteString :: Int64 -> Get L.ByteString
+getLazyByteString n = do
+ S s ss bytes <- get
+ let big = s `join` ss
+ case splitAtST n big of
+ (consume, rest) -> do put $ mkState rest (bytes + n)
+ return consume
+{-# INLINE getLazyByteString #-}
+
+-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
+-- if it reaches the end of input without hitting a NUL.
+getLazyByteStringNul :: Get L.ByteString
+getLazyByteStringNul = do
+ S s ss bytes <- get
+ let big = s `join` ss
+ (consume, t) = L.break (== 0) big
+ (h, rest) = L.splitAt 1 t
+ if L.null h
+ then fail "too few bytes"
+ else do
+ put $ mkState rest (bytes + L.length consume + 1)
+ return consume
+{-# INLINE getLazyByteStringNul #-}
+
+-- | Get the remaining bytes as a lazy ByteString
+getRemainingLazyByteString :: Get L.ByteString
+getRemainingLazyByteString = do
+ S s ss _ <- get
+ return (s `join` ss)
+
+------------------------------------------------------------------------
+-- Helpers
+
+-- | Pull @n@ bytes from the input, as a strict ByteString.
+getBytes :: Int -> Get B.ByteString
+getBytes n = do
+ S s ss bytes <- get
+ if n <= B.length s
+ then do let (consume,rest) = B.splitAt n s
+ put $! S rest ss (bytes + fromIntegral n)
+ return $! consume
+ else
+ case L.splitAt (fromIntegral n) (s `join` ss) of
+ (consuming, rest) ->
+ do let now = B.concat . L.toChunks $ consuming
+ put $! mkState rest (bytes + fromIntegral n)
+ -- forces the next chunk before this one is returned
+ if (B.length now < n)
+ then
+ fail "too few bytes"
+ else
+ return now
+{-# INLINE getBytes #-}
+-- ^ important
+
+#ifndef BYTESTRING_IN_BASE
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb lb
+ | B.null bb = lb
+ | otherwise = L.Chunk bb lb
+
+#else
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb (B.LPS lb)
+ | B.null bb = B.LPS lb
+ | otherwise = B.LPS (bb:lb)
+#endif
+ -- don't use L.append, it's strict in it's second argument :/
+{-# INLINE join #-}
+
+-- | Split a ByteString. If the first result is consumed before the --
+-- second, this runs in constant heap space.
+--
+-- You must force the returned tuple for that to work, e.g.
+--
+-- > case splitAtST n xs of
+-- > (ys,zs) -> consume ys ... consume zs
+--
+splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
+splitAtST i ps | i <= 0 = (L.empty, ps)
+#ifndef BYTESTRING_IN_BASE
+splitAtST i ps = runST (
+ do r <- newSTRef undefined
+ xs <- first r i ps
+ ys <- unsafeInterleaveST (readSTRef r)
+ return (xs, ys))
+
+ where
+ first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
+ first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
+
+ first r n (L.Chunk x xs)
+ | n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
+ return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
+ | otherwise = do writeSTRef r (L.drop (n - l) xs)
+ liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
+
+ where l = fromIntegral (B.length x)
+#else
+splitAtST i (B.LPS ps) = runST (
+ do r <- newSTRef undefined
+ xs <- first r i ps
+ ys <- unsafeInterleaveST (readSTRef r)
+ return (B.LPS xs, B.LPS ys))
+
+ where first r 0 xs = writeSTRef r xs >> return []
+ first r _ [] = writeSTRef r [] >> return []
+ first r n (x:xs)
+ | n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
+ return [B.take (fromIntegral n) x]
+ | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
+ fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
+
+ where l = fromIntegral (B.length x)
+#endif
+{-# INLINE splitAtST #-}
+
+-- Pull n bytes from the input, and apply a parser to those bytes,
+-- yielding a value. If less than @n@ bytes are available, fail with an
+-- error. This wraps @getBytes@.
+readN :: Int -> (B.ByteString -> a) -> Get a
+readN n f = fmap f $ getBytes n
+{-# INLINE readN #-}
+-- ^ important
+
+------------------------------------------------------------------------
+-- Primtives
+
+-- helper, get a raw Ptr onto a strict ByteString copied out of the
+-- underlying lazy byteString. So many indirections from the raw parser
+-- state that my head hurts...
+
+getPtr :: Storable a => Int -> Get a
+getPtr n = do
+ (fp,o,_) <- readN n B.toForeignPtr
+ return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
+{-# INLINE getPtr #-}
+
+------------------------------------------------------------------------
+
+-- | Read a Word8 from the monad state
+getWord8 :: Get Word8
+getWord8 = getPtr (sizeOf (undefined :: Word8))
+{-# INLINE getWord8 #-}
+
+-- | Read a Word16 in big endian format
+getWord16be :: Get Word16
+getWord16be = do
+ s <- readN 2 id
+ return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
+ (fromIntegral (s `B.index` 1))
+{-# INLINE getWord16be #-}
+
+-- | Read a Word16 in little endian format
+getWord16le :: Get Word16
+getWord16le = do
+ s <- readN 2 id
+ return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
+ (fromIntegral (s `B.index` 0) )
+{-# INLINE getWord16le #-}
+
+-- | Read a Word32 in big endian format
+getWord32be :: Get Word32
+getWord32be = do
+ s <- readN 4 id
+ return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
+ (fromIntegral (s `B.index` 3) )
+{-# INLINE getWord32be #-}
+
+-- | Read a Word32 in little endian format
+getWord32le :: Get Word32
+getWord32le = do
+ s <- readN 4 id
+ return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
+ (fromIntegral (s `B.index` 0) )
+{-# INLINE getWord32le #-}
+
+-- | Read a Word64 in big endian format
+getWord64be :: Get Word64
+getWord64be = do
+ s <- readN 8 id
+ return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
+ (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
+ (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
+ (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
+ (fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
+ (fromIntegral (s `B.index` 7) )
+{-# INLINE getWord64be #-}
+
+-- | Read a Word64 in little endian format
+getWord64le :: Get Word64
+getWord64le = do
+ s <- readN 8 id
+ return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
+ (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
+ (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
+ (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
+ (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
+ (fromIntegral (s `B.index` 0) )
+{-# INLINE getWord64le #-}
+
+------------------------------------------------------------------------
+-- Host-endian reads
+
+-- | /O(1)./ Read a single native machine word. The word is read in
+-- host order, host endian form, for the machine you're on. On a 64 bit
+-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
+getWordhost :: Get Word
+getWordhost = getPtr (sizeOf (undefined :: Word))
+{-# INLINE getWordhost #-}
+
+-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
+getWord16host :: Get Word16
+getWord16host = getPtr (sizeOf (undefined :: Word16))
+{-# INLINE getWord16host #-}
+
+-- | /O(1)./ Read a Word32 in native host order and host endianness.
+getWord32host :: Get Word32
+getWord32host = getPtr (sizeOf (undefined :: Word32))
+{-# INLINE getWord32host #-}
+
+-- | /O(1)./ Read a Word64 in native host order and host endianess.
+getWord64host :: Get Word64
+getWord64host = getPtr (sizeOf (undefined :: Word64))
+{-# INLINE getWord64host #-}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+shiftl_w16 :: Word16 -> Int -> Word16
+shiftl_w32 :: Word32 -> Int -> Word32
+shiftl_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
+shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftL64"
+ uncheckedShiftL64# :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
+#endif
+
+#else
+shiftl_w16 = shiftL
+shiftl_w32 = shiftL
+shiftl_w64 = shiftL
+#endif
diff --git a/src/Data/Binary/Put.hs b/src/Data/Binary/Put.hs
new file mode 100644
index 000000000..353bfb7b1
--- /dev/null
+++ b/src/Data/Binary/Put.hs
@@ -0,0 +1,199 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary.Put
+-- Copyright : Lennart Kolmodin
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : stable
+-- Portability : Portable to Hugs and GHC. Requires MPTCs
+--
+-- The Put monad. A monad for efficiently constructing lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary.Put (
+
+ -- * The Put type
+ Put
+ , PutM(..)
+ , runPut
+
+ -- * Flushing the implicit parse state
+ , flush
+
+ -- * Primitives
+ , putWord8
+ , putByteString
+ , putLazyByteString
+
+ -- * Big-endian primitives
+ , putWord16be
+ , putWord32be
+ , putWord64be
+
+ -- * Little-endian primitives
+ , putWord16le
+ , putWord32le
+ , putWord64le
+
+ -- * Host-endian, unaligned writes
+ , putWordhost -- :: Word -> Put
+ , putWord16host -- :: Word16 -> Put
+ , putWord32host -- :: Word32 -> Put
+ , putWord64host -- :: Word64 -> Put
+
+ ) where
+
+import Data.Monoid
+import Data.Binary.Builder (Builder, toLazyByteString)
+import qualified Data.Binary.Builder as B
+
+import Data.Word
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative
+#endif
+
+
+------------------------------------------------------------------------
+
+-- XXX Strict in buffer only.
+data PairS a = PairS a {-# UNPACK #-}!Builder
+
+sndS :: PairS a -> Builder
+sndS (PairS _ b) = b
+
+-- | The PutM type. A Writer monad over the efficient Builder monoid.
+newtype PutM a = Put { unPut :: PairS a }
+
+-- | Put merely lifts Builder into a Writer monad, applied to ().
+type Put = PutM ()
+
+instance Functor PutM where
+ fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
+ {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative PutM where
+ pure = return
+ m <*> k = Put $
+ let PairS f w = unPut m
+ PairS x w' = unPut k
+ in PairS (f x) (w `mappend` w')
+#endif
+
+-- Standard Writer monad, with aggressive inlining
+instance Monad PutM where
+ return a = Put $ PairS a mempty
+ {-# INLINE return #-}
+
+ m >>= k = Put $
+ let PairS a w = unPut m
+ PairS b w' = unPut (k a)
+ in PairS b (w `mappend` w')
+ {-# INLINE (>>=) #-}
+
+ m >> k = Put $
+ let PairS _ w = unPut m
+ PairS b w' = unPut k
+ in PairS b (w `mappend` w')
+ {-# INLINE (>>) #-}
+
+tell :: Builder -> Put
+tell b = Put $ PairS () b
+{-# INLINE tell #-}
+
+-- | Run the 'Put' monad with a serialiser
+runPut :: Put -> L.ByteString
+runPut = toLazyByteString . sndS . unPut
+{-# INLINE runPut #-}
+
+------------------------------------------------------------------------
+
+-- | Pop the ByteString we have constructed so far, if any, yielding a
+-- new chunk in the result ByteString.
+flush :: Put
+flush = tell B.flush
+{-# INLINE flush #-}
+
+-- | Efficiently write a byte into the output buffer
+putWord8 :: Word8 -> Put
+putWord8 = tell . B.singleton
+{-# INLINE putWord8 #-}
+
+-- | An efficient primitive to write a strict ByteString into the output buffer.
+-- It flushes the current buffer, and writes the argument into a new chunk.
+putByteString :: S.ByteString -> Put
+putByteString = tell . B.fromByteString
+{-# INLINE putByteString #-}
+
+-- | Write a lazy ByteString efficiently, simply appending the lazy
+-- ByteString chunks to the output buffer
+putLazyByteString :: L.ByteString -> Put
+putLazyByteString = tell . B.fromLazyByteString
+{-# INLINE putLazyByteString #-}
+
+-- | Write a Word16 in big endian format
+putWord16be :: Word16 -> Put
+putWord16be = tell . B.putWord16be
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le :: Word16 -> Put
+putWord16le = tell . B.putWord16le
+{-# INLINE putWord16le #-}
+
+-- | Write a Word32 in big endian format
+putWord32be :: Word32 -> Put
+putWord32be = tell . B.putWord32be
+{-# INLINE putWord32be #-}
+
+-- | Write a Word32 in little endian format
+putWord32le :: Word32 -> Put
+putWord32le = tell . B.putWord32le
+{-# INLINE putWord32le #-}
+
+-- | Write a Word64 in big endian format
+putWord64be :: Word64 -> Put
+putWord64be = tell . B.putWord64be
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le :: Word64 -> Put
+putWord64le = tell . B.putWord64le
+{-# INLINE putWord64le #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ Write a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost :: Word -> Put
+putWordhost = tell . B.putWordhost
+{-# INLINE putWordhost #-}
+
+-- | /O(1)./ Write a Word16 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord16host :: Word16 -> Put
+putWord16host = tell . B.putWord16host
+{-# INLINE putWord16host #-}
+
+-- | /O(1)./ Write a Word32 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord32host :: Word32 -> Put
+putWord32host = tell . B.putWord32host
+{-# INLINE putWord32host #-}
+
+-- | /O(1)./ Write a Word64 in native host order
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- For portability issues see @putWordhost@.
+putWord64host :: Word64 -> Put
+putWord64host = tell . B.putWord64host
+{-# INLINE putWord64host #-}
diff --git a/src/GF/Compile/Export.hs b/src/GF/Compile/Export.hs
index 575a9dc84..64b4aeabf 100644
--- a/src/GF/Compile/Export.hs
+++ b/src/GF/Compile/Export.hs
@@ -2,8 +2,6 @@ module GF.Compile.Export where
import PGF.CId
import PGF.Data (PGF(..))
-import PGF.Raw.Print (printTree)
-import PGF.Raw.Convert (fromPGF)
import GF.Compile.GFCCtoHaskell
import GF.Compile.GFCCtoProlog
import GF.Compile.GFCCtoJS
@@ -32,7 +30,6 @@ exportPGF :: Options
-> [(FilePath,String)] -- ^ List of recommended file names and contents.
exportPGF opts fmt pgf =
case fmt of
- FmtPGF -> multi "pgf" printPGF
FmtPGFPretty -> multi "txt" prPGFPretty
FmtJavaScript -> multi "js" pgf2js
FmtHaskell -> multi "hs" (grammar2haskell opts name)
@@ -65,7 +62,3 @@ outputConcr :: PGF -> CId
outputConcr pgf = case cncnames pgf of
[] -> error "No concrete syntax."
cnc:_ -> cnc
-
-printPGF :: PGF -> String
-printPGF = encodeUTF8 . printTree . fromPGF
-
diff --git a/src/GF/Compile/GrammarToGFCC.hs b/src/GF/Compile/GrammarToGFCC.hs
index 4b5cf24bb..267015fb6 100644
--- a/src/GF/Compile/GrammarToGFCC.hs
+++ b/src/GF/Compile/GrammarToGFCC.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE PatternGuards #-}
-module GF.Compile.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where
+module GF.Compile.GrammarToGFCC (mkCanon2gfcc,addParsers) where
import GF.Compile.Export
import GF.Compile.OptimizeGF (unshareModule)
@@ -37,11 +37,6 @@ traceD s t = t
-- the main function: generate PGF from GF.
-
-prGrammar2gfcc :: Options -> String -> SourceGrammar -> (String,String)
-prGrammar2gfcc opts cnc gr = (abs,printPGF gc) where
- (abs,gc) = mkCanon2gfcc opts cnc gr
-
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF)
mkCanon2gfcc opts cnc gr =
(prIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon abs) gr)
diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs
index 25ccb09a2..f9221b233 100644
--- a/src/GF/Infra/Option.hs
+++ b/src/GF/Infra/Option.hs
@@ -80,8 +80,7 @@ data Phase = Preproc | Convert | Compile | Link
data Encoding = UTF_8 | ISO_8859_1 | CP_1251
deriving (Eq,Ord)
-data OutputFormat = FmtPGF
- | FmtPGFPretty
+data OutputFormat = FmtPGFPretty
| FmtJavaScript
| FmtHaskell
| FmtProlog
@@ -239,7 +238,7 @@ defaultFlags = Flags {
optShowCPUTime = False,
optEmitGFO = True,
optGFODir = ".",
- optOutputFormats = [FmtPGF],
+ optOutputFormats = [],
optSISR = Nothing,
optHaskellOptions = Set.empty,
optLexicalCats = Set.empty,
@@ -427,8 +426,7 @@ optDescr =
outputFormats :: [(String,OutputFormat)]
outputFormats =
- [("pgf", FmtPGF),
- ("pgf-pretty", FmtPGFPretty),
+ [("pgf-pretty", FmtPGFPretty),
("js", FmtJavaScript),
("haskell", FmtHaskell),
("prolog", FmtProlog),
diff --git a/src/GFC.hs b/src/GFC.hs
index 8af23d6e1..337acb87a 100644
--- a/src/GFC.hs
+++ b/src/GFC.hs
@@ -4,8 +4,6 @@ module GFC (mainGFC) where
import PGF
import PGF.CId
import PGF.Data
-import PGF.Raw.Parse
-import PGF.Raw.Convert
import GF.Compile
import GF.Compile.Export
@@ -16,6 +14,7 @@ import GF.Infra.Option
import GF.Data.ErrM
import Data.Maybe
+import Data.Binary
import System.FilePath
@@ -57,10 +56,17 @@ unionPGFFiles opts fs =
where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f
writeOutputs :: Options -> PGF -> IOE ()
-writeOutputs opts pgf =
- sequence_ [writeOutput opts name str
- | fmt <- flag optOutputFormats opts,
- (name,str) <- exportPGF opts fmt pgf]
+writeOutputs opts pgf = do
+ writePGF opts pgf
+ sequence_ [writeOutput opts name str
+ | fmt <- flag optOutputFormats opts,
+ (name,str) <- exportPGF opts fmt pgf]
+
+writePGF :: Options -> PGF -> IOE ()
+writePGF opts pgf = do
+ let name = fromMaybe (prCId (absname pgf)) (flag optName opts)
+ outfile = name <.> "pgf"
+ putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ encodeFile outfile pgf
writeOutput :: Options -> FilePath-> String -> IOE ()
writeOutput opts file str =
diff --git a/src/PGF.hs b/src/PGF.hs
index 38031dcbd..ac7deb537 100644
--- a/src/PGF.hs
+++ b/src/PGF.hs
@@ -66,9 +66,7 @@ import PGF.TypeCheck
import PGF.Paraphrase
import PGF.Macros
import PGF.Data
-import PGF.Raw.Convert
-import PGF.Raw.Parse
-import PGF.Raw.Print (printTree)
+import PGF.Binary
import PGF.Parsing.FCFG
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import qualified GF.Compile.GeneratePMCFG as PMCFG
@@ -80,6 +78,7 @@ import GF.Data.Utilities (replace)
import Data.Char
import qualified Data.Map as Map
import Data.Maybe
+import Data.Binary
import System.Random (newStdGen)
import Control.Monad
@@ -210,9 +209,8 @@ readLanguage = readCId
showLanguage = prCId
readPGF f = do
- s <- readFile f >>= return . decodeUTF8 -- pgf is in UTF8, internal in unicode
- g <- parseGrammar s
- return $! addParsers $ toPGF g
+ g <- decodeFile f
+ return $! addParsers g
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> PGF
diff --git a/src/PGF/Raw/Abstract.hs b/src/PGF/Raw/Abstract.hs
deleted file mode 100644
index 77d919a2d..000000000
--- a/src/PGF/Raw/Abstract.hs
+++ /dev/null
@@ -1,14 +0,0 @@
-module PGF.Raw.Abstract where
-
-data Grammar =
- Grm [RExp]
- deriving (Eq,Ord,Show)
-
-data RExp =
- App String [RExp]
- | AInt Integer
- | AStr String
- | AFlt Double
- | AMet
- deriving (Eq,Ord,Show)
-
diff --git a/src/PGF/Raw/Convert.hs b/src/PGF/Raw/Convert.hs
deleted file mode 100644
index 85799a3a2..000000000
--- a/src/PGF/Raw/Convert.hs
+++ /dev/null
@@ -1,273 +0,0 @@
-module PGF.Raw.Convert (toPGF,fromPGF) where
-
-import PGF.CId
-import PGF.Data
-import PGF.Raw.Abstract
-
-import Data.Array.IArray
-import qualified Data.Map as Map
-import qualified Data.Set as Set
-import qualified Data.IntMap as IntMap
-
-pgfMajorVersion, pgfMinorVersion :: Integer
-(pgfMajorVersion, pgfMinorVersion) = (1,0)
-
--- convert parsed grammar to internal PGF
-
-toPGF :: Grammar -> PGF
-toPGF (Grm [
- App "pgf" (AInt v1 : AInt v2 : App a []:cs),
- App "flags" gfs,
- ab@(
- App "abstract" [
- App "fun" fs,
- App "cat" cts
- ]),
- App "concrete" ccs
- ]) = let pgf = PGF {
- absname = mkCId a,
- cncnames = [mkCId c | App c [] <- cs],
- gflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs],
- abstract =
- let
- aflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs]
- lfuns = [(mkCId f,(toType typ,toExp def)) | App f [typ, def] <- fs]
- funs = Map.fromAscList lfuns
- lcats = [(mkCId c, Prelude.map toHypo hyps) | App c hyps <- cts]
- cats = Map.fromAscList lcats
- catfuns = Map.fromAscList
- [(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
- in Abstr aflags funs cats catfuns,
- concretes = Map.fromAscList [(mkCId lang, toConcr pgf ts) | App lang ts <- ccs]
- }
- in pgf
- where
-
-toConcr :: PGF -> [RExp] -> Concr
-toConcr pgf rexp =
- let cnc = foldl add (Concr {cflags = Map.empty,
- lins = Map.empty,
- opers = Map.empty,
- lincats = Map.empty,
- lindefs = Map.empty,
- printnames = Map.empty,
- paramlincats = Map.empty,
- parser = Nothing
- }) rexp
- in cnc
- where
- add :: Concr -> RExp -> Concr
- add cnc (App "flags" ts) = cnc { cflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- ts] }
- add cnc (App "lin" ts) = cnc { lins = mkTermMap ts }
- add cnc (App "oper" ts) = cnc { opers = mkTermMap ts }
- add cnc (App "lincat" ts) = cnc { lincats = mkTermMap ts }
- add cnc (App "lindef" ts) = cnc { lindefs = mkTermMap ts }
- add cnc (App "printname" ts) = cnc { printnames = mkTermMap ts }
- add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts }
- add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) }
-
-toPInfo :: [RExp] -> ParserInfo
-toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "categories" (t:cs)] =
- ParserInfo { functions = functions
- , sequences = seqs
- , productions = productions
- , startCats = cats
- , totalCats = expToInt t
- }
- where
- functions = mkArray (map toFFun fs)
- seqs = mkArray (map toFSeq ss)
- productions = IntMap.fromList (map toProductionSet ps)
- cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
-
- toFFun :: RExp -> FFun
- toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins
- where
- fun = mkCId f
- prof = map toProfile ts
- lins = mkArray [fromIntegral seqid | AInt seqid <- ls]
-
- toProfile :: RExp -> Profile
- toProfile AMet = []
- toProfile (App "_A" [t]) = [expToInt t]
- toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
-
- toFSeq :: RExp -> FSeq
- toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss]
-
- toProductionSet :: RExp -> (FCat,Set.Set Production)
- toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
- where
- toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
- toProduction (App "C" [fcat]) = FCoerce (expToInt fcat)
-
-toSymbol :: RExp -> FSymbol
-toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l)
-toSymbol (App "PL" [n,l]) = FSymLit (expToInt n) (expToInt l)
-toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
-toSymbol (AStr t) = FSymTok (KS t)
-
-toType :: RExp -> Type
-toType e = case e of
- App cat [App "H" hypos, App "X" exps] ->
- DTyp (map toHypo hypos) (mkCId cat) (map toExp exps)
- _ -> error $ "type " ++ show e
-
-toHypo :: RExp -> Hypo
-toHypo e = case e of
- App x [typ] -> Hyp (mkCId x) (toType typ)
- _ -> error $ "hypo " ++ show e
-
-toExp :: RExp -> Expr
-toExp e = case e of
- App "Abs" [App x [], exp] -> EAbs (mkCId x) (toExp exp)
- App "App" [e1,e2] -> EApp (toExp e1) (toExp e2)
- App "Eq" eqs -> EEq [Equ (map toExp ps) (toExp v) | App "E" (v:ps) <- eqs]
- App "Var" [App i []] -> EVar (mkCId i)
- AMet -> EMeta 0
- AInt i -> ELit (LInt i)
- AFlt i -> ELit (LFlt i)
- AStr i -> ELit (LStr i)
- _ -> error $ "exp " ++ show e
-
-toTerm :: RExp -> Term
-toTerm e = case e of
- App "R" es -> R (map toTerm es)
- App "S" es -> S (map toTerm es)
- App "FV" es -> FV (map toTerm es)
- App "P" [e,v] -> P (toTerm e) (toTerm v)
- App "W" [AStr s,v] -> W s (toTerm v)
- App "A" [AInt i] -> V (fromInteger i)
- App f [] -> F (mkCId f)
- AInt i -> C (fromInteger i)
- AMet -> TM "?"
- App "KP" (d:alts) -> K (toKP d alts)
- AStr s -> K (KS s)
- _ -> error $ "term " ++ show e
-
-toKP d alts = KP (toStr d) (map toAlt alts)
- where
- toStr (App "S" vs) = [v | AStr v <- vs]
- toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y)
-
-
-------------------------------
---- from internal to parser --
-------------------------------
-
-fromPGF :: PGF -> Grammar
-fromPGF pgf = Grm [
- App "pgf" (AInt pgfMajorVersion:AInt pgfMinorVersion
- : App (prCId (absname pgf)) [] : map (flip App [] . prCId) (cncnames pgf)),
- App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (gflags pgf `Map.union` aflags apgf)],
- App "abstract" [
- App "fun" [App (prCId f) [fromType t,fromExp d] | (f,(t,d)) <- Map.toList (funs apgf)],
- App "cat" [App (prCId f) (map fromHypo hs) | (f,hs) <- Map.toList (cats apgf)]
- ],
- App "concrete" [App (prCId lang) (fromConcrete c) | (lang,c) <- Map.toList (concretes pgf)]
- ]
- where
- apgf = abstract pgf
- fromConcrete cnc = [
- App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (cflags cnc)],
- App "lin" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lins cnc)],
- App "oper" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (opers cnc)],
- App "lincat" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lincats cnc)],
- App "lindef" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lindefs cnc)],
- App "printname" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (printnames cnc)],
- App "param" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (paramlincats cnc)]
- ] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
-
-fromType :: Type -> RExp
-fromType e = case e of
- DTyp hypos cat exps ->
- App (prCId cat) [
- App "H" (map fromHypo hypos),
- App "X" (map fromExp exps)]
-
-fromHypo :: Hypo -> RExp
-fromHypo e = case e of
- Hyp x typ -> App (prCId x) [fromType typ]
-
-fromExp :: Expr -> RExp
-fromExp e = case e of
- EAbs x exp -> App "Abs" [App (prCId x) [], fromExp exp]
- EApp e1 e2 -> App "App" [fromExp e1, fromExp e2]
- EVar x -> App "Var" [App (prCId x) []]
- ELit (LStr s) -> AStr s
- ELit (LFlt d) -> AFlt d
- ELit (LInt i) -> AInt (toInteger i)
- EMeta _ -> AMet ----
- EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
-
-fromTerm :: Term -> RExp
-fromTerm e = case e of
- R es -> App "R" (map fromTerm es)
- S es -> App "S" (map fromTerm es)
- FV es -> App "FV" (map fromTerm es)
- P e v -> App "P" [fromTerm e, fromTerm v]
- W s v -> App "W" [AStr s, fromTerm v]
- C i -> AInt (toInteger i)
- TM _ -> AMet
- F f -> App (prCId f) []
- V i -> App "A" [AInt (toInteger i)]
- K t -> fromTokn t
-
-fromTokn :: Tokn -> RExp
-fromTokn (KS s) = AStr s
-fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs])
- where
- str v = App "S" (map AStr v)
-
--- ** Parsing info
-
-fromPInfo :: ParserInfo -> RExp
-fromPInfo p = App "parser" [
- App "functions" [fromFFun fun | fun <- elems (functions p)],
- App "sequences" [fromFSeq seq | seq <- elems (sequences p)],
- App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)],
- App "categories" (intToExp (totalCats p) : [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)])
- ]
-
-fromFFun :: FFun -> RExp
-fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]]
- where
- fromProfile :: Profile -> RExp
- fromProfile [] = AMet
- fromProfile [x] = daughter x
- fromProfile args = App "_U" (map daughter args)
-
- daughter n = App "_A" [intToExp n]
-
-fromSymbol :: FSymbol -> RExp
-fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l]
-fromSymbol (FSymLit n l) = App "PL" [intToExp n, intToExp l]
-fromSymbol (FSymTok t) = fromTokn t
-
-fromFSeq :: FSeq -> RExp
-fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq]
-
-fromProductionSet :: (FCat,Set.Set Production) -> RExp
-fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs))
- where
- fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args)
- fromPassive (FCoerce fcat) = App "C" [intToExp fcat]
-
--- ** Utilities
-
-mkTermMap :: [RExp] -> Map.Map CId Term
-mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts]
-
-mkArray :: IArray a e => [e] -> a Int e
-mkArray xs = listArray (0, length xs - 1) xs
-
-expToInt :: Integral a => RExp -> a
-expToInt (App "neg" [AInt i]) = fromIntegral (negate i)
-expToInt (AInt i) = fromIntegral i
-
-expToStr :: RExp -> String
-expToStr (AStr s) = s
-
-intToExp :: Integral a => a -> RExp
-intToExp x | x < 0 = App "neg" [AInt (fromIntegral (negate x))]
- | otherwise = AInt (fromIntegral x)
diff --git a/src/PGF/Raw/Parse.hs b/src/PGF/Raw/Parse.hs
deleted file mode 100644
index 671183ba4..000000000
--- a/src/PGF/Raw/Parse.hs
+++ /dev/null
@@ -1,101 +0,0 @@
-module PGF.Raw.Parse (parseGrammar) where
-
-import PGF.CId
-import PGF.Raw.Abstract
-
-import Control.Monad
-import Data.Char
-import qualified Data.ByteString.Char8 as BS
-
-parseGrammar :: String -> IO Grammar
-parseGrammar s = case runP pGrammar s of
- Just (x,"") -> return x
- _ -> fail "Parse error"
-
-pGrammar :: P Grammar
-pGrammar = liftM Grm pTerms
-
-pTerms :: P [RExp]
-pTerms = liftM2 (:) (pTerm 1) pTerms <++ (skipSpaces >> return [])
-
-pTerm :: Int -> P RExp
-pTerm n = skipSpaces >> (pParen <++ pApp <++ pNum <++ pStr <++ pMeta)
- where pParen = between (char '(') (char ')') (pTerm 0)
- pApp = liftM2 App pIdent (if n == 0 then pTerms else return [])
- pStr = char '"' >> liftM AStr (manyTill (pEsc <++ get) (char '"'))
- pEsc = char '\\' >> get
- pNum = do x <- munch1 isDigit
- ((char '.' >> munch1 isDigit >>= \y -> return (AFlt (read (x++"."++y))))
- <++
- return (AInt (read x)))
- pMeta = char '?' >> return AMet
- pIdent = liftM2 (:) (satisfy isIdentFirst) (munch isIdentRest)
- isIdentFirst c = c == '_' || isAlpha c
- isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
-
--- Parser combinators with only left-biased choice
-
-newtype P a = P { runP :: String -> Maybe (a,String) }
-
-instance Monad P where
- return x = P (\ts -> Just (x,ts))
- P p >>= f = P (\ts -> p ts >>= \ (x,ts') -> runP (f x) ts')
- fail _ = pfail
-
-instance MonadPlus P where
- mzero = pfail
- mplus = (<++)
-
-
-get :: P Char
-get = P (\ts -> case ts of
- [] -> Nothing
- c:cs -> Just (c,cs))
-
-look :: P String
-look = P (\ts -> Just (ts,ts))
-
-(<++) :: P a -> P a -> P a
-P p <++ P q = P (\ts -> p ts `mplus` q ts)
-
-pfail :: P a
-pfail = P (\ts -> Nothing)
-
-satisfy :: (Char -> Bool) -> P Char
-satisfy p = do c <- get
- if p c then return c else pfail
-
-char :: Char -> P Char
-char c = satisfy (c==)
-
-string :: String -> P String
-string this = look >>= scan this
- where
- scan [] _ = return this
- scan (x:xs) (y:ys) | x == y = get >> scan xs ys
- scan _ _ = pfail
-
-skipSpaces :: P ()
-skipSpaces = look >>= skip
- where
- skip (c:s) | isSpace c = get >> skip s
- skip _ = return ()
-
-manyTill :: P a -> P end -> P [a]
-manyTill p end = scan
- where scan = (end >> return []) <++ liftM2 (:) p scan
-
-munch :: (Char -> Bool) -> P String
-munch p = munch1 p <++ return []
-
-munch1 :: (Char -> Bool) -> P String
-munch1 p = liftM2 (:) (satisfy p) (munch p)
-
-choice :: [P a] -> P a
-choice = msum
-
-between :: P open -> P close -> P a -> P a
-between open close p = do open
- x <- p
- close
- return x
diff --git a/src/PGF/Raw/Print.hs b/src/PGF/Raw/Print.hs
deleted file mode 100644
index d34adbc2b..000000000
--- a/src/PGF/Raw/Print.hs
+++ /dev/null
@@ -1,35 +0,0 @@
-module PGF.Raw.Print (printTree) where
-
-import PGF.CId
-import PGF.Raw.Abstract
-
-import Data.List (intersperse)
-import Numeric (showFFloat)
-import qualified Data.ByteString.Char8 as BS
-
-printTree :: Grammar -> String
-printTree g = prGrammar g ""
-
-prGrammar :: Grammar -> ShowS
-prGrammar (Grm xs) = prRExpList xs
-
-prRExp :: Int -> RExp -> ShowS
-prRExp _ (App x []) = showString x
-prRExp n (App x xs) = p (showString x . showChar ' ' . prRExpList xs)
- where p s = if n == 0 then s else showChar '(' . s . showChar ')'
-prRExp _ (AInt x) = shows x
-prRExp _ (AStr x) = showChar '"' . concatS (map mkEsc x) . showChar '"'
-prRExp _ (AFlt x) = showFFloat Nothing x
-prRExp _ AMet = showChar '?'
-
-mkEsc :: Char -> ShowS
-mkEsc s = case s of
- '"' -> showString "\\\""
- '\\' -> showString "\\\\"
- _ -> showChar s
-
-prRExpList :: [RExp] -> ShowS
-prRExpList = concatS . intersperse (showChar ' ') . map (prRExp 1)
-
-concatS :: [ShowS] -> ShowS
-concatS = foldr (.) id