summaryrefslogtreecommitdiff
path: root/src/runtime/haskell
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2009-12-13 18:50:29 +0000
committerkrasimir <krasimir@chalmers.se>2009-12-13 18:50:29 +0000
commitf85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch)
tree667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell
parentd88a865faff59c98fc91556ff8700b10ee5f2df8 (diff)
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/runtime/haskell')
-rw-r--r--src/runtime/haskell/Data/Binary.hs791
-rw-r--r--src/runtime/haskell/Data/Binary/Builder.hs426
-rw-r--r--src/runtime/haskell/Data/Binary/Get.hs544
-rw-r--r--src/runtime/haskell/Data/Binary/Put.hs216
-rw-r--r--src/runtime/haskell/PGF.hs352
-rw-r--r--src/runtime/haskell/PGF/Binary.hs199
-rw-r--r--src/runtime/haskell/PGF/BuildParser.hs76
-rw-r--r--src/runtime/haskell/PGF/CId.hs55
-rw-r--r--src/runtime/haskell/PGF/Check.hs173
-rw-r--r--src/runtime/haskell/PGF/Data.hs95
-rw-r--r--src/runtime/haskell/PGF/Editor.hs241
-rw-r--r--src/runtime/haskell/PGF/Expr.hs355
-rw-r--r--src/runtime/haskell/PGF/Expr.hs-boot28
-rw-r--r--src/runtime/haskell/PGF/Generate.hs66
-rw-r--r--src/runtime/haskell/PGF/Linearize.hs166
-rw-r--r--src/runtime/haskell/PGF/Macros.hs154
-rw-r--r--src/runtime/haskell/PGF/Morphology.hs26
-rw-r--r--src/runtime/haskell/PGF/PMCFG.hs119
-rw-r--r--src/runtime/haskell/PGF/Paraphrase.hs112
-rw-r--r--src/runtime/haskell/PGF/Parsing/FCFG/Active.hs205
-rw-r--r--src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs371
-rw-r--r--src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs188
-rw-r--r--src/runtime/haskell/PGF/ShowLinearize.hs113
-rw-r--r--src/runtime/haskell/PGF/Tree.hs71
-rw-r--r--src/runtime/haskell/PGF/Type.hs103
-rw-r--r--src/runtime/haskell/PGF/TypeCheck.hs524
-rw-r--r--src/runtime/haskell/PGF/VisualizeTree.hs353
27 files changed, 6122 insertions, 0 deletions
diff --git a/src/runtime/haskell/Data/Binary.hs b/src/runtime/haskell/Data/Binary.hs
new file mode 100644
index 000000000..786f5a09e
--- /dev/null
+++ b/src/runtime/haskell/Data/Binary.hs
@@ -0,0 +1,791 @@
+{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary
+-- Copyright : Lennart Kolmodin
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : unstable
+-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
+--
+-- Binary serialisation of Haskell values to and from lazy ByteStrings.
+-- The Binary library provides methods for encoding Haskell values as
+-- streams of bytes directly in memory. The resulting @ByteString@ can
+-- then be written to disk, sent over the network, or futher processed
+-- (for example, compressed with gzip).
+--
+-- The 'Binary' package is notable in that it provides both pure, and
+-- high performance serialisation.
+--
+-- Values are always encoded in network order (big endian) form, and
+-- encoded data should be portable across machine endianess, word size,
+-- or compiler version. For example, data encoded using the Binary class
+-- could be written from GHC, and read back in Hugs.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary (
+
+ -- * The Binary class
+ Binary(..)
+
+ -- $example
+
+ -- * The Get and Put monads
+ , Get
+ , Put
+
+ -- * Useful helpers for writing instances
+ , putWord8
+ , getWord8
+
+ -- * Binary serialisation
+ , encode -- :: Binary a => a -> ByteString
+ , decode -- :: Binary a => ByteString -> a
+
+ -- * IO functions for serialisation
+ , encodeFile -- :: Binary a => FilePath -> a -> IO ()
+ , decodeFile -- :: Binary a => FilePath -> IO a
+
+-- Lazy put and get
+-- , lazyPut
+-- , lazyGet
+
+ , module Data.Word -- useful
+
+ ) where
+
+#include "MachDeps.h"
+
+import Data.Word
+
+import Data.Binary.Put
+import Data.Binary.Get
+
+import Control.Monad
+import Control.Exception
+import Foreign
+import System.IO
+
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as L
+
+import Data.Char (chr,ord)
+import Data.List (unfoldr)
+
+-- And needed for the instances:
+import qualified Data.ByteString as B
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import qualified Data.IntSet as IntSet
+import qualified Data.Ratio as R
+
+import qualified Data.Tree as T
+
+import Data.Array.Unboxed
+
+--
+-- This isn't available in older Hugs or older GHC
+--
+#if __GLASGOW_HASKELL__ >= 606
+import qualified Data.Sequence as Seq
+import qualified Data.Foldable as Fold
+#endif
+
+------------------------------------------------------------------------
+
+-- | The @Binary@ class provides 'put' and 'get', methods to encode and
+-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
+-- Show classes for textual representation of Haskell types, and is
+-- suitable for serialising Haskell values to disk, over the network.
+--
+-- For parsing and generating simple external binary formats (e.g. C
+-- structures), Binary may be used, but in general is not suitable
+-- for complex protocols. Instead use the Put and Get primitives
+-- directly.
+--
+-- Instances of Binary should satisfy the following property:
+--
+-- > decode . encode == id
+--
+-- That is, the 'get' and 'put' methods should be the inverse of each
+-- other. A range of instances are provided for basic Haskell types.
+--
+class Binary t where
+ -- | Encode a value in the Put monad.
+ put :: t -> Put
+ -- | Decode a value in the Get monad
+ get :: Get t
+
+-- $example
+-- To serialise a custom type, an instance of Binary for that type is
+-- required. For example, suppose we have a data structure:
+--
+-- > data Exp = IntE Int
+-- > | OpE String Exp Exp
+-- > deriving Show
+--
+-- We can encode values of this type into bytestrings using the
+-- following instance, which proceeds by recursively breaking down the
+-- structure to serialise:
+--
+-- > instance Binary Exp where
+-- > put (IntE i) = do put (0 :: Word8)
+-- > put i
+-- > put (OpE s e1 e2) = do put (1 :: Word8)
+-- > put s
+-- > put e1
+-- > put e2
+-- >
+-- > get = do t <- get :: Get Word8
+-- > case t of
+-- > 0 -> do i <- get
+-- > return (IntE i)
+-- > 1 -> do s <- get
+-- > e1 <- get
+-- > e2 <- get
+-- > return (OpE s e1 e2)
+--
+-- Note how we write an initial tag byte to indicate each variant of the
+-- data type.
+--
+-- We can simplify the writing of 'get' instances using monadic
+-- combinators:
+--
+-- > get = do tag <- getWord8
+-- > case tag of
+-- > 0 -> liftM IntE get
+-- > 1 -> liftM3 OpE get get get
+--
+-- The generation of Binary instances has been automated by a script
+-- using Scrap Your Boilerplate generics. Use the script here:
+-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
+--
+-- To derive the instance for a type, load this script into GHCi, and
+-- bring your type into scope. Your type can then have its Binary
+-- instances derived as follows:
+--
+-- > $ ghci -fglasgow-exts BinaryDerive.hs
+-- > *BinaryDerive> :l Example.hs
+-- > *Main> deriveM (undefined :: Drinks)
+-- >
+-- > instance Binary Main.Drinks where
+-- > put (Beer a) = putWord8 0 >> put a
+-- > put Coffee = putWord8 1
+-- > put Tea = putWord8 2
+-- > put EnergyDrink = putWord8 3
+-- > put Water = putWord8 4
+-- > put Wine = putWord8 5
+-- > put Whisky = putWord8 6
+-- > get = do
+-- > tag_ <- getWord8
+-- > case tag_ of
+-- > 0 -> get >>= \a -> return (Beer a)
+-- > 1 -> return Coffee
+-- > 2 -> return Tea
+-- > 3 -> return EnergyDrink
+-- > 4 -> return Water
+-- > 5 -> return Wine
+-- > 6 -> return Whisky
+-- >
+--
+-- To serialise this to a bytestring, we use 'encode', which packs the
+-- data structure into a binary format, in a lazy bytestring
+--
+-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+-- > > let v = encode e
+--
+-- Where 'v' is a binary encoded data structure. To reconstruct the
+-- original data, we use 'decode'
+--
+-- > > decode v :: Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- The lazy ByteString that results from 'encode' can be written to
+-- disk, and read from disk using Data.ByteString.Lazy IO functions,
+-- such as hPutStr or writeFile:
+--
+-- > > writeFile "/tmp/exp.txt" (encode e)
+--
+-- And read back with:
+--
+-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- We can also directly serialise a value to and from a Handle, or a file:
+--
+-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- And write a value to disk
+--
+-- > > encodeFile "/tmp/a.txt" v
+--
+
+------------------------------------------------------------------------
+-- Wrappers to run the underlying monad
+
+-- | Encode a value using binary serialisation to a lazy ByteString.
+--
+encode :: Binary a => a -> ByteString
+encode = runPut . put
+{-# INLINE encode #-}
+
+-- | Decode a value from a lazy ByteString, reconstructing the original structure.
+--
+decode :: Binary a => ByteString -> a
+decode = runGet get
+
+------------------------------------------------------------------------
+-- Convenience IO operations
+
+-- | Lazily serialise a value to a file
+--
+-- This is just a convenience function, it's defined simply as:
+--
+-- > encodeFile f = B.writeFile f . encode
+--
+-- So for example if you wanted to compress as well, you could use:
+--
+-- > B.writeFile f . compress . encode
+--
+encodeFile :: Binary a => FilePath -> a -> IO ()
+encodeFile f v = L.writeFile f (encode v)
+
+-- | Lazily reconstruct a value previously written to a file.
+--
+-- This is just a convenience function, it's defined simply as:
+--
+-- > decodeFile f = return . decode =<< B.readFile f
+--
+-- So for example if you wanted to decompress as well, you could use:
+--
+-- > return . decode . decompress =<< B.readFile f
+--
+decodeFile :: Binary a => FilePath -> IO a
+decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
+ s <- L.hGetContents h
+ evaluate $ runGet get s
+
+-- needs bytestring 0.9.1.x to work
+
+------------------------------------------------------------------------
+-- Lazy put and get
+
+-- lazyPut :: (Binary a) => a -> Put
+-- lazyPut a = put (encode a)
+
+-- lazyGet :: (Binary a) => Get a
+-- lazyGet = fmap decode get
+
+------------------------------------------------------------------------
+-- Simple instances
+
+-- The () type need never be written to disk: values of singleton type
+-- can be reconstructed from the type alone
+instance Binary () where
+ put () = return ()
+ get = return ()
+
+-- Bools are encoded as a byte in the range 0 .. 1
+instance Binary Bool where
+ put = putWord8 . fromIntegral . fromEnum
+ get = liftM (toEnum . fromIntegral) getWord8
+
+-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
+instance Binary Ordering where
+ put = putWord8 . fromIntegral . fromEnum
+ get = liftM (toEnum . fromIntegral) getWord8
+
+------------------------------------------------------------------------
+-- Words and Ints
+
+-- Words8s are written as bytes
+instance Binary Word8 where
+ put = putWord8
+ get = getWord8
+
+-- Words16s are written as 2 bytes in big-endian (network) order
+instance Binary Word16 where
+ put = putWord16be
+ get = getWord16be
+
+-- Words32s are written as 4 bytes in big-endian (network) order
+instance Binary Word32 where
+ put = putWord32be
+ get = getWord32be
+
+-- Words64s are written as 8 bytes in big-endian (network) order
+instance Binary Word64 where
+ put = putWord64be
+ get = getWord64be
+
+-- Int8s are written as a single byte.
+instance Binary Int8 where
+ put i = put (fromIntegral i :: Word8)
+ get = liftM fromIntegral (get :: Get Word8)
+
+-- Int16s are written as a 2 bytes in big endian format
+instance Binary Int16 where
+ put i = put (fromIntegral i :: Word16)
+ get = liftM fromIntegral (get :: Get Word16)
+
+-- Int32s are written as a 4 bytes in big endian format
+instance Binary Int32 where
+ put i = put (fromIntegral i :: Word32)
+ get = liftM fromIntegral (get :: Get Word32)
+
+-- Int64s are written as a 4 bytes in big endian format
+instance Binary Int64 where
+ put i = put (fromIntegral i :: Word64)
+ get = liftM fromIntegral (get :: Get Word64)
+
+------------------------------------------------------------------------
+
+-- Words are written as sequence of bytes. The last bit of each
+-- byte indicates whether there are more bytes to be read
+instance Binary Word where
+ put i | i <= 0x7f = do put a
+ | i <= 0x3fff = do put (a .|. 0x80)
+ put b
+ | i <= 0x1fffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put c
+ | i <= 0xfffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put d
+#if WORD_SIZE_IN_BITS < 64
+ | otherwise = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put e
+#else
+ | i <= 0x7ffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put e
+ | i <= 0x3ffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put f
+ | i <= 0x1ffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put g
+ | i <= 0xffffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put h
+ | i <= 0xffffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put h
+ | i <= 0x7fffffffffffffff = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put (h .|. 0x80)
+ put j
+ | otherwise = do put (a .|. 0x80)
+ put (b .|. 0x80)
+ put (c .|. 0x80)
+ put (d .|. 0x80)
+ put (e .|. 0x80)
+ put (f .|. 0x80)
+ put (g .|. 0x80)
+ put (h .|. 0x80)
+ put (j .|. 0x80)
+ put k
+#endif
+ where
+ a = fromIntegral ( i .&. 0x7f) :: Word8
+ b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
+ c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
+ d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
+ e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
+ f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
+ g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
+ h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
+ j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
+ k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
+
+ get = do i <- getWord8
+ (if i <= 0x7f
+ then return (fromIntegral i)
+ else do n <- get
+ return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
+
+-- Int has the same representation as Word
+instance Binary Int where
+ put i = put (fromIntegral i :: Word)
+ get = liftM fromIntegral (get :: Get Word)
+
+------------------------------------------------------------------------
+--
+-- Portable, and pretty efficient, serialisation of Integer
+--
+
+-- Fixed-size type for a subset of Integer
+type SmallInt = Int32
+
+-- Integers are encoded in two ways: if they fit inside a SmallInt,
+-- they're written as a byte tag, and that value. If the Integer value
+-- is too large to fit in a SmallInt, it is written as a byte array,
+-- along with a sign and length field.
+
+instance Binary Integer where
+
+ {-# INLINE put #-}
+ put n | n >= lo && n <= hi = do
+ putWord8 0
+ put (fromIntegral n :: SmallInt) -- fast path
+ where
+ lo = fromIntegral (minBound :: SmallInt) :: Integer
+ hi = fromIntegral (maxBound :: SmallInt) :: Integer
+
+ put n = do
+ putWord8 1
+ put sign
+ put (unroll (abs n)) -- unroll the bytes
+ where
+ sign = fromIntegral (signum n) :: Word8
+
+ {-# INLINE get #-}
+ get = do
+ tag <- get :: Get Word8
+ case tag of
+ 0 -> liftM fromIntegral (get :: Get SmallInt)
+ _ -> do sign <- get
+ bytes <- get
+ let v = roll bytes
+ return $! if sign == (1 :: Word8) then v else - v
+
+--
+-- Fold and unfold an Integer to and from a list of its bytes
+--
+unroll :: Integer -> [Word8]
+unroll = unfoldr step
+ where
+ step 0 = Nothing
+ step i = Just (fromIntegral i, i `shiftR` 8)
+
+roll :: [Word8] -> Integer
+roll = foldr unstep 0
+ where
+ unstep b a = a `shiftL` 8 .|. fromIntegral b
+
+{-
+
+--
+-- An efficient, raw serialisation for Integer (GHC only)
+--
+
+-- TODO This instance is not architecture portable. GMP stores numbers as
+-- arrays of machine sized words, so the byte format is not portable across
+-- architectures with different endianess and word size.
+
+import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
+import GHC.Base hiding (ord, chr)
+import GHC.Prim
+import GHC.Ptr (Ptr(..))
+import GHC.IOBase (IO(..))
+
+instance Binary Integer where
+ put (S# i) = putWord8 0 >> put (I# i)
+ put (J# s ba) = do
+ putWord8 1
+ put (I# s)
+ put (BA ba)
+
+ get = do
+ b <- getWord8
+ case b of
+ 0 -> do (I# i#) <- get
+ return (S# i#)
+ _ -> do (I# s#) <- get
+ (BA a#) <- get
+ return (J# s# a#)
+
+instance Binary ByteArray where
+
+ -- Pretty safe.
+ put (BA ba) =
+ let sz = sizeofByteArray# ba -- (primitive) in *bytes*
+ addr = byteArrayContents# ba
+ bs = unsafePackAddress (I# sz) addr
+ in put bs -- write as a ByteString. easy, yay!
+
+ -- Pretty scary. Should be quick though
+ get = do
+ (fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
+ assert (off == 0) $ return $ unsafePerformIO $ do
+ (MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
+ let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
+ withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
+ freezeByteArray arr
+
+-- wrapper for ByteArray#
+data ByteArray = BA {-# UNPACK #-} !ByteArray#
+data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
+
+newByteArray :: Int# -> IO MBA
+newByteArray sz = IO $ \s ->
+ case newPinnedByteArray# sz s of { (# s', arr #) ->
+ (# s', MBA arr #) }
+
+freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
+freezeByteArray arr = IO $ \s ->
+ case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
+ (# s', BA arr' #) }
+
+-}
+
+instance (Binary a,Integral a) => Binary (R.Ratio a) where
+ put r = put (R.numerator r) >> put (R.denominator r)
+ get = liftM2 (R.%) get get
+
+------------------------------------------------------------------------
+
+-- Char is serialised as UTF-8
+instance Binary Char where
+ put a | c <= 0x7f = put (fromIntegral c :: Word8)
+ | c <= 0x7ff = do put (0xc0 .|. y)
+ put (0x80 .|. z)
+ | c <= 0xffff = do put (0xe0 .|. x)
+ put (0x80 .|. y)
+ put (0x80 .|. z)
+ | c <= 0x10ffff = do put (0xf0 .|. w)
+ put (0x80 .|. x)
+ put (0x80 .|. y)
+ put (0x80 .|. z)
+ | otherwise = error "Not a valid Unicode code point"
+ where
+ c = ord a
+ z, y, x, w :: Word8
+ z = fromIntegral (c .&. 0x3f)
+ y = fromIntegral (shiftR c 6 .&. 0x3f)
+ x = fromIntegral (shiftR c 12 .&. 0x3f)
+ w = fromIntegral (shiftR c 18 .&. 0x7)
+
+ get = do
+ let getByte = liftM (fromIntegral :: Word8 -> Int) get
+ shiftL6 = flip shiftL 6 :: Int -> Int
+ w <- getByte
+ r <- case () of
+ _ | w < 0x80 -> return w
+ | w < 0xe0 -> do
+ x <- liftM (xor 0x80) getByte
+ return (x .|. shiftL6 (xor 0xc0 w))
+ | w < 0xf0 -> do
+ x <- liftM (xor 0x80) getByte
+ y <- liftM (xor 0x80) getByte
+ return (y .|. shiftL6 (x .|. shiftL6
+ (xor 0xe0 w)))
+ | otherwise -> do
+ x <- liftM (xor 0x80) getByte
+ y <- liftM (xor 0x80) getByte
+ z <- liftM (xor 0x80) getByte
+ return (z .|. shiftL6 (y .|. shiftL6
+ (x .|. shiftL6 (xor 0xf0 w))))
+ return $! chr r
+
+------------------------------------------------------------------------
+-- Instances for the first few tuples
+
+instance (Binary a, Binary b) => Binary (a,b) where
+ put (a,b) = put a >> put b
+ get = liftM2 (,) get get
+
+instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
+ put (a,b,c) = put a >> put b >> put c
+ get = liftM3 (,,) get get get
+
+instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
+ put (a,b,c,d) = put a >> put b >> put c >> put d
+ get = liftM4 (,,,) get get get get
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
+ put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
+ get = liftM5 (,,,,) get get get get get
+
+--
+-- and now just recurse:
+--
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
+ => Binary (a,b,c,d,e,f) where
+ put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
+ get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
+ => Binary (a,b,c,d,e,f,g) where
+ put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
+ get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+ Binary f, Binary g, Binary h)
+ => Binary (a,b,c,d,e,f,g,h) where
+ put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
+ get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+ Binary f, Binary g, Binary h, Binary i)
+ => Binary (a,b,c,d,e,f,g,h,i) where
+ put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
+ get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+ Binary f, Binary g, Binary h, Binary i, Binary j)
+ => Binary (a,b,c,d,e,f,g,h,i,j) where
+ put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
+ get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
+
+------------------------------------------------------------------------
+-- Container types
+
+instance Binary a => Binary [a] where
+ put l = put (length l) >> mapM_ put l
+ get = do n <- get :: Get Int
+ xs <- replicateM n get
+ return xs
+
+instance (Binary a) => Binary (Maybe a) where
+ put Nothing = putWord8 0
+ put (Just x) = putWord8 1 >> put x
+ get = do
+ w <- getWord8
+ case w of
+ 0 -> return Nothing
+ _ -> liftM Just get
+
+instance (Binary a, Binary b) => Binary (Either a b) where
+ put (Left a) = putWord8 0 >> put a
+ put (Right b) = putWord8 1 >> put b
+ get = do
+ w <- getWord8
+ case w of
+ 0 -> liftM Left get
+ _ -> liftM Right get
+
+------------------------------------------------------------------------
+-- ByteStrings (have specially efficient instances)
+
+instance Binary B.ByteString where
+ put bs = do put (B.length bs)
+ putByteString bs
+ get = get >>= getByteString
+
+--
+-- Using old versions of fps, this is a type synonym, and non portable
+--
+-- Requires 'flexible instances'
+--
+instance Binary ByteString where
+ put bs = do put (fromIntegral (L.length bs) :: Int)
+ putLazyByteString bs
+ get = get >>= getLazyByteString
+
+------------------------------------------------------------------------
+-- Maps and Sets
+
+instance (Ord a, Binary a) => Binary (Set.Set a) where
+ put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
+ get = liftM Set.fromDistinctAscList get
+
+instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
+ put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
+ get = liftM Map.fromDistinctAscList get
+
+instance Binary IntSet.IntSet where
+ put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
+ get = liftM IntSet.fromDistinctAscList get
+
+instance (Binary e) => Binary (IntMap.IntMap e) where
+ put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
+ get = liftM IntMap.fromDistinctAscList get
+
+------------------------------------------------------------------------
+-- Queues and Sequences
+
+#if __GLASGOW_HASKELL__ >= 606
+--
+-- This is valid Hugs, but you need the most recent Hugs
+--
+
+instance (Binary e) => Binary (Seq.Seq e) where
+ put s = put (Seq.length s) >> Fold.mapM_ put s
+ get = do n <- get :: Get Int
+ rep Seq.empty n get
+ where rep xs 0 _ = return $! xs
+ rep xs n g = xs `seq` n `seq` do
+ x <- g
+ rep (xs Seq.|> x) (n-1) g
+
+#endif
+
+------------------------------------------------------------------------
+-- Floating point
+
+instance Binary Double where
+ put d = put (decodeFloat d)
+ get = liftM2 encodeFloat get get
+
+instance Binary Float where
+ put f = put (decodeFloat f)
+ get = liftM2 encodeFloat get get
+
+------------------------------------------------------------------------
+-- Trees
+
+instance (Binary e) => Binary (T.Tree e) where
+ put (T.Node r s) = put r >> put s
+ get = liftM2 T.Node get get
+
+------------------------------------------------------------------------
+-- Arrays
+
+instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
+ put a = do
+ put (bounds a)
+ put (rangeSize $ bounds a) -- write the length
+ mapM_ put (elems a) -- now the elems.
+ get = do
+ bs <- get
+ n <- get -- read the length
+ xs <- replicateM n get -- now the elems.
+ return (listArray bs xs)
+
+--
+-- The IArray UArray e constraint is non portable. Requires flexible instances
+--
+instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
+ put a = do
+ put (bounds a)
+ put (rangeSize $ bounds a) -- now write the length
+ mapM_ put (elems a)
+ get = do
+ bs <- get
+ n <- get
+ xs <- replicateM n get
+ return (listArray bs xs)
diff --git a/src/runtime/haskell/Data/Binary/Builder.hs b/src/runtime/haskell/Data/Binary/Builder.hs
new file mode 100644
index 000000000..cccbe6fa4
--- /dev/null
+++ b/src/runtime/haskell/Data/Binary/Builder.hs
@@ -0,0 +1,426 @@
+{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fglasgow-exts #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary.Builder
+-- Copyright : Lennart Kolmodin, Ross Paterson
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Builder (
+
+ -- * The Builder type
+ Builder
+ , toLazyByteString
+
+ -- * Constructing Builders
+ , empty
+ , singleton
+ , append
+ , fromByteString -- :: S.ByteString -> Builder
+ , fromLazyByteString -- :: L.ByteString -> Builder
+
+ -- * Flushing the buffer state
+ , flush
+
+ -- * Derived Builders
+ -- ** Big-endian writes
+ , putWord16be -- :: Word16 -> Builder
+ , putWord32be -- :: Word32 -> Builder
+ , putWord64be -- :: Word64 -> Builder
+
+ -- ** Little-endian writes
+ , putWord16le -- :: Word16 -> Builder
+ , putWord32le -- :: Word32 -> Builder
+ , putWord64le -- :: Word64 -> Builder
+
+ -- ** Host-endian, unaligned writes
+ , putWordhost -- :: Word -> Builder
+ , putWord16host -- :: Word16 -> Builder
+ , putWord32host -- :: Word32 -> Builder
+ , putWord64host -- :: Word64 -> Builder
+
+ ) where
+
+import Foreign
+import Data.Monoid
+import Data.Word
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import Data.ByteString.Base (inlinePerformIO)
+import qualified Data.ByteString.Base as S
+#else
+import Data.ByteString.Internal (inlinePerformIO)
+import qualified Data.ByteString.Internal as S
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word (Word32(..),Word16(..),Word64(..))
+
+#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
+import GHC.Word (uncheckedShiftRL64#)
+#endif
+#endif
+
+------------------------------------------------------------------------
+
+-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
+-- There are several functions for constructing 'Builder's, but only one
+-- to inspect them: to extract any data, you have to turn them into lazy
+-- 'L.ByteString's using 'toLazyByteString'.
+--
+-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
+-- arrays piece by piece. As each buffer is filled, it is \'popped\'
+-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
+-- All this is hidden from the user of the 'Builder'.
+
+newtype Builder = Builder {
+ -- Invariant (from Data.ByteString.Lazy):
+ -- The lists include no null ByteStrings.
+ runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
+ }
+
+instance Monoid Builder where
+ mempty = empty
+ {-# INLINE mempty #-}
+ mappend = append
+ {-# INLINE mappend #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The empty Builder, satisfying
+--
+-- * @'toLazyByteString' 'empty' = 'L.empty'@
+--
+empty :: Builder
+empty = Builder id
+{-# INLINE empty #-}
+
+-- | /O(1)./ A Builder taking a single byte, satisfying
+--
+-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
+--
+singleton :: Word8 -> Builder
+singleton = writeN 1 . flip poke
+{-# INLINE singleton #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The concatenation of two Builders, an associative operation
+-- with identity 'empty', satisfying
+--
+-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
+--
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE append #-}
+
+-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
+--
+-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
+--
+fromByteString :: S.ByteString -> Builder
+fromByteString bs
+ | S.null bs = empty
+ | otherwise = flush `append` mapBuilder (bs :)
+{-# INLINE fromByteString #-}
+
+-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
+--
+-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
+--
+fromLazyByteString :: L.ByteString -> Builder
+fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
+{-# INLINE fromLazyByteString #-}
+
+------------------------------------------------------------------------
+
+-- Our internal buffer type
+data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
+ {-# UNPACK #-} !Int -- offset
+ {-# UNPACK #-} !Int -- used bytes
+ {-# UNPACK #-} !Int -- length left
+
+------------------------------------------------------------------------
+
+-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
+-- The construction work takes place if and when the relevant part of
+-- the lazy 'L.ByteString' is demanded.
+--
+toLazyByteString :: Builder -> L.ByteString
+toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
+ buf <- newBuffer defaultSize
+ return (runBuilder (m `append` flush) (const []) buf)
+
+-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
+-- yielding a new chunk in the result lazy 'L.ByteString'.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+ if u == 0
+ then k buf
+ else S.PS p o u : k (Buffer p (o+u) 0 l)
+
+------------------------------------------------------------------------
+
+--
+-- copied from Data.ByteString.Lazy
+--
+defaultSize :: Int
+defaultSize = 32 * k - overhead
+ where k = 1024
+ overhead = 2 * sizeOf (undefined :: Int)
+
+------------------------------------------------------------------------
+
+-- | Sequence an IO operation on the buffer
+unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
+unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
+ buf' <- f buf
+ return (k buf')
+{-# INLINE unsafeLiftIO #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
+ runBuilder (f l) k buf
+
+-- | Map the resulting list of bytestrings.
+mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
+mapBuilder f = Builder (f .)
+
+------------------------------------------------------------------------
+
+-- | Ensure that there are at least @n@ many bytes available.
+ensureFree :: Int -> Builder
+ensureFree n = n `seq` withSize $ \ l ->
+ if n <= l then empty else
+ flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
+{-# INLINE ensureFree #-}
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- bytes into the memory.
+writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
+writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
+{-# INLINE writeN #-}
+
+writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
+writeNBuffer n f (Buffer fp o u l) = do
+ withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+ return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBuffer #-}
+
+newBuffer :: Int -> IO Buffer
+newBuffer size = do
+ fp <- S.mallocByteString size
+ return $! Buffer fp 0 0 size
+{-# INLINE newBuffer #-}
+
+------------------------------------------------------------------------
+-- Aligned, host order writes of storable values
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- storable values into the memory.
+writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
+writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
+{-# INLINE writeNbytes #-}
+
+writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
+writeNBufferBytes n f (Buffer fp o u l) = do
+ withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+ return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBufferBytes #-}
+
+------------------------------------------------------------------------
+
+--
+-- We rely on the fromIntegral to do the right masking for us.
+-- The inlining here is critical, and can be worth 4x performance
+--
+
+-- | Write a Word16 in big endian format
+putWord16be :: Word16 -> Builder
+putWord16be w = writeN 2 $ \p -> do
+ poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le :: Word16 -> Builder
+putWord16le w = writeN 2 $ \p -> do
+ poke p (fromIntegral (w) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
+{-# INLINE putWord16le #-}
+
+-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
+
+-- | Write a Word32 in big endian format
+putWord32be :: Word32 -> Builder
+putWord32be w = writeN 4 $ \p -> do
+ poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
+{-# INLINE putWord32be #-}
+
+--
+-- a data type to tag Put/Check. writes construct these which are then
+-- inlined and flattened. matching Checks will be more robust with rules.
+--
+
+-- | Write a Word32 in little endian format
+putWord32le :: Word32 -> Builder
+putWord32le w = writeN 4 $ \p -> do
+ poke p (fromIntegral (w) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
+{-# INLINE putWord32le #-}
+
+-- on a little endian machine:
+-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
+
+-- | Write a Word64 in big endian format
+putWord64be :: Word64 -> Builder
+#if WORD_SIZE_IN_BITS < 64
+--
+-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
+-- Word32, and write that
+--
+putWord64be w =
+ let a = fromIntegral (shiftr_w64 w 32) :: Word32
+ b = fromIntegral w :: Word32
+ in writeN 8 $ \p -> do
+ poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
+#else
+putWord64be w = writeN 8 $ \p -> do
+ poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
+#endif
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le :: Word64 -> Builder
+
+#if WORD_SIZE_IN_BITS < 64
+putWord64le w =
+ let b = fromIntegral (shiftr_w64 w 32) :: Word32
+ a = fromIntegral w :: Word32
+ in writeN 8 $ \p -> do
+ poke (p) (fromIntegral (a) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
+#else
+putWord64le w = writeN 8 $ \p -> do
+ poke p (fromIntegral (w) :: Word8)
+ poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
+ poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
+ poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
+ poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
+ poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
+ poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
+ poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
+#endif
+{-# INLINE putWord64le #-}
+
+-- on a little endian machine:
+-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
+
+------------------------------------------------------------------------
+-- Unaligned, word size ops
+
+-- | /O(1)./ A Builder taking a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost :: Word -> Builder
+putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
+{-# INLINE putWordhost #-}
+
+-- | Write a Word16 in native host order and host endianness.
+-- 2 bytes will be written, unaligned.
+putWord16host :: Word16 -> Builder
+putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
+{-# INLINE putWord16host #-}
+
+-- | Write a Word32 in native host order and host endianness.
+-- 4 bytes will be written, unaligned.
+putWord32host :: Word32 -> Builder
+putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
+{-# INLINE putWord32host #-}
+
+-- | Write a Word64 in native host order.
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- 8 bytes will be written, unaligned.
+putWord64host :: Word64 -> Builder
+putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
+{-# INLINE putWord64host #-}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+{-# INLINE shiftr_w16 #-}
+shiftr_w16 :: Word16 -> Int -> Word16
+{-# INLINE shiftr_w32 #-}
+shiftr_w32 :: Word32 -> Int -> Word32
+{-# INLINE shiftr_w64 #-}
+shiftr_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
+shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftRL64"
+ uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
+#endif
+
+#else
+shiftr_w16 = shiftR
+shiftr_w32 = shiftR
+shiftr_w64 = shiftR
+#endif
diff --git a/src/runtime/haskell/Data/Binary/Get.hs b/src/runtime/haskell/Data/Binary/Get.hs
new file mode 100644
index 000000000..51062ad31
--- /dev/null
+++ b/src/runtime/haskell/Data/Binary/Get.hs
@@ -0,0 +1,544 @@
+{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fglasgow-exts #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary.Get
+-- Copyright : Lennart Kolmodin
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : experimental
+-- Portability : portable to Hugs and GHC.
+--
+-- The Get monad. A monad for efficiently building structures from
+-- encoded lazy ByteStrings
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Get (
+
+ -- * The Get type
+ Get
+ , runGet
+ , runGetState
+
+ -- * Parsing
+ , skip
+ , uncheckedSkip
+ , lookAhead
+ , lookAheadM
+ , lookAheadE
+ , uncheckedLookAhead
+
+ -- * Utility
+ , bytesRead
+ , getBytes
+ , remaining
+ , isEmpty
+
+ -- * Parsing particular types
+ , getWord8
+
+ -- ** ByteStrings
+ , getByteString
+ , getLazyByteString
+ , getLazyByteStringNul
+ , getRemainingLazyByteString
+
+ -- ** Big-endian reads
+ , getWord16be
+ , getWord32be
+ , getWord64be
+
+ -- ** Little-endian reads
+ , getWord16le
+ , getWord32le
+ , getWord64le
+
+ -- ** Host-endian, unaligned reads
+ , getWordhost
+ , getWord16host
+ , getWord32host
+ , getWord64host
+
+ ) where
+
+import Control.Monad (when,liftM,ap)
+import Control.Monad.Fix
+import Data.Maybe (isNothing)
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import qualified Data.ByteString.Base as B
+#else
+import qualified Data.ByteString.Internal as B
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative (Applicative(..))
+#endif
+
+import Foreign
+
+-- used by splitAtST
+import Control.Monad.ST
+import Data.STRef
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word
+import GHC.Int
+#endif
+
+-- | The parse state
+data S = S {-# UNPACK #-} !B.ByteString -- current chunk
+ L.ByteString -- the rest of the input
+ {-# UNPACK #-} !Int64 -- bytes read
+
+-- | The Get monad is just a State monad carrying around the input ByteString
+newtype Get a = Get { unGet :: S -> (a, S) }
+
+instance Functor Get where
+ fmap f m = Get (\s -> case unGet m s of
+ (a, s') -> (f a, s'))
+ {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative Get where
+ pure = return
+ (<*>) = ap
+#endif
+
+instance Monad Get where
+ return a = Get (\s -> (a, s))
+ {-# INLINE return #-}
+
+ m >>= k = Get (\s -> case unGet m s of
+ (a, s') -> unGet (k a) s')
+ {-# INLINE (>>=) #-}
+
+ fail = failDesc
+
+instance MonadFix Get where
+ mfix f = Get (\s -> let (a,s') = unGet (f a) s
+ in (a,s'))
+
+------------------------------------------------------------------------
+
+get :: Get S
+get = Get (\s -> (s, s))
+
+put :: S -> Get ()
+put s = Get (\_ -> ((), s))
+
+------------------------------------------------------------------------
+--
+-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
+-- Without it, GHC seems to do just fine. And we get similar
+-- performance with 6.8.2 anyway.
+--
+
+initState :: L.ByteString -> S
+initState xs = mkState xs 0
+{- INLINE initState -}
+
+{-
+initState (B.LPS xs) =
+ case xs of
+ [] -> S B.empty L.empty 0
+ (x:xs') -> S x (B.LPS xs') 0
+-}
+
+#ifndef BYTESTRING_IN_BASE
+mkState :: L.ByteString -> Int64 -> S
+mkState l = case l of
+ L.Empty -> S B.empty L.empty
+ L.Chunk x xs -> S x xs
+{- INLINE mkState -}
+
+#else
+mkState :: L.ByteString -> Int64 -> S
+mkState (B.LPS xs) =
+ case xs of
+ [] -> S B.empty L.empty
+ (x:xs') -> S x (B.LPS xs')
+#endif
+
+-- | Run the Get monad applies a 'get'-based parser on the input ByteString
+runGet :: Get a -> L.ByteString -> a
+runGet m str = case unGet m (initState str) of (a, _) -> a
+
+-- | Run the Get monad applies a 'get'-based parser on the input
+-- ByteString. Additional to the result of get it returns the number of
+-- consumed bytes and the rest of the input.
+runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
+runGetState m str off =
+ case unGet m (mkState str off) of
+ (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
+
+------------------------------------------------------------------------
+
+failDesc :: String -> Get a
+failDesc err = do
+ S _ _ bytes <- get
+ Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
+
+-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
+skip :: Int -> Get ()
+skip n = readN (fromIntegral n) (const ())
+
+-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
+uncheckedSkip :: Int64 -> Get ()
+uncheckedSkip n = do
+ S s ss bytes <- get
+ if fromIntegral (B.length s) >= n
+ then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
+ else do
+ let rest = L.drop (n - fromIntegral (B.length s)) ss
+ put $! mkState rest (bytes + n)
+
+-- | Run @ga@, but return without consuming its input.
+-- Fails if @ga@ fails.
+lookAhead :: Get a -> Get a
+lookAhead ga = do
+ s <- get
+ a <- ga
+ put s
+ return a
+
+-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
+-- Fails if @gma@ fails.
+lookAheadM :: Get (Maybe a) -> Get (Maybe a)
+lookAheadM gma = do
+ s <- get
+ ma <- gma
+ when (isNothing ma) $
+ put s
+ return ma
+
+-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
+-- Fails if @gea@ fails.
+lookAheadE :: Get (Either a b) -> Get (Either a b)
+lookAheadE gea = do
+ s <- get
+ ea <- gea
+ case ea of
+ Left _ -> put s
+ _ -> return ()
+ return ea
+
+-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
+uncheckedLookAhead :: Int64 -> Get L.ByteString
+uncheckedLookAhead n = do
+ S s ss _ <- get
+ if n <= fromIntegral (B.length s)
+ then return (L.fromChunks [B.take (fromIntegral n) s])
+ else return $ L.take n (s `join` ss)
+
+------------------------------------------------------------------------
+-- Utility
+
+-- | Get the total number of bytes read to this point.
+bytesRead :: Get Int64
+bytesRead = do
+ S _ _ b <- get
+ return b
+
+-- | Get the number of remaining unparsed bytes.
+-- Useful for checking whether all input has been consumed.
+-- Note that this forces the rest of the input.
+remaining :: Get Int64
+remaining = do
+ S s ss _ <- get
+ return (fromIntegral (B.length s) + L.length ss)
+
+-- | Test whether all input has been consumed,
+-- i.e. there are no remaining unparsed bytes.
+isEmpty :: Get Bool
+isEmpty = do
+ S s ss _ <- get
+ return (B.null s && L.null ss)
+
+------------------------------------------------------------------------
+-- Utility with ByteStrings
+
+-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
+-- than @n@ bytes are left in the input.
+getByteString :: Int -> Get B.ByteString
+getByteString n = readN n id
+{-# INLINE getByteString #-}
+
+-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
+-- @n@ bytes are left in the input.
+getLazyByteString :: Int64 -> Get L.ByteString
+getLazyByteString n = do
+ S s ss bytes <- get
+ let big = s `join` ss
+ case splitAtST n big of
+ (consume, rest) -> do put $ mkState rest (bytes + n)
+ return consume
+{-# INLINE getLazyByteString #-}
+
+-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
+-- if it reaches the end of input without hitting a NUL.
+getLazyByteStringNul :: Get L.ByteString
+getLazyByteStringNul = do
+ S s ss bytes <- get
+ let big = s `join` ss
+ (consume, t) = L.break (== 0) big
+ (h, rest) = L.splitAt 1 t
+ if L.null h
+ then fail "too few bytes"
+ else do
+ put $ mkState rest (bytes + L.length consume + 1)
+ return consume
+{-# INLINE getLazyByteStringNul #-}
+
+-- | Get the remaining bytes as a lazy ByteString
+getRemainingLazyByteString :: Get L.ByteString
+getRemainingLazyByteString = do
+ S s ss _ <- get
+ return (s `join` ss)
+
+------------------------------------------------------------------------
+-- Helpers
+
+-- | Pull @n@ bytes from the input, as a strict ByteString.
+getBytes :: Int -> Get B.ByteString
+getBytes n = do
+ S s ss bytes <- get
+ if n <= B.length s
+ then do let (consume,rest) = B.splitAt n s
+ put $! S rest ss (bytes + fromIntegral n)
+ return $! consume
+ else
+ case L.splitAt (fromIntegral n) (s `join` ss) of
+ (consuming, rest) ->
+ do let now = B.concat . L.toChunks $ consuming
+ put $! mkState rest (bytes + fromIntegral n)
+ -- forces the next chunk before this one is returned
+ if (B.length now < n)
+ then
+ fail "too few bytes"
+ else
+ return now
+{- INLINE getBytes -}
+-- ^ important
+
+#ifndef BYTESTRING_IN_BASE
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb lb
+ | B.null bb = lb
+ | otherwise = L.Chunk bb lb
+
+#else
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb (B.LPS lb)
+ | B.null bb = B.LPS lb
+ | otherwise = B.LPS (bb:lb)
+#endif
+ -- don't use L.append, it's strict in it's second argument :/
+{- INLINE join -}
+
+-- | Split a ByteString. If the first result is consumed before the --
+-- second, this runs in constant heap space.
+--
+-- You must force the returned tuple for that to work, e.g.
+--
+-- > case splitAtST n xs of
+-- > (ys,zs) -> consume ys ... consume zs
+--
+splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
+splitAtST i ps | i <= 0 = (L.empty, ps)
+#ifndef BYTESTRING_IN_BASE
+splitAtST i ps = runST (
+ do r <- newSTRef undefined
+ xs <- first r i ps
+ ys <- unsafeInterleaveST (readSTRef r)
+ return (xs, ys))
+
+ where
+ first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
+ first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
+
+ first r n (L.Chunk x xs)
+ | n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
+ return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
+ | otherwise = do writeSTRef r (L.drop (n - l) xs)
+ liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
+
+ where l = fromIntegral (B.length x)
+#else
+splitAtST i (B.LPS ps) = runST (
+ do r <- newSTRef undefined
+ xs <- first r i ps
+ ys <- unsafeInterleaveST (readSTRef r)
+ return (B.LPS xs, B.LPS ys))
+
+ where first r 0 xs = writeSTRef r xs >> return []
+ first r _ [] = writeSTRef r [] >> return []
+ first r n (x:xs)
+ | n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
+ return [B.take (fromIntegral n) x]
+ | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
+ fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
+
+ where l = fromIntegral (B.length x)
+#endif
+{- INLINE splitAtST -}
+
+-- Pull n bytes from the input, and apply a parser to those bytes,
+-- yielding a value. If less than @n@ bytes are available, fail with an
+-- error. This wraps @getBytes@.
+readN :: Int -> (B.ByteString -> a) -> Get a
+readN n f = fmap f $ getBytes n
+{- INLINE readN -}
+-- ^ important
+
+------------------------------------------------------------------------
+-- Primtives
+
+-- helper, get a raw Ptr onto a strict ByteString copied out of the
+-- underlying lazy byteString. So many indirections from the raw parser
+-- state that my head hurts...
+
+getPtr :: Storable a => Int -> Get a
+getPtr n = do
+ (fp,o,_) <- readN n B.toForeignPtr
+ return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
+{- INLINE getPtr -}
+
+------------------------------------------------------------------------
+
+-- | Read a Word8 from the monad state
+getWord8 :: Get Word8
+getWord8 = getPtr (sizeOf (undefined :: Word8))
+{- INLINE getWord8 -}
+
+-- | Read a Word16 in big endian format
+getWord16be :: Get Word16
+getWord16be = do
+ s <- readN 2 id
+ return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
+ (fromIntegral (s `B.index` 1))
+{- INLINE getWord16be -}
+
+-- | Read a Word16 in little endian format
+getWord16le :: Get Word16
+getWord16le = do
+ s <- readN 2 id
+ return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
+ (fromIntegral (s `B.index` 0) )
+{- INLINE getWord16le -}
+
+-- | Read a Word32 in big endian format
+getWord32be :: Get Word32
+getWord32be = do
+ s <- readN 4 id
+ return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
+ (fromIntegral (s `B.index` 3) )
+{- INLINE getWord32be -}
+
+-- | Read a Word32 in little endian format
+getWord32le :: Get Word32
+getWord32le = do
+ s <- readN 4 id
+ return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
+ (fromIntegral (s `B.index` 0) )
+{- INLINE getWord32le -}
+
+-- | Read a Word64 in big endian format
+getWord64be :: Get Word64
+getWord64be = do
+ s <- readN 8 id
+ return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
+ (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
+ (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
+ (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
+ (fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
+ (fromIntegral (s `B.index` 7) )
+{- INLINE getWord64be -}
+
+-- | Read a Word64 in little endian format
+getWord64le :: Get Word64
+getWord64le = do
+ s <- readN 8 id
+ return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
+ (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
+ (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
+ (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
+ (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
+ (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
+ (fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
+ (fromIntegral (s `B.index` 0) )
+{- INLINE getWord64le -}
+
+------------------------------------------------------------------------
+-- Host-endian reads
+
+-- | /O(1)./ Read a single native machine word. The word is read in
+-- host order, host endian form, for the machine you're on. On a 64 bit
+-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
+getWordhost :: Get Word
+getWordhost = getPtr (sizeOf (undefined :: Word))
+{- INLINE getWordhost -}
+
+-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
+getWord16host :: Get Word16
+getWord16host = getPtr (sizeOf (undefined :: Word16))
+{- INLINE getWord16host -}
+
+-- | /O(1)./ Read a Word32 in native host order and host endianness.
+getWord32host :: Get Word32
+getWord32host = getPtr (sizeOf (undefined :: Word32))
+{- INLINE getWord32host -}
+
+-- | /O(1)./ Read a Word64 in native host order and host endianess.
+getWord64host :: Get Word64
+getWord64host = getPtr (sizeOf (undefined :: Word64))
+{- INLINE getWord64host -}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+shiftl_w16 :: Word16 -> Int -> Word16
+shiftl_w32 :: Word32 -> Int -> Word32
+shiftl_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
+shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftL64"
+ uncheckedShiftL64# :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
+#endif
+
+#else
+shiftl_w16 = shiftL
+shiftl_w32 = shiftL
+shiftl_w64 = shiftL
+#endif
diff --git a/src/runtime/haskell/Data/Binary/Put.hs b/src/runtime/haskell/Data/Binary/Put.hs
new file mode 100644
index 000000000..a1f78dfba
--- /dev/null
+++ b/src/runtime/haskell/Data/Binary/Put.hs
@@ -0,0 +1,216 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.Binary.Put
+-- Copyright : Lennart Kolmodin
+-- License : BSD3-style (see LICENSE)
+--
+-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability : stable
+-- Portability : Portable to Hugs and GHC. Requires MPTCs
+--
+-- The Put monad. A monad for efficiently constructing lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary.Put (
+
+ -- * The Put type
+ Put
+ , PutM(..)
+ , runPut
+ , runPutM
+ , putBuilder
+ , execPut
+
+ -- * Flushing the implicit parse state
+ , flush
+
+ -- * Primitives
+ , putWord8
+ , putByteString
+ , putLazyByteString
+
+ -- * Big-endian primitives
+ , putWord16be
+ , putWord32be
+ , putWord64be
+
+ -- * Little-endian primitives
+ , putWord16le
+ , putWord32le
+ , putWord64le
+
+ -- * Host-endian, unaligned writes
+ , putWordhost -- :: Word -> Put
+ , putWord16host -- :: Word16 -> Put
+ , putWord32host -- :: Word32 -> Put
+ , putWord64host -- :: Word64 -> Put
+
+ ) where
+
+import Data.Monoid
+import Data.Binary.Builder (Builder, toLazyByteString)
+import qualified Data.Binary.Builder as B
+
+import Data.Word
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative
+#endif
+
+
+------------------------------------------------------------------------
+
+-- XXX Strict in buffer only.
+data PairS a = PairS a {-# UNPACK #-}!Builder
+
+sndS :: PairS a -> Builder
+sndS (PairS _ b) = b
+
+-- | The PutM type. A Writer monad over the efficient Builder monoid.
+newtype PutM a = Put { unPut :: PairS a }
+
+-- | Put merely lifts Builder into a Writer monad, applied to ().
+type Put = PutM ()
+
+instance Functor PutM where
+ fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
+ {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative PutM where
+ pure = return
+ m <*> k = Put $
+ let PairS f w = unPut m
+ PairS x w' = unPut k
+ in PairS (f x) (w `mappend` w')
+#endif
+
+-- Standard Writer monad, with aggressive inlining
+instance Monad PutM where
+ return a = Put $ PairS a mempty
+ {-# INLINE return #-}
+
+ m >>= k = Put $
+ let PairS a w = unPut m
+ PairS b w' = unPut (k a)
+ in PairS b (w `mappend` w')
+ {-# INLINE (>>=) #-}
+
+ m >> k = Put $
+ let PairS _ w = unPut m
+ PairS b w' = unPut k
+ in PairS b (w `mappend` w')
+ {-# INLINE (>>) #-}
+
+tell :: Builder -> Put
+tell b = Put $ PairS () b
+{-# INLINE tell #-}
+
+putBuilder :: Builder -> Put
+putBuilder = tell
+{-# INLINE putBuilder #-}
+
+-- | Run the 'Put' monad
+execPut :: PutM a -> Builder
+execPut = sndS . unPut
+{-# INLINE execPut #-}
+
+-- | Run the 'Put' monad with a serialiser
+runPut :: Put -> L.ByteString
+runPut = toLazyByteString . sndS . unPut
+{-# INLINE runPut #-}
+
+-- | Run the 'Put' monad with a serialiser and get its result
+runPutM :: PutM a -> (a, L.ByteString)
+runPutM (Put (PairS f s)) = (f, toLazyByteString s)
+{-# INLINE runPutM #-}
+
+------------------------------------------------------------------------
+
+-- | Pop the ByteString we have constructed so far, if any, yielding a
+-- new chunk in the result ByteString.
+flush :: Put
+flush = tell B.flush
+{-# INLINE flush #-}
+
+-- | Efficiently write a byte into the output buffer
+putWord8 :: Word8 -> Put
+putWord8 = tell . B.singleton
+{-# INLINE putWord8 #-}
+
+-- | An efficient primitive to write a strict ByteString into the output buffer.
+-- It flushes the current buffer, and writes the argument into a new chunk.
+putByteString :: S.ByteString -> Put
+putByteString = tell . B.fromByteString
+{-# INLINE putByteString #-}
+
+-- | Write a lazy ByteString efficiently, simply appending the lazy
+-- ByteString chunks to the output buffer
+putLazyByteString :: L.ByteString -> Put
+putLazyByteString = tell . B.fromLazyByteString
+{-# INLINE putLazyByteString #-}
+
+-- | Write a Word16 in big endian format
+putWord16be :: Word16 -> Put
+putWord16be = tell . B.putWord16be
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le :: Word16 -> Put
+putWord16le = tell . B.putWord16le
+{-# INLINE putWord16le #-}
+
+-- | Write a Word32 in big endian format
+putWord32be :: Word32 -> Put
+putWord32be = tell . B.putWord32be
+{-# INLINE putWord32be #-}
+
+-- | Write a Word32 in little endian format
+putWord32le :: Word32 -> Put
+putWord32le = tell . B.putWord32le
+{-# INLINE putWord32le #-}
+
+-- | Write a Word64 in big endian format
+putWord64be :: Word64 -> Put
+putWord64be = tell . B.putWord64be
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le :: Word64 -> Put
+putWord64le = tell . B.putWord64le
+{-# INLINE putWord64le #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ Write a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost :: Word -> Put
+putWordhost = tell . B.putWordhost
+{-# INLINE putWordhost #-}
+
+-- | /O(1)./ Write a Word16 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord16host :: Word16 -> Put
+putWord16host = tell . B.putWord16host
+{-# INLINE putWord16host #-}
+
+-- | /O(1)./ Write a Word32 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord32host :: Word32 -> Put
+putWord32host = tell . B.putWord32host
+{-# INLINE putWord32host #-}
+
+-- | /O(1)./ Write a Word64 in native host order
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- For portability issues see @putWordhost@.
+putWord64host :: Word64 -> Put
+putWord64host = tell . B.putWord64host
+{-# INLINE putWord64host #-}
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
new file mode 100644
index 000000000..6c192095d
--- /dev/null
+++ b/src/runtime/haskell/PGF.hs
@@ -0,0 +1,352 @@
+-------------------------------------------------
+-- |
+-- Module : PGF
+-- Maintainer : Aarne Ranta
+-- Stability : stable
+-- Portability : portable
+--
+-- This module is an Application Programming Interface to
+-- load and interpret grammars compiled in Portable Grammar Format (PGF).
+-- The PGF format is produced as a final output from the GF compiler.
+-- The API is meant to be used for embedding GF grammars in Haskell
+-- programs
+-------------------------------------------------
+
+module PGF(
+ -- * PGF
+ PGF,
+ readPGF,
+
+ -- * Identifiers
+ CId, mkCId, wildCId,
+ showCId, readCId,
+
+ -- * Languages
+ Language,
+ showLanguage, readLanguage,
+ languages, abstractName, languageCode,
+
+ -- * Types
+ Type, Hypo,
+ showType, readType,
+ mkType, mkHypo, mkDepHypo, mkImplHypo,
+ categories, startCat,
+
+ -- * Functions
+ functions, functionType,
+
+ -- * Expressions & Trees
+ -- ** Tree
+ Tree,
+
+ -- ** Expr
+ Expr,
+ showExpr, readExpr,
+ mkApp, unApp,
+ mkStr, unStr,
+ mkInt, unInt,
+ mkDouble, unDouble,
+ mkMeta, isMeta,
+
+ -- * Operations
+ -- ** Linearization
+ linearize, linearizeAllLang, linearizeAll,
+ showPrintName,
+
+ -- ** Parsing
+ parse, parseWithRecovery, canParse, parseAllLang, parseAll,
+
+ -- ** Evaluation
+ PGF.compute, paraphrase,
+
+ -- ** Type Checking
+ -- | The type checker in PGF does both type checking and renaming
+ -- i.e. it verifies that all identifiers are declared and it
+ -- distinguishes between global function or type indentifiers and
+ -- variable names. The type checker should always be applied on
+ -- expressions entered by the user i.e. those produced via functions
+ -- like 'readType' and 'readExpr' because otherwise unexpected results
+ -- could appear. All typechecking functions returns updated versions
+ -- of the input types or expressions because the typechecking could
+ -- also lead to metavariables instantiations.
+ checkType, checkExpr, inferExpr,
+ TcError(..), ppTcError,
+
+ -- ** Word Completion (Incremental Parsing)
+ complete,
+ Incremental.ParseState,
+ Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
+
+ -- ** Generation
+ generateRandom, generateAll, generateAllDepth,
+
+ -- ** Morphological Analysis
+ Lemma, Analysis, Morpho,
+ lookupMorpho, buildMorpho,
+
+ -- ** Visualizations
+ graphvizAbstractTree,
+ graphvizParseTree,
+ graphvizDependencyTree,
+ graphvizAlignment,
+
+ -- * Browsing
+ browse
+ ) where
+
+import PGF.CId
+import PGF.Linearize
+import PGF.Generate
+import PGF.TypeCheck
+import PGF.Paraphrase
+import PGF.VisualizeTree
+import PGF.Macros
+import PGF.Expr (Tree)
+import PGF.Morphology
+import PGF.Data hiding (functions)
+import PGF.Binary
+import qualified PGF.Parsing.FCFG.Active as Active
+import qualified PGF.Parsing.FCFG.Incremental as Incremental
+import qualified GF.Compile.GeneratePMCFG as PMCFG
+
+import GF.Infra.Option
+import GF.Data.Utilities (replace)
+
+import Data.Char
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import Data.Maybe
+import Data.Binary
+import Data.List(mapAccumL)
+import System.Random (newStdGen)
+import Control.Monad
+import Text.PrettyPrint
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+-- | Reads file in Portable Grammar Format and produces
+-- 'PGF' structure. The file is usually produced with:
+--
+-- > $ gf -make <grammar file name>
+readPGF :: FilePath -> IO PGF
+
+-- | Linearizes given expression as string in the language
+linearize :: PGF -> Language -> Tree -> String
+
+-- | Tries to parse the given string in the specified language
+-- and to produce abstract syntax expression. An empty
+-- list is returned if the parsing is not successful. The list may also
+-- contain more than one element if the grammar is ambiguous.
+-- Throws an exception if the given language cannot be used
+-- for parsing, see 'canParse'.
+parse :: PGF -> Language -> Type -> String -> [Tree]
+
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
+
+-- | Checks whether the given language can be used for parsing.
+canParse :: PGF -> Language -> Bool
+
+-- | The same as 'linearizeAllLang' but does not return
+-- the language.
+linearizeAll :: PGF -> Tree -> [String]
+
+-- | Linearizes given expression as string in all languages
+-- available in the grammar.
+linearizeAllLang :: PGF -> Tree -> [(Language,String)]
+
+-- | Show the printname of a type
+showPrintName :: PGF -> Language -> Type -> String
+
+-- | The same as 'parseAllLang' but does not return
+-- the language.
+parseAll :: PGF -> Type -> String -> [[Tree]]
+
+-- | Tries to parse the given string with all available languages.
+-- Languages which cannot be used for parsing (see 'canParse')
+-- are ignored.
+-- The returned list contains pairs of language
+-- and list of abstract syntax expressions
+-- (this is a list, since grammars can be ambiguous).
+-- Only those languages
+-- for which at least one parsing is possible are listed.
+parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
+
+-- | The same as 'generateAllDepth' but does not limit
+-- the depth in the generation.
+generateAll :: PGF -> Type -> [Expr]
+
+-- | Generates an infinite list of random abstract syntax expressions.
+-- This is usefull for tree bank generation which after that can be used
+-- for grammar testing.
+generateRandom :: PGF -> Type -> IO [Expr]
+
+-- | Generates an exhaustive possibly infinite list of
+-- abstract syntax expressions. A depth can be specified
+-- to limit the search space.
+generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
+
+-- | List of all languages available in the given grammar.
+languages :: PGF -> [Language]
+
+-- | Gets the RFC 4646 language tag
+-- of the language which the given concrete syntax implements,
+-- if this is listed in the source grammar.
+-- Example language tags include @\"en\"@ for English,
+-- and @\"en-UK\"@ for British English.
+languageCode :: PGF -> Language -> Maybe String
+
+-- | The abstract language name is the name of the top-level
+-- abstract module
+abstractName :: PGF -> Language
+
+-- | List of all categories defined in the given grammar.
+-- The categories are defined in the abstract syntax
+-- with the \'cat\' keyword.
+categories :: PGF -> [CId]
+
+-- | The start category is defined in the grammar with
+-- the \'startcat\' flag. This is usually the sentence category
+-- but it is not necessary. Despite that there is a start category
+-- defined you can parse with any category. The start category
+-- definition is just for convenience.
+startCat :: PGF -> Type
+
+-- | List of all functions defined in the abstract syntax
+functions :: PGF -> [CId]
+
+-- | The type of a given function
+functionType :: PGF -> CId -> Maybe Type
+
+-- | Complete the last word in the given string. If the input
+-- is empty or ends in whitespace, the last word is considred
+-- to be the empty string. This means that the completions
+-- will be all possible next words.
+complete :: PGF -> Language -> Type -> String
+ -> [String] -- ^ Possible completions,
+ -- including the given input.
+
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+readPGF f = decodeFile f >>= addParsers
+
+-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
+addParsers :: PGF -> IO PGF
+addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
+ | (lang,cnc) <- Map.toList (concretes pgf)]
+ return pgf { concretes = Map.fromList cncs }
+ where
+ wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
+ addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
+ return (lang,cnc { parser = Just pinfo })
+
+linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
+
+parse pgf lang typ s =
+ case Map.lookup lang (concretes pgf) of
+ Just cnc -> case parser cnc of
+ Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
+ then Incremental.parse pgf lang typ (words s)
+ else Active.parse "t" pinfo typ (words s)
+ Nothing -> error ("No parser built for language: " ++ showCId lang)
+ Nothing -> error ("Unknown language: " ++ showCId lang)
+
+parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
+
+canParse pgf cnc = isJust (lookParser pgf cnc)
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t =
+ [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
+
+showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c
+
+parseAll mgr typ = map snd . parseAllLang mgr typ
+
+parseAllLang mgr typ s =
+ [(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
+
+generateRandom pgf cat = do
+ gen <- newStdGen
+ return $ genRandom gen pgf cat
+
+generateAll pgf cat = generate pgf cat Nothing
+generateAllDepth pgf cat = generate pgf cat
+
+abstractName pgf = absname pgf
+
+languages pgf = cncnames pgf
+
+languageCode pgf lang =
+ fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
+
+categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
+
+startCat pgf = DTyp [] (lookStartCat pgf) []
+
+functions pgf = Map.keys (funs (abstract pgf))
+
+functionType pgf fun =
+ case Map.lookup fun (funs (abstract pgf)) of
+ Just (ty,_,_) -> Just ty
+ Nothing -> Nothing
+
+complete pgf from typ input =
+ let (ws,prefix) = tokensAndPrefix input
+ state0 = Incremental.initState pgf from typ
+ in case loop state0 ws of
+ Nothing -> []
+ Just state ->
+ (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
+ ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
+ where
+ tokensAndPrefix :: String -> ([String],String)
+ tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
+ | null ws = ([],"")
+ | otherwise = (init ws, last ws)
+ where ws = words s
+
+ loop ps [] = Just ps
+ loop ps (t:ts) = case Incremental.nextState ps t of
+ Left es -> Nothing
+ Right ps -> loop ps ts
+
+-- | Converts an expression to normal form
+compute :: PGF -> Expr -> Expr
+compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
+
+browse :: PGF -> CId -> Maybe (String,[CId],[CId])
+browse pgf id = fmap (\def -> (def,producers,consumers)) definition
+ where
+ definition = case Map.lookup id (funs (abstract pgf)) of
+ Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
+ if null eqs
+ then empty
+ else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
+ in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
+ Nothing -> case Map.lookup id (cats (abstract pgf)) of
+ Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)))
+ Nothing -> Nothing
+
+ (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
+ where
+ accum f (ty,_,_) (plist,clist) =
+ let !plist' = if id `elem` ps then f : plist else plist
+ !clist' = if id `elem` cs then f : clist else clist
+ in (plist',clist')
+ where
+ (ps,cs) = tyIds ty
+
+ tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
+ where
+ (pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]
+
+ expIds (EAbs _ _ e) ids = expIds e ids
+ expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
+ expIds (EFun id) ids = id : ids
+ expIds (ETyped e _) ids = expIds e ids
+ expIds _ ids = ids
diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs
new file mode 100644
index 000000000..e4ed98424
--- /dev/null
+++ b/src/runtime/haskell/PGF/Binary.hs
@@ -0,0 +1,199 @@
+module PGF.Binary where
+
+import PGF.CId
+import PGF.Data
+import Data.Binary
+import Data.Binary.Put
+import Data.Binary.Get
+import qualified Data.ByteString as BS
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+pgfMajorVersion, pgfMinorVersion :: Word16
+(pgfMajorVersion, pgfMinorVersion) = (1,0)
+
+instance Binary PGF where
+ put pgf = putWord16be pgfMajorVersion >>
+ putWord16be pgfMinorVersion >>
+ put ( absname pgf, cncnames pgf
+ , gflags pgf
+ , abstract pgf, concretes pgf
+ )
+ get = do v1 <- getWord16be
+ v2 <- getWord16be
+ absname <- get
+ cncnames <- get
+ gflags <- get
+ abstract <- get
+ concretes <- get
+ return (PGF{ absname=absname, cncnames=cncnames
+ , gflags=gflags
+ , abstract=abstract, concretes=concretes
+ })
+
+instance Binary CId where
+ put (CId bs) = put bs
+ get = liftM CId get
+
+instance Binary Abstr where
+ put abs = put (aflags abs, funs abs, cats abs)
+ get = do aflags <- get
+ funs <- get
+ cats <- get
+ let catfuns = Map.mapWithKey (\cat _ -> [f | (f, (DTyp _ c _,_,_)) <- Map.toList funs, c==cat]) cats
+ return (Abstr{ aflags=aflags
+ , funs=funs, cats=cats
+ , catfuns=catfuns
+ })
+
+instance Binary Concr where
+ put cnc = put ( cflags cnc, lins cnc, opers cnc
+ , lincats cnc, lindefs cnc
+ , printnames cnc, paramlincats cnc
+ , parser cnc
+ )
+ get = do cflags <- get
+ lins <- get
+ opers <- get
+ lincats <- get
+ lindefs <- get
+ printnames <- get
+ paramlincats <- get
+ parser <- get
+ return (Concr{ cflags=cflags, lins=lins, opers=opers
+ , lincats=lincats, lindefs=lindefs
+ , printnames=printnames
+ , paramlincats=paramlincats
+ , parser=parser
+ })
+
+instance Binary Alternative where
+ put (Alt v x) = put v >> put x
+ get = liftM2 Alt get get
+
+instance Binary Term where
+ put (R es) = putWord8 0 >> put es
+ put (S es) = putWord8 1 >> put es
+ put (FV es) = putWord8 2 >> put es
+ put (P e v) = putWord8 3 >> put (e,v)
+ put (W e v) = putWord8 4 >> put (e,v)
+ put (C i ) = putWord8 5 >> put i
+ put (TM i ) = putWord8 6 >> put i
+ put (F f) = putWord8 7 >> put f
+ put (V i) = putWord8 8 >> put i
+ put (K (KS s)) = putWord8 9 >> put s
+ put (K (KP d vs)) = putWord8 10 >> put (d,vs)
+
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM R get
+ 1 -> liftM S get
+ 2 -> liftM FV get
+ 3 -> liftM2 P get get
+ 4 -> liftM2 W get get
+ 5 -> liftM C get
+ 6 -> liftM TM get
+ 7 -> liftM F get
+ 8 -> liftM V get
+ 9 -> liftM (K . KS) get
+ 10 -> liftM2 (\d vs -> K (KP d vs)) get get
+ _ -> decodingError
+
+instance Binary Expr where
+ put (EAbs b x exp) = putWord8 0 >> put (b,x,exp)
+ put (EApp e1 e2) = putWord8 1 >> put (e1,e2)
+ put (ELit (LStr s)) = putWord8 2 >> put s
+ put (ELit (LFlt d)) = putWord8 3 >> put d
+ put (ELit (LInt i)) = putWord8 4 >> put i
+ put (EMeta i) = putWord8 5 >> put i
+ put (EFun f) = putWord8 6 >> put f
+ put (EVar i) = putWord8 7 >> put i
+ put (ETyped e ty) = putWord8 8 >> put (e,ty)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM3 EAbs get get get
+ 1 -> liftM2 EApp get get
+ 2 -> liftM (ELit . LStr) get
+ 3 -> liftM (ELit . LFlt) get
+ 4 -> liftM (ELit . LInt) get
+ 5 -> liftM EMeta get
+ 6 -> liftM EFun get
+ 7 -> liftM EVar get
+ 8 -> liftM2 ETyped get get
+ _ -> decodingError
+
+instance Binary Patt where
+ put (PApp f ps) = putWord8 0 >> put (f,ps)
+ put (PVar x) = putWord8 1 >> put x
+ put PWild = putWord8 2
+ put (PLit (LStr s)) = putWord8 3 >> put s
+ put (PLit (LFlt d)) = putWord8 4 >> put d
+ put (PLit (LInt i)) = putWord8 5 >> put i
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM2 PApp get get
+ 1 -> liftM PVar get
+ 2 -> return PWild
+ 3 -> liftM (PLit . LStr) get
+ 4 -> liftM (PLit . LFlt) get
+ 5 -> liftM (PLit . LInt) get
+ _ -> decodingError
+
+instance Binary Equation where
+ put (Equ ps e) = put (ps,e)
+ get = liftM2 Equ get get
+
+instance Binary Type where
+ put (DTyp hypos cat exps) = put (hypos,cat,exps)
+ get = liftM3 DTyp get get get
+
+instance Binary BindType where
+ put Explicit = putWord8 0
+ put Implicit = putWord8 1
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return Explicit
+ 1 -> return Implicit
+ _ -> decodingError
+
+instance Binary FFun where
+ put (FFun fun prof lins) = put (fun,prof,lins)
+ get = liftM3 FFun get get get
+
+instance Binary FSymbol where
+ put (FSymCat n l) = putWord8 0 >> put (n,l)
+ put (FSymLit n l) = putWord8 1 >> put (n,l)
+ put (FSymKS ts) = putWord8 2 >> put ts
+ put (FSymKP d vs) = putWord8 3 >> put (d,vs)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM2 FSymCat get get
+ 1 -> liftM2 FSymLit get get
+ 2 -> liftM FSymKS get
+ 3 -> liftM2 (\d vs -> FSymKP d vs) get get
+ _ -> decodingError
+
+instance Binary Production where
+ put (FApply ruleid args) = putWord8 0 >> put (ruleid,args)
+ put (FCoerce fcat) = putWord8 1 >> put fcat
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM2 FApply get get
+ 1 -> liftM FCoerce get
+ _ -> decodingError
+
+instance Binary ParserInfo where
+ put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
+ get = do functions <- get
+ sequences <- get
+ productions0<- get
+ totalCats <- get
+ startCats <- get
+ return (ParserInfo{functions=functions,sequences=sequences
+ ,productions0=productions0
+ ,productions =filterProductions productions0
+ ,totalCats=totalCats,startCats=startCats})
+
+decodingError = fail "This PGF file was compiled with different version of GF"
diff --git a/src/runtime/haskell/PGF/BuildParser.hs b/src/runtime/haskell/PGF/BuildParser.hs
new file mode 100644
index 000000000..23e0725c6
--- /dev/null
+++ b/src/runtime/haskell/PGF/BuildParser.hs
@@ -0,0 +1,76 @@
+---------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- FCFG parsing, parser information
+-----------------------------------------------------------------------------
+
+module PGF.BuildParser where
+
+import GF.Data.SortedList
+import GF.Data.Assoc
+import PGF.CId
+import PGF.Data
+import PGF.Parsing.FCFG.Utilities
+
+import Data.Array.IArray
+import Data.Maybe
+import qualified Data.IntMap as IntMap
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Debug.Trace
+
+
+data ParserInfoEx
+ = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
+ , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
+ , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
+ , grammarToks :: [String]
+ }
+
+------------------------------------------------------------
+-- parser information
+
+getLeftCornerTok pinfo (FFun _ _ lins)
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymKS [tok] -> [tok]
+ _ -> []
+ | otherwise = []
+ where
+ syms = (sequences pinfo) ! (lins ! 0)
+
+getLeftCornerCat pinfo args (FFun _ _ lins)
+ | inRange (bounds syms) 0 = case syms ! 0 of
+ FSymCat d _ -> let cat = args !! d
+ in case IntMap.lookup cat (productions pinfo) of
+ Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
+ Nothing -> [cat]
+ _ -> []
+ | otherwise = []
+ where
+ syms = (sequences pinfo) ! (lins ! 0)
+
+buildParserInfo :: ParserInfo -> ParserInfoEx
+buildParserInfo pinfo =
+ ParserInfoEx { epsilonRules = epsilonrules
+ , leftcornerCats = leftcorncats
+ , leftcornerTokens = leftcorntoks
+ , grammarToks = grammartoks
+ }
+
+ where epsilonrules = [ (ruleid,args,cat)
+ | (cat,set) <- IntMap.toList (productions pinfo)
+ , (FApply ruleid args) <- Set.toList set
+ , let (FFun _ _ lins) = (functions pinfo) ! ruleid
+ , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
+ leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
+ | (cat,set) <- IntMap.toList (productions pinfo)
+ , (FApply ruleid args) <- Set.toList set
+ , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
+ leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
+ | (cat,set) <- IntMap.toList (productions pinfo)
+ , (FApply ruleid args) <- Set.toList set
+ , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
+ grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]
diff --git a/src/runtime/haskell/PGF/CId.hs b/src/runtime/haskell/PGF/CId.hs
new file mode 100644
index 000000000..fea304d9d
--- /dev/null
+++ b/src/runtime/haskell/PGF/CId.hs
@@ -0,0 +1,55 @@
+module PGF.CId (CId(..),
+ mkCId, wildCId,
+ readCId, showCId,
+
+ -- utils
+ pCId, pIdent, ppCId) where
+
+import Control.Monad
+import qualified Data.ByteString.Char8 as BS
+import Data.Char
+import qualified Text.ParserCombinators.ReadP as RP
+import qualified Text.PrettyPrint as PP
+
+
+-- | An abstract data type that represents
+-- identifiers for functions and categories in PGF.
+newtype CId = CId BS.ByteString deriving (Eq,Ord)
+
+wildCId :: CId
+wildCId = CId (BS.singleton '_')
+
+-- | Creates a new identifier from 'String'
+mkCId :: String -> CId
+mkCId s = CId (BS.pack s)
+
+-- | Reads an identifier from 'String'. The function returns 'Nothing' if the string is not valid identifier.
+readCId :: String -> Maybe CId
+readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of
+ [x] -> Just x
+ _ -> Nothing
+
+-- | Renders the identifier as 'String'
+showCId :: CId -> String
+showCId (CId x) = BS.unpack x
+
+instance Show CId where
+ showsPrec _ = showString . showCId
+
+instance Read CId where
+ readsPrec _ = RP.readP_to_S pCId
+
+pCId :: RP.ReadP CId
+pCId = do s <- pIdent
+ if s == "_"
+ then RP.pfail
+ else return (mkCId s)
+
+pIdent :: RP.ReadP String
+pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
+ where
+ isIdentFirst c = c == '_' || isLetter c
+ isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
+
+ppCId :: CId -> PP.Doc
+ppCId = PP.text . showCId
diff --git a/src/runtime/haskell/PGF/Check.hs b/src/runtime/haskell/PGF/Check.hs
new file mode 100644
index 000000000..58b66cfe4
--- /dev/null
+++ b/src/runtime/haskell/PGF/Check.hs
@@ -0,0 +1,173 @@
+module PGF.Check (checkPGF) where
+
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import GF.Data.ErrM
+
+import qualified Data.Map as Map
+import Control.Monad
+import Debug.Trace
+
+checkPGF :: PGF -> Err (PGF,Bool)
+checkPGF pgf = do
+ (cs,bs) <- mapM (checkConcrete pgf)
+ (Map.assocs (concretes pgf)) >>= return . unzip
+ return (pgf {concretes = Map.fromAscList cs}, and bs)
+
+
+-- errors are non-fatal; replace with 'fail' to change this
+msg s = trace s (return ())
+
+andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool
+andMapM f xs = mapM f xs >>= return . and
+
+labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool)
+labelBoolErr ms iob = do
+ (x,b) <- iob
+ if b then return (x,b) else (msg ms >> return (x,b))
+
+
+checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
+checkConcrete pgf (lang,cnc) =
+ labelBoolErr ("happened in language " ++ showCId lang) $ do
+ (rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip
+ return ((lang,cnc{lins = Map.fromAscList rs}),and bs)
+ where
+ checkl = checkLin pgf lang
+
+checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
+checkLin pgf lang (f,t) =
+ labelBoolErr ("happened in function " ++ showCId f) $ do
+ (t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
+ return ((f,t'),b)
+
+inferTerm :: [CType] -> Term -> Err (Term,CType)
+inferTerm args trm = case trm of
+ K _ -> returnt str
+ C i -> returnt $ ints i
+ V i -> do
+ testErr (i < length args) ("too large index " ++ show i)
+ returnt $ args !! i
+ S ts -> do
+ (ts',tys) <- mapM infer ts >>= return . unzip
+ let tys' = filter (/=str) tys
+ testErr (null tys')
+ ("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys'))
+ return (S ts',str)
+ R ts -> do
+ (ts',tys) <- mapM infer ts >>= return . unzip
+ return $ (R ts',tuple tys)
+ P t u -> do
+ (t',tt) <- infer t
+ (u',tu) <- infer u
+ case tt of
+ R tys -> case tu of
+ R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]]
+ --- R [v] -> infer $ P t v
+ --- R (v:vs) -> infer $ P (head tys) (R vs)
+
+ C i -> do
+ testErr (i < length tys)
+ ("required more than " ++ show i ++ " fields in " ++ show (R tys))
+ return (P t' u', tys !! i) -- record: index must be known
+ _ -> do
+ let typ = head tys
+ testErr (all (==typ) tys) ("different types in table " ++ show trm)
+ return (P t' u', typ) -- table: types must be same
+ _ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt
+ FV [] -> returnt tm0 ----
+ FV (t:ts) -> do
+ (t',ty) <- infer t
+ (ts',tys) <- mapM infer ts >>= return . unzip
+ testErr (all (eqType True ty) tys) ("different types in variants " ++ show trm)
+ return (FV (t':ts'),ty)
+ W s r -> infer r
+ _ -> Bad ("no type inference for " ++ show trm)
+ where
+ returnt ty = return (trm,ty)
+ infer = inferTerm args
+
+checkTerm :: LinType -> Term -> Err (Term,Bool)
+checkTerm (args,val) trm = case inferTerm args trm of
+ Ok (t,ty) -> if eqType False ty val
+ then return (t,True)
+ else do
+ msg ("term: " ++ show trm ++
+ "\nexpected type: " ++ show val ++
+ "\ninferred type: " ++ show ty)
+ return (t,False)
+ Bad s -> do
+ msg s
+ return (trm,False)
+
+-- symmetry in (Ints m == Ints n) is all we can use in variants
+
+eqType :: Bool -> CType -> CType -> Bool
+eqType symm inf exp = case (inf,exp) of
+ (C k, C n) -> if symm then True else k <= n -- only run-time corr.
+ (R rs,R ts) -> length rs == length ts && and [eqType symm r t | (r,t) <- zip rs ts]
+ (TM _, _) -> True ---- for variants [] ; not safe
+ _ -> inf == exp
+
+-- should be in a generic module, but not in the run-time DataGFCC
+
+type CType = Term
+type LinType = ([CType],CType)
+
+tuple :: [CType] -> CType
+tuple = R
+
+ints :: Int -> CType
+ints = C
+
+str :: CType
+str = S []
+
+lintype :: PGF -> CId -> CId -> LinType
+lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
+ (cs,c) -> (map vlinc cs, linc c) ---- HOAS
+ where
+ linc = lookLincat pgf lang
+ vlinc (0,c) = linc c
+ vlinc (i,c) = case linc c of
+ R ts -> R (ts ++ replicate i str)
+
+inline :: PGF -> CId -> Term -> Term
+inline pgf lang t = case t of
+ F c -> inl $ look c
+ _ -> composSafeOp inl t
+ where
+ inl = inline pgf lang
+ look = lookLin pgf lang
+
+composOp :: Monad m => (Term -> m Term) -> Term -> m Term
+composOp f trm = case trm of
+ R ts -> liftM R $ mapM f ts
+ S ts -> liftM S $ mapM f ts
+ FV ts -> liftM FV $ mapM f ts
+ P t u -> liftM2 P (f t) (f u)
+ W s t -> liftM (W s) $ f t
+ _ -> return trm
+
+composSafeOp :: (Term -> Term) -> Term -> Term
+composSafeOp f = maybe undefined id . composOp (return . f)
+
+-- from GF.Data.Oper
+
+maybeErr :: String -> Maybe a -> Err a
+maybeErr s = maybe (Bad s) Ok
+
+testErr :: Bool -> String -> Err ()
+testErr cond msg = if cond then return () else Bad msg
+
+errVal :: a -> Err a -> a
+errVal a = err (const a) id
+
+errIn :: String -> Err a -> Err a
+errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return
+
+err :: (String -> b) -> (a -> b) -> Err a -> b
+err d f e = case e of
+ Ok a -> f a
+ Bad s -> d s
diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs
new file mode 100644
index 000000000..38027e96e
--- /dev/null
+++ b/src/runtime/haskell/PGF/Data.hs
@@ -0,0 +1,95 @@
+module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where
+
+import PGF.CId
+import PGF.Expr hiding (Value, Env, Tree)
+import PGF.Type
+import PGF.PMCFG
+
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import Data.List
+
+-- internal datatypes for PGF
+
+-- | An abstract data type representing multilingual grammar
+-- in Portable Grammar Format.
+data PGF = PGF {
+ absname :: CId ,
+ cncnames :: [CId] ,
+ gflags :: Map.Map CId String, -- value of a global flag
+ abstract :: Abstr ,
+ concretes :: Map.Map CId Concr
+ }
+
+data Abstr = Abstr {
+ aflags :: Map.Map CId String, -- value of a flag
+ funs :: Map.Map CId (Type,Int,[Equation]), -- type, arrity and definition of function
+ cats :: Map.Map CId [Hypo], -- context of a cat
+ catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
+ }
+
+data Concr = Concr {
+ cflags :: Map.Map CId String, -- value of a flag
+ lins :: Map.Map CId Term, -- lin of a fun
+ opers :: Map.Map CId Term, -- oper generated by subex elim
+ lincats :: Map.Map CId Term, -- lin type of a cat
+ lindefs :: Map.Map CId Term, -- lin default of a cat
+ printnames :: Map.Map CId Term, -- printname of a cat or a fun
+ paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
+ parser :: Maybe ParserInfo -- parser
+ }
+
+data Term =
+ R [Term]
+ | P Term Term
+ | S [Term]
+ | K Tokn
+ | V Int
+ | C Int
+ | F CId
+ | FV [Term]
+ | W String Term
+ | TM String
+ deriving (Eq,Ord,Show)
+
+data Tokn =
+ KS String
+ | KP [String] [Alternative]
+ deriving (Eq,Ord,Show)
+
+
+-- merge two GFCCs; fails is differens absnames; priority to second arg
+
+unionPGF :: PGF -> PGF -> PGF
+unionPGF one two = case absname one of
+ n | n == wildCId -> two -- extending empty grammar
+ | n == absname two -> one { -- extending grammar with same abstract
+ concretes = Map.union (concretes two) (concretes one),
+ cncnames = union (cncnames one) (cncnames two)
+ }
+ _ -> one -- abstracts don't match ---- print error msg
+
+emptyPGF :: PGF
+emptyPGF = PGF {
+ absname = wildCId,
+ cncnames = [] ,
+ gflags = Map.empty,
+ abstract = error "empty grammar, no abstract",
+ concretes = Map.empty
+ }
+
+-- | This is just a 'CId' with the language name.
+-- A language name is the identifier that you write in the
+-- top concrete or abstract module in GF after the
+-- concrete/abstract keyword. Example:
+--
+-- > abstract Lang = ...
+-- > concrete LangEng of Lang = ...
+type Language = CId
+
+readLanguage :: String -> Maybe Language
+readLanguage = readCId
+
+showLanguage :: Language -> String
+showLanguage = showCId
diff --git a/src/runtime/haskell/PGF/Editor.hs b/src/runtime/haskell/PGF/Editor.hs
new file mode 100644
index 000000000..3f69da170
--- /dev/null
+++ b/src/runtime/haskell/PGF/Editor.hs
@@ -0,0 +1,241 @@
+module PGF.Editor (
+ State, -- datatype -- type-annotated possibly open tree with a focus
+ Dict, -- datatype -- abstract syntax information optimized for editing
+ Position, -- datatype -- path from top to focus
+ new, -- :: Type -> State -- create new State
+ refine, -- :: Dict -> CId -> State -> State -- refine focus with CId
+ replace, -- :: Dict -> Tree -> State -> State -- replace focus with Tree
+ delete, -- :: State -> State -- replace focus with ?
+ goNextMeta, -- :: State -> State -- move focus to next ? node
+ goNext, -- :: State -> State -- move to next node
+ goTop, -- :: State -> State -- move focus to the top (=root)
+ goPosition, -- :: Position -> State -> State -- move focus to given position
+ mkPosition, -- :: [Int] -> Position -- list of choices (top = [])
+ showPosition,-- :: Position -> [Int] -- readable position
+ focusType, -- :: State -> Type -- get the type of focus
+ stateTree, -- :: State -> Tree -- get the current tree
+ isMetaFocus, -- :: State -> Bool -- whether focus is ?
+ allMetas, -- :: State -> [(Position,Type)] -- all ?s and their positions
+ prState, -- :: State -> String -- print state, focus marked *
+ refineMenu, -- :: Dict -> State -> [CId] -- get refinement menu
+ pgf2dict -- :: PGF -> Dict -- create editing Dict from PGF
+ ) where
+
+import PGF.Data
+import PGF.CId
+import qualified Data.Map as M
+import Debug.Trace ----
+
+-- API
+
+new :: Type -> State
+new (DTyp _ t _) = etree2state (uETree t)
+
+refine :: Dict -> CId -> State -> State
+refine dict f = replaceInState (mkRefinement dict f)
+
+replace :: Dict -> Tree -> State -> State
+replace dict t = replaceInState (tree2etree dict t)
+
+delete :: State -> State
+delete s = replaceInState (uETree (typ (tree s))) s
+
+goNextMeta :: State -> State
+goNextMeta s =
+ if isComplete s then s
+ else let s1 = goNext s in if isMetaFocus s1
+ then s1 else goNextMeta s1
+
+isComplete :: State -> Bool
+isComplete s = isc (tree s) where
+ isc t = case atom t of
+ AMeta _ -> False
+ ACon _ -> all isc (children t)
+
+goTop :: State -> State
+goTop = navigate (const top)
+
+goPosition :: [Int] -> State -> State
+goPosition p s = s{position = p}
+
+mkPosition :: [Int] -> Position
+mkPosition = id
+
+refineMenu :: Dict -> State -> [CId]
+refineMenu dict s = maybe [] (map fst) $ M.lookup (focusBType s) (refines dict)
+
+focusType :: State -> Type
+focusType s = btype2type (focusBType s)
+
+stateTree :: State -> Tree
+stateTree = etree2tree . tree
+
+pgf2dict :: PGF -> Dict
+pgf2dict pgf = Dict (M.fromAscList fus) refs where
+ fus = [(f,mkFType ty) | (f,(ty,_)) <- M.toList (funs abs)]
+ refs = M.fromAscList [(c, fusTo c) | (c,_) <- M.toList (cats abs)]
+ fusTo c = [(f,ty) | (f,ty@(_,k)) <- fus, k==c] ---- quadratic
+ mkFType (DTyp hyps c _) = ([k | Hyp _ (DTyp _ k _) <- hyps],c) ----dep types
+ abs = abstract pgf
+
+etree2tree :: ETree -> Tree
+etree2tree t = case atom t of
+ ACon f -> Fun f (map etree2tree (children t))
+ AMeta i -> Meta i
+
+tree2etree :: Dict -> Tree -> ETree
+tree2etree dict t = case t of
+ Fun f _ -> annot (look f) t
+ where
+ annot (tys,ty) tr = case tr of
+ Fun f trs -> ETree (ACon f) ty [annt t tr | (t,tr) <- zip tys trs]
+ Meta i -> ETree (AMeta i) ty []
+ annt ty tr = case tr of
+ Fun _ _ -> tree2etree dict tr
+ Meta _ -> annot ([],ty) tr
+ look f = maybe undefined id $ M.lookup f (functs dict)
+
+prState :: State -> String
+prState s = unlines [replicate i ' ' ++ f | (i,f) <- pr [] (tree s)] where
+ pr i t =
+ (ind i,prAtom i (atom t)) : concat [pr (sub j i) c | (j,c) <- zip [0..] (children t)]
+ prAtom i a = prFocus i ++ case a of
+ ACon f -> prCId f
+ AMeta i -> "?" ++ show i
+ prFocus i = if i == position s then "*" else ""
+ ind i = 2 * length i
+ sub j i = i ++ [j]
+
+showPosition :: Position -> [Int]
+showPosition = id
+
+allMetas :: State -> [(Position,Type)]
+allMetas s = [(reverse p, btype2type ty) | (p,ty) <- metas [] (tree s)] where
+ metas p t =
+ (if isMetaAtom (atom t) then [(p,typ t)] else []) ++
+ concat [metas (i:p) u | (i,u) <- zip [0..] (children t)]
+
+---- Trees and navigation
+
+data ETree = ETree {
+ atom :: Atom,
+ typ :: BType,
+ children :: [ETree]
+ }
+ deriving Show
+
+data Atom =
+ ACon CId
+ | AMeta Int
+ deriving Show
+
+btype2type :: BType -> Type
+btype2type t = DTyp [] t []
+
+uETree :: BType -> ETree
+uETree ty = ETree (AMeta 0) ty []
+
+data State = State {
+ position :: Position,
+ tree :: ETree
+ }
+ deriving Show
+
+type Position = [Int]
+
+top :: Position
+top = []
+
+up :: Position -> Position
+up p = case p of
+ _:_ -> init p
+ _ -> p
+
+down :: Position -> Position
+down = (++[0])
+
+left :: Position -> Position
+left p = case p of
+ _:_ | last p > 0 -> init p ++ [last p - 1]
+ _ -> top
+
+right :: Position -> Position
+right p = case p of
+ _:_ -> init p ++ [last p + 1]
+ _ -> top
+
+etree2state :: ETree -> State
+etree2state = State top
+
+doInState :: (ETree -> ETree) -> State -> State
+doInState f s = s{tree = change (position s) (tree s)} where
+ change p t = case p of
+ [] -> f t
+ n:ns -> let (ts1,t0:ts2) = splitAt n (children t) in
+ t{children = ts1 ++ [change ns t0] ++ ts2}
+
+subtree :: Position -> ETree -> ETree
+subtree p t = case p of
+ [] -> t
+ n:ns -> subtree ns (children t !! n)
+
+focus :: State -> ETree
+focus s = subtree (position s) (tree s)
+
+focusBType :: State -> BType
+focusBType s = typ (focus s)
+
+navigate :: (Position -> Position) -> State -> State
+navigate p s = s{position = p (position s)}
+
+-- p is a fix-point aspect of state change
+untilFix :: Eq a => (State -> a) -> (State -> Bool) -> (State -> State) -> State -> State
+untilFix p b f s =
+ if b s
+ then s
+ else let fs = f s in if p fs == p s
+ then s
+ else untilFix p b f fs
+
+untilPosition :: (State -> Bool) -> (State -> State) -> State -> State
+untilPosition = untilFix position
+
+goNext :: State -> State
+goNext s = case focus s of
+ st | not (null (children st)) -> navigate down s
+ _ -> findSister s
+ where
+ findSister s = case s of
+ s' | null (position s') -> s'
+ s' | hasYoungerSisters s' -> navigate right s'
+ s' -> findSister (navigate up s')
+ hasYoungerSisters s = case position s of
+ p@(_:_) -> length (children (focus (navigate up s))) > last p + 1
+ _ -> False
+
+isMetaFocus :: State -> Bool
+isMetaFocus s = isMetaAtom (atom (focus s))
+
+isMetaAtom :: Atom -> Bool
+isMetaAtom a = case a of
+ AMeta _ -> True
+ _ -> False
+
+replaceInState :: ETree -> State -> State
+replaceInState t = doInState (const t)
+
+
+-------
+
+type BType = CId ----dep types
+type FType = ([BType],BType) ----dep types
+
+data Dict = Dict {
+ functs :: M.Map CId FType,
+ refines :: M.Map BType [(CId,FType)]
+ }
+
+mkRefinement :: Dict -> CId -> ETree
+mkRefinement dict f = ETree (ACon f) val (map uETree args) where
+ (args,val) = maybe undefined id $ M.lookup f (functs dict)
+
diff --git a/src/runtime/haskell/PGF/Expr.hs b/src/runtime/haskell/PGF/Expr.hs
new file mode 100644
index 000000000..cf0cb79aa
--- /dev/null
+++ b/src/runtime/haskell/PGF/Expr.hs
@@ -0,0 +1,355 @@
+module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..),
+ readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt,
+
+ mkApp, unApp,
+ mkStr, unStr,
+ mkInt, unInt,
+ mkDouble, unDouble,
+ mkMeta, isMeta,
+
+ normalForm,
+
+ -- needed in the typechecker
+ Value(..), Env, Funs, eval, apply,
+
+ MetaId,
+
+ -- helpers
+ pMeta,pStr,pArg,pLit,freshName,ppMeta,ppLit,ppParens
+ ) where
+
+import PGF.CId
+import PGF.Type
+
+import Data.Char
+import Data.Maybe
+import Data.List as List
+import Data.Map as Map hiding (showTree)
+import Control.Monad
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+
+data Literal =
+ LStr String -- ^ string constant
+ | LInt Integer -- ^ integer constant
+ | LFlt Double -- ^ floating point constant
+ deriving (Eq,Ord,Show)
+
+type MetaId = Int
+
+data BindType =
+ Explicit
+ | Implicit
+ deriving (Eq,Ord,Show)
+
+-- | Tree is the abstract syntax representation of a given sentence
+-- in some concrete syntax. Technically 'Tree' is a type synonym
+-- of 'Expr'.
+type Tree = Expr
+
+-- | An expression in the abstract syntax of the grammar. It could be
+-- both parameter of a dependent type or an abstract syntax tree for
+-- for some sentence.
+data Expr =
+ EAbs BindType CId Expr -- ^ lambda abstraction
+ | EApp Expr Expr -- ^ application
+ | ELit Literal -- ^ literal
+ | EMeta {-# UNPACK #-} !MetaId -- ^ meta variable
+ | EFun CId -- ^ function or data constructor
+ | EVar {-# UNPACK #-} !Int -- ^ variable with de Bruijn index
+ | ETyped Expr Type -- ^ local type signature
+ | EImplArg Expr -- ^ implicit argument in expression
+ deriving (Eq,Ord,Show)
+
+-- | The pattern is used to define equations in the abstract syntax of the grammar.
+data Patt =
+ PApp CId [Patt] -- ^ application. The identifier should be constructor i.e. defined with 'data'
+ | PLit Literal -- ^ literal
+ | PVar CId -- ^ variable
+ | PWild -- ^ wildcard
+ | PImplArg Patt -- ^ implicit argument in pattern
+ deriving (Eq,Ord)
+
+-- | The equation is used to define lambda function as a sequence
+-- of equations with pattern matching. The list of 'Expr' represents
+-- the patterns and the second 'Expr' is the function body for this
+-- equation.
+data Equation =
+ Equ [Patt] Expr
+ deriving (Eq,Ord)
+
+-- | parses 'String' as an expression
+readExpr :: String -> Maybe Expr
+readExpr s = case [x | (x,cs) <- RP.readP_to_S pExpr s, all isSpace cs] of
+ [x] -> Just x
+ _ -> Nothing
+
+-- | renders expression as 'String'. The list
+-- of identifiers is the list of all free variables
+-- in the expression in order reverse to the order
+-- of binding.
+showExpr :: [CId] -> Expr -> String
+showExpr vars = PP.render . ppExpr 0 vars
+
+instance Read Expr where
+ readsPrec _ = RP.readP_to_S pExpr
+
+-- | Constructs an expression by applying a function to a list of expressions
+mkApp :: CId -> [Expr] -> Expr
+mkApp f es = foldl EApp (EFun f) es
+
+-- | Decomposes an expression into application of function
+unApp :: Expr -> Maybe (CId,[Expr])
+unApp = extract []
+ where
+ extract es (EFun f) = Just (f,es)
+ extract es (EApp e1 e2) = extract (e2:es) e1
+ extract es _ = Nothing
+
+-- | Constructs an expression from string literal
+mkStr :: String -> Expr
+mkStr s = ELit (LStr s)
+
+-- | Decomposes an expression into string literal
+unStr :: Expr -> Maybe String
+unStr (ELit (LStr s)) = Just s
+unStr _ = Nothing
+
+-- | Constructs an expression from integer literal
+mkInt :: Integer -> Expr
+mkInt i = ELit (LInt i)
+
+-- | Decomposes an expression into integer literal
+unInt :: Expr -> Maybe Integer
+unInt (ELit (LInt i)) = Just i
+unInt _ = Nothing
+
+-- | Constructs an expression from real number literal
+mkDouble :: Double -> Expr
+mkDouble f = ELit (LFlt f)
+
+-- | Decomposes an expression into real number literal
+unDouble :: Expr -> Maybe Double
+unDouble (ELit (LFlt f)) = Just f
+unDouble _ = Nothing
+
+-- | Constructs an expression which is meta variable
+mkMeta :: Expr
+mkMeta = EMeta 0
+
+-- | Checks whether an expression is a meta variable
+isMeta :: Expr -> Bool
+isMeta (EMeta _) = True
+isMeta _ = False
+
+-----------------------------------------------------
+-- Parsing
+-----------------------------------------------------
+
+pExpr :: RP.ReadP Expr
+pExpr = RP.skipSpaces >> (pAbs RP.<++ pTerm)
+ where
+ pTerm = do f <- pFactor
+ RP.skipSpaces
+ as <- RP.sepBy pArg RP.skipSpaces
+ return (foldl EApp f as)
+
+ pAbs = do xs <- RP.between (RP.char '\\') (RP.skipSpaces >> RP.string "->") pBinds
+ e <- pExpr
+ return (foldr (\(b,x) e -> EAbs b x e) e xs)
+
+pBinds :: RP.ReadP [(BindType,CId)]
+pBinds = do xss <- RP.sepBy1 (RP.skipSpaces >> pBind) (RP.skipSpaces >> RP.char ',')
+ return (concat xss)
+ where
+ pCIdOrWild = pCId `mplus` (RP.char '_' >> return wildCId)
+
+ pBind =
+ do x <- pCIdOrWild
+ return [(Explicit,x)]
+ `mplus`
+ RP.between (RP.char '{')
+ (RP.skipSpaces >> RP.char '}')
+ (RP.sepBy1 (RP.skipSpaces >> pCIdOrWild >>= \id -> return (Implicit,id)) (RP.skipSpaces >> RP.char ','))
+
+pArg = fmap EImplArg (RP.between (RP.char '{') (RP.char '}') pExpr)
+ RP.<++
+ pFactor
+
+pFactor = fmap EFun pCId
+ RP.<++ fmap ELit pLit
+ RP.<++ fmap EMeta pMeta
+ RP.<++ RP.between (RP.char '(') (RP.char ')') pExpr
+ RP.<++ RP.between (RP.char '<') (RP.char '>') pTyped
+
+pTyped = do RP.skipSpaces
+ e <- pExpr
+ RP.skipSpaces
+ RP.char ':'
+ RP.skipSpaces
+ ty <- pType
+ return (ETyped e ty)
+
+pMeta = do RP.char '?'
+ return 0
+
+pLit :: RP.ReadP Literal
+pLit = pNum RP.<++ liftM LStr pStr
+
+pNum = do x <- RP.munch1 isDigit
+ ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (LFlt (read (x++"."++y))))
+ RP.<++
+ (return (LInt (read x))))
+
+pStr = RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"'))
+ where
+ pEsc = RP.char '\\' >> RP.get
+
+
+-----------------------------------------------------
+-- Printing
+-----------------------------------------------------
+
+ppExpr :: Int -> [CId] -> Expr -> PP.Doc
+ppExpr d scope (EAbs b x e) = let (bs,xs,e1) = getVars [] [] (EAbs b x e)
+ in ppParens (d > 1) (PP.char '\\' PP.<>
+ PP.hsep (PP.punctuate PP.comma (reverse (List.zipWith ppBind bs xs))) PP.<+>
+ PP.text "->" PP.<+>
+ ppExpr 1 (xs++scope) e1)
+ where
+ getVars bs xs (EAbs b x e) = getVars (b:bs) ((freshName x xs):xs) e
+ getVars bs xs e = (bs,xs,e)
+ppExpr d scope (EApp e1 e2) = ppParens (d > 3) ((ppExpr 3 scope e1) PP.<+> (ppExpr 4 scope e2))
+ppExpr d scope (ELit l) = ppLit l
+ppExpr d scope (EMeta n) = ppMeta n
+ppExpr d scope (EFun f) = ppCId f
+ppExpr d scope (EVar i) = ppCId (scope !! i)
+ppExpr d scope (ETyped e ty)= PP.char '<' PP.<> ppExpr 0 scope e PP.<+> PP.colon PP.<+> ppType 0 scope ty PP.<> PP.char '>'
+ppExpr d scope (EImplArg e) = PP.braces (ppExpr 0 scope e)
+
+ppPatt :: Int -> [CId] -> Patt -> ([CId],PP.Doc)
+ppPatt d scope (PApp f ps) = let (scope',ds) = mapAccumL (ppPatt 2) scope ps
+ in (scope',ppParens (not (List.null ps) && d > 1) (ppCId f PP.<+> PP.hsep ds))
+ppPatt d scope (PLit l) = (scope,ppLit l)
+ppPatt d scope (PVar f) = (f:scope,ppCId f)
+ppPatt d scope PWild = (scope,PP.char '_')
+ppPatt d scope (PImplArg p) = let (scope',d) = ppPatt 0 scope p
+ in (scope',PP.braces d)
+
+ppBind Explicit x = ppCId x
+ppBind Implicit x = PP.braces (ppCId x)
+
+ppLit (LStr s) = PP.text (show s)
+ppLit (LInt n) = PP.integer n
+ppLit (LFlt d) = PP.double d
+
+ppMeta :: MetaId -> PP.Doc
+ppMeta n
+ | n == 0 = PP.char '?'
+ | otherwise = PP.char '?' PP.<> PP.int n
+
+ppParens True = PP.parens
+ppParens False = id
+
+freshName :: CId -> [CId] -> CId
+freshName x xs0 = loop 1 x
+ where
+ xs = wildCId : xs0
+
+ loop i y
+ | elem y xs = loop (i+1) (mkCId (show x++show i))
+ | otherwise = y
+
+
+-----------------------------------------------------
+-- Computation
+-----------------------------------------------------
+
+-- | Compute an expression to normal form
+normalForm :: Funs -> Int -> Env -> Expr -> Expr
+normalForm funs k env e = value2expr k (eval funs env e)
+ where
+ value2expr i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr i) vs)
+ value2expr i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr i) vs)
+ value2expr i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr i) vs)
+ value2expr i (VSusp j env vs k) = value2expr i (k (VGen j vs))
+ value2expr i (VLit l) = ELit l
+ value2expr i (VClosure env (EAbs b x e)) = EAbs b x (value2expr (i+1) (eval funs ((VGen i []):env) e))
+ value2expr i (VImplArg v) = EImplArg (value2expr i v)
+
+data Value
+ = VApp CId [Value]
+ | VLit Literal
+ | VMeta {-# UNPACK #-} !MetaId Env [Value]
+ | VSusp {-# UNPACK #-} !MetaId Env [Value] (Value -> Value)
+ | VGen {-# UNPACK #-} !Int [Value]
+ | VClosure Env Expr
+ | VImplArg Value
+
+type Funs = Map.Map CId (Type,Int,[Equation]) -- type and def of a fun
+type Env = [Value]
+
+eval :: Funs -> Env -> Expr -> Value
+eval funs env (EVar i) = env !! i
+eval funs env (EFun f) = case Map.lookup f funs of
+ Just (_,a,eqs) -> if a == 0
+ then case eqs of
+ Equ [] e : _ -> eval funs [] e
+ _ -> VApp f []
+ else VApp f []
+ Nothing -> error ("unknown function "++showCId f)
+eval funs env (EApp e1 e2) = apply funs env e1 [eval funs env e2]
+eval funs env (EAbs b x e) = VClosure env (EAbs b x e)
+eval funs env (EMeta i) = VMeta i env []
+eval funs env (ELit l) = VLit l
+eval funs env (ETyped e _) = eval funs env e
+eval funs env (EImplArg e) = VImplArg (eval funs env e)
+
+apply :: Funs -> Env -> Expr -> [Value] -> Value
+apply funs env e [] = eval funs env e
+apply funs env (EVar i) vs = applyValue funs (env !! i) vs
+apply funs env (EFun f) vs = case Map.lookup f funs of
+ Just (_,a,eqs) -> if a <= length vs
+ then let (as,vs') = splitAt a vs
+ in match funs f eqs as vs'
+ else VApp f vs
+ Nothing -> error ("unknown function "++showCId f)
+apply funs env (EApp e1 e2) vs = apply funs env e1 (eval funs env e2 : vs)
+apply funs env (EAbs _ x e) (v:vs) = apply funs (v:env) e vs
+apply funs env (EMeta i) vs = VMeta i env vs
+apply funs env (ELit l) vs = error "literal of function type"
+apply funs env (ETyped e _) vs = apply funs env e vs
+apply funs env (EImplArg _) vs = error "implicit argument in function position"
+
+applyValue funs v [] = v
+applyValue funs (VApp f vs0) vs = apply funs [] (EFun f) (vs0++vs)
+applyValue funs (VLit _) vs = error "literal of function type"
+applyValue funs (VMeta i env vs0) vs = VMeta i env (vs0++vs)
+applyValue funs (VGen i vs0) vs = VGen i (vs0++vs)
+applyValue funs (VSusp i env vs0 k) vs = VSusp i env vs0 (\v -> applyValue funs (k v) vs)
+applyValue funs (VClosure env (EAbs b x e)) (v:vs) = apply funs (v:env) e vs
+applyValue funs (VImplArg _) vs = error "implicit argument in function position"
+
+-----------------------------------------------------
+-- Pattern matching
+-----------------------------------------------------
+
+match :: Funs -> CId -> [Equation] -> [Value] -> [Value] -> Value
+match funs f eqs as0 vs0 =
+ case eqs of
+ [] -> VApp f (as0++vs0)
+ (Equ ps res):eqs -> tryMatches eqs ps as0 res []
+ where
+ tryMatches eqs [] [] res env = apply funs env res vs0
+ tryMatches eqs (p:ps) (a:as) res env = tryMatch p a env
+ where
+ tryMatch (PVar x ) (v ) env = tryMatches eqs ps as res (v:env)
+ tryMatch (PWild ) (_ ) env = tryMatches eqs ps as res env
+ tryMatch (p ) (VMeta i envi vs ) env = VSusp i envi vs (\v -> tryMatch p v env)
+ tryMatch (p ) (VGen i vs ) env = VApp f (as0++vs0)
+ tryMatch (p ) (VSusp i envi vs k) env = VSusp i envi vs (\v -> tryMatch p (k v) env)
+ tryMatch (PApp f1 ps1) (VApp f2 vs2 ) env | f1 == f2 = tryMatches eqs (ps1++ps) (vs2++as) res env
+ tryMatch (PLit l1 ) (VLit l2 ) env | l1 == l2 = tryMatches eqs ps as res env
+ tryMatch (PImplArg p ) (VImplArg v ) env = tryMatch p v env
+ tryMatch _ _ env = match funs f eqs as0 vs0
+
diff --git a/src/runtime/haskell/PGF/Expr.hs-boot b/src/runtime/haskell/PGF/Expr.hs-boot
new file mode 100644
index 000000000..34a62a410
--- /dev/null
+++ b/src/runtime/haskell/PGF/Expr.hs-boot
@@ -0,0 +1,28 @@
+module PGF.Expr where
+
+import PGF.CId
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+
+data Expr
+
+instance Eq Expr
+instance Ord Expr
+instance Show Expr
+
+
+data BindType = Explicit | Implicit
+
+instance Eq BindType
+instance Ord BindType
+instance Show BindType
+
+
+pArg :: RP.ReadP Expr
+pBinds :: RP.ReadP [(BindType,CId)]
+
+ppExpr :: Int -> [CId] -> Expr -> PP.Doc
+
+freshName :: CId -> [CId] -> CId
+
+ppParens :: Bool -> PP.Doc -> PP.Doc
diff --git a/src/runtime/haskell/PGF/Generate.hs b/src/runtime/haskell/PGF/Generate.hs
new file mode 100644
index 000000000..5add00a78
--- /dev/null
+++ b/src/runtime/haskell/PGF/Generate.hs
@@ -0,0 +1,66 @@
+module PGF.Generate where
+
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import PGF.TypeCheck
+
+import qualified Data.Map as M
+import System.Random
+
+-- generate an infinite list of trees exhaustively
+generate :: PGF -> Type -> Maybe Int -> [Expr]
+generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of
+ Left _ -> False
+ Right _ -> True )
+ (concatMap (\i -> gener i cat) depths)
+ where
+ gener 0 c = [EFun f | (f, ([],_)) <- fns c]
+ gener i c = [
+ tr |
+ (f, (cs,_)) <- fns c,
+ let alts = map (gener (i-1)) cs,
+ ts <- combinations alts,
+ let tr = foldl EApp (EFun f) ts,
+ depth tr >= i
+ ]
+ fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c]
+ depths = maybe [0 ..] (\d -> [0..d]) dp
+
+-- generate an infinite list of trees randomly
+genRandom :: StdGen -> PGF -> Type -> [Expr]
+genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of
+ Left _ -> False
+ Right _ -> True )
+ (genTrees (randomRs (0.0, 1.0 :: Double) gen) cat)
+ where
+ timeout = 47 -- give up
+
+ genTrees ds0 cat =
+ let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds
+ (t,k) = genTree ds cat
+ in (if k>timeout then id else (t:))
+ (genTrees ds2 cat) -- else (drop k ds)
+
+ genTree rs = gett rs where
+ gett ds cid | cid == cidString = (ELit (LStr "foo"), 1)
+ gett ds cid | cid == cidInt = (ELit (LInt 12345), 1)
+ gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1)
+ gett [] _ = (ELit (LStr "TIMEOUT"), 1) ----
+ gett ds cat = case fns cat of
+ [] -> (EMeta 0,1)
+ fs -> let
+ d:ds2 = ds
+ (f,args) = getf d fs
+ (ts,k) = getts ds2 args
+ in (foldl EApp (EFun f) ts, k+1)
+ getf d fs = let lg = (length fs) in
+ fs !! (floor (d * fromIntegral lg))
+ getts ds cats = case cats of
+ c:cs -> let
+ (t, k) = gett ds c
+ (ts,ks) = getts (drop k ds) cs
+ in (t:ts, k + ks)
+ _ -> ([],0)
+
+ fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat]
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs
new file mode 100644
index 000000000..fdd4cecb5
--- /dev/null
+++ b/src/runtime/haskell/PGF/Linearize.hs
@@ -0,0 +1,166 @@
+{-# LANGUAGE ParallelListComp #-}
+module PGF.Linearize
+ (linearizes,realize,realizes,linTree, linTreeMark,linearizesMark) where
+
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import PGF.Tree
+
+import Control.Monad
+import qualified Data.Map as Map
+import Data.List
+
+import Debug.Trace
+
+-- linearization and computation of concrete PGF Terms
+
+linearizes :: PGF -> CId -> Expr -> [String]
+linearizes pgf lang = realizes . linTree pgf lang
+
+realize :: Term -> String
+realize = concat . take 1 . realizes
+
+realizes :: Term -> [String]
+realizes = map (unwords . untokn) . realizest
+
+realizest :: Term -> [[Tokn]]
+realizest trm = case trm of
+ R ts -> realizest (ts !! 0)
+ S ss -> map concat $ combinations $ map realizest ss
+ K t -> [[t]]
+ W s t -> [[KS (s ++ r)] | [KS r] <- realizest t]
+ FV ts -> concatMap realizest ts
+ TM s -> [[KS s]]
+ _ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
+
+untokn :: [Tokn] -> [String]
+untokn ts = case ts of
+ KP d _ : [] -> d
+ KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
+ KS s : ws -> s : untokn ws
+ [] -> []
+ where
+ sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
+ v:_ -> v
+ _ -> d
+
+-- Lifts all variants to the top level (except those in macros).
+liftVariants :: Term -> [Term]
+liftVariants = f
+ where
+ f (R ts) = liftM R $ mapM f ts
+ f (P t1 t2) = liftM2 P (f t1) (f t2)
+ f (S ts) = liftM S $ mapM f ts
+ f (FV ts) = ts >>= f
+ f (W s t) = liftM (W s) $ f t
+ f t = return t
+
+linTree :: PGF -> CId -> Expr -> Term
+linTree pgf lang e = lin (expr2tree e) Nothing
+ where
+ cnc = lookMap (error "no lang") lang (concretes pgf)
+
+ lin (Abs xs e ) mty = case lin e Nothing of
+ R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
+ TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
+ lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of
+ Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps]
+ in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants]
+ Nothing -> tm0
+ lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted
+ lin (Lit (LInt i)) mty = R [kks (show i)]
+ lin (Lit (LFlt d)) mty = R [kks (show d)]
+ lin (Var x) mty = case mty of
+ Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
+ Nothing -> TM (showCId x)
+ lin (Meta i) mty = case mty of
+ Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc))
+ Nothing -> TM (show i)
+
+variants :: [Term] -> Term
+variants ts = case ts of
+ [t] -> t
+ _ -> FV ts
+
+unvariants :: Term -> [Term]
+unvariants t = case t of
+ FV ts -> ts
+ _ -> [t]
+
+compute :: PGF -> CId -> [Term] -> Term -> Term
+compute pgf lang args = comp where
+ comp trm = case trm of
+ P r p -> proj (comp r) (comp p)
+ W s t -> W s (comp t)
+ R ts -> R $ map comp ts
+ V i -> idx args i -- already computed
+ F c -> comp $ look c -- not computed (if contains argvar)
+ FV ts -> FV $ map comp ts
+ S ts -> S $ filter (/= S []) $ map comp ts
+ _ -> trm
+
+ look = lookOper pgf lang
+
+ idx xs i = if i > length xs - 1
+ then trace
+ ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0
+ else xs !! i
+
+ proj r p = case (r,p) of
+ (_, FV ts) -> FV $ map (proj r) ts
+ (FV ts, _ ) -> FV $ map (\t -> proj t p) ts
+ (W s t, _) -> kks (s ++ getString (proj t p))
+ _ -> comp $ getField r (getIndex p)
+
+ getString t = case t of
+ K (KS s) -> s
+ _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
+
+ getIndex t = case t of
+ C i -> i
+ TM _ -> 0 -- default value for parameter
+ _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666
+
+ getField t i = case t of
+ R rs -> idx rs i
+ TM s -> TM s
+ _ -> error ("ERROR in grammar compiler: field from " ++ show t) t
+
+---------
+-- markup with tree positions
+
+linearizesMark :: PGF -> CId -> Expr -> [String]
+linearizesMark pgf lang = realizes . linTreeMark pgf lang
+
+linTreeMark :: PGF -> CId -> Expr -> Term
+linTreeMark pgf lang = lin [] . expr2tree
+ where
+ lin p (Abs xs e ) = case lin p e of
+ R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
+ TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
+ lin p (Fun fun es) =
+ let argVariants =
+ mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
+ in variants [mark (fun,p) $ compute pgf lang args $ look fun |
+ args <- argVariants]
+ lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
+ lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
+ lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
+ lin p (Var x) = mark p $ TM (showCId x)
+ lin p (Meta i) = mark p $ TM (show i)
+
+ look = lookLin pgf lang
+
+ mark :: Show a => a -> Term -> Term
+ mark p t = case t of
+ R ts -> R $ map (mark p) ts
+ FV ts -> R $ map (mark p) ts
+ S ts -> S $ bracket p ts
+ K s -> S $ bracket p [t]
+ W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
+ _ -> t
+ -- otherwise in normal form
+
+ bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
+ sub p i = p ++ [i]
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
new file mode 100644
index 000000000..af25de025
--- /dev/null
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -0,0 +1,154 @@
+module PGF.Macros where
+
+import PGF.CId
+import PGF.Data
+import Control.Monad
+import qualified Data.Map as Map
+import qualified Data.Array as Array
+import Data.Maybe
+import Data.List
+
+-- operations for manipulating PGF grammars and objects
+
+mapConcretes :: (Concr -> Concr) -> PGF -> PGF
+mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
+
+lookLin :: PGF -> CId -> CId -> Term
+lookLin pgf lang fun =
+ lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
+
+lookOper :: PGF -> CId -> CId -> Term
+lookOper pgf lang fun =
+ lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
+
+lookLincat :: PGF -> CId -> CId -> Term
+lookLincat pgf lang fun =
+ lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
+
+lookParamLincat :: PGF -> CId -> CId -> Term
+lookParamLincat pgf lang fun =
+ lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
+
+lookPrintName :: PGF -> CId -> CId -> Term
+lookPrintName pgf lang fun =
+ lookMap tm0 fun $ printnames $ lookMap (error "no lang") lang $ concretes pgf
+
+lookType :: PGF -> CId -> Type
+lookType pgf f =
+ case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
+ (ty,_,_) -> ty
+
+lookDef :: PGF -> CId -> [Equation]
+lookDef pgf f =
+ case lookMap (error $ "lookDef " ++ show f) f (funs (abstract pgf)) of
+ (_,a,eqs) -> eqs
+
+isData :: PGF -> CId -> Bool
+isData pgf f =
+ case Map.lookup f (funs (abstract pgf)) of
+ Just (_,_,[]) -> True -- the encoding of data constrs
+ _ -> False
+
+lookValCat :: PGF -> CId -> CId
+lookValCat pgf = valCat . lookType pgf
+
+lookParser :: PGF -> CId -> Maybe ParserInfo
+lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
+
+lookStartCat :: PGF -> CId
+lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
+ [gflags pgf, aflags (abstract pgf)]
+
+lookGlobalFlag :: PGF -> CId -> String
+lookGlobalFlag pgf f =
+ lookMap "?" f (gflags pgf)
+
+lookAbsFlag :: PGF -> CId -> String
+lookAbsFlag pgf f =
+ lookMap "?" f (aflags (abstract pgf))
+
+lookConcr :: PGF -> CId -> Concr
+lookConcr pgf cnc =
+ lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf
+
+lookConcrFlag :: PGF -> CId -> CId -> Maybe String
+lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang
+
+functionsToCat :: PGF -> CId -> [(CId,Type)]
+functionsToCat pgf cat =
+ [(f,ty) | f <- fs, Just (ty,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
+ where
+ fs = lookMap [] cat $ catfuns $ abstract pgf
+
+missingLins :: PGF -> CId -> [CId]
+missingLins pgf lang = [c | c <- fs, not (hasl c)] where
+ fs = Map.keys $ funs $ abstract pgf
+ hasl = hasLin pgf lang
+
+hasLin :: PGF -> CId -> CId -> Bool
+hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang
+
+restrictPGF :: (CId -> Bool) -> PGF -> PGF
+restrictPGF cond pgf = pgf {
+ abstract = abstr {
+ funs = restrict $ funs $ abstr,
+ cats = restrict $ cats $ abstr
+ }
+ } ---- restrict concrs also, might be needed
+ where
+ restrict = Map.filterWithKey (\c _ -> cond c)
+ abstr = abstract pgf
+
+depth :: Expr -> Int
+depth (EAbs _ _ t) = depth t
+depth (EApp e1 e2) = max (depth e1) (depth e2) + 1
+depth _ = 1
+
+cftype :: [CId] -> CId -> Type
+cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val []
+
+typeOfHypo :: Hypo -> Type
+typeOfHypo (_,_,ty) = ty
+
+catSkeleton :: Type -> ([CId],CId)
+catSkeleton ty = case ty of
+ DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val)
+
+typeSkeleton :: Type -> ([(Int,CId)],CId)
+typeSkeleton ty = case ty of
+ DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val)
+
+valCat :: Type -> CId
+valCat ty = case ty of
+ DTyp _ val _ -> val
+
+contextLength :: Type -> Int
+contextLength ty = case ty of
+ DTyp hyps _ _ -> length hyps
+
+term0 :: CId -> Term
+term0 = TM . showCId
+
+tm0 :: Term
+tm0 = TM "?"
+
+kks :: String -> Term
+kks = K . KS
+
+-- lookup with default value
+lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a
+lookMap d c m = Map.findWithDefault d c m
+
+--- from Operations
+combinations :: [[a]] -> [[a]]
+combinations t = case t of
+ [] -> [[]]
+ aa:uu -> [a:u | a <- aa, u <- combinations uu]
+
+isLiteralCat :: CId -> Bool
+isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar])
+
+cidString = mkCId "String"
+cidInt = mkCId "Int"
+cidFloat = mkCId "Float"
+cidVar = mkCId "__gfVar"
diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs
new file mode 100644
index 000000000..9eee71a97
--- /dev/null
+++ b/src/runtime/haskell/PGF/Morphology.hs
@@ -0,0 +1,26 @@
+module PGF.Morphology(Lemma,Analysis,Morpho,
+ buildMorpho,
+ lookupMorpho,fullFormLexicon) where
+
+import PGF.ShowLinearize (collectWords)
+import PGF.Data
+import PGF.CId
+
+import qualified Data.Map as Map
+import Data.List (intersperse)
+
+-- these 4 definitions depend on the datastructure used
+
+type Lemma = CId
+type Analysis = String
+
+newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
+
+buildMorpho :: PGF -> Language -> Morpho
+buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
+
+lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
+lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
+
+fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
+fullFormLexicon (Morpho mo) = Map.toList mo
diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs
new file mode 100644
index 000000000..c657e3d17
--- /dev/null
+++ b/src/runtime/haskell/PGF/PMCFG.hs
@@ -0,0 +1,119 @@
+module PGF.PMCFG where
+
+import PGF.CId
+import PGF.Expr
+
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import Data.Array.IArray
+import Data.Array.Unboxed
+import Text.PrettyPrint
+
+type FCat = Int
+type FIndex = Int
+type FPointPos = Int
+data FSymbol
+ = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
+ | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
+ | FSymKS [String]
+ | FSymKP [String] [Alternative]
+ deriving (Eq,Ord,Show)
+type Profile = [Int]
+data Production
+ = FApply {-# UNPACK #-} !FunId [FCat]
+ | FCoerce {-# UNPACK #-} !FCat
+ | FConst Expr [String]
+ deriving (Eq,Ord,Show)
+data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
+type FSeq = Array FPointPos FSymbol
+type FunId = Int
+type SeqId = Int
+
+data Alternative =
+ Alt [String] [String]
+ deriving (Eq,Ord,Show)
+
+data ParserInfo
+ = ParserInfo { functions :: Array FunId FFun
+ , sequences :: Array SeqId FSeq
+ , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
+ , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
+ , startCats :: Map.Map CId [FCat]
+ , totalCats :: {-# UNPACK #-} !FCat
+ }
+
+
+fcatString, fcatInt, fcatFloat, fcatVar :: Int
+fcatString = (-1)
+fcatInt = (-2)
+fcatFloat = (-3)
+fcatVar = (-4)
+
+isLiteralFCat :: FCat -> Bool
+isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
+
+ppPMCFG :: ParserInfo -> Doc
+ppPMCFG pinfo =
+ text "productions" $$
+ nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
+ text "functions" $$
+ nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
+ text "sequences" $$
+ nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
+ text "startcats" $$
+ nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
+
+ppProduction (fcat,FApply funid args) =
+ ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
+ppProduction (fcat,FCoerce arg) =
+ ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
+ppProduction (fcat,FConst _ ss) =
+ ppFCat fcat <+> text "->" <+> ppStrs ss
+
+ppFun (funid,FFun fun _ arr) =
+ ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
+
+ppSeq (seqid,seq) =
+ ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
+
+ppStartCat (id,fcats) =
+ ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
+
+ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
+ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
+ppSymbol (FSymKS ts) = ppStrs ts
+ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
+
+ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
+
+ppStrs ss = doubleQuotes (hsep (map text ss))
+
+ppFCat fcat
+ | fcat == fcatString = text "CString"
+ | fcat == fcatInt = text "CInt"
+ | fcat == fcatFloat = text "CFloat"
+ | fcat == fcatVar = text "CVar"
+ | otherwise = char 'C' <> int fcat
+
+ppFunId funid = char 'F' <> int funid
+ppSeqId seqid = char 'S' <> int seqid
+
+
+filterProductions = closure
+ where
+ closure prods0
+ | IntMap.size prods == IntMap.size prods0 = prods
+ | otherwise = closure prods
+ where
+ prods = IntMap.mapMaybe (filterProdSet prods0) prods0
+
+ filterProdSet prods set0
+ | Set.null set = Nothing
+ | otherwise = Just set
+ where
+ set = Set.filter (filterRule prods) set0
+
+ filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
+ filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
+ filterRule prods _ = True
diff --git a/src/runtime/haskell/PGF/Paraphrase.hs b/src/runtime/haskell/PGF/Paraphrase.hs
new file mode 100644
index 000000000..58d15b2e8
--- /dev/null
+++ b/src/runtime/haskell/PGF/Paraphrase.hs
@@ -0,0 +1,112 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Paraphrase
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Generate parapharases with def definitions.
+-----------------------------------------------------------------------------
+
+module PGF.Paraphrase (
+ paraphrase,
+ paraphraseN
+ ) where
+
+import PGF.Data
+import PGF.Tree
+import PGF.Macros (lookDef,isData)
+import PGF.CId
+
+import Data.List (nub,sort,group)
+import qualified Data.Map as Map
+
+import Debug.Trace ----
+
+paraphrase :: PGF -> Expr -> [Expr]
+paraphrase pgf = nub . paraphraseN 2 pgf
+
+paraphraseN :: Int -> PGF -> Expr -> [Expr]
+paraphraseN i pgf = map tree2expr . paraphraseN' i pgf . expr2tree
+
+paraphraseN' :: Int -> PGF -> Tree -> [Tree]
+paraphraseN' 0 _ t = [t]
+paraphraseN' i pgf t =
+ step i t ++ [Fun g ts' | Fun g ts <- step (i-1) t, ts' <- sequence (map par ts)]
+ where
+ par = paraphraseN' (i-1) pgf
+ step 0 t = [t]
+ step i t = let stept = step (i-1) t in stept ++ concat [def u | u <- stept]
+ def = fromDef pgf
+
+fromDef :: PGF -> Tree -> [Tree]
+fromDef pgf t@(Fun f ts) = defDown t ++ defUp t where
+ defDown t = [subst g u | let equ = equsFrom f, (u,g) <- match equ ts, trequ "U" f equ]
+ defUp t = [subst g u | equ <- equsTo f, (u,g) <- match [equ] ts, trequ "D" f equ]
+
+ equsFrom f = [(ps,d) | Just equs <- [lookup f equss], (Fun _ ps,d) <- equs]
+
+ equsTo f = [c | (_,equs) <- equss, c <- casesTo f equs]
+
+ casesTo f equs =
+ [(ps,p) | (p,d@(Fun g ps)) <- equs, g==f,
+ isClosed d || (length equs == 1 && isLinear d)]
+
+ equss = [(f,[(Fun f (map patt2tree ps), expr2tree d) | (Equ ps d) <- eqs]) |
+ (f,(_,_,eqs)) <- Map.assocs (funs (abstract pgf)), not (null eqs)]
+
+ trequ s f e = True ----trace (s ++ ": " ++ show f ++ " " ++ show e) True
+
+subst :: Subst -> Tree -> Tree
+subst g e = case e of
+ Fun f ts -> Fun f (map substg ts)
+ Var x -> maybe e id $ lookup x g
+ _ -> e
+ where
+ substg = subst g
+
+type Subst = [(CId,Tree)]
+
+-- this applies to pattern, hence don't need to consider abstractions
+isClosed :: Tree -> Bool
+isClosed t = case t of
+ Fun _ ts -> all isClosed ts
+ Var _ -> False
+ _ -> True
+
+-- this applies to pattern, hence don't need to consider abstractions
+isLinear :: Tree -> Bool
+isLinear = nodup . vars where
+ vars t = case t of
+ Fun _ ts -> concatMap vars ts
+ Var x -> [x]
+ _ -> []
+ nodup = all ((<2) . length) . group . sort
+
+
+match :: [([Tree],Tree)] -> [Tree] -> [(Tree, Subst)]
+match cases terms = case cases of
+ [] -> []
+ (patts,_):_ | length patts /= length terms -> []
+ (patts,val):cc -> case mapM tryMatch (zip patts terms) of
+ Just substs -> return (val, concat substs)
+ _ -> match cc terms
+ where
+ tryMatch (p,t) = case (p, t) of
+ (Var x, _) | notMeta t -> return [(x,t)]
+ (Fun p pp, Fun f tt) | p == f && length pp == length tt -> do
+ matches <- mapM tryMatch (zip pp tt)
+ return (concat matches)
+ _ -> if p==t then return [] else Nothing
+
+ notMeta e = case e of
+ Meta _ -> False
+ Fun f ts -> all notMeta ts
+ _ -> True
+
+-- | Converts a pattern to tree.
+patt2tree :: Patt -> Tree
+patt2tree (PApp f ps) = Fun f (map patt2tree ps)
+patt2tree (PLit l) = Lit l
+patt2tree (PVar x) = Var x
+patt2tree PWild = Meta 0
diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs
new file mode 100644
index 000000000..e88926f6e
--- /dev/null
+++ b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs
@@ -0,0 +1,205 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- MCFG parsing, the active algorithm
+-----------------------------------------------------------------------------
+
+module PGF.Parsing.FCFG.Active (parse) where
+
+import GF.Data.Assoc
+import GF.Data.SortedList
+import GF.Data.Utilities
+import qualified GF.Data.MultiMap as MM
+
+import PGF.CId
+import PGF.Data
+import PGF.Tree
+import PGF.Parsing.FCFG.Utilities
+import PGF.BuildParser
+
+import Control.Monad (guard)
+
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Data.Array.IArray
+import Debug.Trace
+
+----------------------------------------------------------------------
+-- * parsing
+
+type FToken = String
+
+makeFinalEdge cat 0 0 = (cat, [EmptyRange])
+makeFinalEdge cat i j = (cat, [makeRange i j])
+
+-- | the list of categories = possible starting categories
+parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr]
+parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees
+ where
+ inTokens = input toks
+ starts = Map.findWithDefault [] start (startCats pinfo)
+ schart = xchart2syntaxchart chart pinfo
+ (i,j) = inputBounds inTokens
+ finalEdges = [makeFinalEdge cat i j | cat <- starts]
+ forests = chart2forests schart (const False) finalEdges
+ filteredForests = forests >>= applyProfileToForest
+
+ pinfoex = buildParserInfo pinfo
+
+ chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
+ axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
+ | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
+
+isBU s = s=="b"
+isTD s = s=="t"
+
+-- used in prediction
+emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
+emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
+
+
+process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
+process strategy pinfo pinfoex toks [] chart = chart
+process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
+ where
+ univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
+ | inRange (bounds lin) ppos =
+ case lin ! ppos of
+ FSymCat d r -> let c = args !! d
+ in case recs !! d of
+ [] -> case insertXChart chart item c of
+ Nothing -> chart
+ Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
+ rng <- concatRange rng (found' !! r)
+ return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
+ ++
+ do guard (isTD strategy)
+ (ruleid,args) <- topdownRules pinfo c
+ return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
+ in process strategy pinfo pinfoex toks items chart
+ found' -> let items = do rng <- concatRange rng (found' !! r)
+ return (Active found rng lbl (ppos+1) node args cat)
+ in process strategy pinfo pinfoex toks items chart
+ FSymKS [tok]
+ -> let items = do t_rng <- inputToken toks ? tok
+ rng' <- concatRange rng t_rng
+ return (Active found rng' lbl (ppos+1) node args cat)
+ in process strategy pinfo pinfoex toks items chart
+ | otherwise =
+ if inRange (bounds lins) (lbl+1)
+ then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
+ else univRule (Final (reverse (rng:found)) node args cat) chart
+ where
+ (FFun _ _ lins) = functions pinfo ! ruleid
+ lin = sequences pinfo ! (lins ! lbl)
+ univRule item@(Final found' node args cat) chart =
+ case insertXChart chart item cat of
+ Nothing -> chart
+ Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
+ let FFun _ _ lins = functions pinfo ! ruleid
+ FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
+ rng <- concatRange rng (found' !! r)
+ return (Active found rng l (ppos+1) (updateChildren node d found') args c)
+ ++
+ do guard (isBU strategy)
+ (ruleid,args,c) <- leftcornerCats pinfoex ? cat
+ let FFun _ _ lins = functions pinfo ! ruleid
+ FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
+ return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
+
+ updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
+ updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
+ in process strategy pinfo pinfoex toks items chart
+
+----------------------------------------------------------------------
+-- * XChart
+
+data Item
+ = Active RangeRec
+ Range
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !FPointPos
+ (SyntaxNode FunId RangeRec)
+ [FCat]
+ FCat
+ | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
+ deriving (Eq, Ord, Show)
+
+data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
+
+emptyXChart :: Ord c => XChart c
+emptyXChart = XChart MM.empty MM.empty
+
+insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
+ case MM.insert' c item actives of
+ Nothing -> Nothing
+ Just actives -> Just (XChart actives finals)
+
+insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
+ case MM.insert' c item finals of
+ Nothing -> Nothing
+ Just finals -> Just (XChart actives finals)
+
+lookupXChartAct (XChart actives finals) c = actives MM.! c
+lookupXChartFinal (XChart actives finals) c = finals MM.! c
+
+xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
+xchart2syntaxchart (XChart actives finals) pinfo =
+ accumAssoc groupSyntaxNodes $
+ [ case node of
+ SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
+ in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
+ SString s -> ((cat,found), SString s)
+ SInt n -> ((cat,found), SInt n)
+ SFloat f -> ((cat,found), SFloat f)
+ | (Final found node rhs cat) <- MM.elems finals
+ ]
+
+literals :: ParserInfoEx -> Input FToken -> [Item]
+literals pinfoex toks =
+ [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
+ where
+ lexer t =
+ case reads t of
+ [(n,"")] -> (fcatInt, SInt (n::Integer))
+ _ -> case reads t of
+ [(f,"")] -> (fcatFloat, SFloat (f::Double))
+ _ -> (fcatString,SString t)
+
+
+----------------------------------------------------------------------
+-- Earley --
+
+-- called with all starting categories
+initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
+initialTD pinfo starts toks =
+ do cat <- starts
+ (ruleid,args) <- topdownRules pinfo cat
+ return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
+
+topdownRules pinfo cat = f cat []
+ where
+ f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
+
+ g (FApply ruleid args) rules = (ruleid,args) : rules
+ g (FCoerce cat) rules = f cat rules
+
+
+----------------------------------------------------------------------
+-- Kilbury --
+
+initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
+initialBU pinfo pinfoex toks =
+ do (tok,rngs) <- aAssocs (inputToken toks)
+ (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
+ rng <- rngs
+ return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
+ ++
+ do (ruleid,args,cat) <- epsilonRules pinfoex
+ let FFun _ _ _ = functions pinfo ! ruleid
+ return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)
diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs
new file mode 100644
index 000000000..296a0d33b
--- /dev/null
+++ b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs
@@ -0,0 +1,371 @@
+{-# LANGUAGE BangPatterns #-}
+module PGF.Parsing.FCFG.Incremental
+ ( ParseState
+ , ErrorState
+ , initState
+ , nextState
+ , getCompletions
+ , recoveryStates
+ , extractTrees
+ , parse
+ , parseWithRecovery
+ ) where
+
+import Data.Array.IArray
+import Data.Array.Base (unsafeAt)
+import Data.List (isPrefixOf, foldl')
+import Data.Maybe (fromMaybe, maybe)
+import qualified Data.Map as Map
+import qualified GF.Data.TrieMap as TMap
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.SortedList
+import PGF.CId
+import PGF.Data
+import PGF.Expr(Tree)
+import PGF.Macros
+import PGF.TypeCheck
+import Debug.Trace
+
+parse :: PGF -> Language -> Type -> [String] -> [Tree]
+parse pgf lang typ toks = loop (initState pgf lang typ) toks
+ where
+ loop ps [] = extractTrees ps typ
+ loop ps (t:ts) = case nextState ps t of
+ Left es -> []
+ Right ps -> loop ps ts
+
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
+parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
+ where
+ accept ps [] = extractTrees ps typ
+ accept ps (t:ts) =
+ case nextState ps t of
+ Right ps -> accept ps ts
+ Left es -> skip (recoveryStates open_typs es) ts
+
+ skip ps_map [] = extractTrees (fst ps_map) typ
+ skip ps_map (t:ts) =
+ case Map.lookup t (snd ps_map) of
+ Just ps -> accept ps ts
+ Nothing -> skip ps_map ts
+
+-- | Creates an initial parsing state for a given language and
+-- startup category.
+initState :: PGF -> Language -> Type -> ParseState
+initState pgf lang (DTyp _ start _) =
+ let items = do
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
+ [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ (lbl,seqid) <- assocs lins
+ return (Active 0 0 funid seqid args (AK cat lbl))
+
+ pinfo =
+ case lookParser pgf lang of
+ Just pinfo -> pinfo
+ _ -> error ("Unknown language: " ++ showCId lang)
+
+ in PState pgf
+ pinfo
+ (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
+ (TMap.singleton [] (Set.fromList items))
+
+-- | From the current state and the next token
+-- 'nextState' computes a new state, where the token
+-- is consumed and the current position is shifted by one.
+-- If the new token cannot be accepted then an error state
+-- is returned.
+nextState :: ParseState -> String -> Either ErrorState ParseState
+nextState (PState pgf pinfo chart items) t =
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = fromMaybe TMap.empty (Map.lookup t map_items)
+ (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in if TMap.null acc1
+ then Left (EState pgf pinfo chart2)
+ else Right (PState pgf pinfo chart2 acc1)
+ where
+ add (tok:toks) item acc
+ | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
+ add _ item acc = acc
+
+-- | If the next token is not known but only its prefix (possible empty prefix)
+-- then the 'getCompletions' function can be used to calculate the possible
+-- next words and the consequent states. This is used for word completions in
+-- the GF interpreter.
+getCompletions :: ParseState -> String -> Map.Map String ParseState
+getCompletions (PState pgf pinfo chart items) w =
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
+ (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in fmap (PState pgf pinfo chart2) acc'
+ where
+ add (tok:toks) item acc
+ | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+ add _ item acc = acc
+
+recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
+recoveryStates open_types (EState pgf pinfo chart) =
+ let open_fcats = concatMap type2fcats open_types
+ agenda = foldl (complete open_fcats) [] (actives chart)
+ (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
+ where
+ type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
+
+ complete open_fcats items ac =
+ foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ (:) (Active j' (ppos+1) funid seqid args keyc)))
+ items
+ [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac]
+
+ add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+
+-- | This function extracts the list of all completed parse trees
+-- that spans the whole input consumed so far. The trees are also
+-- limited by the category specified, which is usually
+-- the same as the startup category.
+extractTrees :: ParseState -> Type -> [Tree]
+extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
+ nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
+ where
+ (mb_agenda,acc) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
+
+ exps = do
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
+ [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ lbl <- indices lins
+ Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
+ (fvs,tree) <- go Set.empty 0 (0,fid)
+ guard (Set.null fvs)
+ return tree
+
+ go rec fcat' (d,fcat)
+ | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
+ | Set.member fcat rec = mzero
+ | otherwise = foldForest (\funid args trees ->
+ do let FFun fn _ lins = functions pinfo ! funid
+ args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
+ check_ho_fun fn args
+ `mplus`
+ trees)
+ (\const _ trees ->
+ return (freeVar const,const)
+ `mplus`
+ trees)
+ [] fcat (forest st)
+
+ check_ho_fun fun args
+ | fun == _V = return (head args)
+ | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
+ | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
+
+ mkVar (EFun v) = v
+ mkVar (EMeta _) = wildCId
+
+ freeVar (EFun v) = Set.singleton v
+ freeVar _ = Set.empty
+
+_B = mkCId "_B"
+_V = mkCId "_V"
+
+process mbt fn !seqs !funs [] acc chart = (acc,chart)
+process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
+ | inRange (bounds lin) ppos =
+ case unsafeAt lin ppos of
+ FSymCat d r -> let !fid = args !! d
+ key = AK fid r
+
+ items2 = case lookupPC (mkPK key k) (passive chart) of
+ Nothing -> items
+ Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
+ items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items)
+ (\_ _ items -> items)
+ items2 fid (forest chart)
+ in case lookupAC key (active chart) of
+ Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
+ Just set | Set.member item set -> process mbt fn seqs funs items acc chart
+ | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
+ FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ FSymKP strs vars
+ -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
+ (strs:[strs' | Alt strs' _ <- vars])
+ in process mbt fn seqs funs items acc' chart
+ FSymLit d r -> let !fid = args !! d
+ in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
+ (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ [] -> case litCatMatch fid mbt of
+ Just (toks,lit) -> let fid' = nextId chart
+ !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
+ in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
+ ,nextId=nextId chart+1
+ }
+ Nothing -> process mbt fn seqs funs items acc chart
+ | otherwise =
+ case lookupPC (mkPK key0 j) (passive chart) of
+ Nothing -> let fid = nextId chart
+
+ items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
+ Nothing -> items
+ Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
+ in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
+ in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
+ ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
+ ,nextId =nextId chart+1
+ }
+ Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
+ in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
+ where
+ !lin = unsafeAt seqs seqid
+ !k = offset chart
+
+ mkPK (AK fid lbl) j = PK fid lbl j
+
+ rhs funid lbl = unsafeAt lins lbl
+ where
+ FFun _ _ lins = unsafeAt funs funid
+
+
+updateAt :: Int -> a -> [a] -> [a]
+updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+litCatMatch fcat (Just t)
+ | fcat == fcatString = Just ([t],ELit (LStr t))
+ | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
+ _ -> Nothing }
+ | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
+ _ -> Nothing }
+ | fcat == fcatVar = Just ([t],EFun (mkCId t))
+litCatMatch _ _ = Nothing
+
+
+----------------------------------------------------------------
+-- Active Chart
+----------------------------------------------------------------
+
+data Active
+ = Active {-# UNPACK #-} !Int
+ {-# UNPACK #-} !FPointPos
+ {-# UNPACK #-} !FunId
+ {-# UNPACK #-} !SeqId
+ [FCat]
+ {-# UNPACK #-} !ActiveKey
+ deriving (Eq,Show,Ord)
+data ActiveKey
+ = AK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ deriving (Eq,Ord,Show)
+type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
+
+emptyAC :: ActiveChart
+emptyAC = IntMap.empty
+
+lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
+lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
+
+lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active]
+lookupACByFCat fcat chart =
+ case IntMap.lookup fcat chart of
+ Nothing -> []
+ Just map -> IntMap.elems map
+
+labelsAC :: FCat -> ActiveChart -> [FIndex]
+labelsAC fcat chart =
+ case IntMap.lookup fcat chart of
+ Nothing -> []
+ Just map -> IntMap.keys map
+
+insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
+insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
+
+
+----------------------------------------------------------------
+-- Passive Chart
+----------------------------------------------------------------
+
+data PassiveKey
+ = PK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !Int
+ deriving (Eq,Ord,Show)
+
+type PassiveChart = Map.Map PassiveKey FCat
+
+emptyPC :: PassiveChart
+emptyPC = Map.empty
+
+lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
+lookupPC key chart = Map.lookup key chart
+
+insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
+insertPC key fcat chart = Map.insert key fcat chart
+
+
+----------------------------------------------------------------
+-- Forest
+----------------------------------------------------------------
+
+foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
+foldForest f g b fcat forest =
+ case IntMap.lookup fcat forest of
+ Nothing -> b
+ Just set -> Set.fold foldProd b set
+ where
+ foldProd (FCoerce fcat) b = foldForest f g b fcat forest
+ foldProd (FApply funid args) b = f funid args b
+ foldProd (FConst const toks) b = g const toks b
+
+
+----------------------------------------------------------------
+-- Parse State
+----------------------------------------------------------------
+
+-- | An abstract data type whose values represent
+-- the current state in an incremental parser.
+data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
+
+data Chart
+ = Chart
+ { active :: ActiveChart
+ , actives :: [ActiveChart]
+ , passive :: PassiveChart
+ , forest :: IntMap.IntMap (Set.Set Production)
+ , nextId :: {-# UNPACK #-} !FCat
+ , offset :: {-# UNPACK #-} !Int
+ }
+ deriving Show
+
+----------------------------------------------------------------
+-- Error State
+----------------------------------------------------------------
+
+-- | An abstract data type whose values represent
+-- the state in an incremental parser after an error.
+data ErrorState = EState PGF ParserInfo Chart
diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs
new file mode 100644
index 000000000..dc0b2dc4a
--- /dev/null
+++ b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs
@@ -0,0 +1,188 @@
+----------------------------------------------------------------------
+-- |
+-- Maintainer : PL
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/05/13 12:40:19 $
+-- > CVS $Author: peb $
+-- > CVS $Revision: 1.6 $
+--
+-- Basic type declarations and functions for grammar formalisms
+-----------------------------------------------------------------------------
+
+
+module PGF.Parsing.FCFG.Utilities where
+
+import Control.Monad
+import Data.Array
+import Data.List (groupBy)
+
+import PGF.CId
+import PGF.Data
+import PGF.Tree
+import GF.Data.Assoc
+import GF.Data.Utilities (sameLength, foldMerge, splitBy)
+
+
+------------------------------------------------------------
+-- ranges as single pairs
+
+type RangeRec = [Range]
+
+data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
+ | EmptyRange
+ deriving (Eq, Ord, Show)
+
+makeRange :: Int -> Int -> Range
+makeRange = Range
+
+concatRange :: Range -> Range -> [Range]
+concatRange EmptyRange rng = return rng
+concatRange rng EmptyRange = return rng
+concatRange (Range i j) (Range j' k) = [Range i k | j==j']
+
+minRange :: Range -> Int
+minRange (Range i j) = i
+
+maxRange :: Range -> Int
+maxRange (Range i j) = j
+
+
+------------------------------------------------------------
+-- * representaions of input tokens
+
+data Input t = MkInput { inputBounds :: (Int, Int),
+ inputToken :: Assoc t [Range]
+ }
+
+input :: Ord t => [t] -> Input t
+input toks = MkInput inBounds inToken
+ where
+ inBounds = (0, length toks)
+ inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
+
+inputMany :: Ord t => [[t]] -> Input t
+inputMany toks = MkInput inBounds inToken
+ where
+ inBounds = (0, length toks)
+ inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
+
+
+------------------------------------------------------------
+-- * representations of syntactical analyses
+
+-- ** charts as finite maps over edges
+
+-- | The values of the chart, a list of key-daughters pairs,
+-- has unique keys. In essence, it is a map from 'n' to daughters.
+-- The daughters should be a set (not necessarily sorted) of rhs's.
+type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
+
+data SyntaxNode n e = SMeta
+ | SNode n [e]
+ | SString String
+ | SInt Integer
+ | SFloat Double
+ deriving (Eq,Ord,Show)
+
+groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
+groupSyntaxNodes [] = []
+groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
+ where
+ (ess,xs') = span xs
+
+ span [] = ([],[])
+ span xs@(SNode n es:xs')
+ | n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
+ | otherwise = ([],xs)
+groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
+groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
+groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
+
+-- ** syntax forests
+
+data SyntaxForest n = FMeta
+ | FNode n [[SyntaxForest n]]
+ -- ^ The outer list should be a set (not necessarily sorted)
+ -- of possible alternatives. Ie. the outer list
+ -- is a disjunctive node, and the inner lists
+ -- are (conjunctive) concatenative nodes
+ | FString String
+ | FInt Integer
+ | FFloat Double
+ deriving (Eq, Ord, Show)
+
+instance Functor SyntaxForest where
+ fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
+ fmap _ (FString s) = FString s
+ fmap _ (FInt n) = FInt n
+ fmap _ (FFloat f) = FFloat f
+ fmap _ (FMeta) = FMeta
+
+forestName :: SyntaxForest n -> Maybe n
+forestName (FNode n _) = Just n
+forestName _ = Nothing
+
+unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
+unifyManyForests = foldM unifyForests FMeta
+
+-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
+-- and all children can be unified
+unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
+unifyForests FMeta forest = return forest
+unifyForests forest FMeta = return forest
+unifyForests (FNode name1 children1) (FNode name2 children2)
+ | name1 == name2 && not (null children) = return $ FNode name1 children
+ where children = [ forests | forests1 <- children1, forests2 <- children2,
+ sameLength forests1 forests2,
+ forests <- zipWithM unifyForests forests1 forests2 ]
+unifyForests (FString s1) (FString s2)
+ | s1 == s2 = return $ FString s1
+unifyForests (FInt n1) (FInt n2)
+ | n1 == n2 = return $ FInt n1
+unifyForests (FFloat f1) (FFloat f2)
+ | f1 == f2 = return $ FFloat f1
+unifyForests _ _ = fail "forest unification failure"
+
+
+-- ** conversions between representations
+
+chart2forests :: (Ord n, Ord e) =>
+ SyntaxChart n e -- ^ The complete chart
+ -> (e -> Bool) -- ^ When is an edge 'FMeta'?
+ -> [e] -- ^ The starting edges
+ -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together.
+ -- In essence, the result is a map from 'n' to forest daughters
+chart2forests chart isMeta = concatMap (edge2forests [])
+ where edge2forests edges edge
+ | isMeta edge = [FMeta]
+ | edge `elem` edges = []
+ | otherwise = map (item2forest (edge:edges)) $ chart ? edge
+ item2forest edges (SMeta) = FMeta
+ item2forest edges (SNode name children) =
+ FNode name $ children >>= mapM (edge2forests edges)
+ item2forest edges (SString s) = FString s
+ item2forest edges (SInt n) = FInt n
+ item2forest edges (SFloat f) = FFloat f
+
+
+applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
+applyProfileToForest (FNode (fun,profiles) children)
+ | fun == wildCId = concat chForests
+ | otherwise = [ FNode fun chForests | not (null chForests) ]
+ where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
+ forests0 <- children,
+ forests <- mapM applyProfileToForest forests0 ]
+applyProfileToForest (FString s) = [FString s]
+applyProfileToForest (FInt n) = [FInt n]
+applyProfileToForest (FFloat f) = [FFloat f]
+applyProfileToForest (FMeta) = [FMeta]
+
+
+forest2trees :: SyntaxForest CId -> [Tree]
+forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees
+forest2trees (FString s) = [Lit (LStr s)]
+forest2trees (FInt n) = [Lit (LInt n)]
+forest2trees (FFloat f) = [Lit (LFlt f)]
+forest2trees (FMeta) = [Meta 0]
diff --git a/src/runtime/haskell/PGF/ShowLinearize.hs b/src/runtime/haskell/PGF/ShowLinearize.hs
new file mode 100644
index 000000000..dd3b997a6
--- /dev/null
+++ b/src/runtime/haskell/PGF/ShowLinearize.hs
@@ -0,0 +1,113 @@
+module PGF.ShowLinearize (
+ collectWords,
+ tableLinearize,
+ recordLinearize,
+ termLinearize,
+ tabularLinearize,
+ allLinearize,
+ markLinearize
+ ) where
+
+import PGF.CId
+import PGF.Data
+import PGF.Tree
+import PGF.Macros
+import PGF.Linearize
+
+import GF.Data.Operations
+import Data.List
+import qualified Data.Map as Map
+
+-- printing linearizations in different ways with source parameters
+
+-- internal representation, only used internally in this module
+data Record =
+ RR [(String,Record)]
+ | RT [(String,Record)]
+ | RFV [Record]
+ | RS String
+ | RCon String
+
+prRecord :: Record -> String
+prRecord = prr where
+ prr t = case t of
+ RR fs -> concat $
+ "{" :
+ (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"]
+ RT fs -> concat $
+ "table {" :
+ (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"]
+ RFV ts -> concat $
+ "variants {" : (intersperse ";" (map prr ts)) ++ ["}"]
+ RS s -> prQuotedString s
+ RCon s -> s
+
+-- uses the encoding of record types in PGF.paramlincat
+mkRecord :: Term -> Term -> Record
+mkRecord typ trm = case (typ,trm) of
+ (_, FV ts) -> RFV $ map (mkRecord typ) ts
+ (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts]
+ (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts]
+ (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts])
+ (FV ps, C i) -> RCon $ str $ ps !! i
+ (S [], _) -> case realizes trm of
+ [s] -> RS s
+ ss -> RFV $ map RS ss
+ _ -> RS $ show trm ---- printTree trm
+ where
+ str = realize
+
+-- show all branches, without labels and params
+allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
+allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where
+ pr (p,vs) = unlines vs
+
+-- show all branches, with labels and params
+tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
+tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
+ pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))
+
+-- create a table from labels+params to variants
+tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])]
+tabularLinearize pgf lang = branches . recLinearize pgf lang where
+ branches r = case r of
+ RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
+ RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
+ RFV rs -> concatMap branches rs
+ RS s -> [([], [s])]
+ RCon _ -> []
+
+-- show record in GF-source-like syntax
+recordLinearize :: PGF -> CId -> Expr -> String
+recordLinearize pgf lang = prRecord . recLinearize pgf lang
+
+-- create a GF-like record, forming the basis of all functions above
+recLinearize :: PGF -> CId -> Expr -> Record
+recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where
+ typ = case expr2tree tree of
+ Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f
+
+-- show PGF term
+termLinearize :: PGF -> CId -> Expr -> String
+termLinearize pgf lang = show . linTree pgf lang
+
+-- show bracketed markup with references to tree structure
+markLinearize :: PGF -> CId -> Expr -> String
+markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
+
+
+-- for Morphology: word, lemma, tags
+collectWords :: PGF -> Language -> [(String, [(CId,String)])]
+collectWords pgf lang =
+ concatMap collOne
+ [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf]
+ where
+ collOne (f,c,i) =
+ fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
+ fromRec f v r = case r of
+ RR rs -> concat [fromRec f v t | (_,t) <- rs]
+ RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
+ RFV rs -> concatMap (fromRec f v) rs
+ RS s -> [(s,[(f,unwords (reverse v))])]
+ RCon c -> [] ---- inherent
+
diff --git a/src/runtime/haskell/PGF/Tree.hs b/src/runtime/haskell/PGF/Tree.hs
new file mode 100644
index 000000000..cb2052cd7
--- /dev/null
+++ b/src/runtime/haskell/PGF/Tree.hs
@@ -0,0 +1,71 @@
+module PGF.Tree
+ ( Tree(..),
+ tree2expr, expr2tree,
+ prTree
+ ) where
+
+import PGF.CId
+import PGF.Expr hiding (Tree)
+
+import Data.Char
+import Data.List as List
+import Control.Monad
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+
+-- | The tree is an evaluated expression in the abstract syntax
+-- of the grammar. The type is especially restricted to not
+-- allow unapplied lambda abstractions. The tree is used directly
+-- from the linearizer and is produced directly from the parser.
+data Tree =
+ Abs [(BindType,CId)] Tree -- ^ lambda abstraction. The list of variables is non-empty
+ | Var CId -- ^ variable
+ | Fun CId [Tree] -- ^ function application
+ | Lit Literal -- ^ literal
+ | Meta {-# UNPACK #-} !MetaId -- ^ meta variable
+ deriving (Eq, Ord)
+
+-----------------------------------------------------
+-- Conversion Expr <-> Tree
+-----------------------------------------------------
+
+-- | Converts a tree to expression. The conversion
+-- is always total, every tree is a valid expression.
+tree2expr :: Tree -> Expr
+tree2expr = tree2expr []
+ where
+ tree2expr ys (Fun x ts) = foldl EApp (EFun x) (List.map (tree2expr ys) ts)
+ tree2expr ys (Lit l) = ELit l
+ tree2expr ys (Meta n) = EMeta n
+ tree2expr ys (Abs xs t) = foldr (\(b,x) e -> EAbs b x e) (tree2expr (List.map snd (reverse xs)++ys) t) xs
+ tree2expr ys (Var x) = case List.lookup x (zip ys [0..]) of
+ Just i -> EVar i
+ Nothing -> error "unknown variable"
+
+-- | Converts an expression to tree. The conversion is only partial.
+-- Variables and meta variables of function type and beta redexes are not allowed.
+expr2tree :: Expr -> Tree
+expr2tree e = abs [] [] e
+ where
+ abs ys xs (EAbs b x e) = abs ys ((b,x):xs) e
+ abs ys xs (ETyped e _) = abs ys xs e
+ abs ys xs e = case xs of
+ [] -> app ys [] e
+ xs -> Abs (reverse xs) (app (map snd xs++ys) [] e)
+
+ app xs as (EApp e1 e2) = app xs ((abs xs [] e2) : as) e1
+ app xs as (ELit l)
+ | List.null as = Lit l
+ | otherwise = error "literal of function type encountered"
+ app xs as (EMeta n)
+ | List.null as = Meta n
+ | otherwise = error "meta variables of function type are not allowed in trees"
+ app xs as (EAbs _ x e) = error "beta redexes are not allowed in trees"
+ app xs as (EVar i) = Var (xs !! i)
+ app xs as (EFun f) = Fun f as
+ app xs as (ETyped e _) = app xs as e
+
+
+prTree :: Tree -> String
+prTree = showExpr [] . tree2expr
+
diff --git a/src/runtime/haskell/PGF/Type.hs b/src/runtime/haskell/PGF/Type.hs
new file mode 100644
index 000000000..013754a45
--- /dev/null
+++ b/src/runtime/haskell/PGF/Type.hs
@@ -0,0 +1,103 @@
+module PGF.Type ( Type(..), Hypo,
+ readType, showType,
+ mkType, mkHypo, mkDepHypo, mkImplHypo,
+ pType, ppType, ppHypo ) where
+
+import PGF.CId
+import {-# SOURCE #-} PGF.Expr
+import Data.Char
+import Data.List
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+import Control.Monad
+
+-- | To read a type from a 'String', use 'readType'.
+data Type =
+ DTyp [Hypo] CId [Expr]
+ deriving (Eq,Ord,Show)
+
+-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
+type Hypo = (BindType,CId,Type)
+
+-- | Reads a 'Type' from a 'String'.
+readType :: String -> Maybe Type
+readType s = case [x | (x,cs) <- RP.readP_to_S pType s, all isSpace cs] of
+ [x] -> Just x
+ _ -> Nothing
+
+-- | renders type as 'String'. The list
+-- of identifiers is the list of all free variables
+-- in the expression in order reverse to the order
+-- of binding.
+showType :: [CId] -> Type -> String
+showType vars = PP.render . ppType 0 vars
+
+-- | creates a type from list of hypothesises, category and
+-- list of arguments for the category. The operation
+-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
+-- @h_1 -> ... -> h_n -> C e_1 ... e_m@
+mkType :: [Hypo] -> CId -> [Expr] -> Type
+mkType hyps cat args = DTyp hyps cat args
+
+-- | creates hypothesis for non-dependent type i.e. A
+mkHypo :: Type -> Hypo
+mkHypo ty = (Explicit,wildCId,ty)
+
+-- | creates hypothesis for dependent type i.e. (x : A)
+mkDepHypo :: CId -> Type -> Hypo
+mkDepHypo x ty = (Explicit,x,ty)
+
+-- | creates hypothesis for dependent type with implicit argument i.e. ({x} : A)
+mkImplHypo :: CId -> Type -> Hypo
+mkImplHypo x ty = (Implicit,x,ty)
+
+pType :: RP.ReadP Type
+pType = do
+ RP.skipSpaces
+ hyps <- RP.sepBy (pHypo >>= \h -> RP.skipSpaces >> RP.string "->" >> return h) RP.skipSpaces
+ RP.skipSpaces
+ (cat,args) <- pAtom
+ return (DTyp (concat hyps) cat args)
+ where
+ pHypo =
+ do (cat,args) <- pAtom
+ return [(Explicit,wildCId,DTyp [] cat args)]
+ RP.<++
+ (RP.between (RP.char '(') (RP.char ')') $ do
+ xs <- RP.option [(Explicit,wildCId)] $ do
+ xs <- pBinds
+ RP.skipSpaces
+ RP.char ':'
+ return xs
+ ty <- pType
+ return [(b,v,ty) | (b,v) <- xs])
+ RP.<++
+ (RP.between (RP.char '{') (RP.char '}') $ do
+ vs <- RP.sepBy1 (RP.skipSpaces >> pCId) (RP.skipSpaces >> RP.char ',')
+ RP.skipSpaces
+ RP.char ':'
+ ty <- pType
+ return [(Implicit,v,ty) | v <- vs])
+
+ pAtom = do
+ cat <- pCId
+ RP.skipSpaces
+ args <- RP.sepBy pArg RP.skipSpaces
+ return (cat, args)
+
+ppType :: Int -> [CId] -> Type -> PP.Doc
+ppType d scope (DTyp hyps cat args)
+ | null hyps = ppRes scope cat args
+ | otherwise = let (scope',hdocs) = mapAccumL ppHypo scope hyps
+ in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope' cat args) hdocs)
+ where
+ ppRes scope cat es = ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)
+
+ppHypo scope (Explicit,x,typ) = if x == wildCId
+ then (scope,ppType 1 scope typ)
+ else let y = freshName x scope
+ in (y:scope,PP.parens (ppCId y PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+ppHypo scope (Implicit,x,typ) = if x == wildCId
+ then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+ else let y = freshName x scope
+ in (y:scope,PP.parens (PP.braces (ppCId y) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
diff --git a/src/runtime/haskell/PGF/TypeCheck.hs b/src/runtime/haskell/PGF/TypeCheck.hs
new file mode 100644
index 000000000..937c21786
--- /dev/null
+++ b/src/runtime/haskell/PGF/TypeCheck.hs
@@ -0,0 +1,524 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PGF.TypeCheck
+-- Maintainer : Krasimir Angelov
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Type checking in abstract syntax with dependent types.
+-- The type checker also performs renaming and checking for unknown
+-- functions. The variable references are replaced by de Bruijn indices.
+--
+-----------------------------------------------------------------------------
+
+module PGF.TypeCheck (checkType, checkExpr, inferExpr,
+
+ ppTcError, TcError(..)
+ ) where
+
+import PGF.Data
+import PGF.Expr
+import PGF.Macros (typeOfHypo)
+import PGF.CId
+
+import Data.Map as Map
+import Data.IntMap as IntMap
+import Data.Maybe as Maybe
+import Data.List as List
+import Control.Monad
+import Text.PrettyPrint
+
+-----------------------------------------------------
+-- The Scope
+-----------------------------------------------------
+
+data TType = TTyp Env Type
+newtype Scope = Scope [(CId,TType)]
+
+emptyScope = Scope []
+
+addScopedVar :: CId -> TType -> Scope -> Scope
+addScopedVar x tty (Scope gamma) = Scope ((x,tty):gamma)
+
+-- | returns the type and the De Bruijn index of a local variable
+lookupVar :: CId -> Scope -> Maybe (Int,TType)
+lookupVar x (Scope gamma) = listToMaybe [(i,tty) | ((y,tty),i) <- zip gamma [0..], x == y]
+
+-- | returns the type and the name of a local variable
+getVar :: Int -> Scope -> (CId,TType)
+getVar i (Scope gamma) = gamma !! i
+
+scopeEnv :: Scope -> Env
+scopeEnv (Scope gamma) = let n = length gamma
+ in [VGen (n-i-1) [] | i <- [0..n-1]]
+
+scopeVars :: Scope -> [CId]
+scopeVars (Scope gamma) = List.map fst gamma
+
+scopeSize :: Scope -> Int
+scopeSize (Scope gamma) = length gamma
+
+-----------------------------------------------------
+-- The Monad
+-----------------------------------------------------
+
+type MetaStore = IntMap MetaValue
+data MetaValue
+ = MUnbound Scope [Expr -> TcM ()]
+ | MBound Expr
+ | MGuarded Expr [Expr -> TcM ()] {-# UNPACK #-} !Int -- the Int is the number of constraints that have to be solved
+ -- to unlock this meta variable
+
+newtype TcM a = TcM {unTcM :: Abstr -> MetaId -> MetaStore -> TcResult a}
+data TcResult a
+ = Ok {-# UNPACK #-} !MetaId MetaStore a
+ | Fail TcError
+
+instance Monad TcM where
+ return x = TcM (\abstr metaid ms -> Ok metaid ms x)
+ f >>= g = TcM (\abstr metaid ms -> case unTcM f abstr metaid ms of
+ Ok metaid ms x -> unTcM (g x) abstr metaid ms
+ Fail e -> Fail e)
+
+instance Functor TcM where
+ fmap f x = TcM (\abstr metaid ms -> case unTcM x abstr metaid ms of
+ Ok metaid ms x -> Ok metaid ms (f x)
+ Fail e -> Fail e)
+
+lookupCatHyps :: CId -> TcM [Hypo]
+lookupCatHyps cat = TcM (\abstr metaid ms -> case Map.lookup cat (cats abstr) of
+ Just hyps -> Ok metaid ms hyps
+ Nothing -> Fail (UnknownCat cat))
+
+lookupFunType :: CId -> TcM TType
+lookupFunType fun = TcM (\abstr metaid ms -> case Map.lookup fun (funs abstr) of
+ Just (ty,_,_) -> Ok metaid ms (TTyp [] ty)
+ Nothing -> Fail (UnknownFun fun))
+
+newMeta :: Scope -> TcM MetaId
+newMeta scope = TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MUnbound scope []) ms) metaid)
+
+newGuardedMeta :: Scope -> Expr -> TcM MetaId
+newGuardedMeta scope e = getFuns >>= \funs -> TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MGuarded e [] 0) ms) metaid)
+
+getMeta :: MetaId -> TcM MetaValue
+getMeta i = TcM (\abstr metaid ms -> Ok metaid ms $! case IntMap.lookup i ms of
+ Just mv -> mv)
+setMeta :: MetaId -> MetaValue -> TcM ()
+setMeta i mv = TcM (\abstr metaid ms -> Ok metaid (IntMap.insert i mv ms) ())
+
+tcError :: TcError -> TcM a
+tcError e = TcM (\abstr metaid ms -> Fail e)
+
+getFuns :: TcM Funs
+getFuns = TcM (\abstr metaid ms -> Ok metaid ms (funs abstr))
+
+addConstraint :: MetaId -> MetaId -> Env -> [Value] -> (Value -> TcM ()) -> TcM ()
+addConstraint i j env vs c = do
+ funs <- getFuns
+ mv <- getMeta j
+ case mv of
+ MUnbound scope cs -> addRef >> setMeta j (MUnbound scope ((\e -> release >> c (apply funs env e vs)) : cs))
+ MBound e -> c (apply funs env e vs)
+ MGuarded e cs x | x == 0 -> c (apply funs env e vs)
+ | otherwise -> addRef >> setMeta j (MGuarded e ((\e -> release >> c (apply funs env e vs)) : cs) x)
+ where
+ addRef = TcM (\abstr metaid ms -> case IntMap.lookup i ms of
+ Just (MGuarded e cs x) -> Ok metaid (IntMap.insert i (MGuarded e cs (x+1)) ms) ())
+
+ release = TcM (\abstr metaid ms -> case IntMap.lookup i ms of
+ Just (MGuarded e cs x) -> if x == 1
+ then unTcM (sequence_ [c e | c <- cs]) abstr metaid (IntMap.insert i (MGuarded e [] 0) ms)
+ else Ok metaid (IntMap.insert i (MGuarded e cs (x-1)) ms) ())
+
+-----------------------------------------------------
+-- Type errors
+-----------------------------------------------------
+
+-- | If an error occurs in the typechecking phase
+-- the type checker returns not a plain text error message
+-- but a 'TcError' structure which describes the error.
+data TcError
+ = UnknownCat CId -- ^ Unknown category name was found.
+ | UnknownFun CId -- ^ Unknown function name was found.
+ | WrongCatArgs [CId] Type CId Int Int -- ^ A category was applied to wrong number of arguments.
+ -- The first integer is the number of expected arguments and
+ -- the second the number of given arguments.
+ -- The @[CId]@ argument is the list of free variables
+ -- in the type. It should be used for the 'showType' function.
+ | TypeMismatch [CId] Expr Type Type -- ^ The expression is not of the expected type.
+ -- The first type is the expected type, while
+ -- the second is the inferred. The @[CId]@ argument is the list
+ -- of free variables in both the expression and the type.
+ -- It should be used for the 'showType' and 'showExpr' functions.
+ | NotFunType [CId] Expr Type -- ^ Something that is not of function type was applied to an argument.
+ | CannotInferType [CId] Expr -- ^ It is not possible to infer the type of an expression.
+ | UnresolvedMetaVars [CId] Expr [MetaId] -- ^ Some metavariables have to be instantiated in order to complete the typechecking.
+ | UnexpectedImplArg [CId] Expr -- ^ Implicit argument was passed where the type doesn't allow it
+
+-- | Renders the type checking error to a document. See 'Text.PrettyPrint'.
+ppTcError :: TcError -> Doc
+ppTcError (UnknownCat cat) = text "Category" <+> ppCId cat <+> text "is not in scope"
+ppTcError (UnknownFun fun) = text "Function" <+> ppCId fun <+> text "is not in scope"
+ppTcError (WrongCatArgs xs ty cat m n) = text "Category" <+> ppCId cat <+> text "should have" <+> int m <+> text "argument(s), but has been given" <+> int n $$
+ text "In the type:" <+> ppType 0 xs ty
+ppTcError (TypeMismatch xs e ty1 ty2) = text "Couldn't match expected type" <+> ppType 0 xs ty1 $$
+ text " against inferred type" <+> ppType 0 xs ty2 $$
+ text "In the expression:" <+> ppExpr 0 xs e
+ppTcError (NotFunType xs e ty) = text "A function type is expected for the expression" <+> ppExpr 0 xs e <+> text "instead of type" <+> ppType 0 xs ty
+ppTcError (CannotInferType xs e) = text "Cannot infer the type of expression" <+> ppExpr 0 xs e
+ppTcError (UnresolvedMetaVars xs e ms) = text "Meta variable(s)" <+> fsep (List.map ppMeta ms) <+> text "should be resolved" $$
+ text "in the expression:" <+> ppExpr 0 xs e
+ppTcError (UnexpectedImplArg xs e) = braces (ppExpr 0 xs e) <+> text "is implicit argument but not implicit argument is expected here"
+
+-----------------------------------------------------
+-- checkType
+-----------------------------------------------------
+
+-- | Check whether a given type is consistent with the abstract
+-- syntax of the grammar.
+checkType :: PGF -> Type -> Either TcError Type
+checkType pgf ty =
+ case unTcM (tcType emptyScope ty >>= refineType) (abstract pgf) 0 IntMap.empty of
+ Ok _ ms ty -> Right ty
+ Fail err -> Left err
+
+tcType :: Scope -> Type -> TcM Type
+tcType scope ty@(DTyp hyps cat es) = do
+ (scope,hyps) <- tcHypos scope hyps
+ c_hyps <- lookupCatHyps cat
+ let m = length es
+ n = length [ty | (Explicit,x,ty) <- c_hyps]
+ (delta,es) <- tcCatArgs scope es [] c_hyps ty n m
+ return (DTyp hyps cat es)
+
+tcHypos :: Scope -> [Hypo] -> TcM (Scope,[Hypo])
+tcHypos scope [] = return (scope,[])
+tcHypos scope (h:hs) = do
+ (scope,h ) <- tcHypo scope h
+ (scope,hs) <- tcHypos scope hs
+ return (scope,h:hs)
+
+tcHypo :: Scope -> Hypo -> TcM (Scope,Hypo)
+tcHypo scope (b,x,ty) = do
+ ty <- tcType scope ty
+ if x == wildCId
+ then return (scope,(b,x,ty))
+ else return (addScopedVar x (TTyp (scopeEnv scope) ty) scope,(b,x,ty))
+
+tcCatArgs scope [] delta [] ty0 n m = return (delta,[])
+tcCatArgs scope (EImplArg e:es) delta ((Explicit,x,ty):hs) ty0 n m = tcError (UnexpectedImplArg (scopeVars scope) e)
+tcCatArgs scope (EImplArg e:es) delta ((Implicit,x,ty):hs) ty0 n m = do
+ e <- tcExpr scope e (TTyp delta ty)
+ funs <- getFuns
+ (delta,es) <- if x == wildCId
+ then tcCatArgs scope es delta hs ty0 n m
+ else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m
+ return (delta,EImplArg e:es)
+tcCatArgs scope es delta ((Implicit,x,ty):hs) ty0 n m = do
+ i <- newMeta scope
+ (delta,es) <- if x == wildCId
+ then tcCatArgs scope es delta hs ty0 n m
+ else tcCatArgs scope es (VMeta i (scopeEnv scope) [] : delta) hs ty0 n m
+ return (delta,EImplArg (EMeta i) : es)
+tcCatArgs scope (e:es) delta ((Explicit,x,ty):hs) ty0 n m = do
+ e <- tcExpr scope e (TTyp delta ty)
+ funs <- getFuns
+ (delta,es) <- if x == wildCId
+ then tcCatArgs scope es delta hs ty0 n m
+ else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m
+ return (delta,e:es)
+tcCatArgs scope _ delta _ ty0@(DTyp _ cat _) n m = do
+ tcError (WrongCatArgs (scopeVars scope) ty0 cat n m)
+
+-----------------------------------------------------
+-- checkExpr
+-----------------------------------------------------
+
+-- | Checks an expression against a specified type.
+checkExpr :: PGF -> Expr -> Type -> Either TcError Expr
+checkExpr pgf e ty =
+ case unTcM (do e <- tcExpr emptyScope e (TTyp [] ty)
+ e <- refineExpr e
+ checkResolvedMetaStore emptyScope e
+ return e) (abstract pgf) 0 IntMap.empty of
+ Ok _ ms e -> Right e
+ Fail err -> Left err
+
+tcExpr :: Scope -> Expr -> TType -> TcM Expr
+tcExpr scope e0@(EAbs Implicit x e) tty =
+ case tty of
+ TTyp delta (DTyp ((Implicit,y,ty):hs) c es) -> do e <- if y == wildCId
+ then tcExpr (addScopedVar x (TTyp delta ty) scope)
+ e (TTyp delta (DTyp hs c es))
+ else tcExpr (addScopedVar x (TTyp delta ty) scope)
+ e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
+ return (EAbs Implicit x e)
+ _ -> do ty <- evalType (scopeSize scope) tty
+ tcError (NotFunType (scopeVars scope) e0 ty)
+tcExpr scope e0 (TTyp delta (DTyp ((Implicit,y,ty):hs) c es)) = do
+ e0 <- if y == wildCId
+ then tcExpr (addScopedVar wildCId (TTyp delta ty) scope)
+ e0 (TTyp delta (DTyp hs c es))
+ else tcExpr (addScopedVar wildCId (TTyp delta ty) scope)
+ e0 (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
+ return (EAbs Implicit wildCId e0)
+tcExpr scope e0@(EAbs Explicit x e) tty =
+ case tty of
+ TTyp delta (DTyp ((Explicit,y,ty):hs) c es) -> do e <- if y == wildCId
+ then tcExpr (addScopedVar x (TTyp delta ty) scope)
+ e (TTyp delta (DTyp hs c es))
+ else tcExpr (addScopedVar x (TTyp delta ty) scope)
+ e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
+ return (EAbs Explicit x e)
+ _ -> do ty <- evalType (scopeSize scope) tty
+ tcError (NotFunType (scopeVars scope) e0 ty)
+tcExpr scope (EMeta _) tty = do
+ i <- newMeta scope
+ return (EMeta i)
+tcExpr scope e0 tty = do
+ (e0,tty0) <- infExpr scope e0
+ i <- newGuardedMeta scope e0
+ eqType scope (scopeSize scope) i tty tty0
+ return (EMeta i)
+
+
+-----------------------------------------------------
+-- inferExpr
+-----------------------------------------------------
+
+-- | Tries to infer the type of a given expression. Note that
+-- even if the expression is type correct it is not always
+-- possible to infer its type in the GF type system.
+-- In this case the function returns the 'CannotInferType' error.
+inferExpr :: PGF -> Expr -> Either TcError (Expr,Type)
+inferExpr pgf e =
+ case unTcM (do (e,tty) <- infExpr emptyScope e
+ e <- refineExpr e
+ checkResolvedMetaStore emptyScope e
+ ty <- evalType 0 tty
+ return (e,ty)) (abstract pgf) 1 IntMap.empty of
+ Ok _ ms (e,ty) -> Right (e,ty)
+ Fail err -> Left err
+
+infExpr :: Scope -> Expr -> TcM (Expr,TType)
+infExpr scope e0@(EApp e1 e2) = do
+ (e1,TTyp delta ty) <- infExpr scope e1
+ (e0,delta,ty) <- tcArg scope e1 e2 delta ty
+ return (e0,TTyp delta ty)
+infExpr scope e0@(EFun x) = do
+ case lookupVar x scope of
+ Just (i,tty) -> return (EVar i,tty)
+ Nothing -> do tty <- lookupFunType x
+ return (e0,tty)
+infExpr scope e0@(EVar i) = do
+ return (e0,snd (getVar i scope))
+infExpr scope e0@(ELit l) = do
+ let cat = case l of
+ LStr _ -> mkCId "String"
+ LInt _ -> mkCId "Int"
+ LFlt _ -> mkCId "Float"
+ return (e0,TTyp [] (DTyp [] cat []))
+infExpr scope (ETyped e ty) = do
+ ty <- tcType scope ty
+ e <- tcExpr scope e (TTyp (scopeEnv scope) ty)
+ return (ETyped e ty,TTyp (scopeEnv scope) ty)
+infExpr scope (EImplArg e) = do
+ (e,tty) <- infExpr scope e
+ return (EImplArg e,tty)
+infExpr scope e = tcError (CannotInferType (scopeVars scope) e)
+
+tcArg scope e1 e2 delta ty0@(DTyp [] c es) = do
+ ty1 <- evalType (scopeSize scope) (TTyp delta ty0)
+ tcError (NotFunType (scopeVars scope) e1 ty1)
+tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = tcError (UnexpectedImplArg (scopeVars scope) e2)
+tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do
+ e2 <- tcExpr scope e2 (TTyp delta ty)
+ funs <- getFuns
+ if x == wildCId
+ then return (EApp e1 (EImplArg e2), delta,DTyp hs c es)
+ else return (EApp e1 (EImplArg e2),eval funs (scopeEnv scope) e2:delta,DTyp hs c es)
+tcArg scope e1 e2 delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = do
+ e2 <- tcExpr scope e2 (TTyp delta ty)
+ funs <- getFuns
+ if x == wildCId
+ then return (EApp e1 e2, delta,DTyp hs c es)
+ else return (EApp e1 e2,eval funs (scopeEnv scope) e2:delta,DTyp hs c es)
+tcArg scope e1 e2 delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do
+ i <- newMeta scope
+ if x == wildCId
+ then tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 delta (DTyp hs c es)
+ else tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 (VMeta i (scopeEnv scope) [] : delta) (DTyp hs c es)
+
+-----------------------------------------------------
+-- eqType
+-----------------------------------------------------
+
+eqType :: Scope -> Int -> MetaId -> TType -> TType -> TcM ()
+eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2 ty2@(DTyp hyps2 cat2 es2))
+ | cat1 == cat2 = do (k,delta1,delta2) <- eqHyps k delta1 hyps1 delta2 hyps2
+ sequence_ [eqExpr k delta1 e1 delta2 e2 | (e1,e2) <- zip es1 es2]
+ | otherwise = raiseTypeMatchError
+ where
+ raiseTypeMatchError = do ty1 <- evalType k tty1
+ ty2 <- evalType k tty2
+ e <- refineExpr (EMeta i0)
+ tcError (TypeMismatch (scopeVars scope) e ty1 ty2)
+
+ eqHyps :: Int -> Env -> [Hypo] -> Env -> [Hypo] -> TcM (Int,Env,Env)
+ eqHyps k delta1 [] delta2 [] =
+ return (k,delta1,delta2)
+ eqHyps k delta1 ((_,x,ty1) : h1s) delta2 ((_,y,ty2) : h2s) = do
+ eqType scope k i0 (TTyp delta1 ty1) (TTyp delta2 ty2)
+ if x == wildCId && y == wildCId
+ then eqHyps k delta1 h1s delta2 h2s
+ else if x /= wildCId && y /= wildCId
+ then eqHyps (k+1) ((VGen k []):delta1) h1s ((VGen k []):delta2) h2s
+ else raiseTypeMatchError
+ eqHyps k delta1 h1s delta2 h2s = raiseTypeMatchError
+
+ eqExpr :: Int -> Env -> Expr -> Env -> Expr -> TcM ()
+ eqExpr k env1 e1 env2 e2 = do
+ funs <- getFuns
+ eqValue k (eval funs env1 e1) (eval funs env2 e2)
+
+ eqValue :: Int -> Value -> Value -> TcM ()
+ eqValue k v1 v2 = do
+ v1 <- deRef v1
+ v2 <- deRef v2
+ eqValue' k v1 v2
+
+ deRef v@(VMeta i env vs) = do
+ mv <- getMeta i
+ funs <- getFuns
+ case mv of
+ MBound e -> deRef (apply funs env e vs)
+ MGuarded e _ x | x == 0 -> deRef (apply funs env e vs)
+ | otherwise -> return v
+ MUnbound _ _ -> return v
+ deRef v = return v
+
+ eqValue' k (VSusp i env vs1 c) v2 = addConstraint i0 i env vs1 (\v1 -> eqValue k (c v1) v2)
+ eqValue' k v1 (VSusp i env vs2 c) = addConstraint i0 i env vs2 (\v2 -> eqValue k v1 (c v2))
+ eqValue' k (VMeta i env1 vs1) (VMeta j env2 vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2
+ eqValue' k (VMeta i env1 vs1) v2 = do (MUnbound scopei cs) <- getMeta i
+ e2 <- mkLam i scopei env1 vs1 v2
+ setMeta i (MBound e2)
+ sequence_ [c e2 | c <- cs]
+ eqValue' k v1 (VMeta i env2 vs2) = do (MUnbound scopei cs) <- getMeta i
+ e1 <- mkLam i scopei env2 vs2 v1
+ setMeta i (MBound e1)
+ sequence_ [c e1 | c <- cs]
+ eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2
+ eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return ()
+ eqValue' k (VGen i vs1) (VGen j vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2
+ eqValue' k (VClosure env1 (EAbs _ x1 e1)) (VClosure env2 (EAbs _ x2 e2)) = let v = VGen k []
+ in eqExpr (k+1) (v:env1) e1 (v:env2) e2
+ eqValue' k v1 v2 = raiseTypeMatchError
+
+ mkLam i scope env vs0 v = do
+ let k = scopeSize scope
+ vs = reverse (take k env) ++ vs0
+ xs = nub [i | VGen i [] <- vs]
+ if length vs == length xs
+ then return ()
+ else raiseTypeMatchError
+ v <- occurCheck i k xs v
+ funs <- getFuns
+ return (addLam vs0 (value2expr funs (length xs) v))
+ where
+ addLam [] e = e
+ addLam (v:vs) e = EAbs Explicit var (addLam vs e)
+
+ var = mkCId "v"
+
+ occurCheck i0 k xs (VApp f vs) = do vs <- mapM (occurCheck i0 k xs) vs
+ return (VApp f vs)
+ occurCheck i0 k xs (VLit l) = return (VLit l)
+ occurCheck i0 k xs (VMeta i env vs) = do if i == i0
+ then raiseTypeMatchError
+ else return ()
+ mv <- getMeta i
+ funs <- getFuns
+ case mv of
+ MBound e -> occurCheck i0 k xs (apply funs env e vs)
+ MGuarded e _ _ -> occurCheck i0 k xs (apply funs env e vs)
+ MUnbound scopei _ | scopeSize scopei > k -> raiseTypeMatchError
+ | otherwise -> do vs <- mapM (occurCheck i0 k xs) vs
+ return (VMeta i env vs)
+ occurCheck i0 k xs (VSusp i env vs cnt) = do addConstraint i0 i env vs (\v -> occurCheck i0 k xs (cnt v) >> return ())
+ return (VSusp i env vs cnt)
+ occurCheck i0 k xs (VGen i vs) = case List.findIndex (==i) xs of
+ Just i -> do vs <- mapM (occurCheck i0 k xs) vs
+ return (VGen i vs)
+ Nothing -> raiseTypeMatchError
+ occurCheck i0 k xs (VClosure env e) = do env <- mapM (occurCheck i0 k xs) env
+ return (VClosure env e)
+
+
+-----------------------------------------------------------
+-- check for meta variables that still have to be resolved
+-----------------------------------------------------------
+
+checkResolvedMetaStore :: Scope -> Expr -> TcM ()
+checkResolvedMetaStore scope e = TcM (\abstr metaid ms ->
+ let xs = [i | (i,mv) <- IntMap.toList ms, not (isResolved mv)]
+ in if List.null xs
+ then Ok metaid ms ()
+ else Fail (UnresolvedMetaVars (scopeVars scope) e xs))
+ where
+ isResolved (MUnbound _ []) = True
+ isResolved (MGuarded _ _ _) = True
+ isResolved (MBound _) = True
+ isResolved _ = False
+
+-----------------------------------------------------
+-- evalType
+-----------------------------------------------------
+
+evalType :: Int -> TType -> TcM Type
+evalType k (TTyp delta ty) = do funs <- getFuns
+ refineType (evalTy funs k delta ty)
+ where
+ evalTy sig k delta (DTyp hyps cat es) =
+ let ((k1,delta1),hyps1) = mapAccumL (evalHypo sig) (k,delta) hyps
+ in DTyp hyps1 cat (List.map (normalForm sig k1 delta1) es)
+
+ evalHypo sig (k,delta) (b,x,ty) =
+ if x == wildCId
+ then ((k, delta),(b,x,evalTy sig k delta ty))
+ else ((k+1,(VGen k []):delta),(b,x,evalTy sig k delta ty))
+
+
+-----------------------------------------------------
+-- refinement
+-----------------------------------------------------
+
+refineExpr :: Expr -> TcM Expr
+refineExpr e = TcM (\abstr metaid ms -> Ok metaid ms (refineExpr_ ms e))
+
+refineExpr_ ms e = refine e
+ where
+ refine (EAbs b x e) = EAbs b x (refine e)
+ refine (EApp e1 e2) = EApp (refine e1) (refine e2)
+ refine (ELit l) = ELit l
+ refine (EMeta i) = case IntMap.lookup i ms of
+ Just (MBound e ) -> refine e
+ Just (MGuarded e _ _) -> refine e
+ _ -> EMeta i
+ refine (EFun f) = EFun f
+ refine (EVar i) = EVar i
+ refine (ETyped e ty) = ETyped (refine e) (refineType_ ms ty)
+ refine (EImplArg e) = EImplArg (refine e)
+
+refineType :: Type -> TcM Type
+refineType ty = TcM (\abstr metaid ms -> Ok metaid ms (refineType_ ms ty))
+
+refineType_ ms (DTyp hyps cat es) = DTyp [(b,x,refineType_ ms ty) | (b,x,ty) <- hyps] cat (List.map (refineExpr_ ms) es)
+
+value2expr sig i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr sig i) vs)
+value2expr sig i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr sig i) vs)
+value2expr sig i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr sig i) vs)
+value2expr sig i (VSusp j env vs k) = value2expr sig i (k (VGen j vs))
+value2expr sig i (VLit l) = ELit l
+value2expr sig i (VClosure env (EAbs b x e)) = EAbs b x (value2expr sig (i+1) (eval sig ((VGen i []):env) e))
diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs
new file mode 100644
index 000000000..429551f54
--- /dev/null
+++ b/src/runtime/haskell/PGF/VisualizeTree.hs
@@ -0,0 +1,353 @@
+----------------------------------------------------------------------
+-- |
+-- Module : VisualizeTree
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date:
+-- > CVS $Author:
+-- > CVS $Revision:
+--
+-- Print a graph of an abstract syntax tree in Graphviz DOT format
+-- Based on BB's VisualizeGrammar
+-- FIXME: change this to use GF.Visualization.Graphviz,
+-- instead of rolling its own.
+-----------------------------------------------------------------------------
+
+module PGF.VisualizeTree ( graphvizAbstractTree
+ , graphvizParseTree
+ , graphvizDependencyTree
+ , graphvizAlignment
+ , tree2mk
+ , getDepLabels
+ , PosText(..), readPosText
+ ) where
+
+import PGF.CId (CId,showCId,pCId,mkCId)
+import PGF.Data
+import PGF.Tree
+import PGF.Expr (showExpr)
+import PGF.Linearize
+import PGF.Macros (lookValCat)
+
+import qualified Data.Map as Map
+import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
+import Data.Char (isDigit)
+import qualified Text.ParserCombinators.ReadP as RP
+
+import Debug.Trace
+
+graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String
+graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree
+
+tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String]
+tree2graph pgf (funs,cats) = prf [] where
+ prf ps t = let (nod,lab) = prn ps t in
+ (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") :
+ case t of
+ Fun cid trees ->
+ [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
+ concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
+ Abs xs (Fun cid trees) ->
+ [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
+ concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
+ _ -> []
+ prn ps t = case t of
+ Fun cid _ ->
+ let
+ fun = if funs then showCId cid else ""
+ cat = if cats then prCat cid else ""
+ colon = if funs && cats then " : " else ""
+ lab = "\"" ++ fun ++ colon ++ cat ++ "\""
+ in (show(show (ps :: [Int])),lab)
+ Abs bs tree ->
+ let fun = case tree of
+ Fun cid _ -> Fun cid []
+ _ -> tree
+ in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"")
+ _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"")
+ pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];"
+ arr = " -- " -- if digr then " -> " else " -- "
+ prCat = showCId . lookValCat pgf
+ esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts
+
+prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
+ graph = if digr then "digraph" else "graph"
+
+
+-- replace each non-atomic constructor with mkC, where C is the val cat
+tree2mk :: PGF -> Expr -> String
+tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
+ t2m t = case t of
+ Fun cid [] -> t
+ Fun cid ts -> Fun (mk cid) (map t2m ts)
+ _ -> t
+ mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
+
+-- dependency trees from Linearize.linearizeMark
+
+graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
+graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
+ "malt" -> unlines (lin2dep format)
+ "malt_input" -> unlines (lin2dep format)
+ _ -> prGraph True (lin2dep format)
+
+ where
+
+ lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of
+ "malt" -> map (concat . intersperse "\t") wnodes
+ "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
+ _ -> prelude ++ nodes ++ links
+
+ ifd s = if debug then s else []
+
+ pot = readPosText $ head $ linearizesMark pgf lang exp
+ ---- use Just str if you have str to match against
+
+ prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
+
+ nodes = map mkNode nodeWords
+ mkNode (i,((_,p),ss)) =
+ node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
+ nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
+ ((Just f,p),w) <- wlins pot]
+
+ links = map mkLink thelinks
+ thelinks = [(word y, x, label tr y x) |
+ (_,((f,x),_)) <- tail nodeWords,
+ let y = dominant x]
+ mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
+ node = show . show
+
+ dominant x = case x of
+ [] -> x
+ _ | not (x == hx) -> hx
+ _ -> dominant (init x)
+ where
+ hx = headArg (init x) tr x
+
+ headArg x0 tr x = case (tr,x) of
+ (Fun f [],[_]) -> x0 ---- ??
+ (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
+ (Fun f ts,i:y) -> headArg x0 (ts !! i) y
+ _ -> x0 ----
+
+ label tr y x = case span (uncurry (==)) (zip y x) of
+ (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
+ _ -> "" ----
+
+ funAt tr x = case (tr,x) of
+ (Fun f _ ,[]) -> f
+ (Fun f ts,i:y) -> funAt (ts !! i) y
+ _ -> mkCId (prTree tr) ----
+
+ word x = if elem x sortedNodes then x else
+ let x' = headArg x tr (x ++[0]) in
+ if x' == x then [] else word x'
+
+ tr = expr2tree exp
+ sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
+
+ labels = maybe Map.empty id mlab
+ getHead i f = case Map.lookup f labels of
+ Just ls -> length $ takeWhile (/= "head") ls
+ _ -> i
+ getLabel i f = case Map.lookup f labels of
+ Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
+ _ -> showCId f ++ "#" ++ show i
+
+-- to generate CoNLL format for MaltParser
+ nodeMap :: Map.Map [Int] Int
+ nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
+
+ arcMap :: Map.Map [Int] ([Int],String)
+ arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
+
+ lookDomLab p = case Map.lookup p arcMap of
+ Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
+ _ -> (0,rootlabel)
+
+ wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
+ (i, ((fun,p),ws)) <- tail nodeWords,
+ let pos = showCId $ lookValCat pgf fun,
+ let morph = unspec,
+ let (dom,lab) = lookDomLab p
+ ]
+ maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
+ unspec = "_"
+ rootlabel = "ROOT"
+
+type Labels = Map.Map CId [String]
+
+getDepLabels :: [String] -> Labels
+getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
+
+
+-- parse trees from Linearize.linearizeMark
+---- nubrec and domins are quadratic, but could be (n log n)
+
+graphvizParseTree :: PGF -> CId -> Expr -> String
+graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where
+ linMark = head . linearizesMark pgf lang
+ ---- use Just str if you have str to match against
+
+lin2tree pgf s = trace s $ prelude ++ nodes ++ links where
+
+ prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]
+
+ nodeRecs = zip [0..]
+ (nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
+ nlins pts =
+ nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
+ concatMap nlins [ts | T _ ts <- pts]
+ leaves pt = [(p++[j],s) | (j,(p,s)) <-
+ zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]]
+
+ nubrec es rs = case rs of
+ r:rr -> let r' = filter (not . flip elem es) (nub r)
+ in r' : nubrec (r' ++ es) rr
+ _ -> rs
+
+ nodes = map mkStruct nodeRecs
+
+ mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
+ cat = showCId . lookValCat pgf
+ fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
+ struct i = "struct" ++ show i
+
+ links = map mkEdge domins
+ domins = nub [((i,x),(j,y)) |
+ (i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
+ x <- xs, y <- ys, dominates x y]
+ dominates (p,x) (q,y) = not (null q) && p == init q
+ mkEdge ((i,x),(j,y)) =
+ struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
+ struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"
+
+ postext = readPosText s
+
+-- auxiliaries for graphviz syntax
+struct i = "struct" ++ show i
+mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
+uncommas = map (\c -> if c==',' then 'c' else c)
+tag s = "<" ++ s ++ ">"
+showp = init . tail . show
+mtag = tag . ('n':) . uncommas
+
+-- word alignments from Linearize.linearizesMark
+-- words are chunks like {[0,1,1,0] old}
+
+graphvizAlignment :: PGF -> Expr -> String
+graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
+ linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)]
+
+lin2graph :: [String] -> [String]
+lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links
+
+ where
+
+ prelude = ["rankdir=LR ;", "node [shape = record] ;"]
+
+ nlins :: [(Int,[((Int,String),String)])]
+ nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) |
+ (i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
+
+ unw = concat . intersperse "\\ " -- space escape in graphviz
+
+ nodes = map mkStruct nlins
+
+ mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
+
+ fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
+
+ links = nub $ concatMap mkEdge (init nlins)
+
+ mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
+ [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
+
+ edge i v w =
+ struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
+{-
+alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double)
+alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where
+ linsMark t =
+ [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)]
+
+ mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double)
+ mkStat =
+
+ mkAlign :: [String] -> [(String,String)]
+ mkAlign ss =
+
+ nlins :: [(Int,[((Int,String),String)])]
+ nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) |
+ (i,vs) <- zip [0..] (map (wlins . readPosText) ss)]
+
+ nodes = map mkStruct nlins
+
+ mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
+
+ fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
+
+ links = nub $ concatMap mkEdge (init nlins)
+
+ mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
+ [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
+
+ edge i v w =
+ struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
+-}
+
+wlins :: PosText -> [((Maybe CId,[Int]),[String])]
+wlins pt = case pt of
+ T p pts -> concatMap (lins p) pts
+ M ws -> if null ws then [] else [((Nothing,[]),ws)]
+ where
+ lins p pt = case pt of
+ T q pts -> concatMap (lins q) pts
+ M ws -> if null ws then [] else [(p,ws)]
+
+data PosText =
+ T (Maybe CId,[Int]) [PosText]
+ | M [String]
+ deriving Show
+
+readPosText :: String -> PosText
+readPosText = fst . head . (RP.readP_to_S pPosText) where
+ pPosText = do
+ RP.char '(' >> RP.skipSpaces
+ p <- pPos
+ RP.skipSpaces
+ ts <- RP.many pPosText
+ RP.char ')' >> RP.skipSpaces
+ return (T p ts)
+ RP.<++ do
+ ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
+ return (M ws)
+ pPos = do
+ fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
+ RP.<++ (return Nothing)
+ RP.char '[' >> RP.skipSpaces
+ is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
+ RP.char ']' >> RP.skipSpaces
+ RP.char ')' RP.<++ return ' '
+ return (fun,map read is)
+
+
+{-
+digraph{
+rankdir ="LR" ;
+node [shape = record] ;
+
+struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ;
+struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ;
+
+struct1:f0 -> struct2:f0 ;
+struct1:f1 -> struct2:f2 ;
+struct1:f2 -> struct2:f3 ;
+struct1:f3 -> struct2:f1 ;
+struct1:f0 -> struct2:f4 ;
+}
+-}
+