copy the types for BracketedString from the Haskell runtime to the Haskell bindings

author: krasimir <krasimir@chalmers.se> 2017-01-26 10:31:43 +0000
committer: krasimir <krasimir@chalmers.se> 2017-01-26 10:31:43 +0000
commit: 17163ae88163587f2c9615898a5789aaf3bee298 (patch)
tree: e0924b1b6f90b3696345925b4b01dc625a4c582b /src/runtime
parent: 6af632dd185b176222724cce47a49428f1301752 (diff)
1 files changed, 40 insertions, 0 deletions
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index 1f8d07c12..5d0484c1e 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -44,6 +44,8 @@ module PGF2 (-- * PGF
              ConcName,Concr,languages,
              -- ** Linearization
              linearize,linearizeAll,
+             FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
+
              alignWords,
              -- ** Parsing
              parse, parseWithHeuristics,
@@ -65,6 +67,7 @@ import Prelude hiding (fromEnum)
 import Control.Exception(Exception,throwIO)
 import Control.Monad(forM_)
 import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
+import Text.PrettyPrint
 import PGF2.Expr
 import PGF2.FFI
 
@@ -541,6 +544,43 @@ linearizeAll lang e = unsafePerformIO $
         else do gu_pool_free pl
                 throwIO (PGFError "The abstract tree cannot be linearized")
 
+type FId    = Int  -- ^ identifier of a phrase inside a 'BracketedString' (see 'Bracket')
+type LIndex = Int  -- ^ constituent index inside a 'BracketedString' (see 'Bracket')
+
+-- | A 'BracketedString' represents a sentence that is linearized
+-- as usual, but which also retains the ''brackets'' that
+-- mark the beginning and the end of each constituent.
+data BracketedString
+  = Leaf String                                                                -- ^ this is a leaf, i.e. a single token
+  | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString]
+                                                                               -- ^ this is a bracket. The first 'CId' is the category of
+                                                                               -- the phrase. The 'FId' is a unique identifier for
+                                                                               -- every phrase in the sentence. For context-free grammars,
+                                                                               -- i.e. without discontinuous constituents, this identifier
+                                                                               -- is also unique for every bracket. When there are discontinuous
+                                                                               -- phrases, the identifiers are unique for every phrase but
+                                                                               -- not for every bracket, since a bracket represents a constituent.
+                                                                               -- The different constituents can still be distinguished by using
+                                                                               -- the constituent index, i.e. 'LIndex'. If the grammar is reduplicating,
+                                                                               -- then the constituent indices will be the same for all brackets
+                                                                               -- that represent the same constituent.
+                                                                               -- The second 'CId' is the name of the abstract function that generated
+                                                                               -- this phrase.
+
+-- | Renders the bracketed string as a string where the brackets
+-- are shown as @(S:fid ...)@, where @S@ is the category and @fid@ is the 'FId'
+-- of the phrase. The constituent index and the function name are not shown.
+showBracketedString :: BracketedString -> String
+showBracketedString = render . ppBracketedString
+
+ppBracketedString (Leaf t) = text t  -- a leaf is printed as the bare token
+ppBracketedString (Bracket cat fid index _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
+
+-- | Extracts the sequence of tokens (the leaves, in order) from the bracketed string, dropping all bracket information
+flattenBracketedString :: BracketedString -> [String]
+flattenBracketedString (Leaf w)              = [w]
+flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
+
 alignWords :: Concr -> Expr -> [(String, [Int])]
 alignWords lang e = unsafePerformIO $
   withGuPool $ \pl ->
author	krasimir <krasimir@chalmers.se>	2017-01-26 10:31:43 +0000
committer	krasimir <krasimir@chalmers.se>	2017-01-26 10:31:43 +0000
commit	17163ae88163587f2c9615898a5789aaf3bee298 (patch)
tree	e0924b1b6f90b3696345925b4b01dc625a4c582b /src/runtime
parent	6af632dd185b176222724cce47a49428f1301752 (diff)