From 15d014abb825837f0fd7c9e17d5907001135faaf Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Wed, 6 Sep 2017 12:38:42 +0200 Subject: the parser in the C runtime can now detect incomplete sentences just like the parser in the Haskell runtime. This is also reflected in all bindings. --- src/runtime/haskell-bind/PGF2.hsc | 56 ++++++++++++++++++-------- src/runtime/haskell-bind/PGF2/FFI.hs | 15 ++++++- src/runtime/haskell-bind/examples/pgf-shell.hs | 16 ++++---- 3 files changed, 62 insertions(+), 25 deletions(-) (limited to 'src/runtime/haskell-bind') diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 8fb6e6ccb..0d65822f9 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -63,7 +63,7 @@ module PGF2 (-- * PGF alignWords, -- ** Parsing - parse, parseWithHeuristics, + ParseOutput(..), parse, parseWithHeuristics, -- ** Sentence Lookup lookupSentence, -- ** Generation @@ -480,7 +480,15 @@ getAnalysis ref self c_lemma c_anal prob exn = do anal <- peekUtf8CString c_anal writeIORef ref ((lemma, anal, prob):ans) -parse :: Concr -> Type -> String -> Either String [(Expr,Float)] +-- | This data type encodes the different outcomes which you could get from the parser. +data ParseOutput + = ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed. + -- The string is the token where the parser have failed. + | ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees. + -- The list should be non-empty. + | ParseIncomplete -- ^ The sentence is not complete. + +parse :: Concr -> Type -> String -> ParseOutput parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) [] parseWithHeuristics :: Concr -- ^ the language with which we parse @@ -497,7 +505,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse -- the input sentence; the current offset in the sentence. -- If a literal has been recognized then the output should -- be Just (expr,probability,end_offset) - -> Either String [(Expr,Float)] + -> ParseOutput parseWithHeuristics lang (Type ctype _) sent heuristic callbacks = unsafePerformIO $ do exprPl <- gu_new_pool @@ -510,11 +518,19 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks = if failed then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError if is_parse_error - then do c_tok <- (#peek GuExn, data.data) exn - tok <- peekUtf8CString c_tok - gu_pool_free parsePl - gu_pool_free exprPl - return (Left tok) + then do c_err <- (#peek GuExn, data.data) exn + c_incomplete <- (#peek PgfParseError, incomplete) c_err + if (c_incomplete :: CInt) == 0 + then do c_offset <- (#peek PgfParseError, offset) c_err + token_ptr <- (#peek PgfParseError, token_ptr) c_err + token_len <- (#peek PgfParseError, token_len) c_err + tok <- peekUtf8CStringLen token_ptr token_len + gu_pool_free parsePl + gu_pool_free exprPl + return (ParseFailed (fromIntegral (c_offset :: CInt)) tok) + else do gu_pool_free parsePl + gu_pool_free exprPl + return ParseIncomplete else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn if is_exn then do c_msg <- (#peek GuExn, data.data) exn @@ -528,7 +544,7 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks = else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl exprFPl <- newForeignPtr gu_pool_finalizer exprPl exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl) - return (Right exprs) + return (ParseOk exprs) mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap) mkCallbacksMap concr callbacks pool = do @@ -595,7 +611,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse -> Cat -- ^ the start category -> String -- ^ the input sentence -> Oracle - -> Either String [(Expr,Float)] + -> ParseOutput parseWithOracle lang cat sent (predict,complete,literal) = unsafePerformIO $ do parsePl <- gu_new_pool @@ -612,11 +628,19 @@ parseWithOracle lang cat sent (predict,complete,literal) = if failed then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError if is_parse_error - then do c_tok <- (#peek GuExn, data.data) exn - tok <- peekUtf8CString c_tok - gu_pool_free parsePl - gu_pool_free exprPl - return (Left tok) + then do c_err <- (#peek GuExn, data.data) exn + c_incomplete <- (#peek PgfParseError, incomplete) c_err + if (c_incomplete :: CInt) == 0 + then do c_offset <- (#peek PgfParseError, offset) c_err + token_ptr <- (#peek PgfParseError, token_ptr) c_err + token_len <- (#peek PgfParseError, token_len) c_err + tok <- peekUtf8CStringLen token_ptr token_len + gu_pool_free parsePl + gu_pool_free exprPl + return (ParseFailed (fromIntegral (c_offset :: CInt)) tok) + else do gu_pool_free parsePl + gu_pool_free exprPl + return ParseIncomplete else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn if is_exn then do c_msg <- (#peek GuExn, data.data) exn @@ -630,7 +654,7 @@ parseWithOracle lang cat sent (predict,complete,literal) = else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl exprFPl <- newForeignPtr gu_pool_finalizer exprPl exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl) - return (Right exprs) + return (ParseOk exprs) where oracleWrapper oracle catPtr lblPtr offset = do cat <- peekUtf8CString catPtr diff --git a/src/runtime/haskell-bind/PGF2/FFI.hs b/src/runtime/haskell-bind/PGF2/FFI.hs index a47655d8d..3f30631d8 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hs +++ b/src/runtime/haskell-bind/PGF2/FFI.hs @@ -2,7 +2,7 @@ module PGF2.FFI where -import Foreign ( alloca, poke ) +import Foreign ( alloca, peek, poke ) import Foreign.C import Foreign.Ptr import Foreign.ForeignPtr @@ -116,6 +116,19 @@ peekUtf8CString ptr = else do cs <- decode pptr return (((toEnum . fromEnum) x) : cs) +peekUtf8CStringLen :: CString -> CInt -> IO String +peekUtf8CStringLen ptr len = + alloca $ \pptr -> + poke pptr ptr >> decode pptr (ptr `plusPtr` fromIntegral len) + where + decode pptr end = do + ptr <- peek pptr + if ptr >= end + then return [] + else do x <- gu_utf8_decode pptr + cs <- decode pptr end + return (((toEnum . fromEnum) x) : cs) + newUtf8CString :: String -> Ptr GuPool -> IO CString newUtf8CString s pool = do -- An UTF8 character takes up to 6 bytes. We allocate enough diff --git a/src/runtime/haskell-bind/examples/pgf-shell.hs b/src/runtime/haskell-bind/examples/pgf-shell.hs index 722770822..05c991691 100644 --- a/src/runtime/haskell-bind/examples/pgf-shell.hs +++ b/src/runtime/haskell-bind/examples/pgf-shell.hs @@ -37,18 +37,18 @@ execute cmd = P lang s -> do pgf <- gets fst c <- getConcr' pgf lang case parse c (startCat pgf) s of - Left tok -> do put (pgf,[]) - putln ("Parse error: "++tok) - Right ts -> do put (pgf,map show ts) - pop + ParseFailed _ tok -> do put (pgf,[]) + putln ("Parse error: "++tok) + ParseOk ts -> do put (pgf,map show ts) + pop T from to s -> do pgf <- gets fst cfrom <- getConcr' pgf from cto <- getConcr' pgf to case parse cfrom (startCat pgf) s of - Left tok -> do put (pgf,[]) - putln ("Parse error: "++tok) - Right ts -> do put (pgf,map (linearize cto.fst) ts) - pop + ParseFailed _ tok -> do put (pgf,[]) + putln ("Parse error: "++tok) + ParseOk ts -> do put (pgf,map (linearize cto.fst) ts) + pop I path -> do pgf <- liftIO (readPGF path) putln . unwords . M.keys $ languages pgf put (pgf,[]) -- cgit v1.2.3