summaryrefslogtreecommitdiff
path: root/src/runtime/haskell-bind
diff options
context:
space:
mode:
authorKrasimir Angelov <kr.angelov@gmail.com>2017-09-06 12:38:42 +0200
committerKrasimir Angelov <kr.angelov@gmail.com>2017-09-06 12:38:42 +0200
commit15d014abb825837f0fd7c9e17d5907001135faaf (patch)
treebc569f465432042702dfaa240746b8c6db609588 /src/runtime/haskell-bind
parent18f2135785a71a1e93519a060d40b7ba523cf03b (diff)
the parser in the C runtime can now detect incomplete sentences just like the parser in the Haskell runtime. This is also reflected in all bindings.
Diffstat (limited to 'src/runtime/haskell-bind')
-rw-r--r--src/runtime/haskell-bind/PGF2.hsc56
-rw-r--r--src/runtime/haskell-bind/PGF2/FFI.hs15
-rw-r--r--src/runtime/haskell-bind/examples/pgf-shell.hs16
3 files changed, 62 insertions, 25 deletions
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index 8fb6e6ccb..0d65822f9 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -63,7 +63,7 @@ module PGF2 (-- * PGF
alignWords,
-- ** Parsing
- parse, parseWithHeuristics,
+ ParseOutput(..), parse, parseWithHeuristics,
-- ** Sentence Lookup
lookupSentence,
-- ** Generation
@@ -480,7 +480,15 @@ getAnalysis ref self c_lemma c_anal prob exn = do
anal <- peekUtf8CString c_anal
writeIORef ref ((lemma, anal, prob):ans)
-parse :: Concr -> Type -> String -> Either String [(Expr,Float)]
+-- | This data type encodes the different outcomes which you could get from the parser.
+data ParseOutput
+ = ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
+ -- The string is the token where the parser have failed.
+ | ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
+ -- The list should be non-empty.
+ | ParseIncomplete -- ^ The sentence is not complete.
+
+parse :: Concr -> Type -> String -> ParseOutput
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse
@@ -497,7 +505,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
-- the input sentence; the current offset in the sentence.
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
- -> Either String [(Expr,Float)]
+ -> ParseOutput
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
@@ -510,11 +518,19 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
- then do c_tok <- (#peek GuExn, data.data) exn
- tok <- peekUtf8CString c_tok
- gu_pool_free parsePl
- gu_pool_free exprPl
- return (Left tok)
+ then do c_err <- (#peek GuExn, data.data) exn
+ c_incomplete <- (#peek PgfParseError, incomplete) c_err
+ if (c_incomplete :: CInt) == 0
+ then do c_offset <- (#peek PgfParseError, offset) c_err
+ token_ptr <- (#peek PgfParseError, token_ptr) c_err
+ token_len <- (#peek PgfParseError, token_len) c_err
+ tok <- peekUtf8CStringLen token_ptr token_len
+ gu_pool_free parsePl
+ gu_pool_free exprPl
+ return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
+ else do gu_pool_free parsePl
+ gu_pool_free exprPl
+ return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
@@ -528,7 +544,7 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
- return (Right exprs)
+ return (ParseOk exprs)
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
mkCallbacksMap concr callbacks pool = do
@@ -595,7 +611,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse
-> Cat -- ^ the start category
-> String -- ^ the input sentence
-> Oracle
- -> Either String [(Expr,Float)]
+ -> ParseOutput
parseWithOracle lang cat sent (predict,complete,literal) =
unsafePerformIO $
do parsePl <- gu_new_pool
@@ -612,11 +628,19 @@ parseWithOracle lang cat sent (predict,complete,literal) =
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
- then do c_tok <- (#peek GuExn, data.data) exn
- tok <- peekUtf8CString c_tok
- gu_pool_free parsePl
- gu_pool_free exprPl
- return (Left tok)
+ then do c_err <- (#peek GuExn, data.data) exn
+ c_incomplete <- (#peek PgfParseError, incomplete) c_err
+ if (c_incomplete :: CInt) == 0
+ then do c_offset <- (#peek PgfParseError, offset) c_err
+ token_ptr <- (#peek PgfParseError, token_ptr) c_err
+ token_len <- (#peek PgfParseError, token_len) c_err
+ tok <- peekUtf8CStringLen token_ptr token_len
+ gu_pool_free parsePl
+ gu_pool_free exprPl
+ return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
+ else do gu_pool_free parsePl
+ gu_pool_free exprPl
+ return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
@@ -630,7 +654,7 @@ parseWithOracle lang cat sent (predict,complete,literal) =
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
- return (Right exprs)
+ return (ParseOk exprs)
where
oracleWrapper oracle catPtr lblPtr offset = do
cat <- peekUtf8CString catPtr
diff --git a/src/runtime/haskell-bind/PGF2/FFI.hs b/src/runtime/haskell-bind/PGF2/FFI.hs
index a47655d8d..3f30631d8 100644
--- a/src/runtime/haskell-bind/PGF2/FFI.hs
+++ b/src/runtime/haskell-bind/PGF2/FFI.hs
@@ -2,7 +2,7 @@
module PGF2.FFI where
-import Foreign ( alloca, poke )
+import Foreign ( alloca, peek, poke )
import Foreign.C
import Foreign.Ptr
import Foreign.ForeignPtr
@@ -116,6 +116,19 @@ peekUtf8CString ptr =
else do cs <- decode pptr
return (((toEnum . fromEnum) x) : cs)
+peekUtf8CStringLen :: CString -> CInt -> IO String
+peekUtf8CStringLen ptr len =
+ alloca $ \pptr ->
+ poke pptr ptr >> decode pptr (ptr `plusPtr` fromIntegral len)
+ where
+ decode pptr end = do
+ ptr <- peek pptr
+ if ptr >= end
+ then return []
+ else do x <- gu_utf8_decode pptr
+ cs <- decode pptr end
+ return (((toEnum . fromEnum) x) : cs)
+
newUtf8CString :: String -> Ptr GuPool -> IO CString
newUtf8CString s pool = do
-- An UTF8 character takes up to 6 bytes. We allocate enough
diff --git a/src/runtime/haskell-bind/examples/pgf-shell.hs b/src/runtime/haskell-bind/examples/pgf-shell.hs
index 722770822..05c991691 100644
--- a/src/runtime/haskell-bind/examples/pgf-shell.hs
+++ b/src/runtime/haskell-bind/examples/pgf-shell.hs
@@ -37,18 +37,18 @@ execute cmd =
P lang s -> do pgf <- gets fst
c <- getConcr' pgf lang
case parse c (startCat pgf) s of
- Left tok -> do put (pgf,[])
- putln ("Parse error: "++tok)
- Right ts -> do put (pgf,map show ts)
- pop
+ ParseFailed _ tok -> do put (pgf,[])
+ putln ("Parse error: "++tok)
+ ParseOk ts -> do put (pgf,map show ts)
+ pop
T from to s -> do pgf <- gets fst
cfrom <- getConcr' pgf from
cto <- getConcr' pgf to
case parse cfrom (startCat pgf) s of
- Left tok -> do put (pgf,[])
- putln ("Parse error: "++tok)
- Right ts -> do put (pgf,map (linearize cto.fst) ts)
- pop
+ ParseFailed _ tok -> do put (pgf,[])
+ putln ("Parse error: "++tok)
+ ParseOk ts -> do put (pgf,map (linearize cto.fst) ts)
+ pop
I path -> do pgf <- liftIO (readPGF path)
putln . unwords . M.keys $ languages pgf
put (pgf,[])