summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Commands2.hs10
-rw-r--r--src/runtime/c/pgf/parser.c37
-rw-r--r--src/runtime/c/pgf/pgf.h7
-rw-r--r--src/runtime/haskell-bind/PGF2.hsc56
-rw-r--r--src/runtime/haskell-bind/PGF2/FFI.hs15
-rw-r--r--src/runtime/haskell-bind/examples/pgf-shell.hs16
-rw-r--r--src/runtime/java/jni_utils.c9
-rw-r--r--src/runtime/java/jni_utils.h3
-rw-r--r--src/runtime/java/jpgf.c30
-rw-r--r--src/runtime/java/org/grammaticalframework/pgf/ParseError.java23
-rw-r--r--src/runtime/python/pypgf.c27
-rw-r--r--src/server/PGFService.hs12
12 files changed, 179 insertions, 66 deletions
diff --git a/src/compiler/GF/Command/Commands2.hs b/src/compiler/GF/Command/Commands2.hs
index c8e6fbff3..995d2d560 100644
--- a/src/compiler/GF/Command/Commands2.hs
+++ b/src/compiler/GF/Command/Commands2.hs
@@ -636,10 +636,12 @@ pgfCommands = Map.fromList [
cncs = optConcs env opts
parsed rs = Piped (Exprs ts,unlines msgs)
where
- ts = [hsExpr t|Right ts<-rs,(t,p)<-takeOptNum opts ts]
- msgs = concatMap (either err ok) rs
- err msg = ["Parse failed: "++msg]
- ok = map (PGF2.showExpr [] . fst).takeOptNum opts
+ ts = [hsExpr t|ParseOk ts<-rs,(t,p)<-takeOptNum opts ts]
+ msgs = concatMap mkMsg rs
+
+ mkMsg (ParseOk ts) = (map (PGF2.showExpr [] . fst).takeOptNum opts) ts
+ mkMsg (ParseFailed _ tok) = ["Parse failed: "++tok]
+ mkMsg (ParseIncomplete) = ["The sentence is incomplete"]
optLins env opts ts = case opts of
_ | isOpt "groups" opts ->
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index ecfb7d2ea..2cae01db6 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -2139,30 +2139,37 @@ pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
*(PgfExprProb**)to = pgf_parse_result_next(ps);
}
-static GuString
-pgf_parsing_last_token(PgfParsing* ps, GuPool* pool)
+static PgfParseError*
+pgf_parsing_new_exception(PgfParsing* ps, GuPool* pool)
{
- if (ps->before == NULL)
- return "";
+ const uint8_t* p = (uint8_t*) ps->sentence;
+ const uint8_t* end = p + (ps->before ? ps->before->end_offset : 0);
- const uint8_t* start = (uint8_t*) ps->sentence;
- const uint8_t* end = (uint8_t*) ps->sentence + ps->before->end_offset;
+ PgfParseError* err = gu_new(PgfParseError, pool);
+ err->incomplete= (*end == 0);
+ err->offset = 0;
+ err->token_ptr = (char*) p;
- const uint8_t* p = start;
while (p < end) {
if (gu_ucs_is_space(gu_utf8_decode(&p))) {
- start = p;
+ err->token_ptr = (char*) p;
}
+ err->offset++;
+ }
+
+ if (err->incomplete) {
+ err->token_ptr = NULL;
+ err->token_len = 0;
+ return err;
}
while (*p && !gu_ucs_is_space(gu_utf8_decode(&p))) {
end = p;
}
- char* tok = gu_malloc(pool, end-start+1);
- memcpy(tok, start, (end-start));
- tok[end-start] = 0;
- return tok;
+ err->token_len = ((char*)end)-err->token_ptr;
+
+ return err;
}
PGF_API GuEnum*
@@ -2204,7 +2211,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
while (gu_buf_length(ps->expr_queue) == 0) {
if (!pgf_parsing_proceed(ps)) {
GuExnData* exn = gu_raise(err, PgfParseError);
- exn->data = (void*) pgf_parsing_last_token(ps, exn->pool);
+ exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
return NULL;
}
@@ -2249,7 +2256,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
while (gu_buf_length(ps->expr_queue) == 0) {
if (!pgf_parsing_proceed(ps)) {
GuExnData* exn = gu_raise(err, PgfParseError);
- exn->data = (void*) pgf_parsing_last_token(ps, exn->pool);
+ exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
return NULL;
}
@@ -2312,7 +2319,7 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
while (ps->before->end_offset < len) {
if (!pgf_parsing_proceed(ps)) {
GuExnData* exn = gu_raise(err, PgfParseError);
- exn->data = (void*) pgf_parsing_last_token(ps, exn->pool);
+ exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
return NULL;
}
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 632a1d332..e6c1c70b8 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -122,6 +122,13 @@ PGF_API_DECL PgfExprEnum*
pgf_generate_all(PgfPGF* pgf, PgfType* ty,
GuExn* err, GuPool* pool, GuPool* out_pool);
+typedef struct {
+ int incomplete; // equal to !=0 if the sentence is incomplete, 0 otherwise
+ size_t offset;
+ const char* token_ptr;
+ size_t token_len;
+} PgfParseError;
+
PGF_API_DECL PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
GuExn* err, GuPool* pool, GuPool* out_pool);
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index 8fb6e6ccb..0d65822f9 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -63,7 +63,7 @@ module PGF2 (-- * PGF
alignWords,
-- ** Parsing
- parse, parseWithHeuristics,
+ ParseOutput(..), parse, parseWithHeuristics,
-- ** Sentence Lookup
lookupSentence,
-- ** Generation
@@ -480,7 +480,15 @@ getAnalysis ref self c_lemma c_anal prob exn = do
anal <- peekUtf8CString c_anal
writeIORef ref ((lemma, anal, prob):ans)
-parse :: Concr -> Type -> String -> Either String [(Expr,Float)]
+-- | This data type encodes the different outcomes which you could get from the parser.
+data ParseOutput
+ = ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
+ -- The string is the token where the parser have failed.
+ | ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
+ -- The list should be non-empty.
+ | ParseIncomplete -- ^ The sentence is not complete.
+
+parse :: Concr -> Type -> String -> ParseOutput
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse
@@ -497,7 +505,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
-- the input sentence; the current offset in the sentence.
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
- -> Either String [(Expr,Float)]
+ -> ParseOutput
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
@@ -510,11 +518,19 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
- then do c_tok <- (#peek GuExn, data.data) exn
- tok <- peekUtf8CString c_tok
- gu_pool_free parsePl
- gu_pool_free exprPl
- return (Left tok)
+ then do c_err <- (#peek GuExn, data.data) exn
+ c_incomplete <- (#peek PgfParseError, incomplete) c_err
+ if (c_incomplete :: CInt) == 0
+ then do c_offset <- (#peek PgfParseError, offset) c_err
+ token_ptr <- (#peek PgfParseError, token_ptr) c_err
+ token_len <- (#peek PgfParseError, token_len) c_err
+ tok <- peekUtf8CStringLen token_ptr token_len
+ gu_pool_free parsePl
+ gu_pool_free exprPl
+ return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
+ else do gu_pool_free parsePl
+ gu_pool_free exprPl
+ return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
@@ -528,7 +544,7 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
- return (Right exprs)
+ return (ParseOk exprs)
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
mkCallbacksMap concr callbacks pool = do
@@ -595,7 +611,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse
-> Cat -- ^ the start category
-> String -- ^ the input sentence
-> Oracle
- -> Either String [(Expr,Float)]
+ -> ParseOutput
parseWithOracle lang cat sent (predict,complete,literal) =
unsafePerformIO $
do parsePl <- gu_new_pool
@@ -612,11 +628,19 @@ parseWithOracle lang cat sent (predict,complete,literal) =
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
- then do c_tok <- (#peek GuExn, data.data) exn
- tok <- peekUtf8CString c_tok
- gu_pool_free parsePl
- gu_pool_free exprPl
- return (Left tok)
+ then do c_err <- (#peek GuExn, data.data) exn
+ c_incomplete <- (#peek PgfParseError, incomplete) c_err
+ if (c_incomplete :: CInt) == 0
+ then do c_offset <- (#peek PgfParseError, offset) c_err
+ token_ptr <- (#peek PgfParseError, token_ptr) c_err
+ token_len <- (#peek PgfParseError, token_len) c_err
+ tok <- peekUtf8CStringLen token_ptr token_len
+ gu_pool_free parsePl
+ gu_pool_free exprPl
+ return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
+ else do gu_pool_free parsePl
+ gu_pool_free exprPl
+ return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
@@ -630,7 +654,7 @@ parseWithOracle lang cat sent (predict,complete,literal) =
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
- return (Right exprs)
+ return (ParseOk exprs)
where
oracleWrapper oracle catPtr lblPtr offset = do
cat <- peekUtf8CString catPtr
diff --git a/src/runtime/haskell-bind/PGF2/FFI.hs b/src/runtime/haskell-bind/PGF2/FFI.hs
index a47655d8d..3f30631d8 100644
--- a/src/runtime/haskell-bind/PGF2/FFI.hs
+++ b/src/runtime/haskell-bind/PGF2/FFI.hs
@@ -2,7 +2,7 @@
module PGF2.FFI where
-import Foreign ( alloca, poke )
+import Foreign ( alloca, peek, poke )
import Foreign.C
import Foreign.Ptr
import Foreign.ForeignPtr
@@ -116,6 +116,19 @@ peekUtf8CString ptr =
else do cs <- decode pptr
return (((toEnum . fromEnum) x) : cs)
+peekUtf8CStringLen :: CString -> CInt -> IO String
+peekUtf8CStringLen ptr len =
+ alloca $ \pptr ->
+ poke pptr ptr >> decode pptr (ptr `plusPtr` fromIntegral len)
+ where
+ decode pptr end = do
+ ptr <- peek pptr
+ if ptr >= end
+ then return []
+ else do x <- gu_utf8_decode pptr
+ cs <- decode pptr end
+ return (((toEnum . fromEnum) x) : cs)
+
newUtf8CString :: String -> Ptr GuPool -> IO CString
newUtf8CString s pool = do
-- An UTF8 character takes up to 6 bytes. We allocate enough
diff --git a/src/runtime/haskell-bind/examples/pgf-shell.hs b/src/runtime/haskell-bind/examples/pgf-shell.hs
index 722770822..05c991691 100644
--- a/src/runtime/haskell-bind/examples/pgf-shell.hs
+++ b/src/runtime/haskell-bind/examples/pgf-shell.hs
@@ -37,18 +37,18 @@ execute cmd =
P lang s -> do pgf <- gets fst
c <- getConcr' pgf lang
case parse c (startCat pgf) s of
- Left tok -> do put (pgf,[])
- putln ("Parse error: "++tok)
- Right ts -> do put (pgf,map show ts)
- pop
+ ParseFailed _ tok -> do put (pgf,[])
+ putln ("Parse error: "++tok)
+ ParseOk ts -> do put (pgf,map show ts)
+ pop
T from to s -> do pgf <- gets fst
cfrom <- getConcr' pgf from
cto <- getConcr' pgf to
case parse cfrom (startCat pgf) s of
- Left tok -> do put (pgf,[])
- putln ("Parse error: "++tok)
- Right ts -> do put (pgf,map (linearize cto.fst) ts)
- pop
+ ParseFailed _ tok -> do put (pgf,[])
+ putln ("Parse error: "++tok)
+ ParseOk ts -> do put (pgf,map (linearize cto.fst) ts)
+ pop
I path -> do pgf <- liftIO (readPGF path)
putln . unwords . M.keys $ languages pgf
put (pgf,[])
diff --git a/src/runtime/java/jni_utils.c b/src/runtime/java/jni_utils.c
index 59c4a7e54..f897b8372 100644
--- a/src/runtime/java/jni_utils.c
+++ b/src/runtime/java/jni_utils.c
@@ -34,10 +34,8 @@ gu2j_string(JNIEnv *env, GuString s) {
}
JPGF_INTERNAL jstring
-gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf) {
- const char* s = gu_string_buf_data(sbuf);
+gu2j_string_len(JNIEnv *env, const char* s, size_t len) {
const char* utf8 = s;
- size_t len = gu_string_buf_length(sbuf);
jchar* utf16 = alloca(len*sizeof(jchar));
jchar* dst = utf16;
@@ -56,6 +54,11 @@ gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf) {
return (*env)->NewString(env, utf16, dst-utf16);
}
+JPGF_INTERNAL jstring
+gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf) {
+ return gu2j_string_len(env, gu_string_buf_data(sbuf), gu_string_buf_length(sbuf));
+}
+
JPGF_INTERNAL GuString
j2gu_string(JNIEnv *env, jstring s, GuPool* pool) {
GuString str = (*env)->GetStringUTFChars(env, s, 0);
diff --git a/src/runtime/java/jni_utils.h b/src/runtime/java/jni_utils.h
index f2d050092..e45f5b7bd 100644
--- a/src/runtime/java/jni_utils.h
+++ b/src/runtime/java/jni_utils.h
@@ -21,6 +21,9 @@ JPGF_INTERNAL_DECL jstring
gu2j_string(JNIEnv *env, GuString s);
JPGF_INTERNAL_DECL jstring
+gu2j_string_len(JNIEnv *env, const char* s, size_t len);
+
+JPGF_INTERNAL_DECL jstring
gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf);
JPGF_INTERNAL_DECL GuString
diff --git a/src/runtime/java/jpgf.c b/src/runtime/java/jpgf.c
index db662f5c2..9f5a1a417 100644
--- a/src/runtime/java/jpgf.c
+++ b/src/runtime/java/jpgf.c
@@ -591,6 +591,30 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba
j2gu_string(env, jcat, pool), &callback->callback);
}
+static void
+throw_parse_error(JNIEnv *env, PgfParseError* err)
+{
+ jstring jtoken;
+ if (err->incomplete)
+ jtoken = NULL;
+ else {
+ jtoken = gu2j_string_len(env, err->token_ptr, err->token_len);
+ if (!jtoken)
+ return;
+ }
+
+ jclass exception_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/ParseError");
+ if (!exception_class)
+ return;
+ jmethodID constrId = (*env)->GetMethodID(env, exception_class, "<init>", "(Ljava/lang/String;IZ)V");
+ if (!constrId)
+ return;
+ jobject exception = (*env)->NewObject(env, exception_class, constrId, jtoken, err->offset, err->incomplete);
+ if (!exception)
+ return;
+ (*env)->Throw(env, exception);
+}
+
JNIEXPORT jobject JNICALL
Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics
(JNIEnv* env, jclass clazz, jobject jconcr, jstring jstartCat, jstring js, jdouble heuristics, jlong callbacksRef, jobject jpool)
@@ -615,8 +639,7 @@ Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics
GuString msg = (GuString) gu_exn_caught_data(parse_err);
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", msg);
} else if (gu_exn_caught(parse_err, PgfParseError)) {
- GuString tok = (GuString) gu_exn_caught_data(parse_err);
- throw_string_exception(env, "org/grammaticalframework/pgf/ParseError", tok);
+ throw_parse_error(env, (PgfParseError*) gu_exn_caught_data(parse_err));
}
gu_pool_free(out_pool);
@@ -656,8 +679,7 @@ Java_org_grammaticalframework_pgf_Completer_complete(JNIEnv* env, jclass clazz,
GuString msg = (GuString) gu_exn_caught_data(parse_err);
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", msg);
} else if (gu_exn_caught(parse_err, PgfParseError)) {
- GuString tok = (GuString) gu_exn_caught_data(parse_err);
- throw_string_exception(env, "org/grammaticalframework/pgf/ParseError", tok);
+ throw_parse_error(env, (PgfParseError*) gu_exn_caught_data(parse_err));
}
gu_pool_free(pool);
diff --git a/src/runtime/java/org/grammaticalframework/pgf/ParseError.java b/src/runtime/java/org/grammaticalframework/pgf/ParseError.java
index 7fd332708..8b3f51ae2 100644
--- a/src/runtime/java/org/grammaticalframework/pgf/ParseError.java
+++ b/src/runtime/java/org/grammaticalframework/pgf/ParseError.java
@@ -4,11 +4,26 @@ package org.grammaticalframework.pgf;
public class ParseError extends Exception {
private static final long serialVersionUID = -6086991674218306569L;
- public ParseError(String token) {
- super(token);
+ private String token;
+ private int offset;
+ private boolean incomplete;
+
+ public ParseError(String token, int offset, boolean incomplete) {
+ super(incomplete ? "The sentence is incomplete" : "Unexpected token: \""+token+"\"");
+ this.token = token;
+ this.offset = offset;
+ this.incomplete = incomplete;
}
-
+
public String getToken() {
- return getMessage();
+ return token;
+ }
+
+ public int getOffset() {
+ return offset;
+ }
+
+ public boolean isIncomplete() {
+ return incomplete;
}
}
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c
index 7da62e453..97718103b 100644
--- a/src/runtime/python/pypgf.c
+++ b/src/runtime/python/pypgf.c
@@ -1545,13 +1545,28 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
GuString msg = (GuString) gu_exn_caught_data(parse_err);
PyErr_SetString(PGFError, msg);
} else if (gu_exn_caught(parse_err, PgfParseError)) {
- GuString tok = (GuString) gu_exn_caught_data(parse_err);
- PyObject* py_tok = PyString_FromString(tok);
- PyObject_SetAttrString(ParseError, "token", py_tok);
- PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
- Py_DECREF(py_tok);
+ PgfParseError* err = (PgfParseError*) gu_exn_caught_data(parse_err);
+ PyObject* py_offset = PyInt_FromLong(err->offset);
+ if (err->incomplete) {
+ PyObject_SetAttrString(ParseError, "incomplete", Py_True);
+ PyObject_SetAttrString(ParseError, "offset", py_offset);
+ PyErr_Format(ParseError, "The sentence is incomplete");
+ } else {
+ PyObject* py_tok = PyString_FromStringAndSize(err->token_ptr,
+ err->token_len);
+ PyObject_SetAttrString(ParseError, "incomplete", Py_False);
+ PyObject_SetAttrString(ParseError, "offset", py_offset);
+ PyObject_SetAttrString(ParseError, "token", py_tok);
+#if PY_MAJOR_VERSION >= 3
+ PyErr_Format(ParseError, "Unexpected token: \"%U\"", py_tok);
+#else
+ PyErr_Format(ParseError, "Unexpected token: \"%s\"", PyString_AsString(py_tok));
+#endif
+ Py_DECREF(py_tok);
+ }
+ Py_DECREF(py_offset);
}
-
+
Py_DECREF(pyres);
pyres = NULL;
}
diff --git a/src/server/PGFService.hs b/src/server/PGFService.hs
index b1020b4b8..244f2b96c 100644
--- a/src/server/PGFService.hs
+++ b/src/server/PGFService.hs
@@ -191,10 +191,11 @@ cpgfMain qsem command (t,(pgf,pc)) =
-- Without caching parse results:
parse' start mlimit ((from,concr),input) =
- return $ maybe id take mlimit . drop start # cparse
+ case C.parseWithHeuristics concr cat input (-1) callbacks of
+ C.ParseOk ts -> return (Right (maybe id take mlimit (drop start ts)))
+ C.ParseFailed _ tok -> return (Left tok)
+ C.ParseIncomplete -> return (Left "")
where
- --cparse = C.parse concr cat input
- cparse = C.parseWithHeuristics concr cat input (-1) callbacks
callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks
cb fs = [(cat,f pgf (from,concr) input)|(cat,f)<-fs]
{-
@@ -277,8 +278,9 @@ cpgfMain qsem command (t,(pgf,pc)) =
| isUpper c -> toLower c : cs
s -> s
- parse1 = either (const Nothing) (fmap fst . listToMaybe) .
- C.parse concr cat
+ parse1 s = case C.parse concr cat s of
+ C.ParseOk ((t,_):ts) -> Just t
+ _ -> Nothing
morph w = listToMaybe
[t | (f,a,p)<-C.lookupMorpho concr w,
t<-maybeToList (C.readExpr f)]