summaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/c/pgf/expr.c106
-rw-r--r--src/runtime/c/pgf/expr.h3
-rw-r--r--src/runtime/c/pgf/printer.c14
-rw-r--r--src/runtime/haskell/PGF/CId.hs49
4 files changed, 144 insertions, 28 deletions
diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c
index 071b9e693..50dcee119 100644
--- a/src/runtime/c/pgf/expr.c
+++ b/src/runtime/c/pgf/expr.c
@@ -1,5 +1,6 @@
#include "pgf.h"
#include <gu/assert.h>
+#include <gu/utf8.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -166,6 +167,45 @@ pgf_expr_parser_getc(PgfExprParser* parser)
}
}
+static bool
+pgf_is_ident_first(GuUCS ucs)
+{
+ return (ucs == '_') ||
+ (ucs >= 'a' && ucs <= 'z') ||
+ (ucs >= 'A' && ucs <= 'Z') ||
+ (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215);
+}
+
+static bool
+pgf_is_ident_rest(GuUCS ucs)
+{
+ return (ucs == '_') ||
+ (ucs == '\'') ||
+ (ucs >= '0' && ucs <= '9') ||
+ (ucs >= 'a' && ucs <= 'z') ||
+ (ucs >= 'A' && ucs <= 'Z') ||
+ (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215);
+}
+
+static bool
+pgf_is_normal_ident(PgfCId id)
+{
+ const uint8_t* p = (const uint8_t*) id;
+ GuUCS ucs = gu_utf8_decode(&p);
+ if (!pgf_is_ident_first(ucs))
+ return false;
+
+ for (;;) {
+ ucs = gu_utf8_decode(&p);
+ if (ucs == 0)
+ break;
+ if (!pgf_is_ident_rest(ucs))
+ return false;
+ }
+
+ return true;
+}
+
static void
pgf_expr_parser_token(PgfExprParser* parser)
{
@@ -227,20 +267,32 @@ pgf_expr_parser_token(PgfExprParser* parser)
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_COLON;
break;
- case '_':
+ case '\'':
pgf_expr_parser_getc(parser);
- parser->token_tag = PGF_TOKEN_WILD;
+
+ GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
+ while (parser->ch != '\'' && parser->ch != EOF) {
+ if (parser->ch == '\\') {
+ pgf_expr_parser_getc(parser);
+ }
+ gu_buf_push(chars, char, parser->ch);
+ pgf_expr_parser_getc(parser);
+ }
+ if (parser->ch == '\'') {
+ pgf_expr_parser_getc(parser);
+ gu_buf_push(chars, char, 0);
+ parser->token_tag = PGF_TOKEN_IDENT;
+ parser->token_value = chars;
+ }
break;
default: {
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
- if (isalpha(parser->ch)) {
- while (isalnum(parser->ch) ||
- parser->ch == '_' ||
- parser->ch == '\'') {
+ if (pgf_is_ident_first(parser->ch)) {
+ do {
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
- }
+ } while (pgf_is_ident_rest(parser->ch));
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_IDENT;
parser->token_value = chars;
@@ -268,7 +320,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
}
} else if (parser->ch == '"') {
pgf_expr_parser_getc(parser);
-
+
while (parser->ch != '"' && parser->ch != EOF) {
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
@@ -925,6 +977,30 @@ pgf_expr_hash(GuHash h, PgfExpr e)
}
void
+pgf_print_cid(PgfCId id,
+ GuOut* out, GuExn* err)
+{
+ if (pgf_is_normal_ident(id))
+ gu_string_write(id, out, err);
+ else {
+ gu_putc('\'', out, err);
+ const uint8_t* p = (const uint8_t*) id;
+ for (;;) {
+ GuUCS ucs = gu_utf8_decode(&p);
+ if (ucs == 0)
+ break;
+ if (ucs == '\'')
+ gu_puts("\\\'", out, err);
+ else if (ucs == '\\')
+ gu_puts("\\\\", out, err);
+ else
+ gu_out_utf8(ucs, out, err);
+ }
+ gu_putc('\'', out, err);
+ }
+}
+
+void
pgf_print_literal(PgfLiteral lit,
GuOut* out, GuExn* err)
{
@@ -973,7 +1049,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('{', out, err);
}
- gu_string_write(abs->id, out, err);
+ pgf_print_cid(abs->id, out, err);
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('}', out, err);
}
@@ -1028,7 +1104,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
break;
case PGF_EXPR_FUN: {
PgfExprFun* fun = ei.data;
- gu_string_write(fun->fun, out, err);
+ pgf_print_cid(fun->fun, out, err);
break;
}
case PGF_EXPR_VAR: {
@@ -1043,7 +1119,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
if (c == NULL) {
gu_printf(out, err, "#%d", evar->var);
} else {
- gu_string_write(c->name, out, err);
+ pgf_print_cid(c->name, out, err);
}
break;
}
@@ -1074,7 +1150,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
{
if (hypo->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_puts("({", out, err);
- gu_string_write(hypo->cid, out, err);
+ pgf_print_cid(hypo->cid, out, err);
gu_puts("} : ", out, err);
pgf_print_type(hypo->type, ctxt, 0, out, err);
gu_puts(")", out, err);
@@ -1083,7 +1159,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
if (strcmp(hypo->cid, "_") != 0) {
gu_puts("(", out, err);
- gu_string_write(hypo->cid, out, err);
+ pgf_print_cid(hypo->cid, out, err);
gu_puts(" : ", out, err);
pgf_print_type(hypo->type, ctxt, 0, out, err);
gu_puts(")", out, err);
@@ -1117,7 +1193,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
gu_puts(" -> ", out, err);
}
- gu_string_write(type->cid, out, err);
+ pgf_print_cid(type->cid, out, err);
for (size_t i = 0; i < type->n_exprs; i++) {
gu_puts(" ", out, err);
@@ -1143,7 +1219,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
if (prec > 3) gu_putc(')', out, err);
} else {
- gu_string_write(type->cid, out, err);
+ pgf_print_cid(type->cid, out, err);
}
}
diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h
index dffe5ac27..2452765f5 100644
--- a/src/runtime/c/pgf/expr.h
+++ b/src/runtime/c/pgf/expr.h
@@ -177,6 +177,9 @@ struct PgfPrintContext {
};
void
+pgf_print_cid(PgfCId id, GuOut* out, GuExn* err);
+
+void
pgf_print_literal(PgfLiteral lit, GuOut* out, GuExn* err);
void
diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c
index 8b737266e..2417a3edd 100644
--- a/src/runtime/c/pgf/printer.c
+++ b/src/runtime/c/pgf/printer.c
@@ -16,7 +16,7 @@ pgf_print_flag(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" flag ", out, err);
- gu_string_write(flag, out, err);
+ pgf_print_cid(flag, out, err);
gu_puts(" = ", out, err);
pgf_print_literal(lit, out, err);
gu_puts(";\n", out, err);
@@ -32,7 +32,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" cat ", out, err);
- gu_string_write(name, out, err);
+ pgf_print_cid(name, out, err);
PgfPrintContext* ctxt = NULL;
size_t n_hypos = gu_seq_length(cat->context);
@@ -61,7 +61,7 @@ pgf_print_absfun(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts((fun->defns == NULL) ? " data " : " fun ", out, err);
- gu_string_write(name, out, err);
+ pgf_print_cid(name, out, err);
gu_puts(" : ", out, err);
pgf_print_type(fun->type, NULL, 0, out, err);
gu_printf(out, err, " ; -- %f\n", fun->ep.prob);
@@ -70,7 +70,7 @@ static void
pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
{
gu_puts("abstract ", out, err);
- gu_string_write(abstr->name, out, err);
+ pgf_print_cid(abstr->name, out, err);
gu_puts(" {\n", out, err);
PgfPrintFn clo1 = { { pgf_print_flag }, out };
@@ -205,7 +205,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
if (cncfun->absfun != NULL) {
gu_puts(" [", out, err);
- gu_string_write(cncfun->absfun->name, out, err);
+ pgf_print_cid(cncfun->absfun->name, out, err);
gu_puts("]", out, err);
}
@@ -311,7 +311,7 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" ", out, err);
- gu_string_write(name, out, err);
+ pgf_print_cid(name, out, err);
gu_puts(" :=\n", out, err);
PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0);
@@ -335,7 +335,7 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
GuOut* out, GuExn* err)
{
gu_puts("concrete ", out, err);
- gu_string_write(cncname, out, err);
+ pgf_print_cid(cncname, out, err);
gu_puts(" {\n", out, err);
PgfPrintFn clo1 = { { pgf_print_flag }, out };
diff --git a/src/runtime/haskell/PGF/CId.hs b/src/runtime/haskell/PGF/CId.hs
index 6a20cb4f3..0594d9fc1 100644
--- a/src/runtime/haskell/PGF/CId.hs
+++ b/src/runtime/haskell/PGF/CId.hs
@@ -7,6 +7,7 @@ module PGF.CId (CId(..),
import Control.Monad
import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString.UTF8 as UTF8
import Data.Char
import qualified Text.ParserCombinators.ReadP as RP
import qualified Text.PrettyPrint as PP
@@ -21,7 +22,7 @@ wildCId = CId (BS.singleton '_')
-- | Creates a new identifier from 'String'
mkCId :: String -> CId
-mkCId s = CId (BS.pack s)
+mkCId s = CId (UTF8.fromString s)
bsCId = CId
@@ -33,7 +34,18 @@ readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of
-- | Renders the identifier as 'String'
showCId :: CId -> String
-showCId (CId x) = BS.unpack x
+showCId (CId x) =
+ let raw = UTF8.toString x
+ in if isIdent raw
+ then raw
+ else "'" ++ concatMap escape raw ++ "'"
+ where
+ isIdent [] = False
+ isIdent (c:cs) = isIdentFirst c && all isIdentRest cs
+
+ escape '\'' = "\\\'"
+ escape '\\' = "\\\\"
+ escape c = [c]
instance Show CId where
showsPrec _ = showString . showCId
@@ -48,10 +60,35 @@ pCId = do s <- pIdent
else return (mkCId s)
pIdent :: RP.ReadP String
-pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
- where
- isIdentFirst c = c == '_' || isLetter c
- isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
+pIdent =
+ liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
+ `mplus`
+ do RP.char '\''
+ cs <- RP.many1 insideChar
+ RP.char '\''
+ return cs
+-- where
+insideChar = RP.readS_to_P $ \s ->
+ case s of
+ [] -> []
+ ('\\':'\\':cs) -> [('\\',cs)]
+ ('\\':'\'':cs) -> [('\'',cs)]
+ ('\\':cs) -> []
+ ('\'':cs) -> []
+ (c:cs) -> [(c,cs)]
+
+isIdentFirst c =
+ (c == '_') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '\192' && c <= '\255' && c /= '\247' && c /= '\215')
+isIdentRest c =
+ (c == '_') ||
+ (c == '\'') ||
+ (c >= '0' && c <= '9') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '\192' && c <= '\255' && c /= '\247' && c /= '\215')
ppCId :: CId -> PP.Doc
ppCId = PP.text . showCId