diff options
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/c/pgf/expr.c | 106 | ||||
| -rw-r--r-- | src/runtime/c/pgf/expr.h | 3 | ||||
| -rw-r--r-- | src/runtime/c/pgf/printer.c | 14 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/CId.hs | 49 |
4 files changed, 144 insertions, 28 deletions
diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c index 071b9e693..50dcee119 100644 --- a/src/runtime/c/pgf/expr.c +++ b/src/runtime/c/pgf/expr.c @@ -1,5 +1,6 @@ #include "pgf.h" #include <gu/assert.h> +#include <gu/utf8.h> #include <ctype.h> #include <stdio.h> #include <stdlib.h> @@ -166,6 +167,45 @@ pgf_expr_parser_getc(PgfExprParser* parser) } } +static bool +pgf_is_ident_first(GuUCS ucs) +{ + return (ucs == '_') || + (ucs >= 'a' && ucs <= 'z') || + (ucs >= 'A' && ucs <= 'Z') || + (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215); +} + +static bool +pgf_is_ident_rest(GuUCS ucs) +{ + return (ucs == '_') || + (ucs == '\'') || + (ucs >= '0' && ucs <= '9') || + (ucs >= 'a' && ucs <= 'z') || + (ucs >= 'A' && ucs <= 'Z') || + (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215); +} + +static bool +pgf_is_normal_ident(PgfCId id) +{ + const uint8_t* p = (const uint8_t*) id; + GuUCS ucs = gu_utf8_decode(&p); + if (!pgf_is_ident_first(ucs)) + return false; + + for (;;) { + ucs = gu_utf8_decode(&p); + if (ucs == 0) + break; + if (!pgf_is_ident_rest(ucs)) + return false; + } + + return true; +} + static void pgf_expr_parser_token(PgfExprParser* parser) { @@ -227,20 +267,32 @@ pgf_expr_parser_token(PgfExprParser* parser) pgf_expr_parser_getc(parser); parser->token_tag = PGF_TOKEN_COLON; break; - case '_': + case '\'': pgf_expr_parser_getc(parser); - parser->token_tag = PGF_TOKEN_WILD; + + GuBuf* chars = gu_new_buf(char, parser->tmp_pool); + while (parser->ch != '\'' && parser->ch != EOF) { + if (parser->ch == '\\') { + pgf_expr_parser_getc(parser); + } + gu_buf_push(chars, char, parser->ch); + pgf_expr_parser_getc(parser); + } + if (parser->ch == '\'') { + pgf_expr_parser_getc(parser); + gu_buf_push(chars, char, 0); + parser->token_tag = PGF_TOKEN_IDENT; + parser->token_value = chars; + } break; default: { GuBuf* chars = gu_new_buf(char, parser->tmp_pool); - if (isalpha(parser->ch)) { - while (isalnum(parser->ch) || - parser->ch == '_' || - parser->ch == '\'') { + if (pgf_is_ident_first(parser->ch)) { + do { gu_buf_push(chars, char, parser->ch); pgf_expr_parser_getc(parser); - } + } while (pgf_is_ident_rest(parser->ch)); gu_buf_push(chars, char, 0); parser->token_tag = PGF_TOKEN_IDENT; parser->token_value = chars; @@ -268,7 +320,7 @@ pgf_expr_parser_token(PgfExprParser* parser) } } else if (parser->ch == '"') { pgf_expr_parser_getc(parser); - + while (parser->ch != '"' && parser->ch != EOF) { gu_buf_push(chars, char, parser->ch); pgf_expr_parser_getc(parser); @@ -925,6 +977,30 @@ pgf_expr_hash(GuHash h, PgfExpr e) } void +pgf_print_cid(PgfCId id, + GuOut* out, GuExn* err) +{ + if (pgf_is_normal_ident(id)) + gu_string_write(id, out, err); + else { + gu_putc('\'', out, err); + const uint8_t* p = (const uint8_t*) id; + for (;;) { + GuUCS ucs = gu_utf8_decode(&p); + if (ucs == 0) + break; + if (ucs == '\'') + gu_puts("\\\'", out, err); + else if (ucs == '\\') + gu_puts("\\\\", out, err); + else + gu_out_utf8(ucs, out, err); + } + gu_putc('\'', out, err); + } +} + +void pgf_print_literal(PgfLiteral lit, GuOut* out, GuExn* err) { @@ -973,7 +1049,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec, if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) { gu_putc('{', out, err); } - gu_string_write(abs->id, out, err); + pgf_print_cid(abs->id, out, err); if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) { gu_putc('}', out, err); } @@ -1028,7 +1104,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec, break; case PGF_EXPR_FUN: { PgfExprFun* fun = ei.data; - gu_string_write(fun->fun, out, err); + pgf_print_cid(fun->fun, out, err); break; } case PGF_EXPR_VAR: { @@ -1043,7 +1119,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec, if (c == NULL) { gu_printf(out, err, "#%d", evar->var); } else { - gu_string_write(c->name, out, err); + pgf_print_cid(c->name, out, err); } break; } @@ -1074,7 +1150,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec, { if (hypo->bind_type == PGF_BIND_TYPE_IMPLICIT) { gu_puts("({", out, err); - gu_string_write(hypo->cid, out, err); + pgf_print_cid(hypo->cid, out, err); gu_puts("} : ", out, err); pgf_print_type(hypo->type, ctxt, 0, out, err); gu_puts(")", out, err); @@ -1083,7 +1159,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec, if (strcmp(hypo->cid, "_") != 0) { gu_puts("(", out, err); - gu_string_write(hypo->cid, out, err); + pgf_print_cid(hypo->cid, out, err); gu_puts(" : ", out, err); pgf_print_type(hypo->type, ctxt, 0, out, err); gu_puts(")", out, err); @@ -1117,7 +1193,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec, gu_puts(" -> ", out, err); } - gu_string_write(type->cid, out, err); + pgf_print_cid(type->cid, out, err); for (size_t i = 0; i < type->n_exprs; i++) { gu_puts(" ", out, err); @@ -1143,7 +1219,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec, if (prec > 3) gu_putc(')', out, err); } else { - gu_string_write(type->cid, out, err); + pgf_print_cid(type->cid, out, err); } } diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h index dffe5ac27..2452765f5 100644 --- a/src/runtime/c/pgf/expr.h +++ b/src/runtime/c/pgf/expr.h @@ -177,6 +177,9 @@ struct PgfPrintContext { }; void +pgf_print_cid(PgfCId id, GuOut* out, GuExn* err); + +void pgf_print_literal(PgfLiteral lit, GuOut* out, GuExn* err); void diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 8b737266e..2417a3edd 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -16,7 +16,7 @@ pgf_print_flag(GuMapItor* fn, const void* key, void* value, GuOut *out = clo->out; gu_puts(" flag ", out, err); - gu_string_write(flag, out, err); + pgf_print_cid(flag, out, err); gu_puts(" = ", out, err); pgf_print_literal(lit, out, err); gu_puts(";\n", out, err); @@ -32,7 +32,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value, GuOut *out = clo->out; gu_puts(" cat ", out, err); - gu_string_write(name, out, err); + pgf_print_cid(name, out, err); PgfPrintContext* ctxt = NULL; size_t n_hypos = gu_seq_length(cat->context); @@ -61,7 +61,7 @@ pgf_print_absfun(GuMapItor* fn, const void* key, void* value, GuOut *out = clo->out; gu_puts((fun->defns == NULL) ? " data " : " fun ", out, err); - gu_string_write(name, out, err); + pgf_print_cid(name, out, err); gu_puts(" : ", out, err); pgf_print_type(fun->type, NULL, 0, out, err); gu_printf(out, err, " ; -- %f\n", fun->ep.prob); @@ -70,7 +70,7 @@ static void pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err) { gu_puts("abstract ", out, err); - gu_string_write(abstr->name, out, err); + pgf_print_cid(abstr->name, out, err); gu_puts(" {\n", out, err); PgfPrintFn clo1 = { { pgf_print_flag }, out }; @@ -205,7 +205,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences, if (cncfun->absfun != NULL) { gu_puts(" [", out, err); - gu_string_write(cncfun->absfun->name, out, err); + pgf_print_cid(cncfun->absfun->name, out, err); gu_puts("]", out, err); } @@ -311,7 +311,7 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value, GuOut *out = clo->out; gu_puts(" ", out, err); - gu_string_write(name, out, err); + pgf_print_cid(name, out, err); gu_puts(" :=\n", out, err); PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0); @@ -335,7 +335,7 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr, GuOut* out, GuExn* err) { gu_puts("concrete ", out, err); - gu_string_write(cncname, out, err); + pgf_print_cid(cncname, out, err); gu_puts(" {\n", out, err); PgfPrintFn clo1 = { { pgf_print_flag }, out }; diff --git a/src/runtime/haskell/PGF/CId.hs b/src/runtime/haskell/PGF/CId.hs index 6a20cb4f3..0594d9fc1 100644 --- a/src/runtime/haskell/PGF/CId.hs +++ b/src/runtime/haskell/PGF/CId.hs @@ -7,6 +7,7 @@ module PGF.CId (CId(..), import Control.Monad import qualified Data.ByteString.Char8 as BS +import qualified Data.ByteString.UTF8 as UTF8 import Data.Char import qualified Text.ParserCombinators.ReadP as RP import qualified Text.PrettyPrint as PP @@ -21,7 +22,7 @@ wildCId = CId (BS.singleton '_') -- | Creates a new identifier from 'String' mkCId :: String -> CId -mkCId s = CId (BS.pack s) +mkCId s = CId (UTF8.fromString s) bsCId = CId @@ -33,7 +34,18 @@ readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of -- | Renders the identifier as 'String' showCId :: CId -> String -showCId (CId x) = BS.unpack x +showCId (CId x) = + let raw = UTF8.toString x + in if isIdent raw + then raw + else "'" ++ concatMap escape raw ++ "'" + where + isIdent [] = False + isIdent (c:cs) = isIdentFirst c && all isIdentRest cs + + escape '\'' = "\\\'" + escape '\\' = "\\\\" + escape c = [c] instance Show CId where showsPrec _ = showString . showCId @@ -48,10 +60,35 @@ pCId = do s <- pIdent else return (mkCId s) pIdent :: RP.ReadP String -pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest) - where - isIdentFirst c = c == '_' || isLetter c - isIdentRest c = c == '_' || c == '\'' || isAlphaNum c +pIdent = + liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest) + `mplus` + do RP.char '\'' + cs <- RP.many1 insideChar + RP.char '\'' + return cs +-- where +insideChar = RP.readS_to_P $ \s -> + case s of + [] -> [] + ('\\':'\\':cs) -> [('\\',cs)] + ('\\':'\'':cs) -> [('\'',cs)] + ('\\':cs) -> [] + ('\'':cs) -> [] + (c:cs) -> [(c,cs)] + +isIdentFirst c = + (c == '_') || + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '\192' && c <= '\255' && c /= '\247' && c /= '\215') +isIdentRest c = + (c == '_') || + (c == '\'') || + (c >= '0' && c <= '9') || + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '\192' && c <= '\255' && c /= '\247' && c /= '\215') ppCId :: CId -> PP.Doc ppCId = PP.text . showCId |
