summaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
author kr.angelov <kr.angelov@gmail.com> 2013-11-22 13:30:18 +0000
committer kr.angelov <kr.angelov@gmail.com> 2013-11-22 13:30:18 +0000
commit 8bcc70eac8af379ed3481039eb1bd5feea3cf195 (patch)
tree 529a61351cf04adfdb40d008920c6d3719ce64c0 /src/runtime
parent 1d2786f7da2c94bbba063137b13d639f552d5f7e (diff)
The GF syntax for identifiers is extended with quoted forms, i.e. you can write, for instance, 'ab.c', and everything between the quotes is then the identifier. This includes Unicode characters and non-ASCII symbols. This is useful for automatically generated GF grammars.
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/c/pgf/expr.c 106
-rw-r--r-- src/runtime/c/pgf/expr.h 3
-rw-r--r-- src/runtime/c/pgf/printer.c 14
-rw-r--r-- src/runtime/haskell/PGF/CId.hs 49
4 files changed, 144 insertions, 28 deletions
diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c
index 071b9e693..50dcee119 100644
--- a/src/runtime/c/pgf/expr.c
+++ b/src/runtime/c/pgf/expr.c
@@ -1,5 +1,6 @@
#include "pgf.h"
#include <gu/assert.h>
+#include <gu/utf8.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -166,6 +167,45 @@ pgf_expr_parser_getc(PgfExprParser* parser)
}
}
+static bool
+pgf_is_ident_first(GuUCS ucs)
+{
+ return (ucs == '_') ||
+ (ucs >= 'a' && ucs <= 'z') ||
+ (ucs >= 'A' && ucs <= 'Z') ||
+ (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215);
+}
+
+static bool
+pgf_is_ident_rest(GuUCS ucs)
+{
+ return (ucs == '_') ||
+ (ucs == '\'') ||
+ (ucs >= '0' && ucs <= '9') ||
+ (ucs >= 'a' && ucs <= 'z') ||
+ (ucs >= 'A' && ucs <= 'Z') ||
+ (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215);
+}
+
+static bool
+pgf_is_normal_ident(PgfCId id)
+{
+ const uint8_t* p = (const uint8_t*) id;
+ GuUCS ucs = gu_utf8_decode(&p);
+ if (!pgf_is_ident_first(ucs))
+ return false;
+
+ for (;;) {
+ ucs = gu_utf8_decode(&p);
+ if (ucs == 0)
+ break;
+ if (!pgf_is_ident_rest(ucs))
+ return false;
+ }
+
+ return true;
+}
+
static void
pgf_expr_parser_token(PgfExprParser* parser)
{
@@ -227,20 +267,32 @@ pgf_expr_parser_token(PgfExprParser* parser)
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_COLON;
break;
- case '_':
+ case '\'':
pgf_expr_parser_getc(parser);
- parser->token_tag = PGF_TOKEN_WILD;
+
+ GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
+ while (parser->ch != '\'' && parser->ch != EOF) {
+ if (parser->ch == '\\') {
+ pgf_expr_parser_getc(parser);
+ }
+ gu_buf_push(chars, char, parser->ch);
+ pgf_expr_parser_getc(parser);
+ }
+ if (parser->ch == '\'') {
+ pgf_expr_parser_getc(parser);
+ gu_buf_push(chars, char, 0);
+ parser->token_tag = PGF_TOKEN_IDENT;
+ parser->token_value = chars;
+ }
break;
default: {
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
- if (isalpha(parser->ch)) {
- while (isalnum(parser->ch) ||
- parser->ch == '_' ||
- parser->ch == '\'') {
+ if (pgf_is_ident_first(parser->ch)) {
+ do {
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
- }
+ } while (pgf_is_ident_rest(parser->ch));
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_IDENT;
parser->token_value = chars;
@@ -268,7 +320,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
}
} else if (parser->ch == '"') {
pgf_expr_parser_getc(parser);
-
+
while (parser->ch != '"' && parser->ch != EOF) {
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
@@ -925,6 +977,30 @@ pgf_expr_hash(GuHash h, PgfExpr e)
}
void
+pgf_print_cid(PgfCId id,
+ GuOut* out, GuExn* err)
+{
+ if (pgf_is_normal_ident(id))
+ gu_string_write(id, out, err);
+ else {
+ gu_putc('\'', out, err);
+ const uint8_t* p = (const uint8_t*) id;
+ for (;;) {
+ GuUCS ucs = gu_utf8_decode(&p);
+ if (ucs == 0)
+ break;
+ if (ucs == '\'')
+ gu_puts("\\\'", out, err);
+ else if (ucs == '\\')
+ gu_puts("\\\\", out, err);
+ else
+ gu_out_utf8(ucs, out, err);
+ }
+ gu_putc('\'', out, err);
+ }
+}
+
+void
pgf_print_literal(PgfLiteral lit,
GuOut* out, GuExn* err)
{
@@ -973,7 +1049,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('{', out, err);
}
- gu_string_write(abs->id, out, err);
+ pgf_print_cid(abs->id, out, err);
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('}', out, err);
}
@@ -1028,7 +1104,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
break;
case PGF_EXPR_FUN: {
PgfExprFun* fun = ei.data;
- gu_string_write(fun->fun, out, err);
+ pgf_print_cid(fun->fun, out, err);
break;
}
case PGF_EXPR_VAR: {
@@ -1043,7 +1119,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
if (c == NULL) {
gu_printf(out, err, "#%d", evar->var);
} else {
- gu_string_write(c->name, out, err);
+ pgf_print_cid(c->name, out, err);
}
break;
}
@@ -1074,7 +1150,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
{
if (hypo->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_puts("({", out, err);
- gu_string_write(hypo->cid, out, err);
+ pgf_print_cid(hypo->cid, out, err);
gu_puts("} : ", out, err);
pgf_print_type(hypo->type, ctxt, 0, out, err);
gu_puts(")", out, err);
@@ -1083,7 +1159,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
if (strcmp(hypo->cid, "_") != 0) {
gu_puts("(", out, err);
- gu_string_write(hypo->cid, out, err);
+ pgf_print_cid(hypo->cid, out, err);
gu_puts(" : ", out, err);
pgf_print_type(hypo->type, ctxt, 0, out, err);
gu_puts(")", out, err);
@@ -1117,7 +1193,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
gu_puts(" -> ", out, err);
}
- gu_string_write(type->cid, out, err);
+ pgf_print_cid(type->cid, out, err);
for (size_t i = 0; i < type->n_exprs; i++) {
gu_puts(" ", out, err);
@@ -1143,7 +1219,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
if (prec > 3) gu_putc(')', out, err);
} else {
- gu_string_write(type->cid, out, err);
+ pgf_print_cid(type->cid, out, err);
}
}
diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h
index dffe5ac27..2452765f5 100644
--- a/src/runtime/c/pgf/expr.h
+++ b/src/runtime/c/pgf/expr.h
@@ -177,6 +177,9 @@ struct PgfPrintContext {
};
void
+pgf_print_cid(PgfCId id, GuOut* out, GuExn* err);
+
+void
pgf_print_literal(PgfLiteral lit, GuOut* out, GuExn* err);
void
diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c
index 8b737266e..2417a3edd 100644
--- a/src/runtime/c/pgf/printer.c
+++ b/src/runtime/c/pgf/printer.c
@@ -16,7 +16,7 @@ pgf_print_flag(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" flag ", out, err);
- gu_string_write(flag, out, err);
+ pgf_print_cid(flag, out, err);
gu_puts(" = ", out, err);
pgf_print_literal(lit, out, err);
gu_puts(";\n", out, err);
@@ -32,7 +32,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" cat ", out, err);
- gu_string_write(name, out, err);
+ pgf_print_cid(name, out, err);
PgfPrintContext* ctxt = NULL;
size_t n_hypos = gu_seq_length(cat->context);
@@ -61,7 +61,7 @@ pgf_print_absfun(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts((fun->defns == NULL) ? " data " : " fun ", out, err);
- gu_string_write(name, out, err);
+ pgf_print_cid(name, out, err);
gu_puts(" : ", out, err);
pgf_print_type(fun->type, NULL, 0, out, err);
gu_printf(out, err, " ; -- %f\n", fun->ep.prob);
@@ -70,7 +70,7 @@ static void
pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
{
gu_puts("abstract ", out, err);
- gu_string_write(abstr->name, out, err);
+ pgf_print_cid(abstr->name, out, err);
gu_puts(" {\n", out, err);
PgfPrintFn clo1 = { { pgf_print_flag }, out };
@@ -205,7 +205,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
if (cncfun->absfun != NULL) {
gu_puts(" [", out, err);
- gu_string_write(cncfun->absfun->name, out, err);
+ pgf_print_cid(cncfun->absfun->name, out, err);
gu_puts("]", out, err);
}
@@ -311,7 +311,7 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" ", out, err);
- gu_string_write(name, out, err);
+ pgf_print_cid(name, out, err);
gu_puts(" :=\n", out, err);
PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0);
@@ -335,7 +335,7 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
GuOut* out, GuExn* err)
{
gu_puts("concrete ", out, err);
- gu_string_write(cncname, out, err);
+ pgf_print_cid(cncname, out, err);
gu_puts(" {\n", out, err);
PgfPrintFn clo1 = { { pgf_print_flag }, out };
diff --git a/src/runtime/haskell/PGF/CId.hs b/src/runtime/haskell/PGF/CId.hs
index 6a20cb4f3..0594d9fc1 100644
--- a/src/runtime/haskell/PGF/CId.hs
+++ b/src/runtime/haskell/PGF/CId.hs
@@ -7,6 +7,7 @@ module PGF.CId (CId(..),
import Control.Monad
import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString.UTF8 as UTF8
import Data.Char
import qualified Text.ParserCombinators.ReadP as RP
import qualified Text.PrettyPrint as PP
@@ -21,7 +22,7 @@ wildCId = CId (BS.singleton '_')
-- | Creates a new identifier from 'String'
mkCId :: String -> CId
-mkCId s = CId (BS.pack s)
+mkCId s = CId (UTF8.fromString s)
bsCId = CId
@@ -33,7 +34,18 @@ readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of
-- | Renders the identifier as 'String'
showCId :: CId -> String
-showCId (CId x) = BS.unpack x
+showCId (CId x) =
+ let raw = UTF8.toString x
+ in if isIdent raw
+ then raw
+ else "'" ++ concatMap escape raw ++ "'"
+ where
+ isIdent [] = False
+ isIdent (c:cs) = isIdentFirst c && all isIdentRest cs
+
+ escape '\'' = "\\\'"
+ escape '\\' = "\\\\"
+ escape c = [c]
instance Show CId where
showsPrec _ = showString . showCId
@@ -48,10 +60,35 @@ pCId = do s <- pIdent
else return (mkCId s)
pIdent :: RP.ReadP String
-pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
- where
- isIdentFirst c = c == '_' || isLetter c
- isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
+pIdent =
+ liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
+ `mplus`
+ do RP.char '\''
+ cs <- RP.many1 insideChar
+ RP.char '\''
+ return cs
+-- where
+insideChar = RP.readS_to_P $ \s ->
+ case s of
+ [] -> []
+ ('\\':'\\':cs) -> [('\\',cs)]
+ ('\\':'\'':cs) -> [('\'',cs)]
+ ('\\':cs) -> []
+ ('\'':cs) -> []
+ (c:cs) -> [(c,cs)]
+
+isIdentFirst c =
+ (c == '_') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '\192' && c <= '\255' && c /= '\247' && c /= '\215')
+isIdentRest c =
+ (c == '_') ||
+ (c == '\'') ||
+ (c >= '0' && c <= '9') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '\192' && c <= '\255' && c /= '\247' && c /= '\215')
ppCId :: CId -> PP.Doc
ppCId = PP.text . showCId