From 0095119ec09b394332a23ab7cc16c0009c3f162a Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Tue, 12 Nov 2013 09:54:57 +0000 Subject: added Predef.SOFT_BIND. This special token allows zero or more spaces between ordinary tokens. It is also used in the English RGL to attach the commas to the previous word. --- src/runtime/c/pgf/parser.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/runtime/c/pgf/parser.c') diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 46cfe5614..66d77175d 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -145,6 +145,7 @@ pgf_prev_extern_sym(PgfSymbol sym) case PGF_SYMBOL_VAR: return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1)); case PGF_SYMBOL_BIND: + case PGF_SYMBOL_SOFT_BIND: return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1)); case PGF_SYMBOL_NE: return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1)); @@ -1137,6 +1138,10 @@ pgf_symbols_cmp(GuString* psent, size_t sent_len, BIND_TYPE* pbind, PgfSymbols* *pbind = BIND_HARD; break; } + case PGF_SYMBOL_SOFT_BIND: { + *pbind = BIND_SOFT; + break; + } case PGF_SYMBOL_NE: { return -2; } @@ -1635,6 +1640,31 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym) } break; } + case PGF_SYMBOL_SOFT_BIND: { + if (ps->before->start_offset == ps->before->end_offset) { + if (ps->before->needs_bind) { + PgfParseState* state = + pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD); + if (state != NULL) { + if (state->next == NULL) { + state->viterbi_prob = + item->inside_prob+item->conts->outside_prob; + } + + pgf_item_advance(item, ps->pool); + gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + } else { + pgf_item_free(ps, item); + } + } else { + pgf_item_free(ps, item); + } + } else { + pgf_item_advance(item, ps->pool); + gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item); + } + break; + } default: gu_impossible(); } -- cgit v1.2.3