From 0095119ec09b394332a23ab7cc16c0009c3f162a Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Tue, 12 Nov 2013 09:54:57 +0000 Subject: added Predef.SOFT_BIND. This special token allows zero or more spaces between ordinary tokens. It is also used in the English RGL to attach the commas to the previous word. --- src/runtime/c/pgf/data.h | 1 + src/runtime/c/pgf/linearizer.c | 3 ++- src/runtime/c/pgf/parser.c | 30 ++++++++++++++++++++++++++++++ src/runtime/c/pgf/printer.c | 4 ++++ src/runtime/c/pgf/reader.c | 7 +++++++ src/runtime/haskell/PGF/Binary.hs | 6 ++++-- src/runtime/haskell/PGF/Data.hs | 1 + src/runtime/haskell/PGF/Macros.hs | 2 ++ src/runtime/haskell/PGF/Optimize.hs | 1 + src/runtime/haskell/PGF/Parse.hs | 8 +++++--- src/runtime/haskell/PGF/Printer.hs | 1 + 11 files changed, 58 insertions(+), 6 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 9bc73dd0a..131201132 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -167,6 +167,7 @@ typedef enum { PGF_SYMBOL_KS, PGF_SYMBOL_KP, PGF_SYMBOL_BIND, + PGF_SYMBOL_SOFT_BIND, PGF_SYMBOL_NE } PgfSymbolTag; diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 698bab939..732bce75a 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -723,7 +723,8 @@ pgf_lzr_linearize_symbols(PgfConcr* concr, PgfCncTreeApp* fapp, } break; } - case PGF_SYMBOL_BIND: { + case PGF_SYMBOL_BIND: + case PGF_SYMBOL_SOFT_BIND: { if ((*fnsp)->symbol_bind) { (*fnsp)->symbol_bind(fnsp); } diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 46cfe5614..66d77175d 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -145,6 +145,7 @@ pgf_prev_extern_sym(PgfSymbol sym) case PGF_SYMBOL_VAR: return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1)); case PGF_SYMBOL_BIND: + case PGF_SYMBOL_SOFT_BIND: return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1)); case PGF_SYMBOL_NE: return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1)); @@ -1137,6 +1138,10 @@ pgf_symbols_cmp(GuString* psent, size_t sent_len, BIND_TYPE* pbind, PgfSymbols* *pbind = BIND_HARD; break; } + case PGF_SYMBOL_SOFT_BIND: { + *pbind = BIND_SOFT; + break; + } case PGF_SYMBOL_NE: { return -2; } @@ -1635,6 +1640,31 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym) } break; } + case PGF_SYMBOL_SOFT_BIND: { + if (ps->before->start_offset == ps->before->end_offset) { + if (ps->before->needs_bind) { + PgfParseState* state = + pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD); + if (state != NULL) { + if (state->next == NULL) { + state->viterbi_prob = + item->inside_prob+item->conts->outside_prob; + } + + pgf_item_advance(item, ps->pool); + gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + } else { + pgf_item_free(ps, item); + } + } else { + pgf_item_free(ps, item); + } + } else { + pgf_item_advance(item, ps->pool); + gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item); + } + break; + } default: gu_impossible(); } diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 78c2b74db..8b737266e 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -280,6 +280,10 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) gu_puts("BIND", out, err); break; } + case PGF_SYMBOL_SOFT_BIND: { + gu_puts("SOFT_BIND", out, err); + break; + } default: gu_impossible(); } diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 477adc069..c40781e26 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -723,6 +723,13 @@ pgf_read_symbol(PgfReader* rdr) gu_return_on_exn(rdr->err, gu_null_variant); break; } + case PGF_SYMBOL_SOFT_BIND: { + gu_new_variant(PGF_SYMBOL_SOFT_BIND, + PgfSymbolBIND, + &sym, rdr->opool); + gu_return_on_exn(rdr->err, gu_null_variant); + break; + } default: pgf_read_tag_error(rdr); } diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index 2debcf12d..bf30e4506 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -152,7 +152,8 @@ instance Binary Symbol where put (SymKS ts) = putWord8 3 >> put ts put (SymKP d vs) = putWord8 4 >> put (d,vs) put SymBIND = putWord8 5 - put SymNE = putWord8 6 + put SymSOFT_BIND = putWord8 6 + put SymNE = putWord8 7 get = do tag <- getWord8 case tag of 0 -> liftM2 SymCat get get @@ -161,7 +162,8 @@ instance Binary Symbol where 3 -> liftM SymKS get 4 -> liftM2 (\d vs -> SymKP d vs) get get 5 -> return SymBIND - 6 -> return SymNE + 6 -> return SymSOFT_BIND + 7 -> return SymNE _ -> decodingError instance Binary PArg where diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index f5797739f..3222867d2 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -62,6 +62,7 @@ data Symbol | SymKS Token | SymKP [Symbol] [([Symbol],[String])] | SymBIND -- the special BIND token + | SymSOFT_BIND -- the special SOFT_BIND token | SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime) deriving (Eq,Ord,Show) data Production diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index ce75b1c91..0e73180d5 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -160,6 +160,7 @@ data BracketedTokn | LeafKS Token | LeafNE | LeafBIND + | LeafSOFT_BIND | LeafKP [BracketedTokn] [([BracketedTokn],[String])] deriving Eq @@ -222,6 +223,7 @@ computeSeq filter seq args = concatMap compute seq compute (SymKS t) = [LeafKS t] compute SymNE = [LeafNE] compute SymBIND = [LeafKS "&+"] + compute SymSOFT_BIND = [] compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]] getArg d r diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index bfc12e097..ab831704f 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -228,6 +228,7 @@ splitLexicalRules cnc p_prods = [seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts]) seq2prefix (SymNE :syms) = TrieMap.empty seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]] + seq2prefix (SymSOFT_BIND :syms) = TrieMap.fromList [wf []] updateConcrete abs cnc = let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index 9c69940ef..3f5dc313a 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -309,10 +309,12 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha SymNE -> process flit ftok cnc items acc chart SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc in process flit ftok cnc items acc' chart + SymSOFT_BIND->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart SymKP syms vars - -> let to_tok (SymKS t) = [t] - to_tok SymBIND = ["&+"] - to_tok _ = [] + -> let to_tok (SymKS t) = [t] + to_tok SymBIND = ["&+"] + to_tok SymSOFT_BIND = [] + to_tok _ = [] !acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc (syms:[syms' | (syms',_) <- vars]) diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index d3a5ea1d9..4945667f4 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -92,6 +92,7 @@ ppSymbol (SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char ppSymbol (SymKS t) = doubleQuotes (text t) ppSymbol SymNE = text "nonExist" ppSymbol SymBIND = text "BIND" +ppSymbol SymSOFT_BIND = text "SOFT_BIND" ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts))) ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps) -- cgit v1.2.3