From 042243f08a321cd8ed5918ba94e83f22a8552adb Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Wed, 30 Oct 2013 12:53:36 +0000 Subject: added the linref construction in GF. The PGF version number is now bumped --- src/runtime/c/pgf/data.h | 1 + src/runtime/c/pgf/parser.c | 2 ++ src/runtime/c/pgf/printer.c | 34 +++++++++++++++++++++++++--- src/runtime/c/pgf/reader.c | 46 +++++++++++++++++++++++++++++++++----- src/runtime/haskell/PGF.hs | 9 +++++++- src/runtime/haskell/PGF/Binary.hs | 7 ++++-- src/runtime/haskell/PGF/Data.hs | 1 + src/runtime/haskell/PGF/Printer.hs | 6 +++-- 8 files changed, 93 insertions(+), 13 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index ea932d111..5b0401764 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -299,6 +299,7 @@ typedef struct { struct PgfCCat { PgfCncCat* cnccat; PgfCncFuns* lindefs; + PgfCncFuns* linrefs; size_t n_synprods; PgfProductionSeq* prods; float viterbi_prob; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 644a0c5d9..0b8fe59dc 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -611,6 +611,8 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state, { PgfCCat* cat = gu_new_flex(ps->pool, PgfCCat, fin, 1); cat->cnccat = conts->ccat->cnccat; + cat->lindefs = conts->ccat->lindefs; + cat->linrefs = conts->ccat->linrefs; cat->viterbi_prob = viterbi_prob; cat->fid = ps->max_fid++; cat->conts = conts; diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 9ce74d495..da7c70d7c 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -164,9 +164,33 @@ pgf_print_lindefs(GuMapItor* fn, const void* key, void* value, } } +static void +pgf_print_linrefs(GuMapItor* fn, const void* key, void* value, + GuExn* err) +{ + PgfPrintFn* clo = (PgfPrintFn*) fn; + int fid = *((int *) key); + PgfCCat* ccat = *((PgfCCat**) value); + GuOut *out = clo->out; + + if (ccat->linrefs != NULL) { + gu_puts(" ",out,err); + + size_t n_linrefs = gu_seq_length(ccat->linrefs); + for (size_t i = 0; i < n_linrefs; i++) { + if (i > 0) gu_putc(' ', out, err); + + PgfCncFun* fun = gu_seq_get(ccat->linrefs, PgfCncFun*, i); + gu_printf(out,err,"F%d",fun->funid); + } + + gu_printf(out,err," -> C%d\n",fid); + } +} + static void pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences, - GuOut *out, GuExn *err) + GuOut *out, GuExn *err) { gu_printf(out,err," F%d := (", cncfun->funid); @@ -321,6 +345,10 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr, PgfPrintFn clo3 = { { pgf_print_lindefs }, out }; gu_map_iter(concr->ccats, &clo3.fn, err); + gu_puts(" linrefs\n", out, err); + PgfPrintFn clo4 = { { pgf_print_linrefs }, out }; + gu_map_iter(concr->ccats, &clo4.fn, err); + gu_puts(" lin\n", out, err); size_t n_funs = gu_seq_length(concr->cncfuns); for (size_t i = 0; i < n_funs; i++) { @@ -338,8 +366,8 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr, } gu_puts(" categories\n", out, err); - PgfPrintFn clo4 = { { pgf_print_cnccat }, out }; - gu_map_iter(concr->cnccats, &clo4.fn, err); + PgfPrintFn clo5 = { { pgf_print_cnccat }, out }; + gu_map_iter(concr->cnccats, &clo5.fn, err); gu_puts("}\n", out, err); } diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 41619a0b8..d215f25e1 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -830,6 +830,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr) ccat = gu_new(PgfCCat, rdr->opool); ccat->cnccat = NULL; ccat->lindefs = NULL; + ccat->linrefs = NULL; ccat->n_synprods = 0; ccat->prods = NULL; ccat->viterbi_prob = 0; @@ -858,7 +859,7 @@ pgf_read_funid(PgfReader* rdr, PgfConcr* concr) } static void -pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr) +pgf_read_lindefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr) { size_t len = pgf_read_len(rdr); gu_return_on_exn(rdr->err, ); @@ -872,11 +873,33 @@ pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr) ccat->lindefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool); for (size_t j = 0; j < n_funs; j++) { PgfCncFun* fun = pgf_read_funid(rdr, concr); + fun->absfun = abs_lin_fun; gu_seq_set(ccat->lindefs, PgfCncFun*, j, fun); } } } +static void +pgf_read_linrefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr) +{ + size_t len = pgf_read_len(rdr); + gu_return_on_exn(rdr->err, ); + + for (size_t i = 0; i < len; i++) { + PgfCCat* ccat = pgf_read_fid(rdr, concr); + + size_t n_funs = pgf_read_len(rdr); + gu_return_on_exn(rdr->err, ); + + ccat->linrefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool); + for (size_t j = 0; j < n_funs; j++) { + PgfCncFun* fun = pgf_read_funid(rdr, concr); + fun->absfun = abs_lin_fun; + gu_seq_set(ccat->linrefs, PgfCncFun*, j, fun); + } + } +} + static void pgf_read_parg(PgfReader* rdr, PgfConcr* concr, PgfPArg* parg) { @@ -1000,6 +1023,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name) ccat = gu_new(PgfCCat, rdr->opool); ccat->cnccat = NULL; ccat->lindefs = NULL; + ccat->linrefs = NULL; ccat->n_synprods = 0; ccat->prods = NULL; ccat->viterbi_prob = 0; @@ -1123,7 +1147,7 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) } static PgfConcr* -pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr) +pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun) { PgfConcr* concr = gu_new(PgfConcr, rdr->opool); @@ -1153,7 +1177,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr) gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool); concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool); concr->coerce_idx = gu_map_type_new(PgfCncOverloadMap, rdr->opool); - pgf_read_lindefs(rdr, concr); + pgf_read_lindefs(rdr, abs_lin_fun, concr); + pgf_read_linrefs(rdr, abs_lin_fun, concr); pgf_read_ccats(rdr, concr); concr->cnccats = pgf_read_cnccats(rdr, abstr, concr); concr->callbacks = pgf_new_callbacks_map(concr, rdr->opool); @@ -1177,10 +1202,21 @@ pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr) size_t len = pgf_read_len(rdr); gu_return_on_exn(rdr->err, NULL); + PgfAbsFun* abs_lin_fun = gu_new(PgfAbsFun, rdr->opool); + abs_lin_fun->name = "_"; + abs_lin_fun->type = gu_new(PgfType, rdr->opool); + abs_lin_fun->type->hypos = NULL; + abs_lin_fun->type->cid = "_"; + abs_lin_fun->type->n_exprs = 0; + abs_lin_fun->arity = 0; + abs_lin_fun->defns = NULL; + abs_lin_fun->ep.prob = INFINITY; + abs_lin_fun->ep.expr = gu_null_variant; + for (size_t i = 0; i < len; i++) { - PgfConcr* concr = pgf_read_concrete(rdr, abstr); + PgfConcr* concr = pgf_read_concrete(rdr, abstr, abs_lin_fun); gu_return_on_exn(rdr->err, NULL); - + gu_map_put(concretes, concr->name, PgfConcr*, concr); } diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 1d0d13f97..fdb834cad 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -32,7 +32,7 @@ module PGF( showType, readType, mkType, mkHypo, mkDepHypo, mkImplHypo, unType, - categories, startCat, + categories, categoryContext, startCat, -- * Functions functions, functionsByCat, functionType, missingLins, @@ -221,6 +221,8 @@ abstractName :: PGF -> Language -- with the \'cat\' keyword. categories :: PGF -> [CId] +categoryContext :: PGF -> CId -> Maybe [Hypo] + -- | The start category is defined in the grammar with -- the \'startcat\' flag. This is usually the sentence category -- but it is not necessary. Despite that there is a start category @@ -279,6 +281,11 @@ languageCode pgf lang = categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))] +categoryContext pgf cat = + case Map.lookup cat (cats (abstract pgf)) of + Just (hypos,_,_) -> Just hypos + Nothing -> Nothing + startCat pgf = DTyp [] (lookStartCat pgf) [] functions pgf = Map.keys (funs (abstract pgf)) diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index e293da99c..bf8fe2824 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -14,7 +14,7 @@ import qualified Data.Set as Set import Control.Monad pgfMajorVersion, pgfMinorVersion :: Word16 -(pgfMajorVersion, pgfMinorVersion) = (1,0) +(pgfMajorVersion, pgfMinorVersion) = (2,0) instance Binary PGF where put pgf = do putWord16be pgfMajorVersion @@ -56,6 +56,7 @@ instance Binary Concr where putArray2 (sequences cnc) putArray (cncfuns cnc) put (lindefs cnc) + put (linrefs cnc) put (productions cnc) put (cnccats cnc) put (totalCats cnc) @@ -64,11 +65,13 @@ instance Binary Concr where sequences <- getArray2 cncfuns <- getArray lindefs <- get + linrefs <- get productions <- get cnccats <- get totalCats <- get return (Concr{ cflags=cflags, printnames=printnames - , sequences=sequences, cncfuns=cncfuns, lindefs=lindefs + , sequences=sequences, cncfuns=cncfuns + , lindefs=lindefs, linrefs=linrefs , productions=productions , pproductions = IntMap.empty , lproductions = Map.empty diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index 06ace4565..19df9d0ed 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -41,6 +41,7 @@ data Concr = Concr { printnames :: Map.Map CId String, -- printname of a cat or a fun cncfuns :: Array FunId CncFun, lindefs :: IntMap.IntMap [FunId], + linrefs :: IntMap.IntMap [FunId], sequences :: Array SeqId Sequence, productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 9385e81c4..5d85255d0 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -47,7 +47,9 @@ ppCnc name cnc = text "productions" $$ nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$ text "lindefs" $$ - nest 2 (vcat (map ppLinDef (IntMap.toList (lindefs cnc)))) $$ + nest 2 (vcat (map ppFunList (IntMap.toList (lindefs cnc)))) $$ + text "linrefs" $$ + nest 2 (vcat (map ppFunList (IntMap.toList (linrefs cnc)))) $$ text "lin" $$ nest 2 (vcat (map ppCncFun (assocs (cncfuns cnc)))) $$ text "sequences" $$ @@ -73,7 +75,7 @@ ppProduction (fid,PConst _ _ ss) = ppCncFun (funid,CncFun fun arr) = ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun) -ppLinDef (fid,funids) = +ppFunList (fid,funids) = ppFId fid <+> text "->" <+> hcat (punctuate comma (map ppFunId funids)) ppSeq (seqid,seq) = -- cgit v1.2.3