diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2013-11-06 10:21:46 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2013-11-06 10:21:46 +0000 |
| commit | 2483dc772897eb0909664f1a88cc7f8ec50ebd5b (patch) | |
| tree | 4ecb223fc246458a9a5997a4b65329b6039ce309 /src | |
| parent | 84ef5fa5fa976569370c58bed855f2ab1de5588c (diff) | |
the content of ParseEngAbs3.probs is now merged with ParseEngAbs.probs. The later is now retrained. Once the grammar is compiled with the .probs file now it doesn't need anything more to do robust parsing. The robustness itself is controlled by the flags 'heuristic_search_factor', 'meta_prob' and 'meta_token_prob' in ParseEngAbs.gf
Diffstat (limited to 'src')
24 files changed, 153 insertions, 179 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 6769b283d..58e485768 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -1134,15 +1134,19 @@ allCommands = Map.fromList [ case arg of [EFun id] -> case Map.lookup id (funs (abstract pgf)) of Just fd -> do putStrLn $ render (ppFun id fd) - putStrLn ("Probability: "++show (probTree pgf (EFun id))) + let (_,_,_,prob,_) = fd + putStrLn ("Probability: "++show prob) return void Nothing -> case Map.lookup id (cats (abstract pgf)) of - Just hyps -> do putStrLn $ - render (ppCat id hyps $$ + Just cd -> do putStrLn $ + render (ppCat id cd $$ if null (functionsToCat pgf id) then empty else space $$ - vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id]) + vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id] $$ + space) + let (_,_,prob,_) = cd + putStrLn ("Probability: "++show prob) return void Nothing -> do putStrLn ("unknown category of function identifier "++show id) return void diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index ce31d5dc1..25db4bac7 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -50,12 +50,12 @@ mkCanon2pgf opts gr am = do [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ Look.allOrigInfos gr am - flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF aflags] + flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags] funs = Map.fromList [(i2i f, (mkType [] ty, mkArrity ma, mkDef pty, 0, addr)) | ((m,f),AbsFun (Just (L _ ty)) ma pty _,addr) <- adefs] - cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, addr)) | + cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0, addr)) | ((m,c),AbsCat (Just (L _ cont)),addr) <- adefs] catfuns cat = @@ -69,7 +69,7 @@ mkCanon2pgf opts gr am = do ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ Look.allOrigInfos gr cm) - let flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF cflags] + let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] seqs = (mkSetArray . Set.fromList . concat) $ (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) diff --git a/src/compiler/GF/Compile/PGFtoLProlog.hs b/src/compiler/GF/Compile/PGFtoLProlog.hs index 2ab5a26d2..61e28eb17 100644 --- a/src/compiler/GF/Compile/PGFtoLProlog.hs +++ b/src/compiler/GF/Compile/PGFtoLProlog.hs @@ -12,7 +12,7 @@ import qualified Data.Map as Map grammar2lambdaprolog_mod pgf = render $ text "module" <+> ppCId (absname pgf) <> char '.' $$ space $$ - vcat [ppClauses cat fns | (cat,(_,fs,_)) <- Map.toList (cats (abstract pgf)), + vcat [ppClauses cat fns | (cat,(_,fs,_,_)) <- Map.toList (cats (abstract pgf)), let fns = [(f,fromJust (Map.lookup f (funs (abstract pgf)))) | (_,f) <- fs]] where ppClauses cat fns = @@ -25,11 +25,11 @@ grammar2lambdaprolog_mod pgf = render $ grammar2lambdaprolog_sig pgf = render $ text "sig" <+> ppCId (absname pgf) <> char '.' $$ space $$ - vcat [ppCat c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$ + vcat [ppCat c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$ space $$ vcat [ppFun f ty <> dot | (f,(ty,_,Nothing,_,_)) <- Map.toList (funs (abstract pgf))] $$ space $$ - vcat [ppExport c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$ + vcat [ppExport c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$ vcat [ppFunPred f (hyps ++ [(Explicit,wildCId,DTyp [] c es)]) <> dot | (f,(DTyp hyps c es,_,Just _,_,_)) <- Map.toList (funs (abstract pgf))] ppCat :: CId -> [Hypo] -> Doc diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs index 061d9e874..8fffd5f07 100644 --- a/src/compiler/GF/Compile/PGFtoProlog.hs +++ b/src/compiler/GF/Compile/PGFtoProlog.hs @@ -49,7 +49,7 @@ plAbstract name abs (f, v) <- Map.assocs (aflags abs)] ++++ plFacts name "cat" 2 "(?Type, ?[X:Type,...])" [[plType cat args, plHypos hypos'] | - (cat, (hypos, _, _)) <- Map.assocs (cats abs), + (cat, (hypos,_,_,_)) <- Map.assocs (cats abs), let ((_, subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos, let args = reverse [EFun x | (_,x) <- subst]] ++++ plFacts name "fun" 3 "(?Fun, ?Type, ?[X:Type,...])" diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 6641b5718..0e897aa00 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -17,6 +17,7 @@ import Data.Binary --import Control.Monad
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS
+import Text.PrettyPrint (render)
import GF.Data.Operations
import GF.Infra.Ident
@@ -24,9 +25,10 @@ import GF.Infra.Option import GF.Grammar.Grammar
import PGF() -- Binary instances
+import PGF.Data(ppLit)
-- Please change this every time when the GFO format is changed
-gfoVersion = "GF02"
+gfoVersion = "GF03"
instance Binary Ident where
put id = put (ident2bs id)
@@ -91,7 +93,7 @@ instance Binary ModuleStatus where instance Binary Options where
put = put . optionsGFO
get = do opts <- get
- case parseModuleOptions ["--" ++ flag ++ "=" ++ value | (flag,value) <- opts] of
+ case parseModuleOptions ["--" ++ flag ++ "=" ++ render (ppLit value) | (flag,value) <- opts] of
Ok x -> return x
Bad msg -> fail msg
diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs index dba08ce44..e15e6e4d3 100644 --- a/src/compiler/GF/Grammar/Printer.hs +++ b/src/compiler/GF/Grammar/Printer.hs @@ -29,7 +29,7 @@ import GF.Infra.Option import GF.Grammar.Values
import GF.Grammar.Grammar
-import PGF.Data (ppMeta)
+import PGF.Data (ppMeta, ppLit)
import PGF.Printer (ppFId, ppFunId, ppSeqId, ppSeq)
import Text.PrettyPrint
@@ -87,7 +87,7 @@ ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ mseqs jments) = ppOptions opts =
text "flags" $$
- nest 2 (vcat [text option <+> equals <+> str value <+> semi | (option,value) <- optionsGFO opts])
+ nest 2 (vcat [text option <+> equals <+> ppLit value <+> semi | (option,value) <- optionsGFO opts])
ppJudgement q (id, AbsCat pcont ) =
text "cat" <+> ppIdent id <+>
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index e0cba05e4..1236e729c 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -43,6 +43,7 @@ import GF.Data.ErrM import Data.Set (Set) import qualified Data.Set as Set +import PGF.Data(Literal(..)) usageHeader :: String usageHeader = unlines @@ -170,7 +171,9 @@ data Flags = Flags { optWarnings :: [Warning], optDump :: [Dump], optTagsOnly :: Bool, - optBeamSize :: Maybe Double, + optHeuristicFactor :: Maybe Double, + optMetaProb :: Maybe Double, + optMetaToknProb :: Maybe Double, optNewComp :: Bool } deriving (Show) @@ -206,16 +209,18 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) -- Showing options -- | Pretty-print the options that are preserved in .gfo files. -optionsGFO :: Options -> [(String,String)] +optionsGFO :: Options -> [(String,Literal)] optionsGFO opts = optionsPGF opts - ++ [("coding", flag optEncoding opts)] + ++ [("coding", LStr (flag optEncoding opts))] -- | Pretty-print the options that are preserved in .pgf files. -optionsPGF :: Options -> [(String,String)] +optionsPGF :: Options -> [(String,Literal)] optionsPGF opts = - maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts) - ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts) - ++ maybe [] (\x -> [("beam_size",show x)]) (flag optBeamSize opts) + maybe [] (\x -> [("language",LStr x)]) (flag optSpeechLanguage opts) + ++ maybe [] (\x -> [("startcat",LStr x)]) (flag optStartCat opts) + ++ maybe [] (\x -> [("heuristic_search_factor",LFlt x)]) (flag optHeuristicFactor opts) + ++ maybe [] (\x -> [("meta_prob",LFlt x)]) (flag optMetaProb opts) + ++ maybe [] (\x -> [("meta_token_prob",LFlt x)]) (flag optMetaToknProb opts) -- Option manipulation @@ -272,7 +277,9 @@ defaultFlags = Flags { optWarnings = [], optDump = [], optTagsOnly = False, - optBeamSize = Nothing, + optHeuristicFactor = Nothing, + optMetaProb = Nothing, + optMetaToknProb = Nothing, optNewComp = #ifdef NEW_COMP True @@ -358,7 +365,9 @@ optDescr = Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).", Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).", Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...", - Option [] ["beam_size"] (ReqArg readDouble "SIZE") "Set the beam size for statistical parsing", + Option [] ["heuristic_search_factor"] (ReqArg (readDouble (\d o -> o { optHeuristicFactor = Just d })) "FACTOR") "Set the heuristic search factor for statistical parsing", + Option [] ["meta_prob"] (ReqArg (readDouble (\d o -> o { optMetaProb = Just d })) "PROB") "Set the probability of introducting a meta variable in the parser", + Option [] ["meta_token_prob"] (ReqArg (readDouble (\d o -> o { optMetaToknProb = Just d })) "PROB") "Set the probability for skipping a token in the parser", Option [] ["new-comp"] (NoArg (set $ \o -> o{optNewComp = True})) "Use the new experimental compiler.", Option [] ["old-comp"] (NoArg (set $ \o -> o{optNewComp = False})) "Use old trusty compiler.", dumpOption "source" Source, @@ -433,9 +442,9 @@ optDescr = Nothing -> fail $ "Unknown CFG transformation: " ++ x' ++ " Known: " ++ show (map fst cfgTransformNames) - readDouble x = case reads x of - [(d,"")] -> set $ \o -> o { optBeamSize = Just d } - _ -> fail "A floating point number is expected" + readDouble f x = case reads x of + [(d,"")] -> set $ f d + _ -> fail "A floating point number is expected" dumpOption s d = Option [] ["dump-"++s] (NoArg (set $ \o -> o { optDump = Dump d:optDump o})) ("Dump output of the " ++ s ++ " phase.") diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs index 24c2e4d86..3703fe7a2 100644 --- a/src/compiler/GF/Speech/VoiceXML.hs +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -39,7 +39,7 @@ type Skeleton = [(CId, [(CId, [CId])])] pgfSkeleton :: PGF -> Skeleton pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType (abstract pgf) f))) | (_,f) <- fs]) - | (c,(_,fs,_)) <- Map.toList (cats (abstract pgf))] + | (c,(_,fs,_,_)) <- Map.toList (cats (abstract pgf))] -- -- * Questions to ask diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs index 61d5726b2..f75a39ab1 100644 --- a/src/compiler/GFC.hs +++ b/src/compiler/GFC.hs @@ -104,7 +104,7 @@ writeByteCode opts pgf where addrs = [(id,addr) | (id,(_,_,_,_,addr)) <- Map.toList (funs (abstract pgf))] ++ - [(id,addr) | (id,(_,_,addr)) <- Map.toList (cats (abstract pgf))] + [(id,addr) | (id,(_,_,_,addr)) <- Map.toList (cats (abstract pgf))] writePGF :: Options -> PGF -> IOE () writePGF opts pgf = do diff --git a/src/compiler/SimpleEditor/Convert.hs b/src/compiler/SimpleEditor/Convert.hs index 3554e3342..86580834a 100644 --- a/src/compiler/SimpleEditor/Convert.hs +++ b/src/compiler/SimpleEditor/Convert.hs @@ -23,6 +23,7 @@ import GF.Grammar.Printer(ppParams,ppTerm,getAbs,TermPrintQual(..)) import GF.Grammar.Parser(runP,pModDef) import GF.Grammar.Lexer(Posn(..)) import GF.Data.ErrM +import PGF.Data(Literal(LStr)) import SimpleEditor.Syntax as S import SimpleEditor.JSON @@ -57,7 +58,10 @@ convAbstract (modid,src) = let cats = reverse cats0 funs = reverse funs0 flags = optionsGFO (mflags src) - startcat = maybe "-" id $ lookup "startcat" flags + startcat = + case lookup "startcat" flags of + Just (LStr cat) -> cat + _ -> "-" return $ Grammar (convId modid) extends (Abstract startcat cats funs) [] convExtends = mapM convExtend diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 5b0401764..9bc73dd0a 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -87,9 +87,7 @@ typedef struct { PgfCId name; PgfHypos* context; - prob_t meta_prob; - prob_t meta_token_prob; - PgfMetaChildMap* meta_child_probs; + prob_t prob; void* predicate; } PgfAbsCat; @@ -230,6 +228,7 @@ typedef GuSeq PgfCncFuns; struct PgfConcr { PgfCId name; + PgfAbstr* abstr; PgfFlags* cflags; PgfPrintNames* printnames; GuMap* ccats; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 4e4724c75..ec21fc84e 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -63,7 +63,10 @@ typedef struct { int prod_full_count; #endif PgfItem* free_item; - prob_t beam_size; + + prob_t heuristic_factor; + prob_t meta_prob; + prob_t meta_token_prob; } PgfParsing; typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE; @@ -1389,12 +1392,14 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err { (void) (err); - PgfAbsCat* abscat = (PgfAbsCat*) key; - prob_t meta_prob = *((prob_t*) value); + PgfAbsCat* abscat = *((PgfAbsCat**) value); PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn; PgfParsing* ps = clo->ps; PgfItem* meta_item = clo->meta_item; + if (abscat->prob == INFINITY) + return; + PgfCncCat* cnccat = gu_map_get(ps->concr->cnccats, abscat->name, PgfCncCat*); if (cnccat == NULL) @@ -1412,7 +1417,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err PgfItem* item = pgf_item_copy(meta_item, ps); item->inside_prob += - ccat->viterbi_prob+meta_prob; + ccat->viterbi_prob+abscat->prob; size_t nargs = gu_seq_length(meta_item->args); item->args = gu_new_seq(PgfPArg, nargs+1, ps->pool); @@ -1698,18 +1703,14 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item) } pgf_parsing_complete(ps, item, ep); } else { - prob_t meta_token_prob = - item->conts->ccat->cnccat->abscat->meta_token_prob; + prob_t meta_token_prob = + ps->meta_token_prob; if (meta_token_prob != INFINITY) { pgf_parsing_meta_scan(ps, item, meta_token_prob); } - PgfCIdMap* meta_child_probs = - item->conts->ccat->cnccat->abscat->meta_child_probs; - if (meta_child_probs != NULL) { - PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item }; - gu_map_iter(meta_child_probs, &clo.fn, NULL); - } + PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item }; + gu_map_iter(ps->concr->abstr->cats, &clo.fn, NULL); } } else { pgf_parsing_symbol(ps, item, item->curr_sym); @@ -1721,22 +1722,38 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item) } } -static prob_t -pgf_parsing_default_beam_size(PgfConcr* concr) +static void +pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr) { - PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral); + PgfLiteral lit; - if (gu_variant_is_null(lit)) - return 0; + lit = + gu_map_get(abstr->aflags, "heuristic_search_factor", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->heuristic_factor = ((PgfLiteralFlt*) pi.data)->val; + } + + lit = + gu_map_get(abstr->aflags, "meta_prob", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->meta_prob = - log(((PgfLiteralFlt*) pi.data)->val); + } - GuVariantInfo pi = gu_variant_open(lit); - gu_assert (pi.tag == PGF_LITERAL_FLT); - return ((PgfLiteralFlt*) pi.data)->val; + lit = + gu_map_get(abstr->aflags, "meta_token_prob", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->meta_token_prob = - log(((PgfLiteralFlt*) pi.data)->val); + } } static PgfParsing* -pgf_new_parsing(PgfConcr* concr, - GuString sentence, double heuristics, +pgf_new_parsing(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool) { PgfParsing* ps = gu_new(PgfParsing, pool); @@ -1756,7 +1773,11 @@ pgf_new_parsing(PgfConcr* concr, ps->prod_full_count = 0; #endif ps->free_item = NULL; - ps->beam_size = heuristics; + ps->heuristic_factor = 0; + ps->meta_prob = INFINITY; + ps->meta_token_prob = INFINITY; + + pgf_parsing_set_default_factors(ps, concr->abstr); PgfExprMeta *expr_meta = gu_new_variant(PGF_EXPR_META, @@ -2107,7 +2128,7 @@ pgf_parse_result_is_new(PgfExprState* st) // TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat static PgfParsing* pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, - GuString sentence, double heuristics, + GuString sentence, double heuristic_factor, GuExn* err, GuPool* pool, GuPool* out_pool) { @@ -2121,12 +2142,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, gu_assert(lin_idx < cnccat->n_lins); - if (heuristics < 0) { - heuristics = pgf_parsing_default_beam_size(concr); + PgfParsing* ps = + pgf_new_parsing(concr, sentence, pool, out_pool); + + if (heuristic_factor >= 0) { + ps->heuristic_factor = heuristic_factor; } - PgfParsing* ps = - pgf_new_parsing(concr, sentence, heuristics, pool, out_pool); PgfParseState* state = pgf_new_parse_state(ps, 0, BIND_SOFT); @@ -2156,11 +2178,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); } - PgfItem *item = - pgf_new_item(ps, conts, ps->meta_prod); - item->inside_prob = - ccat->cnccat->abscat->meta_prob; - gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + if (ps->meta_prob != INFINITY) { + PgfItem *item = + pgf_new_item(ps, conts, ps->meta_prod); + item->inside_prob = + ps->meta_prob; + gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + } } } @@ -2200,7 +2224,7 @@ pgf_parsing_proceed(PgfParsing* ps) prob_t state_delta = (st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))* - ps->beam_size; + ps->heuristic_factor; delta_prob += state_delta; st = st->next; } diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index e804f5ce7..93dea300a 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -35,63 +35,6 @@ pgf_read(const char* fpath, return pgf; } -void -pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, - GuPool* pool, GuExn* err) -{ - FILE *fp = fopen(fpath, "r"); - if (!fp) { - gu_raise_errno(err); - return; - } - - GuPool* tmp_pool = gu_new_pool(); - - for (;;) { - char cat1[21]; - char cat2[21]; - prob_t prob; - - if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3) - break; - - prob = - log(prob); - - PgfAbsCat* abscat1 = - gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*); - if (abscat1 == NULL) { - GuExnData* exn = gu_raise(err, PgfExn); - exn->data = "Unknown category name"; - goto close; - } - - if (strcmp(cat2, "*") == 0) { - abscat1->meta_prob = prob; - } else if (strcmp(cat2, "_") == 0) { - abscat1->meta_token_prob = prob; - } else { - PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*); - if (abscat2 == NULL) { - gu_raise(err, PgfExn); - GuExnData* exn = gu_raise(err, PgfExn); - exn->data = "Unknown category name"; - goto close; - } - - if (abscat1->meta_child_probs == NULL) { - abscat1->meta_child_probs = - gu_map_type_new(PgfMetaChildMap, pool); - } - - gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob); - } - } - -close: - gu_pool_free(tmp_pool); - fclose(fp); -} - GuString pgf_abstract_name(PgfPGF* pgf) { diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 61b8bea6c..ffc293306 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -80,11 +80,6 @@ pgf_read(const char* fpath, * */ - -void -pgf_load_meta_child_probs(PgfPGF*, const char* fpath, - GuPool* pool, GuExn* err); - GuString pgf_abstract_name(PgfPGF*); diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index da7c70d7c..78c2b74db 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -48,7 +48,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value, ctxt = next; } - gu_printf(out, err, " ; -- %f\n",cat->meta_prob); + gu_printf(out, err, " ; -- %f\n", cat->prob); } void diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index d215f25e1..12605b89a 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -516,10 +516,6 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats) gu_return_on_exn(rdr->err, NULL); } - abscat->meta_prob = INFINITY; - abscat->meta_token_prob = INFINITY; - abscat->meta_child_probs = NULL; - GuBuf* functions = gu_new_buf(PgfAbsFun*, rdr->tmp_pool); size_t n_functions = pgf_read_len(rdr); @@ -538,6 +534,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats) gu_buf_push(functions, PgfAbsFun*, absfun); } + abscat->prob = - log(gu_in_f64be(rdr->in, rdr->err)); + pgf_jit_predicate(rdr->jit_state, abscats, abscat, functions); return abscat; @@ -1155,6 +1153,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun) pgf_read_cid(rdr, rdr->opool); gu_return_on_exn(rdr->err, NULL); + concr->abstr = abstr; + concr->cflags = pgf_read_flags(rdr); gu_return_on_exn(rdr->err, NULL); diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index 79a4fdd42..32f8323ab 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -53,18 +53,17 @@ int main(int argc, char* argv[]) { // Create the pool that is used to allocate everything GuPool* pool = gu_new_pool(); int status = EXIT_SUCCESS; - if (argc < 5 || argc > 6) { - fprintf(stderr, "usage: %s pgf cat from-lang to-lang [probs-file]\n", argv[0]); + if (argc < 5) { + fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]); status = EXIT_FAILURE; goto fail; } - char* filename = argv[1]; + GuString filename = argv[1]; GuString cat = argv[2]; - GuString from_lang = argv[3]; GuString to_lang = argv[4]; - + // Create an exception frame that catches all errors. GuExn* err = gu_new_exn(NULL, gu_kind(type), pool); @@ -78,16 +77,6 @@ int main(int argc, char* argv[]) { goto fail; } - if (argc == 6) { - char* meta_probs_filename = argv[5]; - pgf_load_meta_child_probs(pgf, meta_probs_filename, pool, err); - if (!gu_ok(err)) { - fprintf(stderr, "Loading meta child probs failed\n"); - status = EXIT_FAILURE; - goto fail; - } - } - // Look up the source and destination concrete categories PgfConcr* from_concr = pgf_get_language(pgf, from_lang); PgfConcr* to_concr = pgf_get_language(pgf, to_lang); diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index e7e5c53c5..0e3c79f40 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -292,8 +292,8 @@ categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))] categoryContext pgf cat = case Map.lookup cat (cats (abstract pgf)) of - Just (hypos,_,_) -> Just hypos - Nothing -> Nothing + Just (hypos,_,_,_) -> Just hypos + Nothing -> Nothing startCat pgf = DTyp [] (lookStartCat pgf) [] @@ -301,8 +301,8 @@ functions pgf = Map.keys (funs (abstract pgf)) functionsByCat pgf cat = case Map.lookup cat (cats (abstract pgf)) of - Just (_,fns,_) -> map snd fns - Nothing -> [] + Just (_,fns,_,_) -> map snd fns + Nothing -> [] functionType pgf fun = case Map.lookup fun (funs (abstract pgf)) of @@ -325,8 +325,8 @@ browse pgf id = fmap (\def -> (def,producers,consumers)) definition in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]) Just (ty,_,Nothing, _,_) -> Just $ render (text "data" <+> ppCId id <+> colon <+> ppType 0 [] ty) Nothing -> case Map.lookup id (cats (abstract pgf)) of - Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps))) - Nothing -> Nothing + Just (hyps,_,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps))) + Nothing -> Nothing (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf)) where diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index 202939f04..2debcf12d 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -40,13 +40,13 @@ instance Binary CId where instance Binary Abstr where
put abs = put (aflags abs,
fmap (\(w,x,y,z,_) -> (w,x,y,z)) (funs abs),
- fmap (\(x,y,_) -> (x,y)) (cats abs))
+ fmap (\(x,y,z,_) -> (x,y,z)) (cats abs))
get = do aflags <- get
funs <- get
cats <- get
return (Abstr{ aflags=aflags
, funs=fmap (\(w,x,y,z) -> (w,x,y,z,0)) funs
- , cats=fmap (\(x,y) -> (x,y,0)) cats
+ , cats=fmap (\(x,y,z) -> (x,y,z,0)) cats
, code=BS.empty
})
diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index e37b243d0..f5797739f 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -29,10 +29,10 @@ data PGF = PGF { data Abstr = Abstr { aflags :: Map.Map CId Literal, -- ^ value of a flag funs :: Map.Map CId (Type,Int,Maybe [Equation],Double,BCAddr), -- ^ type, arrity and definition of function + probability - cats :: Map.Map CId ([Hypo],[(Double, CId)],BCAddr), -- ^ 1. context of a category - -- ^ 2. functions of a category. The order in the list is important, - -- this is the order in which the type singatures are given in the source. - -- The termination of the exhaustive generation might depend on this. + cats :: Map.Map CId ([Hypo],[(Double, CId)],Double,BCAddr), -- ^ 1. context of a category + -- 2. functions of a category. The functions are stored + -- in decreasing probability order. + -- 3. probability code :: BS.ByteString } diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 830a16674..ce75b1c91 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -67,7 +67,7 @@ functionsToCat :: PGF -> CId -> [(CId,Type)] functionsToCat pgf cat = [(f,ty) | (_,f) <- fs, Just (ty,_,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]] where - (_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf + (_,fs,_,_) = lookMap ([],[],0,0) cat $ cats $ abstract pgf -- | List of functions that lack linearizations in the given language. missingLins :: PGF -> Language -> [CId] @@ -82,7 +82,7 @@ restrictPGF :: (CId -> Bool) -> PGF -> PGF restrictPGF cond pgf = pgf { abstract = abstr { funs = Map.filterWithKey (\c _ -> cond c) (funs abstr), - cats = Map.map (\(hyps,fs,addr) -> (hyps,filter (cond . snd) fs,addr)) (cats abstr) + cats = Map.map (\(hyps,fs,p,addr) -> (hyps,filter (cond . snd) fs,p,addr)) (cats abstr) } } ---- restrict concrs also, might be needed where diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 66d8530f0..d3a5ea1d9 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -26,8 +26,8 @@ ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$ ppFlag :: CId -> Literal -> Doc ppFlag flag value = text "flag" <+> ppCId flag <+> char '=' <+> ppLit value <+> char ';' -ppCat :: CId -> ([Hypo],[(Double,CId)],BCAddr) -> Doc -ppCat c (hyps,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';' +ppCat :: CId -> ([Hypo],[(Double,CId)],Double,BCAddr) -> Doc +ppCat c (hyps,_,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';' ppFun :: CId -> (Type,Int,Maybe [Equation],Double,BCAddr) -> Doc ppFun f (t,_,Just eqs,_,_) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t <+> char ';' $$ diff --git a/src/runtime/haskell/PGF/Probabilistic.hs b/src/runtime/haskell/PGF/Probabilistic.hs index 7f980254b..095ade022 100644 --- a/src/runtime/haskell/PGF/Probabilistic.hs +++ b/src/runtime/haskell/PGF/Probabilistic.hs @@ -24,13 +24,14 @@ import Data.Maybe (fromMaybe) --, fromJust -- the probabilities for the different functions in a grammar. data Probabilities = Probs { funProbs :: Map.Map CId Double, - catProbs :: Map.Map CId [(Double, CId)] + catProbs :: Map.Map CId (Double, [(Double, CId)]) } -- | Renders the probability structure as string showProbabilities :: Probabilities -> String -showProbabilities = unlines . map pr . Map.toList . funProbs where - pr (f,d) = showCId f ++ "\t" ++ show d +showProbabilities = unlines . concatMap prProb . Map.toList . catProbs where + prProb (c,(p,fns)) = pr (p,c) : map pr fns + pr (p,f) = showCId f ++ "\t" ++ show p -- | Reads the probabilities from a file. -- This should be a text file where on every line @@ -50,8 +51,12 @@ readProbabilitiesFromFile file pgf = do -- for the result category. mkProbabilities :: PGF -> Map.Map CId Double -> Probabilities mkProbabilities pgf probs = - let funs1 = Map.fromList [(f,p) | (_,cf) <- Map.toList cats1, (p,f) <- cf] - cats1 = Map.map (\(_,fs,_) -> sortBy cmpProb (fill fs)) (cats (abstract pgf)) + let funs1 = Map.fromList [(f,p) | (_,(_,fns)) <- Map.toList cats1, (p,f) <- fns] + cats1 = Map.mapWithKey (\c (_,fns,_,_) -> + let p' = fromMaybe 0 (Map.lookup c probs) + fns' = sortBy cmpProb (fill fns) + in (p', fns')) + (cats (abstract pgf)) in Probs funs1 cats1 where cmpProb (p1,_) (p2,_) = compare p2 p1 @@ -71,15 +76,15 @@ defaultProbabilities pgf = mkProbabilities pgf Map.empty getProbabilities :: PGF -> Probabilities getProbabilities pgf = Probs { - funProbs = Map.map (\(_,_,_,p,_) -> p) (funs (abstract pgf)), - catProbs = Map.map (\(_,fns,_) -> fns) (cats (abstract pgf)) + funProbs = Map.map (\(_,_,_,p,_) -> p ) (funs (abstract pgf)), + catProbs = Map.map (\(_,fns,p,_) -> (p,fns)) (cats (abstract pgf)) } setProbabilities :: Probabilities -> PGF -> PGF setProbabilities probs pgf = pgf { abstract = (abstract pgf) { - funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df,p,addr)) (funs (abstract pgf)) (funProbs probs), - cats = mapUnionWith (\(hypos,_,addr) fns -> (hypos,fns,addr)) (cats (abstract pgf)) (catProbs probs) + funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df, p,addr)) (funs (abstract pgf)) (funProbs probs), + cats = mapUnionWith (\(hypos,_,_,addr) (p,fns) -> (hypos,fns,p,addr)) (cats (abstract pgf)) (catProbs probs) }} where mapUnionWith f map1 map2 = @@ -102,7 +107,7 @@ rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p) mkProbDefs :: PGF -> ([[CId]],[(CId,Type,[Equation])]) mkProbDefs pgf = - let cs = [(c,hyps,fns) | (c,(hyps0,fs,_)) <- Map.toList (cats (abstract pgf)), + let cs = [(c,hyps,fns) | (c,(hyps0,fs,_,_)) <- Map.toList (cats (abstract pgf)), not (elem c [cidString,cidInt,cidFloat]), let hyps = zipWith (\(bt,_,ty) n -> (bt,mkCId ('v':show n),ty)) hyps0 diff --git a/src/runtime/haskell/PGF/TypeCheck.hs b/src/runtime/haskell/PGF/TypeCheck.hs index 141189193..e582f97af 100644 --- a/src/runtime/haskell/PGF/TypeCheck.hs +++ b/src/runtime/haskell/PGF/TypeCheck.hs @@ -121,8 +121,8 @@ runTcM abstr f ms s = unTcM f abstr (\x ms s cp b -> let (es,xs) = cp b lookupCatHyps :: CId -> TcM s [Hypo] lookupCatHyps cat = TcM (\abstr k h ms -> case Map.lookup cat (cats abstr) of - Just (hyps,_,_) -> k hyps ms - Nothing -> h (UnknownCat cat)) + Just (hyps,_,_,_) -> k hyps ms + Nothing -> h (UnknownCat cat)) lookupFunType :: CId -> TcM s Type lookupFunType fun = TcM (\abstr k h ms -> case Map.lookup fun (funs abstr) of @@ -143,8 +143,8 @@ typeGenerators scope cat = fmap normalize (liftM2 (++) x y) | cat == cidString = return [(1.0,ELit (LStr "Foo"),TTyp [] (DTyp [] cat []))] | otherwise = TcM (\abstr k h ms -> case Map.lookup cat (cats abstr) of - Just (_,fns,_) -> unTcM (mapM helper fns) abstr k h ms - Nothing -> h (UnknownCat cat)) + Just (_,fns,_,_) -> unTcM (mapM helper fns) abstr k h ms + Nothing -> h (UnknownCat cat)) helper (p,fn) = do ty <- lookupFunType fn |
