diff options
Diffstat (limited to 'src')
24 files changed, 153 insertions, 179 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 6769b283d..58e485768 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -1134,15 +1134,19 @@ allCommands = Map.fromList [ case arg of [EFun id] -> case Map.lookup id (funs (abstract pgf)) of Just fd -> do putStrLn $ render (ppFun id fd) - putStrLn ("Probability: "++show (probTree pgf (EFun id))) + let (_,_,_,prob,_) = fd + putStrLn ("Probability: "++show prob) return void Nothing -> case Map.lookup id (cats (abstract pgf)) of - Just hyps -> do putStrLn $ - render (ppCat id hyps $$ + Just cd -> do putStrLn $ + render (ppCat id cd $$ if null (functionsToCat pgf id) then empty else space $$ - vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id]) + vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id] $$ + space) + let (_,_,prob,_) = cd + putStrLn ("Probability: "++show prob) return void Nothing -> do putStrLn ("unknown category of function identifier "++show id) return void diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index ce31d5dc1..25db4bac7 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -50,12 +50,12 @@ mkCanon2pgf opts gr am = do [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ Look.allOrigInfos gr am - flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF aflags] + flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags] funs = Map.fromList [(i2i f, (mkType [] ty, mkArrity ma, mkDef pty, 0, addr)) | ((m,f),AbsFun (Just (L _ ty)) ma pty _,addr) <- adefs] - cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, addr)) | + cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0, addr)) | ((m,c),AbsCat (Just (L _ cont)),addr) <- adefs] catfuns cat = @@ -69,7 +69,7 @@ mkCanon2pgf opts gr am = do ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ Look.allOrigInfos gr cm) - let flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF cflags] + let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] seqs = (mkSetArray . Set.fromList . concat) $ (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) diff --git a/src/compiler/GF/Compile/PGFtoLProlog.hs b/src/compiler/GF/Compile/PGFtoLProlog.hs index 2ab5a26d2..61e28eb17 100644 --- a/src/compiler/GF/Compile/PGFtoLProlog.hs +++ b/src/compiler/GF/Compile/PGFtoLProlog.hs @@ -12,7 +12,7 @@ import qualified Data.Map as Map grammar2lambdaprolog_mod pgf = render $ text "module" <+> ppCId (absname pgf) <> char '.' $$ space $$ - vcat [ppClauses cat fns | (cat,(_,fs,_)) <- Map.toList (cats (abstract pgf)), + vcat [ppClauses cat fns | (cat,(_,fs,_,_)) <- Map.toList (cats (abstract pgf)), let fns = [(f,fromJust (Map.lookup f (funs (abstract pgf)))) | (_,f) <- fs]] where ppClauses cat fns = @@ -25,11 +25,11 @@ grammar2lambdaprolog_mod pgf = render $ grammar2lambdaprolog_sig pgf = render $ text "sig" <+> ppCId (absname pgf) <> char '.' $$ space $$ - vcat [ppCat c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$ + vcat [ppCat c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$ space $$ vcat [ppFun f ty <> dot | (f,(ty,_,Nothing,_,_)) <- Map.toList (funs (abstract pgf))] $$ space $$ - vcat [ppExport c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$ + vcat [ppExport c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$ vcat [ppFunPred f (hyps ++ [(Explicit,wildCId,DTyp [] c es)]) <> dot | (f,(DTyp hyps c es,_,Just _,_,_)) <- Map.toList (funs (abstract pgf))] ppCat :: CId -> [Hypo] -> Doc diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs index 061d9e874..8fffd5f07 100644 --- a/src/compiler/GF/Compile/PGFtoProlog.hs +++ b/src/compiler/GF/Compile/PGFtoProlog.hs @@ -49,7 +49,7 @@ plAbstract name abs (f, v) <- Map.assocs (aflags abs)] ++++ plFacts name "cat" 2 "(?Type, ?[X:Type,...])" [[plType cat args, plHypos hypos'] | - (cat, (hypos, _, _)) <- Map.assocs (cats abs), + (cat, (hypos,_,_,_)) <- Map.assocs (cats abs), let ((_, subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos, let args = reverse [EFun x | (_,x) <- subst]] ++++ plFacts name "fun" 3 "(?Fun, ?Type, ?[X:Type,...])" diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 6641b5718..0e897aa00 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -17,6 +17,7 @@ import Data.Binary --import Control.Monad
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS
+import Text.PrettyPrint (render)
import GF.Data.Operations
import GF.Infra.Ident
@@ -24,9 +25,10 @@ import GF.Infra.Option import GF.Grammar.Grammar
import PGF() -- Binary instances
+import PGF.Data(ppLit)
-- Please change this every time when the GFO format is changed
-gfoVersion = "GF02"
+gfoVersion = "GF03"
instance Binary Ident where
put id = put (ident2bs id)
@@ -91,7 +93,7 @@ instance Binary ModuleStatus where instance Binary Options where
put = put . optionsGFO
get = do opts <- get
- case parseModuleOptions ["--" ++ flag ++ "=" ++ value | (flag,value) <- opts] of
+ case parseModuleOptions ["--" ++ flag ++ "=" ++ render (ppLit value) | (flag,value) <- opts] of
Ok x -> return x
Bad msg -> fail msg
diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs index dba08ce44..e15e6e4d3 100644 --- a/src/compiler/GF/Grammar/Printer.hs +++ b/src/compiler/GF/Grammar/Printer.hs @@ -29,7 +29,7 @@ import GF.Infra.Option import GF.Grammar.Values
import GF.Grammar.Grammar
-import PGF.Data (ppMeta)
+import PGF.Data (ppMeta, ppLit)
import PGF.Printer (ppFId, ppFunId, ppSeqId, ppSeq)
import Text.PrettyPrint
@@ -87,7 +87,7 @@ ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ mseqs jments) = ppOptions opts =
text "flags" $$
- nest 2 (vcat [text option <+> equals <+> str value <+> semi | (option,value) <- optionsGFO opts])
+ nest 2 (vcat [text option <+> equals <+> ppLit value <+> semi | (option,value) <- optionsGFO opts])
ppJudgement q (id, AbsCat pcont ) =
text "cat" <+> ppIdent id <+>
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index e0cba05e4..1236e729c 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -43,6 +43,7 @@ import GF.Data.ErrM import Data.Set (Set) import qualified Data.Set as Set +import PGF.Data(Literal(..)) usageHeader :: String usageHeader = unlines @@ -170,7 +171,9 @@ data Flags = Flags { optWarnings :: [Warning], optDump :: [Dump], optTagsOnly :: Bool, - optBeamSize :: Maybe Double, + optHeuristicFactor :: Maybe Double, + optMetaProb :: Maybe Double, + optMetaToknProb :: Maybe Double, optNewComp :: Bool } deriving (Show) @@ -206,16 +209,18 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o) -- Showing options -- | Pretty-print the options that are preserved in .gfo files. -optionsGFO :: Options -> [(String,String)] +optionsGFO :: Options -> [(String,Literal)] optionsGFO opts = optionsPGF opts - ++ [("coding", flag optEncoding opts)] + ++ [("coding", LStr (flag optEncoding opts))] -- | Pretty-print the options that are preserved in .pgf files. -optionsPGF :: Options -> [(String,String)] +optionsPGF :: Options -> [(String,Literal)] optionsPGF opts = - maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts) - ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts) - ++ maybe [] (\x -> [("beam_size",show x)]) (flag optBeamSize opts) + maybe [] (\x -> [("language",LStr x)]) (flag optSpeechLanguage opts) + ++ maybe [] (\x -> [("startcat",LStr x)]) (flag optStartCat opts) + ++ maybe [] (\x -> [("heuristic_search_factor",LFlt x)]) (flag optHeuristicFactor opts) + ++ maybe [] (\x -> [("meta_prob",LFlt x)]) (flag optMetaProb opts) + ++ maybe [] (\x -> [("meta_token_prob",LFlt x)]) (flag optMetaToknProb opts) -- Option manipulation @@ -272,7 +277,9 @@ defaultFlags = Flags { optWarnings = [], optDump = [], optTagsOnly = False, - optBeamSize = Nothing, + optHeuristicFactor = Nothing, + optMetaProb = Nothing, + optMetaToknProb = Nothing, optNewComp = #ifdef NEW_COMP True @@ -358,7 +365,9 @@ optDescr = Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).", Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).", Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...", - Option [] ["beam_size"] (ReqArg readDouble "SIZE") "Set the beam size for statistical parsing", + Option [] ["heuristic_search_factor"] (ReqArg (readDouble (\d o -> o { optHeuristicFactor = Just d })) "FACTOR") "Set the heuristic search factor for statistical parsing", + Option [] ["meta_prob"] (ReqArg (readDouble (\d o -> o { optMetaProb = Just d })) "PROB") "Set the probability of introducting a meta variable in the parser", + Option [] ["meta_token_prob"] (ReqArg (readDouble (\d o -> o { optMetaToknProb = Just d })) "PROB") "Set the probability for skipping a token in the parser", Option [] ["new-comp"] (NoArg (set $ \o -> o{optNewComp = True})) "Use the new experimental compiler.", Option [] ["old-comp"] (NoArg (set $ \o -> o{optNewComp = False})) "Use old trusty compiler.", dumpOption "source" Source, @@ -433,9 +442,9 @@ optDescr = Nothing -> fail $ "Unknown CFG transformation: " ++ x' ++ " Known: " ++ show (map fst cfgTransformNames) - readDouble x = case reads x of - [(d,"")] -> set $ \o -> o { optBeamSize = Just d } - _ -> fail "A floating point number is expected" + readDouble f x = case reads x of + [(d,"")] -> set $ f d + _ -> fail "A floating point number is expected" dumpOption s d = Option [] ["dump-"++s] (NoArg (set $ \o -> o { optDump = Dump d:optDump o})) ("Dump output of the " ++ s ++ " phase.") diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs index 24c2e4d86..3703fe7a2 100644 --- a/src/compiler/GF/Speech/VoiceXML.hs +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -39,7 +39,7 @@ type Skeleton = [(CId, [(CId, [CId])])] pgfSkeleton :: PGF -> Skeleton pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType (abstract pgf) f))) | (_,f) <- fs]) - | (c,(_,fs,_)) <- Map.toList (cats (abstract pgf))] + | (c,(_,fs,_,_)) <- Map.toList (cats (abstract pgf))] -- -- * Questions to ask diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs index 61d5726b2..f75a39ab1 100644 --- a/src/compiler/GFC.hs +++ b/src/compiler/GFC.hs @@ -104,7 +104,7 @@ writeByteCode opts pgf where addrs = [(id,addr) | (id,(_,_,_,_,addr)) <- Map.toList (funs (abstract pgf))] ++ - [(id,addr) | (id,(_,_,addr)) <- Map.toList (cats (abstract pgf))] + [(id,addr) | (id,(_,_,_,addr)) <- Map.toList (cats (abstract pgf))] writePGF :: Options -> PGF -> IOE () writePGF opts pgf = do diff --git a/src/compiler/SimpleEditor/Convert.hs b/src/compiler/SimpleEditor/Convert.hs index 3554e3342..86580834a 100644 --- a/src/compiler/SimpleEditor/Convert.hs +++ b/src/compiler/SimpleEditor/Convert.hs @@ -23,6 +23,7 @@ import GF.Grammar.Printer(ppParams,ppTerm,getAbs,TermPrintQual(..)) import GF.Grammar.Parser(runP,pModDef) import GF.Grammar.Lexer(Posn(..)) import GF.Data.ErrM +import PGF.Data(Literal(LStr)) import SimpleEditor.Syntax as S import SimpleEditor.JSON @@ -57,7 +58,10 @@ convAbstract (modid,src) = let cats = reverse cats0 funs = reverse funs0 flags = optionsGFO (mflags src) - startcat = maybe "-" id $ lookup "startcat" flags + startcat = + case lookup "startcat" flags of + Just (LStr cat) -> cat + _ -> "-" return $ Grammar (convId modid) extends (Abstract startcat cats funs) [] convExtends = mapM convExtend diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 5b0401764..9bc73dd0a 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -87,9 +87,7 @@ typedef struct { PgfCId name; PgfHypos* context; - prob_t meta_prob; - prob_t meta_token_prob; - PgfMetaChildMap* meta_child_probs; + prob_t prob; void* predicate; } PgfAbsCat; @@ -230,6 +228,7 @@ typedef GuSeq PgfCncFuns; struct PgfConcr { PgfCId name; + PgfAbstr* abstr; PgfFlags* cflags; PgfPrintNames* printnames; GuMap* ccats; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 4e4724c75..ec21fc84e 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -63,7 +63,10 @@ typedef struct { int prod_full_count; #endif PgfItem* free_item; - prob_t beam_size; + + prob_t heuristic_factor; + prob_t meta_prob; + prob_t meta_token_prob; } PgfParsing; typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE; @@ -1389,12 +1392,14 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err { (void) (err); - PgfAbsCat* abscat = (PgfAbsCat*) key; - prob_t meta_prob = *((prob_t*) value); + PgfAbsCat* abscat = *((PgfAbsCat**) value); PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn; PgfParsing* ps = clo->ps; PgfItem* meta_item = clo->meta_item; + if (abscat->prob == INFINITY) + return; + PgfCncCat* cnccat = gu_map_get(ps->concr->cnccats, abscat->name, PgfCncCat*); if (cnccat == NULL) @@ -1412,7 +1417,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err PgfItem* item = pgf_item_copy(meta_item, ps); item->inside_prob += - ccat->viterbi_prob+meta_prob; + ccat->viterbi_prob+abscat->prob; size_t nargs = gu_seq_length(meta_item->args); item->args = gu_new_seq(PgfPArg, nargs+1, ps->pool); @@ -1698,18 +1703,14 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item) } pgf_parsing_complete(ps, item, ep); } else { - prob_t meta_token_prob = - item->conts->ccat->cnccat->abscat->meta_token_prob; + prob_t meta_token_prob = + ps->meta_token_prob; if (meta_token_prob != INFINITY) { pgf_parsing_meta_scan(ps, item, meta_token_prob); } - PgfCIdMap* meta_child_probs = - item->conts->ccat->cnccat->abscat->meta_child_probs; - if (meta_child_probs != NULL) { - PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item }; - gu_map_iter(meta_child_probs, &clo.fn, NULL); - } + PgfMetaPredictFn clo = { { pgf_parsing_meta_predict }, ps, item }; + gu_map_iter(ps->concr->abstr->cats, &clo.fn, NULL); } } else { pgf_parsing_symbol(ps, item, item->curr_sym); @@ -1721,22 +1722,38 @@ pgf_parsing_item(PgfParsing* ps, PgfItem* item) } } -static prob_t -pgf_parsing_default_beam_size(PgfConcr* concr) +static void +pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr) { - PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral); + PgfLiteral lit; - if (gu_variant_is_null(lit)) - return 0; + lit = + gu_map_get(abstr->aflags, "heuristic_search_factor", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->heuristic_factor = ((PgfLiteralFlt*) pi.data)->val; + } + + lit = + gu_map_get(abstr->aflags, "meta_prob", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->meta_prob = - log(((PgfLiteralFlt*) pi.data)->val); + } - GuVariantInfo pi = gu_variant_open(lit); - gu_assert (pi.tag == PGF_LITERAL_FLT); - return ((PgfLiteralFlt*) pi.data)->val; + lit = + gu_map_get(abstr->aflags, "meta_token_prob", PgfLiteral); + if (!gu_variant_is_null(lit)) { + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + ps->meta_token_prob = - log(((PgfLiteralFlt*) pi.data)->val); + } } static PgfParsing* -pgf_new_parsing(PgfConcr* concr, - GuString sentence, double heuristics, +pgf_new_parsing(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool) { PgfParsing* ps = gu_new(PgfParsing, pool); @@ -1756,7 +1773,11 @@ pgf_new_parsing(PgfConcr* concr, ps->prod_full_count = 0; #endif ps->free_item = NULL; - ps->beam_size = heuristics; + ps->heuristic_factor = 0; + ps->meta_prob = INFINITY; + ps->meta_token_prob = INFINITY; + + pgf_parsing_set_default_factors(ps, concr->abstr); PgfExprMeta *expr_meta = gu_new_variant(PGF_EXPR_META, @@ -2107,7 +2128,7 @@ pgf_parse_result_is_new(PgfExprState* st) // TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat static PgfParsing* pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, - GuString sentence, double heuristics, + GuString sentence, double heuristic_factor, GuExn* err, GuPool* pool, GuPool* out_pool) { @@ -2121,12 +2142,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, gu_assert(lin_idx < cnccat->n_lins); - if (heuristics < 0) { - heuristics = pgf_parsing_default_beam_size(concr); + PgfParsing* ps = + pgf_new_parsing(concr, sentence, pool, out_pool); + + if (heuristic_factor >= 0) { + ps->heuristic_factor = heuristic_factor; } - PgfParsing* ps = - pgf_new_parsing(concr, sentence, heuristics, pool, out_pool); PgfParseState* state = pgf_new_parse_state(ps, 0, BIND_SOFT); @@ -2156,11 +2178,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); } - PgfItem *item = - pgf_new_item(ps, conts, ps->meta_prod); - item->inside_prob = - ccat->cnccat->abscat->meta_prob; - gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + if (ps->meta_prob != INFINITY) { + PgfItem *item = + pgf_new_item(ps, conts, ps->meta_prod); + item->inside_prob = + ps->meta_prob; + gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); + } } } @@ -2200,7 +2224,7 @@ pgf_parsing_proceed(PgfParsing* ps) prob_t state_delta = (st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))* - ps->beam_size; + ps->heuristic_factor; delta_prob += state_delta; st = st->next; } diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index e804f5ce7..93dea300a 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -35,63 +35,6 @@ pgf_read(const char* fpath, return pgf; } -void -pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, - GuPool* pool, GuExn* err) -{ - FILE *fp = fopen(fpath, "r"); - if (!fp) { - gu_raise_errno(err); - return; - } - - GuPool* tmp_pool = gu_new_pool(); - - for (;;) { - char cat1[21]; - char cat2[21]; - prob_t prob; - - if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3) - break; - - prob = - log(prob); - - PgfAbsCat* abscat1 = - gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*); - if (abscat1 == NULL) { - GuExnData* exn = gu_raise(err, PgfExn); - exn->data = "Unknown category name"; - goto close; - } - - if (strcmp(cat2, "*") == 0) { - abscat1->meta_prob = prob; - } else if (strcmp(cat2, "_") == 0) { - abscat1->meta_token_prob = prob; - } else { - PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*); - if (abscat2 == NULL) { - gu_raise(err, PgfExn); - GuExnData* exn = gu_raise(err, PgfExn); - exn->data = "Unknown category name"; - goto close; - } - - if (abscat1->meta_child_probs == NULL) { - abscat1->meta_child_probs = - gu_map_type_new(PgfMetaChildMap, pool); - } - - gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob); - } - } - -close: - gu_pool_free(tmp_pool); - fclose(fp); -} - GuString pgf_abstract_name(PgfPGF* pgf) { diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 61b8bea6c..ffc293306 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -80,11 +80,6 @@ pgf_read(const char* fpath, * */ - -void -pgf_load_meta_child_probs(PgfPGF*, const char* fpath, - GuPool* pool, GuExn* err); - GuString pgf_abstract_name(PgfPGF*); diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index da7c70d7c..78c2b74db 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -48,7 +48,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value, ctxt = next; } - gu_printf(out, err, " ; -- %f\n",cat->meta_prob); + gu_printf(out, err, " ; -- %f\n", cat->prob); } void diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index d215f25e1..12605b89a 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -516,10 +516,6 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats) gu_return_on_exn(rdr->err, NULL); } - abscat->meta_prob = INFINITY; - abscat->meta_token_prob = INFINITY; - abscat->meta_child_probs = NULL; - GuBuf* functions = gu_new_buf(PgfAbsFun*, rdr->tmp_pool); size_t n_functions = pgf_read_len(rdr); @@ -538,6 +534,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats) gu_buf_push(functions, PgfAbsFun*, absfun); } + abscat->prob = - log(gu_in_f64be(rdr->in, rdr->err)); + pgf_jit_predicate(rdr->jit_state, abscats, abscat, functions); return abscat; @@ -1155,6 +1153,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun) pgf_read_cid(rdr, rdr->opool); gu_return_on_exn(rdr->err, NULL); + concr->abstr = abstr; + concr->cflags = pgf_read_flags(rdr); gu_return_on_exn(rdr->err, NULL); diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index 79a4fdd42..32f8323ab 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -53,18 +53,17 @@ int main(int argc, char* argv[]) { // Create the pool that is used to allocate everything GuPool* pool = gu_new_pool(); int status = EXIT_SUCCESS; - if (argc < 5 || argc > 6) { - fprintf(stderr, "usage: %s pgf cat from-lang to-lang [probs-file]\n", argv[0]); + if (argc < 5) { + fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]); status = EXIT_FAILURE; goto fail; } - char* filename = argv[1]; + GuString filename = argv[1]; GuString cat = argv[2]; - GuString from_lang = argv[3]; GuString to_lang = argv[4]; - + // Create an exception frame that catches all errors. GuExn* err = gu_new_exn(NULL, gu_kind(type), pool); @@ -78,16 +77,6 @@ int main(int argc, char* argv[]) { goto fail; } - if (argc == 6) { - char* meta_probs_filename = argv[5]; - pgf_load_meta_child_probs(pgf, meta_probs_filename, pool, err); - if (!gu_ok(err)) { - fprintf(stderr, "Loading meta child probs failed\n"); - status = EXIT_FAILURE; - goto fail; - } - } - // Look up the source and destination concrete categories PgfConcr* from_concr = pgf_get_language(pgf, from_lang); PgfConcr* to_concr = pgf_get_language(pgf, to_lang); diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index e7e5c53c5..0e3c79f40 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -292,8 +292,8 @@ categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))] categoryContext pgf cat = case Map.lookup cat (cats (abstract pgf)) of - Just (hypos,_,_) -> Just hypos - Nothing -> Nothing + Just (hypos,_,_,_) -> Just hypos + Nothing -> Nothing startCat pgf = DTyp [] (lookStartCat pgf) [] @@ -301,8 +301,8 @@ functions pgf = Map.keys (funs (abstract pgf)) functionsByCat pgf cat = case Map.lookup cat (cats (abstract pgf)) of - Just (_,fns,_) -> map snd fns - Nothing -> [] + Just (_,fns,_,_) -> map snd fns + Nothing -> [] functionType pgf fun = case Map.lookup fun (funs (abstract pgf)) of @@ -325,8 +325,8 @@ browse pgf id = fmap (\def -> (def,producers,consumers)) definition in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]) Just (ty,_,Nothing, _,_) -> Just $ render (text "data" <+> ppCId id <+> colon <+> ppType 0 [] ty) Nothing -> case Map.lookup id (cats (abstract pgf)) of - Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps))) - Nothing -> Nothing + Just (hyps,_,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps))) + Nothing -> Nothing (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf)) where diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index 202939f04..2debcf12d 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -40,13 +40,13 @@ instance Binary CId where instance Binary Abstr where
put abs = put (aflags abs,
fmap (\(w,x,y,z,_) -> (w,x,y,z)) (funs abs),
- fmap (\(x,y,_) -> (x,y)) (cats abs))
+ fmap (\(x,y,z,_) -> (x,y,z)) (cats abs))
get = do aflags <- get
funs <- get
cats <- get
return (Abstr{ aflags=aflags
, funs=fmap (\(w,x,y,z) -> (w,x,y,z,0)) funs
- , cats=fmap (\(x,y) -> (x,y,0)) cats
+ , cats=fmap (\(x,y,z) -> (x,y,z,0)) cats
, code=BS.empty
})
diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index e37b243d0..f5797739f 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -29,10 +29,10 @@ data PGF = PGF { data Abstr = Abstr { aflags :: Map.Map CId Literal, -- ^ value of a flag funs :: Map.Map CId (Type,Int,Maybe [Equation],Double,BCAddr), -- ^ type, arrity and definition of function + probability - cats :: Map.Map CId ([Hypo],[(Double, CId)],BCAddr), -- ^ 1. context of a category - -- ^ 2. functions of a category. The order in the list is important, - -- this is the order in which the type singatures are given in the source. - -- The termination of the exhaustive generation might depend on this. + cats :: Map.Map CId ([Hypo],[(Double, CId)],Double,BCAddr), -- ^ 1. context of a category + -- 2. functions of a category. The functions are stored + -- in decreasing probability order. + -- 3. probability code :: BS.ByteString } diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 830a16674..ce75b1c91 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -67,7 +67,7 @@ functionsToCat :: PGF -> CId -> [(CId,Type)] functionsToCat pgf cat = [(f,ty) | (_,f) <- fs, Just (ty,_,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]] where - (_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf + (_,fs,_,_) = lookMap ([],[],0,0) cat $ cats $ abstract pgf -- | List of functions that lack linearizations in the given language. missingLins :: PGF -> Language -> [CId] @@ -82,7 +82,7 @@ restrictPGF :: (CId -> Bool) -> PGF -> PGF restrictPGF cond pgf = pgf { abstract = abstr { funs = Map.filterWithKey (\c _ -> cond c) (funs abstr), - cats = Map.map (\(hyps,fs,addr) -> (hyps,filter (cond . snd) fs,addr)) (cats abstr) + cats = Map.map (\(hyps,fs,p,addr) -> (hyps,filter (cond . snd) fs,p,addr)) (cats abstr) } } ---- restrict concrs also, might be needed where diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 66d8530f0..d3a5ea1d9 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -26,8 +26,8 @@ ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$ ppFlag :: CId -> Literal -> Doc ppFlag flag value = text "flag" <+> ppCId flag <+> char '=' <+> ppLit value <+> char ';' -ppCat :: CId -> ([Hypo],[(Double,CId)],BCAddr) -> Doc -ppCat c (hyps,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';' +ppCat :: CId -> ([Hypo],[(Double,CId)],Double,BCAddr) -> Doc +ppCat c (hyps,_,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';' ppFun :: CId -> (Type,Int,Maybe [Equation],Double,BCAddr) -> Doc ppFun f (t,_,Just eqs,_,_) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t <+> char ';' $$ diff --git a/src/runtime/haskell/PGF/Probabilistic.hs b/src/runtime/haskell/PGF/Probabilistic.hs index 7f980254b..095ade022 100644 --- a/src/runtime/haskell/PGF/Probabilistic.hs +++ b/src/runtime/haskell/PGF/Probabilistic.hs @@ -24,13 +24,14 @@ import Data.Maybe (fromMaybe) --, fromJust -- the probabilities for the different functions in a grammar. data Probabilities = Probs { funProbs :: Map.Map CId Double, - catProbs :: Map.Map CId [(Double, CId)] + catProbs :: Map.Map CId (Double, [(Double, CId)]) } -- | Renders the probability structure as string showProbabilities :: Probabilities -> String -showProbabilities = unlines . map pr . Map.toList . funProbs where - pr (f,d) = showCId f ++ "\t" ++ show d +showProbabilities = unlines . concatMap prProb . Map.toList . catProbs where + prProb (c,(p,fns)) = pr (p,c) : map pr fns + pr (p,f) = showCId f ++ "\t" ++ show p -- | Reads the probabilities from a file. -- This should be a text file where on every line @@ -50,8 +51,12 @@ readProbabilitiesFromFile file pgf = do -- for the result category. mkProbabilities :: PGF -> Map.Map CId Double -> Probabilities mkProbabilities pgf probs = - let funs1 = Map.fromList [(f,p) | (_,cf) <- Map.toList cats1, (p,f) <- cf] - cats1 = Map.map (\(_,fs,_) -> sortBy cmpProb (fill fs)) (cats (abstract pgf)) + let funs1 = Map.fromList [(f,p) | (_,(_,fns)) <- Map.toList cats1, (p,f) <- fns] + cats1 = Map.mapWithKey (\c (_,fns,_,_) -> + let p' = fromMaybe 0 (Map.lookup c probs) + fns' = sortBy cmpProb (fill fns) + in (p', fns')) + (cats (abstract pgf)) in Probs funs1 cats1 where cmpProb (p1,_) (p2,_) = compare p2 p1 @@ -71,15 +76,15 @@ defaultProbabilities pgf = mkProbabilities pgf Map.empty getProbabilities :: PGF -> Probabilities getProbabilities pgf = Probs { - funProbs = Map.map (\(_,_,_,p,_) -> p) (funs (abstract pgf)), - catProbs = Map.map (\(_,fns,_) -> fns) (cats (abstract pgf)) + funProbs = Map.map (\(_,_,_,p,_) -> p ) (funs (abstract pgf)), + catProbs = Map.map (\(_,fns,p,_) -> (p,fns)) (cats (abstract pgf)) } setProbabilities :: Probabilities -> PGF -> PGF setProbabilities probs pgf = pgf { abstract = (abstract pgf) { - funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df,p,addr)) (funs (abstract pgf)) (funProbs probs), - cats = mapUnionWith (\(hypos,_,addr) fns -> (hypos,fns,addr)) (cats (abstract pgf)) (catProbs probs) + funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df, p,addr)) (funs (abstract pgf)) (funProbs probs), + cats = mapUnionWith (\(hypos,_,_,addr) (p,fns) -> (hypos,fns,p,addr)) (cats (abstract pgf)) (catProbs probs) }} where mapUnionWith f map1 map2 = @@ -102,7 +107,7 @@ rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p) mkProbDefs :: PGF -> ([[CId]],[(CId,Type,[Equation])]) mkProbDefs pgf = - let cs = [(c,hyps,fns) | (c,(hyps0,fs,_)) <- Map.toList (cats (abstract pgf)), + let cs = [(c,hyps,fns) | (c,(hyps0,fs,_,_)) <- Map.toList (cats (abstract pgf)), not (elem c [cidString,cidInt,cidFloat]), let hyps = zipWith (\(bt,_,ty) n -> (bt,mkCId ('v':show n),ty)) hyps0 diff --git a/src/runtime/haskell/PGF/TypeCheck.hs b/src/runtime/haskell/PGF/TypeCheck.hs index 141189193..e582f97af 100644 --- a/src/runtime/haskell/PGF/TypeCheck.hs +++ b/src/runtime/haskell/PGF/TypeCheck.hs @@ -121,8 +121,8 @@ runTcM abstr f ms s = unTcM f abstr (\x ms s cp b -> let (es,xs) = cp b lookupCatHyps :: CId -> TcM s [Hypo] lookupCatHyps cat = TcM (\abstr k h ms -> case Map.lookup cat (cats abstr) of - Just (hyps,_,_) -> k hyps ms - Nothing -> h (UnknownCat cat)) + Just (hyps,_,_,_) -> k hyps ms + Nothing -> h (UnknownCat cat)) lookupFunType :: CId -> TcM s Type lookupFunType fun = TcM (\abstr k h ms -> case Map.lookup fun (funs abstr) of @@ -143,8 +143,8 @@ typeGenerators scope cat = fmap normalize (liftM2 (++) x y) | cat == cidString = return [(1.0,ELit (LStr "Foo"),TTyp [] (DTyp [] cat []))] | otherwise = TcM (\abstr k h ms -> case Map.lookup cat (cats abstr) of - Just (_,fns,_) -> unTcM (mapM helper fns) abstr k h ms - Nothing -> h (UnknownCat cat)) + Just (_,fns,_,_) -> unTcM (mapM helper fns) abstr k h ms + Nothing -> h (UnknownCat cat)) helper (p,fn) = do ty <- lookupFunType fn |
