summaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2013-11-06 10:21:46 +0000
committerkr.angelov <kr.angelov@gmail.com>2013-11-06 10:21:46 +0000
commit2483dc772897eb0909664f1a88cc7f8ec50ebd5b (patch)
tree4ecb223fc246458a9a5997a4b65329b6039ce309 /src/compiler
parent84ef5fa5fa976569370c58bed855f2ab1de5588c (diff)
The content of ParseEngAbs3.probs is now merged with ParseEngAbs.probs. The latter is now retrained. Once the grammar is compiled with the .probs file, it doesn't need anything more to do robust parsing. The robustness itself is controlled by the flags 'heuristic_search_factor', 'meta_prob' and 'meta_token_prob' in ParseEngAbs.gf
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/GF/Command/Commands.hs12
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs6
-rw-r--r--src/compiler/GF/Compile/PGFtoLProlog.hs6
-rw-r--r--src/compiler/GF/Compile/PGFtoProlog.hs2
-rw-r--r--src/compiler/GF/Grammar/Binary.hs6
-rw-r--r--src/compiler/GF/Grammar/Printer.hs4
-rw-r--r--src/compiler/GF/Infra/Option.hs33
-rw-r--r--src/compiler/GF/Speech/VoiceXML.hs2
-rw-r--r--src/compiler/GFC.hs2
-rw-r--r--src/compiler/SimpleEditor/Convert.hs6
10 files changed, 49 insertions, 30 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 6769b283d..58e485768 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -1134,15 +1134,19 @@ allCommands = Map.fromList [
case arg of
[EFun id] -> case Map.lookup id (funs (abstract pgf)) of
Just fd -> do putStrLn $ render (ppFun id fd)
- putStrLn ("Probability: "++show (probTree pgf (EFun id)))
+ let (_,_,_,prob,_) = fd
+ putStrLn ("Probability: "++show prob)
return void
Nothing -> case Map.lookup id (cats (abstract pgf)) of
- Just hyps -> do putStrLn $
- render (ppCat id hyps $$
+ Just cd -> do putStrLn $
+ render (ppCat id cd $$
if null (functionsToCat pgf id)
then empty
else space $$
- vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id])
+ vcat [ppFun fid (ty,0,Just [],0,0) | (fid,ty) <- functionsToCat pgf id] $$
+ space)
+ let (_,_,prob,_) = cd
+ putStrLn ("Probability: "++show prob)
return void
Nothing -> do putStrLn ("unknown category of function identifier "++show id)
return void
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index ce31d5dc1..25db4bac7 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -50,12 +50,12 @@ mkCanon2pgf opts gr am = do
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
Look.allOrigInfos gr am
- flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF aflags]
+ flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags]
funs = Map.fromList [(i2i f, (mkType [] ty, mkArrity ma, mkDef pty, 0, addr)) |
((m,f),AbsFun (Just (L _ ty)) ma pty _,addr) <- adefs]
- cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, addr)) |
+ cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0, addr)) |
((m,c),AbsCat (Just (L _ cont)),addr) <- adefs]
catfuns cat =
@@ -69,7 +69,7 @@ mkCanon2pgf opts gr am = do
([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++
Look.allOrigInfos gr cm)
- let flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF cflags]
+ let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
seqs = (mkSetArray . Set.fromList . concat) $
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
diff --git a/src/compiler/GF/Compile/PGFtoLProlog.hs b/src/compiler/GF/Compile/PGFtoLProlog.hs
index 2ab5a26d2..61e28eb17 100644
--- a/src/compiler/GF/Compile/PGFtoLProlog.hs
+++ b/src/compiler/GF/Compile/PGFtoLProlog.hs
@@ -12,7 +12,7 @@ import qualified Data.Map as Map
grammar2lambdaprolog_mod pgf = render $
text "module" <+> ppCId (absname pgf) <> char '.' $$
space $$
- vcat [ppClauses cat fns | (cat,(_,fs,_)) <- Map.toList (cats (abstract pgf)),
+ vcat [ppClauses cat fns | (cat,(_,fs,_,_)) <- Map.toList (cats (abstract pgf)),
let fns = [(f,fromJust (Map.lookup f (funs (abstract pgf)))) | (_,f) <- fs]]
where
ppClauses cat fns =
@@ -25,11 +25,11 @@ grammar2lambdaprolog_mod pgf = render $
grammar2lambdaprolog_sig pgf = render $
text "sig" <+> ppCId (absname pgf) <> char '.' $$
space $$
- vcat [ppCat c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$
+ vcat [ppCat c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$
space $$
vcat [ppFun f ty <> dot | (f,(ty,_,Nothing,_,_)) <- Map.toList (funs (abstract pgf))] $$
space $$
- vcat [ppExport c hyps <> dot | (c,(hyps,_,_)) <- Map.toList (cats (abstract pgf))] $$
+ vcat [ppExport c hyps <> dot | (c,(hyps,_,_,_)) <- Map.toList (cats (abstract pgf))] $$
vcat [ppFunPred f (hyps ++ [(Explicit,wildCId,DTyp [] c es)]) <> dot | (f,(DTyp hyps c es,_,Just _,_,_)) <- Map.toList (funs (abstract pgf))]
ppCat :: CId -> [Hypo] -> Doc
diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs
index 061d9e874..8fffd5f07 100644
--- a/src/compiler/GF/Compile/PGFtoProlog.hs
+++ b/src/compiler/GF/Compile/PGFtoProlog.hs
@@ -49,7 +49,7 @@ plAbstract name abs
(f, v) <- Map.assocs (aflags abs)] ++++
plFacts name "cat" 2 "(?Type, ?[X:Type,...])"
[[plType cat args, plHypos hypos'] |
- (cat, (hypos, _, _)) <- Map.assocs (cats abs),
+ (cat, (hypos,_,_,_)) <- Map.assocs (cats abs),
let ((_, subst), hypos') = mapAccumL alphaConvertHypo emptyEnv hypos,
let args = reverse [EFun x | (_,x) <- subst]] ++++
plFacts name "fun" 3 "(?Fun, ?Type, ?[X:Type,...])"
diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs
index 6641b5718..0e897aa00 100644
--- a/src/compiler/GF/Grammar/Binary.hs
+++ b/src/compiler/GF/Grammar/Binary.hs
@@ -17,6 +17,7 @@ import Data.Binary
--import Control.Monad
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as BS
+import Text.PrettyPrint (render)
import GF.Data.Operations
import GF.Infra.Ident
@@ -24,9 +25,10 @@ import GF.Infra.Option
import GF.Grammar.Grammar
import PGF() -- Binary instances
+import PGF.Data(ppLit)
-- Please change this every time when the GFO format is changed
-gfoVersion = "GF02"
+gfoVersion = "GF03"
instance Binary Ident where
put id = put (ident2bs id)
@@ -91,7 +93,7 @@ instance Binary ModuleStatus where
instance Binary Options where
put = put . optionsGFO
get = do opts <- get
- case parseModuleOptions ["--" ++ flag ++ "=" ++ value | (flag,value) <- opts] of
+ case parseModuleOptions ["--" ++ flag ++ "=" ++ render (ppLit value) | (flag,value) <- opts] of
Ok x -> return x
Bad msg -> fail msg
diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs
index dba08ce44..e15e6e4d3 100644
--- a/src/compiler/GF/Grammar/Printer.hs
+++ b/src/compiler/GF/Grammar/Printer.hs
@@ -29,7 +29,7 @@ import GF.Infra.Option
import GF.Grammar.Values
import GF.Grammar.Grammar
-import PGF.Data (ppMeta)
+import PGF.Data (ppMeta, ppLit)
import PGF.Printer (ppFId, ppFunId, ppSeqId, ppSeq)
import Text.PrettyPrint
@@ -87,7 +87,7 @@ ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ mseqs jments) =
ppOptions opts =
text "flags" $$
- nest 2 (vcat [text option <+> equals <+> str value <+> semi | (option,value) <- optionsGFO opts])
+ nest 2 (vcat [text option <+> equals <+> ppLit value <+> semi | (option,value) <- optionsGFO opts])
ppJudgement q (id, AbsCat pcont ) =
text "cat" <+> ppIdent id <+>
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index e0cba05e4..1236e729c 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -43,6 +43,7 @@ import GF.Data.ErrM
import Data.Set (Set)
import qualified Data.Set as Set
+import PGF.Data(Literal(..))
usageHeader :: String
usageHeader = unlines
@@ -170,7 +171,9 @@ data Flags = Flags {
optWarnings :: [Warning],
optDump :: [Dump],
optTagsOnly :: Bool,
- optBeamSize :: Maybe Double,
+ optHeuristicFactor :: Maybe Double,
+ optMetaProb :: Maybe Double,
+ optMetaToknProb :: Maybe Double,
optNewComp :: Bool
}
deriving (Show)
@@ -206,16 +209,18 @@ fixRelativeLibPaths curr_dir lib_dir (Options o) = Options (fixPathFlags . o)
-- Showing options
-- | Pretty-print the options that are preserved in .gfo files.
-optionsGFO :: Options -> [(String,String)]
+optionsGFO :: Options -> [(String,Literal)]
optionsGFO opts = optionsPGF opts
- ++ [("coding", flag optEncoding opts)]
+ ++ [("coding", LStr (flag optEncoding opts))]
-- | Pretty-print the options that are preserved in .pgf files.
-optionsPGF :: Options -> [(String,String)]
+optionsPGF :: Options -> [(String,Literal)]
optionsPGF opts =
- maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
- ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
- ++ maybe [] (\x -> [("beam_size",show x)]) (flag optBeamSize opts)
+ maybe [] (\x -> [("language",LStr x)]) (flag optSpeechLanguage opts)
+ ++ maybe [] (\x -> [("startcat",LStr x)]) (flag optStartCat opts)
+ ++ maybe [] (\x -> [("heuristic_search_factor",LFlt x)]) (flag optHeuristicFactor opts)
+ ++ maybe [] (\x -> [("meta_prob",LFlt x)]) (flag optMetaProb opts)
+ ++ maybe [] (\x -> [("meta_token_prob",LFlt x)]) (flag optMetaToknProb opts)
-- Option manipulation
@@ -272,7 +277,9 @@ defaultFlags = Flags {
optWarnings = [],
optDump = [],
optTagsOnly = False,
- optBeamSize = Nothing,
+ optHeuristicFactor = Nothing,
+ optMetaProb = Nothing,
+ optMetaToknProb = Nothing,
optNewComp =
#ifdef NEW_COMP
True
@@ -358,7 +365,9 @@ optDescr =
Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).",
Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).",
Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...",
- Option [] ["beam_size"] (ReqArg readDouble "SIZE") "Set the beam size for statistical parsing",
+ Option [] ["heuristic_search_factor"] (ReqArg (readDouble (\d o -> o { optHeuristicFactor = Just d })) "FACTOR") "Set the heuristic search factor for statistical parsing",
+ Option [] ["meta_prob"] (ReqArg (readDouble (\d o -> o { optMetaProb = Just d })) "PROB") "Set the probability of introducting a meta variable in the parser",
+ Option [] ["meta_token_prob"] (ReqArg (readDouble (\d o -> o { optMetaToknProb = Just d })) "PROB") "Set the probability for skipping a token in the parser",
Option [] ["new-comp"] (NoArg (set $ \o -> o{optNewComp = True})) "Use the new experimental compiler.",
Option [] ["old-comp"] (NoArg (set $ \o -> o{optNewComp = False})) "Use old trusty compiler.",
dumpOption "source" Source,
@@ -433,9 +442,9 @@ optDescr =
Nothing -> fail $ "Unknown CFG transformation: " ++ x'
++ " Known: " ++ show (map fst cfgTransformNames)
- readDouble x = case reads x of
- [(d,"")] -> set $ \o -> o { optBeamSize = Just d }
- _ -> fail "A floating point number is expected"
+ readDouble f x = case reads x of
+ [(d,"")] -> set $ f d
+ _ -> fail "A floating point number is expected"
dumpOption s d = Option [] ["dump-"++s] (NoArg (set $ \o -> o { optDump = Dump d:optDump o})) ("Dump output of the " ++ s ++ " phase.")
diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs
index 24c2e4d86..3703fe7a2 100644
--- a/src/compiler/GF/Speech/VoiceXML.hs
+++ b/src/compiler/GF/Speech/VoiceXML.hs
@@ -39,7 +39,7 @@ type Skeleton = [(CId, [(CId, [CId])])]
pgfSkeleton :: PGF -> Skeleton
pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType (abstract pgf) f))) | (_,f) <- fs])
- | (c,(_,fs,_)) <- Map.toList (cats (abstract pgf))]
+ | (c,(_,fs,_,_)) <- Map.toList (cats (abstract pgf))]
--
-- * Questions to ask
diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs
index 61d5726b2..f75a39ab1 100644
--- a/src/compiler/GFC.hs
+++ b/src/compiler/GFC.hs
@@ -104,7 +104,7 @@ writeByteCode opts pgf
where
addrs =
[(id,addr) | (id,(_,_,_,_,addr)) <- Map.toList (funs (abstract pgf))] ++
- [(id,addr) | (id,(_,_,addr)) <- Map.toList (cats (abstract pgf))]
+ [(id,addr) | (id,(_,_,_,addr)) <- Map.toList (cats (abstract pgf))]
writePGF :: Options -> PGF -> IOE ()
writePGF opts pgf = do
diff --git a/src/compiler/SimpleEditor/Convert.hs b/src/compiler/SimpleEditor/Convert.hs
index 3554e3342..86580834a 100644
--- a/src/compiler/SimpleEditor/Convert.hs
+++ b/src/compiler/SimpleEditor/Convert.hs
@@ -23,6 +23,7 @@ import GF.Grammar.Printer(ppParams,ppTerm,getAbs,TermPrintQual(..))
import GF.Grammar.Parser(runP,pModDef)
import GF.Grammar.Lexer(Posn(..))
import GF.Data.ErrM
+import PGF.Data(Literal(LStr))
import SimpleEditor.Syntax as S
import SimpleEditor.JSON
@@ -57,7 +58,10 @@ convAbstract (modid,src) =
let cats = reverse cats0
funs = reverse funs0
flags = optionsGFO (mflags src)
- startcat = maybe "-" id $ lookup "startcat" flags
+ startcat =
+ case lookup "startcat" flags of
+ Just (LStr cat) -> cat
+ _ -> "-"
return $ Grammar (convId modid) extends (Abstract startcat cats funs) []
convExtends = mapM convExtend