From 750a2639b0f46153c36f6c713f522db5bb2d8587 Mon Sep 17 00:00:00 2001
From: aarne
Date: Wed, 27 Jan 2010 17:51:15 +0000
Subject: command eb for example-based grammar conversion; see 'help eb' and the example in examples/animals/QuestionsI.gfe

---
 src/compiler/GF/Command/Commands.hs     | 31 ++++++++++++
 src/compiler/GF/Compile/ExampleBased.hs | 89 +++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 src/compiler/GF/Compile/ExampleBased.hs

(limited to 'src/compiler')

diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 5d2b5421b..7f8722d00 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -23,6 +23,7 @@ import PGF.Printer
 import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
 import PGF.Generate (genRandomProb) ----
 import GF.Compile.Export
+import GF.Compile.ExampleBased
 import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
 import GF.Infra.UseIO
 import GF.Data.ErrM ----
@@ -231,6 +232,33 @@ allCommands cod env@(pgf, mos) = Map.fromList [
      longname = "empty",
      synopsis = "empty the environment"
      }),
+  ("eb", emptyCommandInfo {
+     longname = "example_based",
+     syntax = "eb (-probs=FILE | -lang=LANG)* -file=FILE.gfe",
+     synopsis = "converts .gfe files to .gf files by parsing examples to trees",
+     explanation = unlines [
+       "Reads FILE.gfe and writes FILE.gf. Each expression of form",
+       "'%ex CAT QUOTEDSTRING' in FILE.gfe is replaced by a syntax tree.",
+       "This tree is the first one returned by the parser; a biased ranking",
+       "can be used to regulate the order. If there are more than one parses",
+       "the rest are shown in comments, with probabilities if the order is biased.",
+       "The probabilities flag and configuration file is similar to the commands",
+       "gr and rt. Notice that the command doesn't change the environment,",
+       "but the resulting .gf file must be imported separately."
+       ],
+     flags = [
+       ("file","the file to be converted (suffix .gfe must be given)"),
+       ("lang","the language in which to parse"),
+       ("probs","file with probabilities to rank the parses")
+       ],
+     exec = \opts _ -> do
+       let file = optFile opts
+       mprobs <- optProbs opts pgf
+       let conf = configureExBased pgf mprobs (optLang opts)
+       file' <- parseExamplesInGrammar conf file
+       return (fromString ("wrote " ++ file')),
+     needsTypeCheck = False
+     }),
   ("gr", emptyCommandInfo {
      longname = "generate_random",
      synopsis = "generate random trees in the current abstract syntax",
@@ -871,6 +899,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
        -- putStrLn $ prProbabilities ps
        return $ Just ps
 
+   optFile opts = valStrOpts "file" "_gftmp" opts
+
    optType opts =
      let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
      in case readType str of
@@ -988,3 +1018,4 @@ prMorphoAnalysis (w,lps) =
 
 morphoMissing :: Morpho -> [String] -> [String]
 morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
+
diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs
new file mode 100644
index 000000000..10d7cdc88
--- /dev/null
+++ b/src/compiler/GF/Compile/ExampleBased.hs
@@ -0,0 +1,89 @@
+-- | Example-based grammar writing: converts a @.gfe@ file to a @.gf@ file
+-- by replacing each @%ex CAT "string"@ expression with the tree(s) that
+-- parsing the quoted string in category CAT yields.
+module GF.Compile.ExampleBased (parseExamplesInGrammar,configureExBased) where
+
+import PGF
+import PGF.Probabilistic
+
+-- | Convert the examples in a @.gfe@ file and return the path of the file
+-- written. The output path is the input path with its last three characters
+-- replaced by @gf@, so the input name is expected to end in @gfe@.
+parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath
+parseExamplesInGrammar conf file = do
+  src <- readFile file                             -- .gfe
+  let file' = take (length file - 3) file ++ "gf"  -- .gf
+  convertFile conf src file'
+  return file'
+
+-- | Write the conversion of @src@ to @file@, which is truncated first.
+-- Text outside examples is copied unchanged. Each example is replaced by
+-- @(@, the first tree and @)@ on lines of their own, with any further trees
+-- on following lines prefixed by @ --- @; if there is no parse, a warning
+-- takes the place of the trees and is echoed on standard output.
+convertFile :: ExConfiguration -> String -> FilePath -> IO ()
+convertFile conf src file = do
+  writeFile file "" -- "-- created by example-based grammar writing in GF\n"
+  conv src
+ where
+  -- Copy the text before the next marker in one append, convert the example
+  -- after it, and go on with the rest. An example is converted even when
+  -- nothing follows its closing quote.
+  conv s = case breakAtMarker [] s of
+    (plain,Nothing) -> appf plain
+    (plain,Just cs) -> do
+      appf plain
+      let (cex,rest) = getExample cs
+      convEx cex
+      conv rest
+  -- Split at the first "%ex": the text before it, and what follows it if any.
+  breakAtMarker acc s = case s of
+    '%':'e':'x':cs -> (reverse acc, Just cs)
+    c:cs           -> breakAtMarker (c:acc) cs
+    []             -> (reverse acc, Nothing)
+  -- Read CAT "string" into ((CAT, string), remaining text). 'drop 1' skips a
+  -- quote if there is one, so a missing quote cannot crash on an empty list.
+  getExample s =
+    let
+      (cat,exend) = break (=='"') s
+      (ex, end)   = break (=='"') (drop 1 exend)
+    in ((unwords (words cat),ex), drop 1 end)  -- quotes ignored
+  pgf  = resource_pgf conf
+  lang = language conf
+  convEx (cat,ex) = do
+    appn "("
+    let typ = maybe (error "no valid cat") id $ readType cat
+    let ts = rank $ parse pgf lang typ ex
+    case ts of
+      []   -> appv ("WARNING: cannot parse example " ++ ex)
+      t:tt -> appn t >> mapM_ (appn . (" --- " ++)) tt
+    appn ")"
+  -- Show the trees; with probabilities, in the order 'rankTreesByProbs'
+  -- gives, each followed by its probability.
+  rank ts = case probs conf of
+    Just ps -> [showExpr [] t ++ " -- " ++ show p | (t,p) <- rankTreesByProbs ps ts]
+    _       -> map (showExpr []) ts
+  appf   = appendFile file            -- append to the output file
+  appn s = appf s >> appf "\n"        -- append a line
+  appv s = appn s >> putStrLn s       -- append a line and echo it
+
+-- | Settings for the conversion.
+data ExConfiguration = ExConf {
+  resource_file :: FilePath,            -- ^ not read in this module
+  resource_pgf  :: PGF,                 -- ^ grammar used for parsing
+  probs         :: Maybe Probabilities, -- ^ ranks the parses when present
+  verbose       :: Bool,                -- ^ not read in this module
+  language      :: Language             -- ^ language to parse in
+  }
+
+-- | Build a configuration from a grammar, optional probabilities and the
+-- language to parse in. The resource file is left empty and verbosity off.
+configureExBased :: PGF -> Maybe Probabilities -> Language -> ExConfiguration
+configureExBased pgf mprobs lang = ExConf {
+  resource_file = [],
+  resource_pgf  = pgf,
+  probs         = mprobs,
+  verbose       = False,
+  language      = lang
+  }
+
-- 
cgit v1.2.3