summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2010-01-27 17:51:15 +0000
committeraarne <aarne@chalmers.se>2010-01-27 17:51:15 +0000
commit750a2639b0f46153c36f6c713f522db5bb2d8587 (patch)
treeeb021744f904a0e62b44d4012bea27368651faad
parent890d45579300f39d50a5a18a9f6feed8634ae8ba (diff)
command eb for example-based grammar conversion; see 'help eb' and the example in examples/animals/QuestionsI.gfe
-rw-r--r--examples/animals/Animals.gf12
-rw-r--r--examples/animals/AnimalsEng.gf16
-rw-r--r--examples/animals/AnimalsFre.gf15
-rw-r--r--examples/animals/AnimalsSwe.gf15
-rw-r--r--examples/animals/Questions.gf10
-rw-r--r--examples/animals/QuestionsEng.gf2
-rw-r--r--examples/animals/QuestionsFre.gf2
-rw-r--r--examples/animals/QuestionsI.gf27
-rw-r--r--examples/animals/QuestionsI.gfe15
-rw-r--r--examples/animals/QuestionsSwe.gf2
-rw-r--r--src/compiler/GF/Command/Commands.hs31
-rw-r--r--src/compiler/GF/Compile/ExampleBased.hs59
12 files changed, 206 insertions, 0 deletions
diff --git a/examples/animals/Animals.gf b/examples/animals/Animals.gf
new file mode 100644
index 000000000..33b56e740
--- /dev/null
+++ b/examples/animals/Animals.gf
@@ -0,0 +1,12 @@
+-- Animals: the Questions grammar specialized to a small animal domain.
+
+abstract Animals = Questions ** {
+
+  flags
+    startcat = Phrase ;
+
+  fun
+    -- the animals that can take part in an action
+    Dog   : Entity ;
+    Cat   : Entity ;
+    Mouse : Entity ;
+    Lion  : Entity ;
+    Zebra : Entity ;
+
+    -- the actions that can hold between the animals
+    Chase : Action ;
+    Eat   : Action ;
+    See   : Action ;
+}
+
diff --git a/examples/animals/AnimalsEng.gf b/examples/animals/AnimalsEng.gf
new file mode 100644
index 000000000..22942b735
--- /dev/null
+++ b/examples/animals/AnimalsEng.gf
@@ -0,0 +1,16 @@
+--# -path=.:present:prelude
+--resource/english:resource/abstract:resource/../prelude
+
+-- English lexicon for the Animals grammar; everything else is inherited
+-- from QuestionsEng.
+concrete AnimalsEng of Animals = QuestionsEng **
+  open LangEng, ParadigmsEng, IrregEng in {
+
+  lin
+    -- entities
+    Dog   = regN "dog" ;
+    Cat   = regN "cat" ;
+    Mouse = mk2N "mouse" "mice" ;
+    Lion  = regN "lion" ;
+    Zebra = regN "zebra" ;
+
+    -- actions
+    Chase = dirV2 (regV "chase") ;
+    Eat   = dirV2 eat_V ;
+    See   = dirV2 see_V ;
+}
diff --git a/examples/animals/AnimalsFre.gf b/examples/animals/AnimalsFre.gf
new file mode 100644
index 000000000..198c84ad7
--- /dev/null
+++ b/examples/animals/AnimalsFre.gf
@@ -0,0 +1,15 @@
+--# -path=.:present:prelude
+
+-- French lexicon for the Animals grammar; everything else is inherited
+-- from QuestionsFre.
+concrete AnimalsFre of Animals = QuestionsFre **
+  open LangFre, ParadigmsFre, IrregFre in {
+
+  lin
+    -- entities
+    Dog   = regN "chien" ;
+    Cat   = regN "chat" ;
+    Mouse = regGenN "souris" feminine ;
+    Lion  = regN "lion" ;
+    Zebra = regGenN "zèbre" masculine ;
+
+    -- actions
+    Chase = dirV2 (regV "chasser") ;
+    Eat   = dirV2 (regV "manger") ;
+    See   = voir_V2 ;
+}
diff --git a/examples/animals/AnimalsSwe.gf b/examples/animals/AnimalsSwe.gf
new file mode 100644
index 000000000..8f256885a
--- /dev/null
+++ b/examples/animals/AnimalsSwe.gf
@@ -0,0 +1,15 @@
+--# -path=.:present:prelude
+
+-- Swedish lexicon for the Animals grammar; everything else is inherited
+-- from QuestionsSwe.
+concrete AnimalsSwe of Animals = QuestionsSwe **
+  open LangSwe, ParadigmsSwe, IrregSwe in {
+
+  lin
+    -- entities
+    Dog   = regN "hund" ;
+    Cat   = mk2N "katt" "katter" ;
+    Mouse = mkN "mus" "musen" "möss" "mössen" ;
+    Lion  = mk2N "lejon" "lejon" ;
+    Zebra = regN "zebra" ;
+
+    -- actions
+    Chase = dirV2 (regV "jaga") ;
+    Eat   = dirV2 äta_V ;
+    See   = dirV2 se_V ;
+}
diff --git a/examples/animals/Questions.gf b/examples/animals/Questions.gf
new file mode 100644
index 000000000..bb25e785d
--- /dev/null
+++ b/examples/animals/Questions.gf
@@ -0,0 +1,10 @@
+-- Questions: simple questions and answers, in present tense.
+
+abstract Questions = {
+
+  cat
+    Phrase ;  -- what the functions below produce: a question or an answer
+    Entity ;
+    Action ;
+
+  fun
+    -- who chases X
+    Who    : Action -> Entity -> Phrase ;
+
+    -- whom does X chase
+    Whom   : Entity -> Action -> Phrase ;
+
+    -- X chases Y
+    Answer : Entity -> Action -> Entity -> Phrase ;
+}
diff --git a/examples/animals/QuestionsEng.gf b/examples/animals/QuestionsEng.gf
new file mode 100644
index 000000000..8a4c5c406
--- /dev/null
+++ b/examples/animals/QuestionsEng.gf
@@ -0,0 +1,2 @@
+-- English instance of QuestionsI: the opened interface Lang is bound to LangEng.
+concrete QuestionsEng of Questions =
+  QuestionsI with (Lang = LangEng) ;
diff --git a/examples/animals/QuestionsFre.gf b/examples/animals/QuestionsFre.gf
new file mode 100644
index 000000000..dab23b2ba
--- /dev/null
+++ b/examples/animals/QuestionsFre.gf
@@ -0,0 +1,2 @@
+-- French instance of QuestionsI: the opened interface Lang is bound to LangFre.
+concrete QuestionsFre of Questions =
+  QuestionsI with (Lang = LangFre) ;
diff --git a/examples/animals/QuestionsI.gf b/examples/animals/QuestionsI.gf
new file mode 100644
index 000000000..30476ccdb
--- /dev/null
+++ b/examples/animals/QuestionsI.gf
@@ -0,0 +1,27 @@
+-- to compile: echo "eb -file=QuestionsI.gfe" | gf $GF_LIB_PATH/present/LangEng.gfo
+-- or use directly gf <mkAnimals.gfs
+--
+-- This file is the output of the eb command run on QuestionsI.gfe. For each
+-- example the first tree is the one in use; the lines marked with --- are the
+-- other parses of the same example.
+--
+-- NOTE: the tree for Answer was reordered by hand. The parse listed first by
+-- the parser read "loves" as the plural noun love_N in apposition, which gives
+-- a noun phrase that never uses the love_V2 argument. The clause reading is
+-- now the active one. Regenerating the file without a suitable -probs ranking
+-- may bring the noun-phrase reading back to the top.
+
+incomplete concrete QuestionsI of Questions = open Lang in {
+  lincat
+    Phrase = Utt ;
+    Entity = N ;
+    Action = V2 ;
+
+  lin
+    -- example: "who loves men"
+    Who love_V2 man_N = (
+UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
+)
+    ;
+    -- example: "whom does the man love"
+    Whom man_N love_V2 = (
+UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
+ --- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
+)
+    ;
+    -- example: "the woman loves men"
+    Answer woman_N love_V2 man_N = (
+UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN woman_N)) (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
+ --- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (UseN love_N))) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))
+ --- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (ApposCN (UseN love_N) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))))
+)
+    ;
+
+}
diff --git a/examples/animals/QuestionsI.gfe b/examples/animals/QuestionsI.gfe
new file mode 100644
index 000000000..abb28742b
--- /dev/null
+++ b/examples/animals/QuestionsI.gfe
@@ -0,0 +1,15 @@
+-- to compile: echo "eb -file=QuestionsI.gfe" | gf $GF_LIB_PATH/present/LangEng.gfo
+-- or use directly gf <mkAnimals.gfs
+
+incomplete concrete QuestionsI of Questions = open Lang in {
+
+  lincat
+    Phrase = Utt ;
+    Entity = N ;
+    Action = V2 ;
+
+  -- The right-hand sides are example phrases; the eb command replaces each
+  -- of them by a syntax tree when it writes QuestionsI.gf.
+  lin
+    Who    love_V2 man_N          = %ex Utt "who loves men" ;
+    Whom   man_N love_V2          = %ex Utt "whom does the man love" ;
+    Answer woman_N love_V2 man_N  = %ex Utt "the woman loves men" ;
+
+}
diff --git a/examples/animals/QuestionsSwe.gf b/examples/animals/QuestionsSwe.gf
new file mode 100644
index 000000000..9d59ecb2e
--- /dev/null
+++ b/examples/animals/QuestionsSwe.gf
@@ -0,0 +1,2 @@
+-- Swedish instance of QuestionsI: the opened interface Lang is bound to LangSwe.
+concrete QuestionsSwe of Questions =
+  QuestionsI with (Lang = LangSwe) ;
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 5d2b5421b..7f8722d00 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -23,6 +23,7 @@ import PGF.Printer
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
import PGF.Generate (genRandomProb) ----
import GF.Compile.Export
+import GF.Compile.ExampleBased
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
import GF.Infra.UseIO
import GF.Data.ErrM ----
@@ -231,6 +232,33 @@ allCommands cod env@(pgf, mos) = Map.fromList [
longname = "empty",
synopsis = "empty the environment"
}),
+ ("eb", emptyCommandInfo {
+ longname = "example_based",
+ syntax = "eb (-probs=FILE | -lang=LANG)* -file=FILE.gfe",
+ synopsis = "converts .gfe files to .gf files by parsing examples to trees",
+ explanation = unlines [
+ "Reads FILE.gfe and writes FILE.gf. Each expression of form",
+ "'%ex CAT QUOTEDSTRING' in FILE.gfe is replaced by a syntax tree.",
+ "This tree is the first one returned by the parser; a biased ranking",
+ "can be used to regulate the order. If there are more than one parses",
+ "the rest are shown in comments, with probabilities if the order is biased.",
+ "The probabilities flag and configuration file is similar to the commands",
+ "gr and rt. Notice that the command doesn't change the environment,",
+ "but the resulting .gf file must be imported separately."
+ ],
+ flags = [
+ ("file","the file to be converted (suffix .gfe must be given)"),
+ ("lang","the language in which to parse"),
+ ("probs","file with probabilities to rank the parses")
+ ],
+ exec = \opts _ -> do
+ let file = optFile opts
+ mprobs <- optProbs opts pgf
+ let conf = configureExBased pgf mprobs (optLang opts)
+ file' <- parseExamplesInGrammar conf file
+ return (fromString ("wrote " ++ file')),
+ needsTypeCheck = False
+ }),
("gr", emptyCommandInfo {
longname = "generate_random",
synopsis = "generate random trees in the current abstract syntax",
@@ -871,6 +899,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
-- putStrLn $ prProbabilities ps
return $ Just ps
+ optFile opts = valStrOpts "file" "_gftmp" opts
+
optType opts =
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
in case readType str of
@@ -988,3 +1018,4 @@ prMorphoAnalysis (w,lps) =
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
+
diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs
new file mode 100644
index 000000000..10d7cdc88
--- /dev/null
+++ b/src/compiler/GF/Compile/ExampleBased.hs
@@ -0,0 +1,59 @@
+module GF.Compile.ExampleBased (parseExamplesInGrammar,configureExBased) where
+
+import PGF
+import PGF.Probabilistic
+
+-- | Convert an example-based grammar file to an ordinary grammar file.
+--
+-- Reads @file@, passes its contents to 'convertFile', and returns the path
+-- of the file that was written. For a name ending in @.gfe@ the output name
+-- is the same name with the suffix @.gf@; for any other name @.gf@ is
+-- appended, so the output path always differs from the input path.
+parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath
+parseExamplesInGrammar conf file = do
+  src <- readFile file            -- the .gfe source
+  let file' = gfPath file         -- the .gf target
+  convertFile conf src file'
+  return file'
+  where
+    -- Matching on the reversed name checks the suffix without new imports.
+    gfPath path = case reverse path of
+      'e':'f':'g':'.':base -> reverse base ++ ".gf"
+      _                    -> path ++ ".gf"
+
+-- | Write to @file@ a copy of @src@ in which every example of the form
+-- @%ex CAT "STRING"@ is replaced by parse results.
+--
+-- Text outside examples is copied unchanged. Each example becomes a
+-- parenthesized group of lines: the first tree returned by 'rank', followed
+-- by the remaining trees prefixed with @---@. If the example has no parse, a
+-- warning line is written in place of the trees and also printed on standard
+-- output. When the configuration has probabilities, the trees are ordered by
+-- 'rankTreesByProbs' and each is followed by its probability.
+--
+-- An example marker that is not followed by a complete quoted string is
+-- copied to the output as it stands, with a warning on standard output.
+-- Calls 'error' if CAT cannot be read as a type.
+convertFile :: ExConfiguration -> String -> FilePath -> IO ()
+convertFile conf src file = do
+  writeFile file "" -- start from an empty file; everything else is appended
+  conv src
+  where
+    -- Copy the text up to the next example in one write, convert the
+    -- example, and continue with whatever follows it.
+    conv s = do
+      let (plain, rest) = splitAtExample s
+      if null plain then return () else appf plain
+      case rest of
+        Nothing -> return ()
+        Just cs -> case getExample cs of
+          Just (cex, end) -> convEx cex >> conv end
+          Nothing -> do
+            putStrLn "WARNING: example without a quoted string, copied unchanged"
+            appf ("%ex" ++ cs)
+
+    -- The text before the first example marker, and the text after the
+    -- marker if there is one.
+    splitAtExample s = case s of
+      '%':'e':'x':cs -> ([], Just cs)
+      c:cs           -> let (plain, rest) = splitAtExample cs
+                        in (c : plain, rest)
+      []             -> ([], Nothing)
+
+    -- Split "CAT \"STRING\" REST" into ((CAT, STRING), REST); the quotes
+    -- themselves are dropped. Nothing if either quote is missing.
+    getExample s = case break (== '"') s of
+      (cat, _ : quoted) -> case break (== '"') quoted of
+        (ex, _ : end) -> Just ((unwords (words cat), ex), end)
+        _             -> Nothing
+      _ -> Nothing
+
+    pgf  = resource_pgf conf
+    lang = language conf
+
+    convEx (cat, ex) = do
+      appn "("
+      let typ = maybe (error "no valid cat") id $ readType cat
+      let ts = rank $ parse pgf lang typ ex
+      case ts of
+        []     -> appv ("WARNING: cannot parse example " ++ ex)
+        t : tt -> appn t >> mapM_ (appn . (" --- " ++)) tt
+      appn ")"
+
+    -- Trees as strings, in the order in which they are written out.
+    rank ts = case probs conf of
+      Just ps -> [showExpr [] t ++ " -- " ++ show p | (t, p) <- rankTreesByProbs ps ts]
+      _       -> map (showExpr []) ts
+
+    appf   = appendFile file
+    appn s = appf (s ++ "\n")
+    appv s = appn s >> putStrLn s
+
+-- | Settings for converting a grammar written by examples.
+data ExConfiguration = ExConf
+  { resource_file :: FilePath             -- ^ left empty by 'configureExBased'
+  , resource_pgf  :: PGF                  -- ^ grammar used for parsing the examples
+  , probs         :: Maybe Probabilities  -- ^ if given, used for ranking the parses
+  , verbose       :: Bool                 -- ^ set to 'False' by 'configureExBased'
+  , language      :: Language             -- ^ language in which the examples are parsed
+  }
+
+-- | Build a configuration from a grammar, optional probabilities and a
+-- language. The resource file name is empty and verbosity is off.
+configureExBased :: PGF -> Maybe Probabilities -> Language -> ExConfiguration
+configureExBased pgf mprobs lang = ExConf
+  { resource_file = []
+  , resource_pgf  = pgf
+  , probs         = mprobs
+  , verbose       = False
+  , language      = lang
+  }
+