summary refs log tree commit diff
diff options
context:
space:
mode:
author aarne <aarne@chalmers.se> 2010-01-28 10:06:16 +0000
committer aarne <aarne@chalmers.se> 2010-01-28 10:06:16 +0000
commit e056cc2bfd2e5e152edd9829b8d4d77637536a14 (patch)
tree f48c6256e866402ae921d910fbfb5a7483c035cc
parent 750a2639b0f46153c36f6c713f522db5bb2d8587 (diff)
in example-based grammar conversion, warnings about unknown words
-rw-r--r-- examples/animals/QuestionsI.gf 6
-rw-r--r-- src/compiler/GF/Command/Commands.hs 14
-rw-r--r-- src/compiler/GF/Compile/ExampleBased.hs 18
-rw-r--r-- src/runtime/haskell/PGF/Morphology.hs 14
4 files changed, 33 insertions, 19 deletions
diff --git a/examples/animals/QuestionsI.gf b/examples/animals/QuestionsI.gf
index 30476ccdb..c35eeb796 100644
--- a/examples/animals/QuestionsI.gf
+++ b/examples/animals/QuestionsI.gf
@@ -9,18 +9,18 @@ incomplete concrete QuestionsI of Questions = open Lang in {
lin
Who love_V2 man_N = (
+--- WARNING: ambiguous example who loves men
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
)
;
Whom man_N love_V2 = (
+--- WARNING: ambiguous example whom does the man love
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
--- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
)
;
Answer woman_N love_V2 man_N = (
-UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (UseN love_N))) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))
- --- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (ApposCN (UseN love_N) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))))
- --- UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN woman_N)) (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
+--- WARNING: cannot parse example the woman lovess men; unknown words: lovess
)
;
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 7f8722d00..b56c19d72 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -254,7 +254,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts _ -> do
let file = optFile opts
mprobs <- optProbs opts pgf
- let conf = configureExBased pgf mprobs (optLang opts)
+ let conf = configureExBased pgf (optMorpho opts) mprobs (optLang opts)
file' <- parseExamplesInGrammar conf file
return (fromString ("wrote " ++ file')),
needsTypeCheck = False
@@ -397,7 +397,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts -> case opts of
_ | isOpt "missing" opts ->
return . fromString . unwords .
- morphoMissing (theMorpho opts) .
+ morphoMissing (optMorpho opts) .
concatMap words . toStrings
_ -> return . fromString . unlines .
map prMorphoAnalysis . concatMap (morphos opts) .
@@ -925,9 +925,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [
[] -> ([], "no trees found")
_ -> fromExprs es
returnFromExprsPar opts ts es = return $ case es of
- [] -> ([], "no trees found; unknown words:" +++
- unwords (morphoMissing (theMorpho opts)
- (concatMap words (toStrings ts))))
+ [] -> ([], "no trees found" ++
+ missingWordMsg (optMorpho opts) (concatMap words (toStrings ts))
+ )
_ -> fromExprs es
prGrammar opts
@@ -944,7 +944,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
morpho z f la = maybe z f $ Map.lookup la mos
- theMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
+ optMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
-- ps -f -g s returns g (f s)
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
@@ -1016,6 +1016,4 @@ prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String
prMorphoAnalysis (w,lps) =
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
-morphoMissing :: Morpho -> [String] -> [String]
-morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs
index 10d7cdc88..93116c4eb 100644
--- a/src/compiler/GF/Compile/ExampleBased.hs
+++ b/src/compiler/GF/Compile/ExampleBased.hs
@@ -2,6 +2,7 @@ module GF.Compile.ExampleBased (parseExamplesInGrammar,configureExBased) where
import PGF
import PGF.Probabilistic
+import PGF.Morphology
parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath
parseExamplesInGrammar conf file = do
@@ -30,30 +31,33 @@ convertFile conf src file = do
(ex, end) = break (=='"') (tail exend)
in ((unwords (words cat),ex), tail end) -- quotes ignored
pgf = resource_pgf conf
+ morpho = resource_morpho conf
lang = language conf
convEx (cat,ex) = do
appn "("
let typ = maybe (error "no valid cat") id $ readType cat
let ts = rank $ parse pgf lang typ ex
case ts of
- [] -> appv ("WARNING: cannot parse example " ++ ex)
- t:tt -> appn t >> mapM_ (appn . (" --- " ++)) tt
+ [] -> appv ("WARNING: cannot parse example " ++ ex ++
+ missingWordMsg morpho (words ex))
+ t:tt -> appv ("WARNING: ambiguous example " ++ ex) >>
+ appn t >> mapM_ (appn . (" --- " ++)) tt
appn ")"
rank ts = case probs conf of
Just probs -> [showExpr [] t ++ " -- " ++ show p | (t,p) <- rankTreesByProbs probs ts]
_ -> map (showExpr []) ts
appf = appendFile file
appn s = appf s >> appf "\n"
- appv s = appn s >> putStrLn s
+ appv s = appn ("--- " ++ s) >> putStrLn s
data ExConfiguration = ExConf {
- resource_file :: FilePath,
- resource_pgf :: PGF,
+ resource_pgf :: PGF,
+ resource_morpho :: Morpho,
probs :: Maybe Probabilities,
verbose :: Bool,
language :: Language
}
-configureExBased :: PGF -> Maybe Probabilities -> Language -> ExConfiguration
-configureExBased pgf mprobs lang = ExConf [] pgf mprobs False lang
+configureExBased :: PGF -> Morpho -> Maybe Probabilities -> Language -> ExConfiguration
+configureExBased pgf morpho mprobs lang = ExConf pgf morpho mprobs False lang
diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs
index f5c65b3ba..711f9c01d 100644
--- a/src/runtime/haskell/PGF/Morphology.hs
+++ b/src/runtime/haskell/PGF/Morphology.hs
@@ -1,6 +1,7 @@
module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,
- lookupMorpho,fullFormLexicon) where
+ lookupMorpho,fullFormLexicon,
+ morphoMissing,missingWordMsg) where
import PGF.CId
import PGF.Data
@@ -10,6 +11,7 @@ import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.List (intersperse)
+import Data.Char (isDigit) ----
-- these 4 definitions depend on the datastructure used
@@ -42,3 +44,13 @@ lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon (Morpho mo) = Map.toList mo
+
+morphoMissing :: Morpho -> [String] -> [String]
+morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w), notLiteral w] where
+ notLiteral w = not (all isDigit w) ---- should be defined somewhere
+
+missingWordMsg :: Morpho -> [String] -> String
+missingWordMsg morpho ws = case morphoMissing morpho ws of
+ [] -> ", but all words are known"
+ ws -> "; unknown words: " ++ unwords ws
+