summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2009-12-31 11:02:26 +0000
committeraarne <aarne@chalmers.se>2009-12-31 11:02:26 +0000
commit34b839c0f9e7f5a3a850d27c43b3160573ff2c00 (patch)
tree118e681c4523dad89e8d3bd85604e2c9ede3d893 /src
parentb92c34bafdef047401bf22468b6725f3de312801 (diff)
morpho analysis with -missing flag, shows words outside lexicon; also invoked if parsing fails; also added pg -words to show the list of words
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Commands.hs51
-rw-r--r--src/runtime/haskell/PGF/Linearize.hs2
-rw-r--r--src/runtime/haskell/PGF/ShowLinearize.hs5
3 files changed, 44 insertions, 14 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index d8e2a3023..3647d2e14 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -359,12 +359,22 @@ allCommands cod env@(pgf, mos) = Map.fromList [
synopsis = "print the morphological analyses of all words in the string",
explanation = unlines [
"Prints all the analyses of space-separated words in the input string,",
- "using the morphological analyser of the actual grammar (see command pf)"
+ "using the morphological analyser of the actual grammar (see command pg)"
+ ],
+ exec = \opts -> case opts of
+ _ | isOpt "missing" opts ->
+ return . fromString . unwords .
+ morphoMissing (theMorpho opts) .
+ concatMap words . toStrings
+ _ -> return . fromString . unlines .
+ map prMorphoAnalysis . concatMap (morphos opts) .
+ concatMap words . toStrings ,
+ flags = [
+ ("lang","the languages of analysis (comma-separated, no spaces)")
],
- exec = \opts ->
- return . fromString . unlines .
- map prMorphoAnalysis . concatMap (morphos opts) .
- concatMap words . toStrings
+ options = [
+ ("missing","show the list of unknown words in the input")
+ ]
}),
("mq", emptyCommandInfo {
@@ -395,7 +405,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"the parser. For example if -openclass=\"A,N,V\" is given, the parser",
"will accept unknown adjectives, nouns and verbs with the resource grammar."
],
- exec = \opts -> returnFromExprs . concatMap (par opts) . toStrings,
+ exec = \opts ts ->
+ returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts,
flags = [
("cat","target category of parsing"),
("lang","the languages of parsing (comma-separated, no spaces)"),
@@ -424,7 +435,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
options = [
("cats", "show just the names of abstract syntax categories"),
("fullform", "print the fullform lexicon"),
- ("missing","show just the names of functions that have no linearization")
+ ("missing","show just the names of functions that have no linearization"),
+ ("words", "print the list of words")
]
}),
("ph", emptyCommandInfo {
@@ -770,6 +782,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
]
where
enc = encodeUnicode cod
+
par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang]
@@ -847,20 +860,28 @@ allCommands cod env@(pgf, mos) = Map.fromList [
returnFromExprs es = return $ case es of
[] -> ([], "no trees found")
_ -> fromExprs es
+ returnFromExprsPar opts ts es = return $ case es of
+ [] -> ([], "no trees found; unknown words:" +++
+ unwords (morphoMissing (theMorpho opts)
+ (concatMap words (toStrings ts))))
+ _ -> fromExprs es
prGrammar opts
| isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf
| isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts
| isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) |
la <- optLangs opts, let cs = missingLins pgf la]
+ | isOpt "words" opts = return $ fromString $ concatMap (morpho "" prAllWords) $ optLangs opts
| otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts)
return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf
morphos opts s =
- [morpho [] (\mo -> lookupMorpho mo s) la | la <- optLangs opts]
+ [(s,morpho [] (\mo -> lookupMorpho mo s) la) | la <- optLangs opts]
morpho z f la = maybe z f $ Map.lookup la mos
+ theMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
+
-- ps -f -g s returns g (f s)
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
app f = maybe id id (stringOp f)
@@ -924,8 +945,16 @@ lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag)
prFullFormLexicon :: Morpho -> String
prFullFormLexicon mo =
- unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- fullFormLexicon mo]
+ unlines (map prMorphoAnalysis (fullFormLexicon mo))
+
+prAllWords :: Morpho -> String
+prAllWords mo =
+ unwords [w | (w,_) <- fullFormLexicon mo]
+
+prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String
+prMorphoAnalysis (w,lps) =
+ unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
-prMorphoAnalysis :: [(Lemma,Analysis)] -> String
-prMorphoAnalysis lps = unlines [showCId l ++ " " ++ p | (l,p) <- lps]
+morphoMissing :: Morpho -> [String] -> [String]
+morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs
index fdd4cecb5..80d1f1acf 100644
--- a/src/runtime/haskell/PGF/Linearize.hs
+++ b/src/runtime/haskell/PGF/Linearize.hs
@@ -75,7 +75,7 @@ linTree pgf lang e = lin (expr2tree e) Nothing
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (showCId x)
lin (Meta i) mty = case mty of
- Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc))
+ Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (show i)
variants :: [Term] -> Term
diff --git a/src/runtime/haskell/PGF/ShowLinearize.hs b/src/runtime/haskell/PGF/ShowLinearize.hs
index dd3b997a6..fa4de86c8 100644
--- a/src/runtime/haskell/PGF/ShowLinearize.hs
+++ b/src/runtime/haskell/PGF/ShowLinearize.hs
@@ -100,7 +100,7 @@ markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang =
concatMap collOne
- [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf]
+ [(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf]
where
collOne (f,c,i) =
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
@@ -108,6 +108,7 @@ collectWords pgf lang =
RR rs -> concat [fromRec f v t | (_,t) <- rs]
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
RFV rs -> concatMap (fromRec f v) rs
- RS s -> [(s,[(f,unwords (reverse v))])]
+ RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] ---
+-- RS s -> [(s,[(f,unwords (reverse v))])]
RCon c -> [] ---- inherent