From e056cc2bfd2e5e152edd9829b8d4d77637536a14 Mon Sep 17 00:00:00 2001
From: aarne
Date: Thu, 28 Jan 2010 10:06:16 +0000
Subject: in example-based grammar conversion, warnings about unknown words

---
 src/runtime/haskell/PGF/Morphology.hs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'src/runtime/haskell')

diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs
index f5c65b3ba..711f9c01d 100644
--- a/src/runtime/haskell/PGF/Morphology.hs
+++ b/src/runtime/haskell/PGF/Morphology.hs
@@ -1,6 +1,7 @@
 module PGF.Morphology(Lemma,Analysis,Morpho,
                       buildMorpho,
-                      lookupMorpho,fullFormLexicon) where
+                      lookupMorpho,fullFormLexicon,
+                      morphoMissing,missingWordMsg) where
 
 import PGF.CId
 import PGF.Data
@@ -10,6 +11,7 @@ import qualified Data.Set as Set
 import qualified Data.IntMap as IntMap
 import Data.Array.IArray
 import Data.List (intersperse)
+import Data.Char (isDigit) ----
 
 -- these 4 definitions depend on the datastructure used
 
@@ -42,3 +44,13 @@ lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
 
 fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
 fullFormLexicon (Morpho mo) = Map.toList mo
+
+morphoMissing :: Morpho -> [String] -> [String]
+morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w), notLiteral w] where
+  notLiteral w = not (all isDigit w) ---- should be defined somewhere
+
+missingWordMsg :: Morpho -> [String] -> String
+missingWordMsg morpho ws = case morphoMissing morpho ws of
+  [] -> ", but all words are known"
+  ws -> "; unknown words: " ++ unwords ws
+
-- 
cgit v1.2.3