summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/GF/API.hs 19
-rw-r--r-- src/GF/Shell.hs 2
-rw-r--r-- src/GF/UseGrammar/Parsing.hs 21
3 files changed, 33 insertions, 9 deletions
diff --git a/src/GF/API.hs b/src/GF/API.hs
index 762fa372f..3efd81472 100644
--- a/src/GF/API.hs
+++ b/src/GF/API.hs
@@ -75,6 +75,7 @@ import GF.Infra.UseIO
import GF.Data.Zipper
import Data.List (nub)
+import Data.Char (toLower)
import Data.Maybe (fromMaybe)
import Control.Monad (liftM)
import System (system)
@@ -314,9 +315,16 @@ morphoAnalyse opts gr
mo = morpho gr
isKnownWord :: GFGrammar -> String -> Bool
-isKnownWord gr s = case morphoAnalyse (options [beShort]) gr s of
- a@(_:_:_) -> last (init a) /= '*' -- [word *]
- _ -> False
+isKnownWord gr s = GF.UseGrammar.Morphology.isKnownWord (morpho gr) s
+
+unknownTokens :: GFGrammar -> [CFTok] -> [String]
+unknownTokens gr ts =
+ [w | TC w <- ts, unk w && unk (uncap w)] ++ [w | TS w <- ts, unk w]
+ where
+ unk w = not $ GF.API.isKnownWord gr w
+ uncap (c:cs) = toLower c : cs
+ uncap s = s
+
{-
prExpXML :: StateGrammar -> Term -> [String]
@@ -397,8 +405,11 @@ optTransfer opts g = case getOptVal opts transferFun of
_ -> id
-}
+optTokenizerResult :: Options -> GFGrammar -> String -> [[CFTok]]
+optTokenizerResult opts gr = customOrDefault opts useTokenizer customTokenizer gr
+
optTokenizer :: Options -> GFGrammar -> String -> String
-optTokenizer opts gr = show . customOrDefault opts useTokenizer customTokenizer gr
+optTokenizer opts gr = show . optTokenizerResult opts gr
-- performs UTF8 if the language does not have flag coding=utf8; replaces name*U
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs
index 29a4b6c23..dd8267a91 100644
--- a/src/GF/Shell.hs
+++ b/src/GF/Shell.hs
@@ -254,7 +254,7 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
| otherwise -> parse $ prCommandArg a
where
parse x = do
- warnDiscont opts
+ warnDiscont opts
let p = optParseArgErrMsg opts gro x
case p of
Ok (ts,msg)
diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs
index 65ed26863..599268b1d 100644
--- a/src/GF/UseGrammar/Parsing.hs
+++ b/src/GF/UseGrammar/Parsing.hs
@@ -29,6 +29,7 @@ import GF.Grammar.TypeCheck
import GF.Grammar.Values
--import CFMethod
import GF.UseGrammar.Tokenize
+import GF.UseGrammar.Morphology (isKnownWord)
import GF.CF.Profile
import GF.Infra.Option
import GF.UseGrammar.Custom
@@ -41,6 +42,7 @@ import qualified GF.Parsing.GFC as New
import GF.Data.Operations
import Data.List (nub,sortBy)
+import Data.Char (toLower)
import Control.Monad (liftM)
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
@@ -82,10 +84,21 @@ parseStringC opts0 sg cat s
toks = case tokenizer s of
t:_ -> t
_ -> [] ---- no support for undet. tok.
- ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
- ts' <- checkErr $
- allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
- return $ optIntOrAll opts flagNumber ts'
+ unknowns =
+ [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w]
+ where
+ unk w = not $ isKnownWord (morpho sg) w
+ uncap (c:cs) = toLower c : cs
+ uncap s = s
+
+ case unknowns of
+ _:_ -> fail $ "Unknown words:" +++ unwords unknowns
+ _ -> do
+
+ ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+ ts' <- checkErr $
+ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
+ return $ optIntOrAll opts flagNumber ts'
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]