diff options
Diffstat (limited to 'src/GF/UseGrammar/Parsing.hs')
| -rw-r--r-- | src/GF/UseGrammar/Parsing.hs | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs new file mode 100644 index 000000000..4cd4f4bc8 --- /dev/null +++ b/src/GF/UseGrammar/Parsing.hs @@ -0,0 +1,98 @@ +module Parsing where + +import CheckM +import qualified AbsGFC as C +import GFC +import MkGFC (trExp) ---- +import CMacros +import Linear +import Str +import CF +import CFIdent +import Ident +import TypeCheck +import Values +--import CFMethod +import Tokenize +import Profile +import Option +import Custom +import ShellState + +import Operations + +import List (nub) +import Monad (liftM) + +-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002 + +parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree] +parseString os sg cat = liftM fst . parseStringMsg os sg cat + +parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String) +parseStringMsg os sg cat s = do + (ts,(_,ss)) <- checkStart $ parseStringC os sg cat s + return (ts,unlines ss) + +parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] +parseStringC opts0 sg cat s = do + let opts = unionOptions opts0 $ stateOptions sg + cf = stateCF sg + gr = stateGrammarST sg + cn = cncId sg + tok = customOrDefault opts useTokenizer customTokenizer sg + parser = customOrDefault opts useParser customParser sg cat + tokens2trms opts sg cn parser (tok s) + +tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] +tokens2trms opts sg cn parser as = do + let res@(trees,info) = parser as + ts0 <- return $ nub (cfParseResults res) + ts <- case () of + _ | null ts0 -> checkWarn "No success in cf parsing" >> return [] + _ | raw -> do + ts1 <- return (map cf2trm0 ts0) ----- should not need annot + mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated + _ -> do + (ts1,_) <- checkErr $ mapErr postParse ts0 + ts2 <- mapM (checkErr . (annotate gr) . trExp) ts1 ---- + if forgive then return ts2 else do + let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2] + ps = [t | (t,ss) <- tsss, + any (compatToks as) (map str2cftoks ss)] + if null ps + then raise $ "Failure in morphology." ++ + if verb + then "\nPossible corrections: " +++++ + unlines (nub (map sstr (concatMap snd tsss))) + else "" + else return ps + + if verb + then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info + else return () + + return $ optIntOrAll opts flagNumber $ nub ts + where + gr = stateGrammarST sg + + raw = oElem rawParse opts + verb = oElem beVerbose opts + forgive = oElem forgiveParse opts + + unknown ts = case filter noMatch ts of + [] -> "where all words are known" + us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals + terminals = map TS $ cfTokens $ stateCF sg + noMatch t = all (not . compatTok t) terminals + + +--- too much type checking in building term info? return FullTerm to save work? + +-- raw parsing: so simple it is for a context-free CF grammar +cf2trm0 :: CFTree -> C.Exp +cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees) + where + cffun2trm (CFFun (fun,_)) = fun + mkApp = foldl C.EApp + mkAppAtom a = mkApp (C.EAtom a) |
