diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
| commit | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch) | |
| tree | 0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/UseGrammar/Parsing.hs | |
| parent | fe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff) | |
removed src for 2.9
Diffstat (limited to 'src/GF/UseGrammar/Parsing.hs')
| -rw-r--r-- | src/GF/UseGrammar/Parsing.hs | 177 |
1 files changed, 0 insertions, 177 deletions
diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs deleted file mode 100644 index 2ca057410..000000000 --- a/src/GF/UseGrammar/Parsing.hs +++ /dev/null @@ -1,177 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Parsing --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/06/02 10:23:52 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.25 $ --- --- (Description of the module) ------------------------------------------------------------------------------ - -module GF.UseGrammar.Parsing where - -import GF.Infra.CheckM -import qualified GF.Canon.AbsGFC as C -import GF.Canon.GFC -import GF.Canon.MkGFC (trExp) ---- -import GF.Canon.CMacros -import GF.Grammar.MMacros (refreshMetas) -import GF.UseGrammar.Linear -import GF.Data.Str -import GF.CF.CF -import GF.CF.CFIdent -import GF.Infra.Ident -import GF.Grammar.TypeCheck -import GF.Grammar.Values ---import CFMethod -import GF.UseGrammar.Tokenize -import GF.UseGrammar.Morphology (isKnownWord) -import GF.CF.Profile -import GF.Infra.Option -import GF.UseGrammar.Custom -import GF.Compile.ShellState - -import GF.CF.PPrCF (prCFTree) --- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE -import qualified GF.Parsing.GFC as New - -import GF.Data.Operations - -import Data.List (nub,sortBy) -import Data.Char (toLower) -import Control.Monad (liftM) - --- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002 - -parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree] -parseString os sg cat = liftM fst . parseStringMsg os sg cat - -parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String) -parseStringMsg os sg cat s = do - case checkStart $ parseStringC os sg cat s of - Ok (ts,(_,ss)) -> return (ts, unlines $ reverse ss) - Bad s -> return ([],s) - -parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] -parseStringC opts0 sg cat s - | oElem (iOpt "old") opts0 || - (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do - let opts = unionOptions opts0 $ stateOptions sg - cf = stateCF sg - gr = stateGrammarST sg - cn = cncId sg - toks = customOrDefault opts useTokenizer customTokenizer sg s - parser = customOrDefault opts useParser customParser sg cat - if oElem (iOpt "cut") opts - then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks - else mapM (tokens2trms opts sg cn parser) toks >>= return . concat - ----- | or [oElem p opts0 | ----- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do - - | otherwise = do - let opts = unionOptions opts0 $ stateOptions sg - algorithm | oElem newCParser opts0 = "c" - | oElem newMParser opts0 = "m" - | oElem newFParser opts0 = "f" - | otherwise = "f" -- default algorithm: FCFG - strategy = maybe "bottomup" id $ getOptVal opts useParser - -- -parser=bottomup/topdown - tokenizer = customOrDefault opts useTokenizer customTokenizer sg - toks = case tokenizer s of - t:_ -> t - _ -> [] ---- no support for undet. tok. - unknowns = - [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w] - where - unk w = not $ isKnownWord (morpho sg) w - uncap (c:cs) = toLower c : cs - uncap s = s - - case unknowns of - _:_ | oElem (iOpt "trynextlang") opts -> return [] - _:_ -> fail $ "Unknown words:" +++ unwords unknowns - _ -> do - - ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks - ts' <- checkErr $ - allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts - return $ optIntOrAll opts flagNumber ts' - - -tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] -tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info - where result = parser toks - info = snd result - trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2)) - -trees2trms :: - Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree] -trees2trms opts sg cn as ts0 info = do - let s = unwords $ map prCFTok as - ts <- case () of - _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return [] - _ | raw -> do - ts1 <- return (map cf2trm0 ts0) ----- should not need annot - checks [ - mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails - ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return [] - ] - _ -> do - let num = optIntOrN opts flagRawtrees 999999 - let (ts01,rest) = splitAt num ts0 - if null rest then return () - else raise ("Warning: only" +++ show num +++ "raw parses out of" +++ - show (length ts0) +++ - "considered; use -rawtrees=<Int> to see more" - ) - (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01 - if null ts1 then raise ss else return () - ts2 <- checkErr $ - allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ---- - if forgive then return ts2 else do - let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2] - ps = [t | (t,ss) <- tsss, - any (compatToks as) (map str2cftoks ss)] - if null ps - then raise $ "Failure in morphology." ++ - if verb - then "\nPossible corrections: " +++++ - unlines (nub (map sstr (concatMap snd tsss))) - else "" - else return ps - if verb - then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info - else return () - - return $ optIntOrAll opts flagNumber $ nub ts - where - gr = stateGrammarST sg - - raw = oElem rawParse opts - verb = oElem beVerbose opts - forgive = oElem forgiveParse opts - ----- Operatins.allChecks :: ErrorMonad m => [m a] -> m [a] - -unknownWords sg ts = case filter noMatch [t | t@(TS _) <- ts] of - [] -> "where all words are known" - us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals - where - terminals = map TS $ stateGrammarWords sg - noMatch t = all (not . compatTok t) terminals - - ---- too much type checking in building term info? return FullTerm to save work? - --- | raw parsing: so simple it is for a context-free CF grammar -cf2trm0 :: CFTree -> C.Exp -cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees) - where - cffun2trm (CFFun (fun,_)) = fun - mkApp = foldl C.EApp - mkAppAtom a = mkApp (C.EAtom a) |
