summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar/Parsing.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
commitb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch)
tree0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/UseGrammar/Parsing.hs
parentfe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff)
removed src for 2.9
Diffstat (limited to 'src/GF/UseGrammar/Parsing.hs')
-rw-r--r--src/GF/UseGrammar/Parsing.hs177
1 files changed, 0 insertions, 177 deletions
diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs
deleted file mode 100644
index 2ca057410..000000000
--- a/src/GF/UseGrammar/Parsing.hs
+++ /dev/null
@@ -1,177 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : Parsing
--- Maintainer : AR
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/06/02 10:23:52 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.25 $
---
--- (Description of the module)
------------------------------------------------------------------------------
-
-module GF.UseGrammar.Parsing where
-
-import GF.Infra.CheckM
-import qualified GF.Canon.AbsGFC as C
-import GF.Canon.GFC
-import GF.Canon.MkGFC (trExp) ----
-import GF.Canon.CMacros
-import GF.Grammar.MMacros (refreshMetas)
-import GF.UseGrammar.Linear
-import GF.Data.Str
-import GF.CF.CF
-import GF.CF.CFIdent
-import GF.Infra.Ident
-import GF.Grammar.TypeCheck
-import GF.Grammar.Values
---import CFMethod
-import GF.UseGrammar.Tokenize
-import GF.UseGrammar.Morphology (isKnownWord)
-import GF.CF.Profile
-import GF.Infra.Option
-import GF.UseGrammar.Custom
-import GF.Compile.ShellState
-
-import GF.CF.PPrCF (prCFTree)
--- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE
-import qualified GF.Parsing.GFC as New
-
-import GF.Data.Operations
-
-import Data.List (nub,sortBy)
-import Data.Char (toLower)
-import Control.Monad (liftM)
-
--- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
-
-parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
-parseString os sg cat = liftM fst . parseStringMsg os sg cat
-
-parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
-parseStringMsg os sg cat s = do
- case checkStart $ parseStringC os sg cat s of
- Ok (ts,(_,ss)) -> return (ts, unlines $ reverse ss)
- Bad s -> return ([],s)
-
-parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
-parseStringC opts0 sg cat s
- | oElem (iOpt "old") opts0 ||
- (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
- let opts = unionOptions opts0 $ stateOptions sg
- cf = stateCF sg
- gr = stateGrammarST sg
- cn = cncId sg
- toks = customOrDefault opts useTokenizer customTokenizer sg s
- parser = customOrDefault opts useParser customParser sg cat
- if oElem (iOpt "cut") opts
- then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
- else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
-
----- | or [oElem p opts0 |
----- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do
-
- | otherwise = do
- let opts = unionOptions opts0 $ stateOptions sg
- algorithm | oElem newCParser opts0 = "c"
- | oElem newMParser opts0 = "m"
- | oElem newFParser opts0 = "f"
- | otherwise = "f" -- default algorithm: FCFG
- strategy = maybe "bottomup" id $ getOptVal opts useParser
- -- -parser=bottomup/topdown
- tokenizer = customOrDefault opts useTokenizer customTokenizer sg
- toks = case tokenizer s of
- t:_ -> t
- _ -> [] ---- no support for undet. tok.
- unknowns =
- [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w]
- where
- unk w = not $ isKnownWord (morpho sg) w
- uncap (c:cs) = toLower c : cs
- uncap s = s
-
- case unknowns of
- _:_ | oElem (iOpt "trynextlang") opts -> return []
- _:_ -> fail $ "Unknown words:" +++ unwords unknowns
- _ -> do
-
- ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
- ts' <- checkErr $
- allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
- return $ optIntOrAll opts flagNumber ts'
-
-
-tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
-tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
- where result = parser toks
- info = snd result
- trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
-
-trees2trms ::
- Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
-trees2trms opts sg cn as ts0 info = do
- let s = unwords $ map prCFTok as
- ts <- case () of
- _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return []
- _ | raw -> do
- ts1 <- return (map cf2trm0 ts0) ----- should not need annot
- checks [
- mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
- ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
- ]
- _ -> do
- let num = optIntOrN opts flagRawtrees 999999
- let (ts01,rest) = splitAt num ts0
- if null rest then return ()
- else raise ("Warning: only" +++ show num +++ "raw parses out of" +++
- show (length ts0) +++
- "considered; use -rawtrees=<Int> to see more"
- )
- (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
- if null ts1 then raise ss else return ()
- ts2 <- checkErr $
- allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ----
- if forgive then return ts2 else do
- let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
- ps = [t | (t,ss) <- tsss,
- any (compatToks as) (map str2cftoks ss)]
- if null ps
- then raise $ "Failure in morphology." ++
- if verb
- then "\nPossible corrections: " +++++
- unlines (nub (map sstr (concatMap snd tsss)))
- else ""
- else return ps
- if verb
- then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info
- else return ()
-
- return $ optIntOrAll opts flagNumber $ nub ts
- where
- gr = stateGrammarST sg
-
- raw = oElem rawParse opts
- verb = oElem beVerbose opts
- forgive = oElem forgiveParse opts
-
----- Operatins.allChecks :: ErrorMonad m => [m a] -> m [a]
-
-unknownWords sg ts = case filter noMatch [t | t@(TS _) <- ts] of
- [] -> "where all words are known"
- us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
- where
- terminals = map TS $ stateGrammarWords sg
- noMatch t = all (not . compatTok t) terminals
-
-
---- too much type checking in building term info? return FullTerm to save work?
-
--- | raw parsing: so simple it is for a context-free CF grammar
-cf2trm0 :: CFTree -> C.Exp
-cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
- where
- cffun2trm (CFFun (fun,_)) = fun
- mkApp = foldl C.EApp
- mkAppAtom a = mkApp (C.EAtom a)