summary | refs | log | tree | commit | diff
path: root/src/GF/Embed/EmbedParsing.hs
diff options
context:
space:
mode:
author aarne <aarne@cs.chalmers.se> 2007-09-05 14:05:42 +0000
committer aarne <aarne@cs.chalmers.se> 2007-09-05 14:05:42 +0000
commit d2a5148708a3f270b058403e573106ed1d309957 (patch)
tree d1676e0578644e96ba644250c5190f5e294bac06 /src/GF/Embed/EmbedParsing.hs
parent e2b9657f79d913dddb064037e79a9dd1a63bdbf9 (diff)
modernized parser in EmbedAPI
Diffstat (limited to 'src/GF/Embed/EmbedParsing.hs')
-rw-r--r-- src/GF/Embed/EmbedParsing.hs 90
1 files changed, 9 insertions, 81 deletions
diff --git a/src/GF/Embed/EmbedParsing.hs b/src/GF/Embed/EmbedParsing.hs
index 40378c491..43909f355 100644
--- a/src/GF/Embed/EmbedParsing.hs
+++ b/src/GF/Embed/EmbedParsing.hs
@@ -33,8 +33,7 @@ import GF.Infra.Option
import GF.Compile.ShellState
import GF.Embed.EmbedCustom
import GF.CF.PPrCF (prCFTree)
-
-import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
+import qualified GF.Parsing.GFC as New
-- import qualified GF.Parsing.GFC as New
@@ -55,83 +54,12 @@ parseStringMsg os sg cat s = do
return (ts,unlines ss)
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
-parseStringC opts0 sg cat s
-
- | otherwise = do
- let opts = unionOptions opts0 $ stateOptions sg
- cf = stateCF sg
- gr = stateGrammarST sg
- cn = cncId sg
- tok = customOrDefault opts useTokenizer customTokenizer sg
- parser = PCFOld.parse "ibn" (stateCF sg) cat -- customOrDefault opts useParser customParser sg cat
- tokens2trms opts sg cn parser (tok s)
-
-tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
-tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
- where result = parser toks
- info = snd result
- trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
-
-trees2trms :: Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
-trees2trms opts sg cn as ts0 info = do
- ts <- case () of
- _ | null ts0 -> checkWarn "No success in cf parsing" >> return []
- _ | raw -> do
- ts1 <- return (map cf2trm0 ts0) ----- should not need annot
- checks [
- mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
- ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
- ]
- _ -> do
- let num = optIntOrN opts flagRawtrees 999999
- let (ts01,rest) = splitAt num ts0
- if null rest then return ()
- else checkWarn ("Warning: only" +++ show num +++ "raw parses out of" +++
- show (length ts0) +++
- "considered; use -rawtrees=<Int> to see more"
- )
- (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
- if null ts1 then raise ss else return ()
- ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ----
- if forgive then return ts2 else do
- let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
- ps = [t | (t,ss) <- tsss,
- any (compatToks as) (map str2cftoks ss)]
- if null ps
- then raise $ "Failure in morphology." ++
- if verb
- then "\nPossible corrections: " +++++
- unlines (nub (map sstr (concatMap snd tsss)))
- else ""
- else return ps
-
- if verb
- then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
- else return ()
-
- return $ optIntOrAll opts flagNumber $ nub ts
- where
- gr = stateGrammarST sg
-
- raw = oElem rawParse opts
- verb = oElem beVerbose opts
- forgive = oElem forgiveParse opts
-
- unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
- [] -> "where all words are known"
- us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
- terminals = map TS $ stateGrammarWords sg
- noMatch t = all (not . compatTok t) terminals
-
-
---- too much type checking in building term info? return FullTerm to save work?
-
--- | raw parsing: so simple it is for a context-free CF grammar
-cf2trm0 :: CFTree -> C.Exp
-cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
- where
- cffun2trm (CFFun (fun,_)) = fun
- mkApp = foldl C.EApp
- mkAppAtom a = mkApp (C.EAtom a)
-
+parseStringC opts0 sg cat s = do
+ let opts = unionOptions opts0 $ stateOptions sg
+ algorithm = "f" -- default algorithm: FCFG
+ strategy = "bottomup"
+ tokenizer = customOrDefault opts useTokenizer customTokenizer sg
+ toks = tokenizer s
+ ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+ checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts