diff options
| author | aarne <aarne@cs.chalmers.se> | 2007-09-05 14:05:42 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2007-09-05 14:05:42 +0000 |
| commit | d2a5148708a3f270b058403e573106ed1d309957 (patch) | |
| tree | d1676e0578644e96ba644250c5190f5e294bac06 /src | |
| parent | e2b9657f79d913dddb064037e79a9dd1a63bdbf9 (diff) | |
modernized parser in EmbedAPI
Diffstat (limited to 'src')
| -rw-r--r-- | src/GF/Embed/EmbedAPI.hs | 23 | ||||
| -rw-r--r-- | src/GF/Embed/EmbedParsing.hs | 90 |
2 files changed, 26 insertions, 87 deletions
diff --git a/src/GF/Embed/EmbedAPI.hs b/src/GF/Embed/EmbedAPI.hs index a3a130a36..43e4f2546 100644 --- a/src/GF/Embed/EmbedAPI.hs +++ b/src/GF/Embed/EmbedAPI.hs @@ -15,7 +15,7 @@ module GF.Embed.EmbedAPI where -import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions) +import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat) import GF.UseGrammar.Linear (linTree2string) import GF.UseGrammar.GetTree (string2tree) import GF.Embed.EmbedParsing (parseString) @@ -53,8 +53,11 @@ file2grammar :: FilePath -> IO MultiGrammar linearize :: MultiGrammar -> Language -> Tree -> String parse :: MultiGrammar -> Language -> Category -> String -> [Tree] -linearizeAll :: MultiGrammar -> Tree -> [String] -parseAll :: MultiGrammar -> Category -> String -> [[Tree]] +linearizeAll :: MultiGrammar -> Tree -> [String] +linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)] + +parseAll :: MultiGrammar -> Category -> String -> [[Tree]] +parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])] readTree :: MultiGrammar -> String -> Tree showTree :: Tree -> String @@ -62,6 +65,8 @@ showTree :: Tree -> String languages :: MultiGrammar -> [Language] categories :: MultiGrammar -> [Category] +startCat :: MultiGrammar -> Category + --------------------------------------------------- -- Implementation --------------------------------------------------- @@ -84,15 +89,19 @@ linearize mgr lang = parse mgr lang cat = map tree2exp . errVal [] . - parseString noOptions sgr cfcat + parseString (stateOptions sgr) sgr cfcat where sgr = stateGrammarOfLang mgr (zIdent lang) cfcat = string2CFCat abs cat abs = maybe (error "no abstract syntax") prIdent $ abstract mgr -linearizeAll mgr t = [linearize mgr lang t | lang <- languages mgr] +linearizeAll mgr = map snd . linearizeAllLang mgr +linearizeAllLang mgr t = [(lang,linearize mgr lang t) | lang <- languages mgr] -parseAll mgr cat s = [parse mgr lang cat s | lang <- languages mgr] +parseAll mgr cat = map snd . parseAllLang mgr cat + +parseAllLang mgr cat s = + [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)] readTree mgr s = tree2exp $ string2tree (firstStateGrammar mgr) s @@ -101,3 +110,5 @@ showTree t = prt_ t languages mgr = [prt_ l | l <- allLanguages mgr] categories mgr = [prt_ c | (_,c) <- allCategories mgr] + +startCat = prt_ . snd . firstAbsCat noOptions . firstStateGrammar diff --git a/src/GF/Embed/EmbedParsing.hs b/src/GF/Embed/EmbedParsing.hs index 40378c491..43909f355 100644 --- a/src/GF/Embed/EmbedParsing.hs +++ b/src/GF/Embed/EmbedParsing.hs @@ -33,8 +33,7 @@ import GF.Infra.Option import GF.Compile.ShellState import GF.Embed.EmbedCustom import GF.CF.PPrCF (prCFTree) - -import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE +import qualified GF.Parsing.GFC as New -- import qualified GF.Parsing.GFC as New @@ -55,83 +54,12 @@ parseStringMsg os sg cat s = do return (ts,unlines ss) parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] -parseStringC opts0 sg cat s - - | otherwise = do - let opts = unionOptions opts0 $ stateOptions sg - cf = stateCF sg - gr = stateGrammarST sg - cn = cncId sg - tok = customOrDefault opts useTokenizer customTokenizer sg - parser = PCFOld.parse "ibn" (stateCF sg) cat -- customOrDefault opts useParser customParser sg cat - tokens2trms opts sg cn parser (tok s) - -tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] -tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info - where result = parser toks - info = snd result - trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2)) - -trees2trms :: Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree] -trees2trms opts sg cn as ts0 info = do - ts <- case () of - _ | null ts0 -> checkWarn "No success in cf parsing" >> return [] - _ | raw -> do - ts1 <- return (map cf2trm0 ts0) ----- should not need annot - checks [ - mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails - ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return [] - ] - _ -> do - let num = optIntOrN opts flagRawtrees 999999 - let (ts01,rest) = splitAt num ts0 - if null rest then return () - else checkWarn ("Warning: only" +++ show num +++ "raw parses out of" +++ - show (length ts0) +++ - "considered; use -rawtrees=<Int> to see more" - ) - (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01 - if null ts1 then raise ss else return () - ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ---- - if forgive then return ts2 else do - let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2] - ps = [t | (t,ss) <- tsss, - any (compatToks as) (map str2cftoks ss)] - if null ps - then raise $ "Failure in morphology." ++ - if verb - then "\nPossible corrections: " +++++ - unlines (nub (map sstr (concatMap snd tsss))) - else "" - else return ps - - if verb - then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info - else return () - - return $ optIntOrAll opts flagNumber $ nub ts - where - gr = stateGrammarST sg - - raw = oElem rawParse opts - verb = oElem beVerbose opts - forgive = oElem forgiveParse opts - - unknown ts = case filter noMatch [t | t@(TS _) <- ts] of - [] -> "where all words are known" - us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals - terminals = map TS $ stateGrammarWords sg - noMatch t = all (not . compatTok t) terminals - - ---- too much type checking in building term info? return FullTerm to save work? - --- | raw parsing: so simple it is for a context-free CF grammar -cf2trm0 :: CFTree -> C.Exp -cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees) - where - cffun2trm (CFFun (fun,_)) = fun - mkApp = foldl C.EApp - mkAppAtom a = mkApp (C.EAtom a) - +parseStringC opts0 sg cat s = do + let opts = unionOptions opts0 $ stateOptions sg + algorithm = "f" -- default algorithm: FCFG + strategy = "bottomup" + tokenizer = customOrDefault opts useTokenizer customTokenizer sg + toks = tokenizer s + ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks + checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts |
