diff options
| author | aarne <aarne@cs.chalmers.se> | 2006-03-04 22:14:33 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2006-03-04 22:14:33 +0000 |
| commit | 2657c51e040964d9704f5c69945685ebd546eb6a (patch) | |
| tree | bab26adf8f85c253c50b90ac9cde36d58efa42bd /src/GF/Compile | |
| parent | 84e01c303dc161c3a811a045cc0c45f3f13e33d6 (diff) | |
example based also with treebank, with real term parser
Diffstat (limited to 'src/GF/Compile')
| -rw-r--r-- | src/GF/Compile/MkConcrete.hs | 147 |
1 files changed, 68 insertions, 79 deletions
diff --git a/src/GF/Compile/MkConcrete.hs b/src/GF/Compile/MkConcrete.hs index a0af24007..aafa56242 100644 --- a/src/GF/Compile/MkConcrete.hs +++ b/src/GF/Compile/MkConcrete.hs @@ -12,15 +12,16 @@ -- Compile a gfe file into a concrete syntax by using the parser on a resource grammar. ----------------------------------------------------------------------------- -module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where +module GF.Compile.MkConcrete (mkConcretes) where import GF.Grammar.Values (Tree,tree2exp) -import GF.Grammar.PrGrammar (prt_) -import GF.Grammar.Grammar (Term(Q,QC)) --- -import GF.Grammar.Macros (composSafeOp, record2subst) +import GF.Grammar.PrGrammar (prt_,prModule) +import GF.Grammar.Grammar --- (Term(..),SourceModule) +import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent) import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords) -import GF.Compile.PGrammar (pTerm) +import GF.Compile.PGrammar (pTerm,pTrm) import GF.Compile.Compile +import GF.Compile.GetGrammar import GF.API import GF.API.IOGrammar import qualified GF.Embed.EmbedAPI as EA @@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option +import GF.Infra.Modules import GF.Infra.ReadFiles import GF.System.Arch +import GF.UseGrammar.Treebank import System.Directory import Data.Char @@ -50,38 +53,40 @@ import Data.List -- notice: we use a hand-crafted lexer and parser in order to preserve -- the layout and comments in the rest of the file. -mkConcretes :: [FilePath] -> IO () -mkConcretes files = do +mkConcretes :: Options -> [FilePath] -> IO () +mkConcretes opts files = do ress <- mapM getResPath files let grps = groupBy (\a b -> fst a == fst b) $ sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files - mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps] + mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps] -mkCncGroups ((res,path),files) = do +mkCncGroups opts0 ((res,path),files) = do putStrLnFlush $ "Going to preprocess examples in " ++ unwords files putStrLn $ "Compiling resource " ++ res - let opts = options [beSilent,pathList path] + let opts = addOptions (options [beSilent,pathList path]) opts0 + let treebank = oElem (iOpt "treebank") opts egr <- appIOE $ shellStateFromFiles opts emptyShellState res - gr <- err (\s -> putStrLn s >> error "resource grammar rejected") - (return . firstStateGrammar) egr - let parser cat = - errVal ([],"No parse") . - optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr - let morpho = isKnownWord gr + (parser,morpho) <- if treebank then do + tb <- err (\_ -> error "no treebank") + return + (egr >>= flip findTreebank (zIdent (unsuffixFile res))) + return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb, + isWordInTreebank tb) + else do + gr <- err (\s -> putStrLn s >> error "resource grammar rejected") + (return . firstStateGrammar) egr + return + (\cat s -> + errVal ([],"No parse") $ + optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>= + (\ (ts,e) -> return (map tree2exp ts, e)) , + isKnownWord gr) putStrLn "Building parser" mapM_ (mkConcrete parser morpho) files -type Parser = String -> String -> ([Tree],String) +type Parser = String -> String -> ([Term],String) type Morpho = String -> Bool -mkConcrete :: Parser -> Morpho -> FilePath -> IO () -mkConcrete parser morpho file = do - cont <- liftM getExLines $ readFileIf file - let out = suffixFile "gf" $ justModuleName file - writeFile out $ "-- File generated by GF from " ++ file - appendFile out "\n" - mapM_ (mkCnc out parser morpho) cont - getResPath :: FilePath -> IO (String,String) getResPath file = do s <- liftM lines $ readFileIf file @@ -95,62 +100,46 @@ getResPath file = do "--#":w:_ -> isPrefixOf ('-':tag) w _ -> False -getExLines :: String -> [Either String String] -getExLines = getl . lines where - getl ls = case ls of - s:ss | begEx (words s) -> case break endEx ls of - (x,y:z) -> Left (unwords (x ++ [y])) : getl z - _ -> Left s : getl ss - s:ss -> Right s : getl ss - [] -> [] - begEx s = case s of - "=":"in":_ -> True - _:ws -> begEx ws - _ -> False - endEx s = case dropWhile isSpace (reverse s) of - ';':_ -> True - _ -> False -mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO () -mkCnc out parser morpho line = do - let (res,msg) = mkCncLine parser morpho line - appendFile out res +mkConcrete :: Parser -> Morpho -> FilePath -> IO () +mkConcrete parser morpho file = do + src <- appIOE (getSourceModule noOptions file) >>= err error return + let (src',msgs) = mkModule parser morpho src + let out = suffixFile "gf" $ justModuleName file + writeFile out $ "-- File generated by GF from " ++ file appendFile out "\n" - ifNull (return ()) putStrLnFlush msg - -mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) -> - Either String String -> (String,String) -mkCncLine parser morpho (Right line) = (line,[]) -mkCncLine parser morpho (Left line) = mkLinRule (words line) where - mkLinRule s = - let - (pre,str) = span (/= "in") s - ([mcat],rest) = splitAt 1 $ tail str - (lin,subst) = span (/= '"') $ tail $ unwords rest - cat = reverse $ takeWhile (/= '.') $ reverse mcat - substs = doSubst (init (tail subst)) - def - | last pre /= "=" = line -- ordinary lin rule - | otherwise = case parser cat lin of - (t:ts,_) -> ind ++ unwords pre +++ - substs (tree2exp t) +++ ";" ++ - if null ts then [] else (" -- AMBIGUOUS:" ++++ - unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts]) - ([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}" - in - (def,def) + appendFile out (prModule src') + appendFile out "{-\n" + appendFile out $ unlines $ filter (not . null) msgs + appendFile out "-}\n" + +mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String]) +mkModule parser morpho (name,src) = case src of + ModMod m@(Module mt st fs me ops js) -> + + let js1 = jments m + (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) [] + mod2 = ModMod $ Module mt st fs me ops $ js2 + in ((name,mod2), msgs) + where + mkInfo ni@(name,info) = case info of + CncFun mt (Yes trm) ppr -> do + trm' <- mkTrm trm + return (name, CncFun mt (Yes trm') ppr) + _ -> return ni + where + mkTrm t = case t of + Example (P _ cat) s -> parse cat s t + Example (Vr cat) s -> parse cat s t + _ -> composOp mkTrm t + parse cat s t = case parser (prt_ cat) s of + (tr:[], _) -> return tr + (tr:trs,_) -> do + updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++) + return tr + ([],ms) -> do + updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++) + return t morph s = case [w | w <- words s, not (morpho w)] of [] -> "" ws -> "unknown words: " ++ unwords ws - ind = takeWhile isSpace line - -doSubst :: String -> Term -> String -doSubst subst0 trm = prt_ $ subt subst trm where - subst - | all isSpace subst0 = [] - | otherwise = err error id $ pTerm subst0 >>= record2subst - subt g t = case t of - Q _ c -> maybe t id $ lookup c g - QC _ c -> maybe t id $ lookup c g - _ -> composSafeOp (subt g) t - |
