diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Compile/MkConcrete.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Compile/MkConcrete.hs')
| -rw-r--r-- | src-3.0/GF/Compile/MkConcrete.hs | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/src-3.0/GF/Compile/MkConcrete.hs b/src-3.0/GF/Compile/MkConcrete.hs new file mode 100644 index 000000000..d016a7e47 --- /dev/null +++ b/src-3.0/GF/Compile/MkConcrete.hs @@ -0,0 +1,154 @@ +---------------------------------------------------------------------- +-- | +-- Module : MkConcrete +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar. +----------------------------------------------------------------------------- + +module GF.Compile.MkConcrete (mkConcretes) where + +import GF.Grammar.Values (Tree,tree2exp) +import GF.Grammar.PrGrammar (prt_,prModule) +import GF.Grammar.Grammar --- (Term(..),SourceModule) +import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent) +import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords) +import GF.Compile.PGrammar (pTerm,pTrm) +import GF.Compile.Compile +import GF.Compile.PrOld (stripTerm) +import GF.Compile.GetGrammar +import GF.API +import GF.API.IOGrammar +import qualified GF.Embed.EmbedAPI as EA + +import GF.Data.Operations +import GF.Infra.UseIO +import GF.Infra.Option +import GF.Infra.Modules +import GF.Infra.ReadFiles +import GF.System.Arch +import GF.UseGrammar.Treebank + +import System.Directory +import System.FilePath +import Data.Char +import Control.Monad +import Data.List + +-- translate strings into lin rules by parsing in a resource +-- grammar. AR 2/6/2005 + +-- Format of rule (on one line): +-- lin F x y = in C "ssss" ; +-- Format of resource path (on first line): +-- --# -resource=PATH +-- Other lines are copied verbatim. +-- A sequence of files can be processed with the same resource without +-- rebuilding the grammar and parser. + +-- notice: we use a hand-crafted lexer and parser in order to preserve +-- the layout and comments in the rest of the file. + +mkConcretes :: Options -> [FilePath] -> IO () +mkConcretes opts files = do + ress <- mapM getResPath files + let grps = groupBy (\a b -> fst a == fst b) $ + sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files + mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps] + +mkCncGroups opts0 ((res,path),files) = do + putStrLnFlush $ "Going to preprocess examples in " ++ unwords files + putStrLn $ "Compiling resource " ++ res + let opts = addOptions (options [beSilent,pathList path]) opts0 + let treebank = oElem (iOpt "treebank") opts + resf <- useIOE res $ do + (fp,_) <- readFileLibraryIOE "" res + return fp + egr <- appIOE $ shellStateFromFiles opts emptyShellState resf + (parser,morpho) <- if treebank then do + tb <- err (\_ -> error $ "no treebank of name" +++ path) + return + (egr >>= flip findTreebank (zIdent path)) + return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb, + isWordInTreebank tb) + else do + gr <- err (\s -> putStrLn s >> error "resource grammar rejected") + (return . firstStateGrammar) egr + return + (\cat s -> + errVal ([],"No parse") $ + optParseArgErrMsg (options [newFParser, firstCat cat, beVerbose]) gr s >>= + (\ (ts,e) -> return (map tree2exp ts, e)) , + isKnownWord gr) + putStrLn "Building parser" + mapM_ (mkConcrete parser morpho) files + +type Parser = String -> String -> ([Term],String) +type Morpho = String -> Bool + +getResPath :: FilePath -> IO (String,String) +getResPath file = do + s <- liftM lines $ readFileIf file + case filter (not . all isSpace) s of + res:path:_ | is "resource" res && is "path" path -> return (val res, val path) + res:path:_ | is "resource" res && is "treebank" path -> return (val res, val path) + res:_ | is "resource" res -> return (val res, "") + _ -> error + "expected --# -resource=FILE and optional --# -path=PATH or --# -treebank=IDENT" + where + val = dropWhile (isSpace) . tail . dropWhile (not . (=='=')) + is tag s = case words s of + "--#":w:_ -> isPrefixOf ('-':tag) w + _ -> False + + +mkConcrete :: Parser -> Morpho -> FilePath -> IO () +mkConcrete parser morpho file = do + src <- appIOE (getSourceModule noOptions file) >>= err error return + let (src',msgs) = mkModule parser morpho src + let out = addExtension (justModuleName file) "gf" + writeFile out $ "-- File generated by GF from " ++ file + appendFile out "\n" + appendFile out (prModule src') + appendFile out "{-\n" + appendFile out $ unlines $ filter (not . null) msgs + appendFile out "-}\n" + +mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String]) +mkModule parser morpho (name,src) = case src of + ModMod m@(Module mt st fs me ops js) -> + + let js1 = jments m + (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) [] + mod2 = ModMod $ Module mt st fs me ops $ js2 + in ((name,mod2), msgs) + where + mkInfo ni@(name,info) = case info of + CncFun mt (Yes trm) ppr -> do + trm' <- mkTrm trm + return (name, CncFun mt (Yes trm') ppr) + _ -> return ni + where + mkTrm t = case t of + Example (P _ cat) s -> parse cat s t + Example (Vr cat) s -> parse cat s t + _ -> composOp mkTrm t + parse cat s t = case parser (prt_ cat) s of + (tr:[], _) -> do + updateSTM ((("PARSED in" +++ prt_ name) : s : [prt_ tr]) ++) + return $ stripTerm tr + (tr:trs,_) -> do + updateSTM ((("AMBIGUOUS in" +++ prt_ name) : s : map prt_ trs) ++) + return $ stripTerm tr + ([],ms) -> do + updateSTM ((("NO PARSE in" +++ prt_ name) : s : ms : [morph s]) ++) + return t + morph s = case [w | w <- words s, not (morpho w)] of + [] -> "" + ws -> "unknown words: " ++ unwords ws |
