summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Compile/MkConcrete.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Compile/MkConcrete.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Compile/MkConcrete.hs')
-rw-r--r--src-3.0/GF/Compile/MkConcrete.hs154
1 files changed, 154 insertions, 0 deletions
diff --git a/src-3.0/GF/Compile/MkConcrete.hs b/src-3.0/GF/Compile/MkConcrete.hs
new file mode 100644
index 000000000..d016a7e47
--- /dev/null
+++ b/src-3.0/GF/Compile/MkConcrete.hs
@@ -0,0 +1,154 @@
+----------------------------------------------------------------------
+-- |
+-- Module : MkConcrete
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date:
+-- > CVS $Author:
+-- > CVS $Revision:
+--
+-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
+-----------------------------------------------------------------------------
+
+module GF.Compile.MkConcrete (mkConcretes) where
+
+import GF.Grammar.Values (Tree,tree2exp)
+import GF.Grammar.PrGrammar (prt_,prModule)
+import GF.Grammar.Grammar --- (Term(..),SourceModule)
+import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
+import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
+import GF.Compile.PGrammar (pTerm,pTrm)
+import GF.Compile.Compile
+import GF.Compile.PrOld (stripTerm)
+import GF.Compile.GetGrammar
+import GF.API
+import GF.API.IOGrammar
+import qualified GF.Embed.EmbedAPI as EA
+
+import GF.Data.Operations
+import GF.Infra.UseIO
+import GF.Infra.Option
+import GF.Infra.Modules
+import GF.Infra.ReadFiles
+import GF.System.Arch
+import GF.UseGrammar.Treebank
+
+import System.Directory
+import System.FilePath
+import Data.Char
+import Control.Monad
+import Data.List
+
+-- translate strings into lin rules by parsing in a resource
+-- grammar. AR 2/6/2005
+
+-- Format of rule (on one line):
+-- lin F x y = in C "ssss" ;
+-- Format of resource path (on first line):
+-- --# -resource=PATH
+-- Other lines are copied verbatim.
+-- A sequence of files can be processed with the same resource without
+-- rebuilding the grammar and parser.
+
+-- notice: we use a hand-crafted lexer and parser in order to preserve
+-- the layout and comments in the rest of the file.
+
+mkConcretes :: Options -> [FilePath] -> IO ()
+mkConcretes opts files = do
+ ress <- mapM getResPath files
+ let grps = groupBy (\a b -> fst a == fst b) $
+ sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
+ mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]
+
+mkCncGroups opts0 ((res,path),files) = do
+ putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
+ putStrLn $ "Compiling resource " ++ res
+ let opts = addOptions (options [beSilent,pathList path]) opts0
+ let treebank = oElem (iOpt "treebank") opts
+ resf <- useIOE res $ do
+ (fp,_) <- readFileLibraryIOE "" res
+ return fp
+ egr <- appIOE $ shellStateFromFiles opts emptyShellState resf
+ (parser,morpho) <- if treebank then do
+ tb <- err (\_ -> error $ "no treebank of name" +++ path)
+ return
+ (egr >>= flip findTreebank (zIdent path))
+ return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
+ isWordInTreebank tb)
+ else do
+ gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
+ (return . firstStateGrammar) egr
+ return
+ (\cat s ->
+ errVal ([],"No parse") $
+ optParseArgErrMsg (options [newFParser, firstCat cat, beVerbose]) gr s >>=
+ (\ (ts,e) -> return (map tree2exp ts, e)) ,
+ isKnownWord gr)
+ putStrLn "Building parser"
+ mapM_ (mkConcrete parser morpho) files
+
+type Parser = String -> String -> ([Term],String)
+type Morpho = String -> Bool
+
+getResPath :: FilePath -> IO (String,String)
+getResPath file = do
+ s <- liftM lines $ readFileIf file
+ case filter (not . all isSpace) s of
+ res:path:_ | is "resource" res && is "path" path -> return (val res, val path)
+ res:path:_ | is "resource" res && is "treebank" path -> return (val res, val path)
+ res:_ | is "resource" res -> return (val res, "")
+ _ -> error
+ "expected --# -resource=FILE and optional --# -path=PATH or --# -treebank=IDENT"
+ where
+ val = dropWhile (isSpace) . tail . dropWhile (not . (=='='))
+ is tag s = case words s of
+ "--#":w:_ -> isPrefixOf ('-':tag) w
+ _ -> False
+
+
+mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
+mkConcrete parser morpho file = do
+ src <- appIOE (getSourceModule noOptions file) >>= err error return
+ let (src',msgs) = mkModule parser morpho src
+ let out = addExtension (justModuleName file) "gf"
+ writeFile out $ "-- File generated by GF from " ++ file
+ appendFile out "\n"
+ appendFile out (prModule src')
+ appendFile out "{-\n"
+ appendFile out $ unlines $ filter (not . null) msgs
+ appendFile out "-}\n"
+
+mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
+mkModule parser morpho (name,src) = case src of
+ ModMod m@(Module mt st fs me ops js) ->
+
+ let js1 = jments m
+ (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
+ mod2 = ModMod $ Module mt st fs me ops $ js2
+ in ((name,mod2), msgs)
+ where
+ mkInfo ni@(name,info) = case info of
+ CncFun mt (Yes trm) ppr -> do
+ trm' <- mkTrm trm
+ return (name, CncFun mt (Yes trm') ppr)
+ _ -> return ni
+ where
+ mkTrm t = case t of
+ Example (P _ cat) s -> parse cat s t
+ Example (Vr cat) s -> parse cat s t
+ _ -> composOp mkTrm t
+ parse cat s t = case parser (prt_ cat) s of
+ (tr:[], _) -> do
+ updateSTM ((("PARSED in" +++ prt_ name) : s : [prt_ tr]) ++)
+ return $ stripTerm tr
+ (tr:trs,_) -> do
+ updateSTM ((("AMBIGUOUS in" +++ prt_ name) : s : map prt_ trs) ++)
+ return $ stripTerm tr
+ ([],ms) -> do
+ updateSTM ((("NO PARSE in" +++ prt_ name) : s : ms : [morph s]) ++)
+ return t
+ morph s = case [w | w <- words s, not (morpho w)] of
+ [] -> ""
+ ws -> "unknown words: " ++ unwords ws