summaryrefslogtreecommitdiff
path: root/src/GF/Compile/MkConcrete.hs
blob: d016a7e4772efa298bc8de2de081f1925a33ed0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
----------------------------------------------------------------------
-- |
-- Module      : MkConcrete
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 
-- > CVS $Author: 
-- > CVS $Revision: 
--
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
-----------------------------------------------------------------------------

module GF.Compile.MkConcrete (mkConcretes) where

import GF.Grammar.Values (Tree,tree2exp)
import GF.Grammar.PrGrammar (prt_,prModule)
import GF.Grammar.Grammar --- (Term(..),SourceModule)
import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
import GF.Compile.PGrammar (pTerm,pTrm)
import GF.Compile.Compile
import GF.Compile.PrOld (stripTerm)
import GF.Compile.GetGrammar
import GF.API
import GF.API.IOGrammar
import qualified GF.Embed.EmbedAPI as EA

import GF.Data.Operations
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Infra.Modules
import GF.Infra.ReadFiles
import GF.System.Arch
import GF.UseGrammar.Treebank

import System.Directory
import System.FilePath
import Data.Char
import Control.Monad
import Data.List

-- translate strings into lin rules by parsing in a resource
-- grammar. AR 2/6/2005

-- Format of rule (on one line):
--    lin F x y = in C "ssss" ;
-- Format of resource path (on first line):
--    --# -resource=PATH
-- Other lines are copied verbatim.
-- A sequence of files can be processed with the same resource without
-- rebuilding the grammar and parser.

-- notice: we use a hand-crafted lexer and parser in order to preserve
-- the layout and comments in the rest of the file.

mkConcretes :: Options -> [FilePath] -> IO ()
mkConcretes opts files = do
  ress <- mapM getResPath files
  let grps = groupBy (\a b -> fst a == fst b) $ 
               sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
  mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]

mkCncGroups opts0 ((res,path),files) = do
  putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
  putStrLn $ "Compiling resource " ++ res
  let opts = addOptions (options [beSilent,pathList path]) opts0
  let treebank = oElem (iOpt "treebank") opts
  resf <- useIOE res $ do
    (fp,_) <- readFileLibraryIOE "" res
    return fp
  egr <- appIOE $ shellStateFromFiles opts emptyShellState resf
  (parser,morpho) <- if treebank then do
      tb <- err (\_ -> error $ "no treebank of name" +++ path)
                return
                (egr >>= flip findTreebank (zIdent path))
      return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
              isWordInTreebank tb)
    else do
      gr  <- err (\s -> putStrLn s >> error "resource grammar rejected") 
                 (return . firstStateGrammar) egr
      return 
            (\cat s -> 
                errVal ([],"No parse") $ 
                optParseArgErrMsg (options [newFParser, firstCat cat, beVerbose]) gr s >>=
                (\ (ts,e) -> return (map tree2exp ts, e)) , 
            isKnownWord gr)
  putStrLn "Building parser"
  mapM_ (mkConcrete parser morpho) files

type Parser = String -> String -> ([Term],String)
type Morpho = String -> Bool 

getResPath :: FilePath -> IO (String,String)
getResPath file = do
  s <- liftM lines $ readFileIf file
  case filter (not . all isSpace) s of
    res:path:_ | is "resource" res && is "path"     path -> return (val res, val path)
    res:path:_ | is "resource" res && is "treebank" path -> return (val res, val path)
    res:_ | is "resource" res -> return (val res, "")
    _ -> error 
           "expected --# -resource=FILE and optional --# -path=PATH or --# -treebank=IDENT"
 where
   val = dropWhile (isSpace) . tail . dropWhile (not . (=='='))
   is tag s = case words s of
     "--#":w:_ -> isPrefixOf ('-':tag) w
     _ -> False 


mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
mkConcrete parser morpho file = do
  src <- appIOE (getSourceModule noOptions file) >>= err error return
  let (src',msgs) = mkModule parser morpho src
  let out = addExtension (justModuleName file) "gf"
  writeFile out $ "-- File generated by GF from " ++ file
  appendFile out "\n"
  appendFile out (prModule src')
  appendFile out "{-\n"
  appendFile out $ unlines $ filter (not . null) msgs
  appendFile out "-}\n"

mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
mkModule parser morpho (name,src) = case src of 
  ModMod m@(Module mt st fs me ops js) ->

    let js1 = jments m
        (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
        mod2 = ModMod $ Module mt st fs me ops $ js2
    in ((name,mod2), msgs)
 where
  mkInfo ni@(name,info) = case info of
    CncFun mt (Yes trm) ppr -> do
      trm' <- mkTrm trm
      return (name, CncFun mt (Yes trm') ppr) 
    _ -> return ni
   where
     mkTrm t = case t of
       Example (P _ cat) s -> parse cat s t
       Example (Vr  cat) s -> parse cat s t
       _ -> composOp mkTrm t
     parse cat s t = case parser (prt_ cat) s of
       (tr:[], _) -> do
         updateSTM ((("PARSED in" +++ prt_ name) : s : [prt_ tr]) ++)
         return $ stripTerm tr
       (tr:trs,_) -> do
         updateSTM ((("AMBIGUOUS in" +++ prt_ name) : s : map prt_ trs) ++)
         return $ stripTerm tr
       ([],ms) -> do
         updateSTM ((("NO PARSE in" +++ prt_ name) : s : ms : [morph s]) ++)
         return t
  morph s = case [w | w <- words s, not (morpho w)] of
    [] -> ""
    ws -> "unknown words: " ++ unwords ws