summaryrefslogtreecommitdiff
path: root/src/GF/Compile/MkConcrete.hs
blob: a0af24007cbb7c0bf0be87d98734df66b3ad6cca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
----------------------------------------------------------------------
-- |
-- Module      : MkConcrete
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 
-- > CVS $Author: 
-- > CVS $Revision: 
--
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
-----------------------------------------------------------------------------

module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where

import GF.Grammar.Values (Tree,tree2exp)
import GF.Grammar.PrGrammar (prt_)
import GF.Grammar.Grammar (Term(Q,QC)) ---
import GF.Grammar.Macros (composSafeOp, record2subst)
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
import GF.Compile.PGrammar (pTerm)
import GF.Compile.Compile
import GF.API
import GF.API.IOGrammar
import qualified GF.Embed.EmbedAPI as EA

import GF.Data.Operations
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Infra.ReadFiles
import GF.System.Arch

import System.Directory
import Data.Char
import Control.Monad
import Data.List

-- translate strings into lin rules by parsing in a resource
-- grammar. AR 2/6/2005

-- Format of rule (on one line):
--    lin F x y = in C "ssss" ;
-- Format of resource path (on first line):
--    --# -resource=PATH
-- Other lines are copied verbatim.
-- A sequence of files can be processed with the same resource without
-- rebuilding the grammar and parser.

-- notice: we use a hand-crafted lexer and parser in order to preserve
-- the layout and comments in the rest of the file.

mkConcretes :: [FilePath] -> IO ()
mkConcretes files = do
  ress <- mapM getResPath files
  let grps = groupBy (\a b -> fst a == fst b) $ 
               sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
  mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps]

mkCncGroups ((res,path),files) = do
  putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
  putStrLn $ "Compiling resource " ++ res
  let opts = options [beSilent,pathList path]
  egr <- appIOE $ shellStateFromFiles opts emptyShellState res
  gr  <- err (\s -> putStrLn s >> error "resource grammar rejected") 
           (return . firstStateGrammar) egr
  let parser cat = 
        errVal ([],"No parse") . 
        optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr
  let morpho = isKnownWord gr
  putStrLn "Building parser"
  mapM_ (mkConcrete parser morpho) files

type Parser = String -> String -> ([Tree],String)
type Morpho = String -> Bool 

mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
mkConcrete parser morpho file = do
  cont <- liftM getExLines $ readFileIf file
  let out = suffixFile "gf" $ justModuleName file
  writeFile out $ "-- File generated by GF from " ++ file
  appendFile out "\n"
  mapM_ (mkCnc out parser morpho) cont

getResPath :: FilePath -> IO (String,String)
getResPath file = do
  s <- liftM lines $ readFileIf file
  case filter (not . all isSpace) s of
    res:path:_ | is "resource" res && is "path" path -> return (val res, val path)
    res:_ | is "resource" res -> return (val res, "")
    _ -> error "expected --# -resource=FILE and optional --# -path=PATH"
 where
   val = dropWhile (isSpace) . tail . dropWhile (not . (=='='))
   is tag s = case words s of
     "--#":w:_ -> isPrefixOf ('-':tag) w
     _ -> False 

getExLines :: String -> [Either String String]
getExLines = getl . lines where
  getl ls = case ls of
    s:ss | begEx (words s) -> case break endEx ls of
      (x,y:z) -> Left (unwords (x ++ [y])) : getl z
      _ -> Left s : getl ss
    s:ss -> Right s : getl ss
    [] -> []
  begEx s = case s of
    "=":"in":_ -> True
    _:ws -> begEx ws
    _ -> False
  endEx s = case dropWhile isSpace (reverse s) of
    ';':_ -> True
    _ -> False

mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO ()
mkCnc out parser morpho line = do
  let (res,msg) = mkCncLine parser morpho line
  appendFile out res
  appendFile out "\n"
  ifNull (return ()) putStrLnFlush msg

mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) -> 
             Either String String -> (String,String)
mkCncLine parser morpho (Right line) = (line,[])
mkCncLine parser morpho (Left line) = mkLinRule (words line) where
  mkLinRule s = 
    let
       (pre,str)     = span (/= "in") s
       ([mcat],rest) = splitAt 1 $ tail str
       (lin,subst)   = span (/= '"') $ tail $ unwords rest
       cat = reverse $ takeWhile (/= '.') $ reverse mcat
       substs = doSubst (init (tail subst))
       def
        | last pre /= "=" = line  -- ordinary lin rule
        | otherwise  = case parser cat lin of 
           (t:ts,_) -> ind ++ unwords pre +++ 
                       substs (tree2exp t) +++ ";" ++
                       if null ts then [] else (" -- AMBIGUOUS:" ++++
                         unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts])
           ([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}" 
    in
     (def,def)
  morph s = case [w | w <- words s, not (morpho w)] of
    [] -> ""
    ws -> "unknown words: " ++ unwords ws
  ind = takeWhile isSpace line

doSubst :: String -> Term -> String
doSubst subst0 trm = prt_ $ subt subst trm where
  subst 
    | all isSpace subst0 = []
    | otherwise = err error id $ pTerm subst0 >>= record2subst
  subt g t = case t of
    Q  _ c -> maybe t id $ lookup c g
    QC _ c -> maybe t id $ lookup c g
    _      -> composSafeOp (subt g) t