summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Compile/Multi.hs
blob: 59e8ed34c4f2918575ed9686a45b44e25a0cc9f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
module GF.Compile.Multi (readMulti) where

import Data.List
import Data.Char

-- AR 29 November 2010
-- quick way of writing a multilingual lexicon and (with some more work) a grammar
-- also several modules in one file
-- file suffix .gfm (GF Multi)


{-
-- This multi-line comment is a possible file in the format.
-- comments are as in GF, one-liners

-- always start by declaring lang names as follows
> langs Eng Fin Swe

-- baseline rules: semicolon-separated line-by-line entries update abs and cncs, adding to S
cheers ; skål ; terveydeksi

-- alternatives within a language are comma-separated
cheers ; skål ; terveydeksi, kippis

-- more advanced: verbatim abstract rules prefixed by "> abs"
> abs cat Drink ;
> abs fun drink : Drink -> S ;

-- verbatim concrete rules prefixed by ">" and comma-separated language list
> Eng,Swe lin Gin = "gin" ; 

-- multiple modules: modules as usual. Each module has to start from a new line.
-- Should be UTF-8 encoded.

-}

{-
main = do
  xx <- getArgs
  if null xx then putStrLn usage else do 
    let (opts,file) = (init xx, last xx)
    (absn,cncns) <- readMulti opts file
    if elem "-pgf" xx 
      then do
         system ("gf -make -s -optimize-pgf " ++ unwords (map gfFile cncns))
         putStrLn $ "wrote " ++ absn ++ ".pgf"
      else return ()
-}

readMulti :: FilePath -> IO (FilePath,[FilePath])
readMulti file = do
  src <- readFile file
  let multi = getMulti (takeWhile (/='.') file) src
      absn  = absName multi
      cncns = cncNames multi
      raws  = rawModules multi
  writeFile (gfFile absn) (absCode multi)
  mapM_ (uncurry writeFile) 
        [(gfFile cncn, cncCode absn cncn cod) | 
          cncn <- cncNames multi, let cod = [r | (la,r) <- cncRules multi, la == cncn]]
  putStrLn $ "wrote " ++ unwords (map gfFile (absn:cncns))
  mapM_ (uncurry writeFile) [(gfFile n,s) | (n,s) <- raws] --- overwrites those above
  return (gfFile absn, map gfFile cncns)

data Multi = Multi {
  rawModules :: [(String,String)],
  absName  :: String,
  cncNames :: [String],
  startCat :: String,
  absRules :: [String],
  cncRules :: [(String,String)] -- lang,lin
  }

emptyMulti :: Multi 
emptyMulti = Multi {
  rawModules = [],
  absName  = "Abs",
  cncNames = [],
  startCat = "S",
  absRules = [],
  cncRules = []
  }

absCode :: Multi -> String
absCode multi = unlines $ header : start ++ (reverse (absRules multi)) ++ ["}"] where
  header = "abstract " ++ absName multi ++ " = {"
  start  = ["flags startcat = " ++ cat ++ " ;", "cat " ++ cat ++ " ;"]
  cat = startCat multi

cncCode :: String -> String -> [String] -> String
cncCode ab cnc rules = unlines $ header : (reverse rules ++ ["}"]) where
  header = "concrete " ++ cnc ++ " of " ++ ab ++ " = {"

getMulti :: String -> String -> Multi
getMulti m s = foldl (flip addMulti) (emptyMulti{absName = m}) (modlines (lines s))

addMulti :: String -> Multi -> Multi
addMulti line multi = case line of
  '-':'-':_ -> multi
  _ | all isSpace line -> multi
  '>':s -> case words s of
     "langs":ws -> let las = [absName multi ++ w | w <- ws] in multi {
       cncNames = las, 
       cncRules = concat [[(la,"lincat " ++ startCat multi ++ " = Str ;"),
                           (la,"flags coding = utf8 ;")] | la <- las]
       }
     "startcat":c:ws -> multi {startCat = c}
     "abs":ws   -> multi {
       absRules = unwords ws : absRules multi
       }
     langs:ws   -> multi {
       cncRules = [(absName multi ++ la, unwords ws) | la <- chop ',' langs] ++ cncRules multi
       }
  _ -> case words line of
        m:name:_ | isModule m -> multi {
          rawModules = (name,line):rawModules multi
          } 
        _ -> let (cat,fun,lins) = getRules (startCat multi) line in 
              multi {
               absRules = ("fun " ++ fun ++ " : " ++ cat ++ " ;") : absRules multi,
               cncRules = zip (cncNames multi) lins ++ cncRules multi
               }

getRules :: String -> String -> (String,String,[String])
getRules cat line = (cat, fun, map lin rss) where
  rss = map (map unspace . chop ',') $ chop ';' line
  fun = map idChar (head (head rss)) ++ "_" ++ cat
  lin rs = "lin " ++ fun ++ " = " ++ unwords (intersperse "|" (map quote rs)) ++ " ;"

chop :: Eq c => c -> [c] -> [[c]]
chop c cs = case break (==c) cs of
  (w,_:cs2) -> w : chop c cs2
  ([],[])   -> []
  (w,_)     -> [w]

-- remove spaces from beginning and end, leave them in the middle
unspace :: String -> String
unspace = unwords . words

quote :: String -> String
quote r = "\"" ++ r ++ "\""

-- to guarantee that the char can be used in an ident
idChar :: Char -> Char
idChar c = 
  if (n > 47 && n < 58) || (n > 64 && n < 91) || (n > 96 && n < 123) 
  then c
  else '_'
 where n = fromEnum c


gfFile :: FilePath -> FilePath
gfFile f = f ++ ".gf"

isModule :: String -> Bool
isModule = flip elem 
  ["abstract","concrete","incomplete","instance","interface","resource"]

modlines :: [String] -> [String]
modlines ss = case ss of
  l:ls -> case words l of
    w:_ | isModule w -> case break (isModule . concat . take 1 . words) ls of
      (ms,rest) -> unlines (l:ms) : modlines rest
    _ -> l : modlines ls
  _ -> []