summaryrefslogtreecommitdiff
path: root/src/GF/Compile/Wordlist.hs
blob: d581ed6837ef476b2747baeb6cd8daef882e71f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
----------------------------------------------------------------------
-- |
-- Module      : Wordlist
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 
-- > CVS $Author: 
-- > CVS $Revision: 
--
-- Compile a gfwl file (multilingual word list) to an abstract + concretes
-----------------------------------------------------------------------------

module GF.Compile.Wordlist (mkWordlist) where

import GF.Data.Operations
import GF.Infra.UseIO
import Data.List
import Data.Char

-- read File.gfwl, write File.gf (abstract) and a set of concretes
-- return the names of the concretes

mkWordlist :: FilePath -> IO [FilePath]
mkWordlist file = do
  s <- readFileIf file
  let abs = fileBody file
  let (cnchs,wlist) = pWordlist abs $ filter notComment $ lines s
  let (gr,grs) = mkGrammars abs cnchs wlist
  let cncfs = [cnc ++ ".gf" | (cnc,_) <- cnchs]
  mapM_ (uncurry writeFile) $ (abs ++ ".gf",gr) : zip cncfs grs
  putStrLn $ "wrote " ++ unwords ((abs ++ ".gf") : cncfs)
  return cncfs

{-
-- syntax of files, e.g.

  # Svenska - Franska - Finska  -- names of concretes
 
  berg  - montagne - vuori      -- word entry

-- this creates:

  cat S ; 
  fun berg_S : S ; 
  lin berg_S = {s = ["berg"]} ;
  lin berg_S = {s = ["montagne"]} ;
  lin berg_S = {s = ["vuori"]} ;

-- support for different categories to be elaborated. The syntax it

  Verb . klättra - grimper / escalader - kiivetä / kiipeillä

-- notice that a word can have several alternative (separator /)
-- and that an alternative can consist of several words
-}

type CncHeader = (String,String) -- module name, module header

type Wordlist = [(String, [[String]])] -- cat, variants for each cnc


pWordlist :: String -> [String] -> ([CncHeader],Wordlist)
pWordlist abs ls = (headers,rules) where
  (hs,rs) = span ((=="#") . take 1) ls
  headers = map mkHeader $ chunks "-" $ filter (/="#") $ words $ concat hs
  rules   = map (mkRule   . words) rs

  mkHeader ws = case ws of
    w:ws2 -> (w, unwords ("concrete":w:"of":abs:"=":ws2))
  mkRule ws = case ws of
    cat:".":vs -> (cat, mkWords vs)
    _          -> ("S", mkWords ws)
  mkWords = map (map unwords . chunks "/") . chunks "-"


mkGrammars :: String -> [CncHeader] -> Wordlist -> (String,[String])
mkGrammars ab hs wl = (abs,cncs) where
  abs = unlines $ map unwords $
    ["abstract",ab,"=","{"]:
    cats ++
    funs ++
    [["}"]]

  cncs = [unlines $ (h ++ " {") : map lin rs ++ ["}"] | ((_,h),rs) <- zip hs rss]

  cats = [["cat",c,";"] | c <- nub $ map fst wl]
  funs = [["fun", f , ":", c,";"] | (f,c,_) <- wlf]

  wlf = [(ident f c, c, ws) | (c,ws@(f:_)) <- wl]

  rss = [[(f, wss !! i) | (f,_,wss) <- wlf] | i <- [0..length hs - 1]]

  lin (f,ss) = unwords ["lin", f, "=", "{s", "=", val ss, "}", ";"]

  val ss = case ss of
    [w] -> quote w
    _   -> "variants {" ++ unwords (intersperse ";" (map quote ss)) ++ "}"

  quote w = "[" ++ prQuotedString w ++ "]" 

  ident f c = concat $ intersperse "_" $ words (head f) ++ [c]


notComment s = not (all isSpace s) && take 2 s /= "--"