summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Speech/PGFToCFG.hs
blob: 163f0253789ab36b4f0065f825721a02b4cf7328 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
----------------------------------------------------------------------
-- |
-- Module      : GF.Speech.PGFToCFG
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where

import PGF.CId
import PGF.Data as PGF
import PGF.Macros
import GF.Infra.Ident
import GF.Speech.CFG hiding (Symbol)

import Data.Array.IArray as Array
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set

bnfPrinter :: PGF -> CId -> String
bnfPrinter = toBNF id

toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc

type Profile = [Int]

pgfToCFG :: PGF 
          -> CId   -- ^ Concrete syntax name
          -> CFG
pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap ruleToCFRule rules)
  where
    cnc = lookConcr pgf lang

    rules :: [(FId,Production)]
    rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions cnc)
                         , prod <- Set.toList set]

    fcatCats :: Map FId Cat
    fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) 
                                 | (c,CncCat s e lbls) <- Map.toList (cnccats cnc), 
                                   (fc,i) <- zip (range (s,e)) [1..]]

    fcatCat :: FId -> Cat
    fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats

    fcatToCat :: FId -> LIndex -> Cat
    fcatToCat c l = fcatCat c ++ row
      where row = if catLinArity c == 1 then "" else "_" ++ show l

    -- gets the number of fields in the lincat for the given category
    catLinArity :: FId -> Int
    catLinArity c = maximum (1:[rangeSize (bounds rhs) | (CncFun _ rhs, _) <- topdownRules c])

    topdownRules cat = f cat []
      where
        f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions cnc))
	 
        g (PApply funid args) rules = (cncfuns cnc ! funid,args) : rules
        g (PCoerce cat)       rules = f cat rules


    extCats :: Set Cat
    extCats = Set.fromList $ map lhsCat startRules

    startRules :: [CFRule]
    startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) 
                      | (c,CncCat s e lbls) <- Map.toList (cnccats cnc), 
                        fc <- range (s,e), not (isPredefFId fc),
                        r <- [0..catLinArity fc-1]]

    ruleToCFRule :: (FId,Production) -> [CFRule]
    ruleToCFRule (c,PApply funid args) = 
        [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
           | (l,seqid) <- Array.assocs rhs
           , let row = sequences cnc ! seqid
           , not (containsLiterals row)]
      where
        CncFun f rhs = cncfuns cnc ! funid

        mkRhs :: Array DotPos Symbol -> [CFSymbol]
        mkRhs = concatMap symbolToCFSymbol . Array.elems

        containsLiterals :: Array DotPos Symbol -> Bool
        containsLiterals row = not (null ([n | SymLit n _ <- Array.elems row] ++
                                          [n | SymVar n _ <- Array.elems row]))

        symbolToCFSymbol :: Symbol -> [CFSymbol]
        symbolToCFSymbol (SymCat n l)    = [let PArg _ fid = args!!n in NonTerminal (fcatToCat fid l)]
        symbolToCFSymbol (SymKS ts)      = map Terminal ts
        symbolToCFSymbol (SymKP ts as)   = map Terminal $ ts 
                                           ---- ++ [t | Alt ss _ <- as, t <- ss]
                                           ---- should be alternatives in [[CFSymbol]]
                                           ---- AR 3/6/2010
        fixProfile :: Array DotPos Symbol -> Int -> Profile
        fixProfile row i = [k | (k,j) <- nts, j == i]
            where
              nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
              
              getPos (SymCat j _) = [j]
              getPos (SymLit j _) = [j]
              getPos _            = []

        profilesToTerm :: [Profile] -> CFTerm
        profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
            where (argTypes,_) = catSkeleton $ lookType (abstract pgf) f

        profileToTerm :: CId -> Profile -> CFTerm
        profileToTerm t [] = CFMeta t
        profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
    ruleToCFRule (c,PCoerce c') =
        [CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
           | l <- [0..catLinArity c-1]]