summaryrefslogtreecommitdiff
path: root/src/GF/Speech/PGFToCFG.hs
blob: c42c5f4ff8df66745a18f7fb81cc527cf05ff9db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
----------------------------------------------------------------------
-- |
-- Module      : GF.Speech.PGFToCFG
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter, 
                           fcfgPrinter, pgfToCFG) where

import PGF.CId
import PGF.Data as PGF
import PGF.Macros
import GF.Data.MultiMap (MultiMap)
import qualified GF.Data.MultiMap as MultiMap
import GF.Infra.Ident
import GF.Speech.CFG

import Data.Array as Array
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set

bnfPrinter :: PGF -> CId -> String
bnfPrinter = toBNF id

toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc

-- FIXME: move this somewhere else
fcfgPrinter :: PGF -> CId -> String
fcfgPrinter pgf cnc = unlines (map showRule rules)
  where
    pinfo = fromMaybe (error "fcfgPrinter") (lookParser pgf cnc)

    rules :: [FRule]
    rules = Array.elems (PGF.allRules pinfo)

    showRule (FRule cid ps cs fc arr) = prCId cid ++ " " ++ show ps ++ ". " ++ showCat fc ++ " ::= [" ++ concat (intersperse ", " (map showCat cs)) ++ "] = " ++ showLin arr
        where
          showLin arr = "[" ++ concat (intersperse ", " [ unwords (map showFSymbol (Array.elems r)) | r <- Array.elems arr]) ++ "]"
          showFSymbol (FSymCat i j) = showCat (cs!!j) ++ "_" ++ show j ++ "." ++ show i
          showFSymbol (FSymTok t) = t
    showCat c = "C" ++ show c

pgfToCFG :: PGF 
          -> CId   -- ^ Concrete syntax name
          -> CFG
pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fruleToCFRule rules)
  where
    pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)

    rules :: [FRule]
    rules = Array.elems (PGF.allRules pinfo)

    fcatCats :: Map FCat Cat
    fcatCats = Map.fromList [(fc, prCId c ++ "_" ++ show i) 
                                 | (c,fcs) <- Map.toList (startupCats pinfo), 
                                   (fc,i) <- zip fcs [1..]]

    fcatCat :: FCat -> Cat
    fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats

    fcatToCat :: FCat -> FIndex -> Cat
    fcatToCat c l = fcatCat c ++ row
      where row = if catLinArity c == 1 then "" else "_" ++ show l

    -- gets the number of fields in the lincat for the given category
    catLinArity :: FCat -> Int
    catLinArity c = maximum (1:[rangeSize (bounds rhs) | FRule _ _ _ _ rhs <- Map.findWithDefault [] c rulesByFCat])

    rulesByFCat :: Map FCat [FRule]
    rulesByFCat = Map.fromListWith (++) [(c,[r]) | r@(FRule _ _ _ c _) <- rules]

    extCats :: Set Cat
    extCats = Set.fromList $ map lhsCat startRules

    -- NOTE: this is only correct for cats that have a lincat with exactly one row.
    startRules :: [CFRule]
    startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc 0)] (CFRes 0) 
                      | (c,fcs) <- Map.toList (startupCats pinfo), 
                        fc <- fcs, not (isLiteralFCat fc)]

    fruleToCFRule :: FRule -> [CFRule]
    fruleToCFRule (FRule f ps args c rhs) = 
        [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
         | (l,row) <- Array.assocs rhs, not (containsLiterals row)]
      where
        mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
        mkRhs = map fsymbolToSymbol . Array.elems

        containsLiterals :: Array FPointPos FSymbol -> Bool
        containsLiterals row = any isLiteralFCat [args!!n | FSymCat _ n <- Array.elems row]

        fsymbolToSymbol :: FSymbol -> CFSymbol
        fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
        fsymbolToSymbol (FSymTok t) = Terminal t

        fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
        fixProfile row = concatMap positions
            where
              nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
              positions i = [k | (k,FSymCat _ j) <- nts, j == i]

        profilesToTerm :: [Profile] -> CFTerm
        profilesToTerm [[n]] | f == wildCId = CFRes n
        profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
            where (argTypes,_) = catSkeleton $ lookType pgf f

        profileToTerm :: CId -> Profile -> CFTerm
        profileToTerm t [] = CFMeta t
        profileToTerm _ xs = CFRes (last xs) -- FIXME: unify

isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])