1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
----------------------------------------------------------------------
-- |
-- Module : SRG
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.20 $
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
--
-- FIXME: figure out name prefix from grammar name
-----------------------------------------------------------------------------
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
makeSimpleSRG, makeSRG
, lookupFM_, prtS
, topDownFilter) where
import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
, Profile, SyntaxForest, filterCats)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
import GF.Speech.Relation
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import Data.List
import Data.Maybe (fromMaybe)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
data SRG = SRG { grammarName :: String -- ^ grammar name
, startCat :: String -- ^ start category name
, origStartCat :: String -- ^ original start category name
, grammarLanguage :: String -- ^ The language for which the grammar
-- is intended, e.g. en_UK
, rules :: [SRGRule]
}
deriving (Eq,Show)
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
-- and productions
deriving (Eq,Show)
-- | maybe a probability, a rule name and a list of symbols
data SRGAlt = SRGAlt (Maybe Double) Name [Symbol String Token]
deriving (Eq,Show)
-- | SRG category name and original name
type CatName = (String,String)
type CatNames = Map String String
-- | Create a non-left-recursive SRG.
-- FIXME: the probabilities, names and profiles in the returned
-- grammar may be meaningless.
makeSimpleSRG :: Ident -- ^ Grammar name
-> String -- ^ Start category
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSimpleSRG
= makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats . removeCycles)
-- | Create a SRG preserving the names, profiles and probabilities of the
-- input grammar. The returned grammar may be left-recursive.
makeSRG :: Ident -- ^ Grammar name
-> String -- ^ Start category
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSRG = makeSRG_ removeEmptyCats
makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the
-- CFG before converting to SRG
-> Ident -- ^ Grammar name
-> String -- ^ Start category
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSRG_ f i origStart opts probs gr
= SRG { grammarName = name,
startCat = lookupFM_ names origStart,
origStartCat = origStart,
grammarLanguage = l,
rules = rs }
where
name = prIdent i
l = fromMaybe "en_UK" (getOptVal opts speechLanguage)
gr' = f (cfgToCFRules gr)
(cats,cfgRules) = unzip gr'
names = mkCatNames name cats
rs = map (cfgRulesToSRGRule names probs) cfgRules
-- FIXME: merge alternatives with same rhs and profile but different probabilities
cfgRulesToSRGRule :: Map String String -> Maybe Probs -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
where origCat = lhsCat r
cat = lookupFM_ names origCat
rhs = nub $ map ruleToAlt rs
ruleToAlt r@(CFRule c ss n)
= SRGAlt (ruleProb probs r) n (map renameCat ss)
renameCat (Cat c) = Cat (lookupFM_ names c)
renameCat t = t
ruleProb :: Maybe Probs -> CFRule_ -> Maybe Double
ruleProb mp r = mp >>= \probs -> lookupProb probs (ruleFun r)
-- FIXME: move to GF.Probabilistic.Probabilistic?
lookupProb :: Probs -> Ident -> Maybe Double
lookupProb probs i = lookupTree prIdent i probs
mkCatNames :: String -- ^ Category name prefix
-> [String] -- ^ Original category names
-> Map String String -- ^ Maps original names to SRG names
mkCatNames prefix origNames = Map.fromList (zip origNames names)
where names = [prefix ++ "_" ++ show x | x <- [0..]]
-- | Remove categories which are not reachable from the start category.
topDownFilter :: SRG -> SRG
topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' }
where
rs' = [ r | r@(SRGRule c _ _) <- rs, c `Set.member` keep]
rhsCats = [ (c,c') | r@(SRGRule c _ ps) <- rs,
SRGAlt _ _ ss <- ps,
c' <- filterCats ss]
uses = reflexiveClosure_ (allSRGCats srg) $ transitiveClosure $ mkRel rhsCats
keep = allRelated uses start
allSRGCats :: SRG -> [String]
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
--
-- * Utilities for building and printing SRGs
--
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
lookupFM_ fm k = Map.findWithDefault (error $ "Key not found: " ++ show k) k fm
prtS :: Print a => a -> ShowS
prtS = showString . prt
|