1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.CFGToFA
--
-- Approximates CFGs with finite state networks.
----------------------------------------------------------------------
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
MFA(..), cfgToMFA, cfgToFA') where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import PGF.Internal
import GF.Data.Utilities
import GF.Grammar.CFG
--import GF.Speech.PGFToCFG
--import GF.Infra.Ident (Ident)
import GF.Data.Graph
--import GF.Data.Relation
import GF.Speech.FiniteState
--import GF.Speech.CFG
data Recursivity = RightR | LeftR | NotR
data MutRecSet = MutRecSet {
mrCats :: Set Cat,
mrNonRecRules :: [CFRule],
mrRecRules :: [CFRule],
mrRec :: Recursivity
}
type MutRecSets = Map Cat MutRecSet
--
-- * Multiple DFA type
--
data MFA = MFA Cat [(Cat,DFA CFSymbol)]
cfgToFA :: CFG -> DFA Token
cfgToFA = minimize . compileAutomaton . makeSimpleRegular
--
-- * Compile strongly regular grammars to NFAs
--
-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: CFG -> NFA Token
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
where
(fa,s,f) = newFA_
ns = mutRecSets g $ mutRecCats False g
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
-> NFA Token -> NFA Token
make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[Terminal t] -> newTransition q0 q1 (Just t) fa
[NonTerminal a] ->
case Map.lookup a ns of
-- a is recursive
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
case mrRec n of
-- the set Ni is right-recursive or cyclic
RightR ->
let new = [(getState c, xs, q1) | Rule c xs _ <- nrs]
++ [(getState c, xs, getState d) | Rule c ss _ <- rs,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
-- the set Ni is left-recursive
LeftR ->
let new = [(q0, xs, getState c) | Rule c xs _ <- nrs]
++ [(getState d, xs, getState c) | Rule c (NonTerminal d:xs) _ <- rs]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats ni fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
-- a is not recursive
Nothing -> let rs = catRules g a
in foldl' (\f (Rule _ b _) -> make_fa_ q0 b q1 f) fa rs
(x:beta) -> let (fa',q) = newState () fa
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
where
make_fa_ = make_fa c
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
--
-- * Compile a strongly regular grammar to a DFA with sub-automata
--
cfgToMFA :: CFG -> MFA
cfgToMFA = buildMFA . makeSimpleRegular
-- | Build a DFA by building and expanding an MFA
cfgToFA' :: CFG -> DFA Token
cfgToFA' = mfaToDFA . cfgToMFA
buildMFA :: CFG -> MFA
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
where fas = compileAutomata g
mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
mfaStartDFA :: MFA -> DFA CFSymbol
mfaStartDFA (MFA start subs) =
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
mfaToDFA :: MFA -> DFA Token
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
where
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
getSub l = fromJust $ Map.lookup l subs'
expand (FA (Graph c ns es) s f)
= foldl' expandEdge (FA (Graph c ns []) s f) es
expandEdge fa (f,t,x) =
case x of
Nothing -> newTransition f t Nothing fa
Just (Terminal s) -> newTransition f t (Just s) fa
Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
removeUnusedSubLats :: MFA -> MFA
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
where
usedMap = subLatUseMap mfa
used = growUsedSet (Set.singleton start)
isUsed c = c `Set.member` used
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
subLatUseMap :: MFA -> Map Cat (Set Cat)
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
usedSubLats :: DFA CFSymbol -> Set Cat
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
-- | Sort sub-networks topologically.
sortSubLats :: MFA -> MFA
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
where
usedByMap = revMultiMap (subLatUseMap mfa)
sortLats _ [] = []
sortLats ub ls = xs ++ sortLats ub' ys
where (xs,ys) = partition ((==0) . indeg) ls
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
-- | Convert a strongly regular grammar to a number of finite automata,
-- one for each non-terminal.
-- The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFG
-> [(Cat,NFA CFSymbol)]
-- ^ A map of non-terminals and their automata.
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
where
mrs = mutRecSets g $ mutRecCats True g
makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
where (fa,s,f) = newFA_
mr = fromJust (Map.lookup c mrs)
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
-- adapted to build a finite automaton for a single (mutually recursive) set only.
-- Categories not in the set will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
-- we are building the automaton.
-> State -- ^ State to come from
-> [CFSymbol] -- ^ Symbols to accept
-> State -- ^ State to end up in
-> NFA CFSymbol -- ^ FA to add to.
-> NFA CFSymbol
make_fa1 mr q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
[c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
[NonTerminal a] ->
case mrRec mr of
NotR -> -- the set is a non-recursive (always singleton) set of categories
-- so the set of category rules is the set of rules for the whole set
make_fas [(q0, b, q1) | Rule _ b _ <- mrNonRecRules mr] fa
RightR -> -- the set is right-recursive or cyclic
let new = [(getState c, xs, q1) | Rule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | Rule c ss _ <- mrRecRules mr,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR -> -- the set is left-recursive
let new = [(q0, xs, getState c) | Rule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | Rule c (NonTerminal d:xs) _ <- mrRecRules mr]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats (mrCats mr) fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
(x:beta) -> let (fa',q) = newState () fa
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
where
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g cs
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = cs,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrRec = rec
}
rec | null rrs = NotR
| all (isRightLinear cs) rrs = RightR
| otherwise = LeftR
--
-- * Utilities
--
-- | Add a state for the given NFA for each of the categories
-- in the given set. Returns a map of categories to their
-- corresponding states.
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns))
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
|