summaryrefslogtreecommitdiff
path: root/src/GF/Speech/CFGToFiniteState.hs
blob: 5a72f548a89fd1496601ea8996b43dfb43ccc5a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
----------------------------------------------------------------------
-- |
-- Module      : CFGToFiniteState
-- Maintainer  : BB
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- Approximates CFGs with finite state networks.
-----------------------------------------------------------------------------

module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where

import Data.List

import GF.Data.Utilities
import GF.Formalism.CFG 
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types
import GF.Infra.Ident (Ident)
import GF.Infra.Option (Options)

import GF.Speech.FiniteState
import GF.Speech.Relation
import GF.Speech.TransformCFG

cfgToFA :: Ident -- ^ Grammar name
	-> Options -> CGrammar -> DFA String
cfgToFA name opts = minimize . compileAutomaton start . makeSimpleRegular
  where start = getStartCat opts

makeSimpleRegular :: CGrammar -> CFRules
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules

-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free 
-- grammar
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
  where trSet cs | allXLinear cs rs = rs
		 | otherwise = concatMap handleCat cs
	    where rs = catSetRules g cs
		  handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
				++ concatMap (makeRightLinearRules c) (catRules g c)
		      where c' = newCat c
		  makeRightLinearRules b' (CFRule c ss n) = 
		      case ys of
			      [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
			      (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n 
					: makeRightLinearRules (newCat b) (CFRule c zs n)
		      where (xs,ys) = break (`catElem` cs) ss
	newCat c = c ++ "$"


-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool    -- ^ If true, all categories will be in some set.
                      --   If false, only recursive categories will be included.
	   -> CFRules -> [[Cat_]]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
  where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
	allCats = map fst g
	refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation

-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: Cat_ -- ^ Start category
		 -> CFRules
		 -> NFA Token
compileAutomaton start g = make_fa s [Cat start] f fa''
  where fa = newFA ()
	s = startState fa
	(fa',f) = newState () fa
	fa'' = addFinalState f fa'
	ns = mutRecCats False g
	-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
	--   Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
	make_fa :: State -> [Symbol Cat_ Token] -> State 
		-> NFA Token -> NFA Token
	make_fa q0 alpha q1 fa = 
	    case alpha of
		   []       -> newTransition q0 q1 Nothing fa
		   [Tok t]  -> newTransition q0 q1 (Just t) fa
		   [Cat a]  -> case findSet a ns of
			        -- a is recursive
				Just ni -> let (fa',ss) = addStatesForCats ni fa
					       getState x = lookup' x ss
					       niRules = catSetRules g ni
					       (nrs,rs) = partition (ruleIsNonRecursive ni) niRules
					       in if all (isRightLinear ni) niRules then
						    -- the set Ni is right-recursive or cyclic
						    let fa''  = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa'
							fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs, 
									  let (xs,Cat d) = (init ss,last ss)] fa''
							in newTransition q0 (getState a) Nothing fa'''
						  else
						    -- the set Ni is left-recursive
						    let fa''  = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa'
							fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa''
							in newTransition (getState a) q1 Nothing fa'''
				-- a is not recursive
				Nothing -> let rs = catRules g a
					       in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs
		   (x:beta) -> let (fa',q) = newState () fa
				in make_fa q beta q1 $ make_fa q0 [x] q fa'
	addStatesForCats [] fa = (fa,[])
	addStatesForCats (c:cs) fa = let (fa',s) = newState () fa
					 (fa'',ss) = addStatesForCats cs fa'
					 in (fa'',(c,s):ss)
	ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs


noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)

-- | Check if all the rules are right-linear, or all the rules are
--   left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs

-- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c]  -- ^ The categories to consider
	      -> CFRule c n t -- ^ The rule to check for right-linearity
	      -> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs

-- | Checks if a context-free rule is left-linear.
isLeftLinear ::  Eq c => [c]  -- ^ The categories to consider
	      -> CFRule c n t -- ^ The rule to check for right-linearity
	      -> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs