diff options
| author | bringert <bringert@cs.chalmers.se> | 2006-01-04 21:41:12 +0000 |
|---|---|---|
| committer | bringert <bringert@cs.chalmers.se> | 2006-01-04 21:41:12 +0000 |
| commit | a4ba93cc556dadc33ed95abd9baac0d29236bcfe (patch) | |
| tree | cef6d169c72484ec7e187859a905a9c9247ac5e8 /src/GF/Speech/CFGToFiniteState.hs | |
| parent | e22275d467fe78930d2510219a98283422a8a452 (diff) | |
Build SLF networks with sublattices.
Diffstat (limited to 'src/GF/Speech/CFGToFiniteState.hs')
| -rw-r--r-- | src/GF/Speech/CFGToFiniteState.hs | 172 |
1 files changed, 138 insertions, 34 deletions
diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs index c12f13b39..855bc8091 100644 --- a/src/GF/Speech/CFGToFiniteState.hs +++ b/src/GF/Speech/CFGToFiniteState.hs @@ -12,9 +12,11 @@ -- Approximates CFGs with finite state networks. ----------------------------------------------------------------------------- -module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where +module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular, + MFALabel(..), MFA(..), cfgToMFA) where import Data.List +import Data.Maybe import Data.Map (Map) import qualified Data.Map as Map import Data.Set (Set) @@ -31,11 +33,13 @@ import GF.Speech.FiniteState import GF.Speech.Relation import GF.Speech.TransformCFG +data Recursivity = RightR | LeftR | NotR + data MutRecSet = MutRecSet { - mrCats :: [Cat_], + mrCats :: Set Cat_, mrNonRecRules :: [CFRule_], mrRecRules :: [CFRule_], - mrIsRightRec :: Bool + mrRec :: Recursivity } @@ -48,6 +52,10 @@ cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular makeSimpleRegular :: CGrammar -> CFRules makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules +-- +-- * Approximate context-free grammars with regular grammars. +-- + -- Use the transformation algorithm from \"Regular Approximation of Context-free -- Grammars through Approximation\", Mohri and Nederhof, 2000 -- to create an over-generating regular frammar for a context-free @@ -63,13 +71,15 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) where c' = newCat c makeRightLinearRules b' (CFRule c ss n) = case ys of - [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left - (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n - : makeRightLinearRules (newCat b) (CFRule c zs n) + [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left + (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n + ++ makeRightLinearRules (newCat b) (CFRule c zs n) where (xs,ys) = break (`catElem` cs) ss + -- don't add rules on the form A -> A + newRule c rhs n | rhs == [Cat c] = [] + | otherwise = [CFRule c rhs n] newCat c = c ++ "$" - -- | Get the sets of mutually recursive non-terminals for a grammar. mutRecCats :: Bool -- ^ If true, all categories will be in some set. -- If false, only recursive categories will be included. @@ -79,32 +89,19 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit allCats = map fst g refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation +-- +-- * Compile strongly regular grammars to NFAs +-- + -- Convert a strongly regular grammar to a finite automaton. compileAutomaton :: Cat_ -- ^ Start category -> CFRules -> NFA Token -compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa'' +compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa where - fa = newFA () - s = startState fa - (fa',f) = newState () fa - fa'' = addFinalState f fa' + (fa,s,f) = newFA_ ns = mutRecSets g $ mutRecCats False g -mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets -mutRecSets g = Map.fromList . concatMap mkMutRecSet - where - mkMutRecSet cs = [ (c,ms) | c <- csl ] - where csl = Set.toList cs - rs = catSetRules g csl - (nrs,rrs) = partition (ruleIsNonRecursive cs) rs - ms = MutRecSet { - mrCats = csl, - mrNonRecRules = nrs, - mrRecRules = rrs, - mrIsRightRec = all (isRightLinear cs) rrs - } - -- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", -- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State @@ -116,14 +113,14 @@ make_fa c@(g,ns) q0 alpha q1 fa = [Cat a] -> case Map.lookup a ns of -- a is recursive Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> - if mrIsRightRec n - then + case mrRec n of + RightR -> -- the set Ni is right-recursive or cyclic let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, let (xs,Cat d) = (init ss,last ss)] in make_fas new $ newTransition q0 (getState a) Nothing fa' - else + LeftR -> -- the set Ni is left-recursive let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs] @@ -143,16 +140,123 @@ make_fa c@(g,ns) q0 alpha q1 fa = make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs -addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, Map Cat_ State) +-- +-- * Multiple DFA type +-- + +data MFALabel a = MFASym a | MFASub String + deriving Eq + +data MFA a = MFA (DFA (MFALabel a)) [(String,DFA (MFALabel a))] + +-- +-- * Compile strongly regular grammars to multiple DFAs +-- + +cfgToMFA :: Options -> CGrammar -> MFA String +cfgToMFA opts g = MFA startFA [(c, toMFA (minimize fa)) | (c,fa) <- fas] + where start = getStartCat opts + startFA = let (fa,s,f) = newFA_ + in newTransition s f (MFASub start) fa + fas = compileAutomata $ makeSimpleRegular g + mkMFALabel (Cat c) = MFASub c + mkMFALabel (Tok t) = MFASym t + toMFA = mapTransitions mkMFALabel + +-- | Convert a strongly regular grammar to a number of finite automata, +-- one for each non-terminal. +-- The edges in the automata accept tokens, or name another automaton to use. +compileAutomata :: CFRules + -> [(Cat_,NFA (Symbol Cat_ Token))] + -- ^ A map of non-terminals and their automata. +compileAutomata g = [(c, makeOneFA c) | c <- allCats g] + where + mrs = mutRecSets g $ mutRecCats True g + makeOneFA c = make_fa1 mr s [Cat c] f fa + where (fa,s,f) = newFA_ + mr = fromJust (Map.lookup c mrs) + + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997, +-- adapted to build a finite automaton for a single (mutually recursive) set only. +-- Categories not in the set (fromJustMap.lookup c mrs)will result in category-labelled edges. +make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which + -- we are building the automaton. + -> State -- ^ State to come from + -> [Symbol Cat_ Token] -- ^ Symbols to accept + -> State -- ^ State to end up in + -> NFA (Symbol Cat_ Token) -- ^ FA to add to. + -> NFA (Symbol Cat_ Token) +make_fa1 mr q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [t@(Tok _)] -> newTransition q0 q1 (Just t) fa + [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa + [Cat a] -> + case mrRec mr of + NotR -> -- the set is a non-recursive (always singleton) set of categories + -- so the set of category rules is the set of rules for the whole set + make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa + RightR -> -- the set is right-recursive or cyclic + let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, + let (xs,Cat d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + LeftR -> -- the set is left-recursive + let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats (mrCats mr) fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + (x:beta) -> let (fa',q) = newState () fa + in make_fas [(q0,[x],q),(q,beta,q1)] fa' + where + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs + +mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets +mutRecSets g = Map.fromList . concatMap mkMutRecSet + where + mkMutRecSet cs = [ (c,ms) | c <- csl ] + where csl = Set.toList cs + rs = catSetRules g csl + (nrs,rrs) = partition (ruleIsNonRecursive cs) rs + ms = MutRecSet { + mrCats = cs, + mrNonRecRules = nrs, + mrRecRules = rrs, + mrRec = rec + } + rec | null rrs = NotR + | all (isRightLinear cs) rrs = RightR + | otherwise = LeftR + +-- +-- * Utilities +-- + +-- | Create a new finite automaton with an initial and a final state. +newFA_ :: Enum n => (FA n () b, n, n) +newFA_ = (fa'', s, f) + where fa = newFA () + s = startState fa + (fa',f) = newState () fa + fa'' = addFinalState f fa' + +-- | Add a state for the given NFA for each of the categories +-- in the given set. Returns a map of categories to their +-- corresponding states. +addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State) addStatesForCats cs fa = (fa', m) - where (fa', ns) = newStates (replicate (length cs) ()) fa - m = Map.fromList (zip cs (map fst ns)) + where (fa', ns) = newStates (replicate (Set.size cs) ()) fa + m = Map.fromList (zip (Set.toList cs) (map fst ns)) ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs - - noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool noCatsInSet cs = not . any (`catElem` cs) |
