diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Speech/CFGToFiniteState.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Speech/CFGToFiniteState.hs')
| -rw-r--r-- | src-3.0/GF/Speech/CFGToFiniteState.hs | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/src-3.0/GF/Speech/CFGToFiniteState.hs b/src-3.0/GF/Speech/CFGToFiniteState.hs new file mode 100644 index 000000000..7e6f80ba1 --- /dev/null +++ b/src-3.0/GF/Speech/CFGToFiniteState.hs @@ -0,0 +1,265 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFGToFiniteState +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/10 16:43:44 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- Approximates CFGs with finite state networks. +----------------------------------------------------------------------------- + +module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular, + MFA(..), MFALabel, cfgToMFA,cfgToFA') where + +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import GF.Data.Utilities +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..)) +import GF.Conversion.Types +import GF.Infra.Ident (Ident) +import GF.Infra.Option (Options) +import GF.Compile.ShellState (StateGrammar) + +import GF.Speech.FiniteState +import GF.Speech.Graph +import GF.Speech.Relation +import GF.Speech.TransformCFG + +data Recursivity = RightR | LeftR | NotR + +data MutRecSet = MutRecSet { + mrCats :: Set Cat_, + mrNonRecRules :: [CFRule_], + mrRecRules :: [CFRule_], + mrRec :: Recursivity + } + + +type MutRecSets = Map Cat_ MutRecSet + +-- +-- * Multiple DFA type +-- + +type MFALabel a = Symbol String a + +data MFA a = MFA String [(String,DFA (MFALabel a))] + + + +cfgToFA :: Options -> StateGrammar -> DFA Token +cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s + where start = getStartCatCF opts s + +makeSimpleRegular :: Options -> StateGrammar -> CFRules +makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s + where start = getStartCatCF opts s + preprocess = topDownFilter start . bottomUpFilter + . removeCycles + + +-- +-- * Compile strongly regular grammars to NFAs +-- + +-- Convert a strongly regular grammar to a finite automaton. +compileAutomaton :: Cat_ -- ^ Start category + -> CFRules + -> NFA Token +compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa + where + (fa,s,f) = newFA_ + ns = mutRecSets g $ mutRecCats False g + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000. +make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State + -> NFA Token -> NFA Token +make_fa c@(g,ns) q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [Tok t] -> newTransition q0 q1 (Just t) fa + [Cat a] -> case Map.lookup a ns of + -- a is recursive + Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> + case mrRec n of + RightR -> + -- the set Ni is right-recursive or cyclic + let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, + let (xs,Cat d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + LeftR -> + -- the set Ni is left-recursive + let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] + ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats ni fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + -- a is not recursive + Nothing -> let rs = catRules g a + in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs + (x:beta) -> let (fa',q) = newState () fa + in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa' + where + make_fa_ = make_fa c + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs + +-- +-- * Compile a strongly regular grammar to a DFA with sub-automata +-- + +cfgToMFA :: Options -> StateGrammar -> MFA Token +cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s + where start = getStartCatCF opts s + +-- | Build a DFA by building and expanding an MFA +cfgToFA' :: Options -> StateGrammar -> DFA Token +cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s + +buildMFA :: Cat_ -- ^ Start category + -> CFRules -> MFA Token +buildMFA start g = sortSubLats $ removeUnusedSubLats mfa + where fas = compileAutomata g + mfa = MFA start [(c, minimize fa) | (c,fa) <- fas] + +mfaStartDFA :: MFA a -> DFA (MFALabel a) +mfaStartDFA (MFA start subs) = + fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs + +mfaToDFA :: Ord a => MFA a -> DFA a +mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa + where + subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs] + getSub l = fromJust $ Map.lookup l subs' + expand (FA (Graph c ns es) s f) + = foldl' expandEdge (FA (Graph c ns []) s f) es + expandEdge fa (f,t,x) = + case x of + Nothing -> newTransition f t Nothing fa + Just (Tok s) -> newTransition f t (Just s) fa + Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l) + +removeUnusedSubLats :: MFA a -> MFA a +removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c] + where + usedMap = subLatUseMap mfa + used = growUsedSet (Set.singleton start) + isUsed c = c `Set.member` used + growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s) + +subLatUseMap :: MFA a -> Map String (Set String) +subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs] + +usedSubLats :: DFA (MFALabel a) -> Set String +usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa] + +revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a) +revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s] + +-- | Sort sub-networks topologically. +sortSubLats :: MFA a -> MFA a +sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs) + where + usedByMap = revMultiMap (subLatUseMap mfa) + sortLats _ [] = [] + sortLats ub ls = xs ++ sortLats ub' ys + where (xs,ys) = partition ((==0) . indeg) ls + ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub + indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub + +-- | Convert a strongly regular grammar to a number of finite automata, +-- one for each non-terminal. +-- The edges in the automata accept tokens, or name another automaton to use. +compileAutomata :: CFRules + -> [(Cat_,NFA (Symbol Cat_ Token))] + -- ^ A map of non-terminals and their automata. +compileAutomata g = [(c, makeOneFA c) | c <- allCats g] + where + mrs = mutRecSets g $ mutRecCats True g + makeOneFA c = make_fa1 mr s [Cat c] f fa + where (fa,s,f) = newFA_ + mr = fromJust (Map.lookup c mrs) + + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000, +-- adapted to build a finite automaton for a single (mutually recursive) set only. +-- Categories not in the set will result in category-labelled edges. +make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which + -- we are building the automaton. + -> State -- ^ State to come from + -> [Symbol Cat_ Token] -- ^ Symbols to accept + -> State -- ^ State to end up in + -> NFA (Symbol Cat_ Token) -- ^ FA to add to. + -> NFA (Symbol Cat_ Token) +make_fa1 mr q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [t@(Tok _)] -> newTransition q0 q1 (Just t) fa + [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa + [Cat a] -> + case mrRec mr of + NotR -> -- the set is a non-recursive (always singleton) set of categories + -- so the set of category rules is the set of rules for the whole set + make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa + RightR -> -- the set is right-recursive or cyclic + let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, + let (xs,Cat d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + LeftR -> -- the set is left-recursive + let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats (mrCats mr) fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + (x:beta) -> let (fa',q) = newState () fa + in make_fas [(q0,[x],q),(q,beta,q1)] fa' + where + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs + +mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets +mutRecSets g = Map.fromList . concatMap mkMutRecSet + where + mkMutRecSet cs = [ (c,ms) | c <- csl ] + where csl = Set.toList cs + rs = catSetRules g cs + (nrs,rrs) = partition (ruleIsNonRecursive cs) rs + ms = MutRecSet { + mrCats = cs, + mrNonRecRules = nrs, + mrRecRules = rrs, + mrRec = rec + } + rec | null rrs = NotR + | all (isRightLinear cs) rrs = RightR + | otherwise = LeftR + +-- +-- * Utilities +-- + +-- | Add a state for the given NFA for each of the categories +-- in the given set. Returns a map of categories to their +-- corresponding states. +addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State) +addStatesForCats cs fa = (fa', m) + where (fa', ns) = newStates (replicate (Set.size cs) ()) fa + m = Map.fromList (zip (Set.toList cs) (map fst ns)) |
