diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
| commit | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch) | |
| tree | 0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/Speech/CFGToFiniteState.hs | |
| parent | fe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff) | |
removed src for 2.9
Diffstat (limited to 'src/GF/Speech/CFGToFiniteState.hs')
| -rw-r--r-- | src/GF/Speech/CFGToFiniteState.hs | 265 |
1 files changed, 0 insertions, 265 deletions
diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs deleted file mode 100644 index 7e6f80ba1..000000000 --- a/src/GF/Speech/CFGToFiniteState.hs +++ /dev/null @@ -1,265 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : CFGToFiniteState --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/10 16:43:44 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.6 $ --- --- Approximates CFGs with finite state networks. ------------------------------------------------------------------------------ - -module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular, - MFA(..), MFALabel, cfgToMFA,cfgToFA') where - -import Data.List -import Data.Maybe -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set - -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..)) -import GF.Conversion.Types -import GF.Infra.Ident (Ident) -import GF.Infra.Option (Options) -import GF.Compile.ShellState (StateGrammar) - -import GF.Speech.FiniteState -import GF.Speech.Graph -import GF.Speech.Relation -import GF.Speech.TransformCFG - -data Recursivity = RightR | LeftR | NotR - -data MutRecSet = MutRecSet { - mrCats :: Set Cat_, - mrNonRecRules :: [CFRule_], - mrRecRules :: [CFRule_], - mrRec :: Recursivity - } - - -type MutRecSets = Map Cat_ MutRecSet - --- --- * Multiple DFA type --- - -type MFALabel a = Symbol String a - -data MFA a = MFA String [(String,DFA (MFALabel a))] - - - -cfgToFA :: Options -> StateGrammar -> DFA Token -cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s - where start = getStartCatCF opts s - -makeSimpleRegular :: Options -> StateGrammar -> CFRules -makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s - where start = getStartCatCF opts s - preprocess = topDownFilter start . bottomUpFilter - . removeCycles - - --- --- * Compile strongly regular grammars to NFAs --- - --- Convert a strongly regular grammar to a finite automaton. -compileAutomaton :: Cat_ -- ^ Start category - -> CFRules - -> NFA Token -compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa - where - (fa,s,f) = newFA_ - ns = mutRecSets g $ mutRecCats False g - --- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", --- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000. -make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State - -> NFA Token -> NFA Token -make_fa c@(g,ns) q0 alpha q1 fa = - case alpha of - [] -> newTransition q0 q1 Nothing fa - [Tok t] -> newTransition q0 q1 (Just t) fa - [Cat a] -> case Map.lookup a ns of - -- a is recursive - Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> - case mrRec n of - RightR -> - -- the set Ni is right-recursive or cyclic - let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] - ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, - let (xs,Cat d) = (init ss,last ss)] - in make_fas new $ newTransition q0 (getState a) Nothing fa' - LeftR -> - -- the set Ni is left-recursive - let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] - ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs] - in make_fas new $ newTransition (getState a) q1 Nothing fa' - where - (fa',stateMap) = addStatesForCats ni fa - getState x = Map.findWithDefault - (error $ "CFGToFiniteState: No state for " ++ x) - x stateMap - -- a is not recursive - Nothing -> let rs = catRules g a - in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs - (x:beta) -> let (fa',q) = newState () fa - in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa' - where - make_fa_ = make_fa c - make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs - --- --- * Compile a strongly regular grammar to a DFA with sub-automata --- - -cfgToMFA :: Options -> StateGrammar -> MFA Token -cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s - where start = getStartCatCF opts s - --- | Build a DFA by building and expanding an MFA -cfgToFA' :: Options -> StateGrammar -> DFA Token -cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s - -buildMFA :: Cat_ -- ^ Start category - -> CFRules -> MFA Token -buildMFA start g = sortSubLats $ removeUnusedSubLats mfa - where fas = compileAutomata g - mfa = MFA start [(c, minimize fa) | (c,fa) <- fas] - -mfaStartDFA :: MFA a -> DFA (MFALabel a) -mfaStartDFA (MFA start subs) = - fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs - -mfaToDFA :: Ord a => MFA a -> DFA a -mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa - where - subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs] - getSub l = fromJust $ Map.lookup l subs' - expand (FA (Graph c ns es) s f) - = foldl' expandEdge (FA (Graph c ns []) s f) es - expandEdge fa (f,t,x) = - case x of - Nothing -> newTransition f t Nothing fa - Just (Tok s) -> newTransition f t (Just s) fa - Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l) - -removeUnusedSubLats :: MFA a -> MFA a -removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c] - where - usedMap = subLatUseMap mfa - used = growUsedSet (Set.singleton start) - isUsed c = c `Set.member` used - growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s) - -subLatUseMap :: MFA a -> Map String (Set String) -subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs] - -usedSubLats :: DFA (MFALabel a) -> Set String -usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa] - -revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a) -revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s] - --- | Sort sub-networks topologically. -sortSubLats :: MFA a -> MFA a -sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs) - where - usedByMap = revMultiMap (subLatUseMap mfa) - sortLats _ [] = [] - sortLats ub ls = xs ++ sortLats ub' ys - where (xs,ys) = partition ((==0) . indeg) ls - ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub - indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub - --- | Convert a strongly regular grammar to a number of finite automata, --- one for each non-terminal. --- The edges in the automata accept tokens, or name another automaton to use. -compileAutomata :: CFRules - -> [(Cat_,NFA (Symbol Cat_ Token))] - -- ^ A map of non-terminals and their automata. -compileAutomata g = [(c, makeOneFA c) | c <- allCats g] - where - mrs = mutRecSets g $ mutRecCats True g - makeOneFA c = make_fa1 mr s [Cat c] f fa - where (fa,s,f) = newFA_ - mr = fromJust (Map.lookup c mrs) - - --- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", --- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000, --- adapted to build a finite automaton for a single (mutually recursive) set only. --- Categories not in the set will result in category-labelled edges. -make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which - -- we are building the automaton. - -> State -- ^ State to come from - -> [Symbol Cat_ Token] -- ^ Symbols to accept - -> State -- ^ State to end up in - -> NFA (Symbol Cat_ Token) -- ^ FA to add to. - -> NFA (Symbol Cat_ Token) -make_fa1 mr q0 alpha q1 fa = - case alpha of - [] -> newTransition q0 q1 Nothing fa - [t@(Tok _)] -> newTransition q0 q1 (Just t) fa - [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa - [Cat a] -> - case mrRec mr of - NotR -> -- the set is a non-recursive (always singleton) set of categories - -- so the set of category rules is the set of rules for the whole set - make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa - RightR -> -- the set is right-recursive or cyclic - let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] - ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, - let (xs,Cat d) = (init ss,last ss)] - in make_fas new $ newTransition q0 (getState a) Nothing fa' - LeftR -> -- the set is left-recursive - let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] - ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr] - in make_fas new $ newTransition (getState a) q1 Nothing fa' - where - (fa',stateMap) = addStatesForCats (mrCats mr) fa - getState x = Map.findWithDefault - (error $ "CFGToFiniteState: No state for " ++ x) - x stateMap - (x:beta) -> let (fa',q) = newState () fa - in make_fas [(q0,[x],q),(q,beta,q1)] fa' - where - make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs - -mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets -mutRecSets g = Map.fromList . concatMap mkMutRecSet - where - mkMutRecSet cs = [ (c,ms) | c <- csl ] - where csl = Set.toList cs - rs = catSetRules g cs - (nrs,rrs) = partition (ruleIsNonRecursive cs) rs - ms = MutRecSet { - mrCats = cs, - mrNonRecRules = nrs, - mrRecRules = rrs, - mrRec = rec - } - rec | null rrs = NotR - | all (isRightLinear cs) rrs = RightR - | otherwise = LeftR - --- --- * Utilities --- - --- | Add a state for the given NFA for each of the categories --- in the given set. Returns a map of categories to their --- corresponding states. -addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State) -addStatesForCats cs fa = (fa', m) - where (fa', ns) = newStates (replicate (Set.size cs) ()) fa - m = Map.fromList (zip (Set.toList cs) (map fst ns)) |
