summaryrefslogtreecommitdiff
path: root/src/GF/Speech/CFGToFiniteState.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
commitb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch)
tree0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/Speech/CFGToFiniteState.hs
parentfe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff)
removed src for 2.9
Diffstat (limited to 'src/GF/Speech/CFGToFiniteState.hs')
-rw-r--r--src/GF/Speech/CFGToFiniteState.hs265
1 files changed, 0 insertions, 265 deletions
diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs
deleted file mode 100644
index 7e6f80ba1..000000000
--- a/src/GF/Speech/CFGToFiniteState.hs
+++ /dev/null
@@ -1,265 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : CFGToFiniteState
--- Maintainer : BB
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/11/10 16:43:44 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.6 $
---
--- Approximates CFGs with finite state networks.
------------------------------------------------------------------------------
-
-module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
- MFA(..), MFALabel, cfgToMFA,cfgToFA') where
-
-import Data.List
-import Data.Maybe
-import Data.Map (Map)
-import qualified Data.Map as Map
-import Data.Set (Set)
-import qualified Data.Set as Set
-
-import GF.Data.Utilities
-import GF.Formalism.CFG
-import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
-import GF.Conversion.Types
-import GF.Infra.Ident (Ident)
-import GF.Infra.Option (Options)
-import GF.Compile.ShellState (StateGrammar)
-
-import GF.Speech.FiniteState
-import GF.Speech.Graph
-import GF.Speech.Relation
-import GF.Speech.TransformCFG
-
-data Recursivity = RightR | LeftR | NotR
-
-data MutRecSet = MutRecSet {
- mrCats :: Set Cat_,
- mrNonRecRules :: [CFRule_],
- mrRecRules :: [CFRule_],
- mrRec :: Recursivity
- }
-
-
-type MutRecSets = Map Cat_ MutRecSet
-
---
--- * Multiple DFA type
---
-
-type MFALabel a = Symbol String a
-
-data MFA a = MFA String [(String,DFA (MFALabel a))]
-
-
-
-cfgToFA :: Options -> StateGrammar -> DFA Token
-cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s
- where start = getStartCatCF opts s
-
-makeSimpleRegular :: Options -> StateGrammar -> CFRules
-makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s
- where start = getStartCatCF opts s
- preprocess = topDownFilter start . bottomUpFilter
- . removeCycles
-
-
---
--- * Compile strongly regular grammars to NFAs
---
-
--- Convert a strongly regular grammar to a finite automaton.
-compileAutomaton :: Cat_ -- ^ Start category
- -> CFRules
- -> NFA Token
-compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
- where
- (fa,s,f) = newFA_
- ns = mutRecSets g $ mutRecCats False g
-
--- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
-make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
- -> NFA Token -> NFA Token
-make_fa c@(g,ns) q0 alpha q1 fa =
- case alpha of
- [] -> newTransition q0 q1 Nothing fa
- [Tok t] -> newTransition q0 q1 (Just t) fa
- [Cat a] -> case Map.lookup a ns of
- -- a is recursive
- Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
- case mrRec n of
- RightR ->
- -- the set Ni is right-recursive or cyclic
- let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
- ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
- let (xs,Cat d) = (init ss,last ss)]
- in make_fas new $ newTransition q0 (getState a) Nothing fa'
- LeftR ->
- -- the set Ni is left-recursive
- let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
- ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
- in make_fas new $ newTransition (getState a) q1 Nothing fa'
- where
- (fa',stateMap) = addStatesForCats ni fa
- getState x = Map.findWithDefault
- (error $ "CFGToFiniteState: No state for " ++ x)
- x stateMap
- -- a is not recursive
- Nothing -> let rs = catRules g a
- in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
- (x:beta) -> let (fa',q) = newState () fa
- in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
- where
- make_fa_ = make_fa c
- make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
-
---
--- * Compile a strongly regular grammar to a DFA with sub-automata
---
-
-cfgToMFA :: Options -> StateGrammar -> MFA Token
-cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s
- where start = getStartCatCF opts s
-
--- | Build a DFA by building and expanding an MFA
-cfgToFA' :: Options -> StateGrammar -> DFA Token
-cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s
-
-buildMFA :: Cat_ -- ^ Start category
- -> CFRules -> MFA Token
-buildMFA start g = sortSubLats $ removeUnusedSubLats mfa
- where fas = compileAutomata g
- mfa = MFA start [(c, minimize fa) | (c,fa) <- fas]
-
-mfaStartDFA :: MFA a -> DFA (MFALabel a)
-mfaStartDFA (MFA start subs) =
- fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
-
-mfaToDFA :: Ord a => MFA a -> DFA a
-mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
- where
- subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
- getSub l = fromJust $ Map.lookup l subs'
- expand (FA (Graph c ns es) s f)
- = foldl' expandEdge (FA (Graph c ns []) s f) es
- expandEdge fa (f,t,x) =
- case x of
- Nothing -> newTransition f t Nothing fa
- Just (Tok s) -> newTransition f t (Just s) fa
- Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l)
-
-removeUnusedSubLats :: MFA a -> MFA a
-removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
- where
- usedMap = subLatUseMap mfa
- used = growUsedSet (Set.singleton start)
- isUsed c = c `Set.member` used
- growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
-
-subLatUseMap :: MFA a -> Map String (Set String)
-subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
-
-usedSubLats :: DFA (MFALabel a) -> Set String
-usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa]
-
-revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
-revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
-
--- | Sort sub-networks topologically.
-sortSubLats :: MFA a -> MFA a
-sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
- where
- usedByMap = revMultiMap (subLatUseMap mfa)
- sortLats _ [] = []
- sortLats ub ls = xs ++ sortLats ub' ys
- where (xs,ys) = partition ((==0) . indeg) ls
- ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
- indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
-
--- | Convert a strongly regular grammar to a number of finite automata,
--- one for each non-terminal.
--- The edges in the automata accept tokens, or name another automaton to use.
-compileAutomata :: CFRules
- -> [(Cat_,NFA (Symbol Cat_ Token))]
- -- ^ A map of non-terminals and their automata.
-compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
- where
- mrs = mutRecSets g $ mutRecCats True g
- makeOneFA c = make_fa1 mr s [Cat c] f fa
- where (fa,s,f) = newFA_
- mr = fromJust (Map.lookup c mrs)
-
-
--- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
--- adapted to build a finite automaton for a single (mutually recursive) set only.
--- Categories not in the set will result in category-labelled edges.
-make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
- -- we are building the automaton.
- -> State -- ^ State to come from
- -> [Symbol Cat_ Token] -- ^ Symbols to accept
- -> State -- ^ State to end up in
- -> NFA (Symbol Cat_ Token) -- ^ FA to add to.
- -> NFA (Symbol Cat_ Token)
-make_fa1 mr q0 alpha q1 fa =
- case alpha of
- [] -> newTransition q0 q1 Nothing fa
- [t@(Tok _)] -> newTransition q0 q1 (Just t) fa
- [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
- [Cat a] ->
- case mrRec mr of
- NotR -> -- the set is a non-recursive (always singleton) set of categories
- -- so the set of category rules is the set of rules for the whole set
- make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
- RightR -> -- the set is right-recursive or cyclic
- let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
- ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
- let (xs,Cat d) = (init ss,last ss)]
- in make_fas new $ newTransition q0 (getState a) Nothing fa'
- LeftR -> -- the set is left-recursive
- let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
- ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
- in make_fas new $ newTransition (getState a) q1 Nothing fa'
- where
- (fa',stateMap) = addStatesForCats (mrCats mr) fa
- getState x = Map.findWithDefault
- (error $ "CFGToFiniteState: No state for " ++ x)
- x stateMap
- (x:beta) -> let (fa',q) = newState () fa
- in make_fas [(q0,[x],q),(q,beta,q1)] fa'
- where
- make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
-
-mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
-mutRecSets g = Map.fromList . concatMap mkMutRecSet
- where
- mkMutRecSet cs = [ (c,ms) | c <- csl ]
- where csl = Set.toList cs
- rs = catSetRules g cs
- (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
- ms = MutRecSet {
- mrCats = cs,
- mrNonRecRules = nrs,
- mrRecRules = rrs,
- mrRec = rec
- }
- rec | null rrs = NotR
- | all (isRightLinear cs) rrs = RightR
- | otherwise = LeftR
-
---
--- * Utilities
---
-
--- | Add a state for the given NFA for each of the categories
--- in the given set. Returns a map of categories to their
--- corresponding states.
-addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
-addStatesForCats cs fa = (fa', m)
- where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
- m = Map.fromList (zip (Set.toList cs) (map fst ns))