summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Speech/CFGToFiniteState.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Speech/CFGToFiniteState.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Speech/CFGToFiniteState.hs')
-rw-r--r--src-3.0/GF/Speech/CFGToFiniteState.hs265
1 files changed, 265 insertions, 0 deletions
diff --git a/src-3.0/GF/Speech/CFGToFiniteState.hs b/src-3.0/GF/Speech/CFGToFiniteState.hs
new file mode 100644
index 000000000..7e6f80ba1
--- /dev/null
+++ b/src-3.0/GF/Speech/CFGToFiniteState.hs
@@ -0,0 +1,265 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CFGToFiniteState
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.6 $
+--
+-- Approximates CFGs with finite state networks.
+-----------------------------------------------------------------------------
+
+module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
+ MFA(..), MFALabel, cfgToMFA,cfgToFA') where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
+import GF.Conversion.Types
+import GF.Infra.Ident (Ident)
+import GF.Infra.Option (Options)
+import GF.Compile.ShellState (StateGrammar)
+
+import GF.Speech.FiniteState
+import GF.Speech.Graph
+import GF.Speech.Relation
+import GF.Speech.TransformCFG
+
+data Recursivity = RightR | LeftR | NotR
+
+data MutRecSet = MutRecSet {
+ mrCats :: Set Cat_,
+ mrNonRecRules :: [CFRule_],
+ mrRecRules :: [CFRule_],
+ mrRec :: Recursivity
+ }
+
+
+type MutRecSets = Map Cat_ MutRecSet
+
+--
+-- * Multiple DFA type
+--
+
+type MFALabel a = Symbol String a
+
+data MFA a = MFA String [(String,DFA (MFALabel a))]
+
+
+
+cfgToFA :: Options -> StateGrammar -> DFA Token
+cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s
+ where start = getStartCatCF opts s
+
+makeSimpleRegular :: Options -> StateGrammar -> CFRules
+makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s
+ where start = getStartCatCF opts s
+ preprocess = topDownFilter start . bottomUpFilter
+ . removeCycles
+
+
+--
+-- * Compile strongly regular grammars to NFAs
+--
+
+-- Convert a strongly regular grammar to a finite automaton.
+compileAutomaton :: Cat_ -- ^ Start category
+ -> CFRules
+ -> NFA Token
+compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
+ where
+ (fa,s,f) = newFA_
+ ns = mutRecSets g $ mutRecCats False g
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
+make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
+ -> NFA Token -> NFA Token
+make_fa c@(g,ns) q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [Tok t] -> newTransition q0 q1 (Just t) fa
+ [Cat a] -> case Map.lookup a ns of
+ -- a is recursive
+ Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
+ case mrRec n of
+ RightR ->
+ -- the set Ni is right-recursive or cyclic
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
+ let (xs,Cat d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ LeftR ->
+ -- the set Ni is left-recursive
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
+ ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats ni fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ -- a is not recursive
+ Nothing -> let rs = catRules g a
+ in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
+ where
+ make_fa_ = make_fa c
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
+
+--
+-- * Compile a strongly regular grammar to a DFA with sub-automata
+--
+
+cfgToMFA :: Options -> StateGrammar -> MFA Token
+cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s
+ where start = getStartCatCF opts s
+
+-- | Build a DFA by building and expanding an MFA
+cfgToFA' :: Options -> StateGrammar -> DFA Token
+cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s
+
+buildMFA :: Cat_ -- ^ Start category
+ -> CFRules -> MFA Token
+buildMFA start g = sortSubLats $ removeUnusedSubLats mfa
+ where fas = compileAutomata g
+ mfa = MFA start [(c, minimize fa) | (c,fa) <- fas]
+
+mfaStartDFA :: MFA a -> DFA (MFALabel a)
+mfaStartDFA (MFA start subs) =
+ fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
+
+mfaToDFA :: Ord a => MFA a -> DFA a
+mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
+ where
+ subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
+ getSub l = fromJust $ Map.lookup l subs'
+ expand (FA (Graph c ns es) s f)
+ = foldl' expandEdge (FA (Graph c ns []) s f) es
+ expandEdge fa (f,t,x) =
+ case x of
+ Nothing -> newTransition f t Nothing fa
+ Just (Tok s) -> newTransition f t (Just s) fa
+ Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l)
+
+removeUnusedSubLats :: MFA a -> MFA a
+removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
+ where
+ usedMap = subLatUseMap mfa
+ used = growUsedSet (Set.singleton start)
+ isUsed c = c `Set.member` used
+ growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
+
+subLatUseMap :: MFA a -> Map String (Set String)
+subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
+
+usedSubLats :: DFA (MFALabel a) -> Set String
+usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa]
+
+revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
+revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
+
+-- | Sort sub-networks topologically.
+sortSubLats :: MFA a -> MFA a
+sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
+ where
+ usedByMap = revMultiMap (subLatUseMap mfa)
+ sortLats _ [] = []
+ sortLats ub ls = xs ++ sortLats ub' ys
+ where (xs,ys) = partition ((==0) . indeg) ls
+ ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
+ indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
+
+-- | Convert a strongly regular grammar to a number of finite automata,
+-- one for each non-terminal.
+-- The edges in the automata accept tokens, or name another automaton to use.
+compileAutomata :: CFRules
+ -> [(Cat_,NFA (Symbol Cat_ Token))]
+ -- ^ A map of non-terminals and their automata.
+compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
+ where
+ mrs = mutRecSets g $ mutRecCats True g
+ makeOneFA c = make_fa1 mr s [Cat c] f fa
+ where (fa,s,f) = newFA_
+ mr = fromJust (Map.lookup c mrs)
+
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
+-- adapted to build a finite automaton for a single (mutually recursive) set only.
+-- Categories not in the set will result in category-labelled edges.
+make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
+ -- we are building the automaton.
+ -> State -- ^ State to come from
+ -> [Symbol Cat_ Token] -- ^ Symbols to accept
+ -> State -- ^ State to end up in
+ -> NFA (Symbol Cat_ Token) -- ^ FA to add to.
+ -> NFA (Symbol Cat_ Token)
+make_fa1 mr q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [t@(Tok _)] -> newTransition q0 q1 (Just t) fa
+ [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
+ [Cat a] ->
+ case mrRec mr of
+ NotR -> -- the set is a non-recursive (always singleton) set of categories
+ -- so the set of category rules is the set of rules for the whole set
+ make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
+ RightR -> -- the set is right-recursive or cyclic
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
+ let (xs,Cat d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ LeftR -> -- the set is left-recursive
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats (mrCats mr) fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fas [(q0,[x],q),(q,beta,q1)] fa'
+ where
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
+
+mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
+mutRecSets g = Map.fromList . concatMap mkMutRecSet
+ where
+ mkMutRecSet cs = [ (c,ms) | c <- csl ]
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
+ ms = MutRecSet {
+ mrCats = cs,
+ mrNonRecRules = nrs,
+ mrRecRules = rrs,
+ mrRec = rec
+ }
+ rec | null rrs = NotR
+ | all (isRightLinear cs) rrs = RightR
+ | otherwise = LeftR
+
+--
+-- * Utilities
+--
+
+-- | Add a state for the given NFA for each of the categories
+-- in the given set. Returns a map of categories to their
+-- corresponding states.
+addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
+addStatesForCats cs fa = (fa', m)
+ where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
+ m = Map.fromList (zip (Set.toList cs) (map fst ns))