summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbjorn <bjorn@bringert.net>2008-06-03 18:53:53 +0000
committerbjorn <bjorn@bringert.net>2008-06-03 18:53:53 +0000
commit61ccd948d3798dc0b74dd1d82ac6e6dc8ef49840 (patch)
treee09e85b25fdad6a3cced5a8da943014816e97c27
parent762a9d16f00d481757c657722cded9dbec7fab1d (diff)
Added speech recognition grammar generation code. There is no way yet to invoke the SRG printer, and only SRGS is included.
-rw-r--r--src-3.0/GF/Speech/CFG.hs338
-rw-r--r--src-3.0/GF/Speech/CFGToFA.hs244
-rw-r--r--src-3.0/GF/Speech/FiniteState.hs329
-rw-r--r--src-3.0/GF/Speech/Graph.hs178
-rw-r--r--src-3.0/GF/Speech/Graphviz.hs116
-rw-r--r--src-3.0/GF/Speech/PGFToCFG.hs75
-rw-r--r--src-3.0/GF/Speech/RegExp.hs143
-rw-r--r--src-3.0/GF/Speech/Relation.hs130
-rw-r--r--src-3.0/GF/Speech/SISR.hs80
-rw-r--r--src-3.0/GF/Speech/SRG.hs175
-rw-r--r--src-3.0/GF/Speech/SRGS.hs110
11 files changed, 1918 insertions, 0 deletions
diff --git a/src-3.0/GF/Speech/CFG.hs b/src-3.0/GF/Speech/CFG.hs
new file mode 100644
index 000000000..68a34caec
--- /dev/null
+++ b/src-3.0/GF/Speech/CFG.hs
@@ -0,0 +1,338 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.CFG
+--
+-- Context-free grammar representation and manipulation.
+----------------------------------------------------------------------
+module GF.Speech.CFG where
+
+import GF.Data.Utilities
+import PGF.CId
+import GF.Infra.Option
+import GF.Infra.PrintClass
+import GF.Speech.Relation
+
+import Control.Monad
+import Control.Monad.State (State, get, put, evalState)
+import qualified Data.ByteString.Char8 as BS
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.List
+import Data.Maybe (fromMaybe)
+import Data.Monoid (mconcat)
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+--
+-- * Types
+--
+
+type Cat = String
+type Token = String
+
+data Symbol c t = NonTerminal c | Terminal t
+ deriving (Eq, Ord, Show)
+
+type CFSymbol = Symbol Cat Token
+
+data CFRule = CFRule {
+ lhsCat :: Cat,
+ ruleRhs :: [CFSymbol],
+ ruleName :: CFTerm
+ }
+ deriving (Eq, Ord, Show)
+
+data CFTerm
+ = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
+ | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
+ | CFApp CFTerm CFTerm -- ^ Application
+ | CFRes Int -- ^ The result of the n:th (0-based) non-terminal
+ | CFVar Int -- ^ A lambda-bound variable
+ | CFMeta CId -- ^ A metavariable
+ deriving (Eq, Ord, Show)
+
+data CFG = CFG { cfgStartCat :: Cat,
+ cfgExternalCats :: Set Cat,
+ cfgRules :: Map Cat (Set CFRule) }
+ deriving (Eq, Ord, Show)
+
+--
+-- * Grammar filtering
+--
+
+-- | Removes all directly and indirectly cyclic productions.
+-- FIXME: this may be too aggressive, only one production
+-- needs to be removed to break a given cycle. But which
+-- one should we pick?
+-- FIXME: Does not (yet) remove productions which are cyclic
+-- because of empty productions.
+removeCycles :: CFG -> CFG
+removeCycles = onRules f
+ where f rs = filter (not . isCycle) rs
+ where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
+ isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
+ isCycle _ = False
+
+-- | Better bottom-up filter that also removes categories which contain no finite
+-- strings.
+bottomUpFilter :: CFG -> CFG
+bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
+ where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
+ okSym g = symbol (`elem` allCats g) (const True)
+
+-- | Removes categories which are not reachable from the start category.
+topDownFilter :: CFG -> CFG
+topDownFilter cfg = filterCFGCats (isRelatedTo uses (cfgStartCat cfg)) cfg
+ where
+ rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
+ uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
+
+-- | Merges categories with identical right-hand-sides.
+-- FIXME: handle probabilities
+mergeIdentical :: CFG -> CFG
+mergeIdentical g = onRules (map subst) g
+ where
+ -- maps categories to their replacement
+ m = Map.fromList [(y,concat (intersperse "+" xs))
+ | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
+ -- build data to compare for each category: a set of name,rhs pairs
+ rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
+ subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
+ substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
+
+--
+-- * Removing left recursion
+--
+
+-- The LC_LR algorithm from
+-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
+removeLeftRecursion :: CFG -> CFG
+removeLeftRecursion gr
+ = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
+ where
+ scheme1 = [CFRule a [x,NonTerminal a_x] n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ not (isLeftRecursive x),
+ let a_x = mkCat (NonTerminal a) x,
+ -- this is an extension of LC_LR to avoid generating
+ -- A-X categories for which there are no productions:
+ a_x `Set.member` newCats,
+ let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
+ (\_ -> CFRes 0) x]
+ scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
+ a <- retainedLeftRecursive,
+ b@(NonTerminal b') <- properLeftCornersOf a,
+ isLeftRecursive b,
+ CFRule _ (x:beta) n <- catRules gr b',
+ let a_x = mkCat (NonTerminal a) x,
+ let a_b = mkCat (NonTerminal a) b,
+ let i = length $ filterCats beta,
+ let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
+ (\_ -> CFApp (CFRes i) n) x]
+ scheme3 = [CFRule a_x beta n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ CFRule _ (x':beta) n <- catRules gr a,
+ x == x',
+ let a_x = mkCat (NonTerminal a) x,
+ let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
+ (\_ -> n) x]
+ scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
+
+ newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
+
+ shiftTerm :: CFTerm -> CFTerm
+ shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
+ shiftTerm (CFRes 0) = CFVar 1
+ shiftTerm (CFRes n) = CFRes (n-1)
+ shiftTerm t = t
+ -- note: the rest don't occur in the original grammar
+
+ cats = allCats gr
+ rules = allRules gr
+
+ directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
+ leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
+ properLeftCorner = transitiveClosure directLeftCorner
+ properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
+ isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
+
+ leftRecursive = reflexiveElements properLeftCorner
+ isLeftRecursive = (`Set.member` leftRecursive)
+
+ retained = cfgStartCat gr `Set.insert`
+ Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
+ NonTerminal a <- ruleRhs r]
+ isRetained = (`Set.member` retained)
+
+ retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
+
+mkCat :: CFSymbol -> CFSymbol -> Cat
+mkCat x y = showSymbol x ++ "-" ++ showSymbol y
+ where showSymbol = symbol id show
+
+-- | Get the sets of mutually recursive non-terminals for a grammar.
+mutRecCats :: Bool -- ^ If true, all categories will be in some set.
+ -- If false, only recursive categories will be included.
+ -> CFG -> [Set Cat]
+mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
+ where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
+ refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
+
+--
+-- * Approximate context-free grammars with regular grammars.
+--
+
+makeSimpleRegular :: CFG -> CFG
+makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
+
+-- Use the transformation algorithm from \"Regular Approximation of Context-free
+-- Grammars through Approximation\", Mohri and Nederhof, 2000
+-- to create an over-generating regular frammar for a context-free
+-- grammar
+makeRegular :: CFG -> CFG
+makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
+ where trSet cs | allXLinear cs rs = rs
+ | otherwise = concatMap handleCat csl
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
+ ++ concatMap (makeRightLinearRules c) (catRules g c)
+ where c' = newCat c
+ makeRightLinearRules b' (CFRule c ss n) =
+ case ys of
+ [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
+ (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
+ ++ makeRightLinearRules (newCat b) (CFRule c zs n)
+ where (xs,ys) = break (`catElem` cs) ss
+ -- don't add rules on the form A -> A
+ newRule c rhs n | rhs == [NonTerminal c] = []
+ | otherwise = [CFRule c rhs n]
+ newCat c = c ++ "$"
+
+--
+-- * CFG Utilities
+--
+
+mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
+mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
+
+groupProds :: [CFRule] -> Map Cat (Set CFRule)
+groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
+
+-- | Gets all rules in a CFG.
+allRules :: CFG -> [CFRule]
+allRules = concat . map Set.toList . Map.elems . cfgRules
+
+-- | Gets all rules in a CFG, grouped by their LHS categories.
+allRulesGrouped :: CFG -> [(Cat,[CFRule])]
+allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
+
+-- | Gets all categories which have rules.
+allCats :: CFG -> [Cat]
+allCats = Map.keys . cfgRules
+
+-- | Gets all rules for the given category.
+catRules :: CFG -> Cat -> [CFRule]
+catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
+
+-- | Gets all rules for categories in the given set.
+catSetRules :: CFG -> Set Cat -> [CFRule]
+catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
+
+onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
+onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
+
+onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
+onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
+
+-- | Clean up CFG after rules have been removed.
+cleanCFG :: CFG -> CFG
+cleanCFG = onCFG (Map.filter (not . Set.null))
+
+-- | Combine two CFGs.
+unionCFG :: CFG -> CFG -> CFG
+unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
+
+filterCFG :: (CFRule -> Bool) -> CFG -> CFG
+filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
+
+filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
+filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
+
+countCats :: CFG -> Int
+countCats = Map.size . cfgRules . cleanCFG
+
+countRules :: CFG -> Int
+countRules = length . allRules
+
+prCFG :: CFG -> String
+prCFG = unlines . map prRule . allRules
+ where
+ prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
+ prSym = symbol id (\t -> "\""++ t ++"\"")
+
+--
+-- * CFRule Utilities
+--
+
+ruleFun :: CFRule -> CId
+ruleFun (CFRule _ _ t) = f t
+ where f (CFObj n _) = n
+ f (CFApp _ x) = f x
+ f (CFAbs _ x) = f x
+ f _ = mkCId ""
+
+-- | Check if any of the categories used on the right-hand side
+-- are in the given list of categories.
+anyUsedBy :: [Cat] -> CFRule -> Bool
+anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
+
+mkCFTerm :: String -> CFTerm
+mkCFTerm n = CFObj (mkCId n) []
+
+ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
+ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Set Cat -> [CFRule] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+
+-- | Checks if a context-free rule is right-linear.
+isRightLinear :: Set Cat -- ^ The categories to consider
+ -> CFRule -- ^ The rule to check for right-linearity
+ -> Bool
+isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
+
+-- | Checks if a context-free rule is left-linear.
+isLeftLinear :: Set Cat -- ^ The categories to consider
+ -> CFRule -- ^ The rule to check for left-linearity
+ -> Bool
+isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
+
+
+--
+-- * Symbol utilities
+--
+
+symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
+symbol fc ft (NonTerminal cat) = fc cat
+symbol fc ft (Terminal tok) = ft tok
+
+mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
+mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
+
+filterCats :: [Symbol c t] -> [c]
+filterCats syms = [ cat | NonTerminal cat <- syms ]
+
+filterToks :: [Symbol c t] -> [t]
+filterToks syms = [ tok | Terminal tok <- syms ]
+
+-- | Checks if a symbol is a non-terminal of one of the given categories.
+catElem :: Ord c => Symbol c t -> Set c -> Bool
+catElem s cs = symbol (`Set.member` cs) (const False) s
+
+noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
+noCatsInSet cs = not . any (`catElem` cs)
diff --git a/src-3.0/GF/Speech/CFGToFA.hs b/src-3.0/GF/Speech/CFGToFA.hs
new file mode 100644
index 000000000..1ac4bd24e
--- /dev/null
+++ b/src-3.0/GF/Speech/CFGToFA.hs
@@ -0,0 +1,244 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.CFGToFA
+--
+-- Approximates CFGs with finite state networks.
+----------------------------------------------------------------------
+module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
+ MFA(..), cfgToMFA, cfgToFA') where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import PGF.CId
+import PGF.Data
+import GF.Data.Utilities
+import GF.Speech.CFG
+import GF.Speech.PGFToCFG
+import GF.Infra.Ident (Ident)
+
+import GF.Speech.FiniteState
+import GF.Speech.Graph
+import GF.Speech.Relation
+import GF.Speech.CFG
+
+data Recursivity = RightR | LeftR | NotR
+
+data MutRecSet = MutRecSet {
+ mrCats :: Set Cat,
+ mrNonRecRules :: [CFRule],
+ mrRecRules :: [CFRule],
+ mrRec :: Recursivity
+ }
+
+
+type MutRecSets = Map Cat MutRecSet
+
+--
+-- * Multiple DFA type
+--
+
+data MFA = MFA Cat [(Cat,DFA CFSymbol)]
+
+
+
+cfgToFA :: CFG -> DFA Token
+cfgToFA = minimize . compileAutomaton . makeSimpleRegular
+
+
+--
+-- * Compile strongly regular grammars to NFAs
+--
+
+-- Convert a strongly regular grammar to a finite automaton.
+compileAutomaton :: CFG -> NFA Token
+compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
+ where
+ (fa,s,f) = newFA_
+ ns = mutRecSets g $ mutRecCats False g
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
+make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
+ -> NFA Token -> NFA Token
+make_fa c@(g,ns) q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [Terminal t] -> newTransition q0 q1 (Just t) fa
+ [NonTerminal a] ->
+ case Map.lookup a ns of
+ -- a is recursive
+ Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
+ case mrRec n of
+ -- the set Ni is right-recursive or cyclic
+ RightR ->
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
+ let (xs,NonTerminal d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ -- the set Ni is left-recursive
+ LeftR ->
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
+ ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats ni fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ -- a is not recursive
+ Nothing -> let rs = catRules g a
+ in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
+ where
+ make_fa_ = make_fa c
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
+
+--
+-- * Compile a strongly regular grammar to a DFA with sub-automata
+--
+
+cfgToMFA :: CFG -> MFA
+cfgToMFA = buildMFA . makeSimpleRegular
+
+-- | Build a DFA by building and expanding an MFA
+cfgToFA' :: CFG -> DFA Token
+cfgToFA' = mfaToDFA . cfgToMFA
+
+buildMFA :: CFG -> MFA
+buildMFA g = sortSubLats $ removeUnusedSubLats mfa
+ where fas = compileAutomata g
+ mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
+
+mfaStartDFA :: MFA -> DFA CFSymbol
+mfaStartDFA (MFA start subs) =
+ fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
+
+mfaToDFA :: MFA -> DFA Token
+mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
+ where
+ subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
+ getSub l = fromJust $ Map.lookup l subs'
+ expand (FA (Graph c ns es) s f)
+ = foldl' expandEdge (FA (Graph c ns []) s f) es
+ expandEdge fa (f,t,x) =
+ case x of
+ Nothing -> newTransition f t Nothing fa
+ Just (Terminal s) -> newTransition f t (Just s) fa
+ Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
+
+removeUnusedSubLats :: MFA -> MFA
+removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
+ where
+ usedMap = subLatUseMap mfa
+ used = growUsedSet (Set.singleton start)
+ isUsed c = c `Set.member` used
+ growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
+
+subLatUseMap :: MFA -> Map Cat (Set Cat)
+subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
+
+usedSubLats :: DFA CFSymbol -> Set Cat
+usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
+
+-- | Sort sub-networks topologically.
+sortSubLats :: MFA -> MFA
+sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
+ where
+ usedByMap = revMultiMap (subLatUseMap mfa)
+ sortLats _ [] = []
+ sortLats ub ls = xs ++ sortLats ub' ys
+ where (xs,ys) = partition ((==0) . indeg) ls
+ ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
+ indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
+
+-- | Convert a strongly regular grammar to a number of finite automata,
+-- one for each non-terminal.
+-- The edges in the automata accept tokens, or name another automaton to use.
+compileAutomata :: CFG
+ -> [(Cat,NFA CFSymbol)]
+ -- ^ A map of non-terminals and their automata.
+compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
+ where
+ mrs = mutRecSets g $ mutRecCats True g
+ makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
+ where (fa,s,f) = newFA_
+ mr = fromJust (Map.lookup c mrs)
+
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
+-- adapted to build a finite automaton for a single (mutually recursive) set only.
+-- Categories not in the set will result in category-labelled edges.
+make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
+ -- we are building the automaton.
+ -> State -- ^ State to come from
+ -> [CFSymbol] -- ^ Symbols to accept
+ -> State -- ^ State to end up in
+ -> NFA CFSymbol -- ^ FA to add to.
+ -> NFA CFSymbol
+make_fa1 mr q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
+ [c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
+ [NonTerminal a] ->
+ case mrRec mr of
+ NotR -> -- the set is a non-recursive (always singleton) set of categories
+ -- so the set of category rules is the set of rules for the whole set
+ make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
+ RightR -> -- the set is right-recursive or cyclic
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
+ let (xs,NonTerminal d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ LeftR -> -- the set is left-recursive
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats (mrCats mr) fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fas [(q0,[x],q),(q,beta,q1)] fa'
+ where
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
+
+mutRecSets :: CFG -> [Set Cat] -> MutRecSets
+mutRecSets g = Map.fromList . concatMap mkMutRecSet
+ where
+ mkMutRecSet cs = [ (c,ms) | c <- csl ]
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
+ ms = MutRecSet {
+ mrCats = cs,
+ mrNonRecRules = nrs,
+ mrRecRules = rrs,
+ mrRec = rec
+ }
+ rec | null rrs = NotR
+ | all (isRightLinear cs) rrs = RightR
+ | otherwise = LeftR
+
+--
+-- * Utilities
+--
+
+-- | Add a state for the given NFA for each of the categories
+-- in the given set. Returns a map of categories to their
+-- corresponding states.
+addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
+addStatesForCats cs fa = (fa', m)
+ where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
+ m = Map.fromList (zip (Set.toList cs) (map fst ns))
+
+revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
+revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
diff --git a/src-3.0/GF/Speech/FiniteState.hs b/src-3.0/GF/Speech/FiniteState.hs
new file mode 100644
index 000000000..c809eb544
--- /dev/null
+++ b/src-3.0/GF/Speech/FiniteState.hs
@@ -0,0 +1,329 @@
+----------------------------------------------------------------------
+-- |
+-- Module : FiniteState
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.16 $
+--
+-- A simple finite state network module.
+-----------------------------------------------------------------------------
+module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
+ startState, finalStates,
+ states, transitions,
+ isInternal,
+ newFA, newFA_,
+ addFinalState,
+ newState, newStates,
+ newTransition, newTransitions,
+ insertTransitionWith, insertTransitionsWith,
+ mapStates, mapTransitions,
+ modifyTransitions,
+ nonLoopTransitionsTo, nonLoopTransitionsFrom,
+ loops,
+ removeState,
+ oneFinalState,
+ insertNFA,
+ onGraph,
+ moveLabelsToNodes, removeTrivialEmptyNodes,
+ minimize,
+ dfa2nfa,
+ unusedNames, renameStates,
+ prFAGraphviz, faToGraphviz) where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+import GF.Speech.Graph
+import qualified GF.Speech.Graphviz as Dot
+
+type State = Int
+
+-- | Type parameters: node id type, state label type, edge label type
+-- Data constructor arguments: nodes and edges, start state, final states
+data FA n a b = FA !(Graph n a b) !n ![n]
+
+type NFA a = FA State () (Maybe a)
+
+type DFA a = FA State () a
+
+
+startState :: FA n a b -> n
+startState (FA _ s _) = s
+
+finalStates :: FA n a b -> [n]
+finalStates (FA _ _ ss) = ss
+
+states :: FA n a b -> [(n,a)]
+states (FA g _ _) = nodes g
+
+transitions :: FA n a b -> [(n,n,b)]
+transitions (FA g _ _) = edges g
+
+newFA :: Enum n => a -- ^ Start node label
+ -> FA n a b
+newFA l = FA g s []
+ where (g,s) = newNode l (newGraph [toEnum 0..])
+
+-- | Create a new finite automaton with an initial and a final state.
+newFA_ :: Enum n => (FA n () b, n, n)
+newFA_ = (fa'', s, f)
+ where fa = newFA ()
+ s = startState fa
+ (fa',f) = newState () fa
+ fa'' = addFinalState f fa'
+
+addFinalState :: n -> FA n a b -> FA n a b
+addFinalState f (FA g s ss) = FA g s (f:ss)
+
+newState :: a -> FA n a b -> (FA n a b, n)
+newState x (FA g s ss) = (FA g' s ss, n)
+ where (g',n) = newNode x g
+
+newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
+newStates xs (FA g s ss) = (FA g' s ss, ns)
+ where (g',ns) = newNodes xs g
+
+newTransition :: n -> n -> b -> FA n a b -> FA n a b
+newTransition f t l = onGraph (newEdge (f,t,l))
+
+newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
+newTransitions es = onGraph (newEdges es)
+
+insertTransitionWith :: Eq n =>
+ (b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
+insertTransitionWith f t = onGraph (insertEdgeWith f t)
+
+insertTransitionsWith :: Eq n =>
+ (b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
+insertTransitionsWith f ts fa =
+ foldl' (flip (insertTransitionWith f)) fa ts
+
+mapStates :: (a -> c) -> FA n a b -> FA n c b
+mapStates f = onGraph (nmap f)
+
+mapTransitions :: (b -> c) -> FA n a b -> FA n a c
+mapTransitions f = onGraph (emap f)
+
+modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
+modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
+
+removeState :: Ord n => n -> FA n a b -> FA n a b
+removeState n = onGraph (removeNode n)
+
+minimize :: Ord a => NFA a -> DFA a
+minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
+
+unusedNames :: FA n a b -> [n]
+unusedNames (FA (Graph names _ _) _ _) = names
+
+-- | Gets all incoming transitions to a given state, excluding
+-- transtions from the state itself.
+nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
+nonLoopTransitionsTo s fa =
+ [(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
+
+nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
+nonLoopTransitionsFrom s fa =
+ [(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
+
+loops :: Eq n => n -> FA n a b -> [b]
+loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
+
+-- | Give new names to all nodes.
+renameStates :: Ord x => [y] -- ^ Infinite supply of new names
+ -> FA x a b
+ -> FA y a b
+renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
+ where (ns,rest) = splitAt (length (nodes g)) supply
+ newNodes = Map.fromList (zip (map fst (nodes g)) ns)
+ newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
+ s' = newName s
+ fs' = map newName fs
+
+-- | Insert an NFA into another
+insertNFA :: NFA a -- ^ NFA to insert into
+ -> (State, State) -- ^ States to insert between
+ -> NFA a -- ^ NFA to insert.
+ -> NFA a
+insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
+ = FA (newEdges es g') s1 fs1
+ where
+ es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
+ (g',ren) = mergeGraphs g1 g2
+
+onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
+onGraph f (FA g s ss) = FA (f g) s ss
+
+
+-- | Make the finite automaton have a single final state
+-- by adding a new final state and adding an edge
+-- from the old final states to the new state.
+oneFinalState :: a -- ^ Label to give the new node
+ -> b -- ^ Label to give the new edges
+ -> FA n a b -- ^ The old network
+ -> FA n a b -- ^ The new network
+oneFinalState nl el fa =
+ let (FA g s fs,nf) = newState nl fa
+ es = [ (f,nf,el) | f <- fs ]
+ in FA (newEdges es g) s [nf]
+
+-- | Transform a standard finite automaton with labelled edges
+-- to one where the labels are on the nodes instead. This can add
+-- up to one extra node per edge.
+moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
+moveLabelsToNodes = onGraph f
+ where f g@(Graph c _ _) = Graph c' ns (concat ess)
+ where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
+ (c',is') = mapAccumL fixIncoming c is
+ (ns,ess) = unzip (concat is')
+
+
+-- | Remove empty nodes which are not start or final, and have
+-- exactly one outgoing edge or exactly one incoming edge.
+removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
+removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
+
+-- | Move edges to empty nodes to point to the next node(s).
+-- This is not done if the pointed-to node is a final node.
+skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
+skipSimpleEmptyNodes fa = onGraph og fa
+ where
+ og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
+ where
+ es' = concatMap changeEdge es
+ info = nodeInfo g
+ changeEdge e@(f,t,())
+ | isNothing (getNodeLabel info t)
+ -- && (i * o <= i + o)
+ && not (isFinal fa t)
+ = [ (f,t',()) | (_,t',()) <- getOutgoing info t]
+ | otherwise = [e]
+-- where i = inDegree info t
+-- o = outDegree info t
+
+isInternal :: Eq n => FA n a b -> n -> Bool
+isInternal (FA _ start final) n = n /= start && n `notElem` final
+
+isFinal :: Eq n => FA n a b -> n -> Bool
+isFinal (FA _ _ final) n = n `elem` final
+
+-- | Remove all internal nodes with no incoming edges
+-- or no outgoing edges.
+pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
+pruneUnusable fa = onGraph f fa
+ where
+ f g = if Set.null rns then g else f (removeNodes rns g)
+ where info = nodeInfo g
+ rns = Set.fromList [ n | (n,_) <- nodes g,
+ isInternal fa n,
+ inDegree info n == 0
+ || outDegree info n == 0]
+
+fixIncoming :: (Ord n, Eq a) => [n]
+ -> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
+ -> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
+ -- incoming edges.
+fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
+ where ls = nub $ map edgeLabel es
+ (cs',cs'') = splitAt (length ls) cs
+ newNodes = zip cs' ls
+ es' = [ (x,n,()) | x <- map fst newNodes ]
+ -- separate cyclic and non-cyclic edges
+ (cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
+ -- keep all incoming non-cyclic edges with the right label
+ to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
+ -- for each cyclic edge with the right label,
+ -- add an edge from each of the new nodes (including this one)
+ ++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
+ newContexts = [ (v, to v) | v <- newNodes ]
+
+alphabet :: Eq b => Graph n a (Maybe b) -> [b]
+alphabet = nub . catMaybes . map edgeLabel . edges
+
+determinize :: Ord a => NFA a -> DFA a
+determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
+ (ns',es') = (Set.toList ns, Set.toList es)
+ final = filter isDFAFinal ns'
+ fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
+ in renameStates [0..] fa
+ where info = nodeInfo g
+-- reach = nodesReachable out
+ start = closure info $ Set.singleton s
+ isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
+ h currentStates oldStates es
+ | Set.null currentStates = (oldStates,es)
+ | otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
+ where
+ allOldStates = oldStates `Set.union` currentStates
+ (newStates,es') = new (Set.toList currentStates) Set.empty es
+ uniqueNewStates = newStates Set.\\ allOldStates
+ -- Get the sets of states reachable from the given states
+ -- by consuming one symbol, and the associated edges.
+ new [] rs es = (rs,es)
+ new (n:ns) rs es = new ns rs' es'
+ where cs = reachable info n --reachable reach n
+ rs' = rs `Set.union` Set.fromList (map snd cs)
+ es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
+
+
+-- | Get all the nodes reachable from a list of nodes by only empty edges.
+closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
+closure info x = closure_ x x
+ where closure_ acc check | Set.null check = acc
+ | otherwise = closure_ acc' check'
+ where
+ reach = Set.fromList [y | x <- Set.toList check,
+ (_,y,Nothing) <- getOutgoing info x]
+ acc' = acc `Set.union` reach
+ check' = reach Set.\\ acc
+
+-- | Get a map of labels to sets of all nodes reachable
+-- from a the set of nodes by one edge with the given
+-- label and then any number of empty edges.
+reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
+reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
+reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
+
+reverseNFA :: NFA a -> NFA a
+reverseNFA (FA g s fs) = FA g''' s' [s]
+ where g' = reverseGraph g
+ (g'',s') = newNode () g'
+ g''' = newEdges [(s',f,Nothing) | f <- fs] g''
+
+dfa2nfa :: DFA a -> NFA a
+dfa2nfa = mapTransitions Just
+
+--
+-- * Visualization
+--
+
+prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
+prFAGraphviz = Dot.prGraphviz . faToGraphviz
+
+prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
+prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
+
+faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
+faToGraphviz (FA (Graph _ ns es) s f)
+ = Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
+ where mkNode (n,l) = Dot.Node (show n) attrs
+ where attrs = [("label",l)]
+ ++ if n == s then [("shape","box")] else []
+ ++ if n `elem` f then [("style","bold")] else []
+ mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
+
+--
+-- * Utilities
+--
+
+lookups :: Ord k => [k] -> Map k a -> [a]
+lookups xs m = mapMaybe (flip Map.lookup m) xs
diff --git a/src-3.0/GF/Speech/Graph.hs b/src-3.0/GF/Speech/Graph.hs
new file mode 100644
index 000000000..1a0ebe0c0
--- /dev/null
+++ b/src-3.0/GF/Speech/Graph.hs
@@ -0,0 +1,178 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Graph
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.2 $
+--
+-- A simple graph module.
+-----------------------------------------------------------------------------
+module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
+ , newGraph, nodes, edges
+ , nmap, emap, newNode, newNodes, newEdge, newEdges
+ , insertEdgeWith
+ , removeNode, removeNodes
+ , nodeInfo
+ , getIncoming, getOutgoing, getNodeLabel
+ , inDegree, outDegree
+ , nodeLabel
+ , edgeFrom, edgeTo, edgeLabel
+ , reverseGraph, mergeGraphs, renameNodes
+ ) where
+
+import GF.Data.Utilities
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
+ deriving (Eq,Show)
+
+type Node n a = (n,a)
+type Edge n b = (n,n,b)
+
+type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
+
+-- | Create a new empty graph.
+newGraph :: [n] -> Graph n a b
+newGraph ns = Graph ns [] []
+
+-- | Get all the nodes in the graph.
+nodes :: Graph n a b -> [Node n a]
+nodes (Graph _ ns _) = ns
+
+-- | Get all the edges in the graph.
+edges :: Graph n a b -> [Edge n b]
+edges (Graph _ _ es) = es
+
+-- | Map a function over the node labels.
+nmap :: (a -> c) -> Graph n a b -> Graph n c b
+nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
+
+-- | Map a function over the edge labels.
+emap :: (b -> c) -> Graph n a b -> Graph n a c
+emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
+
+-- | Add a node to the graph.
+newNode :: a -- ^ Node label
+ -> Graph n a b
+ -> (Graph n a b,n) -- ^ Node graph and name of new node
+newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
+
+newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
+newNodes ls g = (g', zip ns ls)
+ where (g',ns) = mapAccumL (flip newNode) g ls
+-- lazy version:
+--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
+-- where (xs,cs') = splitAt (length ls) cs
+-- ns' = zip xs ls
+
+newEdge :: Edge n b -> Graph n a b -> Graph n a b
+newEdge e (Graph c ns es) = Graph c ns (e:es)
+
+newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
+newEdges es g = foldl' (flip newEdge) g es
+-- lazy version:
+-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
+
+insertEdgeWith :: Eq n =>
+ (b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
+insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
+ where h [] = [e]
+ h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
+ | otherwise = e':h es'
+
+-- | Remove a node and all edges to and from that node.
+removeNode :: Ord n => n -> Graph n a b -> Graph n a b
+removeNode n = removeNodes (Set.singleton n)
+
+-- | Remove a set of nodes and all edges to and from those nodes.
+removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
+removeNodes xs (Graph c ns es) = Graph c ns' es'
+ where
+ keepNode n = not (Set.member n xs)
+ ns' = [ x | x@(n,_) <- ns, keepNode n ]
+ es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
+
+-- | Get a map of node names to info about each node.
+nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
+nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
+ where
+ inc = groupEdgesBy edgeTo g
+ out = groupEdgesBy edgeFrom g
+ fn m n = fromMaybe [] (Map.lookup n m)
+
+groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
+ -> Graph n a b -> Map n [Edge n b]
+groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
+
+lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
+lookupNode i n = fromJust $ Map.lookup n i
+
+getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
+getIncoming i n = let (_,inc,_) = lookupNode i n in inc
+
+getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
+getOutgoing i n = let (_,_,out) = lookupNode i n in out
+
+inDegree :: Ord n => NodeInfo n a b -> n -> Int
+inDegree i n = length $ getIncoming i n
+
+outDegree :: Ord n => NodeInfo n a b -> n -> Int
+outDegree i n = length $ getOutgoing i n
+
+getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
+getNodeLabel i n = let (l,_,_) = lookupNode i n in l
+
+nodeLabel :: Node n a -> a
+nodeLabel = snd
+
+edgeFrom :: Edge n b -> n
+edgeFrom (f,_,_) = f
+
+edgeTo :: Edge n b -> n
+edgeTo (_,t,_) = t
+
+edgeLabel :: Edge n b -> b
+edgeLabel (_,_,l) = l
+
+reverseGraph :: Graph n a b -> Graph n a b
+reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
+
+-- | Add the nodes from the second graph to the first graph.
+-- The nodes in the second graph will be renamed using the name
+-- supply in the first graph.
+-- This function is more efficient when the second graph
+-- is smaller than the first.
+mergeGraphs :: Ord m => Graph n a b -> Graph m a b
+ -> (Graph n a b, m -> n) -- ^ The new graph and a function translating
+ -- the old names of nodes in the second graph
+ -- to names in the new graph.
+mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
+ where
+ (xs,c') = splitAt (length (nodes g2)) c
+ newNames = Map.fromList (zip (map fst (nodes g2)) xs)
+ newName n = fromJust $ Map.lookup n newNames
+ Graph _ ns2 es2 = renameNodes newName undefined g2
+
+-- | Rename the nodes in the graph.
+renameNodes :: (n -> m) -- ^ renaming function
+ -> [m] -- ^ infinite supply of fresh node names, to
+ -- use when adding nodes in the future.
+ -> Graph n a b -> Graph m a b
+renameNodes newName c (Graph _ ns es) = Graph c ns' es'
+ where ns' = map' (\ (n,x) -> (newName n,x)) ns
+ es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
+
+-- | A strict 'map'
+map' :: (a -> b) -> [a] -> [b]
+map' _ [] = []
+map' f (x:xs) = ((:) $! f x) $! map' f xs
diff --git a/src-3.0/GF/Speech/Graphviz.hs b/src-3.0/GF/Speech/Graphviz.hs
new file mode 100644
index 000000000..1851fcb64
--- /dev/null
+++ b/src-3.0/GF/Speech/Graphviz.hs
@@ -0,0 +1,116 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Graphviz
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/09/15 18:10:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.2 $
+--
+-- Graphviz DOT format representation and printing.
+-----------------------------------------------------------------------------
+
+module GF.Speech.Graphviz (
+ Graph(..), GraphType(..),
+ Node(..), Edge(..),
+ Attr,
+ addSubGraphs,
+ setName,
+ setAttr,
+ prGraphviz
+ ) where
+
+import Data.Char
+
+import GF.Data.Utilities
+
+-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs
+data Graph = Graph {
+ gType :: GraphType,
+ gId :: Maybe String,
+ gAttrs :: [Attr],
+ gNodes :: [Node],
+ gEdges :: [Edge],
+ gSubgraphs :: [Graph]
+ }
+ deriving (Show)
+
+data GraphType = Directed | Undirected
+ deriving (Show)
+
+data Node = Node String [Attr]
+ deriving Show
+
+data Edge = Edge String String [Attr]
+ deriving Show
+
+type Attr = (String,String)
+
+--
+-- * Graph construction
+--
+
+addSubGraphs :: [Graph] -> Graph -> Graph
+addSubGraphs gs g = g { gSubgraphs = gs ++ gSubgraphs g }
+
+setName :: String -> Graph -> Graph
+setName n g = g { gId = Just n }
+
+setAttr :: String -> String -> Graph -> Graph
+setAttr n v g = g { gAttrs = tableSet n v (gAttrs g) }
+
+--
+-- * Pretty-printing
+--
+
+prGraphviz :: Graph -> String
+prGraphviz g@(Graph t i _ _ _ _) =
+ graphtype t ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}\n"
+
+prSubGraph :: Graph -> String
+prSubGraph g@(Graph _ i _ _ _ _) =
+ "subgraph" ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}"
+
+prGraph :: Graph -> String
+prGraph (Graph t id at ns es ss) =
+ unlines $ map (++";") (map prAttr at
+ ++ map prNode ns
+ ++ map (prEdge t) es
+ ++ map prSubGraph ss)
+
+graphtype :: GraphType -> String
+graphtype Directed = "digraph"
+graphtype Undirected = "graph"
+
+prNode :: Node -> String
+prNode (Node n at) = esc n ++ " " ++ prAttrList at
+
+prEdge :: GraphType -> Edge -> String
+prEdge t (Edge x y at) = esc x ++ " " ++ edgeop t ++ " " ++ esc y ++ " " ++ prAttrList at
+
+edgeop :: GraphType -> String
+edgeop Directed = "->"
+edgeop Undirected = "--"
+
+prAttrList :: [Attr] -> String
+prAttrList [] = ""
+prAttrList at = "[" ++ join "," (map prAttr at) ++ "]"
+
+prAttr :: Attr -> String
+prAttr (n,v) = esc n ++ " = " ++ esc v
+
+esc :: String -> String
+esc s | needEsc s = "\"" ++ concat [ if shouldEsc c then ['\\',c] else [c] | c <- s ] ++ "\""
+ | otherwise = s
+ where shouldEsc = (`elem` ['"', '\\'])
+
+needEsc :: String -> Bool
+needEsc [] = True
+needEsc xs | all isDigit xs = False
+needEsc (x:xs) = not (isIDFirst x && all isIDChar xs)
+
+isIDFirst, isIDChar :: Char -> Bool
+isIDFirst c = c `elem` (['_']++['a'..'z']++['A'..'Z'])
+isIDChar c = isIDFirst c || isDigit c
diff --git a/src-3.0/GF/Speech/PGFToCFG.hs b/src-3.0/GF/Speech/PGFToCFG.hs
new file mode 100644
index 000000000..a2dc32f32
--- /dev/null
+++ b/src-3.0/GF/Speech/PGFToCFG.hs
@@ -0,0 +1,75 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.PGFToCFG
+--
+-- Approximates PGF grammars with context-free grammars.
+----------------------------------------------------------------------
+module GF.Speech.PGFToCFG (pgfToCFG) where
+
+import PGF.CId
+import PGF.Data as PGF
+import PGF.Macros
+import GF.Infra.Ident
+import GF.Speech.CFG
+
+import Data.Array as Array
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Maybe
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+
+pgfToCFG :: PGF
+ -> CId -- ^ Concrete syntax name
+ -> CFG
+pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fruleToCFRule rules)
+ where
+ pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
+
+ rules :: [FRule]
+ rules = Array.elems (PGF.allRules pinfo)
+
+ fcatGFCats :: Map FCat CId
+ fcatGFCats = Map.fromList [(fc,c) | (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
+
+ fcatGFCat :: FCat -> CId
+ fcatGFCat c = fromMaybe (mkCId "Unknown") (Map.lookup c fcatGFCats)
+
+ fcatToCat :: FCat -> FIndex -> Cat
+ fcatToCat c l = prCId (fcatGFCat c) ++ "_" ++ show c ++ "_" ++ show l
+
+ extCats :: Set Cat
+ extCats = Set.fromList $ map lhsCat startRules
+
+ -- NOTE: this is only correct for cats that have a lincat with exactly one row.
+ startRules :: [CFRule]
+ startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc 0)] (CFRes 0)
+ | (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
+
+ fruleToCFRule :: FRule -> [CFRule]
+ fruleToCFRule (FRule f ps args c rhs) =
+ [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
+ | (l,row) <- Array.assocs rhs]
+ where
+ mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
+ mkRhs = map fsymbolToSymbol . Array.elems
+
+ fsymbolToSymbol :: FSymbol -> CFSymbol
+ fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
+ fsymbolToSymbol (FSymTok t) = Terminal t
+
+ fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
+ fixProfile row = concatMap positions
+ where
+ nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
+ positions i = [k | (k,FSymCat _ j) <- nts, j == i]
+
+ profilesToTerm :: [Profile] -> CFTerm
+ profilesToTerm [[n]] | f == wildCId = CFRes n
+ profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
+ where (argTypes,_) = catSkeleton $ lookType pgf f
+
+ profileToTerm :: CId -> Profile -> CFTerm
+ profileToTerm t [] = CFMeta t
+ profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
diff --git a/src-3.0/GF/Speech/RegExp.hs b/src-3.0/GF/Speech/RegExp.hs
new file mode 100644
index 000000000..5ee40828e
--- /dev/null
+++ b/src-3.0/GF/Speech/RegExp.hs
@@ -0,0 +1,143 @@
+module GF.Speech.RegExp (RE(..),
+ epsilonRE, nullRE,
+ isEpsilon, isNull,
+ unionRE, concatRE, seqRE,
+ repeatRE, minimizeRE,
+ mapRE, mapRE', joinRE,
+ symbolsRE,
+ dfa2re, prRE) where
+
+import Data.List
+
+import GF.Data.Utilities
+import GF.Speech.FiniteState
+
+data RE a =
+ REUnion [RE a] -- ^ REUnion [] is null
+ | REConcat [RE a] -- ^ REConcat [] is epsilon
+ | RERepeat (RE a)
+ | RESymbol a
+ deriving (Eq,Ord,Show)
+
+
+dfa2re :: (Ord a) => DFA a -> RE a
+dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
+ . oneFinalState () epsilonRE . mapTransitions RESymbol
+ where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
+ merge es = [(f,t,unionRE ls)
+ | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
+
+elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
+elimStates fa =
+ case [s | (s,_) <- states fa, isInternal fa s] of
+ [] -> fa
+ sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
+ where sAs = nonLoopTransitionsTo sE fa
+ sBs = nonLoopTransitionsFrom sE fa
+ r2 = unionRE $ loops sE fa
+ ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
+ r r1 r3 = concatRE [r1, repeatRE r2, r3]
+
+epsilonRE :: RE a
+epsilonRE = REConcat []
+
+nullRE :: RE a
+nullRE = REUnion []
+
+isNull :: RE a -> Bool
+isNull (REUnion []) = True
+isNull _ = False
+
+isEpsilon :: RE a -> Bool
+isEpsilon (REConcat []) = True
+isEpsilon _ = False
+
+unionRE :: Ord a => [RE a] -> RE a
+unionRE = unionOrId . sortNub . concatMap toList
+ where
+ toList (REUnion xs) = xs
+ toList x = [x]
+ unionOrId [r] = r
+ unionOrId rs = REUnion rs
+
+concatRE :: [RE a] -> RE a
+concatRE xs | any isNull xs = nullRE
+ | otherwise = case concatMap toList xs of
+ [r] -> r
+ rs -> REConcat rs
+ where
+ toList (REConcat xs) = xs
+ toList x = [x]
+
+seqRE :: [a] -> RE a
+seqRE = concatRE . map RESymbol
+
+repeatRE :: RE a -> RE a
+repeatRE x | isNull x || isEpsilon x = epsilonRE
+ | otherwise = RERepeat x
+
+finalRE :: Ord a => DFA (RE a) -> RE a
+finalRE fa = concatRE [repeatRE r1, r2,
+ repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
+ where
+ s0 = startState fa
+ [sF] = finalStates fa
+ r1 = unionRE $ loops s0 fa
+ r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
+ r3 = unionRE $ loops sF fa
+ r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
+
+reverseRE :: RE a -> RE a
+reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
+reverseRE (REUnion xs) = REUnion (map reverseRE xs)
+reverseRE (RERepeat x) = RERepeat (reverseRE x)
+reverseRE x = x
+
+minimizeRE :: Ord a => RE a -> RE a
+minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
+
+mergeForward :: Ord a => RE a -> RE a
+mergeForward (REUnion xs) =
+ unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
+mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
+mergeForward (RERepeat r) = repeatRE (mergeForward r)
+mergeForward r = r
+
+firstRE :: RE a -> (RE a, RE a)
+firstRE (REConcat (x:xs)) = (x, REConcat xs)
+firstRE r = (r,epsilonRE)
+
+mapRE :: (a -> b) -> RE a -> RE b
+mapRE f = mapRE' (RESymbol . f)
+
+mapRE' :: (a -> RE b) -> RE a -> RE b
+mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
+mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
+mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
+mapRE' f (RESymbol s) = f s
+
+joinRE :: RE (RE a) -> RE a
+joinRE (REConcat xs) = REConcat (map joinRE xs)
+joinRE (REUnion xs) = REUnion (map joinRE xs)
+joinRE (RERepeat xs) = RERepeat (joinRE xs)
+joinRE (RESymbol ss) = ss
+
+symbolsRE :: RE a -> [a]
+symbolsRE (REConcat xs) = concatMap symbolsRE xs
+symbolsRE (REUnion xs) = concatMap symbolsRE xs
+symbolsRE (RERepeat x) = symbolsRE x
+symbolsRE (RESymbol x) = [x]
+
+-- Debugging
+
+prRE :: RE String -> String
+prRE = prRE' 0
+
+prRE' _ (REUnion []) = "<NULL>"
+prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
+prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
+prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
+prRE' _ (RESymbol s) = s
+
+p n m s | n >= m = "(" ++ s ++ ")"
+ | True = s
diff --git a/src-3.0/GF/Speech/Relation.hs b/src-3.0/GF/Speech/Relation.hs
new file mode 100644
index 000000000..641d671a9
--- /dev/null
+++ b/src-3.0/GF/Speech/Relation.hs
@@ -0,0 +1,130 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Relation
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/10/26 17:13:13 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.1 $
+--
+-- A simple module for relations.
+-----------------------------------------------------------------------------
+
+module GF.Speech.Relation (Rel, mkRel, mkRel'
+ , allRelated , isRelatedTo
+ , transitiveClosure
+ , reflexiveClosure, reflexiveClosure_
+ , symmetricClosure
+ , symmetricSubrelation, reflexiveSubrelation
+ , reflexiveElements
+ , equivalenceClasses
+ , isTransitive, isReflexive, isSymmetric
+ , isEquivalence
+ , isSubRelationOf) where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+
+type Rel a = Map a (Set a)
+
+-- | Creates a relation from a list of related pairs.
+mkRel :: Ord a => [(a,a)] -> Rel a
+mkRel ps = relates ps Map.empty
+
+-- | Creates a relation from a list pairs of elements and the elements
+-- related to them.
+mkRel' :: Ord a => [(a,[a])] -> Rel a
+mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
+
+relToList :: Rel a -> [(a,a)]
+relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
+
+-- | Add a pair to the relation.
+relate :: Ord a => a -> a -> Rel a -> Rel a
+relate x y r = Map.insertWith Set.union x (Set.singleton y) r
+
+-- | Add a list of pairs to the relation.
+relates :: Ord a => [(a,a)] -> Rel a -> Rel a
+relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
+
+-- | Checks if an element is related to another.
+isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
+isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
+
+-- | Get the set of elements to which a given element is related.
+allRelated :: Ord a => Rel a -> a -> Set a
+allRelated r x = fromMaybe Set.empty (Map.lookup x r)
+
+-- | Get all elements in the relation.
+domain :: Ord a => Rel a -> Set a
+domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
+
+-- | Keep only pairs for which both elements are in the given set.
+intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
+intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
+
+transitiveClosure :: Ord a => Rel a -> Rel a
+transitiveClosure r = fix (Map.map growSet) r
+ where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
+
+reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
+ -> Rel a -> Rel a
+reflexiveClosure_ u r = relates [(x,x) | x <- u] r
+
+-- | Uses 'domain'
+reflexiveClosure :: Ord a => Rel a -> Rel a
+reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
+
+symmetricClosure :: Ord a => Rel a -> Rel a
+symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
+
+symmetricSubrelation :: Ord a => Rel a -> Rel a
+symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
+
+reflexiveSubrelation :: Ord a => Rel a -> Rel a
+reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
+
+-- | Get the set of elements which are related to themselves.
+reflexiveElements :: Ord a => Rel a -> Set a
+reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
+
+-- | Keep the related pairs for which the predicate is true.
+filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
+filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
+
+-- | Remove keys that map to no elements.
+purgeEmpty :: Ord a => Rel a -> Rel a
+purgeEmpty r = Map.filter (not . Set.null) r
+
+
+-- | Get the equivalence classes from an equivalence relation.
+equivalenceClasses :: Ord a => Rel a -> [Set a]
+equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
+ where equivalenceClasses_ [] _ = []
+ equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
+ where ys = allRelated r x
+ zs = [x' | x' <- xs, not (x' `Set.member` ys)]
+
+isTransitive :: Ord a => Rel a -> Bool
+isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
+ y <- Set.toList ys, z <- Set.toList (allRelated r y)]
+
+isReflexive :: Ord a => Rel a -> Bool
+isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
+
+isSymmetric :: Ord a => Rel a -> Bool
+isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
+
+isEquivalence :: Ord a => Rel a -> Bool
+isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
+
+isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
+isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)
diff --git a/src-3.0/GF/Speech/SISR.hs b/src-3.0/GF/Speech/SISR.hs
new file mode 100644
index 000000000..df66804d3
--- /dev/null
+++ b/src-3.0/GF/Speech/SISR.hs
@@ -0,0 +1,80 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.SISR
+--
+-- Abstract syntax and pretty printer for SISR,
+-- (Semantic Interpretation for Speech Recognition)
+----------------------------------------------------------------------
+module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
+ topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
+
+import Data.List
+
+import GF.Data.Utilities
+import GF.Infra.Ident
+import GF.Speech.CFG
+import GF.Speech.SRG (SRGNT)
+import PGF.CId
+
+import qualified GF.JavaScript.AbsJS as JS
+import qualified GF.JavaScript.PrintJS as JS
+
+data SISRFormat =
+ -- SISR Working draft 1 April 2003
+ -- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
+ SISROld
+ deriving Show
+
+type SISRTag = [JS.DeclOrExpr]
+
+
+prSISR :: SISRTag -> String
+prSISR = JS.printTree
+
+topCatSISR :: String -> SISRFormat -> SISRTag
+topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
+
+profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
+profileInitSISR t fmt
+ | null (usedArgs t) = []
+ | otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
+
+usedArgs :: CFTerm -> [Int]
+usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
+usedArgs (CFAbs _ x) = usedArgs x
+usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
+usedArgs (CFRes i) = [i]
+usedArgs _ = []
+
+catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
+catSISR t (c,i) fmt
+ | i `elem` usedArgs t = map JS.DExpr
+ [JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
+ | otherwise = []
+
+profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
+profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
+ where
+ f (CFObj n ts) = tree (prCId n) (map f ts)
+ f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
+ f (CFApp x y) = JS.ECall (f x) [f y]
+ f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
+ f (CFVar v) = JS.EVar (var v)
+ f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (prCId typ))]
+
+fmtOut SISROld = JS.EVar (JS.Ident "$")
+
+fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
+
+args = JS.Ident "a"
+
+var v = JS.Ident ("x" ++ show v)
+
+field x y = JS.EMember x (JS.Ident y)
+
+ass = JS.EAssign
+
+tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
+
+obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]
+
diff --git a/src-3.0/GF/Speech/SRG.hs b/src-3.0/GF/Speech/SRG.hs
new file mode 100644
index 000000000..5816c0cb5
--- /dev/null
+++ b/src-3.0/GF/Speech/SRG.hs
@@ -0,0 +1,175 @@
+----------------------------------------------------------------------
+-- |
+-- Module : SRG
+--
+-- Representation of, conversion to, and utilities for
+-- printing of a general Speech Recognition Grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+----------------------------------------------------------------------
+module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem
+ , SRGNT, CFTerm
+ , makeSRG
+ , makeSimpleSRG
+ , makeNonRecursiveSRG
+ , lookupFM_, prtS
+ ) where
+
+import GF.Data.Operations
+import GF.Data.Utilities
+import GF.Infra.Ident
+import GF.Infra.PrintClass
+import GF.Speech.CFG
+import GF.Speech.PGFToCFG
+import GF.Speech.Relation
+import GF.Speech.FiniteState
+import GF.Speech.RegExp
+import GF.Speech.CFGToFA
+import GF.Infra.Option
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+
+import Data.List
+import Data.Maybe (fromMaybe, maybeToList)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import Debug.Trace
+
+data SRG = SRG { srgName :: String -- ^ grammar name
+ , srgStartCat :: Cat -- ^ start category name
+ , srgExternalCats :: Set Cat
+ , srgLanguage :: Maybe String -- ^ The language for which the grammar
+ -- is intended, e.g. en-UK
+ , srgRules :: [SRGRule]
+ }
+ deriving (Eq,Show)
+
+data SRGRule = SRGRule Cat [SRGAlt] -- ^ SRG category name, original category name
+ -- and productions
+ deriving (Eq,Show)
+
+-- | maybe a probability, a rule name and an EBNF right-hand side
+data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
+ deriving (Eq,Show)
+
+type SRGItem = RE SRGSymbol
+
+type SRGSymbol = Symbol SRGNT Token
+
+-- | An SRG non-terminal. Category name and its number in the profile.
+type SRGNT = (Cat, Int)
+
+
+-- | Create a non-left-recursive SRG.
+-- FIXME: the probabilities in the returned
+-- grammar may be meaningless.
+makeSimpleSRG :: PGF
+ -> CId -- ^ Concrete syntax name.
+ -> SRG
+makeSimpleSRG = makeSRG preprocess
+ where
+ preprocess = traceStats "After mergeIdentical"
+ . mergeIdentical
+ . traceStats "After removeLeftRecursion"
+ . removeLeftRecursion
+ . traceStats "After topDownFilter"
+ . topDownFilter
+ . traceStats "After bottomUpFilter"
+ . bottomUpFilter
+ . traceStats "After removeCycles"
+ . removeCycles
+ . traceStats "Inital CFG"
+
+traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
+
+stats g = "Categories: " ++ show (countCats g)
+ ++ " Rules: " ++ show (countRules g)
+
+makeNonRecursiveSRG :: PGF
+ -> CId -- ^ Concrete syntax name.
+ -> SRG
+makeNonRecursiveSRG pgf cnc =
+ SRG { srgName = prCId cnc,
+ srgStartCat = start,
+ srgExternalCats = cfgExternalCats cfg,
+ srgLanguage = getSpeechLanguage pgf cnc,
+ srgRules = rs }
+ where
+ cfg = pgfToCFG pgf cnc
+ MFA start dfas = cfgToMFA cfg
+ rs = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
+ where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
+ dummyCFTerm = CFMeta (mkCId "dummy")
+ dummySRGNT = mapSymbol (\c -> (c,0)) id
+
+makeSRG :: (CFG -> CFG)
+ -> PGF
+ -> CId -- ^ Concrete syntax name.
+ -> SRG
+makeSRG preprocess pgf cnc =
+ SRG { srgName = prCId cnc,
+ srgStartCat = cfgStartCat cfg,
+ srgExternalCats = cfgExternalCats cfg,
+ srgLanguage = getSpeechLanguage pgf cnc,
+ srgRules = rs }
+ where
+ cfg = pgfToCFG pgf cnc
+ (_,cfgRules) = unzip $ allRulesGrouped $ preprocess cfg
+ rs = map cfRulesToSRGRule cfgRules
+
+getSpeechLanguage :: PGF -> CId -> Maybe String
+getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language")
+
+cfRulesToSRGRule :: [CFRule] -> SRGRule
+cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
+ where
+ alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
+ rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
+
+ mkSRGSymbols _ [] = []
+ mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss
+ mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss
+
+allSRGCats :: SRG -> [String]
+allSRGCats SRG { srgRules = rs } = [c | SRGRule c _ <- rs]
+
+--
+-- * Size-optimized EBNF SRGs
+--
+
+srgItem :: [[SRGSymbol]] -> SRGItem
+srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
+-- non-optimizing version:
+--srgItem = unionRE . map seqRE
+
+-- | Merges a list of right-hand sides which all have the same
+-- sequence of non-terminals.
+mergeItems :: [[SRGSymbol]] -> SRGItem
+mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
+
+groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]]
+groupTokens [] = []
+groupTokens (Terminal t:ss) = case groupTokens ss of
+ Terminal ts:ss' -> Terminal (t:ts):ss'
+ ss' -> Terminal [t]:ss'
+groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss
+
+ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol
+ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal)))
+
+--
+-- * Utilities for building and printing SRGs
+--
+
+lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
+lookupFM_ fm k = Map.findWithDefault err k fm
+ where err = error $ "Key not found: " ++ show k
+ ++ "\namong " ++ show (Map.keys fm)
+
+prtS :: Print a => a -> ShowS
+prtS = showString . prt
diff --git a/src-3.0/GF/Speech/SRGS.hs b/src-3.0/GF/Speech/SRGS.hs
new file mode 100644
index 000000000..cfbad42dc
--- /dev/null
+++ b/src-3.0/GF/Speech/SRGS.hs
@@ -0,0 +1,110 @@
+----------------------------------------------------------------------
+-- |
+-- Module : SRGS
+--
+-- Prints an SRGS XML speech recognition grammars.
+----------------------------------------------------------------------
+module GF.Speech.SRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
+
+import GF.Data.Utilities
+import GF.Data.XML
+import GF.Infra.Option
+import GF.Speech.CFG
+import GF.Speech.RegExp
+import GF.Speech.SISR as SISR
+import GF.Speech.SRG
+import PGF (PGF, CId)
+
+import Control.Monad
+import Data.Char (toUpper,toLower)
+import Data.List
+import Data.Maybe
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+
+srgsXmlPrinter :: Maybe SISRFormat
+ -> PGF -> CId -> String
+srgsXmlPrinter sisr pgf cnc = prSrgsXml sisr $ makeSimpleSRG pgf cnc
+
+srgsXmlNonRecursivePrinter :: PGF -> CId -> String
+srgsXmlNonRecursivePrinter pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG pgf cnc
+
+
+prSrgsXml :: Maybe SISRFormat -> SRG -> String
+prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr)
+ where
+ xmlGr = grammar sisr (catFormId (srgStartCat srg)) (srgLanguage srg) $
+ [meta "description"
+ ("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."),
+ meta "generator" "Grammatical Framework"]
+ ++ map ruleToXML (srgRules srg)
+ ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts)
+ where pub | cat `Set.member` srgExternalCats srg = [("scope","public")]
+ | otherwise = []
+ prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
+
+mkProd :: Maybe SISRFormat -> SRGAlt -> XML
+mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf)
+ where x = mkItem sisr n rhs
+ ti = tag sisr (profileInitSISR n)
+ tf = tag sisr (profileFinalSISR n)
+
+mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
+mkItem sisr cn = f
+ where
+ f (REUnion []) = ETag "ruleref" [("special","VOID")]
+ f (REUnion xs)
+ | not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
+ | otherwise = oneOf (map f xs)
+ where (es,nes) = partition isEpsilon xs
+ f (REConcat []) = ETag "ruleref" [("special","NULL")]
+ f (REConcat xs) = Tag "item" [] (map f xs)
+ f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
+ f (RESymbol s) = symItem sisr cn s
+
+symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
+symItem sisr cn (NonTerminal n@(c,_)) =
+ Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
+symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)]
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
+tag Nothing _ = []
+tag (Just fmt) t = case t fmt of
+ [] -> []
+ ts -> [Tag "tag" [] [Data (prSISR ts)]]
+
+catFormId :: String -> String
+catFormId = (++ "_cat")
+
+
+showToken :: Token -> String
+showToken t = t
+
+oneOf :: [XML] -> XML
+oneOf = Tag "one-of" []
+
+grammar :: Maybe SISRFormat
+ -> String -- ^ root
+ -> Maybe String -- ^language
+ -> [XML] -> XML
+grammar sisr root ml =
+ Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
+ ("version","1.0"),
+ ("mode","voice"),
+ ("root",root)]
+ ++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
+ ++ maybe [] (\l -> [("xml:lang", l)]) ml
+
+meta :: String -> String -> XML
+meta n c = ETag "meta" [("name",n),("content",c)]
+
+optimizeSRGS :: XML -> XML
+optimizeSRGS = bottomUpXML f
+ where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
+ f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
+ f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
+ f (Tag "item" as xs) = Tag "item" as (map g xs)
+ where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
+ g x = x
+ f (Tag "one-of" [] [x]) = x
+ f x = x