summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Speech
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2009-12-13 18:50:29 +0000
committerkrasimir <krasimir@chalmers.se>2009-12-13 18:50:29 +0000
commitf85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch)
tree667b886a5e3a4b026a63d4e3597f32497d824761 /src/compiler/GF/Speech
parentd88a865faff59c98fc91556ff8700b10ee5f2df8 (diff)
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/compiler/GF/Speech')
-rw-r--r--src/compiler/GF/Speech/CFG.hs372
-rw-r--r--src/compiler/GF/Speech/CFGToFA.hs244
-rw-r--r--src/compiler/GF/Speech/FiniteState.hs329
-rw-r--r--src/compiler/GF/Speech/GSL.hs95
-rw-r--r--src/compiler/GF/Speech/JSGF.hs113
-rw-r--r--src/compiler/GF/Speech/PGFToCFG.hs116
-rw-r--r--src/compiler/GF/Speech/PrRegExp.hs27
-rw-r--r--src/compiler/GF/Speech/RegExp.hs144
-rw-r--r--src/compiler/GF/Speech/SISR.hs77
-rw-r--r--src/compiler/GF/Speech/SLF.hs178
-rw-r--r--src/compiler/GF/Speech/SRG.hs205
-rw-r--r--src/compiler/GF/Speech/SRGS_ABNF.hs127
-rw-r--r--src/compiler/GF/Speech/SRGS_XML.hs105
-rw-r--r--src/compiler/GF/Speech/VoiceXML.hs243
14 files changed, 2375 insertions, 0 deletions
diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Speech/CFG.hs
new file mode 100644
index 000000000..9ec8416c5
--- /dev/null
+++ b/src/compiler/GF/Speech/CFG.hs
@@ -0,0 +1,372 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.CFG
+--
+-- Context-free grammar representation and manipulation.
+----------------------------------------------------------------------
+module GF.Speech.CFG where
+
+import GF.Data.Utilities
+import PGF.CId
+import GF.Infra.Option
+import GF.Data.Relation
+
+import Control.Monad
+import Control.Monad.State (State, get, put, evalState)
+import qualified Data.ByteString.Char8 as BS
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.List
+import Data.Maybe (fromMaybe)
+import Data.Monoid (mconcat)
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+--
+-- * Types
+--
+
+type Cat = String
+type Token = String
+
+data Symbol c t = NonTerminal c | Terminal t
+ deriving (Eq, Ord, Show)
+
+type CFSymbol = Symbol Cat Token
+
+data CFRule = CFRule {
+ lhsCat :: Cat,
+ ruleRhs :: [CFSymbol],
+ ruleName :: CFTerm
+ }
+ deriving (Eq, Ord, Show)
+
+data CFTerm
+ = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
+ | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
+ | CFApp CFTerm CFTerm -- ^ Application
+ | CFRes Int -- ^ The result of the n:th (0-based) non-terminal
+ | CFVar Int -- ^ A lambda-bound variable
+ | CFMeta CId -- ^ A metavariable
+ deriving (Eq, Ord, Show)
+
+data CFG = CFG { cfgStartCat :: Cat,
+ cfgExternalCats :: Set Cat,
+ cfgRules :: Map Cat (Set CFRule) }
+ deriving (Eq, Ord, Show)
+
+--
+-- * Grammar filtering
+--
+
+-- | Removes all directly and indirectly cyclic productions.
+-- FIXME: this may be too aggressive, only one production
+-- needs to be removed to break a given cycle. But which
+-- one should we pick?
+-- FIXME: Does not (yet) remove productions which are cyclic
+-- because of empty productions.
+removeCycles :: CFG -> CFG
+removeCycles = onRules f
+ where f rs = filter (not . isCycle) rs
+ where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
+ isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
+ isCycle _ = False
+
+-- | Better bottom-up filter that also removes categories which contain no finite
+-- strings.
+bottomUpFilter :: CFG -> CFG
+bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
+ where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
+ okSym g = symbol (`elem` allCats g) (const True)
+
+-- | Removes categories which are not reachable from any external category.
+topDownFilter :: CFG -> CFG
+topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg
+ where
+ rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
+ uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
+ keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg
+
+-- | Merges categories with identical right-hand-sides.
+-- FIXME: handle probabilities
+mergeIdentical :: CFG -> CFG
+mergeIdentical g = onRules (map subst) g
+ where
+ -- maps categories to their replacement
+ m = Map.fromList [(y,concat (intersperse "+" xs))
+ | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
+ -- build data to compare for each category: a set of name,rhs pairs
+ rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
+ subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
+ substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
+
+-- | Keeps only the start category as an external category.
+purgeExternalCats :: CFG -> CFG
+purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) }
+
+--
+-- * Removing left recursion
+--
+
+-- The LC_LR algorithm from
+-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
+removeLeftRecursion :: CFG -> CFG
+removeLeftRecursion gr
+ = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
+ where
+ scheme1 = [CFRule a [x,NonTerminal a_x] n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ not (isLeftRecursive x),
+ let a_x = mkCat (NonTerminal a) x,
+ -- this is an extension of LC_LR to avoid generating
+ -- A-X categories for which there are no productions:
+ a_x `Set.member` newCats,
+ let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
+ (\_ -> CFRes 0) x]
+ scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
+ a <- retainedLeftRecursive,
+ b@(NonTerminal b') <- properLeftCornersOf a,
+ isLeftRecursive b,
+ CFRule _ (x:beta) n <- catRules gr b',
+ let a_x = mkCat (NonTerminal a) x,
+ let a_b = mkCat (NonTerminal a) b,
+ let i = length $ filterCats beta,
+ let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
+ (\_ -> CFApp (CFRes i) n) x]
+ scheme3 = [CFRule a_x beta n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ CFRule _ (x':beta) n <- catRules gr a,
+ x == x',
+ let a_x = mkCat (NonTerminal a) x,
+ let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
+ (\_ -> n) x]
+ scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
+
+ newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
+
+ shiftTerm :: CFTerm -> CFTerm
+ shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
+ shiftTerm (CFRes 0) = CFVar 1
+ shiftTerm (CFRes n) = CFRes (n-1)
+ shiftTerm t = t
+ -- note: the rest don't occur in the original grammar
+
+ cats = allCats gr
+ rules = allRules gr
+
+ directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
+ leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
+ properLeftCorner = transitiveClosure directLeftCorner
+ properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
+ isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
+
+ leftRecursive = reflexiveElements properLeftCorner
+ isLeftRecursive = (`Set.member` leftRecursive)
+
+ retained = cfgStartCat gr `Set.insert`
+ Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
+ NonTerminal a <- ruleRhs r]
+ isRetained = (`Set.member` retained)
+
+ retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
+
+ mkCat :: CFSymbol -> CFSymbol -> Cat
+ mkCat x y = showSymbol x ++ "-" ++ showSymbol y
+ where showSymbol = symbol id show
+
+-- | Get the sets of mutually recursive non-terminals for a grammar.
+mutRecCats :: Bool -- ^ If true, all categories will be in some set.
+ -- If false, only recursive categories will be included.
+ -> CFG -> [Set Cat]
+mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
+ where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
+ refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
+
+--
+-- * Approximate context-free grammars with regular grammars.
+--
+
+makeSimpleRegular :: CFG -> CFG
+makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
+
+-- Use the transformation algorithm from \"Regular Approximation of Context-free
+-- Grammars through Approximation\", Mohri and Nederhof, 2000
+-- to create an over-generating regular grammar for a context-free
+-- grammar
+makeRegular :: CFG -> CFG
+makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
+ where trSet cs | allXLinear cs rs = rs
+ | otherwise = concatMap handleCat (Set.toList cs)
+ where rs = catSetRules g cs
+ handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
+ ++ concatMap (makeRightLinearRules c) (catRules g c)
+ where c' = newCat c
+ makeRightLinearRules b' (CFRule c ss n) =
+ case ys of
+ [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
+ (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
+ ++ makeRightLinearRules (newCat b) (CFRule c zs n)
+ where (xs,ys) = break (`catElem` cs) ss
+ -- don't add rules on the form A -> A
+ newRule c rhs n | rhs == [NonTerminal c] = []
+ | otherwise = [CFRule c rhs n]
+ newCat c = c ++ "$"
+
+--
+-- * CFG Utilities
+--
+
+mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
+mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
+
+groupProds :: [CFRule] -> Map Cat (Set CFRule)
+groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
+
+-- | Gets all rules in a CFG.
+allRules :: CFG -> [CFRule]
+allRules = concat . map Set.toList . Map.elems . cfgRules
+
+-- | Gets all rules in a CFG, grouped by their LHS categories.
+allRulesGrouped :: CFG -> [(Cat,[CFRule])]
+allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
+
+-- | Gets all categories which have rules.
+allCats :: CFG -> [Cat]
+allCats = Map.keys . cfgRules
+
+-- | Gets all categories which have rules or occur in a RHS.
+allCats' :: CFG -> [Cat]
+allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union`
+ Set.fromList [c | rs <- Map.elems (cfgRules cfg),
+ r <- Set.toList rs,
+ NonTerminal c <- ruleRhs r])
+
+-- | Gets all rules for the given category.
+catRules :: CFG -> Cat -> [CFRule]
+catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
+
+-- | Gets all rules for categories in the given set.
+catSetRules :: CFG -> Set Cat -> [CFRule]
+catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
+
+mapCFGCats :: (Cat -> Cat) -> CFG -> CFG
+mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg))
+ (Set.map f (cfgExternalCats cfg))
+ [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg]
+
+onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
+onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
+
+onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
+onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
+
+-- | Clean up CFG after rules have been removed.
+cleanCFG :: CFG -> CFG
+cleanCFG = onCFG (Map.filter (not . Set.null))
+
+-- | Combine two CFGs.
+unionCFG :: CFG -> CFG -> CFG
+unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
+
+filterCFG :: (CFRule -> Bool) -> CFG -> CFG
+filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
+
+filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
+filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
+
+countCats :: CFG -> Int
+countCats = Map.size . cfgRules . cleanCFG
+
+countRules :: CFG -> Int
+countRules = length . allRules
+
+prCFG :: CFG -> String
+prCFG = prProductions . map prRule . allRules
+ where
+ prRule r = (lhsCat r, unwords (map prSym (ruleRhs r)))
+ prSym = symbol id (\t -> "\""++ t ++"\"")
+
+prProductions :: [(Cat,String)] -> String
+prProductions prods =
+ unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods]
+ where
+ maxLHSWidth = maximum $ 0:(map (length . fst) prods)
+ rpad n s = s ++ replicate (n - length s) ' '
+
+prCFTerm :: CFTerm -> String
+prCFTerm = pr 0
+ where
+ pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")")
+ pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t)
+ pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")")
+ pr _ (CFRes i) = "$" ++ show i
+ pr _ (CFVar i) = "x" ++ show i
+ pr _ (CFMeta c) = "?" ++ showCId c
+ paren 0 x = x
+ paren 1 x = "(" ++ x ++ ")"
+
+--
+-- * CFRule Utilities
+--
+
+ruleFun :: CFRule -> CId
+ruleFun (CFRule _ _ t) = f t
+ where f (CFObj n _) = n
+ f (CFApp _ x) = f x
+ f (CFAbs _ x) = f x
+ f _ = mkCId ""
+
+-- | Check if any of the categories used on the right-hand side
+-- are in the given list of categories.
+anyUsedBy :: [Cat] -> CFRule -> Bool
+anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
+
+mkCFTerm :: String -> CFTerm
+mkCFTerm n = CFObj (mkCId n) []
+
+ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
+ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Set Cat -> [CFRule] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+
+-- | Checks if a context-free rule is right-linear.
+isRightLinear :: Set Cat -- ^ The categories to consider
+ -> CFRule -- ^ The rule to check for right-linearity
+ -> Bool
+isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
+
+-- | Checks if a context-free rule is left-linear.
+isLeftLinear :: Set Cat -- ^ The categories to consider
+ -> CFRule -- ^ The rule to check for left-linearity
+ -> Bool
+isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
+
+
+--
+-- * Symbol utilities
+--
+
+symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
+symbol fc ft (NonTerminal cat) = fc cat
+symbol fc ft (Terminal tok) = ft tok
+
+mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
+mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
+
+filterCats :: [Symbol c t] -> [c]
+filterCats syms = [ cat | NonTerminal cat <- syms ]
+
+filterToks :: [Symbol c t] -> [t]
+filterToks syms = [ tok | Terminal tok <- syms ]
+
+-- | Checks if a symbol is a non-terminal of one of the given categories.
+catElem :: Ord c => Symbol c t -> Set c -> Bool
+catElem s cs = symbol (`Set.member` cs) (const False) s
+
+noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
+noCatsInSet cs = not . any (`catElem` cs)
diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs
new file mode 100644
index 000000000..3045ac842
--- /dev/null
+++ b/src/compiler/GF/Speech/CFGToFA.hs
@@ -0,0 +1,244 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.CFGToFA
+--
+-- Approximates CFGs with finite state networks.
+----------------------------------------------------------------------
+module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
+ MFA(..), cfgToMFA, cfgToFA') where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import PGF.CId
+import PGF.Data
+import GF.Data.Utilities
+import GF.Speech.CFG
+import GF.Speech.PGFToCFG
+import GF.Infra.Ident (Ident)
+
+import GF.Data.Graph
+import GF.Data.Relation
+import GF.Speech.FiniteState
+import GF.Speech.CFG
+
+data Recursivity = RightR | LeftR | NotR
+
+data MutRecSet = MutRecSet {
+ mrCats :: Set Cat,
+ mrNonRecRules :: [CFRule],
+ mrRecRules :: [CFRule],
+ mrRec :: Recursivity
+ }
+
+
+type MutRecSets = Map Cat MutRecSet
+
+--
+-- * Multiple DFA type
+--
+
+data MFA = MFA Cat [(Cat,DFA CFSymbol)]
+
+
+
+cfgToFA :: CFG -> DFA Token
+cfgToFA = minimize . compileAutomaton . makeSimpleRegular
+
+
+--
+-- * Compile strongly regular grammars to NFAs
+--
+
+-- Convert a strongly regular grammar to a finite automaton.
+compileAutomaton :: CFG -> NFA Token
+compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
+ where
+ (fa,s,f) = newFA_
+ ns = mutRecSets g $ mutRecCats False g
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
+make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
+ -> NFA Token -> NFA Token
+make_fa c@(g,ns) q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [Terminal t] -> newTransition q0 q1 (Just t) fa
+ [NonTerminal a] ->
+ case Map.lookup a ns of
+ -- a is recursive
+ Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
+ case mrRec n of
+ -- the set Ni is right-recursive or cyclic
+ RightR ->
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
+ let (xs,NonTerminal d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ -- the set Ni is left-recursive
+ LeftR ->
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
+ ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats ni fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ -- a is not recursive
+ Nothing -> let rs = catRules g a
+ in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
+ where
+ make_fa_ = make_fa c
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
+
+--
+-- * Compile a strongly regular grammar to a DFA with sub-automata
+--
+
+cfgToMFA :: CFG -> MFA
+cfgToMFA = buildMFA . makeSimpleRegular
+
+-- | Build a DFA by building and expanding an MFA
+cfgToFA' :: CFG -> DFA Token
+cfgToFA' = mfaToDFA . cfgToMFA
+
+buildMFA :: CFG -> MFA
+buildMFA g = sortSubLats $ removeUnusedSubLats mfa
+ where fas = compileAutomata g
+ mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
+
+mfaStartDFA :: MFA -> DFA CFSymbol
+mfaStartDFA (MFA start subs) =
+ fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
+
+mfaToDFA :: MFA -> DFA Token
+mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
+ where
+ subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
+ getSub l = fromJust $ Map.lookup l subs'
+ expand (FA (Graph c ns es) s f)
+ = foldl' expandEdge (FA (Graph c ns []) s f) es
+ expandEdge fa (f,t,x) =
+ case x of
+ Nothing -> newTransition f t Nothing fa
+ Just (Terminal s) -> newTransition f t (Just s) fa
+ Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
+
+removeUnusedSubLats :: MFA -> MFA
+removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
+ where
+ usedMap = subLatUseMap mfa
+ used = growUsedSet (Set.singleton start)
+ isUsed c = c `Set.member` used
+ growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
+
+subLatUseMap :: MFA -> Map Cat (Set Cat)
+subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
+
+usedSubLats :: DFA CFSymbol -> Set Cat
+usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
+
+-- | Sort sub-networks topologically.
+sortSubLats :: MFA -> MFA
+sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
+ where
+ usedByMap = revMultiMap (subLatUseMap mfa)
+ sortLats _ [] = []
+ sortLats ub ls = xs ++ sortLats ub' ys
+ where (xs,ys) = partition ((==0) . indeg) ls
+ ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
+ indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
+
+-- | Convert a strongly regular grammar to a number of finite automata,
+-- one for each non-terminal.
+-- The edges in the automata accept tokens, or name another automaton to use.
+compileAutomata :: CFG
+ -> [(Cat,NFA CFSymbol)]
+ -- ^ A map of non-terminals and their automata.
+compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
+ where
+ mrs = mutRecSets g $ mutRecCats True g
+ makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
+ where (fa,s,f) = newFA_
+ mr = fromJust (Map.lookup c mrs)
+
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
+-- adapted to build a finite automaton for a single (mutually recursive) set only.
+-- Categories not in the set will result in category-labelled edges.
+make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
+ -- we are building the automaton.
+ -> State -- ^ State to come from
+ -> [CFSymbol] -- ^ Symbols to accept
+ -> State -- ^ State to end up in
+ -> NFA CFSymbol -- ^ FA to add to.
+ -> NFA CFSymbol
+make_fa1 mr q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
+ [c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
+ [NonTerminal a] ->
+ case mrRec mr of
+ NotR -> -- the set is a non-recursive (always singleton) set of categories
+ -- so the set of category rules is the set of rules for the whole set
+ make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
+ RightR -> -- the set is right-recursive or cyclic
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
+ let (xs,NonTerminal d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ LeftR -> -- the set is left-recursive
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats (mrCats mr) fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fas [(q0,[x],q),(q,beta,q1)] fa'
+ where
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
+
+mutRecSets :: CFG -> [Set Cat] -> MutRecSets
+mutRecSets g = Map.fromList . concatMap mkMutRecSet
+ where
+ mkMutRecSet cs = [ (c,ms) | c <- csl ]
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
+ ms = MutRecSet {
+ mrCats = cs,
+ mrNonRecRules = nrs,
+ mrRecRules = rrs,
+ mrRec = rec
+ }
+ rec | null rrs = NotR
+ | all (isRightLinear cs) rrs = RightR
+ | otherwise = LeftR
+
+--
+-- * Utilities
+--
+
+-- | Add a state for the given NFA for each of the categories
+-- in the given set. Returns a map of categories to their
+-- corresponding states.
+addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
+addStatesForCats cs fa = (fa', m)
+ where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
+ m = Map.fromList (zip (Set.toList cs) (map fst ns))
+
+revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
+revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
diff --git a/src/compiler/GF/Speech/FiniteState.hs b/src/compiler/GF/Speech/FiniteState.hs
new file mode 100644
index 000000000..136d773a2
--- /dev/null
+++ b/src/compiler/GF/Speech/FiniteState.hs
@@ -0,0 +1,329 @@
+----------------------------------------------------------------------
+-- |
+-- Module : FiniteState
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.16 $
+--
+-- A simple finite state network module.
+-----------------------------------------------------------------------------
+module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
+ startState, finalStates,
+ states, transitions,
+ isInternal,
+ newFA, newFA_,
+ addFinalState,
+ newState, newStates,
+ newTransition, newTransitions,
+ insertTransitionWith, insertTransitionsWith,
+ mapStates, mapTransitions,
+ modifyTransitions,
+ nonLoopTransitionsTo, nonLoopTransitionsFrom,
+ loops,
+ removeState,
+ oneFinalState,
+ insertNFA,
+ onGraph,
+ moveLabelsToNodes, removeTrivialEmptyNodes,
+ minimize,
+ dfa2nfa,
+ unusedNames, renameStates,
+ prFAGraphviz, faToGraphviz) where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+import GF.Data.Graph
+import qualified GF.Data.Graphviz as Dot
+
+type State = Int
+
+-- | Type parameters: node id type, state label type, edge label type
+-- Data constructor arguments: nodes and edges, start state, final states
+data FA n a b = FA !(Graph n a b) !n ![n]
+
+type NFA a = FA State () (Maybe a)
+
+type DFA a = FA State () a
+
+
+startState :: FA n a b -> n
+startState (FA _ s _) = s
+
+finalStates :: FA n a b -> [n]
+finalStates (FA _ _ ss) = ss
+
+states :: FA n a b -> [(n,a)]
+states (FA g _ _) = nodes g
+
+transitions :: FA n a b -> [(n,n,b)]
+transitions (FA g _ _) = edges g
+
+newFA :: Enum n => a -- ^ Start node label
+ -> FA n a b
+newFA l = FA g s []
+ where (g,s) = newNode l (newGraph [toEnum 0..])
+
+-- | Create a new finite automaton with an initial and a final state.
+newFA_ :: Enum n => (FA n () b, n, n)
+newFA_ = (fa'', s, f)
+ where fa = newFA ()
+ s = startState fa
+ (fa',f) = newState () fa
+ fa'' = addFinalState f fa'
+
+addFinalState :: n -> FA n a b -> FA n a b
+addFinalState f (FA g s ss) = FA g s (f:ss)
+
+newState :: a -> FA n a b -> (FA n a b, n)
+newState x (FA g s ss) = (FA g' s ss, n)
+ where (g',n) = newNode x g
+
+newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
+newStates xs (FA g s ss) = (FA g' s ss, ns)
+ where (g',ns) = newNodes xs g
+
+newTransition :: n -> n -> b -> FA n a b -> FA n a b
+newTransition f t l = onGraph (newEdge (f,t,l))
+
+newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
+newTransitions es = onGraph (newEdges es)
+
+insertTransitionWith :: Eq n =>
+ (b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
+insertTransitionWith f t = onGraph (insertEdgeWith f t)
+
+insertTransitionsWith :: Eq n =>
+ (b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
+insertTransitionsWith f ts fa =
+ foldl' (flip (insertTransitionWith f)) fa ts
+
+mapStates :: (a -> c) -> FA n a b -> FA n c b
+mapStates f = onGraph (nmap f)
+
+mapTransitions :: (b -> c) -> FA n a b -> FA n a c
+mapTransitions f = onGraph (emap f)
+
+modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
+modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
+
+removeState :: Ord n => n -> FA n a b -> FA n a b
+removeState n = onGraph (removeNode n)
+
+minimize :: Ord a => NFA a -> DFA a
+minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
+
+unusedNames :: FA n a b -> [n]
+unusedNames (FA (Graph names _ _) _ _) = names
+
+-- | Gets all incoming transitions to a given state, excluding
+-- transtions from the state itself.
+nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
+nonLoopTransitionsTo s fa =
+ [(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
+
+nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
+nonLoopTransitionsFrom s fa =
+ [(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
+
+loops :: Eq n => n -> FA n a b -> [b]
+loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
+
+-- | Give new names to all nodes.
+renameStates :: Ord x => [y] -- ^ Infinite supply of new names
+ -> FA x a b
+ -> FA y a b
+renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
+ where (ns,rest) = splitAt (length (nodes g)) supply
+ newNodes = Map.fromList (zip (map fst (nodes g)) ns)
+ newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
+ s' = newName s
+ fs' = map newName fs
+
+-- | Insert an NFA into another
+insertNFA :: NFA a -- ^ NFA to insert into
+ -> (State, State) -- ^ States to insert between
+ -> NFA a -- ^ NFA to insert.
+ -> NFA a
+insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
+ = FA (newEdges es g') s1 fs1
+ where
+ es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
+ (g',ren) = mergeGraphs g1 g2
+
+onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
+onGraph f (FA g s ss) = FA (f g) s ss
+
+
+-- | Make the finite automaton have a single final state
+-- by adding a new final state and adding an edge
+-- from the old final states to the new state.
+oneFinalState :: a -- ^ Label to give the new node
+ -> b -- ^ Label to give the new edges
+ -> FA n a b -- ^ The old network
+ -> FA n a b -- ^ The new network
+oneFinalState nl el fa =
+ let (FA g s fs,nf) = newState nl fa
+ es = [ (f,nf,el) | f <- fs ]
+ in FA (newEdges es g) s [nf]
+
+-- | Transform a standard finite automaton with labelled edges
+-- to one where the labels are on the nodes instead. This can add
+-- up to one extra node per edge.
+moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
+moveLabelsToNodes = onGraph f
+ where f g@(Graph c _ _) = Graph c' ns (concat ess)
+ where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
+ (c',is') = mapAccumL fixIncoming c is
+ (ns,ess) = unzip (concat is')
+
+
+-- | Remove empty nodes which are not start or final, and have
+-- exactly one outgoing edge or exactly one incoming edge.
+removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
+removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
+
+-- | Move edges to empty nodes to point to the next node(s).
+-- This is not done if the pointed-to node is a final node.
+skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
+skipSimpleEmptyNodes fa = onGraph og fa
+ where
+ og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
+ where
+ es' = concatMap changeEdge es
+ info = nodeInfo g
+ changeEdge e@(f,t,())
+ | isNothing (getNodeLabel info t)
+ -- && (i * o <= i + o)
+ && not (isFinal fa t)
+ = [ (f,t',()) | (_,t',()) <- getOutgoing info t]
+ | otherwise = [e]
+-- where i = inDegree info t
+-- o = outDegree info t
+
+isInternal :: Eq n => FA n a b -> n -> Bool
+isInternal (FA _ start final) n = n /= start && n `notElem` final
+
+isFinal :: Eq n => FA n a b -> n -> Bool
+isFinal (FA _ _ final) n = n `elem` final
+
+-- | Remove all internal nodes with no incoming edges
+-- or no outgoing edges.
+pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
+pruneUnusable fa = onGraph f fa
+ where
+ f g = if Set.null rns then g else f (removeNodes rns g)
+ where info = nodeInfo g
+ rns = Set.fromList [ n | (n,_) <- nodes g,
+ isInternal fa n,
+ inDegree info n == 0
+ || outDegree info n == 0]
+
+fixIncoming :: (Ord n, Eq a) => [n]
+ -> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
+ -> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
+ -- incoming edges.
+fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
+ where ls = nub $ map edgeLabel es
+ (cs',cs'') = splitAt (length ls) cs
+ newNodes = zip cs' ls
+ es' = [ (x,n,()) | x <- map fst newNodes ]
+ -- separate cyclic and non-cyclic edges
+ (cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
+ -- keep all incoming non-cyclic edges with the right label
+ to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
+ -- for each cyclic edge with the right label,
+ -- add an edge from each of the new nodes (including this one)
+ ++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
+ newContexts = [ (v, to v) | v <- newNodes ]
+
+alphabet :: Eq b => Graph n a (Maybe b) -> [b]
+alphabet = nub . catMaybes . map edgeLabel . edges
+
+determinize :: Ord a => NFA a -> DFA a
+determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
+ (ns',es') = (Set.toList ns, Set.toList es)
+ final = filter isDFAFinal ns'
+ fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
+ in renameStates [0..] fa
+ where info = nodeInfo g
+-- reach = nodesReachable out
+ start = closure info $ Set.singleton s
+ isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
+ h currentStates oldStates es
+ | Set.null currentStates = (oldStates,es)
+ | otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
+ where
+ allOldStates = oldStates `Set.union` currentStates
+ (newStates,es') = new (Set.toList currentStates) Set.empty es
+ uniqueNewStates = newStates Set.\\ allOldStates
+ -- Get the sets of states reachable from the given states
+ -- by consuming one symbol, and the associated edges.
+ new [] rs es = (rs,es)
+ new (n:ns) rs es = new ns rs' es'
+ where cs = reachable info n --reachable reach n
+ rs' = rs `Set.union` Set.fromList (map snd cs)
+ es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
+
+
+-- | Get all the nodes reachable from a list of nodes by only empty edges.
+closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
+closure info x = closure_ x x
+ where closure_ acc check | Set.null check = acc
+ | otherwise = closure_ acc' check'
+ where
+ reach = Set.fromList [y | x <- Set.toList check,
+ (_,y,Nothing) <- getOutgoing info x]
+ acc' = acc `Set.union` reach
+ check' = reach Set.\\ acc
+
+-- | Get a map of labels to sets of all nodes reachable
+-- from a the set of nodes by one edge with the given
+-- label and then any number of empty edges.
+reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
+reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
+reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
+
+reverseNFA :: NFA a -> NFA a
+reverseNFA (FA g s fs) = FA g''' s' [s]
+ where g' = reverseGraph g
+ (g'',s') = newNode () g'
+ g''' = newEdges [(s',f,Nothing) | f <- fs] g''
+
+dfa2nfa :: DFA a -> NFA a
+dfa2nfa = mapTransitions Just
+
+--
+-- * Visualization
+--
+
+prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
+prFAGraphviz = Dot.prGraphviz . faToGraphviz
+
+prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
+prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
+
+faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
+faToGraphviz (FA (Graph _ ns es) s f)
+ = Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
+ where mkNode (n,l) = Dot.Node (show n) attrs
+ where attrs = [("label",l)]
+ ++ if n == s then [("shape","box")] else []
+ ++ if n `elem` f then [("style","bold")] else []
+ mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
+
+--
+-- * Utilities
+--
+
+lookups :: Ord k => [k] -> Map k a -> [a]
+lookups xs m = mapMaybe (flip Map.lookup m) xs
diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs
new file mode 100644
index 000000000..8f26ea64c
--- /dev/null
+++ b/src/compiler/GF/Speech/GSL.hs
@@ -0,0 +1,95 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.GSL
+--
+-- This module prints a CFG as a Nuance GSL 2.0 grammar.
+--
+-----------------------------------------------------------------------------
+
+module GF.Speech.GSL (gslPrinter) where
+
+import GF.Data.Utilities
+import GF.Speech.CFG
+import GF.Speech.SRG
+import GF.Speech.RegExp
+import GF.Infra.Option
+import GF.Infra.Ident
+import PGF.CId
+import PGF.Data
+
+import Data.Char (toUpper,toLower)
+import Data.List (partition)
+import Text.PrettyPrint.HughesPJ
+
+width :: Int
+width = 75
+
+gslPrinter :: Options -> PGF -> CId -> String
+gslPrinter opts pgf cnc = renderStyle st $ prGSL $ makeNonLeftRecursiveSRG opts pgf cnc
+ where st = style { lineLength = width }
+
+prGSL :: SRG -> Doc
+prGSL srg = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg))
+ where
+ header = text ";GSL2.0" $$
+ comment ("Nuance speech recognition grammar for " ++ srgName srg) $$
+ comment ("Generated by GF")
+ mainCat = text ".MAIN" <+> prCat (srgStartCat srg)
+ prRule (SRGRule cat rhs) = prCat cat <+> union (map prAlt rhs)
+ -- FIXME: use the probability
+ prAlt (SRGAlt mp _ rhs) = prItem rhs
+
+
+prItem :: SRGItem -> Doc
+prItem = f
+ where
+ f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
+ where (es,nes) = partition isEpsilon xs
+ f (REConcat [x]) = f x
+ f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")"
+ f (RERepeat x) = text "*" <> f x
+ f (RESymbol s) = prSymbol s
+
+union :: [Doc] -> Doc
+union [x] = x
+union xs = text "[" <> fsep xs <> text "]"
+
+prSymbol :: Symbol SRGNT Token -> Doc
+prSymbol = symbol (prCat . fst) (doubleQuotes . showToken)
+
+-- GSL requires an upper case letter in category names
+prCat :: Cat -> Doc
+prCat = text . firstToUpper
+
+
+firstToUpper :: String -> String
+firstToUpper [] = []
+firstToUpper (x:xs) = toUpper x : xs
+
+{-
+rmPunctCFG :: CGrammar -> CGrammar
+rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g]
+
+keepSymbol :: Symbol c Token -> Bool
+keepSymbol (Tok t) = not (all isPunct (prt t))
+keepSymbol _ = True
+-}
+
+-- Nuance does not like upper case characters in tokens
+showToken :: Token -> Doc
+showToken = text . map toLower
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.:;.,?!()[]{}"
+
+comment :: String -> Doc
+comment s = text ";" <+> text s
+
+
+-- Pretty-printing utilities
+
+emptyLine :: Doc
+emptyLine = text ""
+
+($++$) :: Doc -> Doc -> Doc
+x $++$ y = x $$ emptyLine $$ y
diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs
new file mode 100644
index 000000000..2cfeea5f5
--- /dev/null
+++ b/src/compiler/GF/Speech/JSGF.hs
@@ -0,0 +1,113 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.JSGF
+--
+-- This module prints a CFG as a JSGF grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+--
+-- FIXME: convert to UTF-8
+-----------------------------------------------------------------------------
+
+module GF.Speech.JSGF (jsgfPrinter) where
+
+import GF.Data.Utilities
+import GF.Infra.Option
+import GF.Speech.CFG
+import GF.Speech.RegExp
+import GF.Speech.SISR
+import GF.Speech.SRG
+import PGF.CId
+import PGF.Data
+
+import Data.Char
+import Data.List
+import Data.Maybe
+import Text.PrettyPrint.HughesPJ
+import Debug.Trace
+
+width :: Int
+width = 75
+
+jsgfPrinter :: Options
+ -> PGF
+ -> CId -> String
+jsgfPrinter opts pgf cnc = renderStyle st $ prJSGF sisr $ makeNonLeftRecursiveSRG opts pgf cnc
+ where st = style { lineLength = width }
+ sisr = flag optSISR opts
+
+prJSGF :: Maybe SISRFormat -> SRG -> Doc
+prJSGF sisr srg
+ = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg))
+ where
+ header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$
+ comment ("JSGF speech recognition grammar for " ++ srgName srg) $$
+ comment "Generated by GF" $$
+ text ("grammar " ++ srgName srg ++ ";")
+ lang = maybe empty text (srgLanguage srg)
+ mainCat = rule True "MAIN" [prCat (srgStartCat srg)]
+ prRule (SRGRule cat rhs) = rule (isExternalCat srg cat) cat (map prAlt rhs)
+ prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
+ where initTag | isEmpty t = empty
+ | otherwise = text "<NULL>" <+> t
+ where t = tag sisr (profileInitSISR n)
+ finalTag = tag sisr (profileFinalSISR n)
+ p = if isEmpty initTag && isEmpty finalTag then id else parens
+
+prCat :: Cat -> Doc
+prCat c = char '<' <> text c <> char '>'
+
+prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
+prItem sisr t = f 0
+ where
+ f _ (REUnion []) = text "<VOID>"
+ f p (REUnion xs)
+ | not (null es) = brackets (f 0 (REUnion nes))
+ | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
+ where (es,nes) = partition isEpsilon xs
+ f _ (REConcat []) = text "<NULL>"
+ f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
+ f p (RERepeat x) = f 3 x <> char '*'
+ f _ (RESymbol s) = prSymbol sisr t s
+
+prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc
+prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
+prSymbol _ cn (Terminal t) | all isPunct t = empty -- removes punctuation
+ | otherwise = text t -- FIXME: quote if there is whitespace or odd chars
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
+tag Nothing _ = empty
+tag (Just fmt) t = case t fmt of
+ [] -> empty
+ ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
+ where e [] = []
+ e ('}':xs) = '\\':'}':e xs
+ e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
+ e (x:xs) = x:e xs
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!"
+
+comment :: String -> Doc
+comment s = text "//" <+> text s
+
+alts :: [Doc] -> Doc
+alts = fsep . prepunctuate (text "| ")
+
+rule :: Bool -> Cat -> [Doc] -> Doc
+rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
+ where p = if pub then text "public" else empty
+
+-- Pretty-printing utilities
+
+emptyLine :: Doc
+emptyLine = text ""
+
+prepunctuate :: Doc -> [Doc] -> [Doc]
+prepunctuate _ [] = []
+prepunctuate p (x:xs) = x : map (p <>) xs
+
+($++$) :: Doc -> Doc -> Doc
+x $++$ y = x $$ emptyLine $$ y
+
diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs
new file mode 100644
index 000000000..d22a4ea8d
--- /dev/null
+++ b/src/compiler/GF/Speech/PGFToCFG.hs
@@ -0,0 +1,116 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.PGFToCFG
+--
+-- Approximates PGF grammars with context-free grammars.
+----------------------------------------------------------------------
+module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
+
+import PGF.CId
+import PGF.Data as PGF
+import PGF.Macros
+import GF.Infra.Ident
+import GF.Speech.CFG
+
+import Data.Array.IArray as Array
+import Data.List
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import Data.Maybe
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+bnfPrinter :: PGF -> CId -> String
+bnfPrinter = toBNF id
+
+toBNF :: (CFG -> CFG) -> PGF -> CId -> String
+toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
+
+
+pgfToCFG :: PGF
+ -> CId -- ^ Concrete syntax name
+ -> CFG
+pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules)
+ where
+ pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
+
+ rules :: [(FCat,Production)]
+ rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
+ , prod <- Set.toList set]
+
+ fcatCats :: Map FCat Cat
+ fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
+ | (c,fcs) <- Map.toList (startCats pinfo),
+ (fc,i) <- zip fcs [1..]]
+
+ fcatCat :: FCat -> Cat
+ fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
+
+ fcatToCat :: FCat -> FIndex -> Cat
+ fcatToCat c l = fcatCat c ++ row
+ where row = if catLinArity c == 1 then "" else "_" ++ show l
+
+ -- gets the number of fields in the lincat for the given category
+ catLinArity :: FCat -> Int
+ catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
+
+ topdownRules cat = f cat []
+ where
+ f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
+
+ g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
+ g (FCoerce cat) rules = f cat rules
+
+
+ extCats :: Set Cat
+ extCats = Set.fromList $ map lhsCat startRules
+
+ startRules :: [CFRule]
+ startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
+ | (c,fcs) <- Map.toList (startCats pinfo),
+ fc <- fcs, not (isLiteralFCat fc),
+ r <- [0..catLinArity fc-1]]
+
+ fruleToCFRule :: (FCat,Production) -> [CFRule]
+ fruleToCFRule (c,FApply funid args) =
+ [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
+ | (l,seqid) <- Array.assocs rhs
+ , let row = sequences pinfo ! seqid
+ , not (containsLiterals row)]
+ where
+ FFun f ps rhs = functions pinfo ! funid
+
+ mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
+ mkRhs = concatMap fsymbolToSymbol . Array.elems
+
+ containsLiterals :: Array FPointPos FSymbol -> Bool
+ containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] ||
+ not (null [n | FSymLit n _ <- Array.elems row]) -- only this is needed for PMCFG.
+ -- The first line is for backward compat.
+
+ fsymbolToSymbol :: FSymbol -> [CFSymbol]
+ fsymbolToSymbol (FSymCat n l) = [NonTerminal (fcatToCat (args!!n) l)]
+ fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
+ fsymbolToSymbol (FSymKS ts) = map Terminal ts
+
+ fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
+ fixProfile row = concatMap positions
+ where
+ nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
+ positions i = [k | (k,j) <- nts, j == i]
+
+ getPos (FSymCat j _) = [j]
+ getPos (FSymLit j _) = [j]
+ getPos _ = []
+
+ profilesToTerm :: [Profile] -> CFTerm
+ profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
+ where (argTypes,_) = catSkeleton $ lookType pgf f
+
+ profileToTerm :: CId -> Profile -> CFTerm
+ profileToTerm t [] = CFMeta t
+ profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
+ fruleToCFRule (c,FCoerce c') =
+ [CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
+ | l <- [0..catLinArity c-1]]
diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs
new file mode 100644
index 000000000..0fc35d541
--- /dev/null
+++ b/src/compiler/GF/Speech/PrRegExp.hs
@@ -0,0 +1,27 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.PrRegExp
+--
+-- This module prints a grammar as a regular expression.
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
+
+import GF.Speech.CFG
+import GF.Speech.CFGToFA
+import GF.Speech.PGFToCFG
+import GF.Speech.RegExp
+import PGF
+
+regexpPrinter :: PGF -> CId -> String
+regexpPrinter pgf cnc = (++"\n") $ prRE id $ dfa2re $ cfgToFA $ pgfToCFG pgf cnc
+
+multiRegexpPrinter :: PGF -> CId -> String
+multiRegexpPrinter pgf cnc = prREs $ mfa2res $ cfgToMFA $ pgfToCFG pgf cnc
+
+prREs :: [(String,RE CFSymbol)] -> String
+prREs res = unlines [l ++ " = " ++ prRE id (mapRE showLabel re) | (l,re) <- res]
+ where showLabel = symbol (\l -> "<" ++ l ++ ">") id
+
+mfa2res :: MFA -> [(String,RE CFSymbol)]
+mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas]
diff --git a/src/compiler/GF/Speech/RegExp.hs b/src/compiler/GF/Speech/RegExp.hs
new file mode 100644
index 000000000..2592b3d57
--- /dev/null
+++ b/src/compiler/GF/Speech/RegExp.hs
@@ -0,0 +1,144 @@
+module GF.Speech.RegExp (RE(..),
+ epsilonRE, nullRE,
+ isEpsilon, isNull,
+ unionRE, concatRE, seqRE,
+ repeatRE, minimizeRE,
+ mapRE, mapRE', joinRE,
+ symbolsRE,
+ dfa2re, prRE) where
+
+import Data.List
+
+import GF.Data.Utilities
+import GF.Speech.FiniteState
+
+data RE a =
+ REUnion [RE a] -- ^ REUnion [] is null
+ | REConcat [RE a] -- ^ REConcat [] is epsilon
+ | RERepeat (RE a)
+ | RESymbol a
+ deriving (Eq,Ord,Show)
+
+
+dfa2re :: (Ord a) => DFA a -> RE a
+dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
+ . oneFinalState () epsilonRE . mapTransitions RESymbol
+ where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
+ merge es = [(f,t,unionRE ls)
+ | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
+
+elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
+elimStates fa =
+ case [s | (s,_) <- states fa, isInternal fa s] of
+ [] -> fa
+ sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
+ where sAs = nonLoopTransitionsTo sE fa
+ sBs = nonLoopTransitionsFrom sE fa
+ r2 = unionRE $ loops sE fa
+ ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
+ r r1 r3 = concatRE [r1, repeatRE r2, r3]
+
+epsilonRE :: RE a
+epsilonRE = REConcat []
+
+nullRE :: RE a
+nullRE = REUnion []
+
+isNull :: RE a -> Bool
+isNull (REUnion []) = True
+isNull _ = False
+
+isEpsilon :: RE a -> Bool
+isEpsilon (REConcat []) = True
+isEpsilon _ = False
+
+unionRE :: Ord a => [RE a] -> RE a
+unionRE = unionOrId . sortNub . concatMap toList
+ where
+ toList (REUnion xs) = xs
+ toList x = [x]
+ unionOrId [r] = r
+ unionOrId rs = REUnion rs
+
+concatRE :: [RE a] -> RE a
+concatRE xs | any isNull xs = nullRE
+ | otherwise = case concatMap toList xs of
+ [r] -> r
+ rs -> REConcat rs
+ where
+ toList (REConcat xs) = xs
+ toList x = [x]
+
+seqRE :: [a] -> RE a
+seqRE = concatRE . map RESymbol
+
+repeatRE :: RE a -> RE a
+repeatRE x | isNull x || isEpsilon x = epsilonRE
+ | otherwise = RERepeat x
+
+finalRE :: Ord a => DFA (RE a) -> RE a
+finalRE fa = concatRE [repeatRE r1, r2,
+ repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
+ where
+ s0 = startState fa
+ [sF] = finalStates fa
+ r1 = unionRE $ loops s0 fa
+ r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
+ r3 = unionRE $ loops sF fa
+ r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
+
+reverseRE :: RE a -> RE a
+reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
+reverseRE (REUnion xs) = REUnion (map reverseRE xs)
+reverseRE (RERepeat x) = RERepeat (reverseRE x)
+reverseRE x = x
+
+minimizeRE :: Ord a => RE a -> RE a
+minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
+
+mergeForward :: Ord a => RE a -> RE a
+mergeForward (REUnion xs) =
+ unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
+mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
+mergeForward (RERepeat r) = repeatRE (mergeForward r)
+mergeForward r = r
+
+firstRE :: RE a -> (RE a, RE a)
+firstRE (REConcat (x:xs)) = (x, REConcat xs)
+firstRE r = (r,epsilonRE)
+
+mapRE :: (a -> b) -> RE a -> RE b
+mapRE f = mapRE' (RESymbol . f)
+
+mapRE' :: (a -> RE b) -> RE a -> RE b
+mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
+mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
+mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
+mapRE' f (RESymbol s) = f s
+
+joinRE :: RE (RE a) -> RE a
+joinRE (REConcat xs) = REConcat (map joinRE xs)
+joinRE (REUnion xs) = REUnion (map joinRE xs)
+joinRE (RERepeat xs) = RERepeat (joinRE xs)
+joinRE (RESymbol ss) = ss
+
+symbolsRE :: RE a -> [a]
+symbolsRE (REConcat xs) = concatMap symbolsRE xs
+symbolsRE (REUnion xs) = concatMap symbolsRE xs
+symbolsRE (RERepeat x) = symbolsRE x
+symbolsRE (RESymbol x) = [x]
+
+-- Debugging
+
+prRE :: (a -> String) -> RE a -> String
+prRE = prRE' 0
+
+prRE' :: Int -> (a -> String) -> RE a -> String
+prRE' _ _ (REUnion []) = "<NULL>"
+prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs)))
+prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs))
+prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*"
+prRE' _ f (RESymbol s) = f s
+
+p n m s | n >= m = "(" ++ s ++ ")"
+ | True = s
diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs
new file mode 100644
index 000000000..f966d96b9
--- /dev/null
+++ b/src/compiler/GF/Speech/SISR.hs
@@ -0,0 +1,77 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.SISR
+--
+-- Abstract syntax and pretty printer for SISR,
+-- (Semantic Interpretation for Speech Recognition)
+----------------------------------------------------------------------
+module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
+ topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
+
+import Data.List
+
+import GF.Data.Utilities
+import GF.Infra.Ident
+import GF.Infra.Option (SISRFormat(..))
+import GF.Speech.CFG
+import GF.Speech.SRG (SRGNT)
+import PGF.CId
+
+import qualified GF.JavaScript.AbsJS as JS
+import qualified GF.JavaScript.PrintJS as JS
+
+type SISRTag = [JS.DeclOrExpr]
+
+
+prSISR :: SISRTag -> String
+prSISR = JS.printTree
+
+topCatSISR :: String -> SISRFormat -> SISRTag
+topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
+
+profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
+profileInitSISR t fmt
+ | null (usedArgs t) = []
+ | otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
+
+usedArgs :: CFTerm -> [Int]
+usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
+usedArgs (CFAbs _ x) = usedArgs x
+usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
+usedArgs (CFRes i) = [i]
+usedArgs _ = []
+
+catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
+catSISR t (c,i) fmt
+ | i `elem` usedArgs t = map JS.DExpr
+ [JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
+ | otherwise = []
+
+profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
+profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
+ where
+ f (CFObj n ts) = tree (showCId n) (map f ts)
+ f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
+ f (CFApp x y) = JS.ECall (f x) [f y]
+ f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
+ f (CFVar v) = JS.EVar (var v)
+ f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (showCId typ))]
+
+fmtOut SISR_WD20030401 = JS.EVar (JS.Ident "$")
+fmtOut SISR_1_0 = JS.EVar (JS.Ident "out")
+
+fmtRef SISR_WD20030401 c = JS.EVar (JS.Ident ("$" ++ c))
+fmtRef SISR_1_0 c = field (JS.EVar (JS.Ident "rules")) c
+
+args = JS.Ident "a"
+
+var v = JS.Ident ("x" ++ show v)
+
+field x y = JS.EMember x (JS.Ident y)
+
+ass = JS.EAssign
+
+tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
+
+obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]
+
diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs
new file mode 100644
index 000000000..84633149b
--- /dev/null
+++ b/src/compiler/GF/Speech/SLF.hs
@@ -0,0 +1,178 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.SLF
+--
+-- This module converts a CFG to an SLF finite-state network
+-- for use with the ATK recognizer. The SLF format is described
+-- in the HTK manual, and an example for use in ATK is shown
+-- in the ATK manual.
+--
+-----------------------------------------------------------------------------
+
+module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter,
+ slfSubPrinter,slfSubGraphvizPrinter) where
+
+import GF.Data.Utilities
+import GF.Speech.CFG
+import GF.Speech.FiniteState
+import GF.Speech.CFG
+import GF.Speech.CFGToFA
+import GF.Speech.PGFToCFG
+import qualified GF.Data.Graphviz as Dot
+import PGF
+import PGF.CId
+
+import Control.Monad
+import qualified Control.Monad.State as STM
+import Data.Char (toUpper)
+import Data.List
+import Data.Maybe
+
+data SLFs = SLFs [(String,SLF)] SLF
+
+data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
+
+data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
+ | SLFSubLat { nId :: Int, nLat :: String }
+
+-- | An SLF word is a word, or the empty string.
+type SLFWord = Maybe String
+
+data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
+
+type SLF_FA = FA State (Maybe CFSymbol) ()
+
+mkFAs :: PGF -> CId -> (SLF_FA, [(String,SLF_FA)])
+mkFAs pgf cnc = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
+ where MFA start subs = {- renameSubs $ -} cfgToMFA $ pgfToCFG pgf cnc
+ main = let (fa,s,f) = newFA_ in newTransition s f (NonTerminal start) fa
+
+slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
+slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
+ . moveLabelsToNodes . dfa2nfa
+
+-- | Give sequential names to subnetworks.
+renameSubs :: MFA -> MFA
+renameSubs (MFA start subs) = MFA (newName start) subs'
+ where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
+ newName s = lookup' s newNames
+ subs' = [(newName s,renameLabels n) | (s,n) <- subs]
+ renameLabels = mapTransitions (mapSymbol newName id)
+
+--
+-- * SLF graphviz printing (without sub-networks)
+--
+
+slfGraphvizPrinter :: PGF -> CId -> String
+slfGraphvizPrinter pgf cnc
+ = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc
+ where
+ gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
+
+--
+-- * SLF graphviz printing (with sub-networks)
+--
+
+slfSubGraphvizPrinter :: PGF -> CId -> String
+slfSubGraphvizPrinter pgf cnc = Dot.prGraphviz g
+ where (main, subs) = mkFAs pgf cnc
+ g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
+ ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
+ m = gvSLFFA Nothing main
+
+gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
+gvSLFFA n fa =
+ liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
+ . mapTransitions (const "")) (rename fa)
+ where mfaLabelToGv = symbol ("#"++) id
+ mkCluster Nothing = id
+ mkCluster (Just x)
+ = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
+ rename fa = do
+ names <- STM.get
+ let fa' = renameStates names fa
+ names' = unusedNames fa'
+ STM.put names'
+ return fa'
+
+--
+-- * SLF printing (without sub-networks)
+--
+
+slfPrinter :: PGF -> CId -> String
+slfPrinter pgf cnc
+ = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc
+
+--
+-- * SLF printing (with sub-networks)
+--
+
+-- | Make a network with subnetworks in SLF
+slfSubPrinter :: PGF -> CId -> String
+slfSubPrinter pgf cnc = prSLFs slfs
+ where
+ (main,subs) = mkFAs pgf cnc
+ slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
+ faToSLF = automatonToSLF mfaNodeToSLFNode
+
+automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
+automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
+ where ns = map (uncurry mkNode) (states fa)
+ es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
+
+mfaNodeToSLFNode :: Int -> Maybe CFSymbol -> SLFNode
+mfaNodeToSLFNode i l = case l of
+ Nothing -> mkSLFNode i Nothing
+ Just (Terminal x) -> mkSLFNode i (Just x)
+ Just (NonTerminal s) -> mkSLFSubLat i s
+
+mkSLFNode :: Int -> Maybe String -> SLFNode
+mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
+mkSLFNode i (Just w)
+ | isNonWord w = SLFNode { nId = i,
+ nWord = Nothing,
+ nTag = Just w }
+ | otherwise = SLFNode { nId = i,
+ nWord = Just (map toUpper w),
+ nTag = Just w }
+
+mkSLFSubLat :: Int -> String -> SLFNode
+mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
+
+mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
+mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
+
+prSLFs :: SLFs -> String
+prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
+ where prSub (n,s) = showString "SUBLAT=" . shows n
+ . nl . prOneSLF s . showString "." . nl
+
+prSLF :: SLF -> String
+prSLF slf = prOneSLF slf ""
+
+prOneSLF :: SLF -> ShowS
+prOneSLF (SLF { slfNodes = ns, slfEdges = es})
+ = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
+ where
+ header = prFields [("N",show (length ns)),("L", show (length es))] . nl
+ prNode (SLFNode { nId = i, nWord = w, nTag = t })
+ = prFields $ [("I",show i),("W",showWord w)]
+ ++ maybe [] (\t -> [("s",t)]) t
+ prNode (SLFSubLat { nId = i, nLat = l })
+ = prFields [("I",show i),("L",show l)]
+ prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
+
+-- | Check if a word should not correspond to a word in the SLF file.
+isNonWord :: String -> Bool
+isNonWord = any isPunct
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!()[]{}"
+
+showWord :: SLFWord -> String
+showWord Nothing = "!NULL"
+showWord (Just w) | null w = "!NULL"
+ | otherwise = w
+
+prFields :: [(String,String)] -> ShowS
+prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs
new file mode 100644
index 000000000..2270ec7a1
--- /dev/null
+++ b/src/compiler/GF/Speech/SRG.hs
@@ -0,0 +1,205 @@
+----------------------------------------------------------------------
+-- |
+-- Module : SRG
+--
+-- Representation of, conversion to, and utilities for
+-- printing of a general Speech Recognition Grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+----------------------------------------------------------------------
+module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol
+ , SRGNT, CFTerm
+ , ebnfPrinter
+ , makeNonLeftRecursiveSRG
+ , makeNonRecursiveSRG
+ , getSpeechLanguage
+ , isExternalCat
+ , lookupFM_
+ ) where
+
+import GF.Data.Operations
+import GF.Data.Utilities
+import GF.Infra.Ident
+import GF.Infra.Option
+import GF.Speech.CFG
+import GF.Speech.PGFToCFG
+import GF.Data.Relation
+import GF.Speech.FiniteState
+import GF.Speech.RegExp
+import GF.Speech.CFGToFA
+import GF.Infra.Option
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+
+import Data.List
+import Data.Maybe (fromMaybe, maybeToList)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import Debug.Trace
+
+data SRG = SRG { srgName :: String -- ^ grammar name
+ , srgStartCat :: Cat -- ^ start category name
+ , srgExternalCats :: Set Cat
+ , srgLanguage :: Maybe String -- ^ The language for which the grammar
+ -- is intended, e.g. en-UK
+ , srgRules :: [SRGRule]
+ }
+ deriving (Eq,Show)
+
+data SRGRule = SRGRule Cat [SRGAlt]
+ deriving (Eq,Show)
+
+-- | maybe a probability, a rule name and an EBNF right-hand side
+data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
+ deriving (Eq,Show)
+
+type SRGItem = RE SRGSymbol
+
+type SRGSymbol = Symbol SRGNT Token
+
+-- | An SRG non-terminal. Category name and its number in the profile.
+type SRGNT = (Cat, Int)
+
+ebnfPrinter :: Options -> PGF -> CId -> String
+ebnfPrinter opts pgf cnc = prSRG opts $ makeSRG opts pgf cnc
+
+-- | Create a compact filtered non-left-recursive SRG.
+makeNonLeftRecursiveSRG :: Options -> PGF -> CId -> SRG
+makeNonLeftRecursiveSRG opts = makeSRG opts'
+ where
+ opts' = setDefaultCFGTransform opts CFGNoLR True
+
+makeSRG :: Options -> PGF -> CId -> SRG
+makeSRG opts = mkSRG cfgToSRG preprocess
+ where
+ cfgToSRG cfg = [cfRulesToSRGRule rs | (_,rs) <- allRulesGrouped cfg]
+ preprocess = maybeTransform opts CFGMergeIdentical mergeIdentical
+ . maybeTransform opts CFGNoLR removeLeftRecursion
+ . maybeTransform opts CFGRegular makeRegular
+ . maybeTransform opts CFGTopDownFilter topDownFilter
+ . maybeTransform opts CFGBottomUpFilter bottomUpFilter
+ . maybeTransform opts CFGRemoveCycles removeCycles
+ . maybeTransform opts CFGStartCatOnly purgeExternalCats
+
+setDefaultCFGTransform :: Options -> CFGTransform -> Bool -> Options
+setDefaultCFGTransform opts t b = setCFGTransform t b `addOptions` opts
+
+maybeTransform :: Options -> CFGTransform -> (CFG -> CFG) -> (CFG -> CFG)
+maybeTransform opts t f = if cfgTransform opts t then f else id
+
+traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
+
+stats g = "Categories: " ++ show (countCats g)
+ ++ ", External categories: " ++ show (Set.size (cfgExternalCats g))
+ ++ ", Rules: " ++ show (countRules g)
+
+makeNonRecursiveSRG :: Options
+ -> PGF
+ -> CId -- ^ Concrete syntax name.
+ -> SRG
+makeNonRecursiveSRG opts = mkSRG cfgToSRG id
+ where
+ cfgToSRG cfg = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
+ where
+ MFA _ dfas = cfgToMFA cfg
+ dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
+ dummyCFTerm = CFMeta (mkCId "dummy")
+ dummySRGNT = mapSymbol (\c -> (c,0)) id
+
+mkSRG :: (CFG -> [SRGRule]) -> (CFG -> CFG) -> PGF -> CId -> SRG
+mkSRG mkRules preprocess pgf cnc =
+ SRG { srgName = showCId cnc,
+ srgStartCat = cfgStartCat cfg,
+ srgExternalCats = cfgExternalCats cfg,
+ srgLanguage = getSpeechLanguage pgf cnc,
+ srgRules = mkRules cfg }
+ where cfg = renameCats (showCId cnc) $ preprocess $ pgfToCFG pgf cnc
+
+-- | Renames all external cats C to C_cat, and all internal cats C_X (where X is any string),
+-- to C_N where N is an integer.
+renameCats :: String -> CFG -> CFG
+renameCats prefix cfg = mapCFGCats renameCat cfg
+ where renameCat c | isExternal c = c ++ "_cat"
+ | otherwise = Map.findWithDefault (badCat c) c names
+ isExternal c = c `Set.member` cfgExternalCats cfg
+ catsByPrefix = buildMultiMap [(takeWhile (/='_') cat, cat) | cat <- allCats' cfg, not (isExternal cat)]
+ names = Map.fromList [(c,pref++"_"++show i) | (pref,cs) <- catsByPrefix, (c,i) <- zip cs [1..]]
+ badCat c = error ("GF.Speech.SRG.renameCats: " ++ c ++ "\n" ++ prCFG cfg)
+
+getSpeechLanguage :: PGF -> CId -> Maybe String
+getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language")
+
+cfRulesToSRGRule :: [CFRule] -> SRGRule
+cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
+ where
+ alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
+ rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
+
+ mkSRGSymbols _ [] = []
+ mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss
+ mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss
+
+srgLHSCat :: SRGRule -> Cat
+srgLHSCat (SRGRule c _) = c
+
+isExternalCat :: SRG -> Cat -> Bool
+isExternalCat srg c = c `Set.member` srgExternalCats srg
+
+--
+-- * Size-optimized EBNF SRGs
+--
+
+srgItem :: [[SRGSymbol]] -> SRGItem
+srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
+-- non-optimizing version:
+--srgItem = unionRE . map seqRE
+
+-- | Merges a list of right-hand sides which all have the same
+-- sequence of non-terminals.
+mergeItems :: [[SRGSymbol]] -> SRGItem
+mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
+
+groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]]
+groupTokens [] = []
+groupTokens (Terminal t:ss) = case groupTokens ss of
+ Terminal ts:ss' -> Terminal (t:ts):ss'
+ ss' -> Terminal [t]:ss'
+groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss
+
+ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol
+ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal)))
+
+--
+-- * Utilities for building and printing SRGs
+--
+
+prSRG :: Options -> SRG -> String
+prSRG opts srg = prProductions $ map prRule $ ext ++ int
+ where
+ sisr = flag optSISR opts
+ (ext,int) = partition (isExternalCat srg . srgLHSCat) (srgRules srg)
+ prRule (SRGRule c alts) = (c,unwords (intersperse "|" (concatMap prAlt alts)))
+ prAlt (SRGAlt _ t rhs) =
+ -- FIXME: hack: we high-jack the --sisr flag to add
+ -- a simple lambda calculus format for semantic interpretation
+ -- Maybe the --sisr flag should be renamed.
+ case sisr of
+ Just _ ->
+ -- copy tags to each part of a top-level union,
+ -- to get simpler output
+ case rhs of
+ REUnion xs -> map prOneAlt xs
+ _ -> [prOneAlt rhs]
+ where prOneAlt a = prRE prSym a ++ " { " ++ prCFTerm t ++ " }"
+ Nothing -> [prRE prSym rhs]
+ prSym = symbol fst (\t -> "\""++ t ++"\"")
+
+lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
+lookupFM_ fm k = Map.findWithDefault err k fm
+ where err = error $ "Key not found: " ++ show k
+ ++ "\namong " ++ show (Map.keys fm)
diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs
new file mode 100644
index 000000000..2df1316a8
--- /dev/null
+++ b/src/compiler/GF/Speech/SRGS_ABNF.hs
@@ -0,0 +1,127 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrJSRGS_ABNF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.16 $
+--
+-- This module prints a CFG as a JSGF grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+--
+-- FIXME: convert to UTF-8
+-----------------------------------------------------------------------------
+
+module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
+
+import GF.Data.Utilities
+import GF.Infra.Option
+import GF.Speech.CFG
+import GF.Speech.SISR as SISR
+import GF.Speech.SRG
+import GF.Speech.RegExp
+import PGF (PGF, CId)
+
+import Data.Char
+import Data.List
+import Data.Maybe
+import Text.PrettyPrint.HughesPJ
+import Debug.Trace
+
+width :: Int
+width = 75
+
+srgsAbnfPrinter :: Options
+ -> PGF -> CId -> String
+srgsAbnfPrinter opts pgf cnc = showDoc $ prABNF sisr $ makeNonLeftRecursiveSRG opts pgf cnc
+ where sisr = flag optSISR opts
+
+srgsAbnfNonRecursivePrinter :: Options -> PGF -> CId -> String
+srgsAbnfNonRecursivePrinter opts pgf cnc = showDoc $ prABNF Nothing $ makeNonRecursiveSRG opts pgf cnc
+
+showDoc = renderStyle (style { lineLength = width })
+
+prABNF :: Maybe SISRFormat -> SRG -> Doc
+prABNF sisr srg
+ = header $++$ foldr ($++$) empty (map prRule (srgRules srg))
+ where
+ header = text "#ABNF 1.0 UTF-8;" $$
+ meta "description" ("Speech recognition grammar for " ++ srgName srg) $$
+ meta "generator" "Grammatical Framework" $$
+ language $$ tagFormat $$ mainCat
+ language = maybe empty (\l -> text "language" <+> text l <> char ';') (srgLanguage srg)
+ tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';'
+ | otherwise = empty
+ mainCat = text "root" <+> prCat (srgStartCat srg) <> char ';'
+ prRule (SRGRule cat alts) = rule (isExternalCat srg cat) cat (map prAlt alts)
+ prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
+ where initTag = tag sisr (profileInitSISR n)
+ finalTag = tag sisr (profileFinalSISR n)
+ p = if isEmpty initTag && isEmpty finalTag then id else parens
+
+prCat :: Cat -> Doc
+prCat c = char '$' <> text c
+
+prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
+prItem sisr t = f 0
+ where
+ f _ (REUnion []) = text "$VOID"
+ f p (REUnion xs)
+ | not (null es) = brackets (f 0 (REUnion nes))
+ | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
+ where (es,nes) = partition isEpsilon xs
+ f _ (REConcat []) = text "$NULL"
+ f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
+ f p (RERepeat x) = f 3 x <> text "<0->"
+ f _ (RESymbol s) = prSymbol sisr t s
+
+
+prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc
+prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
+prSymbol _ cn (Terminal t)
+ | all isPunct t = empty -- removes punctuation
+ | otherwise = text t -- FIXME: quote if there is whitespace or odd chars
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
+tag Nothing _ = empty
+tag (Just fmt) t =
+ case t fmt of
+ [] -> empty
+ -- grr, silly SRGS ABNF does not have an escaping mechanism
+ ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}"
+ | otherwise -> text "{" <+> text x <+> text "}"
+ where x = prSISR ts
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!"
+
+comment :: String -> Doc
+comment s = text "//" <+> text s
+
+alts :: [Doc] -> Doc
+alts = fsep . prepunctuate (text "| ")
+
+rule :: Bool -> Cat -> [Doc] -> Doc
+rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
+ where p = if pub then text "public" else empty
+
+meta :: String -> String -> Doc
+meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';'
+
+-- Pretty-printing utilities
+
+emptyLine :: Doc
+emptyLine = text ""
+
+prepunctuate :: Doc -> [Doc] -> [Doc]
+prepunctuate _ [] = []
+prepunctuate p (x:xs) = x : map (p <>) xs
+
+($++$) :: Doc -> Doc -> Doc
+x $++$ y = x $$ emptyLine $$ y
+
diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs
new file mode 100644
index 000000000..1f94de66d
--- /dev/null
+++ b/src/compiler/GF/Speech/SRGS_XML.hs
@@ -0,0 +1,105 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.SRGS_XML
+--
+-- Prints an SRGS XML speech recognition grammars.
+----------------------------------------------------------------------
+module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
+
+import GF.Data.Utilities
+import GF.Data.XML
+import GF.Infra.Option
+import GF.Speech.CFG
+import GF.Speech.RegExp
+import GF.Speech.SISR as SISR
+import GF.Speech.SRG
+import PGF (PGF, CId)
+
+import Control.Monad
+import Data.Char (toUpper,toLower)
+import Data.List
+import Data.Maybe
+import qualified Data.Map as Map
+
+srgsXmlPrinter :: Options
+ -> PGF -> CId -> String
+srgsXmlPrinter opts pgf cnc = prSrgsXml sisr $ makeNonLeftRecursiveSRG opts pgf cnc
+ where sisr = flag optSISR opts
+
+srgsXmlNonRecursivePrinter :: Options -> PGF -> CId -> String
+srgsXmlNonRecursivePrinter opts pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG opts pgf cnc
+
+
+prSrgsXml :: Maybe SISRFormat -> SRG -> String
+prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr)
+ where
+ xmlGr = grammar sisr (srgStartCat srg) (srgLanguage srg) $
+ [meta "description"
+ ("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."),
+ meta "generator" "Grammatical Framework"]
+ ++ map ruleToXML (srgRules srg)
+ ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts)
+ where pub = if isExternalCat srg cat then [("scope","public")] else []
+ prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
+
+mkProd :: Maybe SISRFormat -> SRGAlt -> XML
+mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf)
+ where x = mkItem sisr n rhs
+ ti = tag sisr (profileInitSISR n)
+ tf = tag sisr (profileFinalSISR n)
+
+mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
+mkItem sisr cn = f
+ where
+ f (REUnion []) = ETag "ruleref" [("special","VOID")]
+ f (REUnion xs)
+ | not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
+ | otherwise = oneOf (map f xs)
+ where (es,nes) = partition isEpsilon xs
+ f (REConcat []) = ETag "ruleref" [("special","NULL")]
+ f (REConcat xs) = Tag "item" [] (map f xs)
+ f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
+ f (RESymbol s) = symItem sisr cn s
+
+symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
+symItem sisr cn (NonTerminal n@(c,_)) =
+ Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
+symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)]
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
+tag Nothing _ = []
+tag (Just fmt) t = case t fmt of
+ [] -> []
+ ts -> [Tag "tag" [] [Data (prSISR ts)]]
+
+showToken :: Token -> String
+showToken t = t
+
+oneOf :: [XML] -> XML
+oneOf = Tag "one-of" []
+
+grammar :: Maybe SISRFormat
+ -> String -- ^ root
+ -> Maybe String -- ^language
+ -> [XML] -> XML
+grammar sisr root ml =
+ Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
+ ("version","1.0"),
+ ("mode","voice"),
+ ("root",root)]
+ ++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
+ ++ maybe [] (\l -> [("xml:lang", l)]) ml
+
+meta :: String -> String -> XML
+meta n c = ETag "meta" [("name",n),("content",c)]
+
+optimizeSRGS :: XML -> XML
+optimizeSRGS = bottomUpXML f
+ where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
+ f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
+ f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
+ f (Tag "item" as xs) = Tag "item" as (map g xs)
+ where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
+ g x = x
+ f (Tag "one-of" [] [x]) = x
+ f x = x
diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs
new file mode 100644
index 000000000..134964062
--- /dev/null
+++ b/src/compiler/GF/Speech/VoiceXML.hs
@@ -0,0 +1,243 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.VoiceXML
+--
+-- Creates VoiceXML dialogue systems from PGF grammars.
+-----------------------------------------------------------------------------
+module GF.Speech.VoiceXML (grammar2vxml) where
+
+import GF.Data.Operations
+import GF.Data.Str (sstrV)
+import GF.Data.Utilities
+import GF.Data.XML
+import GF.Infra.Ident
+import GF.Infra.Modules
+import GF.Speech.SRG (getSpeechLanguage)
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import PGF.Linearize (realize)
+
+import Control.Monad (liftM)
+import Data.List (isPrefixOf, find, intersperse)
+import qualified Data.Map as Map
+import Data.Maybe (fromMaybe)
+
+import Debug.Trace
+
+-- | the main function
+grammar2vxml :: PGF -> CId -> String
+grammar2vxml pgf cnc = showsXMLDoc (skel2vxml name language start skel qs) ""
+ where skel = pgfSkeleton pgf
+ name = showCId cnc
+ qs = catQuestions pgf cnc (map fst skel)
+ language = getSpeechLanguage pgf cnc
+ start = lookStartCat pgf
+
+--
+-- * VSkeleton: a simple description of the abstract syntax.
+--
+
+type Skeleton = [(CId, [(CId, [CId])])]
+
+pgfSkeleton :: PGF -> Skeleton
+pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType pgf f))) | f <- fs])
+ | (c,fs) <- Map.toList (catfuns (abstract pgf)),
+ not (isLiteralCat c)]
+
+--
+-- * Questions to ask
+--
+
+type CatQuestions = [(CId,String)]
+
+catQuestions :: PGF -> CId -> [CId] -> CatQuestions
+catQuestions pgf cnc cats = [(c,catQuestion pgf cnc c) | c <- cats]
+
+catQuestion :: PGF -> CId -> CId -> String
+catQuestion pgf cnc cat = realize (lookPrintName pgf cnc cat)
+
+
+{-
+lin :: StateGrammar -> String -> Err String
+lin gr fun = do
+ tree <- string2treeErr gr fun
+ let ls = map unt $ linTree2strings noMark g c tree
+ case ls of
+ [] -> fail $ "No linearization of " ++ fun
+ l:_ -> return l
+ where c = cncId gr
+ g = stateGrammarST gr
+ unt = formatAsText
+-}
+
+getCatQuestion :: CId -> CatQuestions -> String
+getCatQuestion c qs =
+ fromMaybe (error "No question for category " ++ showCId c) (lookup c qs)
+
+--
+-- * Generate VoiceXML
+--
+
+skel2vxml :: String -> Maybe String -> CId -> Skeleton -> CatQuestions -> XML
+skel2vxml name language start skel qs =
+ vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel)
+ where
+ gr = grammarURI name
+ startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)]
+ [param "old" "{ name : '?' }"]]
+
+grammarURI :: String -> String
+grammarURI name = name ++ ".grxml"
+
+
+catForms :: String -> CatQuestions -> CId -> [(CId, [CId])] -> [XML]
+catForms gr qs cat fs =
+ comments [showCId cat ++ " category."]
+ ++ [cat2form gr qs cat fs]
+
+cat2form :: String -> CatQuestions -> CId -> [(CId, [CId])] -> XML
+cat2form gr qs cat fs =
+ form (catFormId cat) $
+ [var "old" Nothing,
+ blockCond "old.name != '?'" [assign "term" "old"],
+ field "term" []
+ [promptString (getCatQuestion cat qs),
+ vxmlGrammar (gr++"#"++catFormId cat)
+ ]
+ ]
+ ++ concatMap (uncurry (fun2sub gr cat)) fs
+ ++ [block [return_ ["term"]{-]-}]]
+
+fun2sub :: String -> CId -> CId -> [CId] -> [XML]
+fun2sub gr cat fun args =
+ comments [showCId fun ++ " : ("
+ ++ concat (intersperse ", " (map showCId args))
+ ++ ") " ++ showCId cat] ++ ss
+ where
+ ss = zipWith mkSub [0..] args
+ mkSub n t = subdialog s [("src","#"++catFormId t),
+ ("cond","term.name == "++string (showCId fun))]
+ [param "old" v,
+ filled [] [assign v (s++".term")]]
+ where s = showCId fun ++ "_" ++ show n
+ v = "term.args["++show n++"]"
+
+catFormId :: CId -> String
+catFormId c = showCId c ++ "_cat"
+
+
+--
+-- * VoiceXML stuff
+--
+
+vxml :: Maybe String -> [XML] -> XML
+vxml ml = Tag "vxml" $ [("version","2.0"),
+ ("xmlns","http://www.w3.org/2001/vxml")]
+ ++ maybe [] (\l -> [("xml:lang", l)]) ml
+
+form :: String -> [XML] -> XML
+form id xs = Tag "form" [("id", id)] xs
+
+field :: String -> [(String,String)] -> [XML] -> XML
+field name attrs = Tag "field" ([("name",name)]++attrs)
+
+subdialog :: String -> [(String,String)] -> [XML] -> XML
+subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs)
+
+filled :: [(String,String)] -> [XML] -> XML
+filled = Tag "filled"
+
+vxmlGrammar :: String -> XML
+vxmlGrammar uri = ETag "grammar" [("src",uri)]
+
+prompt :: [XML] -> XML
+prompt = Tag "prompt" []
+
+promptString :: String -> XML
+promptString p = prompt [Data p]
+
+reprompt :: XML
+reprompt = ETag "reprompt" []
+
+assign :: String -> String -> XML
+assign n e = ETag "assign" [("name",n),("expr",e)]
+
+value :: String -> XML
+value expr = ETag "value" [("expr",expr)]
+
+if_ :: String -> [XML] -> XML
+if_ c b = if_else c b []
+
+if_else :: String -> [XML] -> [XML] -> XML
+if_else c t f = cond [(c,t)] f
+
+cond :: [(String,[XML])] -> [XML] -> XML
+cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es)
+ where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest]
+ ++ if null els then [] else (Tag "else" [] []:els)
+
+goto_item :: String -> XML
+goto_item nextitem = ETag "goto" [("nextitem",nextitem)]
+
+return_ :: [String] -> XML
+return_ names = ETag "return" [("namelist", unwords names)]
+
+block :: [XML] -> XML
+block = Tag "block" []
+
+blockCond :: String -> [XML] -> XML
+blockCond cond = Tag "block" [("cond", cond)]
+
+throw :: String -> String -> XML
+throw event msg = Tag "throw" [("event",event),("message",msg)] []
+
+nomatch :: [XML] -> XML
+nomatch = Tag "nomatch" []
+
+help :: [XML] -> XML
+help = Tag "help" []
+
+param :: String -> String -> XML
+param name expr = ETag "param" [("name",name),("expr",expr)]
+
+var :: String -> Maybe String -> XML
+var name expr = ETag "var" ([("name",name)]++e)
+ where e = maybe [] ((:[]) . (,) "expr") expr
+
+script :: String -> XML
+script s = Tag "script" [] [CData s]
+
+scriptURI :: String -> XML
+scriptURI uri = Tag "script" [("uri", uri)] []
+
+--
+-- * ECMAScript stuff
+--
+
+string :: String -> String
+string s = "'" ++ concatMap esc s ++ "'"
+ where esc '\'' = "\\'"
+ esc c = [c]
+
+{-
+--
+-- * List stuff
+--
+
+isListCat :: (CId, [(CId, [CId])]) -> Bool
+isListCat (cat,rules) = "List" `isPrefixOf` showIdent cat && length rules == 2
+ && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs
+ where c = drop 4 (showIdent cat)
+ fs = map (showIdent . fst) rules
+
+isBaseFun :: CId -> Bool
+isBaseFun f = "Base" `isPrefixOf` showIdent f
+
+isConsFun :: CId -> Bool
+isConsFun f = "Cons" `isPrefixOf` showIdent f
+
+baseSize :: (CId, [(CId, [CId])]) -> Int
+baseSize (_,rules) = length bs
+ where Just (_,bs) = find (isBaseFun . fst) rules
+-}