summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Speech
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Speech
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Speech')
-rw-r--r--src-3.0/GF/Speech/CFGToFiniteState.hs265
-rw-r--r--src-3.0/GF/Speech/FiniteState.hs329
-rw-r--r--src-3.0/GF/Speech/GrammarToVoiceXML.hs285
-rw-r--r--src-3.0/GF/Speech/Graph.hs178
-rw-r--r--src-3.0/GF/Speech/PrFA.hs56
-rw-r--r--src-3.0/GF/Speech/PrGSL.hs113
-rw-r--r--src-3.0/GF/Speech/PrJSGF.hs145
-rw-r--r--src-3.0/GF/Speech/PrRegExp.hs33
-rw-r--r--src-3.0/GF/Speech/PrSLF.hs190
-rw-r--r--src-3.0/GF/Speech/PrSRGS.hs153
-rw-r--r--src-3.0/GF/Speech/PrSRGS_ABNF.hs147
-rw-r--r--src-3.0/GF/Speech/RegExp.hs143
-rw-r--r--src-3.0/GF/Speech/Relation.hs130
-rw-r--r--src-3.0/GF/Speech/RelationQC.hs39
-rw-r--r--src-3.0/GF/Speech/SISR.hs87
-rw-r--r--src-3.0/GF/Speech/SRG.hs235
-rw-r--r--src-3.0/GF/Speech/TransformCFG.hs378
17 files changed, 2906 insertions, 0 deletions
diff --git a/src-3.0/GF/Speech/CFGToFiniteState.hs b/src-3.0/GF/Speech/CFGToFiniteState.hs
new file mode 100644
index 000000000..7e6f80ba1
--- /dev/null
+++ b/src-3.0/GF/Speech/CFGToFiniteState.hs
@@ -0,0 +1,265 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CFGToFiniteState
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.6 $
+--
+-- Approximates CFGs with finite state networks.
+-----------------------------------------------------------------------------
+
+module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
+ MFA(..), MFALabel, cfgToMFA,cfgToFA') where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
+import GF.Conversion.Types
+import GF.Infra.Ident (Ident)
+import GF.Infra.Option (Options)
+import GF.Compile.ShellState (StateGrammar)
+
+import GF.Speech.FiniteState
+import GF.Speech.Graph
+import GF.Speech.Relation
+import GF.Speech.TransformCFG
+
+data Recursivity = RightR | LeftR | NotR
+
+data MutRecSet = MutRecSet {
+ mrCats :: Set Cat_,
+ mrNonRecRules :: [CFRule_],
+ mrRecRules :: [CFRule_],
+ mrRec :: Recursivity
+ }
+
+
+type MutRecSets = Map Cat_ MutRecSet
+
+--
+-- * Multiple DFA type
+--
+
+type MFALabel a = Symbol String a
+
+data MFA a = MFA String [(String,DFA (MFALabel a))]
+
+
+
+cfgToFA :: Options -> StateGrammar -> DFA Token
+cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s
+ where start = getStartCatCF opts s
+
+makeSimpleRegular :: Options -> StateGrammar -> CFRules
+makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s
+ where start = getStartCatCF opts s
+ preprocess = topDownFilter start . bottomUpFilter
+ . removeCycles
+
+
+--
+-- * Compile strongly regular grammars to NFAs
+--
+
+-- Convert a strongly regular grammar to a finite automaton.
+compileAutomaton :: Cat_ -- ^ Start category
+ -> CFRules
+ -> NFA Token
+compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
+ where
+ (fa,s,f) = newFA_
+ ns = mutRecSets g $ mutRecCats False g
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
+make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
+ -> NFA Token -> NFA Token
+make_fa c@(g,ns) q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [Tok t] -> newTransition q0 q1 (Just t) fa
+ [Cat a] -> case Map.lookup a ns of
+ -- a is recursive
+ Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
+ case mrRec n of
+ RightR ->
+ -- the set Ni is right-recursive or cyclic
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
+ let (xs,Cat d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ LeftR ->
+ -- the set Ni is left-recursive
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
+ ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats ni fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ -- a is not recursive
+ Nothing -> let rs = catRules g a
+ in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
+ where
+ make_fa_ = make_fa c
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
+
+--
+-- * Compile a strongly regular grammar to a DFA with sub-automata
+--
+
+cfgToMFA :: Options -> StateGrammar -> MFA Token
+cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s
+ where start = getStartCatCF opts s
+
+-- | Build a DFA by building and expanding an MFA
+cfgToFA' :: Options -> StateGrammar -> DFA Token
+cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s
+
+buildMFA :: Cat_ -- ^ Start category
+ -> CFRules -> MFA Token
+buildMFA start g = sortSubLats $ removeUnusedSubLats mfa
+ where fas = compileAutomata g
+ mfa = MFA start [(c, minimize fa) | (c,fa) <- fas]
+
+mfaStartDFA :: MFA a -> DFA (MFALabel a)
+mfaStartDFA (MFA start subs) =
+ fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
+
+mfaToDFA :: Ord a => MFA a -> DFA a
+mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
+ where
+ subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
+ getSub l = fromJust $ Map.lookup l subs'
+ expand (FA (Graph c ns es) s f)
+ = foldl' expandEdge (FA (Graph c ns []) s f) es
+ expandEdge fa (f,t,x) =
+ case x of
+ Nothing -> newTransition f t Nothing fa
+ Just (Tok s) -> newTransition f t (Just s) fa
+ Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l)
+
+removeUnusedSubLats :: MFA a -> MFA a
+removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
+ where
+ usedMap = subLatUseMap mfa
+ used = growUsedSet (Set.singleton start)
+ isUsed c = c `Set.member` used
+ growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
+
+subLatUseMap :: MFA a -> Map String (Set String)
+subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
+
+usedSubLats :: DFA (MFALabel a) -> Set String
+usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa]
+
+revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
+revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
+
+-- | Sort sub-networks topologically.
+sortSubLats :: MFA a -> MFA a
+sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
+ where
+ usedByMap = revMultiMap (subLatUseMap mfa)
+ sortLats _ [] = []
+ sortLats ub ls = xs ++ sortLats ub' ys
+ where (xs,ys) = partition ((==0) . indeg) ls
+ ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
+ indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
+
+-- | Convert a strongly regular grammar to a number of finite automata,
+-- one for each non-terminal.
+-- The edges in the automata accept tokens, or name another automaton to use.
+compileAutomata :: CFRules
+ -> [(Cat_,NFA (Symbol Cat_ Token))]
+ -- ^ A map of non-terminals and their automata.
+compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
+ where
+ mrs = mutRecSets g $ mutRecCats True g
+ makeOneFA c = make_fa1 mr s [Cat c] f fa
+ where (fa,s,f) = newFA_
+ mr = fromJust (Map.lookup c mrs)
+
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
+-- adapted to build a finite automaton for a single (mutually recursive) set only.
+-- Categories not in the set will result in category-labelled edges.
+make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
+ -- we are building the automaton.
+ -> State -- ^ State to come from
+ -> [Symbol Cat_ Token] -- ^ Symbols to accept
+ -> State -- ^ State to end up in
+ -> NFA (Symbol Cat_ Token) -- ^ FA to add to.
+ -> NFA (Symbol Cat_ Token)
+make_fa1 mr q0 alpha q1 fa =
+ case alpha of
+ [] -> newTransition q0 q1 Nothing fa
+ [t@(Tok _)] -> newTransition q0 q1 (Just t) fa
+ [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
+ [Cat a] ->
+ case mrRec mr of
+ NotR -> -- the set is a non-recursive (always singleton) set of categories
+ -- so the set of category rules is the set of rules for the whole set
+ make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
+ RightR -> -- the set is right-recursive or cyclic
+ let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
+ let (xs,Cat d) = (init ss,last ss)]
+ in make_fas new $ newTransition q0 (getState a) Nothing fa'
+ LeftR -> -- the set is left-recursive
+ let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
+ ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
+ in make_fas new $ newTransition (getState a) q1 Nothing fa'
+ where
+ (fa',stateMap) = addStatesForCats (mrCats mr) fa
+ getState x = Map.findWithDefault
+ (error $ "CFGToFiniteState: No state for " ++ x)
+ x stateMap
+ (x:beta) -> let (fa',q) = newState () fa
+ in make_fas [(q0,[x],q),(q,beta,q1)] fa'
+ where
+ make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
+
+mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
+mutRecSets g = Map.fromList . concatMap mkMutRecSet
+ where
+ mkMutRecSet cs = [ (c,ms) | c <- csl ]
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
+ ms = MutRecSet {
+ mrCats = cs,
+ mrNonRecRules = nrs,
+ mrRecRules = rrs,
+ mrRec = rec
+ }
+ rec | null rrs = NotR
+ | all (isRightLinear cs) rrs = RightR
+ | otherwise = LeftR
+
+--
+-- * Utilities
+--
+
+-- | Add a state for the given NFA for each of the categories
+-- in the given set. Returns a map of categories to their
+-- corresponding states.
+addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
+addStatesForCats cs fa = (fa', m)
+ where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
+ m = Map.fromList (zip (Set.toList cs) (map fst ns))
diff --git a/src-3.0/GF/Speech/FiniteState.hs b/src-3.0/GF/Speech/FiniteState.hs
new file mode 100644
index 000000000..35274e3c4
--- /dev/null
+++ b/src-3.0/GF/Speech/FiniteState.hs
@@ -0,0 +1,329 @@
+----------------------------------------------------------------------
+-- |
+-- Module : FiniteState
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.16 $
+--
+-- A simple finite state network module.
+-----------------------------------------------------------------------------
+module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
+ startState, finalStates,
+ states, transitions,
+ isInternal,
+ newFA, newFA_,
+ addFinalState,
+ newState, newStates,
+ newTransition, newTransitions,
+ insertTransitionWith, insertTransitionsWith,
+ mapStates, mapTransitions,
+ modifyTransitions,
+ nonLoopTransitionsTo, nonLoopTransitionsFrom,
+ loops,
+ removeState,
+ oneFinalState,
+ insertNFA,
+ onGraph,
+ moveLabelsToNodes, removeTrivialEmptyNodes,
+ minimize,
+ dfa2nfa,
+ unusedNames, renameStates,
+ prFAGraphviz, faToGraphviz) where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+import GF.Speech.Graph
+import qualified GF.Visualization.Graphviz as Dot
+
+type State = Int
+
+-- | Type parameters: node id type, state label type, edge label type
+-- Data constructor arguments: nodes and edges, start state, final states
+data FA n a b = FA !(Graph n a b) !n ![n]
+
+type NFA a = FA State () (Maybe a)
+
+type DFA a = FA State () a
+
+
+startState :: FA n a b -> n
+startState (FA _ s _) = s
+
+finalStates :: FA n a b -> [n]
+finalStates (FA _ _ ss) = ss
+
+states :: FA n a b -> [(n,a)]
+states (FA g _ _) = nodes g
+
+transitions :: FA n a b -> [(n,n,b)]
+transitions (FA g _ _) = edges g
+
+newFA :: Enum n => a -- ^ Start node label
+ -> FA n a b
+newFA l = FA g s []
+ where (g,s) = newNode l (newGraph [toEnum 0..])
+
+-- | Create a new finite automaton with an initial and a final state.
+newFA_ :: Enum n => (FA n () b, n, n)
+newFA_ = (fa'', s, f)
+ where fa = newFA ()
+ s = startState fa
+ (fa',f) = newState () fa
+ fa'' = addFinalState f fa'
+
+addFinalState :: n -> FA n a b -> FA n a b
+addFinalState f (FA g s ss) = FA g s (f:ss)
+
+newState :: a -> FA n a b -> (FA n a b, n)
+newState x (FA g s ss) = (FA g' s ss, n)
+ where (g',n) = newNode x g
+
+newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
+newStates xs (FA g s ss) = (FA g' s ss, ns)
+ where (g',ns) = newNodes xs g
+
+newTransition :: n -> n -> b -> FA n a b -> FA n a b
+newTransition f t l = onGraph (newEdge (f,t,l))
+
+newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
+newTransitions es = onGraph (newEdges es)
+
+insertTransitionWith :: Eq n =>
+ (b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
+insertTransitionWith f t = onGraph (insertEdgeWith f t)
+
+insertTransitionsWith :: Eq n =>
+ (b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
+insertTransitionsWith f ts fa =
+ foldl' (flip (insertTransitionWith f)) fa ts
+
+mapStates :: (a -> c) -> FA n a b -> FA n c b
+mapStates f = onGraph (nmap f)
+
+mapTransitions :: (b -> c) -> FA n a b -> FA n a c
+mapTransitions f = onGraph (emap f)
+
+modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
+modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
+
+removeState :: Ord n => n -> FA n a b -> FA n a b
+removeState n = onGraph (removeNode n)
+
+minimize :: Ord a => NFA a -> DFA a
+minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
+
+unusedNames :: FA n a b -> [n]
+unusedNames (FA (Graph names _ _) _ _) = names
+
+-- | Gets all incoming transitions to a given state, excluding
+-- transtions from the state itself.
+nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
+nonLoopTransitionsTo s fa =
+ [(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
+
+nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
+nonLoopTransitionsFrom s fa =
+ [(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
+
+loops :: Eq n => n -> FA n a b -> [b]
+loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
+
+-- | Give new names to all nodes.
+renameStates :: Ord x => [y] -- ^ Infinite supply of new names
+ -> FA x a b
+ -> FA y a b
+renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
+ where (ns,rest) = splitAt (length (nodes g)) supply
+ newNodes = Map.fromList (zip (map fst (nodes g)) ns)
+ newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
+ s' = newName s
+ fs' = map newName fs
+
+-- | Insert an NFA into another
+insertNFA :: NFA a -- ^ NFA to insert into
+ -> (State, State) -- ^ States to insert between
+ -> NFA a -- ^ NFA to insert.
+ -> NFA a
+insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
+ = FA (newEdges es g') s1 fs1
+ where
+ es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
+ (g',ren) = mergeGraphs g1 g2
+
+onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
+onGraph f (FA g s ss) = FA (f g) s ss
+
+
+-- | Make the finite automaton have a single final state
+-- by adding a new final state and adding an edge
+-- from the old final states to the new state.
+oneFinalState :: a -- ^ Label to give the new node
+ -> b -- ^ Label to give the new edges
+ -> FA n a b -- ^ The old network
+ -> FA n a b -- ^ The new network
+oneFinalState nl el fa =
+ let (FA g s fs,nf) = newState nl fa
+ es = [ (f,nf,el) | f <- fs ]
+ in FA (newEdges es g) s [nf]
+
+-- | Transform a standard finite automaton with labelled edges
+-- to one where the labels are on the nodes instead. This can add
+-- up to one extra node per edge.
+moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
+moveLabelsToNodes = onGraph f
+ where f g@(Graph c _ _) = Graph c' ns (concat ess)
+ where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
+ (c',is') = mapAccumL fixIncoming c is
+ (ns,ess) = unzip (concat is')
+
+
+-- | Remove empty nodes which are not start or final, and have
+-- exactly one outgoing edge or exactly one incoming edge.
+removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
+removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
+
+-- | Move edges to empty nodes to point to the next node(s).
+-- This is not done if the pointed-to node is a final node.
+skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
+skipSimpleEmptyNodes fa = onGraph og fa
+ where
+ og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
+ where
+ es' = concatMap changeEdge es
+ info = nodeInfo g
+ changeEdge e@(f,t,())
+ | isNothing (getNodeLabel info t)
+ -- && (i * o <= i + o)
+ && not (isFinal fa t)
+ = [ (f,t',()) | (_,t',()) <- getOutgoing info t]
+ | otherwise = [e]
+-- where i = inDegree info t
+-- o = outDegree info t
+
+isInternal :: Eq n => FA n a b -> n -> Bool
+isInternal (FA _ start final) n = n /= start && n `notElem` final
+
+isFinal :: Eq n => FA n a b -> n -> Bool
+isFinal (FA _ _ final) n = n `elem` final
+
+-- | Remove all internal nodes with no incoming edges
+-- or no outgoing edges.
+pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
+pruneUnusable fa = onGraph f fa
+ where
+ f g = if Set.null rns then g else f (removeNodes rns g)
+ where info = nodeInfo g
+ rns = Set.fromList [ n | (n,_) <- nodes g,
+ isInternal fa n,
+ inDegree info n == 0
+ || outDegree info n == 0]
+
+fixIncoming :: (Ord n, Eq a) => [n]
+ -> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
+ -> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
+ -- incoming edges.
+fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
+ where ls = nub $ map edgeLabel es
+ (cs',cs'') = splitAt (length ls) cs
+ newNodes = zip cs' ls
+ es' = [ (x,n,()) | x <- map fst newNodes ]
+ -- separate cyclic and non-cyclic edges
+ (cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
+ -- keep all incoming non-cyclic edges with the right label
+ to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
+ -- for each cyclic edge with the right label,
+ -- add an edge from each of the new nodes (including this one)
+ ++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
+ newContexts = [ (v, to v) | v <- newNodes ]
+
+alphabet :: Eq b => Graph n a (Maybe b) -> [b]
+alphabet = nub . catMaybes . map edgeLabel . edges
+
+determinize :: Ord a => NFA a -> DFA a
+determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
+ (ns',es') = (Set.toList ns, Set.toList es)
+ final = filter isDFAFinal ns'
+ fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
+ in renameStates [0..] fa
+ where info = nodeInfo g
+-- reach = nodesReachable out
+ start = closure info $ Set.singleton s
+ isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
+ h currentStates oldStates es
+ | Set.null currentStates = (oldStates,es)
+ | otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
+ where
+ allOldStates = oldStates `Set.union` currentStates
+ (newStates,es') = new (Set.toList currentStates) Set.empty es
+ uniqueNewStates = newStates Set.\\ allOldStates
+ -- Get the sets of states reachable from the given states
+ -- by consuming one symbol, and the associated edges.
+ new [] rs es = (rs,es)
+ new (n:ns) rs es = new ns rs' es'
+ where cs = reachable info n --reachable reach n
+ rs' = rs `Set.union` Set.fromList (map snd cs)
+ es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
+
+
+-- | Get all the nodes reachable from a list of nodes by only empty edges.
+closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
+closure info x = closure_ x x
+ where closure_ acc check | Set.null check = acc
+ | otherwise = closure_ acc' check'
+ where
+ reach = Set.fromList [y | x <- Set.toList check,
+ (_,y,Nothing) <- getOutgoing info x]
+ acc' = acc `Set.union` reach
+ check' = reach Set.\\ acc
+
+-- | Get a map of labels to sets of all nodes reachable
+-- from a the set of nodes by one edge with the given
+-- label and then any number of empty edges.
+reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
+reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
+reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
+
+reverseNFA :: NFA a -> NFA a
+reverseNFA (FA g s fs) = FA g''' s' [s]
+ where g' = reverseGraph g
+ (g'',s') = newNode () g'
+ g''' = newEdges [(s',f,Nothing) | f <- fs] g''
+
+dfa2nfa :: DFA a -> NFA a
+dfa2nfa = mapTransitions Just
+
+--
+-- * Visualization
+--
+
+prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
+prFAGraphviz = Dot.prGraphviz . faToGraphviz
+
+prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
+prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
+
+faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
+faToGraphviz (FA (Graph _ ns es) s f)
+ = Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
+ where mkNode (n,l) = Dot.Node (show n) attrs
+ where attrs = [("label",l)]
+ ++ if n == s then [("shape","box")] else []
+ ++ if n `elem` f then [("style","bold")] else []
+ mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
+
+--
+-- * Utilities
+--
+
+lookups :: Ord k => [k] -> Map k a -> [a]
+lookups xs m = mapMaybe (flip Map.lookup m) xs
diff --git a/src-3.0/GF/Speech/GrammarToVoiceXML.hs b/src-3.0/GF/Speech/GrammarToVoiceXML.hs
new file mode 100644
index 000000000..ad7f25d1c
--- /dev/null
+++ b/src-3.0/GF/Speech/GrammarToVoiceXML.hs
@@ -0,0 +1,285 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GrammarToVoiceXML
+-- Maintainer : Bjorn Bringert
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Create VoiceXML dialogue system from a GF grammar.
+-----------------------------------------------------------------------------
+
+module GF.Speech.GrammarToVoiceXML (grammar2vxml) where
+
+import GF.Canon.CanonToGFCC (canon2gfcc)
+import qualified GF.GFCC.CId as C
+import GF.GFCC.DataGFCC (GFCC(..), Abstr(..))
+import GF.GFCC.Macros
+import qualified GF.Canon.GFC as GFC
+import GF.Canon.AbsGFC (Term)
+import GF.Canon.PrintGFC (printTree)
+import GF.Canon.CMacros (noMark, strsFromTerm)
+import GF.Canon.Unlex (formatAsText)
+import GF.Data.Utilities
+import GF.CF.CFIdent (cfCat2Ident)
+import GF.Compile.ShellState (StateGrammar,stateGrammarST,cncId,grammar,
+ startCatStateOpts,stateOptions)
+import GF.Data.Str (sstrV)
+import GF.Grammar.Macros hiding (assign,strsFromTerm)
+import GF.Grammar.Grammar (Fun)
+import GF.Grammar.Values (Tree)
+import GF.Infra.Option (Options, addOptions, getOptVal, speechLanguage)
+import GF.UseGrammar.GetTree (string2treeErr)
+import GF.UseGrammar.Linear (linTree2strings)
+
+import GF.Infra.Ident
+import GF.Infra.Option (noOptions)
+import GF.Infra.Modules
+import GF.Data.Operations
+
+import GF.Data.XML
+
+import Control.Monad (liftM)
+import Data.List (isPrefixOf, find, intersperse)
+import qualified Data.Map as Map
+import Data.Maybe (fromMaybe)
+
+import Debug.Trace
+
+-- | the main function
+grammar2vxml :: Options -> StateGrammar -> String
+grammar2vxml opt s = showsXMLDoc (skel2vxml name language startcat gr' qs) ""
+ where (_, gr') = vSkeleton (stateGrammarST s)
+ name = prIdent (cncId s)
+ qs = catQuestions s (map fst gr')
+ opts = addOptions opt (stateOptions s)
+ language = fmap (replace '_' '-') $ getOptVal opts speechLanguage
+ startcat = C.CId $ prIdent $ cfCat2Ident $ startCatStateOpts opts s
+
+--
+-- * VSkeleton: a simple description of the abstract syntax.
+--
+
+type VSkeleton = [(VIdent, [(VIdent, [VIdent])])]
+type VIdent = C.CId
+
+prid :: VIdent -> String
+prid (C.CId x) = x
+
+vSkeleton :: GFC.CanonGrammar -> (VIdent,VSkeleton)
+vSkeleton = gfccSkeleton . canon2gfcc noOptions
+
+gfccSkeleton :: GFCC -> (VIdent,VSkeleton)
+gfccSkeleton gfcc = (absname gfcc, ts)
+ where a = abstract gfcc
+ ts = [(c,[(f,ft f) | f <- fs]) | (c,fs) <- Map.toList (catfuns a)]
+ ft f = case lookMap (error $ prid f) f (funs a) of
+ (ty,_) -> fst $ GF.GFCC.Macros.catSkeleton ty
+
+--
+-- * Questions to ask
+--
+
+type CatQuestions = [(VIdent,String)]
+
+catQuestions :: StateGrammar -> [VIdent] -> CatQuestions
+catQuestions gr cats = [(c,catQuestion gr c) | c <- cats]
+
+catQuestion :: StateGrammar -> VIdent -> String
+catQuestion gr cat = err errHandler id (getPrintname gr cat >>= term2string)
+ where -- FIXME: use some better warning facility
+ errHandler e = trace ("GrammarToVoiceXML: " ++ e) ("quest_"++prid cat)
+ term2string = liftM sstrV . strsFromTerm
+
+getPrintname :: StateGrammar -> VIdent -> Err Term
+getPrintname gr cat =
+ do m <- lookupModMod (grammar gr) (cncId gr)
+ i <- lookupInfo m (IC (prid cat))
+ case i of
+ GFC.CncCat _ _ p -> return p
+ _ -> fail $ "getPrintname " ++ prid cat
+ ++ ": Expected CncCat, got " ++ show i
+
+
+{-
+lin :: StateGrammar -> String -> Err String
+lin gr fun = do
+ tree <- string2treeErr gr fun
+ let ls = map unt $ linTree2strings noMark g c tree
+ case ls of
+ [] -> fail $ "No linearization of " ++ fun
+ l:_ -> return l
+ where c = cncId gr
+ g = stateGrammarST gr
+ unt = formatAsText
+-}
+
+getCatQuestion :: VIdent -> CatQuestions -> String
+getCatQuestion c qs =
+ fromMaybe (error "No question for category " ++ prid c) (lookup c qs)
+
+--
+-- * Generate VoiceXML
+--
+
+skel2vxml :: String -> Maybe String -> VIdent -> VSkeleton -> CatQuestions -> XML
+skel2vxml name language start skel qs =
+ vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel)
+ where
+ gr = grammarURI name
+ startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)]
+ [param "old" "{ name : '?' }"]]
+
+grammarURI :: String -> String
+grammarURI name = name ++ ".grxml"
+
+
+catForms :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> [XML]
+catForms gr qs cat fs =
+ comments [prid cat ++ " category."]
+ ++ [cat2form gr qs cat fs]
+
+cat2form :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> XML
+cat2form gr qs cat fs =
+ form (catFormId cat) $
+ [var "old" Nothing,
+ blockCond "old.name != '?'" [assign "term" "old"],
+ field "term" []
+ [promptString (getCatQuestion cat qs),
+ vxmlGrammar (gr++"#"++catFormId cat)
+ ]
+ ]
+ ++ concatMap (uncurry (fun2sub gr cat)) fs
+ ++ [block [return_ ["term"]{-]-}]]
+
+fun2sub :: String -> VIdent -> VIdent -> [VIdent] -> [XML]
+fun2sub gr cat fun args =
+ comments [prid fun ++ " : ("
+ ++ concat (intersperse ", " (map prid args))
+ ++ ") " ++ prid cat] ++ ss
+ where
+ ss = zipWith mkSub [0..] args
+ mkSub n t = subdialog s [("src","#"++catFormId t),
+ ("cond","term.name == "++string (prid fun))]
+ [param "old" v,
+ filled [] [assign v (s++".term")]]
+ where s = prid fun ++ "_" ++ show n
+ v = "term.args["++show n++"]"
+
+catFormId :: VIdent -> String
+catFormId c = prid c ++ "_cat"
+
+
+--
+-- * VoiceXML stuff
+--
+
+vxml :: Maybe String -> [XML] -> XML
+vxml ml = Tag "vxml" $ [("version","2.0"),
+ ("xmlns","http://www.w3.org/2001/vxml")]
+ ++ maybe [] (\l -> [("xml:lang", l)]) ml
+
+form :: String -> [XML] -> XML
+form id xs = Tag "form" [("id", id)] xs
+
+field :: String -> [(String,String)] -> [XML] -> XML
+field name attrs = Tag "field" ([("name",name)]++attrs)
+
+subdialog :: String -> [(String,String)] -> [XML] -> XML
+subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs)
+
+filled :: [(String,String)] -> [XML] -> XML
+filled = Tag "filled"
+
+vxmlGrammar :: String -> XML
+vxmlGrammar uri = ETag "grammar" [("src",uri)]
+
+prompt :: [XML] -> XML
+prompt = Tag "prompt" []
+
+promptString :: String -> XML
+promptString p = prompt [Data p]
+
+reprompt :: XML
+reprompt = ETag "reprompt" []
+
+assign :: String -> String -> XML
+assign n e = ETag "assign" [("name",n),("expr",e)]
+
+value :: String -> XML
+value expr = ETag "value" [("expr",expr)]
+
+if_ :: String -> [XML] -> XML
+if_ c b = if_else c b []
+
+if_else :: String -> [XML] -> [XML] -> XML
+if_else c t f = cond [(c,t)] f
+
+cond :: [(String,[XML])] -> [XML] -> XML
+cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es)
+ where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest]
+ ++ if null els then [] else (Tag "else" [] []:els)
+
+goto_item :: String -> XML
+goto_item nextitem = ETag "goto" [("nextitem",nextitem)]
+
+return_ :: [String] -> XML
+return_ names = ETag "return" [("namelist", unwords names)]
+
+block :: [XML] -> XML
+block = Tag "block" []
+
+blockCond :: String -> [XML] -> XML
+blockCond cond = Tag "block" [("cond", cond)]
+
+throw :: String -> String -> XML
+throw event msg = Tag "throw" [("event",event),("message",msg)] []
+
+nomatch :: [XML] -> XML
+nomatch = Tag "nomatch" []
+
+help :: [XML] -> XML
+help = Tag "help" []
+
+param :: String -> String -> XML
+param name expr = ETag "param" [("name",name),("expr",expr)]
+
+var :: String -> Maybe String -> XML
+var name expr = ETag "var" ([("name",name)]++e)
+ where e = maybe [] ((:[]) . (,) "expr") expr
+
+script :: String -> XML
+script s = Tag "script" [] [CData s]
+
+scriptURI :: String -> XML
+scriptURI uri = Tag "script" [("uri", uri)] []
+
+--
+-- * ECMAScript stuff
+--
+
+string :: String -> String
+string s = "'" ++ concatMap esc s ++ "'"
+ where esc '\'' = "\\'"
+ esc c = [c]
+
+{-
+--
+-- * List stuff
+--
+
+isListCat :: (VIdent, [(VIdent, [VIdent])]) -> Bool
+isListCat (cat,rules) = "List" `isPrefixOf` prIdent cat && length rules == 2
+ && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs
+ where c = drop 4 (prIdent cat)
+ fs = map (prIdent . fst) rules
+
+isBaseFun :: VIdent -> Bool
+isBaseFun f = "Base" `isPrefixOf` prIdent f
+
+isConsFun :: VIdent -> Bool
+isConsFun f = "Cons" `isPrefixOf` prIdent f
+
+baseSize :: (VIdent, [(VIdent, [VIdent])]) -> Int
+baseSize (_,rules) = length bs
+ where Just (_,bs) = find (isBaseFun . fst) rules
+-}
diff --git a/src-3.0/GF/Speech/Graph.hs b/src-3.0/GF/Speech/Graph.hs
new file mode 100644
index 000000000..1a0ebe0c0
--- /dev/null
+++ b/src-3.0/GF/Speech/Graph.hs
@@ -0,0 +1,178 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Graph
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.2 $
+--
+-- A simple graph module.
+-----------------------------------------------------------------------------
+module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
+ , newGraph, nodes, edges
+ , nmap, emap, newNode, newNodes, newEdge, newEdges
+ , insertEdgeWith
+ , removeNode, removeNodes
+ , nodeInfo
+ , getIncoming, getOutgoing, getNodeLabel
+ , inDegree, outDegree
+ , nodeLabel
+ , edgeFrom, edgeTo, edgeLabel
+ , reverseGraph, mergeGraphs, renameNodes
+ ) where
+
+import GF.Data.Utilities
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
+ deriving (Eq,Show)
+
+type Node n a = (n,a)
+type Edge n b = (n,n,b)
+
+type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
+
+-- | Create a new empty graph.
+newGraph :: [n] -> Graph n a b
+newGraph ns = Graph ns [] []
+
+-- | Get all the nodes in the graph.
+nodes :: Graph n a b -> [Node n a]
+nodes (Graph _ ns _) = ns
+
+-- | Get all the edges in the graph.
+edges :: Graph n a b -> [Edge n b]
+edges (Graph _ _ es) = es
+
+-- | Map a function over the node labels.
+nmap :: (a -> c) -> Graph n a b -> Graph n c b
+nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
+
+-- | Map a function over the edge labels.
+emap :: (b -> c) -> Graph n a b -> Graph n a c
+emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
+
+-- | Add a node to the graph.
+newNode :: a -- ^ Node label
+ -> Graph n a b
+ -> (Graph n a b,n) -- ^ Node graph and name of new node
+newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
+
+newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
+newNodes ls g = (g', zip ns ls)
+ where (g',ns) = mapAccumL (flip newNode) g ls
+-- lazy version:
+--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
+-- where (xs,cs') = splitAt (length ls) cs
+-- ns' = zip xs ls
+
+newEdge :: Edge n b -> Graph n a b -> Graph n a b
+newEdge e (Graph c ns es) = Graph c ns (e:es)
+
+newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
+newEdges es g = foldl' (flip newEdge) g es
+-- lazy version:
+-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
+
+insertEdgeWith :: Eq n =>
+ (b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
+insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
+ where h [] = [e]
+ h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
+ | otherwise = e':h es'
+
+-- | Remove a node and all edges to and from that node.
+removeNode :: Ord n => n -> Graph n a b -> Graph n a b
+removeNode n = removeNodes (Set.singleton n)
+
+-- | Remove a set of nodes and all edges to and from those nodes.
+removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
+removeNodes xs (Graph c ns es) = Graph c ns' es'
+ where
+ keepNode n = not (Set.member n xs)
+ ns' = [ x | x@(n,_) <- ns, keepNode n ]
+ es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
+
+-- | Get a map of node names to info about each node.
+nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
+nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
+ where
+ inc = groupEdgesBy edgeTo g
+ out = groupEdgesBy edgeFrom g
+ fn m n = fromMaybe [] (Map.lookup n m)
+
+groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
+ -> Graph n a b -> Map n [Edge n b]
+groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
+
+lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
+lookupNode i n = fromJust $ Map.lookup n i
+
+getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
+getIncoming i n = let (_,inc,_) = lookupNode i n in inc
+
+getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
+getOutgoing i n = let (_,_,out) = lookupNode i n in out
+
+inDegree :: Ord n => NodeInfo n a b -> n -> Int
+inDegree i n = length $ getIncoming i n
+
+outDegree :: Ord n => NodeInfo n a b -> n -> Int
+outDegree i n = length $ getOutgoing i n
+
+getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
+getNodeLabel i n = let (l,_,_) = lookupNode i n in l
+
+nodeLabel :: Node n a -> a
+nodeLabel = snd
+
+edgeFrom :: Edge n b -> n
+edgeFrom (f,_,_) = f
+
+edgeTo :: Edge n b -> n
+edgeTo (_,t,_) = t
+
+edgeLabel :: Edge n b -> b
+edgeLabel (_,_,l) = l
+
+reverseGraph :: Graph n a b -> Graph n a b
+reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
+
+-- | Add the nodes from the second graph to the first graph.
+-- The nodes in the second graph will be renamed using the name
+-- supply in the first graph.
+-- This function is more efficient when the second graph
+-- is smaller than the first.
+mergeGraphs :: Ord m => Graph n a b -> Graph m a b
+ -> (Graph n a b, m -> n) -- ^ The new graph and a function translating
+ -- the old names of nodes in the second graph
+ -- to names in the new graph.
+mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
+ where
+ (xs,c') = splitAt (length (nodes g2)) c
+ newNames = Map.fromList (zip (map fst (nodes g2)) xs)
+ newName n = fromJust $ Map.lookup n newNames
+ Graph _ ns2 es2 = renameNodes newName undefined g2
+
+-- | Rename the nodes in the graph.
+renameNodes :: (n -> m) -- ^ renaming function
+ -> [m] -- ^ infinite supply of fresh node names, to
+ -- use when adding nodes in the future.
+ -> Graph n a b -> Graph m a b
+renameNodes newName c (Graph _ ns es) = Graph c ns' es'
+ where ns' = map' (\ (n,x) -> (newName n,x)) ns
+ es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
+
+-- | A strict 'map'
+map' :: (a -> b) -> [a] -> [b]
+map' _ [] = []
+map' f (x:xs) = ((:) $! f x) $! map' f xs
diff --git a/src-3.0/GF/Speech/PrFA.hs b/src-3.0/GF/Speech/PrFA.hs
new file mode 100644
index 000000000..2856039ec
--- /dev/null
+++ b/src-3.0/GF/Speech/PrFA.hs
@@ -0,0 +1,56 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrSLF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.2 $
+--
+-- This module prints finite automata and regular grammars
+-- for a context-free grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) where
+
+import GF.Data.Utilities
+import GF.Conversion.Types
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..),symbol)
+import GF.Infra.Ident
+import GF.Infra.Option (Options)
+import GF.Infra.Print
+import GF.Speech.CFGToFiniteState
+import GF.Speech.FiniteState
+import GF.Speech.TransformCFG
+import GF.Compile.ShellState (StateGrammar)
+
+import Data.Char (toUpper,toLower)
+import Data.List
+import Data.Maybe (fromMaybe)
+
+
+
+faGraphvizPrinter :: Options -> StateGrammar -> String
+faGraphvizPrinter opts s =
+ prFAGraphviz $ mapStates (const "") $ cfgToFA opts s
+
+-- | Convert the grammar to a regular grammar and print it in BNF
+regularPrinter :: Options -> StateGrammar -> String
+regularPrinter opts s = prCFRules $ makeSimpleRegular opts s
+ where
+ prCFRules :: CFRules -> String
+ prCFRules g = unlines [ c ++ " ::= " ++ join " | " (map (showRhs . ruleRhs) rs) | (c,rs) <- allRulesGrouped g]
+ join g = concat . intersperse g
+ showRhs = unwords . map (symbol id show)
+
+faCPrinter :: Options -> StateGrammar -> String
+faCPrinter opts s = fa2c $ cfgToFA opts s
+
+fa2c :: DFA String -> String
+fa2c fa = undefined
diff --git a/src-3.0/GF/Speech/PrGSL.hs b/src-3.0/GF/Speech/PrGSL.hs
new file mode 100644
index 000000000..248991380
--- /dev/null
+++ b/src-3.0/GF/Speech/PrGSL.hs
@@ -0,0 +1,113 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrGSL
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.22 $
+--
+-- This module prints a CFG as a Nuance GSL 2.0 grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrGSL (gslPrinter) where
+
+import GF.Data.Utilities
+import GF.Speech.SRG
+import GF.Speech.RegExp
+import GF.Infra.Ident
+
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..))
+import GF.Conversion.Types
+import GF.Infra.Print
+import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
+import GF.Compile.ShellState (StateGrammar)
+
+import Data.Char (toUpper,toLower)
+import Data.List (partition)
+import Text.PrettyPrint.HughesPJ
+
+width :: Int
+width = 75
+
+gslPrinter :: Options -> StateGrammar -> String
+gslPrinter opts s = renderStyle st $ prGSL $ makeSimpleSRG opts s
+ where st = style { lineLength = width }
+
+prGSL :: SRG -> Doc
+prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
+ = header $++$ mainCat $++$ foldr ($++$) empty (map prRule rs)
+ where
+ header = text ";GSL2.0" $$
+ comment ("Nuance speech recognition grammar for " ++ name) $$
+ comment ("Generated by GF")
+ mainCat = comment ("Start category: " ++ origStart) $$
+ text ".MAIN" <+> prCat start
+ prRule (SRGRule cat origCat rhs) =
+ comment (prt origCat) $$
+ prCat cat <+> union (map prAlt rhs)
+ -- FIXME: use the probability
+ prAlt (SRGAlt mp _ rhs) = prItem rhs
+
+
+prItem :: SRGItem -> Doc
+prItem = f
+ where
+ f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
+ where (es,nes) = partition isEpsilon xs
+ f (REConcat [x]) = f x
+ f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")"
+ f (RERepeat x) = text "*" <> f x
+ f (RESymbol s) = prSymbol s
+
+union :: [Doc] -> Doc
+union [x] = x
+union xs = text "[" <> fsep xs <> text "]"
+
+prSymbol :: Symbol SRGNT Token -> Doc
+prSymbol (Cat (c,_)) = prCat c
+prSymbol (Tok t) = doubleQuotes (showToken t)
+
+-- GSL requires an upper case letter in category names
+prCat :: SRGCat -> Doc
+prCat c = text (firstToUpper c)
+
+
+firstToUpper :: String -> String
+firstToUpper [] = []
+firstToUpper (x:xs) = toUpper x : xs
+
+{-
+rmPunctCFG :: CGrammar -> CGrammar
+rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g]
+
+keepSymbol :: Symbol c Token -> Bool
+keepSymbol (Tok t) = not (all isPunct (prt t))
+keepSymbol _ = True
+-}
+
+-- Nuance does not like upper case characters in tokens
+showToken :: Token -> Doc
+showToken t = text (map toLower (prt t))
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.:;.,?!()[]{}"
+
+comment :: String -> Doc
+comment s = text ";" <+> text s
+
+
+-- Pretty-printing utilities
+
+emptyLine :: Doc
+emptyLine = text ""
+
+($++$) :: Doc -> Doc -> Doc
+x $++$ y = x $$ emptyLine $$ y
diff --git a/src-3.0/GF/Speech/PrJSGF.hs b/src-3.0/GF/Speech/PrJSGF.hs
new file mode 100644
index 000000000..037a4f4e2
--- /dev/null
+++ b/src-3.0/GF/Speech/PrJSGF.hs
@@ -0,0 +1,145 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrJSGF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.16 $
+--
+-- This module prints a CFG as a JSGF grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+--
+-- FIXME: convert to UTF-8
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrJSGF (jsgfPrinter) where
+
+import GF.Conversion.Types
+import GF.Data.Utilities
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats)
+import GF.Infra.Ident
+import GF.Infra.Print
+import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
+import GF.Speech.SISR
+import GF.Speech.SRG
+import GF.Speech.RegExp
+import GF.Compile.ShellState (StateGrammar)
+
+import Data.Char
+import Data.List
+import Data.Maybe
+import Text.PrettyPrint.HughesPJ
+import Debug.Trace
+
+width :: Int
+width = 75
+
+jsgfPrinter :: Maybe SISRFormat
+ -> Options
+ -> StateGrammar -> String
+jsgfPrinter sisr opts s = renderStyle st $ prJSGF sisr $ makeSimpleSRG opts s
+ where st = style { lineLength = width }
+
+prJSGF :: Maybe SISRFormat -> SRG -> Doc
+prJSGF sisr srg@(SRG{grammarName=name,grammarLanguage=ml,
+ startCat=start,origStartCat=origStart,rules=rs})
+ = header $++$ mainCat $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs)
+ where
+ header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$
+ comment ("JSGF speech recognition grammar for " ++ name) $$
+ comment "Generated by GF" $$
+ text ("grammar " ++ name ++ ";")
+ lang = maybe empty text ml
+ mainCat = comment ("Start category: " ++ origStart) $$
+ case cfgCatToGFCat origStart of
+ Just c -> rule True "MAIN" [prCat (catFormId c)]
+ Nothing -> empty
+ prRule (SRGRule cat origCat rhs) =
+ comment origCat $$
+ rule False cat (map prAlt rhs)
+-- rule False cat (map prAlt rhs)
+ -- FIXME: use the probability
+ prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
+-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
+ where initTag | isEmpty t = empty
+ | otherwise = text "<NULL>" <+> t
+ where t = tag sisr (profileInitSISR n)
+ finalTag = tag sisr (profileFinalSISR n)
+ p = if isEmpty initTag && isEmpty finalTag then id else parens
+
+ topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
+ where it i c = prCat c <+> tag sisr (topCatSISR c)
+
+catFormId :: String -> String
+catFormId = (++ "_cat")
+
+prCat :: SRGCat -> Doc
+prCat c = char '<' <> text c <> char '>'
+
+prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
+prItem sisr t = f 0
+ where
+ f _ (REUnion []) = text "<VOID>"
+ f p (REUnion xs)
+ | not (null es) = brackets (f 0 (REUnion nes))
+ | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
+ where (es,nes) = partition isEpsilon xs
+ f _ (REConcat []) = text "<NULL>"
+ f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
+ f p (RERepeat x) = f 3 x <> char '*'
+ f _ (RESymbol s) = prSymbol sisr t s
+
+{-
+prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
+prItem _ _ [] = text "<NULL>"
+prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
+ where paren = if length ss == 1 then id else parens
+-}
+
+prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
+prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
+prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation
+ | otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
+tag Nothing _ = empty
+tag (Just fmt) t = case t fmt of
+ [] -> empty
+ ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
+ where e [] = []
+ e ('}':xs) = '\\':'}':e xs
+ e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
+ e (x:xs) = x:e xs
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!"
+
+comment :: String -> Doc
+comment s = text "//" <+> text s
+
+alts :: [Doc] -> Doc
+alts = fsep . prepunctuate (text "| ")
+
+rule :: Bool -> SRGCat -> [Doc] -> Doc
+rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
+ where p = if pub then text "public" else empty
+
+-- Pretty-printing utilities
+
+emptyLine :: Doc
+emptyLine = text ""
+
+prepunctuate :: Doc -> [Doc] -> [Doc]
+prepunctuate _ [] = []
+prepunctuate p (x:xs) = x : map (p <>) xs
+
+($++$) :: Doc -> Doc -> Doc
+x $++$ y = x $$ emptyLine $$ y
+
diff --git a/src-3.0/GF/Speech/PrRegExp.hs b/src-3.0/GF/Speech/PrRegExp.hs
new file mode 100644
index 000000000..55a25d69b
--- /dev/null
+++ b/src-3.0/GF/Speech/PrRegExp.hs
@@ -0,0 +1,33 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrSLF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- This module prints a grammar as a regular expression.
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
+
+import GF.Conversion.Types
+import GF.Formalism.Utilities
+import GF.Infra.Ident
+import GF.Infra.Option (Options)
+import GF.Speech.CFGToFiniteState
+import GF.Speech.RegExp
+import GF.Compile.ShellState (StateGrammar)
+
+
+regexpPrinter :: Options -> StateGrammar -> String
+regexpPrinter opts s = (++"\n") $ prRE $ dfa2re $ cfgToFA opts s
+
+multiRegexpPrinter :: Options -> StateGrammar -> String
+multiRegexpPrinter opts s = prREs $ mfa2res $ cfgToMFA opts s
+
+prREs :: [(String,RE (MFALabel String))] -> String
+prREs res = unlines [l ++ " = " ++ prRE (mapRE showLabel re) | (l,re) <- res]
+ where showLabel = symbol (\l -> "<" ++ l ++ ">") id
+
+mfa2res :: MFA String -> [(String,RE (MFALabel String))]
+mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas]
diff --git a/src-3.0/GF/Speech/PrSLF.hs b/src-3.0/GF/Speech/PrSLF.hs
new file mode 100644
index 000000000..9bc025558
--- /dev/null
+++ b/src-3.0/GF/Speech/PrSLF.hs
@@ -0,0 +1,190 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrSLF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.12 $
+--
+-- This module converts a CFG to an SLF finite-state network
+-- for use with the ATK recognizer. The SLF format is described
+-- in the HTK manual, and an example for use in ATK is shown
+-- in the ATK manual.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,
+ slfSubPrinter,slfSubGraphvizPrinter) where
+
+import GF.Data.Utilities
+import GF.Conversion.Types
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol)
+import GF.Infra.Ident
+import GF.Infra.Option (Options)
+import GF.Infra.Print
+import GF.Speech.CFGToFiniteState
+import GF.Speech.FiniteState
+import GF.Speech.TransformCFG
+import qualified GF.Visualization.Graphviz as Dot
+import GF.Compile.ShellState (StateGrammar)
+
+import Control.Monad
+import qualified Control.Monad.State as STM
+import Data.Char (toUpper)
+import Data.List
+import Data.Maybe
+
+data SLFs = SLFs [(String,SLF)] SLF
+
+data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
+
+data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
+ | SLFSubLat { nId :: Int, nLat :: String }
+
+-- | An SLF word is a word, or the empty string.
+type SLFWord = Maybe String
+
+data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
+
+type SLF_FA = FA State (Maybe (MFALabel String)) ()
+
+mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)])
+mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
+ where MFA start subs = {- renameSubs $ -} cfgToMFA opts s
+ main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa
+
+slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
+slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
+ . moveLabelsToNodes . dfa2nfa
+
+-- | Give sequential names to subnetworks.
+renameSubs :: MFA String -> MFA String
+renameSubs (MFA start subs) = MFA (newName start) subs'
+ where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
+ newName s = lookup' s newNames
+ subs' = [(newName s,renameLabels n) | (s,n) <- subs]
+ renameLabels = mapTransitions (mapSymbol newName id)
+
+--
+-- * SLF graphviz printing (without sub-networks)
+--
+
+slfGraphvizPrinter :: Options -> StateGrammar -> String
+slfGraphvizPrinter opts s
+ = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s
+ where
+ gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
+
+--
+-- * SLF graphviz printing (with sub-networks)
+--
+
+slfSubGraphvizPrinter :: Options -> StateGrammar -> String
+slfSubGraphvizPrinter opts s = Dot.prGraphviz g
+ where (main, subs) = mkFAs opts s
+ g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
+ ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
+ m = gvSLFFA Nothing main
+
+gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
+gvSLFFA n fa =
+ liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
+ . mapTransitions (const "")) (rename fa)
+ where mfaLabelToGv = symbol ("#"++) id
+ mkCluster Nothing = id
+ mkCluster (Just x)
+ = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
+ rename fa = do
+ names <- STM.get
+ let fa' = renameStates names fa
+ names' = unusedNames fa'
+ STM.put names'
+ return fa'
+
+--
+-- * SLF printing (without sub-networks)
+--
+
+slfPrinter :: Options -> StateGrammar -> String
+slfPrinter opts s
+ = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s
+
+--
+-- * SLF printing (with sub-networks)
+--
+
+-- | Make a network with subnetworks in SLF
+slfSubPrinter :: Options -> StateGrammar -> String
+slfSubPrinter opts s = prSLFs slfs
+ where
+ (main,subs) = mkFAs opts s
+ slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
+ faToSLF = automatonToSLF mfaNodeToSLFNode
+
+automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
+automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
+ where ns = map (uncurry mkNode) (states fa)
+ es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
+
+mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
+mfaNodeToSLFNode i l = case l of
+ Nothing -> mkSLFNode i Nothing
+ Just (Tok x) -> mkSLFNode i (Just x)
+ Just (Cat s) -> mkSLFSubLat i s
+
+mkSLFNode :: Int -> Maybe String -> SLFNode
+mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
+mkSLFNode i (Just w)
+ | isNonWord w = SLFNode { nId = i,
+ nWord = Nothing,
+ nTag = Just w }
+ | otherwise = SLFNode { nId = i,
+ nWord = Just (map toUpper w),
+ nTag = Just w }
+
+mkSLFSubLat :: Int -> String -> SLFNode
+mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
+
+mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
+mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
+
+prSLFs :: SLFs -> String
+prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
+ where prSub (n,s) = showString "SUBLAT=" . shows n
+ . nl . prOneSLF s . showString "." . nl
+
+prSLF :: SLF -> String
+prSLF slf = prOneSLF slf ""
+
+prOneSLF :: SLF -> ShowS
+prOneSLF (SLF { slfNodes = ns, slfEdges = es})
+ = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
+ where
+ header = prFields [("N",show (length ns)),("L", show (length es))] . nl
+ prNode (SLFNode { nId = i, nWord = w, nTag = t })
+ = prFields $ [("I",show i),("W",showWord w)]
+ ++ maybe [] (\t -> [("s",t)]) t
+ prNode (SLFSubLat { nId = i, nLat = l })
+ = prFields [("I",show i),("L",show l)]
+ prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
+
+-- | Check if a word should not correspond to a word in the SLF file.
+isNonWord :: String -> Bool
+isNonWord = any isPunct
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!()[]{}"
+
+showWord :: SLFWord -> String
+showWord Nothing = "!NULL"
+showWord (Just w) | null w = "!NULL"
+ | otherwise = w
+
+prFields :: [(String,String)] -> ShowS
+prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
diff --git a/src-3.0/GF/Speech/PrSRGS.hs b/src-3.0/GF/Speech/PrSRGS.hs
new file mode 100644
index 000000000..d8ae07867
--- /dev/null
+++ b/src-3.0/GF/Speech/PrSRGS.hs
@@ -0,0 +1,153 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrSRGS
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- This module prints a CFG as an SRGS XML grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrSRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
+
+import GF.Data.Utilities
+import GF.Data.XML
+import GF.Speech.RegExp
+import GF.Speech.SISR as SISR
+import GF.Speech.SRG
+import GF.Infra.Ident
+import GF.Today
+
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName, filterCats)
+import GF.Conversion.Types
+import GF.Infra.Print
+import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
+import GF.Compile.ShellState (StateGrammar)
+
+import Data.Char (toUpper,toLower)
+import Data.List
+import Data.Maybe
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+
+srgsXmlPrinter :: Maybe SISRFormat
+ -> Bool -- ^ Include probabilities
+ -> Options
+ -> StateGrammar -> String
+srgsXmlPrinter sisr probs opts s = prSrgsXml sisr probs $ makeSimpleSRG opts s
+
+srgsXmlNonRecursivePrinter :: Options -> StateGrammar -> String
+srgsXmlNonRecursivePrinter opts s = prSrgsXml Nothing False $ makeNonRecursiveSRG opts s
+
+
+prSrgsXml :: Maybe SISRFormat -> Bool -> SRG -> String
+prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start,
+ origStartCat=origStart,grammarLanguage=l,rules=rs})
+ = showXMLDoc (optimizeSRGS xmlGr)
+ where
+ Just root = cfgCatToGFCat origStart
+ xmlGr = grammar sisr (catFormId root) l $
+ [meta "description"
+ ("SRGS XML speech recognition grammar for " ++ name
+ ++ ". " ++ "Original start category: " ++ origStart),
+ meta "generator" ("Grammatical Framework " ++ version)]
+ ++ topCatRules
+ ++ concatMap ruleToXML rs
+ ruleToXML (SRGRule cat origCat alts) =
+ comments ["Category " ++ origCat] ++ [rule cat (prRhs alts)]
+ prRhs rhss = [oneOf (map (mkProd sisr probs) rhss)]
+ -- externally visible rules for each of the GF categories
+ topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
+ where it i c = Tag "item" [] ([ETag "ruleref" [("uri","#" ++ c)]]
+ ++ tag sisr (topCatSISR c))
+ topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is
+
+rule :: String -> [XML] -> XML
+rule i = Tag "rule" [("id",i)]
+
+mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
+mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ [x] ++ tf)
+ where x = mkItem sisr n rhs
+ w | probs = maybe [] (\p -> [("weight", show p)]) mp
+ | otherwise = []
+ ti = tag sisr (profileInitSISR n)
+ tf = tag sisr (profileFinalSISR n)
+
+mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
+mkItem sisr cn = f
+ where
+ f (REUnion []) = ETag "ruleref" [("special","VOID")]
+ f (REUnion xs)
+ | not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
+ | otherwise = oneOf (map f xs)
+ where (es,nes) = partition isEpsilon xs
+ f (REConcat []) = ETag "ruleref" [("special","NULL")]
+ f (REConcat xs) = Tag "item" [] (map f xs)
+ f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
+ f (RESymbol s) = symItem sisr cn s
+
+{-
+mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
+mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ xs ++ tf)
+ where xs = mkItem sisr n rhs
+ w | probs = maybe [] (\p -> [("weight", show p)]) mp
+ | otherwise = []
+ ti = [tag sisr (profileInitSISR n)]
+ tf = [tag sisr (profileFinalSISR n)]
+
+
+mkItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> [XML]
+mkItem sisr cn ss = map (symItem sisr cn) ss
+-}
+
+symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
+symItem sisr cn (Cat n@(c,_)) =
+ Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
+symItem _ _ (Tok t) = Tag "item" [] [Data (showToken t)]
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
+tag Nothing _ = []
+tag (Just fmt) t = case t fmt of
+ [] -> []
+ ts -> [Tag "tag" [] [Data (prSISR ts)]]
+
+catFormId :: String -> String
+catFormId = (++ "_cat")
+
+
+showToken :: Token -> String
+showToken t = t
+
+oneOf :: [XML] -> XML
+oneOf = Tag "one-of" []
+
+grammar :: Maybe SISRFormat
+ -> String -- ^ root
+ -> Maybe String -- ^language
+ -> [XML] -> XML
+grammar sisr root ml =
+ Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
+ ("version","1.0"),
+ ("mode","voice"),
+ ("root",root)]
+ ++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
+ ++ maybe [] (\l -> [("xml:lang", l)]) ml
+
+meta :: String -> String -> XML
+meta n c = ETag "meta" [("name",n),("content",c)]
+
+optimizeSRGS :: XML -> XML
+optimizeSRGS = bottomUpXML f
+ where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
+ f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
+ f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
+ f (Tag "item" as xs) = Tag "item" as (map g xs)
+ where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
+ g x = x
+ f (Tag "one-of" [] [x]) = x
+ f x = x
diff --git a/src-3.0/GF/Speech/PrSRGS_ABNF.hs b/src-3.0/GF/Speech/PrSRGS_ABNF.hs
new file mode 100644
index 000000000..abb84c5dc
--- /dev/null
+++ b/src-3.0/GF/Speech/PrSRGS_ABNF.hs
@@ -0,0 +1,147 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrJSRGS_ABNF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.16 $
+--
+-- This module prints a CFG as a JSGF grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+--
+-- FIXME: convert to UTF-8
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrSRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
+
+import GF.Conversion.Types
+import GF.Data.Utilities
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats)
+import GF.Infra.Ident
+import GF.Infra.Print
+import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
+import GF.Speech.SISR
+import GF.Speech.SRG
+import GF.Speech.RegExp
+import GF.Compile.ShellState (StateGrammar)
+import GF.Today
+
+import Data.Char
+import Data.List
+import Data.Maybe
+import Text.PrettyPrint.HughesPJ
+import Debug.Trace
+
+width :: Int
+width = 75
+
+srgsAbnfPrinter :: Maybe SISRFormat
+ -> Bool -- ^ Include probabilities
+ -> Options
+ -> StateGrammar -> String
+srgsAbnfPrinter sisr probs opts s = showDoc $ prABNF sisr probs $ makeSimpleSRG opts s
+
+srgsAbnfNonRecursivePrinter :: Options -> StateGrammar -> String
+srgsAbnfNonRecursivePrinter opts s = showDoc $ prABNF Nothing False $ makeNonRecursiveSRG opts s
+
+showDoc = renderStyle (style { lineLength = width })
+
+prABNF :: Maybe SISRFormat -> Bool -> SRG -> Doc
+prABNF sisr probs srg@(SRG{grammarName=name,grammarLanguage=ml,
+ startCat=start,origStartCat=origStart,rules=rs})
+ = header $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs)
+ where
+ header = text "#ABNF 1.0 UTF-8;" $$
+ meta "description"
+ ("Speech recognition grammar for " ++ name
+ ++ ". " ++ "Original start category: " ++ origStart) $$
+ meta "generator" ("Grammatical Framework " ++ version) $$
+ language $$ tagFormat $$ mainCat
+ language = maybe empty (\l -> text "language" <+> text l <> char ';') ml
+ tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';'
+ | otherwise = empty
+ mainCat = case cfgCatToGFCat origStart of
+ Just c -> text "root" <+> prCat (catFormId c) <> char ';'
+ Nothing -> empty
+ prRule (SRGRule cat origCat rhs) =
+ comment origCat $$
+ rule False cat (map prAlt rhs)
+ -- FIXME: use the probability
+ prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
+ where initTag = tag sisr (profileInitSISR n)
+ finalTag = tag sisr (profileFinalSISR n)
+ p = if isEmpty initTag && isEmpty finalTag then id else parens
+
+ topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
+ where it i c = prCat c <+> tag sisr (topCatSISR c)
+
+catFormId :: String -> String
+catFormId = (++ "_cat")
+
+prCat :: SRGCat -> Doc
+prCat c = char '$' <> text c
+
+prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
+prItem sisr t = f 0
+ where
+ f _ (REUnion []) = text "$VOID"
+ f p (REUnion xs)
+ | not (null es) = brackets (f 0 (REUnion nes))
+ | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
+ where (es,nes) = partition isEpsilon xs
+ f _ (REConcat []) = text "$NULL"
+ f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
+ f p (RERepeat x) = f 3 x <> text "<0->"
+ f _ (RESymbol s) = prSymbol sisr t s
+
+
+prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
+prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
+prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation
+ | otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars
+
+tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
+tag Nothing _ = empty
+tag (Just fmt) t =
+ case t fmt of
+ [] -> empty
+ -- grr, silly SRGS ABNF does not have an escaping mechanism
+ ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}"
+ | otherwise -> text "{" <+> text x <+> text "}"
+ where x = prSISR ts
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!"
+
+comment :: String -> Doc
+comment s = text "//" <+> text s
+
+alts :: [Doc] -> Doc
+alts = fsep . prepunctuate (text "| ")
+
+rule :: Bool -> SRGCat -> [Doc] -> Doc
+rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
+ where p = if pub then text "public" else empty
+
+meta :: String -> String -> Doc
+meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';'
+
+-- Pretty-printing utilities
+
+emptyLine :: Doc
+emptyLine = text ""
+
+prepunctuate :: Doc -> [Doc] -> [Doc]
+prepunctuate _ [] = []
+prepunctuate p (x:xs) = x : map (p <>) xs
+
+($++$) :: Doc -> Doc -> Doc
+x $++$ y = x $$ emptyLine $$ y
+
diff --git a/src-3.0/GF/Speech/RegExp.hs b/src-3.0/GF/Speech/RegExp.hs
new file mode 100644
index 000000000..5ee40828e
--- /dev/null
+++ b/src-3.0/GF/Speech/RegExp.hs
@@ -0,0 +1,143 @@
+module GF.Speech.RegExp (RE(..),
+ epsilonRE, nullRE,
+ isEpsilon, isNull,
+ unionRE, concatRE, seqRE,
+ repeatRE, minimizeRE,
+ mapRE, mapRE', joinRE,
+ symbolsRE,
+ dfa2re, prRE) where
+
+import Data.List
+
+import GF.Data.Utilities
+import GF.Speech.FiniteState
+
+data RE a =
+ REUnion [RE a] -- ^ REUnion [] is null
+ | REConcat [RE a] -- ^ REConcat [] is epsilon
+ | RERepeat (RE a)
+ | RESymbol a
+ deriving (Eq,Ord,Show)
+
+
+dfa2re :: (Ord a) => DFA a -> RE a
+dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
+ . oneFinalState () epsilonRE . mapTransitions RESymbol
+ where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
+ merge es = [(f,t,unionRE ls)
+ | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
+
+elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
+elimStates fa =
+ case [s | (s,_) <- states fa, isInternal fa s] of
+ [] -> fa
+ sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
+ where sAs = nonLoopTransitionsTo sE fa
+ sBs = nonLoopTransitionsFrom sE fa
+ r2 = unionRE $ loops sE fa
+ ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
+ r r1 r3 = concatRE [r1, repeatRE r2, r3]
+
+epsilonRE :: RE a
+epsilonRE = REConcat []
+
+nullRE :: RE a
+nullRE = REUnion []
+
+isNull :: RE a -> Bool
+isNull (REUnion []) = True
+isNull _ = False
+
+isEpsilon :: RE a -> Bool
+isEpsilon (REConcat []) = True
+isEpsilon _ = False
+
+unionRE :: Ord a => [RE a] -> RE a
+unionRE = unionOrId . sortNub . concatMap toList
+ where
+ toList (REUnion xs) = xs
+ toList x = [x]
+ unionOrId [r] = r
+ unionOrId rs = REUnion rs
+
+concatRE :: [RE a] -> RE a
+concatRE xs | any isNull xs = nullRE
+ | otherwise = case concatMap toList xs of
+ [r] -> r
+ rs -> REConcat rs
+ where
+ toList (REConcat xs) = xs
+ toList x = [x]
+
+seqRE :: [a] -> RE a
+seqRE = concatRE . map RESymbol
+
+repeatRE :: RE a -> RE a
+repeatRE x | isNull x || isEpsilon x = epsilonRE
+ | otherwise = RERepeat x
+
+finalRE :: Ord a => DFA (RE a) -> RE a
+finalRE fa = concatRE [repeatRE r1, r2,
+ repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
+ where
+ s0 = startState fa
+ [sF] = finalStates fa
+ r1 = unionRE $ loops s0 fa
+ r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
+ r3 = unionRE $ loops sF fa
+ r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
+
+reverseRE :: RE a -> RE a
+reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
+reverseRE (REUnion xs) = REUnion (map reverseRE xs)
+reverseRE (RERepeat x) = RERepeat (reverseRE x)
+reverseRE x = x
+
+minimizeRE :: Ord a => RE a -> RE a
+minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
+
+mergeForward :: Ord a => RE a -> RE a
+mergeForward (REUnion xs) =
+ unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
+mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
+mergeForward (RERepeat r) = repeatRE (mergeForward r)
+mergeForward r = r
+
+firstRE :: RE a -> (RE a, RE a)
+firstRE (REConcat (x:xs)) = (x, REConcat xs)
+firstRE r = (r,epsilonRE)
+
+mapRE :: (a -> b) -> RE a -> RE b
+mapRE f = mapRE' (RESymbol . f)
+
+mapRE' :: (a -> RE b) -> RE a -> RE b
+mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
+mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
+mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
+mapRE' f (RESymbol s) = f s
+
+joinRE :: RE (RE a) -> RE a
+joinRE (REConcat xs) = REConcat (map joinRE xs)
+joinRE (REUnion xs) = REUnion (map joinRE xs)
+joinRE (RERepeat xs) = RERepeat (joinRE xs)
+joinRE (RESymbol ss) = ss
+
+symbolsRE :: RE a -> [a]
+symbolsRE (REConcat xs) = concatMap symbolsRE xs
+symbolsRE (REUnion xs) = concatMap symbolsRE xs
+symbolsRE (RERepeat x) = symbolsRE x
+symbolsRE (RESymbol x) = [x]
+
+-- Debugging
+
+prRE :: RE String -> String
+prRE = prRE' 0
+
+prRE' _ (REUnion []) = "<NULL>"
+prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
+prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
+prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
+prRE' _ (RESymbol s) = s
+
+p n m s | n >= m = "(" ++ s ++ ")"
+ | True = s
diff --git a/src-3.0/GF/Speech/Relation.hs b/src-3.0/GF/Speech/Relation.hs
new file mode 100644
index 000000000..641d671a9
--- /dev/null
+++ b/src-3.0/GF/Speech/Relation.hs
@@ -0,0 +1,130 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Relation
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/10/26 17:13:13 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.1 $
+--
+-- A simple module for relations.
+-----------------------------------------------------------------------------
+
+module GF.Speech.Relation (Rel, mkRel, mkRel'
+ , allRelated , isRelatedTo
+ , transitiveClosure
+ , reflexiveClosure, reflexiveClosure_
+ , symmetricClosure
+ , symmetricSubrelation, reflexiveSubrelation
+ , reflexiveElements
+ , equivalenceClasses
+ , isTransitive, isReflexive, isSymmetric
+ , isEquivalence
+ , isSubRelationOf) where
+
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import GF.Data.Utilities
+
+type Rel a = Map a (Set a)
+
+-- | Creates a relation from a list of related pairs.
+mkRel :: Ord a => [(a,a)] -> Rel a
+mkRel ps = relates ps Map.empty
+
+-- | Creates a relation from a list pairs of elements and the elements
+-- related to them.
+mkRel' :: Ord a => [(a,[a])] -> Rel a
+mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
+
+relToList :: Rel a -> [(a,a)]
+relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
+
+-- | Add a pair to the relation.
+relate :: Ord a => a -> a -> Rel a -> Rel a
+relate x y r = Map.insertWith Set.union x (Set.singleton y) r
+
+-- | Add a list of pairs to the relation.
+relates :: Ord a => [(a,a)] -> Rel a -> Rel a
+relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
+
+-- | Checks if an element is related to another.
+isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
+isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
+
+-- | Get the set of elements to which a given element is related.
+allRelated :: Ord a => Rel a -> a -> Set a
+allRelated r x = fromMaybe Set.empty (Map.lookup x r)
+
+-- | Get all elements in the relation.
+domain :: Ord a => Rel a -> Set a
+domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
+
+-- | Keep only pairs for which both elements are in the given set.
+intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
+intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
+
+transitiveClosure :: Ord a => Rel a -> Rel a
+transitiveClosure r = fix (Map.map growSet) r
+ where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
+
+reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
+ -> Rel a -> Rel a
+reflexiveClosure_ u r = relates [(x,x) | x <- u] r
+
+-- | Uses 'domain'
+reflexiveClosure :: Ord a => Rel a -> Rel a
+reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
+
+symmetricClosure :: Ord a => Rel a -> Rel a
+symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
+
+symmetricSubrelation :: Ord a => Rel a -> Rel a
+symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
+
+reflexiveSubrelation :: Ord a => Rel a -> Rel a
+reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
+
+-- | Get the set of elements which are related to themselves.
+reflexiveElements :: Ord a => Rel a -> Set a
+reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
+
+-- | Keep the related pairs for which the predicate is true.
+filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
+filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
+
+-- | Remove keys that map to no elements.
+purgeEmpty :: Ord a => Rel a -> Rel a
+purgeEmpty r = Map.filter (not . Set.null) r
+
+
+-- | Get the equivalence classes from an equivalence relation.
+equivalenceClasses :: Ord a => Rel a -> [Set a]
+equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
+ where equivalenceClasses_ [] _ = []
+ equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
+ where ys = allRelated r x
+ zs = [x' | x' <- xs, not (x' `Set.member` ys)]
+
+isTransitive :: Ord a => Rel a -> Bool
+isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
+ y <- Set.toList ys, z <- Set.toList (allRelated r y)]
+
+isReflexive :: Ord a => Rel a -> Bool
+isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
+
+isSymmetric :: Ord a => Rel a -> Bool
+isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
+
+isEquivalence :: Ord a => Rel a -> Bool
+isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
+
+isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
+isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)
diff --git a/src-3.0/GF/Speech/RelationQC.hs b/src-3.0/GF/Speech/RelationQC.hs
new file mode 100644
index 000000000..47f783986
--- /dev/null
+++ b/src-3.0/GF/Speech/RelationQC.hs
@@ -0,0 +1,39 @@
+----------------------------------------------------------------------
+-- |
+-- Module : RelationQC
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/10/26 17:13:13 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.1 $
+--
+-- QuickCheck properties for GF.Speech.Relation
+-----------------------------------------------------------------------------
+
+module GF.Speech.RelationQC where
+
+import GF.Speech.Relation
+
+import Test.QuickCheck
+
+prop_transitiveClosure_trans :: [(Int,Int)] -> Bool
+prop_transitiveClosure_trans ps = isTransitive (transitiveClosure (mkRel ps))
+
+prop_symmetricSubrelation_symm :: [(Int,Int)] -> Bool
+prop_symmetricSubrelation_symm ps = isSymmetric (symmetricSubrelation (mkRel ps))
+
+prop_symmetricSubrelation_sub :: [(Int,Int)] -> Bool
+prop_symmetricSubrelation_sub ps = symmetricSubrelation r `isSubRelationOf` r
+ where r = mkRel ps
+
+prop_symmetricClosure_symm :: [(Int,Int)] -> Bool
+prop_symmetricClosure_symm ps = isSymmetric (symmetricClosure (mkRel ps))
+
+prop_reflexiveClosure_refl :: [(Int,Int)] -> Bool
+prop_reflexiveClosure_refl ps = isReflexive (reflexiveClosure (mkRel ps))
+
+prop_mkEquiv_equiv :: [(Int,Int)] -> Bool
+prop_mkEquiv_equiv ps = isEquivalence (mkEquiv ps)
+ where mkEquiv = transitiveClosure . symmetricClosure . reflexiveClosure . mkRel
diff --git a/src-3.0/GF/Speech/SISR.hs b/src-3.0/GF/Speech/SISR.hs
new file mode 100644
index 000000000..3e68a2e55
--- /dev/null
+++ b/src-3.0/GF/Speech/SISR.hs
@@ -0,0 +1,87 @@
+----------------------------------------------------------------------
+-- |
+-- Module : GF.Speech.SISR
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Abstract syntax and pretty printer for SISR,
+-- (Semantic Interpretation for Speech Recognition)
+--
+-----------------------------------------------------------------------------
+
+module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
+ topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
+
+import Data.List
+
+import GF.Conversion.Types
+import GF.Data.Utilities
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
+import GF.Infra.Ident
+import GF.Speech.TransformCFG
+import GF.Speech.SRG (SRGNT)
+
+import qualified GF.JavaScript.AbsJS as JS
+import qualified GF.JavaScript.PrintJS as JS
+
+data SISRFormat =
+ -- SISR Working draft 1 April 2003
+ -- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
+ SISROld
+ deriving Show
+
+type SISRTag = [JS.DeclOrExpr]
+
+
+prSISR :: SISRTag -> String
+prSISR = JS.printTree
+
+topCatSISR :: String -> SISRFormat -> SISRTag
+topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
+
+profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
+profileInitSISR t fmt
+ | null (usedArgs t) = []
+ | otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
+
+usedArgs :: CFTerm -> [Int]
+usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
+usedArgs (CFAbs _ x) = usedArgs x
+usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
+usedArgs (CFRes i) = [i]
+usedArgs _ = []
+
+catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
+catSISR t (c,i) fmt
+ | i `elem` usedArgs t = map JS.DExpr
+ [JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
+ | otherwise = []
+
+profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
+profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
+ where
+ f (CFObj n ts) = tree (prIdent n) (map f ts)
+ f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
+ f (CFApp x y) = JS.ECall (f x) [f y]
+ f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
+ f (CFVar v) = JS.EVar (var v)
+ f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr typ)]
+
+fmtOut SISROld = JS.EVar (JS.Ident "$")
+
+fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
+
+args = JS.Ident "a"
+
+var v = JS.Ident ("x" ++ show v)
+
+field x y = JS.EMember x (JS.Ident y)
+
+ass = JS.EAssign
+
+tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
+
+obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]
+
diff --git a/src-3.0/GF/Speech/SRG.hs b/src-3.0/GF/Speech/SRG.hs
new file mode 100644
index 000000000..19b6c1c1b
--- /dev/null
+++ b/src-3.0/GF/Speech/SRG.hs
@@ -0,0 +1,235 @@
+----------------------------------------------------------------------
+-- |
+-- Module : SRG
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.20 $
+--
+-- Representation of, conversion to, and utilities for
+-- printing of a general Speech Recognition Grammar.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+-----------------------------------------------------------------------------
+
+module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem,
+ SRGCat, SRGNT, CFTerm
+ , makeSRG
+ , makeSimpleSRG
+ , makeNonRecursiveSRG
+ , lookupFM_, prtS
+ , cfgCatToGFCat, srgTopCats
+ ) where
+
+import GF.Data.Operations
+import GF.Data.Utilities
+import GF.Infra.Ident
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
+ , Profile(..), SyntaxForest
+ , filterCats, mapSymbol, symbol)
+import GF.Conversion.Types
+import GF.Infra.Print
+import GF.Speech.TransformCFG
+import GF.Speech.Relation
+import GF.Speech.FiniteState
+import GF.Speech.RegExp
+import GF.Speech.CFGToFiniteState
+import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
+import GF.Compile.ShellState (StateGrammar, stateProbs, stateOptions, cncId)
+
+import Data.List
+import Data.Maybe (fromMaybe, maybeToList)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import Debug.Trace
+
+data SRG = SRG { grammarName :: String -- ^ grammar name
+ , startCat :: SRGCat -- ^ start category name
+ , origStartCat :: String -- ^ original start category name
+ , grammarLanguage :: Maybe String -- ^ The language for which the grammar
+ -- is intended, e.g. en-UK
+ , rules :: [SRGRule]
+ }
+ deriving (Eq,Show)
+
+data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original category name
+ -- and productions
+ deriving (Eq,Show)
+
+-- | maybe a probability, a rule name and an EBNF right-hand side
+data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
+ deriving (Eq,Show)
+
+type SRGItem = RE (Symbol SRGNT Token)
+
+type SRGCat = String
+
+-- | An SRG non-terminal. Category name and its number in the profile.
+type SRGNT = (SRGCat, Int)
+
+-- | SRG category name and original name
+type CatName = (SRGCat,String)
+
+type CatNames = Map String String
+
+-- | Create a non-left-recursive SRG.
+-- FIXME: the probabilities in the returned
+-- grammar may be meaningless.
+makeSimpleSRG :: Options -- ^ Grammar options
+ -> StateGrammar
+ -> SRG
+makeSimpleSRG opt s = makeSRG preprocess opt s
+ where
+ preprocess origStart = traceStats "After mergeIdentical"
+ . mergeIdentical
+ . traceStats "After removeLeftRecursion"
+ . removeLeftRecursion origStart
+ . traceStats "After topDownFilter"
+ . topDownFilter origStart
+ . traceStats "After bottomUpFilter"
+ . bottomUpFilter
+ . traceStats "After removeCycles"
+ . removeCycles
+ . traceStats "Inital CFG"
+
+traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
+
+stats g = "Categories: " ++ show (countCats g)
+ ++ " Rules: " ++ show (countRules g)
+
+makeNonRecursiveSRG :: Options
+ -> StateGrammar
+ -> SRG
+makeNonRecursiveSRG opt s = renameSRG $
+ SRG { grammarName = prIdent (cncId s),
+ startCat = start,
+ origStartCat = origStart,
+ grammarLanguage = getSpeechLanguage opt s,
+ rules = rs }
+ where
+ origStart = getStartCatCF opt s
+ MFA start dfas = cfgToMFA opt s
+ rs = [SRGRule l l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
+ where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
+ dummyCFTerm = CFMeta "dummy"
+ dummySRGNT = mapSymbol (\c -> (c,0)) id
+
+makeSRG :: (Cat_ -> CFRules -> CFRules)
+ -> Options -- ^ Grammar options
+ -> StateGrammar
+ -> SRG
+makeSRG preprocess opt s = renameSRG $
+ SRG { grammarName = name,
+ startCat = origStart,
+ origStartCat = origStart,
+ grammarLanguage = getSpeechLanguage opt s,
+ rules = rs }
+ where
+ name = prIdent (cncId s)
+ origStart = getStartCatCF opt s
+ (_,cfgRules) = unzip $ allRulesGrouped $ preprocess origStart $ cfgToCFRules s
+ rs = map (cfgRulesToSRGRule (stateProbs s)) cfgRules
+
+-- | Give names on the form NameX to all categories.
+renameSRG :: SRG -> SRG
+renameSRG srg = srg { startCat = renameCat (startCat srg),
+ rules = map renameRule (rules srg) }
+ where
+ names = mkCatNames (grammarName srg) (allSRGCats srg)
+ renameRule (SRGRule _ origCat alts) = SRGRule (renameCat origCat) origCat (map renameAlt alts)
+ renameAlt (SRGAlt mp n rhs) = SRGAlt mp n (mapRE renameSymbol rhs)
+ renameSymbol = mapSymbol (\ (c,x) -> (renameCat c, x)) id
+ renameCat = lookupFM_ names
+
+getSpeechLanguage :: Options -> StateGrammar -> Maybe String
+getSpeechLanguage opt s =
+ fmap (replace '_' '-') $ getOptVal (addOptions opt (stateOptions s)) speechLanguage
+
+-- FIXME: merge alternatives with same rhs and profile but different probabilities
+cfgRulesToSRGRule :: Probs -> [CFRule_] -> SRGRule
+cfgRulesToSRGRule probs rs@(r:_) = SRGRule origCat origCat rhs
+ where
+ origCat = lhsCat r
+ alts = [((n,ruleProb probs r),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
+ rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
+
+ mkSRGSymbols _ [] = []
+ mkSRGSymbols i (Cat c:ss) = Cat (c,i) : mkSRGSymbols (i+1) ss
+ mkSRGSymbols i (Tok t:ss) = Tok t : mkSRGSymbols i ss
+
+ruleProb :: Probs -> CFRule_ -> Maybe Double
+ruleProb probs r = lookupProb probs (ruleFun r)
+
+-- FIXME: move to GF.Probabilistic.Probabilistic?
+lookupProb :: Probs -> Ident -> Maybe Double
+lookupProb probs i = lookupTree prIdent i probs
+
+mkCatNames :: String -- ^ Category name prefix
+ -> [String] -- ^ Original category names
+ -> Map String String -- ^ Maps original names to SRG names
+mkCatNames prefix origNames = Map.fromList (zip origNames names)
+ where names = [prefix ++ "_" ++ show x | x <- [0..]]
+
+
+allSRGCats :: SRG -> [String]
+allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
+
+cfgCatToGFCat :: SRGCat -> Maybe String
+cfgCatToGFCat c
+ -- categories introduced by removeLeftRecursion contain dashes
+ | '-' `elem` c = Nothing
+ -- some categories introduced by -conversion=finite have the form
+ -- "{fun:cat}..."
+ | "{" `isPrefixOf` c = case dropWhile (/=':') $ takeWhile (/='}') $ tail c of
+ ':':c' -> Just c'
+ _ -> error $ "cfgCatToGFCat: Strange category " ++ show c
+ | otherwise = Just $ takeWhile (/='{') c
+
+srgTopCats :: SRG -> [(String,[SRGCat])]
+srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
+ oc <- maybeToList $ cfgCatToGFCat origCat]
+
+--
+-- * Size-optimized EBNF SRGs
+--
+
+srgItem :: [[Symbol SRGNT Token]] -> SRGItem
+srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
+-- non-optimizing version:
+--srgItem = unionRE . map seqRE
+
+-- | Merges a list of right-hand sides which all have the same
+-- sequence of non-terminals.
+mergeItems :: [[Symbol SRGNT Token]] -> SRGItem
+mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
+
+groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
+groupTokens [] = []
+groupTokens (Tok t:ss) = case groupTokens ss of
+ Tok ts:ss' -> Tok (t:ts):ss'
+ ss' -> Tok [t]:ss'
+groupTokens (Cat c:ss) = Cat c : groupTokens ss
+
+ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
+ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
+
+--
+-- * Utilities for building and printing SRGs
+--
+
+lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
+lookupFM_ fm k = Map.findWithDefault err k fm
+ where err = error $ "Key not found: " ++ show k
+ ++ "\namong " ++ show (Map.keys fm)
+
+prtS :: Print a => a -> ShowS
+prtS = showString . prt
diff --git a/src-3.0/GF/Speech/TransformCFG.hs b/src-3.0/GF/Speech/TransformCFG.hs
new file mode 100644
index 000000000..3d7ebd809
--- /dev/null
+++ b/src-3.0/GF/Speech/TransformCFG.hs
@@ -0,0 +1,378 @@
+----------------------------------------------------------------------
+-- |
+-- Module : TransformCFG
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.24 $
+--
+-- This module does some useful transformations on CFGs.
+--
+-- peb thinks: most of this module should be moved to GF.Conversion...
+-----------------------------------------------------------------------------
+
+module GF.Speech.TransformCFG where
+
+import GF.Canon.CanonToGFCC (canon2gfcc)
+import qualified GF.GFCC.CId as C
+import GF.GFCC.Macros (lookType,catSkeleton)
+import GF.GFCC.DataGFCC (GFCC)
+import GF.Conversion.Types
+import GF.CF.PPrCF (prCFCat)
+import GF.Data.Utilities
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
+ NameProfile(..), Profile(..), name2fun, forestName)
+import GF.Infra.Ident
+import GF.Infra.Option
+import GF.Infra.Print
+import GF.Speech.Relation
+import GF.Compile.ShellState (StateGrammar, stateCFG, stateGrammarST, startCatStateOpts, stateOptions)
+
+import Control.Monad
+import Control.Monad.State (State, get, put, evalState)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.List
+import Data.Maybe (fromMaybe)
+import Data.Monoid (mconcat)
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+-- not very nice to replace the structured CFCat type with a simple string
+type CFRule_ = CFRule Cat_ CFTerm Token
+
+data CFTerm
+ = CFObj Fun [CFTerm] -- ^ an abstract syntax function with arguments
+ | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
+ | CFApp CFTerm CFTerm -- ^ Application
+ | CFRes Int -- ^ The result of the n:th (0-based) non-terminal
+ | CFVar Int -- ^ A lambda-bound variable
+ | CFMeta String -- ^ A metavariable
+ deriving (Eq,Ord,Show)
+
+type Cat_ = String
+type CFSymbol_ = Symbol Cat_ Token
+
+type CFRules = Map Cat_ (Set CFRule_)
+
+
+cfgToCFRules :: StateGrammar -> CFRules
+cfgToCFRules s =
+ groupProds [CFRule (catToString c) (map symb r) (nameToTerm n)
+ | CFRule c r n <- cfg]
+ where cfg = stateCFG s
+ symb = mapSymbol catToString id
+ catToString = prt
+ gfcc = stateGFCC s
+ nameToTerm (Name IW [Unify [n]]) = CFRes n
+ nameToTerm (Name f@(IC c) prs) =
+ CFObj f (zipWith profileToTerm args prs)
+ where (args,_) = catSkeleton $ lookType gfcc (C.CId c)
+ nameToTerm n = error $ "cfgToCFRules.nameToTerm" ++ show n
+ profileToTerm (C.CId t) (Unify []) = CFMeta t
+ profileToTerm _ (Unify xs) = CFRes (last xs) -- FIXME: unify
+ profileToTerm (C.CId t) (Constant f) = maybe (CFMeta t) (\x -> CFObj x []) (forestName f)
+
+getStartCat :: Options -> StateGrammar -> String
+getStartCat opts sgr = prCFCat (startCatStateOpts opts' sgr)
+ where opts' = addOptions opts (stateOptions sgr)
+
+getStartCatCF :: Options -> StateGrammar -> String
+getStartCatCF opts sgr = getStartCat opts sgr ++ "{}.s"
+
+stateGFCC :: StateGrammar -> GFCC
+stateGFCC = canon2gfcc noOptions . stateGrammarST
+
+-- * Grammar filtering
+
+-- | Removes all directly and indirectly cyclic productions.
+-- FIXME: this may be too aggressive, only one production
+-- needs to be removed to break a given cycle. But which
+-- one should we pick?
+-- FIXME: Does not (yet) remove productions which are cyclic
+-- because of empty productions.
+removeCycles :: CFRules -> CFRules
+removeCycles = groupProds . f . allRules
+ where f rs = filter (not . isCycle) rs
+ where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [Cat c'] _ <- rs]
+ isCycle (CFRule c [Cat c'] _) = isRelatedTo alias c' c
+ isCycle _ = False
+
+-- | Better bottom-up filter that also removes categories which contain no finite
+-- strings.
+bottomUpFilter :: CFRules -> CFRules
+bottomUpFilter gr = fix grow Map.empty
+ where grow g = g `unionCFRules` filterCFRules (all (okSym g) . ruleRhs) gr
+ okSym g = symbol (`elem` allCats g) (const True)
+
+-- | Removes categories which are not reachable from the start category.
+topDownFilter :: Cat_ -> CFRules -> CFRules
+topDownFilter start rules = filterCFRulesCats (isRelatedTo uses start) rules
+ where
+ rhsCats = [ (lhsCat r, c') | r <- allRules rules, c' <- filterCats (ruleRhs r) ]
+ uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats
+
+-- | Merges categories with identical right-hand-sides.
+-- FIXME: handle probabilities
+mergeIdentical :: CFRules -> CFRules
+mergeIdentical g = groupProds $ map subst $ allRules g
+ where
+ -- maps categories to their replacement
+ m = Map.fromList [(y,concat (intersperse "+" xs))
+ | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList g], y <- xs]
+ -- build data to compare for each category: a set of name,rhs pairs
+ rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
+ subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
+ substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
+
+-- * Removing left recursion
+
+-- The LC_LR algorithm from
+-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
+removeLeftRecursion :: Cat_ -> CFRules -> CFRules
+removeLeftRecursion start gr
+ = groupProds $ concat [scheme1, scheme2, scheme3, scheme4]
+ where
+ scheme1 = [CFRule a [x,Cat a_x] n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ not (isLeftRecursive x),
+ let a_x = mkCat (Cat a) x,
+ -- this is an extension of LC_LR to avoid generating
+ -- A-X categories for which there are no productions:
+ a_x `Set.member` newCats,
+ let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
+ (\_ -> CFRes 0) x]
+ scheme2 = [CFRule a_x (beta++[Cat a_b]) n' |
+ a <- retainedLeftRecursive,
+ b@(Cat b') <- properLeftCornersOf a,
+ isLeftRecursive b,
+ CFRule _ (x:beta) n <- catRules gr b',
+ let a_x = mkCat (Cat a) x,
+ let a_b = mkCat (Cat a) b,
+ let i = length $ filterCats beta,
+ let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
+ (\_ -> CFApp (CFRes i) n) x]
+ scheme3 = [CFRule a_x beta n' |
+ a <- retainedLeftRecursive,
+ x <- properLeftCornersOf a,
+ CFRule _ (x':beta) n <- catRules gr a,
+ x == x',
+ let a_x = mkCat (Cat a) x,
+ let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
+ (\_ -> n) x]
+ scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats
+
+ newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
+
+ shiftTerm :: CFTerm -> CFTerm
+ shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
+ shiftTerm (CFRes 0) = CFVar 1
+ shiftTerm (CFRes n) = CFRes (n-1)
+ shiftTerm t = t
+ -- note: the rest don't occur in the original grammar
+
+ cats = allCats gr
+ rules = allRules gr
+
+ directLeftCorner = mkRel [(Cat c,t) | CFRule c (t:_) _ <- allRules gr]
+ leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner
+ properLeftCorner = transitiveClosure directLeftCorner
+ properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat
+ isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
+
+ leftRecursive = reflexiveElements properLeftCorner
+ isLeftRecursive = (`Set.member` leftRecursive)
+
+ retained = start `Set.insert`
+ Set.fromList [a | r <- allRules (filterCFRulesCats (not . isLeftRecursive . Cat) gr),
+ Cat a <- ruleRhs r]
+ isRetained = (`Set.member` retained)
+
+ retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained
+
+mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_
+mkCat x y = showSymbol x ++ "-" ++ showSymbol y
+ where showSymbol = symbol id show
+
+{-
+
+-- Paull's algorithm, see
+-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
+removeLeftRecursion :: Cat_ -> CFRules -> CFRules
+removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs
+ where
+ handleProds (c, r) = (c, concatMap handleProd r)
+ handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
+ -- FIXME: for non-recursive categories, this changes
+ -- the grammar unneccessarily, maybe we can use mutRecCats
+ -- to make this less invasive
+ -- FIXME: this will give multiple rules with the same name,
+ -- which may mess up the probabilities.
+ [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
+ handleProd r = [r]
+
+removeDirectLeftRecursions :: CFRules -> CFRules
+removeDirectLeftRecursions = concat . flip evalState 0 . mapM removeDirectLeftRecursion
+
+removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
+ -> State Int CFRules
+removeDirectLeftRecursion (a,rs)
+ | null dr = return [(a,rs)]
+ | otherwise =
+ do
+ a' <- fresh a
+ let as = maybeEndWithA' nr
+ is = [CFRule a' (tail r) n | CFRule _ r n <- dr]
+ a's = maybeEndWithA' is
+ -- the not null constraint here avoids creating new
+ -- left recursive (cyclic) rules.
+ maybeEndWithA' xs = xs ++ [CFRule c (r++[Cat a']) n | CFRule c r n <- xs,
+ not (null r)]
+ return [(a, as), (a', a's)]
+ where
+ (dr,nr) = partition isDirectLeftRecursive rs
+ fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n }
+
+isDirectLeftRecursive :: CFRule_ -> Bool
+isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
+isDirectLeftRecursive _ = False
+
+-}
+
+-- | Get the sets of mutually recursive non-terminals for a grammar.
+mutRecCats :: Bool -- ^ If true, all categories will be in some set.
+ -- If false, only recursive categories will be included.
+ -> CFRules -> [Set Cat_]
+mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
+ where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, Cat c' <- ss]
+ refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
+
+--
+-- * Approximate context-free grammars with regular grammars.
+--
+
+-- Use the transformation algorithm from \"Regular Approximation of Context-free
+-- Grammars through Approximation\", Mohri and Nederhof, 2000
+-- to create an over-generating regular frammar for a context-free
+-- grammar
+makeRegular :: CFRules -> CFRules
+makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
+ where trSet cs | allXLinear cs rs = rs
+ | otherwise = concatMap handleCat csl
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
+ ++ concatMap (makeRightLinearRules c) (catRules g c)
+ where c' = newCat c
+ makeRightLinearRules b' (CFRule c ss n) =
+ case ys of
+ [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
+ (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
+ ++ makeRightLinearRules (newCat b) (CFRule c zs n)
+ where (xs,ys) = break (`catElem` cs) ss
+ -- don't add rules on the form A -> A
+ newRule c rhs n | rhs == [Cat c] = []
+ | otherwise = [CFRule c rhs n]
+ newCat c = c ++ "$"
+
+--
+-- * CFG rule utilities
+--
+
+-- | Group productions by their lhs categories
+groupProds :: [CFRule_] -> CFRules
+groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
+
+allRules :: CFRules -> [CFRule_]
+allRules = concat . map Set.toList . Map.elems
+
+allRulesGrouped :: CFRules -> [(Cat_,[CFRule_])]
+allRulesGrouped = Map.toList . Map.map Set.toList
+
+allCats :: CFRules -> [Cat_]
+allCats = Map.keys
+
+catRules :: CFRules -> Cat_ -> [CFRule_]
+catRules rs c = Set.toList $ Map.findWithDefault Set.empty c rs
+
+catSetRules :: CFRules -> Set Cat_ -> [CFRule_]
+catSetRules g cs = allRules $ Map.filterWithKey (\c _ -> c `Set.member` cs) g
+
+cleanCFRules :: CFRules -> CFRules
+cleanCFRules = Map.filter (not . Set.null)
+
+unionCFRules :: CFRules -> CFRules -> CFRules
+unionCFRules = Map.unionWith Set.union
+
+filterCFRules :: (CFRule_ -> Bool) -> CFRules -> CFRules
+filterCFRules p = cleanCFRules . Map.map (Set.filter p)
+
+filterCFRulesCats :: (Cat_ -> Bool) -> CFRules -> CFRules
+filterCFRulesCats p = Map.filterWithKey (\c _ -> p c)
+
+countCats :: CFRules -> Int
+countCats = Map.size . cleanCFRules
+
+countRules :: CFRules -> Int
+countRules = length . allRules
+
+lhsCat :: CFRule c n t -> c
+lhsCat (CFRule c _ _) = c
+
+ruleRhs :: CFRule c n t -> [Symbol c t]
+ruleRhs (CFRule _ ss _) = ss
+
+ruleFun :: CFRule_ -> Fun
+ruleFun (CFRule _ _ t) = f t
+ where f (CFObj n _) = n
+ f (CFApp _ x) = f x
+ f (CFAbs _ x) = f x
+ f _ = IC ""
+
+-- | Checks if a symbol is a non-terminal of one of the given categories.
+catElem :: Ord c => Symbol c t -> Set c -> Bool
+catElem s cs = symbol (`Set.member` cs) (const False) s
+
+-- | Check if any of the categories used on the right-hand side
+-- are in the given list of categories.
+anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
+anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
+
+mkCFTerm :: String -> CFTerm
+mkCFTerm n = CFObj (IC n) []
+
+ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
+ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+
+noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
+noCatsInSet cs = not . any (`catElem` cs)
+
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+
+-- | Checks if a context-free rule is right-linear.
+isRightLinear :: Ord c =>
+ Set c -- ^ The categories to consider
+ -> CFRule c n t -- ^ The rule to check for right-linearity
+ -> Bool
+isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
+
+-- | Checks if a context-free rule is left-linear.
+isLeftLinear :: Ord c =>
+ Set c -- ^ The categories to consider
+ -> CFRule c n t -- ^ The rule to check for left-linearity
+ -> Bool
+isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
+
+prCFRules :: CFRules -> String
+prCFRules = unlines . map prRule . allRules
+ where
+ prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
+ prSym = symbol id (\t -> "\""++ t ++"\"")