diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:43:48 +0000 |
| commit | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch) | |
| tree | 0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/Speech | |
| parent | fe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff) | |
removed src for 2.9
Diffstat (limited to 'src/GF/Speech')
| -rw-r--r-- | src/GF/Speech/CFGToFiniteState.hs | 265 | ||||
| -rw-r--r-- | src/GF/Speech/FiniteState.hs | 329 | ||||
| -rw-r--r-- | src/GF/Speech/GrammarToVoiceXML.hs | 285 | ||||
| -rw-r--r-- | src/GF/Speech/Graph.hs | 178 | ||||
| -rw-r--r-- | src/GF/Speech/PrFA.hs | 56 | ||||
| -rw-r--r-- | src/GF/Speech/PrGSL.hs | 113 | ||||
| -rw-r--r-- | src/GF/Speech/PrJSGF.hs | 145 | ||||
| -rw-r--r-- | src/GF/Speech/PrRegExp.hs | 33 | ||||
| -rw-r--r-- | src/GF/Speech/PrSLF.hs | 190 | ||||
| -rw-r--r-- | src/GF/Speech/PrSRGS.hs | 153 | ||||
| -rw-r--r-- | src/GF/Speech/PrSRGS_ABNF.hs | 147 | ||||
| -rw-r--r-- | src/GF/Speech/RegExp.hs | 143 | ||||
| -rw-r--r-- | src/GF/Speech/Relation.hs | 130 | ||||
| -rw-r--r-- | src/GF/Speech/RelationQC.hs | 39 | ||||
| -rw-r--r-- | src/GF/Speech/SISR.hs | 87 | ||||
| -rw-r--r-- | src/GF/Speech/SRG.hs | 235 | ||||
| -rw-r--r-- | src/GF/Speech/TransformCFG.hs | 378 |
17 files changed, 0 insertions, 2906 deletions
diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs deleted file mode 100644 index 7e6f80ba1..000000000 --- a/src/GF/Speech/CFGToFiniteState.hs +++ /dev/null @@ -1,265 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : CFGToFiniteState --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/10 16:43:44 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.6 $ --- --- Approximates CFGs with finite state networks. ------------------------------------------------------------------------------ - -module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular, - MFA(..), MFALabel, cfgToMFA,cfgToFA') where - -import Data.List -import Data.Maybe -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set - -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..)) -import GF.Conversion.Types -import GF.Infra.Ident (Ident) -import GF.Infra.Option (Options) -import GF.Compile.ShellState (StateGrammar) - -import GF.Speech.FiniteState -import GF.Speech.Graph -import GF.Speech.Relation -import GF.Speech.TransformCFG - -data Recursivity = RightR | LeftR | NotR - -data MutRecSet = MutRecSet { - mrCats :: Set Cat_, - mrNonRecRules :: [CFRule_], - mrRecRules :: [CFRule_], - mrRec :: Recursivity - } - - -type MutRecSets = Map Cat_ MutRecSet - --- --- * Multiple DFA type --- - -type MFALabel a = Symbol String a - -data MFA a = MFA String [(String,DFA (MFALabel a))] - - - -cfgToFA :: Options -> StateGrammar -> DFA Token -cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s - where start = getStartCatCF opts s - -makeSimpleRegular :: Options -> StateGrammar -> CFRules -makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s - where start = getStartCatCF opts s - preprocess = topDownFilter start . bottomUpFilter - . removeCycles - - --- --- * Compile strongly regular grammars to NFAs --- - --- Convert a strongly regular grammar to a finite automaton. -compileAutomaton :: Cat_ -- ^ Start category - -> CFRules - -> NFA Token -compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa - where - (fa,s,f) = newFA_ - ns = mutRecSets g $ mutRecCats False g - --- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", --- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000. -make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State - -> NFA Token -> NFA Token -make_fa c@(g,ns) q0 alpha q1 fa = - case alpha of - [] -> newTransition q0 q1 Nothing fa - [Tok t] -> newTransition q0 q1 (Just t) fa - [Cat a] -> case Map.lookup a ns of - -- a is recursive - Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> - case mrRec n of - RightR -> - -- the set Ni is right-recursive or cyclic - let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] - ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, - let (xs,Cat d) = (init ss,last ss)] - in make_fas new $ newTransition q0 (getState a) Nothing fa' - LeftR -> - -- the set Ni is left-recursive - let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] - ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs] - in make_fas new $ newTransition (getState a) q1 Nothing fa' - where - (fa',stateMap) = addStatesForCats ni fa - getState x = Map.findWithDefault - (error $ "CFGToFiniteState: No state for " ++ x) - x stateMap - -- a is not recursive - Nothing -> let rs = catRules g a - in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs - (x:beta) -> let (fa',q) = newState () fa - in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa' - where - make_fa_ = make_fa c - make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs - --- --- * Compile a strongly regular grammar to a DFA with sub-automata --- - -cfgToMFA :: Options -> StateGrammar -> MFA Token -cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s - where start = getStartCatCF opts s - --- | Build a DFA by building and expanding an MFA -cfgToFA' :: Options -> StateGrammar -> DFA Token -cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s - -buildMFA :: Cat_ -- ^ Start category - -> CFRules -> MFA Token -buildMFA start g = sortSubLats $ removeUnusedSubLats mfa - where fas = compileAutomata g - mfa = MFA start [(c, minimize fa) | (c,fa) <- fas] - -mfaStartDFA :: MFA a -> DFA (MFALabel a) -mfaStartDFA (MFA start subs) = - fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs - -mfaToDFA :: Ord a => MFA a -> DFA a -mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa - where - subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs] - getSub l = fromJust $ Map.lookup l subs' - expand (FA (Graph c ns es) s f) - = foldl' expandEdge (FA (Graph c ns []) s f) es - expandEdge fa (f,t,x) = - case x of - Nothing -> newTransition f t Nothing fa - Just (Tok s) -> newTransition f t (Just s) fa - Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l) - -removeUnusedSubLats :: MFA a -> MFA a -removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c] - where - usedMap = subLatUseMap mfa - used = growUsedSet (Set.singleton start) - isUsed c = c `Set.member` used - growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s) - -subLatUseMap :: MFA a -> Map String (Set String) -subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs] - -usedSubLats :: DFA (MFALabel a) -> Set String -usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa] - -revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a) -revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s] - --- | Sort sub-networks topologically. -sortSubLats :: MFA a -> MFA a -sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs) - where - usedByMap = revMultiMap (subLatUseMap mfa) - sortLats _ [] = [] - sortLats ub ls = xs ++ sortLats ub' ys - where (xs,ys) = partition ((==0) . indeg) ls - ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub - indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub - --- | Convert a strongly regular grammar to a number of finite automata, --- one for each non-terminal. --- The edges in the automata accept tokens, or name another automaton to use. -compileAutomata :: CFRules - -> [(Cat_,NFA (Symbol Cat_ Token))] - -- ^ A map of non-terminals and their automata. -compileAutomata g = [(c, makeOneFA c) | c <- allCats g] - where - mrs = mutRecSets g $ mutRecCats True g - makeOneFA c = make_fa1 mr s [Cat c] f fa - where (fa,s,f) = newFA_ - mr = fromJust (Map.lookup c mrs) - - --- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", --- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000, --- adapted to build a finite automaton for a single (mutually recursive) set only. --- Categories not in the set will result in category-labelled edges. -make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which - -- we are building the automaton. - -> State -- ^ State to come from - -> [Symbol Cat_ Token] -- ^ Symbols to accept - -> State -- ^ State to end up in - -> NFA (Symbol Cat_ Token) -- ^ FA to add to. - -> NFA (Symbol Cat_ Token) -make_fa1 mr q0 alpha q1 fa = - case alpha of - [] -> newTransition q0 q1 Nothing fa - [t@(Tok _)] -> newTransition q0 q1 (Just t) fa - [c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa - [Cat a] -> - case mrRec mr of - NotR -> -- the set is a non-recursive (always singleton) set of categories - -- so the set of category rules is the set of rules for the whole set - make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa - RightR -> -- the set is right-recursive or cyclic - let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] - ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, - let (xs,Cat d) = (init ss,last ss)] - in make_fas new $ newTransition q0 (getState a) Nothing fa' - LeftR -> -- the set is left-recursive - let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] - ++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr] - in make_fas new $ newTransition (getState a) q1 Nothing fa' - where - (fa',stateMap) = addStatesForCats (mrCats mr) fa - getState x = Map.findWithDefault - (error $ "CFGToFiniteState: No state for " ++ x) - x stateMap - (x:beta) -> let (fa',q) = newState () fa - in make_fas [(q0,[x],q),(q,beta,q1)] fa' - where - make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs - -mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets -mutRecSets g = Map.fromList . concatMap mkMutRecSet - where - mkMutRecSet cs = [ (c,ms) | c <- csl ] - where csl = Set.toList cs - rs = catSetRules g cs - (nrs,rrs) = partition (ruleIsNonRecursive cs) rs - ms = MutRecSet { - mrCats = cs, - mrNonRecRules = nrs, - mrRecRules = rrs, - mrRec = rec - } - rec | null rrs = NotR - | all (isRightLinear cs) rrs = RightR - | otherwise = LeftR - --- --- * Utilities --- - --- | Add a state for the given NFA for each of the categories --- in the given set. Returns a map of categories to their --- corresponding states. -addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State) -addStatesForCats cs fa = (fa', m) - where (fa', ns) = newStates (replicate (Set.size cs) ()) fa - m = Map.fromList (zip (Set.toList cs) (map fst ns)) diff --git a/src/GF/Speech/FiniteState.hs b/src/GF/Speech/FiniteState.hs deleted file mode 100644 index 35274e3c4..000000000 --- a/src/GF/Speech/FiniteState.hs +++ /dev/null @@ -1,329 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : FiniteState --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/10 16:43:44 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.16 $ --- --- A simple finite state network module. ------------------------------------------------------------------------------ -module GF.Speech.FiniteState (FA(..), State, NFA, DFA, - startState, finalStates, - states, transitions, - isInternal, - newFA, newFA_, - addFinalState, - newState, newStates, - newTransition, newTransitions, - insertTransitionWith, insertTransitionsWith, - mapStates, mapTransitions, - modifyTransitions, - nonLoopTransitionsTo, nonLoopTransitionsFrom, - loops, - removeState, - oneFinalState, - insertNFA, - onGraph, - moveLabelsToNodes, removeTrivialEmptyNodes, - minimize, - dfa2nfa, - unusedNames, renameStates, - prFAGraphviz, faToGraphviz) where - -import Data.List -import Data.Maybe -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set - -import GF.Data.Utilities -import GF.Speech.Graph -import qualified GF.Visualization.Graphviz as Dot - -type State = Int - --- | Type parameters: node id type, state label type, edge label type --- Data constructor arguments: nodes and edges, start state, final states -data FA n a b = FA !(Graph n a b) !n ![n] - -type NFA a = FA State () (Maybe a) - -type DFA a = FA State () a - - -startState :: FA n a b -> n -startState (FA _ s _) = s - -finalStates :: FA n a b -> [n] -finalStates (FA _ _ ss) = ss - -states :: FA n a b -> [(n,a)] -states (FA g _ _) = nodes g - -transitions :: FA n a b -> [(n,n,b)] -transitions (FA g _ _) = edges g - -newFA :: Enum n => a -- ^ Start node label - -> FA n a b -newFA l = FA g s [] - where (g,s) = newNode l (newGraph [toEnum 0..]) - --- | Create a new finite automaton with an initial and a final state. -newFA_ :: Enum n => (FA n () b, n, n) -newFA_ = (fa'', s, f) - where fa = newFA () - s = startState fa - (fa',f) = newState () fa - fa'' = addFinalState f fa' - -addFinalState :: n -> FA n a b -> FA n a b -addFinalState f (FA g s ss) = FA g s (f:ss) - -newState :: a -> FA n a b -> (FA n a b, n) -newState x (FA g s ss) = (FA g' s ss, n) - where (g',n) = newNode x g - -newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)]) -newStates xs (FA g s ss) = (FA g' s ss, ns) - where (g',ns) = newNodes xs g - -newTransition :: n -> n -> b -> FA n a b -> FA n a b -newTransition f t l = onGraph (newEdge (f,t,l)) - -newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b -newTransitions es = onGraph (newEdges es) - -insertTransitionWith :: Eq n => - (b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b -insertTransitionWith f t = onGraph (insertEdgeWith f t) - -insertTransitionsWith :: Eq n => - (b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b -insertTransitionsWith f ts fa = - foldl' (flip (insertTransitionWith f)) fa ts - -mapStates :: (a -> c) -> FA n a b -> FA n c b -mapStates f = onGraph (nmap f) - -mapTransitions :: (b -> c) -> FA n a b -> FA n a c -mapTransitions f = onGraph (emap f) - -modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b -modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es)) - -removeState :: Ord n => n -> FA n a b -> FA n a b -removeState n = onGraph (removeNode n) - -minimize :: Ord a => NFA a -> DFA a -minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA - -unusedNames :: FA n a b -> [n] -unusedNames (FA (Graph names _ _) _ _) = names - --- | Gets all incoming transitions to a given state, excluding --- transtions from the state itself. -nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)] -nonLoopTransitionsTo s fa = - [(f,l) | (f,t,l) <- transitions fa, t == s && f /= s] - -nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)] -nonLoopTransitionsFrom s fa = - [(t,l) | (f,t,l) <- transitions fa, f == s && t /= s] - -loops :: Eq n => n -> FA n a b -> [b] -loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s] - --- | Give new names to all nodes. -renameStates :: Ord x => [y] -- ^ Infinite supply of new names - -> FA x a b - -> FA y a b -renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs' - where (ns,rest) = splitAt (length (nodes g)) supply - newNodes = Map.fromList (zip (map fst (nodes g)) ns) - newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes - s' = newName s - fs' = map newName fs - --- | Insert an NFA into another -insertNFA :: NFA a -- ^ NFA to insert into - -> (State, State) -- ^ States to insert between - -> NFA a -- ^ NFA to insert. - -> NFA a -insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2) - = FA (newEdges es g') s1 fs1 - where - es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2] - (g',ren) = mergeGraphs g1 g2 - -onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d -onGraph f (FA g s ss) = FA (f g) s ss - - --- | Make the finite automaton have a single final state --- by adding a new final state and adding an edge --- from the old final states to the new state. -oneFinalState :: a -- ^ Label to give the new node - -> b -- ^ Label to give the new edges - -> FA n a b -- ^ The old network - -> FA n a b -- ^ The new network -oneFinalState nl el fa = - let (FA g s fs,nf) = newState nl fa - es = [ (f,nf,el) | f <- fs ] - in FA (newEdges es g) s [nf] - --- | Transform a standard finite automaton with labelled edges --- to one where the labels are on the nodes instead. This can add --- up to one extra node per edge. -moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) () -moveLabelsToNodes = onGraph f - where f g@(Graph c _ _) = Graph c' ns (concat ess) - where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)] - (c',is') = mapAccumL fixIncoming c is - (ns,ess) = unzip (concat is') - - --- | Remove empty nodes which are not start or final, and have --- exactly one outgoing edge or exactly one incoming edge. -removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) () -removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes - --- | Move edges to empty nodes to point to the next node(s). --- This is not done if the pointed-to node is a final node. -skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) () -skipSimpleEmptyNodes fa = onGraph og fa - where - og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es') - where - es' = concatMap changeEdge es - info = nodeInfo g - changeEdge e@(f,t,()) - | isNothing (getNodeLabel info t) - -- && (i * o <= i + o) - && not (isFinal fa t) - = [ (f,t',()) | (_,t',()) <- getOutgoing info t] - | otherwise = [e] --- where i = inDegree info t --- o = outDegree info t - -isInternal :: Eq n => FA n a b -> n -> Bool -isInternal (FA _ start final) n = n /= start && n `notElem` final - -isFinal :: Eq n => FA n a b -> n -> Bool -isFinal (FA _ _ final) n = n `elem` final - --- | Remove all internal nodes with no incoming edges --- or no outgoing edges. -pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) () -pruneUnusable fa = onGraph f fa - where - f g = if Set.null rns then g else f (removeNodes rns g) - where info = nodeInfo g - rns = Set.fromList [ n | (n,_) <- nodes g, - isInternal fa n, - inDegree info n == 0 - || outDegree info n == 0] - -fixIncoming :: (Ord n, Eq a) => [n] - -> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges - -> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their - -- incoming edges. -fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts) - where ls = nub $ map edgeLabel es - (cs',cs'') = splitAt (length ls) cs - newNodes = zip cs' ls - es' = [ (x,n,()) | x <- map fst newNodes ] - -- separate cyclic and non-cyclic edges - (cyc,ncyc) = partition (\ (f,_,_) -> f == n) es - -- keep all incoming non-cyclic edges with the right label - to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l'] - -- for each cyclic edge with the right label, - -- add an edge from each of the new nodes (including this one) - ++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes] - newContexts = [ (v, to v) | v <- newNodes ] - -alphabet :: Eq b => Graph n a (Maybe b) -> [b] -alphabet = nub . catMaybes . map edgeLabel . edges - -determinize :: Ord a => NFA a -> DFA a -determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty - (ns',es') = (Set.toList ns, Set.toList es) - final = filter isDFAFinal ns' - fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final - in renameStates [0..] fa - where info = nodeInfo g --- reach = nodesReachable out - start = closure info $ Set.singleton s - isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n)) - h currentStates oldStates es - | Set.null currentStates = (oldStates,es) - | otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es' - where - allOldStates = oldStates `Set.union` currentStates - (newStates,es') = new (Set.toList currentStates) Set.empty es - uniqueNewStates = newStates Set.\\ allOldStates - -- Get the sets of states reachable from the given states - -- by consuming one symbol, and the associated edges. - new [] rs es = (rs,es) - new (n:ns) rs es = new ns rs' es' - where cs = reachable info n --reachable reach n - rs' = rs `Set.union` Set.fromList (map snd cs) - es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs] - - --- | Get all the nodes reachable from a list of nodes by only empty edges. -closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n -closure info x = closure_ x x - where closure_ acc check | Set.null check = acc - | otherwise = closure_ acc' check' - where - reach = Set.fromList [y | x <- Set.toList check, - (_,y,Nothing) <- getOutgoing info x] - acc' = acc `Set.union` reach - check' = reach Set.\\ acc - --- | Get a map of labels to sets of all nodes reachable --- from a the set of nodes by one edge with the given --- label and then any number of empty edges. -reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)] -reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns -reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n] - -reverseNFA :: NFA a -> NFA a -reverseNFA (FA g s fs) = FA g''' s' [s] - where g' = reverseGraph g - (g'',s') = newNode () g' - g''' = newEdges [(s',f,Nothing) | f <- fs] g'' - -dfa2nfa :: DFA a -> NFA a -dfa2nfa = mapTransitions Just - --- --- * Visualization --- - -prFAGraphviz :: (Eq n,Show n) => FA n String String -> String -prFAGraphviz = Dot.prGraphviz . faToGraphviz - -prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String -prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show - -faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph -faToGraphviz (FA (Graph _ ns es) s f) - = Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) [] - where mkNode (n,l) = Dot.Node (show n) attrs - where attrs = [("label",l)] - ++ if n == s then [("shape","box")] else [] - ++ if n `elem` f then [("style","bold")] else [] - mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)] - --- --- * Utilities --- - -lookups :: Ord k => [k] -> Map k a -> [a] -lookups xs m = mapMaybe (flip Map.lookup m) xs diff --git a/src/GF/Speech/GrammarToVoiceXML.hs b/src/GF/Speech/GrammarToVoiceXML.hs deleted file mode 100644 index ad7f25d1c..000000000 --- a/src/GF/Speech/GrammarToVoiceXML.hs +++ /dev/null @@ -1,285 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : GrammarToVoiceXML --- Maintainer : Bjorn Bringert --- Stability : (stable) --- Portability : (portable) --- --- Create VoiceXML dialogue system from a GF grammar. ------------------------------------------------------------------------------ - -module GF.Speech.GrammarToVoiceXML (grammar2vxml) where - -import GF.Canon.CanonToGFCC (canon2gfcc) -import qualified GF.GFCC.CId as C -import GF.GFCC.DataGFCC (GFCC(..), Abstr(..)) -import GF.GFCC.Macros -import qualified GF.Canon.GFC as GFC -import GF.Canon.AbsGFC (Term) -import GF.Canon.PrintGFC (printTree) -import GF.Canon.CMacros (noMark, strsFromTerm) -import GF.Canon.Unlex (formatAsText) -import GF.Data.Utilities -import GF.CF.CFIdent (cfCat2Ident) -import GF.Compile.ShellState (StateGrammar,stateGrammarST,cncId,grammar, - startCatStateOpts,stateOptions) -import GF.Data.Str (sstrV) -import GF.Grammar.Macros hiding (assign,strsFromTerm) -import GF.Grammar.Grammar (Fun) -import GF.Grammar.Values (Tree) -import GF.Infra.Option (Options, addOptions, getOptVal, speechLanguage) -import GF.UseGrammar.GetTree (string2treeErr) -import GF.UseGrammar.Linear (linTree2strings) - -import GF.Infra.Ident -import GF.Infra.Option (noOptions) -import GF.Infra.Modules -import GF.Data.Operations - -import GF.Data.XML - -import Control.Monad (liftM) -import Data.List (isPrefixOf, find, intersperse) -import qualified Data.Map as Map -import Data.Maybe (fromMaybe) - -import Debug.Trace - --- | the main function -grammar2vxml :: Options -> StateGrammar -> String -grammar2vxml opt s = showsXMLDoc (skel2vxml name language startcat gr' qs) "" - where (_, gr') = vSkeleton (stateGrammarST s) - name = prIdent (cncId s) - qs = catQuestions s (map fst gr') - opts = addOptions opt (stateOptions s) - language = fmap (replace '_' '-') $ getOptVal opts speechLanguage - startcat = C.CId $ prIdent $ cfCat2Ident $ startCatStateOpts opts s - --- --- * VSkeleton: a simple description of the abstract syntax. --- - -type VSkeleton = [(VIdent, [(VIdent, [VIdent])])] -type VIdent = C.CId - -prid :: VIdent -> String -prid (C.CId x) = x - -vSkeleton :: GFC.CanonGrammar -> (VIdent,VSkeleton) -vSkeleton = gfccSkeleton . canon2gfcc noOptions - -gfccSkeleton :: GFCC -> (VIdent,VSkeleton) -gfccSkeleton gfcc = (absname gfcc, ts) - where a = abstract gfcc - ts = [(c,[(f,ft f) | f <- fs]) | (c,fs) <- Map.toList (catfuns a)] - ft f = case lookMap (error $ prid f) f (funs a) of - (ty,_) -> fst $ GF.GFCC.Macros.catSkeleton ty - --- --- * Questions to ask --- - -type CatQuestions = [(VIdent,String)] - -catQuestions :: StateGrammar -> [VIdent] -> CatQuestions -catQuestions gr cats = [(c,catQuestion gr c) | c <- cats] - -catQuestion :: StateGrammar -> VIdent -> String -catQuestion gr cat = err errHandler id (getPrintname gr cat >>= term2string) - where -- FIXME: use some better warning facility - errHandler e = trace ("GrammarToVoiceXML: " ++ e) ("quest_"++prid cat) - term2string = liftM sstrV . strsFromTerm - -getPrintname :: StateGrammar -> VIdent -> Err Term -getPrintname gr cat = - do m <- lookupModMod (grammar gr) (cncId gr) - i <- lookupInfo m (IC (prid cat)) - case i of - GFC.CncCat _ _ p -> return p - _ -> fail $ "getPrintname " ++ prid cat - ++ ": Expected CncCat, got " ++ show i - - -{- -lin :: StateGrammar -> String -> Err String -lin gr fun = do - tree <- string2treeErr gr fun - let ls = map unt $ linTree2strings noMark g c tree - case ls of - [] -> fail $ "No linearization of " ++ fun - l:_ -> return l - where c = cncId gr - g = stateGrammarST gr - unt = formatAsText --} - -getCatQuestion :: VIdent -> CatQuestions -> String -getCatQuestion c qs = - fromMaybe (error "No question for category " ++ prid c) (lookup c qs) - --- --- * Generate VoiceXML --- - -skel2vxml :: String -> Maybe String -> VIdent -> VSkeleton -> CatQuestions -> XML -skel2vxml name language start skel qs = - vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel) - where - gr = grammarURI name - startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)] - [param "old" "{ name : '?' }"]] - -grammarURI :: String -> String -grammarURI name = name ++ ".grxml" - - -catForms :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> [XML] -catForms gr qs cat fs = - comments [prid cat ++ " category."] - ++ [cat2form gr qs cat fs] - -cat2form :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> XML -cat2form gr qs cat fs = - form (catFormId cat) $ - [var "old" Nothing, - blockCond "old.name != '?'" [assign "term" "old"], - field "term" [] - [promptString (getCatQuestion cat qs), - vxmlGrammar (gr++"#"++catFormId cat) - ] - ] - ++ concatMap (uncurry (fun2sub gr cat)) fs - ++ [block [return_ ["term"]{-]-}]] - -fun2sub :: String -> VIdent -> VIdent -> [VIdent] -> [XML] -fun2sub gr cat fun args = - comments [prid fun ++ " : (" - ++ concat (intersperse ", " (map prid args)) - ++ ") " ++ prid cat] ++ ss - where - ss = zipWith mkSub [0..] args - mkSub n t = subdialog s [("src","#"++catFormId t), - ("cond","term.name == "++string (prid fun))] - [param "old" v, - filled [] [assign v (s++".term")]] - where s = prid fun ++ "_" ++ show n - v = "term.args["++show n++"]" - -catFormId :: VIdent -> String -catFormId c = prid c ++ "_cat" - - --- --- * VoiceXML stuff --- - -vxml :: Maybe String -> [XML] -> XML -vxml ml = Tag "vxml" $ [("version","2.0"), - ("xmlns","http://www.w3.org/2001/vxml")] - ++ maybe [] (\l -> [("xml:lang", l)]) ml - -form :: String -> [XML] -> XML -form id xs = Tag "form" [("id", id)] xs - -field :: String -> [(String,String)] -> [XML] -> XML -field name attrs = Tag "field" ([("name",name)]++attrs) - -subdialog :: String -> [(String,String)] -> [XML] -> XML -subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs) - -filled :: [(String,String)] -> [XML] -> XML -filled = Tag "filled" - -vxmlGrammar :: String -> XML -vxmlGrammar uri = ETag "grammar" [("src",uri)] - -prompt :: [XML] -> XML -prompt = Tag "prompt" [] - -promptString :: String -> XML -promptString p = prompt [Data p] - -reprompt :: XML -reprompt = ETag "reprompt" [] - -assign :: String -> String -> XML -assign n e = ETag "assign" [("name",n),("expr",e)] - -value :: String -> XML -value expr = ETag "value" [("expr",expr)] - -if_ :: String -> [XML] -> XML -if_ c b = if_else c b [] - -if_else :: String -> [XML] -> [XML] -> XML -if_else c t f = cond [(c,t)] f - -cond :: [(String,[XML])] -> [XML] -> XML -cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es) - where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest] - ++ if null els then [] else (Tag "else" [] []:els) - -goto_item :: String -> XML -goto_item nextitem = ETag "goto" [("nextitem",nextitem)] - -return_ :: [String] -> XML -return_ names = ETag "return" [("namelist", unwords names)] - -block :: [XML] -> XML -block = Tag "block" [] - -blockCond :: String -> [XML] -> XML -blockCond cond = Tag "block" [("cond", cond)] - -throw :: String -> String -> XML -throw event msg = Tag "throw" [("event",event),("message",msg)] [] - -nomatch :: [XML] -> XML -nomatch = Tag "nomatch" [] - -help :: [XML] -> XML -help = Tag "help" [] - -param :: String -> String -> XML -param name expr = ETag "param" [("name",name),("expr",expr)] - -var :: String -> Maybe String -> XML -var name expr = ETag "var" ([("name",name)]++e) - where e = maybe [] ((:[]) . (,) "expr") expr - -script :: String -> XML -script s = Tag "script" [] [CData s] - -scriptURI :: String -> XML -scriptURI uri = Tag "script" [("uri", uri)] [] - --- --- * ECMAScript stuff --- - -string :: String -> String -string s = "'" ++ concatMap esc s ++ "'" - where esc '\'' = "\\'" - esc c = [c] - -{- --- --- * List stuff --- - -isListCat :: (VIdent, [(VIdent, [VIdent])]) -> Bool -isListCat (cat,rules) = "List" `isPrefixOf` prIdent cat && length rules == 2 - && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs - where c = drop 4 (prIdent cat) - fs = map (prIdent . fst) rules - -isBaseFun :: VIdent -> Bool -isBaseFun f = "Base" `isPrefixOf` prIdent f - -isConsFun :: VIdent -> Bool -isConsFun f = "Cons" `isPrefixOf` prIdent f - -baseSize :: (VIdent, [(VIdent, [VIdent])]) -> Int -baseSize (_,rules) = length bs - where Just (_,bs) = find (isBaseFun . fst) rules --} diff --git a/src/GF/Speech/Graph.hs b/src/GF/Speech/Graph.hs deleted file mode 100644 index 1a0ebe0c0..000000000 --- a/src/GF/Speech/Graph.hs +++ /dev/null @@ -1,178 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Graph --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/10 16:43:44 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.2 $ --- --- A simple graph module. ------------------------------------------------------------------------------ -module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo - , newGraph, nodes, edges - , nmap, emap, newNode, newNodes, newEdge, newEdges - , insertEdgeWith - , removeNode, removeNodes - , nodeInfo - , getIncoming, getOutgoing, getNodeLabel - , inDegree, outDegree - , nodeLabel - , edgeFrom, edgeTo, edgeLabel - , reverseGraph, mergeGraphs, renameNodes - ) where - -import GF.Data.Utilities - -import Data.List -import Data.Maybe -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set - -data Graph n a b = Graph [n] ![Node n a] ![Edge n b] - deriving (Eq,Show) - -type Node n a = (n,a) -type Edge n b = (n,n,b) - -type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b]) - --- | Create a new empty graph. -newGraph :: [n] -> Graph n a b -newGraph ns = Graph ns [] [] - --- | Get all the nodes in the graph. -nodes :: Graph n a b -> [Node n a] -nodes (Graph _ ns _) = ns - --- | Get all the edges in the graph. -edges :: Graph n a b -> [Edge n b] -edges (Graph _ _ es) = es - --- | Map a function over the node labels. -nmap :: (a -> c) -> Graph n a b -> Graph n c b -nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es - --- | Map a function over the edge labels. -emap :: (b -> c) -> Graph n a b -> Graph n a c -emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es] - --- | Add a node to the graph. -newNode :: a -- ^ Node label - -> Graph n a b - -> (Graph n a b,n) -- ^ Node graph and name of new node -newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c) - -newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a]) -newNodes ls g = (g', zip ns ls) - where (g',ns) = mapAccumL (flip newNode) g ls --- lazy version: ---newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns') --- where (xs,cs') = splitAt (length ls) cs --- ns' = zip xs ls - -newEdge :: Edge n b -> Graph n a b -> Graph n a b -newEdge e (Graph c ns es) = Graph c ns (e:es) - -newEdges :: [Edge n b] -> Graph n a b -> Graph n a b -newEdges es g = foldl' (flip newEdge) g es --- lazy version: --- newEdges es' (Graph c ns es) = Graph c ns (es'++es) - -insertEdgeWith :: Eq n => - (b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b -insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es) - where h [] = [e] - h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es' - | otherwise = e':h es' - --- | Remove a node and all edges to and from that node. -removeNode :: Ord n => n -> Graph n a b -> Graph n a b -removeNode n = removeNodes (Set.singleton n) - --- | Remove a set of nodes and all edges to and from those nodes. -removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b -removeNodes xs (Graph c ns es) = Graph c ns' es' - where - keepNode n = not (Set.member n xs) - ns' = [ x | x@(n,_) <- ns, keepNode n ] - es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ] - --- | Get a map of node names to info about each node. -nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b -nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ] - where - inc = groupEdgesBy edgeTo g - out = groupEdgesBy edgeFrom g - fn m n = fromMaybe [] (Map.lookup n m) - -groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by - -> Graph n a b -> Map n [Edge n b] -groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g] - -lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b]) -lookupNode i n = fromJust $ Map.lookup n i - -getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b] -getIncoming i n = let (_,inc,_) = lookupNode i n in inc - -getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b] -getOutgoing i n = let (_,_,out) = lookupNode i n in out - -inDegree :: Ord n => NodeInfo n a b -> n -> Int -inDegree i n = length $ getIncoming i n - -outDegree :: Ord n => NodeInfo n a b -> n -> Int -outDegree i n = length $ getOutgoing i n - -getNodeLabel :: Ord n => NodeInfo n a b -> n -> a -getNodeLabel i n = let (l,_,_) = lookupNode i n in l - -nodeLabel :: Node n a -> a -nodeLabel = snd - -edgeFrom :: Edge n b -> n -edgeFrom (f,_,_) = f - -edgeTo :: Edge n b -> n -edgeTo (_,t,_) = t - -edgeLabel :: Edge n b -> b -edgeLabel (_,_,l) = l - -reverseGraph :: Graph n a b -> Graph n a b -reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ] - --- | Add the nodes from the second graph to the first graph. --- The nodes in the second graph will be renamed using the name --- supply in the first graph. --- This function is more efficient when the second graph --- is smaller than the first. -mergeGraphs :: Ord m => Graph n a b -> Graph m a b - -> (Graph n a b, m -> n) -- ^ The new graph and a function translating - -- the old names of nodes in the second graph - -- to names in the new graph. -mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName) - where - (xs,c') = splitAt (length (nodes g2)) c - newNames = Map.fromList (zip (map fst (nodes g2)) xs) - newName n = fromJust $ Map.lookup n newNames - Graph _ ns2 es2 = renameNodes newName undefined g2 - --- | Rename the nodes in the graph. -renameNodes :: (n -> m) -- ^ renaming function - -> [m] -- ^ infinite supply of fresh node names, to - -- use when adding nodes in the future. - -> Graph n a b -> Graph m a b -renameNodes newName c (Graph _ ns es) = Graph c ns' es' - where ns' = map' (\ (n,x) -> (newName n,x)) ns - es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es - --- | A strict 'map' -map' :: (a -> b) -> [a] -> [b] -map' _ [] = [] -map' f (x:xs) = ((:) $! f x) $! map' f xs diff --git a/src/GF/Speech/PrFA.hs b/src/GF/Speech/PrFA.hs deleted file mode 100644 index 2856039ec..000000000 --- a/src/GF/Speech/PrFA.hs +++ /dev/null @@ -1,56 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrSLF --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/10 16:43:44 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.2 $ --- --- This module prints finite automata and regular grammars --- for a context-free grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar ------------------------------------------------------------------------------ - -module GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) where - -import GF.Data.Utilities -import GF.Conversion.Types -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..),symbol) -import GF.Infra.Ident -import GF.Infra.Option (Options) -import GF.Infra.Print -import GF.Speech.CFGToFiniteState -import GF.Speech.FiniteState -import GF.Speech.TransformCFG -import GF.Compile.ShellState (StateGrammar) - -import Data.Char (toUpper,toLower) -import Data.List -import Data.Maybe (fromMaybe) - - - -faGraphvizPrinter :: Options -> StateGrammar -> String -faGraphvizPrinter opts s = - prFAGraphviz $ mapStates (const "") $ cfgToFA opts s - --- | Convert the grammar to a regular grammar and print it in BNF -regularPrinter :: Options -> StateGrammar -> String -regularPrinter opts s = prCFRules $ makeSimpleRegular opts s - where - prCFRules :: CFRules -> String - prCFRules g = unlines [ c ++ " ::= " ++ join " | " (map (showRhs . ruleRhs) rs) | (c,rs) <- allRulesGrouped g] - join g = concat . intersperse g - showRhs = unwords . map (symbol id show) - -faCPrinter :: Options -> StateGrammar -> String -faCPrinter opts s = fa2c $ cfgToFA opts s - -fa2c :: DFA String -> String -fa2c fa = undefined diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs deleted file mode 100644 index 248991380..000000000 --- a/src/GF/Speech/PrGSL.hs +++ /dev/null @@ -1,113 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrGSL --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/01 20:09:04 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.22 $ --- --- This module prints a CFG as a Nuance GSL 2.0 grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar ------------------------------------------------------------------------------ - -module GF.Speech.PrGSL (gslPrinter) where - -import GF.Data.Utilities -import GF.Speech.SRG -import GF.Speech.RegExp -import GF.Infra.Ident - -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..)) -import GF.Conversion.Types -import GF.Infra.Print -import GF.Infra.Option -import GF.Probabilistic.Probabilistic (Probs) -import GF.Compile.ShellState (StateGrammar) - -import Data.Char (toUpper,toLower) -import Data.List (partition) -import Text.PrettyPrint.HughesPJ - -width :: Int -width = 75 - -gslPrinter :: Options -> StateGrammar -> String -gslPrinter opts s = renderStyle st $ prGSL $ makeSimpleSRG opts s - where st = style { lineLength = width } - -prGSL :: SRG -> Doc -prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) - = header $++$ mainCat $++$ foldr ($++$) empty (map prRule rs) - where - header = text ";GSL2.0" $$ - comment ("Nuance speech recognition grammar for " ++ name) $$ - comment ("Generated by GF") - mainCat = comment ("Start category: " ++ origStart) $$ - text ".MAIN" <+> prCat start - prRule (SRGRule cat origCat rhs) = - comment (prt origCat) $$ - prCat cat <+> union (map prAlt rhs) - -- FIXME: use the probability - prAlt (SRGAlt mp _ rhs) = prItem rhs - - -prItem :: SRGItem -> Doc -prItem = f - where - f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes) - where (es,nes) = partition isEpsilon xs - f (REConcat [x]) = f x - f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")" - f (RERepeat x) = text "*" <> f x - f (RESymbol s) = prSymbol s - -union :: [Doc] -> Doc -union [x] = x -union xs = text "[" <> fsep xs <> text "]" - -prSymbol :: Symbol SRGNT Token -> Doc -prSymbol (Cat (c,_)) = prCat c -prSymbol (Tok t) = doubleQuotes (showToken t) - --- GSL requires an upper case letter in category names -prCat :: SRGCat -> Doc -prCat c = text (firstToUpper c) - - -firstToUpper :: String -> String -firstToUpper [] = [] -firstToUpper (x:xs) = toUpper x : xs - -{- -rmPunctCFG :: CGrammar -> CGrammar -rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g] - -keepSymbol :: Symbol c Token -> Bool -keepSymbol (Tok t) = not (all isPunct (prt t)) -keepSymbol _ = True --} - --- Nuance does not like upper case characters in tokens -showToken :: Token -> Doc -showToken t = text (map toLower (prt t)) - -isPunct :: Char -> Bool -isPunct c = c `elem` "-_.:;.,?!()[]{}" - -comment :: String -> Doc -comment s = text ";" <+> text s - - --- Pretty-printing utilities - -emptyLine :: Doc -emptyLine = text "" - -($++$) :: Doc -> Doc -> Doc -x $++$ y = x $$ emptyLine $$ y diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs deleted file mode 100644 index 037a4f4e2..000000000 --- a/src/GF/Speech/PrJSGF.hs +++ /dev/null @@ -1,145 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrJSGF --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/01 20:09:04 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.16 $ --- --- This module prints a CFG as a JSGF grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar --- --- FIXME: convert to UTF-8 ------------------------------------------------------------------------------ - -module GF.Speech.PrJSGF (jsgfPrinter) where - -import GF.Conversion.Types -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats) -import GF.Infra.Ident -import GF.Infra.Print -import GF.Infra.Option -import GF.Probabilistic.Probabilistic (Probs) -import GF.Speech.SISR -import GF.Speech.SRG -import GF.Speech.RegExp -import GF.Compile.ShellState (StateGrammar) - -import Data.Char -import Data.List -import Data.Maybe -import Text.PrettyPrint.HughesPJ -import Debug.Trace - -width :: Int -width = 75 - -jsgfPrinter :: Maybe SISRFormat - -> Options - -> StateGrammar -> String -jsgfPrinter sisr opts s = renderStyle st $ prJSGF sisr $ makeSimpleSRG opts s - where st = style { lineLength = width } - -prJSGF :: Maybe SISRFormat -> SRG -> Doc -prJSGF sisr srg@(SRG{grammarName=name,grammarLanguage=ml, - startCat=start,origStartCat=origStart,rules=rs}) - = header $++$ mainCat $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs) - where - header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$ - comment ("JSGF speech recognition grammar for " ++ name) $$ - comment "Generated by GF" $$ - text ("grammar " ++ name ++ ";") - lang = maybe empty text ml - mainCat = comment ("Start category: " ++ origStart) $$ - case cfgCatToGFCat origStart of - Just c -> rule True "MAIN" [prCat (catFormId c)] - Nothing -> empty - prRule (SRGRule cat origCat rhs) = - comment origCat $$ - rule False cat (map prAlt rhs) --- rule False cat (map prAlt rhs) - -- FIXME: use the probability - prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] --- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag - where initTag | isEmpty t = empty - | otherwise = text "<NULL>" <+> t - where t = tag sisr (profileInitSISR n) - finalTag = tag sisr (profileFinalSISR n) - p = if isEmpty initTag && isEmpty finalTag then id else parens - - topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg] - where it i c = prCat c <+> tag sisr (topCatSISR c) - -catFormId :: String -> String -catFormId = (++ "_cat") - -prCat :: SRGCat -> Doc -prCat c = char '<' <> text c <> char '>' - -prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc -prItem sisr t = f 0 - where - f _ (REUnion []) = text "<VOID>" - f p (REUnion xs) - | not (null es) = brackets (f 0 (REUnion nes)) - | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) - where (es,nes) = partition isEpsilon xs - f _ (REConcat []) = text "<NULL>" - f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) - f p (RERepeat x) = f 3 x <> char '*' - f _ (RESymbol s) = prSymbol sisr t s - -{- -prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc -prItem _ _ [] = text "<NULL>" -prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss - where paren = if length ss == 1 then id else parens --} - -prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc -prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) -prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation - | otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars - -tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc -tag Nothing _ = empty -tag (Just fmt) t = case t fmt of - [] -> empty - ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}' - where e [] = [] - e ('}':xs) = '\\':'}':e xs - e ('\n':xs) = ' ' : e (dropWhile isSpace xs) - e (x:xs) = x:e xs - -isPunct :: Char -> Bool -isPunct c = c `elem` "-_.;.,?!" - -comment :: String -> Doc -comment s = text "//" <+> text s - -alts :: [Doc] -> Doc -alts = fsep . prepunctuate (text "| ") - -rule :: Bool -> SRGCat -> [Doc] -> Doc -rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' - where p = if pub then text "public" else empty - --- Pretty-printing utilities - -emptyLine :: Doc -emptyLine = text "" - -prepunctuate :: Doc -> [Doc] -> [Doc] -prepunctuate _ [] = [] -prepunctuate p (x:xs) = x : map (p <>) xs - -($++$) :: Doc -> Doc -> Doc -x $++$ y = x $$ emptyLine $$ y - diff --git a/src/GF/Speech/PrRegExp.hs b/src/GF/Speech/PrRegExp.hs deleted file mode 100644 index 55a25d69b..000000000 --- a/src/GF/Speech/PrRegExp.hs +++ /dev/null @@ -1,33 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrSLF --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- This module prints a grammar as a regular expression. ------------------------------------------------------------------------------ - -module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where - -import GF.Conversion.Types -import GF.Formalism.Utilities -import GF.Infra.Ident -import GF.Infra.Option (Options) -import GF.Speech.CFGToFiniteState -import GF.Speech.RegExp -import GF.Compile.ShellState (StateGrammar) - - -regexpPrinter :: Options -> StateGrammar -> String -regexpPrinter opts s = (++"\n") $ prRE $ dfa2re $ cfgToFA opts s - -multiRegexpPrinter :: Options -> StateGrammar -> String -multiRegexpPrinter opts s = prREs $ mfa2res $ cfgToMFA opts s - -prREs :: [(String,RE (MFALabel String))] -> String -prREs res = unlines [l ++ " = " ++ prRE (mapRE showLabel re) | (l,re) <- res] - where showLabel = symbol (\l -> "<" ++ l ++ ">") id - -mfa2res :: MFA String -> [(String,RE (MFALabel String))] -mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas] diff --git a/src/GF/Speech/PrSLF.hs b/src/GF/Speech/PrSLF.hs deleted file mode 100644 index 9bc025558..000000000 --- a/src/GF/Speech/PrSLF.hs +++ /dev/null @@ -1,190 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrSLF --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/10 16:43:44 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.12 $ --- --- This module converts a CFG to an SLF finite-state network --- for use with the ATK recognizer. The SLF format is described --- in the HTK manual, and an example for use in ATK is shown --- in the ATK manual. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar ------------------------------------------------------------------------------ - -module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter, - slfSubPrinter,slfSubGraphvizPrinter) where - -import GF.Data.Utilities -import GF.Conversion.Types -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol) -import GF.Infra.Ident -import GF.Infra.Option (Options) -import GF.Infra.Print -import GF.Speech.CFGToFiniteState -import GF.Speech.FiniteState -import GF.Speech.TransformCFG -import qualified GF.Visualization.Graphviz as Dot -import GF.Compile.ShellState (StateGrammar) - -import Control.Monad -import qualified Control.Monad.State as STM -import Data.Char (toUpper) -import Data.List -import Data.Maybe - -data SLFs = SLFs [(String,SLF)] SLF - -data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] } - -data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String } - | SLFSubLat { nId :: Int, nLat :: String } - --- | An SLF word is a word, or the empty string. -type SLFWord = Maybe String - -data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int } - -type SLF_FA = FA State (Maybe (MFALabel String)) () - -mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)]) -mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs]) - where MFA start subs = {- renameSubs $ -} cfgToMFA opts s - main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa - -slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) () -slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing () - . moveLabelsToNodes . dfa2nfa - --- | Give sequential names to subnetworks. -renameSubs :: MFA String -> MFA String -renameSubs (MFA start subs) = MFA (newName start) subs' - where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]] - newName s = lookup' s newNames - subs' = [(newName s,renameLabels n) | (s,n) <- subs] - renameLabels = mapTransitions (mapSymbol newName id) - --- --- * SLF graphviz printing (without sub-networks) --- - -slfGraphvizPrinter :: Options -> StateGrammar -> String -slfGraphvizPrinter opts s - = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s - where - gvFA = mapStates (fromMaybe "") . mapTransitions (const "") - --- --- * SLF graphviz printing (with sub-networks) --- - -slfSubGraphvizPrinter :: Options -> StateGrammar -> String -slfSubGraphvizPrinter opts s = Dot.prGraphviz g - where (main, subs) = mkFAs opts s - g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..] - ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs - m = gvSLFFA Nothing main - -gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph -gvSLFFA n fa = - liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv) - . mapTransitions (const "")) (rename fa) - where mfaLabelToGv = symbol ("#"++) id - mkCluster Nothing = id - mkCluster (Just x) - = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x - rename fa = do - names <- STM.get - let fa' = renameStates names fa - names' = unusedNames fa' - STM.put names' - return fa' - --- --- * SLF printing (without sub-networks) --- - -slfPrinter :: Options -> StateGrammar -> String -slfPrinter opts s - = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s - --- --- * SLF printing (with sub-networks) --- - --- | Make a network with subnetworks in SLF -slfSubPrinter :: Options -> StateGrammar -> String -slfSubPrinter opts s = prSLFs slfs - where - (main,subs) = mkFAs opts s - slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main) - faToSLF = automatonToSLF mfaNodeToSLFNode - -automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF -automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es } - where ns = map (uncurry mkNode) (states fa) - es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa) - -mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode -mfaNodeToSLFNode i l = case l of - Nothing -> mkSLFNode i Nothing - Just (Tok x) -> mkSLFNode i (Just x) - Just (Cat s) -> mkSLFSubLat i s - -mkSLFNode :: Int -> Maybe String -> SLFNode -mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing } -mkSLFNode i (Just w) - | isNonWord w = SLFNode { nId = i, - nWord = Nothing, - nTag = Just w } - | otherwise = SLFNode { nId = i, - nWord = Just (map toUpper w), - nTag = Just w } - -mkSLFSubLat :: Int -> String -> SLFNode -mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub } - -mkSLFEdge :: Int -> (Int,Int) -> SLFEdge -mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t } - -prSLFs :: SLFs -> String -prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) "" - where prSub (n,s) = showString "SUBLAT=" . shows n - . nl . prOneSLF s . showString "." . nl - -prSLF :: SLF -> String -prSLF slf = prOneSLF slf "" - -prOneSLF :: SLF -> ShowS -prOneSLF (SLF { slfNodes = ns, slfEdges = es}) - = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl - where - header = prFields [("N",show (length ns)),("L", show (length es))] . nl - prNode (SLFNode { nId = i, nWord = w, nTag = t }) - = prFields $ [("I",show i),("W",showWord w)] - ++ maybe [] (\t -> [("s",t)]) t - prNode (SLFSubLat { nId = i, nLat = l }) - = prFields [("I",show i),("L",show l)] - prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))] - --- | Check if a word should not correspond to a word in the SLF file. -isNonWord :: String -> Bool -isNonWord = any isPunct - -isPunct :: Char -> Bool -isPunct c = c `elem` "-_.;.,?!()[]{}" - -showWord :: SLFWord -> String -showWord Nothing = "!NULL" -showWord (Just w) | null w = "!NULL" - | otherwise = w - -prFields :: [(String,String)] -> ShowS -prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ] diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs deleted file mode 100644 index d8ae07867..000000000 --- a/src/GF/Speech/PrSRGS.hs +++ /dev/null @@ -1,153 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrSRGS --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- This module prints a CFG as an SRGS XML grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar ------------------------------------------------------------------------------ - -module GF.Speech.PrSRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where - -import GF.Data.Utilities -import GF.Data.XML -import GF.Speech.RegExp -import GF.Speech.SISR as SISR -import GF.Speech.SRG -import GF.Infra.Ident -import GF.Today - -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName, filterCats) -import GF.Conversion.Types -import GF.Infra.Print -import GF.Infra.Option -import GF.Probabilistic.Probabilistic (Probs) -import GF.Compile.ShellState (StateGrammar) - -import Data.Char (toUpper,toLower) -import Data.List -import Data.Maybe -import qualified Data.Map as Map -import qualified Data.Set as Set - -srgsXmlPrinter :: Maybe SISRFormat - -> Bool -- ^ Include probabilities - -> Options - -> StateGrammar -> String -srgsXmlPrinter sisr probs opts s = prSrgsXml sisr probs $ makeSimpleSRG opts s - -srgsXmlNonRecursivePrinter :: Options -> StateGrammar -> String -srgsXmlNonRecursivePrinter opts s = prSrgsXml Nothing False $ makeNonRecursiveSRG opts s - - -prSrgsXml :: Maybe SISRFormat -> Bool -> SRG -> String -prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start, - origStartCat=origStart,grammarLanguage=l,rules=rs}) - = showXMLDoc (optimizeSRGS xmlGr) - where - Just root = cfgCatToGFCat origStart - xmlGr = grammar sisr (catFormId root) l $ - [meta "description" - ("SRGS XML speech recognition grammar for " ++ name - ++ ". " ++ "Original start category: " ++ origStart), - meta "generator" ("Grammatical Framework " ++ version)] - ++ topCatRules - ++ concatMap ruleToXML rs - ruleToXML (SRGRule cat origCat alts) = - comments ["Category " ++ origCat] ++ [rule cat (prRhs alts)] - prRhs rhss = [oneOf (map (mkProd sisr probs) rhss)] - -- externally visible rules for each of the GF categories - topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg] - where it i c = Tag "item" [] ([ETag "ruleref" [("uri","#" ++ c)]] - ++ tag sisr (topCatSISR c)) - topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is - -rule :: String -> [XML] -> XML -rule i = Tag "rule" [("id",i)] - -mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML -mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ [x] ++ tf) - where x = mkItem sisr n rhs - w | probs = maybe [] (\p -> [("weight", show p)]) mp - | otherwise = [] - ti = tag sisr (profileInitSISR n) - tf = tag sisr (profileFinalSISR n) - -mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML -mkItem sisr cn = f - where - f (REUnion []) = ETag "ruleref" [("special","VOID")] - f (REUnion xs) - | not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)] - | otherwise = oneOf (map f xs) - where (es,nes) = partition isEpsilon xs - f (REConcat []) = ETag "ruleref" [("special","NULL")] - f (REConcat xs) = Tag "item" [] (map f xs) - f (RERepeat x) = Tag "item" [("repeat","0-")] [f x] - f (RESymbol s) = symItem sisr cn s - -{- -mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML -mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ xs ++ tf) - where xs = mkItem sisr n rhs - w | probs = maybe [] (\p -> [("weight", show p)]) mp - | otherwise = [] - ti = [tag sisr (profileInitSISR n)] - tf = [tag sisr (profileFinalSISR n)] - - -mkItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> [XML] -mkItem sisr cn ss = map (symItem sisr cn) ss --} - -symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML -symItem sisr cn (Cat n@(c,_)) = - Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n) -symItem _ _ (Tok t) = Tag "item" [] [Data (showToken t)] - -tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML] -tag Nothing _ = [] -tag (Just fmt) t = case t fmt of - [] -> [] - ts -> [Tag "tag" [] [Data (prSISR ts)]] - -catFormId :: String -> String -catFormId = (++ "_cat") - - -showToken :: Token -> String -showToken t = t - -oneOf :: [XML] -> XML -oneOf = Tag "one-of" [] - -grammar :: Maybe SISRFormat - -> String -- ^ root - -> Maybe String -- ^language - -> [XML] -> XML -grammar sisr root ml = - Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"), - ("version","1.0"), - ("mode","voice"), - ("root",root)] - ++ (if isJust sisr then [("tag-format","semantics/1.0")] else []) - ++ maybe [] (\l -> [("xml:lang", l)]) ml - -meta :: String -> String -> XML -meta n c = ETag "meta" [("name",n),("content",c)] - -optimizeSRGS :: XML -> XML -optimizeSRGS = bottomUpXML f - where f (Tag "item" [] [x@(Tag "item" _ _)]) = x - f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x - f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs - f (Tag "item" as xs) = Tag "item" as (map g xs) - where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x - g x = x - f (Tag "one-of" [] [x]) = x - f x = x diff --git a/src/GF/Speech/PrSRGS_ABNF.hs b/src/GF/Speech/PrSRGS_ABNF.hs deleted file mode 100644 index abb84c5dc..000000000 --- a/src/GF/Speech/PrSRGS_ABNF.hs +++ /dev/null @@ -1,147 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrJSRGS_ABNF --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/01 20:09:04 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.16 $ --- --- This module prints a CFG as a JSGF grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar --- --- FIXME: convert to UTF-8 ------------------------------------------------------------------------------ - -module GF.Speech.PrSRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where - -import GF.Conversion.Types -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats) -import GF.Infra.Ident -import GF.Infra.Print -import GF.Infra.Option -import GF.Probabilistic.Probabilistic (Probs) -import GF.Speech.SISR -import GF.Speech.SRG -import GF.Speech.RegExp -import GF.Compile.ShellState (StateGrammar) -import GF.Today - -import Data.Char -import Data.List -import Data.Maybe -import Text.PrettyPrint.HughesPJ -import Debug.Trace - -width :: Int -width = 75 - -srgsAbnfPrinter :: Maybe SISRFormat - -> Bool -- ^ Include probabilities - -> Options - -> StateGrammar -> String -srgsAbnfPrinter sisr probs opts s = showDoc $ prABNF sisr probs $ makeSimpleSRG opts s - -srgsAbnfNonRecursivePrinter :: Options -> StateGrammar -> String -srgsAbnfNonRecursivePrinter opts s = showDoc $ prABNF Nothing False $ makeNonRecursiveSRG opts s - -showDoc = renderStyle (style { lineLength = width }) - -prABNF :: Maybe SISRFormat -> Bool -> SRG -> Doc -prABNF sisr probs srg@(SRG{grammarName=name,grammarLanguage=ml, - startCat=start,origStartCat=origStart,rules=rs}) - = header $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs) - where - header = text "#ABNF 1.0 UTF-8;" $$ - meta "description" - ("Speech recognition grammar for " ++ name - ++ ". " ++ "Original start category: " ++ origStart) $$ - meta "generator" ("Grammatical Framework " ++ version) $$ - language $$ tagFormat $$ mainCat - language = maybe empty (\l -> text "language" <+> text l <> char ';') ml - tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';' - | otherwise = empty - mainCat = case cfgCatToGFCat origStart of - Just c -> text "root" <+> prCat (catFormId c) <> char ';' - Nothing -> empty - prRule (SRGRule cat origCat rhs) = - comment origCat $$ - rule False cat (map prAlt rhs) - -- FIXME: use the probability - prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] - where initTag = tag sisr (profileInitSISR n) - finalTag = tag sisr (profileFinalSISR n) - p = if isEmpty initTag && isEmpty finalTag then id else parens - - topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg] - where it i c = prCat c <+> tag sisr (topCatSISR c) - -catFormId :: String -> String -catFormId = (++ "_cat") - -prCat :: SRGCat -> Doc -prCat c = char '$' <> text c - -prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc -prItem sisr t = f 0 - where - f _ (REUnion []) = text "$VOID" - f p (REUnion xs) - | not (null es) = brackets (f 0 (REUnion nes)) - | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) - where (es,nes) = partition isEpsilon xs - f _ (REConcat []) = text "$NULL" - f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) - f p (RERepeat x) = f 3 x <> text "<0->" - f _ (RESymbol s) = prSymbol sisr t s - - -prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc -prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) -prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation - | otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars - -tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc -tag Nothing _ = empty -tag (Just fmt) t = - case t fmt of - [] -> empty - -- grr, silly SRGS ABNF does not have an escaping mechanism - ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}" - | otherwise -> text "{" <+> text x <+> text "}" - where x = prSISR ts - -isPunct :: Char -> Bool -isPunct c = c `elem` "-_.;.,?!" - -comment :: String -> Doc -comment s = text "//" <+> text s - -alts :: [Doc] -> Doc -alts = fsep . prepunctuate (text "| ") - -rule :: Bool -> SRGCat -> [Doc] -> Doc -rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' - where p = if pub then text "public" else empty - -meta :: String -> String -> Doc -meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';' - --- Pretty-printing utilities - -emptyLine :: Doc -emptyLine = text "" - -prepunctuate :: Doc -> [Doc] -> [Doc] -prepunctuate _ [] = [] -prepunctuate p (x:xs) = x : map (p <>) xs - -($++$) :: Doc -> Doc -> Doc -x $++$ y = x $$ emptyLine $$ y - diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs deleted file mode 100644 index 5ee40828e..000000000 --- a/src/GF/Speech/RegExp.hs +++ /dev/null @@ -1,143 +0,0 @@ -module GF.Speech.RegExp (RE(..), - epsilonRE, nullRE, - isEpsilon, isNull, - unionRE, concatRE, seqRE, - repeatRE, minimizeRE, - mapRE, mapRE', joinRE, - symbolsRE, - dfa2re, prRE) where - -import Data.List - -import GF.Data.Utilities -import GF.Speech.FiniteState - -data RE a = - REUnion [RE a] -- ^ REUnion [] is null - | REConcat [RE a] -- ^ REConcat [] is epsilon - | RERepeat (RE a) - | RESymbol a - deriving (Eq,Ord,Show) - - -dfa2re :: (Ord a) => DFA a -> RE a -dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops - . oneFinalState () epsilonRE . mapTransitions RESymbol - where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa - merge es = [(f,t,unionRE ls) - | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]] - -elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a) -elimStates fa = - case [s | (s,_) <- states fa, isInternal fa s] of - [] -> fa - sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa - where sAs = nonLoopTransitionsTo sE fa - sBs = nonLoopTransitionsFrom sE fa - r2 = unionRE $ loops sE fa - ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs] - r r1 r3 = concatRE [r1, repeatRE r2, r3] - -epsilonRE :: RE a -epsilonRE = REConcat [] - -nullRE :: RE a -nullRE = REUnion [] - -isNull :: RE a -> Bool -isNull (REUnion []) = True -isNull _ = False - -isEpsilon :: RE a -> Bool -isEpsilon (REConcat []) = True -isEpsilon _ = False - -unionRE :: Ord a => [RE a] -> RE a -unionRE = unionOrId . sortNub . concatMap toList - where - toList (REUnion xs) = xs - toList x = [x] - unionOrId [r] = r - unionOrId rs = REUnion rs - -concatRE :: [RE a] -> RE a -concatRE xs | any isNull xs = nullRE - | otherwise = case concatMap toList xs of - [r] -> r - rs -> REConcat rs - where - toList (REConcat xs) = xs - toList x = [x] - -seqRE :: [a] -> RE a -seqRE = concatRE . map RESymbol - -repeatRE :: RE a -> RE a -repeatRE x | isNull x || isEpsilon x = epsilonRE - | otherwise = RERepeat x - -finalRE :: Ord a => DFA (RE a) -> RE a -finalRE fa = concatRE [repeatRE r1, r2, - repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])] - where - s0 = startState fa - [sF] = finalStates fa - r1 = unionRE $ loops s0 fa - r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa - r3 = unionRE $ loops sF fa - r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa - -reverseRE :: RE a -> RE a -reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs -reverseRE (REUnion xs) = REUnion (map reverseRE xs) -reverseRE (RERepeat x) = RERepeat (reverseRE x) -reverseRE x = x - -minimizeRE :: Ord a => RE a -> RE a -minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward - -mergeForward :: Ord a => RE a -> RE a -mergeForward (REUnion xs) = - unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)] -mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)] -mergeForward (RERepeat r) = repeatRE (mergeForward r) -mergeForward r = r - -firstRE :: RE a -> (RE a, RE a) -firstRE (REConcat (x:xs)) = (x, REConcat xs) -firstRE r = (r,epsilonRE) - -mapRE :: (a -> b) -> RE a -> RE b -mapRE f = mapRE' (RESymbol . f) - -mapRE' :: (a -> RE b) -> RE a -> RE b -mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs) -mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs) -mapRE' f (RERepeat x) = RERepeat (mapRE' f x) -mapRE' f (RESymbol s) = f s - -joinRE :: RE (RE a) -> RE a -joinRE (REConcat xs) = REConcat (map joinRE xs) -joinRE (REUnion xs) = REUnion (map joinRE xs) -joinRE (RERepeat xs) = RERepeat (joinRE xs) -joinRE (RESymbol ss) = ss - -symbolsRE :: RE a -> [a] -symbolsRE (REConcat xs) = concatMap symbolsRE xs -symbolsRE (REUnion xs) = concatMap symbolsRE xs -symbolsRE (RERepeat x) = symbolsRE x -symbolsRE (RESymbol x) = [x] - --- Debugging - -prRE :: RE String -> String -prRE = prRE' 0 - -prRE' _ (REUnion []) = "<NULL>" -prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs))) -prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs)) -prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*" -prRE' _ (RESymbol s) = s - -p n m s | n >= m = "(" ++ s ++ ")" - | True = s diff --git a/src/GF/Speech/Relation.hs b/src/GF/Speech/Relation.hs deleted file mode 100644 index 641d671a9..000000000 --- a/src/GF/Speech/Relation.hs +++ /dev/null @@ -1,130 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Relation --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/10/26 17:13:13 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.1 $ --- --- A simple module for relations. ------------------------------------------------------------------------------ - -module GF.Speech.Relation (Rel, mkRel, mkRel' - , allRelated , isRelatedTo - , transitiveClosure - , reflexiveClosure, reflexiveClosure_ - , symmetricClosure - , symmetricSubrelation, reflexiveSubrelation - , reflexiveElements - , equivalenceClasses - , isTransitive, isReflexive, isSymmetric - , isEquivalence - , isSubRelationOf) where - -import Data.List -import Data.Maybe -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set - -import GF.Data.Utilities - -type Rel a = Map a (Set a) - --- | Creates a relation from a list of related pairs. -mkRel :: Ord a => [(a,a)] -> Rel a -mkRel ps = relates ps Map.empty - --- | Creates a relation from a list pairs of elements and the elements --- related to them. -mkRel' :: Ord a => [(a,[a])] -> Rel a -mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs] - -relToList :: Rel a -> [(a,a)] -relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ] - --- | Add a pair to the relation. -relate :: Ord a => a -> a -> Rel a -> Rel a -relate x y r = Map.insertWith Set.union x (Set.singleton y) r - --- | Add a list of pairs to the relation. -relates :: Ord a => [(a,a)] -> Rel a -> Rel a -relates ps r = foldl (\r' (x,y) -> relate x y r') r ps - --- | Checks if an element is related to another. -isRelatedTo :: Ord a => Rel a -> a -> a -> Bool -isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r) - --- | Get the set of elements to which a given element is related. -allRelated :: Ord a => Rel a -> a -> Set a -allRelated r x = fromMaybe Set.empty (Map.lookup x r) - --- | Get all elements in the relation. -domain :: Ord a => Rel a -> Set a -domain r = foldl Set.union (Map.keysSet r) (Map.elems r) - --- | Keep only pairs for which both elements are in the given set. -intersectSetRel :: Ord a => Set a -> Rel a -> Rel a -intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s) - -transitiveClosure :: Ord a => Rel a -> Rel a -transitiveClosure r = fix (Map.map growSet) r - where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys) - -reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined. - -> Rel a -> Rel a -reflexiveClosure_ u r = relates [(x,x) | x <- u] r - --- | Uses 'domain' -reflexiveClosure :: Ord a => Rel a -> Rel a -reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r - -symmetricClosure :: Ord a => Rel a -> Rel a -symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r - -symmetricSubrelation :: Ord a => Rel a -> Rel a -symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r - -reflexiveSubrelation :: Ord a => Rel a -> Rel a -reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r - --- | Get the set of elements which are related to themselves. -reflexiveElements :: Ord a => Rel a -> Set a -reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ] - --- | Keep the related pairs for which the predicate is true. -filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a -filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p) - --- | Remove keys that map to no elements. -purgeEmpty :: Ord a => Rel a -> Rel a -purgeEmpty r = Map.filter (not . Set.null) r - - --- | Get the equivalence classes from an equivalence relation. -equivalenceClasses :: Ord a => Rel a -> [Set a] -equivalenceClasses r = equivalenceClasses_ (Map.keys r) r - where equivalenceClasses_ [] _ = [] - equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r - where ys = allRelated r x - zs = [x' | x' <- xs, not (x' `Set.member` ys)] - -isTransitive :: Ord a => Rel a -> Bool -isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r, - y <- Set.toList ys, z <- Set.toList (allRelated r y)] - -isReflexive :: Ord a => Rel a -> Bool -isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r) - -isSymmetric :: Ord a => Rel a -> Bool -isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r] - -isEquivalence :: Ord a => Rel a -> Bool -isEquivalence r = isReflexive r && isSymmetric r && isTransitive r - -isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool -isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1) diff --git a/src/GF/Speech/RelationQC.hs b/src/GF/Speech/RelationQC.hs deleted file mode 100644 index 47f783986..000000000 --- a/src/GF/Speech/RelationQC.hs +++ /dev/null @@ -1,39 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : RelationQC --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/10/26 17:13:13 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.1 $ --- --- QuickCheck properties for GF.Speech.Relation ------------------------------------------------------------------------------ - -module GF.Speech.RelationQC where - -import GF.Speech.Relation - -import Test.QuickCheck - -prop_transitiveClosure_trans :: [(Int,Int)] -> Bool -prop_transitiveClosure_trans ps = isTransitive (transitiveClosure (mkRel ps)) - -prop_symmetricSubrelation_symm :: [(Int,Int)] -> Bool -prop_symmetricSubrelation_symm ps = isSymmetric (symmetricSubrelation (mkRel ps)) - -prop_symmetricSubrelation_sub :: [(Int,Int)] -> Bool -prop_symmetricSubrelation_sub ps = symmetricSubrelation r `isSubRelationOf` r - where r = mkRel ps - -prop_symmetricClosure_symm :: [(Int,Int)] -> Bool -prop_symmetricClosure_symm ps = isSymmetric (symmetricClosure (mkRel ps)) - -prop_reflexiveClosure_refl :: [(Int,Int)] -> Bool -prop_reflexiveClosure_refl ps = isReflexive (reflexiveClosure (mkRel ps)) - -prop_mkEquiv_equiv :: [(Int,Int)] -> Bool -prop_mkEquiv_equiv ps = isEquivalence (mkEquiv ps) - where mkEquiv = transitiveClosure . symmetricClosure . reflexiveClosure . mkRel diff --git a/src/GF/Speech/SISR.hs b/src/GF/Speech/SISR.hs deleted file mode 100644 index 3e68a2e55..000000000 --- a/src/GF/Speech/SISR.hs +++ /dev/null @@ -1,87 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : GF.Speech.SISR --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- Abstract syntax and pretty printer for SISR, --- (Semantic Interpretation for Speech Recognition) --- ------------------------------------------------------------------------------ - -module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR, - topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where - -import Data.List - -import GF.Conversion.Types -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName) -import GF.Infra.Ident -import GF.Speech.TransformCFG -import GF.Speech.SRG (SRGNT) - -import qualified GF.JavaScript.AbsJS as JS -import qualified GF.JavaScript.PrintJS as JS - -data SISRFormat = - -- SISR Working draft 1 April 2003 - -- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/ - SISROld - deriving Show - -type SISRTag = [JS.DeclOrExpr] - - -prSISR :: SISRTag -> String -prSISR = JS.printTree - -topCatSISR :: String -> SISRFormat -> SISRTag -topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c] - -profileInitSISR :: CFTerm -> SISRFormat -> SISRTag -profileInitSISR t fmt - | null (usedArgs t) = [] - | otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]] - -usedArgs :: CFTerm -> [Int] -usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts) -usedArgs (CFAbs _ x) = usedArgs x -usedArgs (CFApp x y) = usedArgs x `union` usedArgs y -usedArgs (CFRes i) = [i] -usedArgs _ = [] - -catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag -catSISR t (c,i) fmt - | i `elem` usedArgs t = map JS.DExpr - [JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c] - | otherwise = [] - -profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag -profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term] - where - f (CFObj n ts) = tree (prIdent n) (map f ts) - f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)] - f (CFApp x y) = JS.ECall (f x) [f y] - f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) - f (CFVar v) = JS.EVar (var v) - f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr typ)] - -fmtOut SISROld = JS.EVar (JS.Ident "$") - -fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c)) - -args = JS.Ident "a" - -var v = JS.Ident ("x" ++ show v) - -field x y = JS.EMember x (JS.Ident y) - -ass = JS.EAssign - -tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)] - -obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps] - diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs deleted file mode 100644 index 19b6c1c1b..000000000 --- a/src/GF/Speech/SRG.hs +++ /dev/null @@ -1,235 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : SRG --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/01 20:09:04 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.20 $ --- --- Representation of, conversion to, and utilities for --- printing of a general Speech Recognition Grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar ------------------------------------------------------------------------------ - -module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, - SRGCat, SRGNT, CFTerm - , makeSRG - , makeSimpleSRG - , makeNonRecursiveSRG - , lookupFM_, prtS - , cfgCatToGFCat, srgTopCats - ) where - -import GF.Data.Operations -import GF.Data.Utilities -import GF.Infra.Ident -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), NameProfile(..) - , Profile(..), SyntaxForest - , filterCats, mapSymbol, symbol) -import GF.Conversion.Types -import GF.Infra.Print -import GF.Speech.TransformCFG -import GF.Speech.Relation -import GF.Speech.FiniteState -import GF.Speech.RegExp -import GF.Speech.CFGToFiniteState -import GF.Infra.Option -import GF.Probabilistic.Probabilistic (Probs) -import GF.Compile.ShellState (StateGrammar, stateProbs, stateOptions, cncId) - -import Data.List -import Data.Maybe (fromMaybe, maybeToList) -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set - -import Debug.Trace - -data SRG = SRG { grammarName :: String -- ^ grammar name - , startCat :: SRGCat -- ^ start category name - , origStartCat :: String -- ^ original start category name - , grammarLanguage :: Maybe String -- ^ The language for which the grammar - -- is intended, e.g. en-UK - , rules :: [SRGRule] - } - deriving (Eq,Show) - -data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original category name - -- and productions - deriving (Eq,Show) - --- | maybe a probability, a rule name and an EBNF right-hand side -data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem - deriving (Eq,Show) - -type SRGItem = RE (Symbol SRGNT Token) - -type SRGCat = String - --- | An SRG non-terminal. Category name and its number in the profile. -type SRGNT = (SRGCat, Int) - --- | SRG category name and original name -type CatName = (SRGCat,String) - -type CatNames = Map String String - --- | Create a non-left-recursive SRG. --- FIXME: the probabilities in the returned --- grammar may be meaningless. -makeSimpleSRG :: Options -- ^ Grammar options - -> StateGrammar - -> SRG -makeSimpleSRG opt s = makeSRG preprocess opt s - where - preprocess origStart = traceStats "After mergeIdentical" - . mergeIdentical - . traceStats "After removeLeftRecursion" - . removeLeftRecursion origStart - . traceStats "After topDownFilter" - . topDownFilter origStart - . traceStats "After bottomUpFilter" - . bottomUpFilter - . traceStats "After removeCycles" - . removeCycles - . traceStats "Inital CFG" - -traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g - -stats g = "Categories: " ++ show (countCats g) - ++ " Rules: " ++ show (countRules g) - -makeNonRecursiveSRG :: Options - -> StateGrammar - -> SRG -makeNonRecursiveSRG opt s = renameSRG $ - SRG { grammarName = prIdent (cncId s), - startCat = start, - origStartCat = origStart, - grammarLanguage = getSpeechLanguage opt s, - rules = rs } - where - origStart = getStartCatCF opt s - MFA start dfas = cfgToMFA opt s - rs = [SRGRule l l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas] - where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re - dummyCFTerm = CFMeta "dummy" - dummySRGNT = mapSymbol (\c -> (c,0)) id - -makeSRG :: (Cat_ -> CFRules -> CFRules) - -> Options -- ^ Grammar options - -> StateGrammar - -> SRG -makeSRG preprocess opt s = renameSRG $ - SRG { grammarName = name, - startCat = origStart, - origStartCat = origStart, - grammarLanguage = getSpeechLanguage opt s, - rules = rs } - where - name = prIdent (cncId s) - origStart = getStartCatCF opt s - (_,cfgRules) = unzip $ allRulesGrouped $ preprocess origStart $ cfgToCFRules s - rs = map (cfgRulesToSRGRule (stateProbs s)) cfgRules - --- | Give names on the form NameX to all categories. -renameSRG :: SRG -> SRG -renameSRG srg = srg { startCat = renameCat (startCat srg), - rules = map renameRule (rules srg) } - where - names = mkCatNames (grammarName srg) (allSRGCats srg) - renameRule (SRGRule _ origCat alts) = SRGRule (renameCat origCat) origCat (map renameAlt alts) - renameAlt (SRGAlt mp n rhs) = SRGAlt mp n (mapRE renameSymbol rhs) - renameSymbol = mapSymbol (\ (c,x) -> (renameCat c, x)) id - renameCat = lookupFM_ names - -getSpeechLanguage :: Options -> StateGrammar -> Maybe String -getSpeechLanguage opt s = - fmap (replace '_' '-') $ getOptVal (addOptions opt (stateOptions s)) speechLanguage - --- FIXME: merge alternatives with same rhs and profile but different probabilities -cfgRulesToSRGRule :: Probs -> [CFRule_] -> SRGRule -cfgRulesToSRGRule probs rs@(r:_) = SRGRule origCat origCat rhs - where - origCat = lhsCat r - alts = [((n,ruleProb probs r),mkSRGSymbols 0 ss) | CFRule c ss n <- rs] - rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ] - - mkSRGSymbols _ [] = [] - mkSRGSymbols i (Cat c:ss) = Cat (c,i) : mkSRGSymbols (i+1) ss - mkSRGSymbols i (Tok t:ss) = Tok t : mkSRGSymbols i ss - -ruleProb :: Probs -> CFRule_ -> Maybe Double -ruleProb probs r = lookupProb probs (ruleFun r) - --- FIXME: move to GF.Probabilistic.Probabilistic? -lookupProb :: Probs -> Ident -> Maybe Double -lookupProb probs i = lookupTree prIdent i probs - -mkCatNames :: String -- ^ Category name prefix - -> [String] -- ^ Original category names - -> Map String String -- ^ Maps original names to SRG names -mkCatNames prefix origNames = Map.fromList (zip origNames names) - where names = [prefix ++ "_" ++ show x | x <- [0..]] - - -allSRGCats :: SRG -> [String] -allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs] - -cfgCatToGFCat :: SRGCat -> Maybe String -cfgCatToGFCat c - -- categories introduced by removeLeftRecursion contain dashes - | '-' `elem` c = Nothing - -- some categories introduced by -conversion=finite have the form - -- "{fun:cat}..." - | "{" `isPrefixOf` c = case dropWhile (/=':') $ takeWhile (/='}') $ tail c of - ':':c' -> Just c' - _ -> error $ "cfgCatToGFCat: Strange category " ++ show c - | otherwise = Just $ takeWhile (/='{') c - -srgTopCats :: SRG -> [(String,[SRGCat])] -srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg, - oc <- maybeToList $ cfgCatToGFCat origCat] - --- --- * Size-optimized EBNF SRGs --- - -srgItem :: [[Symbol SRGNT Token]] -> SRGItem -srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) --- non-optimizing version: ---srgItem = unionRE . map seqRE - --- | Merges a list of right-hand sides which all have the same --- sequence of non-terminals. -mergeItems :: [[Symbol SRGNT Token]] -> SRGItem -mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens - -groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]] -groupTokens [] = [] -groupTokens (Tok t:ss) = case groupTokens ss of - Tok ts:ss' -> Tok (t:ts):ss' - ss' -> Tok [t]:ss' -groupTokens (Cat c:ss) = Cat c : groupTokens ss - -ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token) -ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok))) - --- --- * Utilities for building and printing SRGs --- - -lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt -lookupFM_ fm k = Map.findWithDefault err k fm - where err = error $ "Key not found: " ++ show k - ++ "\namong " ++ show (Map.keys fm) - -prtS :: Print a => a -> ShowS -prtS = showString . prt diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs deleted file mode 100644 index 3d7ebd809..000000000 --- a/src/GF/Speech/TransformCFG.hs +++ /dev/null @@ -1,378 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : TransformCFG --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/01 20:09:04 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.24 $ --- --- This module does some useful transformations on CFGs. --- --- peb thinks: most of this module should be moved to GF.Conversion... ------------------------------------------------------------------------------ - -module GF.Speech.TransformCFG where - -import GF.Canon.CanonToGFCC (canon2gfcc) -import qualified GF.GFCC.CId as C -import GF.GFCC.Macros (lookType,catSkeleton) -import GF.GFCC.DataGFCC (GFCC) -import GF.Conversion.Types -import GF.CF.PPrCF (prCFCat) -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, - NameProfile(..), Profile(..), name2fun, forestName) -import GF.Infra.Ident -import GF.Infra.Option -import GF.Infra.Print -import GF.Speech.Relation -import GF.Compile.ShellState (StateGrammar, stateCFG, stateGrammarST, startCatStateOpts, stateOptions) - -import Control.Monad -import Control.Monad.State (State, get, put, evalState) -import Data.Map (Map) -import qualified Data.Map as Map -import Data.List -import Data.Maybe (fromMaybe) -import Data.Monoid (mconcat) -import Data.Set (Set) -import qualified Data.Set as Set - --- not very nice to replace the structured CFCat type with a simple string -type CFRule_ = CFRule Cat_ CFTerm Token - -data CFTerm - = CFObj Fun [CFTerm] -- ^ an abstract syntax function with arguments - | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id. - | CFApp CFTerm CFTerm -- ^ Application - | CFRes Int -- ^ The result of the n:th (0-based) non-terminal - | CFVar Int -- ^ A lambda-bound variable - | CFMeta String -- ^ A metavariable - deriving (Eq,Ord,Show) - -type Cat_ = String -type CFSymbol_ = Symbol Cat_ Token - -type CFRules = Map Cat_ (Set CFRule_) - - -cfgToCFRules :: StateGrammar -> CFRules -cfgToCFRules s = - groupProds [CFRule (catToString c) (map symb r) (nameToTerm n) - | CFRule c r n <- cfg] - where cfg = stateCFG s - symb = mapSymbol catToString id - catToString = prt - gfcc = stateGFCC s - nameToTerm (Name IW [Unify [n]]) = CFRes n - nameToTerm (Name f@(IC c) prs) = - CFObj f (zipWith profileToTerm args prs) - where (args,_) = catSkeleton $ lookType gfcc (C.CId c) - nameToTerm n = error $ "cfgToCFRules.nameToTerm" ++ show n - profileToTerm (C.CId t) (Unify []) = CFMeta t - profileToTerm _ (Unify xs) = CFRes (last xs) -- FIXME: unify - profileToTerm (C.CId t) (Constant f) = maybe (CFMeta t) (\x -> CFObj x []) (forestName f) - -getStartCat :: Options -> StateGrammar -> String -getStartCat opts sgr = prCFCat (startCatStateOpts opts' sgr) - where opts' = addOptions opts (stateOptions sgr) - -getStartCatCF :: Options -> StateGrammar -> String -getStartCatCF opts sgr = getStartCat opts sgr ++ "{}.s" - -stateGFCC :: StateGrammar -> GFCC -stateGFCC = canon2gfcc noOptions . stateGrammarST - --- * Grammar filtering - --- | Removes all directly and indirectly cyclic productions. --- FIXME: this may be too aggressive, only one production --- needs to be removed to break a given cycle. But which --- one should we pick? --- FIXME: Does not (yet) remove productions which are cyclic --- because of empty productions. -removeCycles :: CFRules -> CFRules -removeCycles = groupProds . f . allRules - where f rs = filter (not . isCycle) rs - where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [Cat c'] _ <- rs] - isCycle (CFRule c [Cat c'] _) = isRelatedTo alias c' c - isCycle _ = False - --- | Better bottom-up filter that also removes categories which contain no finite --- strings. -bottomUpFilter :: CFRules -> CFRules -bottomUpFilter gr = fix grow Map.empty - where grow g = g `unionCFRules` filterCFRules (all (okSym g) . ruleRhs) gr - okSym g = symbol (`elem` allCats g) (const True) - --- | Removes categories which are not reachable from the start category. -topDownFilter :: Cat_ -> CFRules -> CFRules -topDownFilter start rules = filterCFRulesCats (isRelatedTo uses start) rules - where - rhsCats = [ (lhsCat r, c') | r <- allRules rules, c' <- filterCats (ruleRhs r) ] - uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats - --- | Merges categories with identical right-hand-sides. --- FIXME: handle probabilities -mergeIdentical :: CFRules -> CFRules -mergeIdentical g = groupProds $ map subst $ allRules g - where - -- maps categories to their replacement - m = Map.fromList [(y,concat (intersperse "+" xs)) - | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList g], y <- xs] - -- build data to compare for each category: a set of name,rhs pairs - rulesKey = Set.map (\ (CFRule _ r n) -> (n,r)) - subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n - substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m - --- * Removing left recursion - --- The LC_LR algorithm from --- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf -removeLeftRecursion :: Cat_ -> CFRules -> CFRules -removeLeftRecursion start gr - = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] - where - scheme1 = [CFRule a [x,Cat a_x] n' | - a <- retainedLeftRecursive, - x <- properLeftCornersOf a, - not (isLeftRecursive x), - let a_x = mkCat (Cat a) x, - -- this is an extension of LC_LR to avoid generating - -- A-X categories for which there are no productions: - a_x `Set.member` newCats, - let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0)) - (\_ -> CFRes 0) x] - scheme2 = [CFRule a_x (beta++[Cat a_b]) n' | - a <- retainedLeftRecursive, - b@(Cat b') <- properLeftCornersOf a, - isLeftRecursive b, - CFRule _ (x:beta) n <- catRules gr b', - let a_x = mkCat (Cat a) x, - let a_b = mkCat (Cat a) b, - let i = length $ filterCats beta, - let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n))) - (\_ -> CFApp (CFRes i) n) x] - scheme3 = [CFRule a_x beta n' | - a <- retainedLeftRecursive, - x <- properLeftCornersOf a, - CFRule _ (x':beta) n <- catRules gr a, - x == x', - let a_x = mkCat (Cat a) x, - let n' = symbol (\_ -> CFAbs 1 (shiftTerm n)) - (\_ -> n) x] - scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats - - newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3)) - - shiftTerm :: CFTerm -> CFTerm - shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts) - shiftTerm (CFRes 0) = CFVar 1 - shiftTerm (CFRes n) = CFRes (n-1) - shiftTerm t = t - -- note: the rest don't occur in the original grammar - - cats = allCats gr - rules = allRules gr - - directLeftCorner = mkRel [(Cat c,t) | CFRule c (t:_) _ <- allRules gr] - leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner - properLeftCorner = transitiveClosure directLeftCorner - properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat - isProperLeftCornerOf = flip (isRelatedTo properLeftCorner) - - leftRecursive = reflexiveElements properLeftCorner - isLeftRecursive = (`Set.member` leftRecursive) - - retained = start `Set.insert` - Set.fromList [a | r <- allRules (filterCFRulesCats (not . isLeftRecursive . Cat) gr), - Cat a <- ruleRhs r] - isRetained = (`Set.member` retained) - - retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained - -mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_ -mkCat x y = showSymbol x ++ "-" ++ showSymbol y - where showSymbol = symbol id show - -{- - --- Paull's algorithm, see --- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf -removeLeftRecursion :: Cat_ -> CFRules -> CFRules -removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs - where - handleProds (c, r) = (c, concatMap handleProd r) - handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai = - -- FIXME: for non-recursive categories, this changes - -- the grammar unneccessarily, maybe we can use mutRecCats - -- to make this less invasive - -- FIXME: this will give multiple rules with the same name, - -- which may mess up the probabilities. - [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs] - handleProd r = [r] - -removeDirectLeftRecursions :: CFRules -> CFRules -removeDirectLeftRecursions = concat . flip evalState 0 . mapM removeDirectLeftRecursion - -removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category - -> State Int CFRules -removeDirectLeftRecursion (a,rs) - | null dr = return [(a,rs)] - | otherwise = - do - a' <- fresh a - let as = maybeEndWithA' nr - is = [CFRule a' (tail r) n | CFRule _ r n <- dr] - a's = maybeEndWithA' is - -- the not null constraint here avoids creating new - -- left recursive (cyclic) rules. - maybeEndWithA' xs = xs ++ [CFRule c (r++[Cat a']) n | CFRule c r n <- xs, - not (null r)] - return [(a, as), (a', a's)] - where - (dr,nr) = partition isDirectLeftRecursive rs - fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n } - -isDirectLeftRecursive :: CFRule_ -> Bool -isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c' -isDirectLeftRecursive _ = False - --} - --- | Get the sets of mutually recursive non-terminals for a grammar. -mutRecCats :: Bool -- ^ If true, all categories will be in some set. - -- If false, only recursive categories will be included. - -> CFRules -> [Set Cat_] -mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r - where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, Cat c' <- ss] - refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation - --- --- * Approximate context-free grammars with regular grammars. --- - --- Use the transformation algorithm from \"Regular Approximation of Context-free --- Grammars through Approximation\", Mohri and Nederhof, 2000 --- to create an over-generating regular frammar for a context-free --- grammar -makeRegular :: CFRules -> CFRules -makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) - where trSet cs | allXLinear cs rs = rs - | otherwise = concatMap handleCat csl - where csl = Set.toList cs - rs = catSetRules g cs - handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e - ++ concatMap (makeRightLinearRules c) (catRules g c) - where c' = newCat c - makeRightLinearRules b' (CFRule c ss n) = - case ys of - [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left - (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n - ++ makeRightLinearRules (newCat b) (CFRule c zs n) - where (xs,ys) = break (`catElem` cs) ss - -- don't add rules on the form A -> A - newRule c rhs n | rhs == [Cat c] = [] - | otherwise = [CFRule c rhs n] - newCat c = c ++ "$" - --- --- * CFG rule utilities --- - --- | Group productions by their lhs categories -groupProds :: [CFRule_] -> CFRules -groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) - -allRules :: CFRules -> [CFRule_] -allRules = concat . map Set.toList . Map.elems - -allRulesGrouped :: CFRules -> [(Cat_,[CFRule_])] -allRulesGrouped = Map.toList . Map.map Set.toList - -allCats :: CFRules -> [Cat_] -allCats = Map.keys - -catRules :: CFRules -> Cat_ -> [CFRule_] -catRules rs c = Set.toList $ Map.findWithDefault Set.empty c rs - -catSetRules :: CFRules -> Set Cat_ -> [CFRule_] -catSetRules g cs = allRules $ Map.filterWithKey (\c _ -> c `Set.member` cs) g - -cleanCFRules :: CFRules -> CFRules -cleanCFRules = Map.filter (not . Set.null) - -unionCFRules :: CFRules -> CFRules -> CFRules -unionCFRules = Map.unionWith Set.union - -filterCFRules :: (CFRule_ -> Bool) -> CFRules -> CFRules -filterCFRules p = cleanCFRules . Map.map (Set.filter p) - -filterCFRulesCats :: (Cat_ -> Bool) -> CFRules -> CFRules -filterCFRulesCats p = Map.filterWithKey (\c _ -> p c) - -countCats :: CFRules -> Int -countCats = Map.size . cleanCFRules - -countRules :: CFRules -> Int -countRules = length . allRules - -lhsCat :: CFRule c n t -> c -lhsCat (CFRule c _ _) = c - -ruleRhs :: CFRule c n t -> [Symbol c t] -ruleRhs (CFRule _ ss _) = ss - -ruleFun :: CFRule_ -> Fun -ruleFun (CFRule _ _ t) = f t - where f (CFObj n _) = n - f (CFApp _ x) = f x - f (CFAbs _ x) = f x - f _ = IC "" - --- | Checks if a symbol is a non-terminal of one of the given categories. -catElem :: Ord c => Symbol c t -> Set c -> Bool -catElem s cs = symbol (`Set.member` cs) (const False) s - --- | Check if any of the categories used on the right-hand side --- are in the given list of categories. -anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool -anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) - -mkCFTerm :: String -> CFTerm -mkCFTerm n = CFObj (IC n) [] - -ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool -ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs - -noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool -noCatsInSet cs = not . any (`catElem` cs) - --- | Check if all the rules are right-linear, or all the rules are --- left-linear, with respect to given categories. -allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool -allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs - --- | Checks if a context-free rule is right-linear. -isRightLinear :: Ord c => - Set c -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity - -> Bool -isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs - --- | Checks if a context-free rule is left-linear. -isLeftLinear :: Ord c => - Set c -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for left-linearity - -> Bool -isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs - -prCFRules :: CFRules -> String -prCFRules = unlines . map prRule . allRules - where - prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r)) - prSym = symbol id (\t -> "\""++ t ++"\"") |
