diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/compiler/GF/Speech | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/compiler/GF/Speech')
| -rw-r--r-- | src/compiler/GF/Speech/CFG.hs | 372 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/CFGToFA.hs | 244 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/FiniteState.hs | 329 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/GSL.hs | 95 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/JSGF.hs | 113 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/PGFToCFG.hs | 116 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/PrRegExp.hs | 27 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/RegExp.hs | 144 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SISR.hs | 77 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SLF.hs | 178 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SRG.hs | 205 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SRGS_ABNF.hs | 127 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SRGS_XML.hs | 105 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/VoiceXML.hs | 243 |
14 files changed, 2375 insertions, 0 deletions
diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Speech/CFG.hs new file mode 100644 index 000000000..9ec8416c5 --- /dev/null +++ b/src/compiler/GF/Speech/CFG.hs @@ -0,0 +1,372 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.CFG +-- +-- Context-free grammar representation and manipulation. +---------------------------------------------------------------------- +module GF.Speech.CFG where + +import GF.Data.Utilities +import PGF.CId +import GF.Infra.Option +import GF.Data.Relation + +import Control.Monad +import Control.Monad.State (State, get, put, evalState) +import qualified Data.ByteString.Char8 as BS +import Data.Map (Map) +import qualified Data.Map as Map +import Data.List +import Data.Maybe (fromMaybe) +import Data.Monoid (mconcat) +import Data.Set (Set) +import qualified Data.Set as Set + +-- +-- * Types +-- + +type Cat = String +type Token = String + +data Symbol c t = NonTerminal c | Terminal t + deriving (Eq, Ord, Show) + +type CFSymbol = Symbol Cat Token + +data CFRule = CFRule { + lhsCat :: Cat, + ruleRhs :: [CFSymbol], + ruleName :: CFTerm + } + deriving (Eq, Ord, Show) + +data CFTerm + = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments + | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id. + | CFApp CFTerm CFTerm -- ^ Application + | CFRes Int -- ^ The result of the n:th (0-based) non-terminal + | CFVar Int -- ^ A lambda-bound variable + | CFMeta CId -- ^ A metavariable + deriving (Eq, Ord, Show) + +data CFG = CFG { cfgStartCat :: Cat, + cfgExternalCats :: Set Cat, + cfgRules :: Map Cat (Set CFRule) } + deriving (Eq, Ord, Show) + +-- +-- * Grammar filtering +-- + +-- | Removes all directly and indirectly cyclic productions. +-- FIXME: this may be too aggressive, only one production +-- needs to be removed to break a given cycle. But which +-- one should we pick? +-- FIXME: Does not (yet) remove productions which are cyclic +-- because of empty productions. +removeCycles :: CFG -> CFG +removeCycles = onRules f + where f rs = filter (not . isCycle) rs + where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs] + isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c + isCycle _ = False + +-- | Better bottom-up filter that also removes categories which contain no finite +-- strings. +bottomUpFilter :: CFG -> CFG +bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty }) + where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr + okSym g = symbol (`elem` allCats g) (const True) + +-- | Removes categories which are not reachable from any external category. +topDownFilter :: CFG -> CFG +topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg + where + rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ] + uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats + keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg + +-- | Merges categories with identical right-hand-sides. +-- FIXME: handle probabilities +mergeIdentical :: CFG -> CFG +mergeIdentical g = onRules (map subst) g + where + -- maps categories to their replacement + m = Map.fromList [(y,concat (intersperse "+" xs)) + | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs] + -- build data to compare for each category: a set of name,rhs pairs + rulesKey = Set.map (\ (CFRule _ r n) -> (n,r)) + subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n + substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m + +-- | Keeps only the start category as an external category. +purgeExternalCats :: CFG -> CFG +purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) } + +-- +-- * Removing left recursion +-- + +-- The LC_LR algorithm from +-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf +removeLeftRecursion :: CFG -> CFG +removeLeftRecursion gr + = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] } + where + scheme1 = [CFRule a [x,NonTerminal a_x] n' | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + not (isLeftRecursive x), + let a_x = mkCat (NonTerminal a) x, + -- this is an extension of LC_LR to avoid generating + -- A-X categories for which there are no productions: + a_x `Set.member` newCats, + let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0)) + (\_ -> CFRes 0) x] + scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' | + a <- retainedLeftRecursive, + b@(NonTerminal b') <- properLeftCornersOf a, + isLeftRecursive b, + CFRule _ (x:beta) n <- catRules gr b', + let a_x = mkCat (NonTerminal a) x, + let a_b = mkCat (NonTerminal a) b, + let i = length $ filterCats beta, + let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n))) + (\_ -> CFApp (CFRes i) n) x] + scheme3 = [CFRule a_x beta n' | + a <- retainedLeftRecursive, + x <- properLeftCornersOf a, + CFRule _ (x':beta) n <- catRules gr a, + x == x', + let a_x = mkCat (NonTerminal a) x, + let n' = symbol (\_ -> CFAbs 1 (shiftTerm n)) + (\_ -> n) x] + scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats + + newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3)) + + shiftTerm :: CFTerm -> CFTerm + shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts) + shiftTerm (CFRes 0) = CFVar 1 + shiftTerm (CFRes n) = CFRes (n-1) + shiftTerm t = t + -- note: the rest don't occur in the original grammar + + cats = allCats gr + rules = allRules gr + + directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr] + leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner + properLeftCorner = transitiveClosure directLeftCorner + properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal + isProperLeftCornerOf = flip (isRelatedTo properLeftCorner) + + leftRecursive = reflexiveElements properLeftCorner + isLeftRecursive = (`Set.member` leftRecursive) + + retained = cfgStartCat gr `Set.insert` + Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr), + NonTerminal a <- ruleRhs r] + isRetained = (`Set.member` retained) + + retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained + + mkCat :: CFSymbol -> CFSymbol -> Cat + mkCat x y = showSymbol x ++ "-" ++ showSymbol y + where showSymbol = symbol id show + +-- | Get the sets of mutually recursive non-terminals for a grammar. +mutRecCats :: Bool -- ^ If true, all categories will be in some set. + -- If false, only recursive categories will be included. + -> CFG -> [Set Cat] +mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r + where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss] + refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation + +-- +-- * Approximate context-free grammars with regular grammars. +-- + +makeSimpleRegular :: CFG -> CFG +makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles + +-- Use the transformation algorithm from \"Regular Approximation of Context-free +-- Grammars through Approximation\", Mohri and Nederhof, 2000 +-- to create an over-generating regular grammar for a context-free +-- grammar +makeRegular :: CFG -> CFG +makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) } + where trSet cs | allXLinear cs rs = rs + | otherwise = concatMap handleCat (Set.toList cs) + where rs = catSetRules g cs + handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e + ++ concatMap (makeRightLinearRules c) (catRules g c) + where c' = newCat c + makeRightLinearRules b' (CFRule c ss n) = + case ys of + [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left + (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n + ++ makeRightLinearRules (newCat b) (CFRule c zs n) + where (xs,ys) = break (`catElem` cs) ss + -- don't add rules on the form A -> A + newRule c rhs n | rhs == [NonTerminal c] = [] + | otherwise = [CFRule c rhs n] + newCat c = c ++ "$" + +-- +-- * CFG Utilities +-- + +mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG +mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs } + +groupProds :: [CFRule] -> Map Cat (Set CFRule) +groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) + +-- | Gets all rules in a CFG. +allRules :: CFG -> [CFRule] +allRules = concat . map Set.toList . Map.elems . cfgRules + +-- | Gets all rules in a CFG, grouped by their LHS categories. +allRulesGrouped :: CFG -> [(Cat,[CFRule])] +allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules + +-- | Gets all categories which have rules. +allCats :: CFG -> [Cat] +allCats = Map.keys . cfgRules + +-- | Gets all categories which have rules or occur in a RHS. +allCats' :: CFG -> [Cat] +allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union` + Set.fromList [c | rs <- Map.elems (cfgRules cfg), + r <- Set.toList rs, + NonTerminal c <- ruleRhs r]) + +-- | Gets all rules for the given category. +catRules :: CFG -> Cat -> [CFRule] +catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr) + +-- | Gets all rules for categories in the given set. +catSetRules :: CFG -> Set Cat -> [CFRule] +catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr + +mapCFGCats :: (Cat -> Cat) -> CFG -> CFG +mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg)) + (Set.map f (cfgExternalCats cfg)) + [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg] + +onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG +onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) } + +onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG +onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg } + +-- | Clean up CFG after rules have been removed. +cleanCFG :: CFG -> CFG +cleanCFG = onCFG (Map.filter (not . Set.null)) + +-- | Combine two CFGs. +unionCFG :: CFG -> CFG -> CFG +unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x + +filterCFG :: (CFRule -> Bool) -> CFG -> CFG +filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p)) + +filterCFGCats :: (Cat -> Bool) -> CFG -> CFG +filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c)) + +countCats :: CFG -> Int +countCats = Map.size . cfgRules . cleanCFG + +countRules :: CFG -> Int +countRules = length . allRules + +prCFG :: CFG -> String +prCFG = prProductions . map prRule . allRules + where + prRule r = (lhsCat r, unwords (map prSym (ruleRhs r))) + prSym = symbol id (\t -> "\""++ t ++"\"") + +prProductions :: [(Cat,String)] -> String +prProductions prods = + unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods] + where + maxLHSWidth = maximum $ 0:(map (length . fst) prods) + rpad n s = s ++ replicate (n - length s) ' ' + +prCFTerm :: CFTerm -> String +prCFTerm = pr 0 + where + pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")") + pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t) + pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")") + pr _ (CFRes i) = "$" ++ show i + pr _ (CFVar i) = "x" ++ show i + pr _ (CFMeta c) = "?" ++ showCId c + paren 0 x = x + paren 1 x = "(" ++ x ++ ")" + +-- +-- * CFRule Utilities +-- + +ruleFun :: CFRule -> CId +ruleFun (CFRule _ _ t) = f t + where f (CFObj n _) = n + f (CFApp _ x) = f x + f (CFAbs _ x) = f x + f _ = mkCId "" + +-- | Check if any of the categories used on the right-hand side +-- are in the given list of categories. +anyUsedBy :: [Cat] -> CFRule -> Bool +anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) + +mkCFTerm :: String -> CFTerm +mkCFTerm n = CFObj (mkCId n) [] + +ruleIsNonRecursive :: Set Cat -> CFRule -> Bool +ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs + +-- | Check if all the rules are right-linear, or all the rules are +-- left-linear, with respect to given categories. +allXLinear :: Set Cat -> [CFRule] -> Bool +allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs + +-- | Checks if a context-free rule is right-linear. +isRightLinear :: Set Cat -- ^ The categories to consider + -> CFRule -- ^ The rule to check for right-linearity + -> Bool +isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs + +-- | Checks if a context-free rule is left-linear. +isLeftLinear :: Set Cat -- ^ The categories to consider + -> CFRule -- ^ The rule to check for left-linearity + -> Bool +isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs + + +-- +-- * Symbol utilities +-- + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (NonTerminal cat) = fc cat +symbol fc ft (Terminal tok) = ft tok + +mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t' +mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft) + +filterCats :: [Symbol c t] -> [c] +filterCats syms = [ cat | NonTerminal cat <- syms ] + +filterToks :: [Symbol c t] -> [t] +filterToks syms = [ tok | Terminal tok <- syms ] + +-- | Checks if a symbol is a non-terminal of one of the given categories. +catElem :: Ord c => Symbol c t -> Set c -> Bool +catElem s cs = symbol (`Set.member` cs) (const False) s + +noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool +noCatsInSet cs = not . any (`catElem` cs) diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs new file mode 100644 index 000000000..3045ac842 --- /dev/null +++ b/src/compiler/GF/Speech/CFGToFA.hs @@ -0,0 +1,244 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.CFGToFA +-- +-- Approximates CFGs with finite state networks. +---------------------------------------------------------------------- +module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular, + MFA(..), cfgToMFA, cfgToFA') where + +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import PGF.CId +import PGF.Data +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.PGFToCFG +import GF.Infra.Ident (Ident) + +import GF.Data.Graph +import GF.Data.Relation +import GF.Speech.FiniteState +import GF.Speech.CFG + +data Recursivity = RightR | LeftR | NotR + +data MutRecSet = MutRecSet { + mrCats :: Set Cat, + mrNonRecRules :: [CFRule], + mrRecRules :: [CFRule], + mrRec :: Recursivity + } + + +type MutRecSets = Map Cat MutRecSet + +-- +-- * Multiple DFA type +-- + +data MFA = MFA Cat [(Cat,DFA CFSymbol)] + + + +cfgToFA :: CFG -> DFA Token +cfgToFA = minimize . compileAutomaton . makeSimpleRegular + + +-- +-- * Compile strongly regular grammars to NFAs +-- + +-- Convert a strongly regular grammar to a finite automaton. +compileAutomaton :: CFG -> NFA Token +compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa + where + (fa,s,f) = newFA_ + ns = mutRecSets g $ mutRecCats False g + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000. +make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State + -> NFA Token -> NFA Token +make_fa c@(g,ns) q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [Terminal t] -> newTransition q0 q1 (Just t) fa + [NonTerminal a] -> + case Map.lookup a ns of + -- a is recursive + Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> + case mrRec n of + -- the set Ni is right-recursive or cyclic + RightR -> + let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, + let (xs,NonTerminal d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + -- the set Ni is left-recursive + LeftR -> + let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] + ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats ni fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + -- a is not recursive + Nothing -> let rs = catRules g a + in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs + (x:beta) -> let (fa',q) = newState () fa + in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa' + where + make_fa_ = make_fa c + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs + +-- +-- * Compile a strongly regular grammar to a DFA with sub-automata +-- + +cfgToMFA :: CFG -> MFA +cfgToMFA = buildMFA . makeSimpleRegular + +-- | Build a DFA by building and expanding an MFA +cfgToFA' :: CFG -> DFA Token +cfgToFA' = mfaToDFA . cfgToMFA + +buildMFA :: CFG -> MFA +buildMFA g = sortSubLats $ removeUnusedSubLats mfa + where fas = compileAutomata g + mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas] + +mfaStartDFA :: MFA -> DFA CFSymbol +mfaStartDFA (MFA start subs) = + fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs + +mfaToDFA :: MFA -> DFA Token +mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa + where + subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs] + getSub l = fromJust $ Map.lookup l subs' + expand (FA (Graph c ns es) s f) + = foldl' expandEdge (FA (Graph c ns []) s f) es + expandEdge fa (f,t,x) = + case x of + Nothing -> newTransition f t Nothing fa + Just (Terminal s) -> newTransition f t (Just s) fa + Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l) + +removeUnusedSubLats :: MFA -> MFA +removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c] + where + usedMap = subLatUseMap mfa + used = growUsedSet (Set.singleton start) + isUsed c = c `Set.member` used + growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s) + +subLatUseMap :: MFA -> Map Cat (Set Cat) +subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs] + +usedSubLats :: DFA CFSymbol -> Set Cat +usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa] + +-- | Sort sub-networks topologically. +sortSubLats :: MFA -> MFA +sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs) + where + usedByMap = revMultiMap (subLatUseMap mfa) + sortLats _ [] = [] + sortLats ub ls = xs ++ sortLats ub' ys + where (xs,ys) = partition ((==0) . indeg) ls + ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub + indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub + +-- | Convert a strongly regular grammar to a number of finite automata, +-- one for each non-terminal. +-- The edges in the automata accept tokens, or name another automaton to use. +compileAutomata :: CFG + -> [(Cat,NFA CFSymbol)] + -- ^ A map of non-terminals and their automata. +compileAutomata g = [(c, makeOneFA c) | c <- allCats g] + where + mrs = mutRecSets g $ mutRecCats True g + makeOneFA c = make_fa1 mr s [NonTerminal c] f fa + where (fa,s,f) = newFA_ + mr = fromJust (Map.lookup c mrs) + + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000, +-- adapted to build a finite automaton for a single (mutually recursive) set only. +-- Categories not in the set will result in category-labelled edges. +make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which + -- we are building the automaton. + -> State -- ^ State to come from + -> [CFSymbol] -- ^ Symbols to accept + -> State -- ^ State to end up in + -> NFA CFSymbol -- ^ FA to add to. + -> NFA CFSymbol +make_fa1 mr q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [t@(Terminal _)] -> newTransition q0 q1 (Just t) fa + [c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa + [NonTerminal a] -> + case mrRec mr of + NotR -> -- the set is a non-recursive (always singleton) set of categories + -- so the set of category rules is the set of rules for the whole set + make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa + RightR -> -- the set is right-recursive or cyclic + let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, + let (xs,NonTerminal d) = (init ss,last ss)] + in make_fas new $ newTransition q0 (getState a) Nothing fa' + LeftR -> -- the set is left-recursive + let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] + ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr] + in make_fas new $ newTransition (getState a) q1 Nothing fa' + where + (fa',stateMap) = addStatesForCats (mrCats mr) fa + getState x = Map.findWithDefault + (error $ "CFGToFiniteState: No state for " ++ x) + x stateMap + (x:beta) -> let (fa',q) = newState () fa + in make_fas [(q0,[x],q),(q,beta,q1)] fa' + where + make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs + +mutRecSets :: CFG -> [Set Cat] -> MutRecSets +mutRecSets g = Map.fromList . concatMap mkMutRecSet + where + mkMutRecSet cs = [ (c,ms) | c <- csl ] + where csl = Set.toList cs + rs = catSetRules g cs + (nrs,rrs) = partition (ruleIsNonRecursive cs) rs + ms = MutRecSet { + mrCats = cs, + mrNonRecRules = nrs, + mrRecRules = rrs, + mrRec = rec + } + rec | null rrs = NotR + | all (isRightLinear cs) rrs = RightR + | otherwise = LeftR + +-- +-- * Utilities +-- + +-- | Add a state for the given NFA for each of the categories +-- in the given set. Returns a map of categories to their +-- corresponding states. +addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State) +addStatesForCats cs fa = (fa', m) + where (fa', ns) = newStates (replicate (Set.size cs) ()) fa + m = Map.fromList (zip (Set.toList cs) (map fst ns)) + +revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a) +revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s] diff --git a/src/compiler/GF/Speech/FiniteState.hs b/src/compiler/GF/Speech/FiniteState.hs new file mode 100644 index 000000000..136d773a2 --- /dev/null +++ b/src/compiler/GF/Speech/FiniteState.hs @@ -0,0 +1,329 @@ +---------------------------------------------------------------------- +-- | +-- Module : FiniteState +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/10 16:43:44 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.16 $ +-- +-- A simple finite state network module. +----------------------------------------------------------------------------- +module GF.Speech.FiniteState (FA(..), State, NFA, DFA, + startState, finalStates, + states, transitions, + isInternal, + newFA, newFA_, + addFinalState, + newState, newStates, + newTransition, newTransitions, + insertTransitionWith, insertTransitionsWith, + mapStates, mapTransitions, + modifyTransitions, + nonLoopTransitionsTo, nonLoopTransitionsFrom, + loops, + removeState, + oneFinalState, + insertNFA, + onGraph, + moveLabelsToNodes, removeTrivialEmptyNodes, + minimize, + dfa2nfa, + unusedNames, renameStates, + prFAGraphviz, faToGraphviz) where + +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import GF.Data.Utilities +import GF.Data.Graph +import qualified GF.Data.Graphviz as Dot + +type State = Int + +-- | Type parameters: node id type, state label type, edge label type +-- Data constructor arguments: nodes and edges, start state, final states +data FA n a b = FA !(Graph n a b) !n ![n] + +type NFA a = FA State () (Maybe a) + +type DFA a = FA State () a + + +startState :: FA n a b -> n +startState (FA _ s _) = s + +finalStates :: FA n a b -> [n] +finalStates (FA _ _ ss) = ss + +states :: FA n a b -> [(n,a)] +states (FA g _ _) = nodes g + +transitions :: FA n a b -> [(n,n,b)] +transitions (FA g _ _) = edges g + +newFA :: Enum n => a -- ^ Start node label + -> FA n a b +newFA l = FA g s [] + where (g,s) = newNode l (newGraph [toEnum 0..]) + +-- | Create a new finite automaton with an initial and a final state. +newFA_ :: Enum n => (FA n () b, n, n) +newFA_ = (fa'', s, f) + where fa = newFA () + s = startState fa + (fa',f) = newState () fa + fa'' = addFinalState f fa' + +addFinalState :: n -> FA n a b -> FA n a b +addFinalState f (FA g s ss) = FA g s (f:ss) + +newState :: a -> FA n a b -> (FA n a b, n) +newState x (FA g s ss) = (FA g' s ss, n) + where (g',n) = newNode x g + +newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)]) +newStates xs (FA g s ss) = (FA g' s ss, ns) + where (g',ns) = newNodes xs g + +newTransition :: n -> n -> b -> FA n a b -> FA n a b +newTransition f t l = onGraph (newEdge (f,t,l)) + +newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b +newTransitions es = onGraph (newEdges es) + +insertTransitionWith :: Eq n => + (b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b +insertTransitionWith f t = onGraph (insertEdgeWith f t) + +insertTransitionsWith :: Eq n => + (b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b +insertTransitionsWith f ts fa = + foldl' (flip (insertTransitionWith f)) fa ts + +mapStates :: (a -> c) -> FA n a b -> FA n c b +mapStates f = onGraph (nmap f) + +mapTransitions :: (b -> c) -> FA n a b -> FA n a c +mapTransitions f = onGraph (emap f) + +modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b +modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es)) + +removeState :: Ord n => n -> FA n a b -> FA n a b +removeState n = onGraph (removeNode n) + +minimize :: Ord a => NFA a -> DFA a +minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA + +unusedNames :: FA n a b -> [n] +unusedNames (FA (Graph names _ _) _ _) = names + +-- | Gets all incoming transitions to a given state, excluding +-- transtions from the state itself. +nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)] +nonLoopTransitionsTo s fa = + [(f,l) | (f,t,l) <- transitions fa, t == s && f /= s] + +nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)] +nonLoopTransitionsFrom s fa = + [(t,l) | (f,t,l) <- transitions fa, f == s && t /= s] + +loops :: Eq n => n -> FA n a b -> [b] +loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s] + +-- | Give new names to all nodes. +renameStates :: Ord x => [y] -- ^ Infinite supply of new names + -> FA x a b + -> FA y a b +renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs' + where (ns,rest) = splitAt (length (nodes g)) supply + newNodes = Map.fromList (zip (map fst (nodes g)) ns) + newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes + s' = newName s + fs' = map newName fs + +-- | Insert an NFA into another +insertNFA :: NFA a -- ^ NFA to insert into + -> (State, State) -- ^ States to insert between + -> NFA a -- ^ NFA to insert. + -> NFA a +insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2) + = FA (newEdges es g') s1 fs1 + where + es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2] + (g',ren) = mergeGraphs g1 g2 + +onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d +onGraph f (FA g s ss) = FA (f g) s ss + + +-- | Make the finite automaton have a single final state +-- by adding a new final state and adding an edge +-- from the old final states to the new state. +oneFinalState :: a -- ^ Label to give the new node + -> b -- ^ Label to give the new edges + -> FA n a b -- ^ The old network + -> FA n a b -- ^ The new network +oneFinalState nl el fa = + let (FA g s fs,nf) = newState nl fa + es = [ (f,nf,el) | f <- fs ] + in FA (newEdges es g) s [nf] + +-- | Transform a standard finite automaton with labelled edges +-- to one where the labels are on the nodes instead. This can add +-- up to one extra node per edge. +moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) () +moveLabelsToNodes = onGraph f + where f g@(Graph c _ _) = Graph c' ns (concat ess) + where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)] + (c',is') = mapAccumL fixIncoming c is + (ns,ess) = unzip (concat is') + + +-- | Remove empty nodes which are not start or final, and have +-- exactly one outgoing edge or exactly one incoming edge. +removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) () +removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes + +-- | Move edges to empty nodes to point to the next node(s). +-- This is not done if the pointed-to node is a final node. +skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) () +skipSimpleEmptyNodes fa = onGraph og fa + where + og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es') + where + es' = concatMap changeEdge es + info = nodeInfo g + changeEdge e@(f,t,()) + | isNothing (getNodeLabel info t) + -- && (i * o <= i + o) + && not (isFinal fa t) + = [ (f,t',()) | (_,t',()) <- getOutgoing info t] + | otherwise = [e] +-- where i = inDegree info t +-- o = outDegree info t + +isInternal :: Eq n => FA n a b -> n -> Bool +isInternal (FA _ start final) n = n /= start && n `notElem` final + +isFinal :: Eq n => FA n a b -> n -> Bool +isFinal (FA _ _ final) n = n `elem` final + +-- | Remove all internal nodes with no incoming edges +-- or no outgoing edges. +pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) () +pruneUnusable fa = onGraph f fa + where + f g = if Set.null rns then g else f (removeNodes rns g) + where info = nodeInfo g + rns = Set.fromList [ n | (n,_) <- nodes g, + isInternal fa n, + inDegree info n == 0 + || outDegree info n == 0] + +fixIncoming :: (Ord n, Eq a) => [n] + -> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges + -> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their + -- incoming edges. +fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts) + where ls = nub $ map edgeLabel es + (cs',cs'') = splitAt (length ls) cs + newNodes = zip cs' ls + es' = [ (x,n,()) | x <- map fst newNodes ] + -- separate cyclic and non-cyclic edges + (cyc,ncyc) = partition (\ (f,_,_) -> f == n) es + -- keep all incoming non-cyclic edges with the right label + to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l'] + -- for each cyclic edge with the right label, + -- add an edge from each of the new nodes (including this one) + ++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes] + newContexts = [ (v, to v) | v <- newNodes ] + +alphabet :: Eq b => Graph n a (Maybe b) -> [b] +alphabet = nub . catMaybes . map edgeLabel . edges + +determinize :: Ord a => NFA a -> DFA a +determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty + (ns',es') = (Set.toList ns, Set.toList es) + final = filter isDFAFinal ns' + fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final + in renameStates [0..] fa + where info = nodeInfo g +-- reach = nodesReachable out + start = closure info $ Set.singleton s + isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n)) + h currentStates oldStates es + | Set.null currentStates = (oldStates,es) + | otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es' + where + allOldStates = oldStates `Set.union` currentStates + (newStates,es') = new (Set.toList currentStates) Set.empty es + uniqueNewStates = newStates Set.\\ allOldStates + -- Get the sets of states reachable from the given states + -- by consuming one symbol, and the associated edges. + new [] rs es = (rs,es) + new (n:ns) rs es = new ns rs' es' + where cs = reachable info n --reachable reach n + rs' = rs `Set.union` Set.fromList (map snd cs) + es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs] + + +-- | Get all the nodes reachable from a list of nodes by only empty edges. +closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n +closure info x = closure_ x x + where closure_ acc check | Set.null check = acc + | otherwise = closure_ acc' check' + where + reach = Set.fromList [y | x <- Set.toList check, + (_,y,Nothing) <- getOutgoing info x] + acc' = acc `Set.union` reach + check' = reach Set.\\ acc + +-- | Get a map of labels to sets of all nodes reachable +-- from a the set of nodes by one edge with the given +-- label and then any number of empty edges. +reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)] +reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns +reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n] + +reverseNFA :: NFA a -> NFA a +reverseNFA (FA g s fs) = FA g''' s' [s] + where g' = reverseGraph g + (g'',s') = newNode () g' + g''' = newEdges [(s',f,Nothing) | f <- fs] g'' + +dfa2nfa :: DFA a -> NFA a +dfa2nfa = mapTransitions Just + +-- +-- * Visualization +-- + +prFAGraphviz :: (Eq n,Show n) => FA n String String -> String +prFAGraphviz = Dot.prGraphviz . faToGraphviz + +prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String +prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show + +faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph +faToGraphviz (FA (Graph _ ns es) s f) + = Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) [] + where mkNode (n,l) = Dot.Node (show n) attrs + where attrs = [("label",l)] + ++ if n == s then [("shape","box")] else [] + ++ if n `elem` f then [("style","bold")] else [] + mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)] + +-- +-- * Utilities +-- + +lookups :: Ord k => [k] -> Map k a -> [a] +lookups xs m = mapMaybe (flip Map.lookup m) xs diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs new file mode 100644 index 000000000..8f26ea64c --- /dev/null +++ b/src/compiler/GF/Speech/GSL.hs @@ -0,0 +1,95 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.GSL +-- +-- This module prints a CFG as a Nuance GSL 2.0 grammar. +-- +----------------------------------------------------------------------------- + +module GF.Speech.GSL (gslPrinter) where + +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.SRG +import GF.Speech.RegExp +import GF.Infra.Option +import GF.Infra.Ident +import PGF.CId +import PGF.Data + +import Data.Char (toUpper,toLower) +import Data.List (partition) +import Text.PrettyPrint.HughesPJ + +width :: Int +width = 75 + +gslPrinter :: Options -> PGF -> CId -> String +gslPrinter opts pgf cnc = renderStyle st $ prGSL $ makeNonLeftRecursiveSRG opts pgf cnc + where st = style { lineLength = width } + +prGSL :: SRG -> Doc +prGSL srg = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text ";GSL2.0" $$ + comment ("Nuance speech recognition grammar for " ++ srgName srg) $$ + comment ("Generated by GF") + mainCat = text ".MAIN" <+> prCat (srgStartCat srg) + prRule (SRGRule cat rhs) = prCat cat <+> union (map prAlt rhs) + -- FIXME: use the probability + prAlt (SRGAlt mp _ rhs) = prItem rhs + + +prItem :: SRGItem -> Doc +prItem = f + where + f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes) + where (es,nes) = partition isEpsilon xs + f (REConcat [x]) = f x + f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")" + f (RERepeat x) = text "*" <> f x + f (RESymbol s) = prSymbol s + +union :: [Doc] -> Doc +union [x] = x +union xs = text "[" <> fsep xs <> text "]" + +prSymbol :: Symbol SRGNT Token -> Doc +prSymbol = symbol (prCat . fst) (doubleQuotes . showToken) + +-- GSL requires an upper case letter in category names +prCat :: Cat -> Doc +prCat = text . firstToUpper + + +firstToUpper :: String -> String +firstToUpper [] = [] +firstToUpper (x:xs) = toUpper x : xs + +{- +rmPunctCFG :: CGrammar -> CGrammar +rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g] + +keepSymbol :: Symbol c Token -> Bool +keepSymbol (Tok t) = not (all isPunct (prt t)) +keepSymbol _ = True +-} + +-- Nuance does not like upper case characters in tokens +showToken :: Token -> Doc +showToken = text . map toLower + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.:;.,?!()[]{}" + +comment :: String -> Doc +comment s = text ";" <+> text s + + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs new file mode 100644 index 000000000..2cfeea5f5 --- /dev/null +++ b/src/compiler/GF/Speech/JSGF.hs @@ -0,0 +1,113 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.JSGF +-- +-- This module prints a CFG as a JSGF grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +-- +-- FIXME: convert to UTF-8 +----------------------------------------------------------------------------- + +module GF.Speech.JSGF (jsgfPrinter) where + +import GF.Data.Utilities +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.RegExp +import GF.Speech.SISR +import GF.Speech.SRG +import PGF.CId +import PGF.Data + +import Data.Char +import Data.List +import Data.Maybe +import Text.PrettyPrint.HughesPJ +import Debug.Trace + +width :: Int +width = 75 + +jsgfPrinter :: Options + -> PGF + -> CId -> String +jsgfPrinter opts pgf cnc = renderStyle st $ prJSGF sisr $ makeNonLeftRecursiveSRG opts pgf cnc + where st = style { lineLength = width } + sisr = flag optSISR opts + +prJSGF :: Maybe SISRFormat -> SRG -> Doc +prJSGF sisr srg + = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$ + comment ("JSGF speech recognition grammar for " ++ srgName srg) $$ + comment "Generated by GF" $$ + text ("grammar " ++ srgName srg ++ ";") + lang = maybe empty text (srgLanguage srg) + mainCat = rule True "MAIN" [prCat (srgStartCat srg)] + prRule (SRGRule cat rhs) = rule (isExternalCat srg cat) cat (map prAlt rhs) + prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] + where initTag | isEmpty t = empty + | otherwise = text "<NULL>" <+> t + where t = tag sisr (profileInitSISR n) + finalTag = tag sisr (profileFinalSISR n) + p = if isEmpty initTag && isEmpty finalTag then id else parens + +prCat :: Cat -> Doc +prCat c = char '<' <> text c <> char '>' + +prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc +prItem sisr t = f 0 + where + f _ (REUnion []) = text "<VOID>" + f p (REUnion xs) + | not (null es) = brackets (f 0 (REUnion nes)) + | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) + where (es,nes) = partition isEpsilon xs + f _ (REConcat []) = text "<NULL>" + f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) + f p (RERepeat x) = f 3 x <> char '*' + f _ (RESymbol s) = prSymbol sisr t s + +prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc +prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) +prSymbol _ cn (Terminal t) | all isPunct t = empty -- removes punctuation + | otherwise = text t -- FIXME: quote if there is whitespace or odd chars + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc +tag Nothing _ = empty +tag (Just fmt) t = case t fmt of + [] -> empty + ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}' + where e [] = [] + e ('}':xs) = '\\':'}':e xs + e ('\n':xs) = ' ' : e (dropWhile isSpace xs) + e (x:xs) = x:e xs + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +comment :: String -> Doc +comment s = text "//" <+> text s + +alts :: [Doc] -> Doc +alts = fsep . prepunctuate (text "| ") + +rule :: Bool -> Cat -> [Doc] -> Doc +rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' + where p = if pub then text "public" else empty + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +prepunctuate :: Doc -> [Doc] -> [Doc] +prepunctuate _ [] = [] +prepunctuate p (x:xs) = x : map (p <>) xs + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y + diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs new file mode 100644 index 000000000..d22a4ea8d --- /dev/null +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -0,0 +1,116 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.PGFToCFG +-- +-- Approximates PGF grammars with context-free grammars. +---------------------------------------------------------------------- +module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where + +import PGF.CId +import PGF.Data as PGF +import PGF.Macros +import GF.Infra.Ident +import GF.Speech.CFG + +import Data.Array.IArray as Array +import Data.List +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import Data.Maybe +import Data.Set (Set) +import qualified Data.Set as Set + +bnfPrinter :: PGF -> CId -> String +bnfPrinter = toBNF id + +toBNF :: (CFG -> CFG) -> PGF -> CId -> String +toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc + + +pgfToCFG :: PGF + -> CId -- ^ Concrete syntax name + -> CFG +pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules) + where + pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) + + rules :: [(FCat,Production)] + rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo) + , prod <- Set.toList set] + + fcatCats :: Map FCat Cat + fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) + | (c,fcs) <- Map.toList (startCats pinfo), + (fc,i) <- zip fcs [1..]] + + fcatCat :: FCat -> Cat + fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats + + fcatToCat :: FCat -> FIndex -> Cat + fcatToCat c l = fcatCat c ++ row + where row = if catLinArity c == 1 then "" else "_" ++ show l + + -- gets the number of fields in the lincat for the given category + catLinArity :: FCat -> Int + catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c]) + + topdownRules cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply funid args) rules = (functions pinfo ! funid,args) : rules + g (FCoerce cat) rules = f cat rules + + + extCats :: Set Cat + extCats = Set.fromList $ map lhsCat startRules + + startRules :: [CFRule] + startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) + | (c,fcs) <- Map.toList (startCats pinfo), + fc <- fcs, not (isLiteralFCat fc), + r <- [0..catLinArity fc-1]] + + fruleToCFRule :: (FCat,Production) -> [CFRule] + fruleToCFRule (c,FApply funid args) = + [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps)) + | (l,seqid) <- Array.assocs rhs + , let row = sequences pinfo ! seqid + , not (containsLiterals row)] + where + FFun f ps rhs = functions pinfo ! funid + + mkRhs :: Array FPointPos FSymbol -> [CFSymbol] + mkRhs = concatMap fsymbolToSymbol . Array.elems + + containsLiterals :: Array FPointPos FSymbol -> Bool + containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] || + not (null [n | FSymLit n _ <- Array.elems row]) -- only this is needed for PMCFG. + -- The first line is for backward compat. + + fsymbolToSymbol :: FSymbol -> [CFSymbol] + fsymbolToSymbol (FSymCat n l) = [NonTerminal (fcatToCat (args!!n) l)] + fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)] + fsymbolToSymbol (FSymKS ts) = map Terminal ts + + fixProfile :: Array FPointPos FSymbol -> Profile -> Profile + fixProfile row = concatMap positions + where + nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt] + positions i = [k | (k,j) <- nts, j == i] + + getPos (FSymCat j _) = [j] + getPos (FSymLit j _) = [j] + getPos _ = [] + + profilesToTerm :: [Profile] -> CFTerm + profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps) + where (argTypes,_) = catSkeleton $ lookType pgf f + + profileToTerm :: CId -> Profile -> CFTerm + profileToTerm t [] = CFMeta t + profileToTerm _ xs = CFRes (last xs) -- FIXME: unify + fruleToCFRule (c,FCoerce c') = + [CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) + | l <- [0..catLinArity c-1]] diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs new file mode 100644 index 000000000..0fc35d541 --- /dev/null +++ b/src/compiler/GF/Speech/PrRegExp.hs @@ -0,0 +1,27 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.PrRegExp +-- +-- This module prints a grammar as a regular expression. +----------------------------------------------------------------------------- + +module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where + +import GF.Speech.CFG +import GF.Speech.CFGToFA +import GF.Speech.PGFToCFG +import GF.Speech.RegExp +import PGF + +regexpPrinter :: PGF -> CId -> String +regexpPrinter pgf cnc = (++"\n") $ prRE id $ dfa2re $ cfgToFA $ pgfToCFG pgf cnc + +multiRegexpPrinter :: PGF -> CId -> String +multiRegexpPrinter pgf cnc = prREs $ mfa2res $ cfgToMFA $ pgfToCFG pgf cnc + +prREs :: [(String,RE CFSymbol)] -> String +prREs res = unlines [l ++ " = " ++ prRE id (mapRE showLabel re) | (l,re) <- res] + where showLabel = symbol (\l -> "<" ++ l ++ ">") id + +mfa2res :: MFA -> [(String,RE CFSymbol)] +mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas] diff --git a/src/compiler/GF/Speech/RegExp.hs b/src/compiler/GF/Speech/RegExp.hs new file mode 100644 index 000000000..2592b3d57 --- /dev/null +++ b/src/compiler/GF/Speech/RegExp.hs @@ -0,0 +1,144 @@ +module GF.Speech.RegExp (RE(..), + epsilonRE, nullRE, + isEpsilon, isNull, + unionRE, concatRE, seqRE, + repeatRE, minimizeRE, + mapRE, mapRE', joinRE, + symbolsRE, + dfa2re, prRE) where + +import Data.List + +import GF.Data.Utilities +import GF.Speech.FiniteState + +data RE a = + REUnion [RE a] -- ^ REUnion [] is null + | REConcat [RE a] -- ^ REConcat [] is epsilon + | RERepeat (RE a) + | RESymbol a + deriving (Eq,Ord,Show) + + +dfa2re :: (Ord a) => DFA a -> RE a +dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops + . oneFinalState () epsilonRE . mapTransitions RESymbol + where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa + merge es = [(f,t,unionRE ls) + | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]] + +elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a) +elimStates fa = + case [s | (s,_) <- states fa, isInternal fa s] of + [] -> fa + sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa + where sAs = nonLoopTransitionsTo sE fa + sBs = nonLoopTransitionsFrom sE fa + r2 = unionRE $ loops sE fa + ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs] + r r1 r3 = concatRE [r1, repeatRE r2, r3] + +epsilonRE :: RE a +epsilonRE = REConcat [] + +nullRE :: RE a +nullRE = REUnion [] + +isNull :: RE a -> Bool +isNull (REUnion []) = True +isNull _ = False + +isEpsilon :: RE a -> Bool +isEpsilon (REConcat []) = True +isEpsilon _ = False + +unionRE :: Ord a => [RE a] -> RE a +unionRE = unionOrId . sortNub . concatMap toList + where + toList (REUnion xs) = xs + toList x = [x] + unionOrId [r] = r + unionOrId rs = REUnion rs + +concatRE :: [RE a] -> RE a +concatRE xs | any isNull xs = nullRE + | otherwise = case concatMap toList xs of + [r] -> r + rs -> REConcat rs + where + toList (REConcat xs) = xs + toList x = [x] + +seqRE :: [a] -> RE a +seqRE = concatRE . map RESymbol + +repeatRE :: RE a -> RE a +repeatRE x | isNull x || isEpsilon x = epsilonRE + | otherwise = RERepeat x + +finalRE :: Ord a => DFA (RE a) -> RE a +finalRE fa = concatRE [repeatRE r1, r2, + repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])] + where + s0 = startState fa + [sF] = finalStates fa + r1 = unionRE $ loops s0 fa + r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa + r3 = unionRE $ loops sF fa + r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa + +reverseRE :: RE a -> RE a +reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs +reverseRE (REUnion xs) = REUnion (map reverseRE xs) +reverseRE (RERepeat x) = RERepeat (reverseRE x) +reverseRE x = x + +minimizeRE :: Ord a => RE a -> RE a +minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward + +mergeForward :: Ord a => RE a -> RE a +mergeForward (REUnion xs) = + unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)] +mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)] +mergeForward (RERepeat r) = repeatRE (mergeForward r) +mergeForward r = r + +firstRE :: RE a -> (RE a, RE a) +firstRE (REConcat (x:xs)) = (x, REConcat xs) +firstRE r = (r,epsilonRE) + +mapRE :: (a -> b) -> RE a -> RE b +mapRE f = mapRE' (RESymbol . f) + +mapRE' :: (a -> RE b) -> RE a -> RE b +mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs) +mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs) +mapRE' f (RERepeat x) = RERepeat (mapRE' f x) +mapRE' f (RESymbol s) = f s + +joinRE :: RE (RE a) -> RE a +joinRE (REConcat xs) = REConcat (map joinRE xs) +joinRE (REUnion xs) = REUnion (map joinRE xs) +joinRE (RERepeat xs) = RERepeat (joinRE xs) +joinRE (RESymbol ss) = ss + +symbolsRE :: RE a -> [a] +symbolsRE (REConcat xs) = concatMap symbolsRE xs +symbolsRE (REUnion xs) = concatMap symbolsRE xs +symbolsRE (RERepeat x) = symbolsRE x +symbolsRE (RESymbol x) = [x] + +-- Debugging + +prRE :: (a -> String) -> RE a -> String +prRE = prRE' 0 + +prRE' :: Int -> (a -> String) -> RE a -> String +prRE' _ _ (REUnion []) = "<NULL>" +prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs))) +prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs)) +prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*" +prRE' _ f (RESymbol s) = f s + +p n m s | n >= m = "(" ++ s ++ ")" + | True = s diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs new file mode 100644 index 000000000..f966d96b9 --- /dev/null +++ b/src/compiler/GF/Speech/SISR.hs @@ -0,0 +1,77 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.SISR +-- +-- Abstract syntax and pretty printer for SISR, +-- (Semantic Interpretation for Speech Recognition) +---------------------------------------------------------------------- +module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR, + topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where + +import Data.List + +import GF.Data.Utilities +import GF.Infra.Ident +import GF.Infra.Option (SISRFormat(..)) +import GF.Speech.CFG +import GF.Speech.SRG (SRGNT) +import PGF.CId + +import qualified GF.JavaScript.AbsJS as JS +import qualified GF.JavaScript.PrintJS as JS + +type SISRTag = [JS.DeclOrExpr] + + +prSISR :: SISRTag -> String +prSISR = JS.printTree + +topCatSISR :: String -> SISRFormat -> SISRTag +topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c] + +profileInitSISR :: CFTerm -> SISRFormat -> SISRTag +profileInitSISR t fmt + | null (usedArgs t) = [] + | otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]] + +usedArgs :: CFTerm -> [Int] +usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts) +usedArgs (CFAbs _ x) = usedArgs x +usedArgs (CFApp x y) = usedArgs x `union` usedArgs y +usedArgs (CFRes i) = [i] +usedArgs _ = [] + +catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag +catSISR t (c,i) fmt + | i `elem` usedArgs t = map JS.DExpr + [JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c] + | otherwise = [] + +profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag +profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term] + where + f (CFObj n ts) = tree (showCId n) (map f ts) + f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)] + f (CFApp x y) = JS.ECall (f x) [f y] + f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) + f (CFVar v) = JS.EVar (var v) + f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (showCId typ))] + +fmtOut SISR_WD20030401 = JS.EVar (JS.Ident "$") +fmtOut SISR_1_0 = JS.EVar (JS.Ident "out") + +fmtRef SISR_WD20030401 c = JS.EVar (JS.Ident ("$" ++ c)) +fmtRef SISR_1_0 c = field (JS.EVar (JS.Ident "rules")) c + +args = JS.Ident "a" + +var v = JS.Ident ("x" ++ show v) + +field x y = JS.EMember x (JS.Ident y) + +ass = JS.EAssign + +tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)] + +obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps] + diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs new file mode 100644 index 000000000..84633149b --- /dev/null +++ b/src/compiler/GF/Speech/SLF.hs @@ -0,0 +1,178 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.SLF +-- +-- This module converts a CFG to an SLF finite-state network +-- for use with the ATK recognizer. The SLF format is described +-- in the HTK manual, and an example for use in ATK is shown +-- in the ATK manual. +-- +----------------------------------------------------------------------------- + +module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter, + slfSubPrinter,slfSubGraphvizPrinter) where + +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.FiniteState +import GF.Speech.CFG +import GF.Speech.CFGToFA +import GF.Speech.PGFToCFG +import qualified GF.Data.Graphviz as Dot +import PGF +import PGF.CId + +import Control.Monad +import qualified Control.Monad.State as STM +import Data.Char (toUpper) +import Data.List +import Data.Maybe + +data SLFs = SLFs [(String,SLF)] SLF + +data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] } + +data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String } + | SLFSubLat { nId :: Int, nLat :: String } + +-- | An SLF word is a word, or the empty string. +type SLFWord = Maybe String + +data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int } + +type SLF_FA = FA State (Maybe CFSymbol) () + +mkFAs :: PGF -> CId -> (SLF_FA, [(String,SLF_FA)]) +mkFAs pgf cnc = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs]) + where MFA start subs = {- renameSubs $ -} cfgToMFA $ pgfToCFG pgf cnc + main = let (fa,s,f) = newFA_ in newTransition s f (NonTerminal start) fa + +slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) () +slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing () + . moveLabelsToNodes . dfa2nfa + +-- | Give sequential names to subnetworks. +renameSubs :: MFA -> MFA +renameSubs (MFA start subs) = MFA (newName start) subs' + where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]] + newName s = lookup' s newNames + subs' = [(newName s,renameLabels n) | (s,n) <- subs] + renameLabels = mapTransitions (mapSymbol newName id) + +-- +-- * SLF graphviz printing (without sub-networks) +-- + +slfGraphvizPrinter :: PGF -> CId -> String +slfGraphvizPrinter pgf cnc + = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc + where + gvFA = mapStates (fromMaybe "") . mapTransitions (const "") + +-- +-- * SLF graphviz printing (with sub-networks) +-- + +slfSubGraphvizPrinter :: PGF -> CId -> String +slfSubGraphvizPrinter pgf cnc = Dot.prGraphviz g + where (main, subs) = mkFAs pgf cnc + g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..] + ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs + m = gvSLFFA Nothing main + +gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph +gvSLFFA n fa = + liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv) + . mapTransitions (const "")) (rename fa) + where mfaLabelToGv = symbol ("#"++) id + mkCluster Nothing = id + mkCluster (Just x) + = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x + rename fa = do + names <- STM.get + let fa' = renameStates names fa + names' = unusedNames fa' + STM.put names' + return fa' + +-- +-- * SLF printing (without sub-networks) +-- + +slfPrinter :: PGF -> CId -> String +slfPrinter pgf cnc + = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc + +-- +-- * SLF printing (with sub-networks) +-- + +-- | Make a network with subnetworks in SLF +slfSubPrinter :: PGF -> CId -> String +slfSubPrinter pgf cnc = prSLFs slfs + where + (main,subs) = mkFAs pgf cnc + slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main) + faToSLF = automatonToSLF mfaNodeToSLFNode + +automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF +automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es } + where ns = map (uncurry mkNode) (states fa) + es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa) + +mfaNodeToSLFNode :: Int -> Maybe CFSymbol -> SLFNode +mfaNodeToSLFNode i l = case l of + Nothing -> mkSLFNode i Nothing + Just (Terminal x) -> mkSLFNode i (Just x) + Just (NonTerminal s) -> mkSLFSubLat i s + +mkSLFNode :: Int -> Maybe String -> SLFNode +mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing } +mkSLFNode i (Just w) + | isNonWord w = SLFNode { nId = i, + nWord = Nothing, + nTag = Just w } + | otherwise = SLFNode { nId = i, + nWord = Just (map toUpper w), + nTag = Just w } + +mkSLFSubLat :: Int -> String -> SLFNode +mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub } + +mkSLFEdge :: Int -> (Int,Int) -> SLFEdge +mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t } + +prSLFs :: SLFs -> String +prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) "" + where prSub (n,s) = showString "SUBLAT=" . shows n + . nl . prOneSLF s . showString "." . nl + +prSLF :: SLF -> String +prSLF slf = prOneSLF slf "" + +prOneSLF :: SLF -> ShowS +prOneSLF (SLF { slfNodes = ns, slfEdges = es}) + = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl + where + header = prFields [("N",show (length ns)),("L", show (length es))] . nl + prNode (SLFNode { nId = i, nWord = w, nTag = t }) + = prFields $ [("I",show i),("W",showWord w)] + ++ maybe [] (\t -> [("s",t)]) t + prNode (SLFSubLat { nId = i, nLat = l }) + = prFields [("I",show i),("L",show l)] + prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))] + +-- | Check if a word should not correspond to a word in the SLF file. +isNonWord :: String -> Bool +isNonWord = any isPunct + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!()[]{}" + +showWord :: SLFWord -> String +showWord Nothing = "!NULL" +showWord (Just w) | null w = "!NULL" + | otherwise = w + +prFields :: [(String,String)] -> ShowS +prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ] diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs new file mode 100644 index 000000000..2270ec7a1 --- /dev/null +++ b/src/compiler/GF/Speech/SRG.hs @@ -0,0 +1,205 @@ +---------------------------------------------------------------------- +-- | +-- Module : SRG +-- +-- Representation of, conversion to, and utilities for +-- printing of a general Speech Recognition Grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +---------------------------------------------------------------------- +module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol + , SRGNT, CFTerm + , ebnfPrinter + , makeNonLeftRecursiveSRG + , makeNonRecursiveSRG + , getSpeechLanguage + , isExternalCat + , lookupFM_ + ) where + +import GF.Data.Operations +import GF.Data.Utilities +import GF.Infra.Ident +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.PGFToCFG +import GF.Data.Relation +import GF.Speech.FiniteState +import GF.Speech.RegExp +import GF.Speech.CFGToFA +import GF.Infra.Option +import PGF.CId +import PGF.Data +import PGF.Macros + +import Data.List +import Data.Maybe (fromMaybe, maybeToList) +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set + +import Debug.Trace + +data SRG = SRG { srgName :: String -- ^ grammar name + , srgStartCat :: Cat -- ^ start category name + , srgExternalCats :: Set Cat + , srgLanguage :: Maybe String -- ^ The language for which the grammar + -- is intended, e.g. en-UK + , srgRules :: [SRGRule] + } + deriving (Eq,Show) + +data SRGRule = SRGRule Cat [SRGAlt] + deriving (Eq,Show) + +-- | maybe a probability, a rule name and an EBNF right-hand side +data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem + deriving (Eq,Show) + +type SRGItem = RE SRGSymbol + +type SRGSymbol = Symbol SRGNT Token + +-- | An SRG non-terminal. Category name and its number in the profile. +type SRGNT = (Cat, Int) + +ebnfPrinter :: Options -> PGF -> CId -> String +ebnfPrinter opts pgf cnc = prSRG opts $ makeSRG opts pgf cnc + +-- | Create a compact filtered non-left-recursive SRG. +makeNonLeftRecursiveSRG :: Options -> PGF -> CId -> SRG +makeNonLeftRecursiveSRG opts = makeSRG opts' + where + opts' = setDefaultCFGTransform opts CFGNoLR True + +makeSRG :: Options -> PGF -> CId -> SRG +makeSRG opts = mkSRG cfgToSRG preprocess + where + cfgToSRG cfg = [cfRulesToSRGRule rs | (_,rs) <- allRulesGrouped cfg] + preprocess = maybeTransform opts CFGMergeIdentical mergeIdentical + . maybeTransform opts CFGNoLR removeLeftRecursion + . maybeTransform opts CFGRegular makeRegular + . maybeTransform opts CFGTopDownFilter topDownFilter + . maybeTransform opts CFGBottomUpFilter bottomUpFilter + . maybeTransform opts CFGRemoveCycles removeCycles + . maybeTransform opts CFGStartCatOnly purgeExternalCats + +setDefaultCFGTransform :: Options -> CFGTransform -> Bool -> Options +setDefaultCFGTransform opts t b = setCFGTransform t b `addOptions` opts + +maybeTransform :: Options -> CFGTransform -> (CFG -> CFG) -> (CFG -> CFG) +maybeTransform opts t f = if cfgTransform opts t then f else id + +traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g + +stats g = "Categories: " ++ show (countCats g) + ++ ", External categories: " ++ show (Set.size (cfgExternalCats g)) + ++ ", Rules: " ++ show (countRules g) + +makeNonRecursiveSRG :: Options + -> PGF + -> CId -- ^ Concrete syntax name. + -> SRG +makeNonRecursiveSRG opts = mkSRG cfgToSRG id + where + cfgToSRG cfg = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas] + where + MFA _ dfas = cfgToMFA cfg + dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re + dummyCFTerm = CFMeta (mkCId "dummy") + dummySRGNT = mapSymbol (\c -> (c,0)) id + +mkSRG :: (CFG -> [SRGRule]) -> (CFG -> CFG) -> PGF -> CId -> SRG +mkSRG mkRules preprocess pgf cnc = + SRG { srgName = showCId cnc, + srgStartCat = cfgStartCat cfg, + srgExternalCats = cfgExternalCats cfg, + srgLanguage = getSpeechLanguage pgf cnc, + srgRules = mkRules cfg } + where cfg = renameCats (showCId cnc) $ preprocess $ pgfToCFG pgf cnc + +-- | Renames all external cats C to C_cat, and all internal cats C_X (where X is any string), +-- to C_N where N is an integer. +renameCats :: String -> CFG -> CFG +renameCats prefix cfg = mapCFGCats renameCat cfg + where renameCat c | isExternal c = c ++ "_cat" + | otherwise = Map.findWithDefault (badCat c) c names + isExternal c = c `Set.member` cfgExternalCats cfg + catsByPrefix = buildMultiMap [(takeWhile (/='_') cat, cat) | cat <- allCats' cfg, not (isExternal cat)] + names = Map.fromList [(c,pref++"_"++show i) | (pref,cs) <- catsByPrefix, (c,i) <- zip cs [1..]] + badCat c = error ("GF.Speech.SRG.renameCats: " ++ c ++ "\n" ++ prCFG cfg) + +getSpeechLanguage :: PGF -> CId -> Maybe String +getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language") + +cfRulesToSRGRule :: [CFRule] -> SRGRule +cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs + where + alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs] + rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ] + + mkSRGSymbols _ [] = [] + mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss + mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss + +srgLHSCat :: SRGRule -> Cat +srgLHSCat (SRGRule c _) = c + +isExternalCat :: SRG -> Cat -> Bool +isExternalCat srg c = c `Set.member` srgExternalCats srg + +-- +-- * Size-optimized EBNF SRGs +-- + +srgItem :: [[SRGSymbol]] -> SRGItem +srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) +-- non-optimizing version: +--srgItem = unionRE . map seqRE + +-- | Merges a list of right-hand sides which all have the same +-- sequence of non-terminals. +mergeItems :: [[SRGSymbol]] -> SRGItem +mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens + +groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]] +groupTokens [] = [] +groupTokens (Terminal t:ss) = case groupTokens ss of + Terminal ts:ss' -> Terminal (t:ts):ss' + ss' -> Terminal [t]:ss' +groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss + +ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol +ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal))) + +-- +-- * Utilities for building and printing SRGs +-- + +prSRG :: Options -> SRG -> String +prSRG opts srg = prProductions $ map prRule $ ext ++ int + where + sisr = flag optSISR opts + (ext,int) = partition (isExternalCat srg . srgLHSCat) (srgRules srg) + prRule (SRGRule c alts) = (c,unwords (intersperse "|" (concatMap prAlt alts))) + prAlt (SRGAlt _ t rhs) = + -- FIXME: hack: we high-jack the --sisr flag to add + -- a simple lambda calculus format for semantic interpretation + -- Maybe the --sisr flag should be renamed. + case sisr of + Just _ -> + -- copy tags to each part of a top-level union, + -- to get simpler output + case rhs of + REUnion xs -> map prOneAlt xs + _ -> [prOneAlt rhs] + where prOneAlt a = prRE prSym a ++ " { " ++ prCFTerm t ++ " }" + Nothing -> [prRE prSym rhs] + prSym = symbol fst (\t -> "\""++ t ++"\"") + +lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt +lookupFM_ fm k = Map.findWithDefault err k fm + where err = error $ "Key not found: " ++ show k + ++ "\namong " ++ show (Map.keys fm) diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs new file mode 100644 index 000000000..2df1316a8 --- /dev/null +++ b/src/compiler/GF/Speech/SRGS_ABNF.hs @@ -0,0 +1,127 @@ +---------------------------------------------------------------------- +-- | +-- Module : PrJSRGS_ABNF +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/01 20:09:04 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.16 $ +-- +-- This module prints a CFG as a JSGF grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +-- +-- FIXME: convert to UTF-8 +----------------------------------------------------------------------------- + +module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where + +import GF.Data.Utilities +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.SISR as SISR +import GF.Speech.SRG +import GF.Speech.RegExp +import PGF (PGF, CId) + +import Data.Char +import Data.List +import Data.Maybe +import Text.PrettyPrint.HughesPJ +import Debug.Trace + +width :: Int +width = 75 + +srgsAbnfPrinter :: Options + -> PGF -> CId -> String +srgsAbnfPrinter opts pgf cnc = showDoc $ prABNF sisr $ makeNonLeftRecursiveSRG opts pgf cnc + where sisr = flag optSISR opts + +srgsAbnfNonRecursivePrinter :: Options -> PGF -> CId -> String +srgsAbnfNonRecursivePrinter opts pgf cnc = showDoc $ prABNF Nothing $ makeNonRecursiveSRG opts pgf cnc + +showDoc = renderStyle (style { lineLength = width }) + +prABNF :: Maybe SISRFormat -> SRG -> Doc +prABNF sisr srg + = header $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text "#ABNF 1.0 UTF-8;" $$ + meta "description" ("Speech recognition grammar for " ++ srgName srg) $$ + meta "generator" "Grammatical Framework" $$ + language $$ tagFormat $$ mainCat + language = maybe empty (\l -> text "language" <+> text l <> char ';') (srgLanguage srg) + tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';' + | otherwise = empty + mainCat = text "root" <+> prCat (srgStartCat srg) <> char ';' + prRule (SRGRule cat alts) = rule (isExternalCat srg cat) cat (map prAlt alts) + prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] + where initTag = tag sisr (profileInitSISR n) + finalTag = tag sisr (profileFinalSISR n) + p = if isEmpty initTag && isEmpty finalTag then id else parens + +prCat :: Cat -> Doc +prCat c = char '$' <> text c + +prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc +prItem sisr t = f 0 + where + f _ (REUnion []) = text "$VOID" + f p (REUnion xs) + | not (null es) = brackets (f 0 (REUnion nes)) + | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) + where (es,nes) = partition isEpsilon xs + f _ (REConcat []) = text "$NULL" + f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) + f p (RERepeat x) = f 3 x <> text "<0->" + f _ (RESymbol s) = prSymbol sisr t s + + +prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc +prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) +prSymbol _ cn (Terminal t) + | all isPunct t = empty -- removes punctuation + | otherwise = text t -- FIXME: quote if there is whitespace or odd chars + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc +tag Nothing _ = empty +tag (Just fmt) t = + case t fmt of + [] -> empty + -- grr, silly SRGS ABNF does not have an escaping mechanism + ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}" + | otherwise -> text "{" <+> text x <+> text "}" + where x = prSISR ts + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +comment :: String -> Doc +comment s = text "//" <+> text s + +alts :: [Doc] -> Doc +alts = fsep . prepunctuate (text "| ") + +rule :: Bool -> Cat -> [Doc] -> Doc +rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' + where p = if pub then text "public" else empty + +meta :: String -> String -> Doc +meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';' + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +prepunctuate :: Doc -> [Doc] -> [Doc] +prepunctuate _ [] = [] +prepunctuate p (x:xs) = x : map (p <>) xs + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y + diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs new file mode 100644 index 000000000..1f94de66d --- /dev/null +++ b/src/compiler/GF/Speech/SRGS_XML.hs @@ -0,0 +1,105 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.SRGS_XML +-- +-- Prints an SRGS XML speech recognition grammars. +---------------------------------------------------------------------- +module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where + +import GF.Data.Utilities +import GF.Data.XML +import GF.Infra.Option +import GF.Speech.CFG +import GF.Speech.RegExp +import GF.Speech.SISR as SISR +import GF.Speech.SRG +import PGF (PGF, CId) + +import Control.Monad +import Data.Char (toUpper,toLower) +import Data.List +import Data.Maybe +import qualified Data.Map as Map + +srgsXmlPrinter :: Options + -> PGF -> CId -> String +srgsXmlPrinter opts pgf cnc = prSrgsXml sisr $ makeNonLeftRecursiveSRG opts pgf cnc + where sisr = flag optSISR opts + +srgsXmlNonRecursivePrinter :: Options -> PGF -> CId -> String +srgsXmlNonRecursivePrinter opts pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG opts pgf cnc + + +prSrgsXml :: Maybe SISRFormat -> SRG -> String +prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr) + where + xmlGr = grammar sisr (srgStartCat srg) (srgLanguage srg) $ + [meta "description" + ("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."), + meta "generator" "Grammatical Framework"] + ++ map ruleToXML (srgRules srg) + ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts) + where pub = if isExternalCat srg cat then [("scope","public")] else [] + prRhs rhss = [oneOf (map (mkProd sisr) rhss)] + +mkProd :: Maybe SISRFormat -> SRGAlt -> XML +mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf) + where x = mkItem sisr n rhs + ti = tag sisr (profileInitSISR n) + tf = tag sisr (profileFinalSISR n) + +mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML +mkItem sisr cn = f + where + f (REUnion []) = ETag "ruleref" [("special","VOID")] + f (REUnion xs) + | not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)] + | otherwise = oneOf (map f xs) + where (es,nes) = partition isEpsilon xs + f (REConcat []) = ETag "ruleref" [("special","NULL")] + f (REConcat xs) = Tag "item" [] (map f xs) + f (RERepeat x) = Tag "item" [("repeat","0-")] [f x] + f (RESymbol s) = symItem sisr cn s + +symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML +symItem sisr cn (NonTerminal n@(c,_)) = + Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n) +symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)] + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML] +tag Nothing _ = [] +tag (Just fmt) t = case t fmt of + [] -> [] + ts -> [Tag "tag" [] [Data (prSISR ts)]] + +showToken :: Token -> String +showToken t = t + +oneOf :: [XML] -> XML +oneOf = Tag "one-of" [] + +grammar :: Maybe SISRFormat + -> String -- ^ root + -> Maybe String -- ^language + -> [XML] -> XML +grammar sisr root ml = + Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"), + ("version","1.0"), + ("mode","voice"), + ("root",root)] + ++ (if isJust sisr then [("tag-format","semantics/1.0")] else []) + ++ maybe [] (\l -> [("xml:lang", l)]) ml + +meta :: String -> String -> XML +meta n c = ETag "meta" [("name",n),("content",c)] + +optimizeSRGS :: XML -> XML +optimizeSRGS = bottomUpXML f + where f (Tag "item" [] [x@(Tag "item" _ _)]) = x + f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x + f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs + f (Tag "item" as xs) = Tag "item" as (map g xs) + where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x + g x = x + f (Tag "one-of" [] [x]) = x + f x = x diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs new file mode 100644 index 000000000..134964062 --- /dev/null +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -0,0 +1,243 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.VoiceXML +-- +-- Creates VoiceXML dialogue systems from PGF grammars. +----------------------------------------------------------------------------- +module GF.Speech.VoiceXML (grammar2vxml) where + +import GF.Data.Operations +import GF.Data.Str (sstrV) +import GF.Data.Utilities +import GF.Data.XML +import GF.Infra.Ident +import GF.Infra.Modules +import GF.Speech.SRG (getSpeechLanguage) +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.Linearize (realize) + +import Control.Monad (liftM) +import Data.List (isPrefixOf, find, intersperse) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) + +import Debug.Trace + +-- | the main function +grammar2vxml :: PGF -> CId -> String +grammar2vxml pgf cnc = showsXMLDoc (skel2vxml name language start skel qs) "" + where skel = pgfSkeleton pgf + name = showCId cnc + qs = catQuestions pgf cnc (map fst skel) + language = getSpeechLanguage pgf cnc + start = lookStartCat pgf + +-- +-- * VSkeleton: a simple description of the abstract syntax. +-- + +type Skeleton = [(CId, [(CId, [CId])])] + +pgfSkeleton :: PGF -> Skeleton +pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType pgf f))) | f <- fs]) + | (c,fs) <- Map.toList (catfuns (abstract pgf)), + not (isLiteralCat c)] + +-- +-- * Questions to ask +-- + +type CatQuestions = [(CId,String)] + +catQuestions :: PGF -> CId -> [CId] -> CatQuestions +catQuestions pgf cnc cats = [(c,catQuestion pgf cnc c) | c <- cats] + +catQuestion :: PGF -> CId -> CId -> String +catQuestion pgf cnc cat = realize (lookPrintName pgf cnc cat) + + +{- +lin :: StateGrammar -> String -> Err String +lin gr fun = do + tree <- string2treeErr gr fun + let ls = map unt $ linTree2strings noMark g c tree + case ls of + [] -> fail $ "No linearization of " ++ fun + l:_ -> return l + where c = cncId gr + g = stateGrammarST gr + unt = formatAsText +-} + +getCatQuestion :: CId -> CatQuestions -> String +getCatQuestion c qs = + fromMaybe (error "No question for category " ++ showCId c) (lookup c qs) + +-- +-- * Generate VoiceXML +-- + +skel2vxml :: String -> Maybe String -> CId -> Skeleton -> CatQuestions -> XML +skel2vxml name language start skel qs = + vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel) + where + gr = grammarURI name + startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)] + [param "old" "{ name : '?' }"]] + +grammarURI :: String -> String +grammarURI name = name ++ ".grxml" + + +catForms :: String -> CatQuestions -> CId -> [(CId, [CId])] -> [XML] +catForms gr qs cat fs = + comments [showCId cat ++ " category."] + ++ [cat2form gr qs cat fs] + +cat2form :: String -> CatQuestions -> CId -> [(CId, [CId])] -> XML +cat2form gr qs cat fs = + form (catFormId cat) $ + [var "old" Nothing, + blockCond "old.name != '?'" [assign "term" "old"], + field "term" [] + [promptString (getCatQuestion cat qs), + vxmlGrammar (gr++"#"++catFormId cat) + ] + ] + ++ concatMap (uncurry (fun2sub gr cat)) fs + ++ [block [return_ ["term"]{-]-}]] + +fun2sub :: String -> CId -> CId -> [CId] -> [XML] +fun2sub gr cat fun args = + comments [showCId fun ++ " : (" + ++ concat (intersperse ", " (map showCId args)) + ++ ") " ++ showCId cat] ++ ss + where + ss = zipWith mkSub [0..] args + mkSub n t = subdialog s [("src","#"++catFormId t), + ("cond","term.name == "++string (showCId fun))] + [param "old" v, + filled [] [assign v (s++".term")]] + where s = showCId fun ++ "_" ++ show n + v = "term.args["++show n++"]" + +catFormId :: CId -> String +catFormId c = showCId c ++ "_cat" + + +-- +-- * VoiceXML stuff +-- + +vxml :: Maybe String -> [XML] -> XML +vxml ml = Tag "vxml" $ [("version","2.0"), + ("xmlns","http://www.w3.org/2001/vxml")] + ++ maybe [] (\l -> [("xml:lang", l)]) ml + +form :: String -> [XML] -> XML +form id xs = Tag "form" [("id", id)] xs + +field :: String -> [(String,String)] -> [XML] -> XML +field name attrs = Tag "field" ([("name",name)]++attrs) + +subdialog :: String -> [(String,String)] -> [XML] -> XML +subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs) + +filled :: [(String,String)] -> [XML] -> XML +filled = Tag "filled" + +vxmlGrammar :: String -> XML +vxmlGrammar uri = ETag "grammar" [("src",uri)] + +prompt :: [XML] -> XML +prompt = Tag "prompt" [] + +promptString :: String -> XML +promptString p = prompt [Data p] + +reprompt :: XML +reprompt = ETag "reprompt" [] + +assign :: String -> String -> XML +assign n e = ETag "assign" [("name",n),("expr",e)] + +value :: String -> XML +value expr = ETag "value" [("expr",expr)] + +if_ :: String -> [XML] -> XML +if_ c b = if_else c b [] + +if_else :: String -> [XML] -> [XML] -> XML +if_else c t f = cond [(c,t)] f + +cond :: [(String,[XML])] -> [XML] -> XML +cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es) + where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest] + ++ if null els then [] else (Tag "else" [] []:els) + +goto_item :: String -> XML +goto_item nextitem = ETag "goto" [("nextitem",nextitem)] + +return_ :: [String] -> XML +return_ names = ETag "return" [("namelist", unwords names)] + +block :: [XML] -> XML +block = Tag "block" [] + +blockCond :: String -> [XML] -> XML +blockCond cond = Tag "block" [("cond", cond)] + +throw :: String -> String -> XML +throw event msg = Tag "throw" [("event",event),("message",msg)] [] + +nomatch :: [XML] -> XML +nomatch = Tag "nomatch" [] + +help :: [XML] -> XML +help = Tag "help" [] + +param :: String -> String -> XML +param name expr = ETag "param" [("name",name),("expr",expr)] + +var :: String -> Maybe String -> XML +var name expr = ETag "var" ([("name",name)]++e) + where e = maybe [] ((:[]) . (,) "expr") expr + +script :: String -> XML +script s = Tag "script" [] [CData s] + +scriptURI :: String -> XML +scriptURI uri = Tag "script" [("uri", uri)] [] + +-- +-- * ECMAScript stuff +-- + +string :: String -> String +string s = "'" ++ concatMap esc s ++ "'" + where esc '\'' = "\\'" + esc c = [c] + +{- +-- +-- * List stuff +-- + +isListCat :: (CId, [(CId, [CId])]) -> Bool +isListCat (cat,rules) = "List" `isPrefixOf` showIdent cat && length rules == 2 + && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs + where c = drop 4 (showIdent cat) + fs = map (showIdent . fst) rules + +isBaseFun :: CId -> Bool +isBaseFun f = "Base" `isPrefixOf` showIdent f + +isConsFun :: CId -> Bool +isConsFun f = "Cons" `isPrefixOf` showIdent f + +baseSize :: (CId, [(CId, [CId])]) -> Int +baseSize (_,rules) = length bs + where Just (_,bs) = find (isBaseFun . fst) rules +-} |
