summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2006-02-01 16:23:14 +0000
committerbringert <bringert@cs.chalmers.se>2006-02-01 16:23:14 +0000
commitfd0dfd7d4d9885b8b4cac7bd60308e2890a05caa (patch)
tree73613f4cc00fac0d683220bcc4258bdb99179914 /src
parent992e212bccec9f30ee7b4f5e764b0bd793ccc651 (diff)
First version of SRGS with semantic tags.
Diffstat (limited to 'src')
-rw-r--r--src/GF/Data/Utilities.hs7
-rw-r--r--src/GF/Shell/HelpFile.hs8
-rw-r--r--src/GF/Speech/PrGSL.hs4
-rw-r--r--src/GF/Speech/PrJSGF.hs4
-rw-r--r--src/GF/Speech/PrSRGS.hs80
-rw-r--r--src/GF/Speech/SRG.hs45
-rw-r--r--src/GF/Speech/TransformCFG.hs14
-rw-r--r--src/GF/UseGrammar/Custom.hs7
-rw-r--r--src/HelpFile8
9 files changed, 125 insertions, 52 deletions
diff --git a/src/GF/Data/Utilities.hs b/src/GF/Data/Utilities.hs
index aaadad1fe..c7e1600c3 100644
--- a/src/GF/Data/Utilities.hs
+++ b/src/GF/Data/Utilities.hs
@@ -73,7 +73,12 @@ sortNub = map head . group . sort
-- | Like 'nubBy', but more efficient as it uses sorting internally.
sortNubBy :: (a -> a -> Ordering) -> [a] -> [a]
-sortNubBy f = map head . groupBy (compareEq f) . sortBy f
+sortNubBy f = map head . sortGroupBy f
+
+-- | Sorts and then groups elements given and ordering of the
+-- elements.
+sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]]
+sortGroupBy f = groupBy (compareEq f) . sortBy f
-- | Take the union of a list of lists.
unionAll :: Eq a => [[a]] -> [a]
diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs
index ab5321297..482967fb2 100644
--- a/src/GF/Shell/HelpFile.hs
+++ b/src/GF/Shell/HelpFile.hs
@@ -598,10 +598,14 @@ txtHelpFile =
"\n -printer=jsgf Java Speech Grammar Format" ++
"\n -printer=srgs_xml SRGS XML format" ++
"\n -printer=srgs_xml_prob SRGS XML format, with weights" ++
+ "\n -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the" ++
+ "\n Microsoft Speech API." ++
"\n -printer=slf a finite automaton in the HTK SLF format" ++
"\n -printer=slf_graphviz the same automaton as slf, but in Graphviz format" ++
- "\n -printer=slf_sub a finite automaton with sub-automata in the HTK SLF format" ++
- "\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format" ++
+ "\n -printer=slf_sub a finite automaton with sub-automata in the " ++
+ "\n HTK SLF format" ++
+ "\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in " ++
+ "\n Graphviz format" ++
"\n -printer=fa_graphviz a finite automaton with labelled edges" ++
"\n -printer=regular a regular grammar in a simple BNF" ++
"\n -printer=unpar a gfc grammar with parameters eliminated" ++
diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs
index 34e2620bb..b204ae6c3 100644
--- a/src/GF/Speech/PrGSL.hs
+++ b/src/GF/Speech/PrGSL.hs
@@ -33,7 +33,7 @@ import Data.Char (toUpper,toLower)
gslPrinter :: Ident -- ^ Grammar name
-> Options -> Maybe Probs -> CGrammar -> String
gslPrinter name opts probs cfg = prGSL srg ""
- where srg = makeSRG name opts probs cfg
+ where srg = makeSimpleSRG name opts probs cfg
prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -48,7 +48,7 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
showString "; " . prtS origCat . nl
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
-- FIXME: use the probability
- prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
+ prAlt (SRGAlt mp _ rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\""
diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs
index 0eab36828..56f5eda1b 100644
--- a/src/GF/Speech/PrJSGF.hs
+++ b/src/GF/Speech/PrJSGF.hs
@@ -32,7 +32,7 @@ import GF.Speech.SRG
jsgfPrinter :: Ident -- ^ Grammar name
-> Options -> Maybe Probs -> CGrammar -> String
jsgfPrinter name opts probs cfg = prJSGF srg ""
- where srg = makeSRG name opts probs cfg
+ where srg = makeSimpleSRG name opts probs cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -49,7 +49,7 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
comments [origCat] . nl
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
-- FIXME: use the probability
- prAlt (SRGAlt mp rhs)
+ prAlt (SRGAlt mp _ rhs)
| null rhs' = showString "<NULL>"
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs
index 63775c852..81d5fd236 100644
--- a/src/GF/Speech/PrSRGS.hs
+++ b/src/GF/Speech/PrSRGS.hs
@@ -22,7 +22,7 @@ import GF.Speech.SRG
import GF.Infra.Ident
import GF.Formalism.CFG
-import GF.Formalism.Utilities (Symbol(..))
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
@@ -37,43 +37,74 @@ type Attr = (String,String)
srgsXmlPrinter :: Ident -- ^ Grammar name
-> Options
+ -> Bool -- ^ Whether to include semantic interpretation
-> Maybe Probs
-> CGrammar -> String
-srgsXmlPrinter name opts probs cfg = prSrgsXml srg ""
+srgsXmlPrinter name opts sisr probs cfg = prSrgsXml sisr srg ""
where srg = makeSRG name opts probs cfg
-prSrgsXml :: SRG -> ShowS
-prSrgsXml (SRG{grammarName=name,startCat=start,
+prSrgsXml :: Bool -> SRG -> ShowS
+prSrgsXml sisr (SRG{grammarName=name,startCat=start,
origStartCat=origStart,grammarLanguage=l,rules=rs})
= header . showsXML xmlGr
where
header = showString "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
root = prCat start
- xmlGr = grammar root l (comments
- ["SRGS XML speech recognition grammar for " ++ name,
- "Generated by GF",
- "Original start category: " ++ origStart]
+ xmlGr = grammar root l ([meta "description"
+ ("SRGS XML speech recognition grammar for " ++ name
+ ++ ". " ++ "Original start category: " ++ origStart),
+ meta "generator" "GF"]
++ map ruleToXML rs)
ruleToXML (SRGRule cat origCat alts) =
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts])
prRhs rhss = oneOf (map prAlt rhss)
- prAlt (SRGAlt p rhs) = item p (map prSymbol rhs)
- prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] []
- prSymbol (Tok t) = item Nothing [Data (showToken t)]
- prCat c = c -- FIXME: escape something?
- showToken t = t -- FIXME: escape something?
-
-rule :: String -- ^ id
- -> [XML] -> XML
+ prAlt (SRGAlt p n@(Name _ pr) rhs)
+ | sisr = prodItem (Just n) p (map (uncurry (symItem pr)) (numberCats 0 rhs))
+ | otherwise = prodItem Nothing p (map (\s -> symItem [] s 0) rhs)
+ numberCats _ [] = []
+ numberCats n (s@(Cat _):ss) = (s,n):numberCats (n+1) ss
+ numberCats n (s:ss) = (s,n):numberCats n ss
+
+rule :: String -> [XML] -> XML
rule i = Tag "rule" [("id",i)]
-item :: Maybe Double -> [XML] -> XML
--- FIXME: what is the weight called?
-item mp xs = Tag "item" as cs
- where as = maybe [] (\p -> [("weight", show p)]) mp
- cs = case xs of
- [Tag "item" [] xs'] -> xs'
- _ -> xs
+prodItem :: Maybe Name -> Maybe Double -> [XML] -> XML
+prodItem n mp xs = Tag "item" w (t++cs)
+ where
+ w = maybe [] (\p -> [("weight", show p)]) mp
+ t = maybe [] prodTag n
+ cs = case xs of
+ [Tag "item" [] xs'] -> xs'
+ _ -> xs
+
+prodTag :: Name -> [XML]
+prodTag (Name f prs) = [Tag "tag" [] [Data (join "; " ts)]]
+ where
+ ts = ["$.name=" ++ showFun f] ++
+ ["$.arg" ++ show n ++ "=" ++ argInit (prs!!n)
+ | n <- [0..length prs-1]]
+ argInit (Unify _) = metavar
+ argInit (Constant f) = maybe metavar showFun (forestName f)
+ showFun = show . prIdent
+ metavar = show "?"
+
+symItem :: [Profile a] -> Symbol String Token -> Int -> XML
+symItem prs (Cat c) x = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t)
+ where
+ t = if null ts then [] else [Tag "tag" [] [Data (join "; " ts)]]
+ ts = ["$.arg" ++ show n ++ "=$$"
+ | n <- [0..length prs-1], inProfile x (prs!!n)]
+symItem _ (Tok t) _ = Tag "item" [] [Data (showToken t)]
+
+inProfile :: Int -> Profile a -> Bool
+inProfile x (Unify xs) = x `elem` xs
+inProfile _ (Constant _) = False
+
+prCat :: String -> String
+prCat c = c -- FIXME: escape something?
+
+showToken :: Token -> String
+showToken t = t -- FIXME: escape something?
oneOf :: [XML] -> XML
oneOf [x] = x
@@ -88,6 +119,9 @@ grammar root l = Tag "grammar" [("xml:lang", l),
("mode","voice"),
("root",root)]
+meta :: String -> String -> XML
+meta n c = Tag "meta" [("name",n),("content",c)] []
+
comments :: [String] -> [XML]
comments = map Comment
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index 8bc4f68e1..ce4d89da0 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -18,13 +18,16 @@
-- FIXME: figure out name prefix from grammar name
-----------------------------------------------------------------------------
-module GF.Speech.SRG where
+module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
+ makeSimpleSRG, makeSRG
+ , lookupFM_, prtS) where
import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Formalism.CFG
-import GF.Formalism.Utilities (Symbol(..))
+import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
+ , Profile, SyntaxForest)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
@@ -48,8 +51,8 @@ data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original c
-- and productions
deriving (Eq,Show)
--- | maybe a probability, and a list of symbols
-data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token]
+-- | maybe a probability, a rule name and a list of symbols
+data SRGAlt = SRGAlt (Maybe Double) Name [Symbol String Token]
deriving (Eq,Show)
-- | SRG category name and original name
@@ -57,12 +60,34 @@ type CatName = (String,String)
type CatNames = FiniteMap String String
+-- | Create a non-left-recursive SRG.
+-- FIXME: the probabilities, names and profiles in the returned
+-- grammar may be meaningless.
+makeSimpleSRG :: Ident -- ^ Grammar name
+ -> Options -- ^ Grammar options
+ -> Maybe Probs -- ^ Probabilities
+ -> CGrammar -- ^ A context-free grammar
+ -> SRG
+makeSimpleSRG
+ = makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats)
+
+-- | Create a SRG preserving the names, profiles and probabilities of the
+-- input grammar. The returned grammar may be left-recursive.
makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
-makeSRG i opts probs gr
+makeSRG = makeSRG_ removeEmptyCats
+
+makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the
+ -- CFG before converting to SRG
+ -> Ident -- ^ Grammar name
+ -> Options -- ^ Grammar options
+ -> Maybe Probs -- ^ Probabilities
+ -> CGrammar -- ^ A context-free grammar
+ -> SRG
+makeSRG_ f i opts probs gr
= SRG { grammarName = name,
startCat = lookupFM_ names origStart,
origStartCat = origStart,
@@ -72,21 +97,19 @@ makeSRG i opts probs gr
name = prIdent i
origStart = getStartCat opts
l = fromMaybe "en_UK" (getOptVal opts speechLanguage)
- gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr
+ gr' = f (cfgToCFRules gr)
(cats,cfgRules) = unzip gr'
names = mkCatNames name cats
rs = map (cfgRulesToSRGRule names probs) cfgRules
-
--- FIXME: probabilities get larger than 1.0 when new rules are
--- introduced
--- FIXME: merge alternatives with same rhs but different probabilities
+-- FIXME: merge alternatives with same rhs and profile but different probabilities
cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
where origCat = lhsCat r
cat = lookupFM_ names origCat
rhs = nub $ map ruleToAlt rs
- ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r))
+ ruleToAlt r@(CFRule c ss n)
+ = SRGAlt (ruleProb probs r) n (map renameCat ss)
renameCat (Cat c) = Cat (lookupFM_ names c)
renameCat t = t
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index 08aae8897..38148418c 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -37,6 +37,7 @@ import Control.Monad
import Data.FiniteMap
import Data.List
import Data.Maybe (fromMaybe)
+import Data.Monoid (mconcat)
import Data.Set (Set)
import qualified Data.Set as Set
@@ -77,14 +78,13 @@ removeEmptyCats = fix removeEmptyCats'
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
--- | Remove rules which are identical, not caring about the rule names.
--- FIXME: this messes up probabilities
+-- | Remove rules which have the same rhs.
+-- FIXME: this messes up probabilities, names and profiles
removeIdenticalRules :: CFRules -> CFRules
-removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g]
- where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
- case c1 `compare` c2 of
- EQ -> ss1 `compare` ss2
- o -> o
+removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g]
+ where
+ cmpRules (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
+ mconcat [c1 `compare` c2, ss1 `compare` ss2]
removeLeftRecursion :: CFRules -> CFRules
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 9a6cd0e21..0d6a143ef 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -249,11 +249,14 @@ customGrammarPrinter =
in jsgfPrinter name opts Nothing $ stateCFG s)
,(strCI "srgs_xml", \s -> let opts = stateOptions s
name = cncId s
- in srgsXmlPrinter name opts Nothing $ stateCFG s)
+ in srgsXmlPrinter name opts False Nothing $ stateCFG s)
,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s
name = cncId s
probs = stateProbs s
- in srgsXmlPrinter name opts (Just probs) $ stateCFG s)
+ in srgsXmlPrinter name opts False (Just probs) $ stateCFG s)
+ ,(strCI "srgs_xml_ms_sem", \s -> let opts = stateOptions s
+ name = cncId s
+ in srgsXmlPrinter name opts True Nothing $ stateCFG s)
,(strCI "slf", \s -> let opts = stateOptions s
start = getStartCat opts
name = cncId s
diff --git a/src/HelpFile b/src/HelpFile
index 88b5eba7e..a67f79412 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -569,10 +569,14 @@ q, quit: q
-printer=jsgf Java Speech Grammar Format
-printer=srgs_xml SRGS XML format
-printer=srgs_xml_prob SRGS XML format, with weights
+ -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the
+ Microsoft Speech API.
-printer=slf a finite automaton in the HTK SLF format
-printer=slf_graphviz the same automaton as slf, but in Graphviz format
- -printer=slf_sub a finite automaton with sub-automata in the HTK SLF format
- -printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format
+ -printer=slf_sub a finite automaton with sub-automata in the
+ HTK SLF format
+ -printer=slf_sub_graphviz the same automaton as slf_sub, but in
+ Graphviz format
-printer=fa_graphviz a finite automaton with labelled edges
-printer=regular a regular grammar in a simple BNF
-printer=unpar a gfc grammar with parameters eliminated