summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbringert <unknown>2005-11-01 19:09:04 +0000
committerbringert <unknown>2005-11-01 19:09:04 +0000
commit928c84b36cf68b9d590ad2d8cba9e69e3cd3d2d2 (patch)
tree595a92162abc9faceb92531d26b5b171cd6a162d
parenta6cd4c131b3b9d98c8581be9659e14155ed64bdc (diff)
Documented SRGS XML generation. Started working on support for probabilities in SRG generation. Added support for probabilities in for SRGS.
-rw-r--r--src/GF/Data/Operations.hs15
-rw-r--r--src/GF/Shell/HelpFile.hs7
-rw-r--r--src/GF/Speech/PrGSL.hs16
-rw-r--r--src/GF/Speech/PrJSGF.hs19
-rw-r--r--src/GF/Speech/PrSRGS.hs34
-rw-r--r--src/GF/Speech/SRG.hs43
-rw-r--r--src/GF/Speech/TransformCFG.hs16
-rw-r--r--src/GF/UseGrammar/Custom.hs16
-rw-r--r--src/HelpFile1
9 files changed, 108 insertions, 59 deletions
diff --git a/src/GF/Data/Operations.hs b/src/GF/Data/Operations.hs
index c297bc55a..ce8f90a03 100644
--- a/src/GF/Data/Operations.hs
+++ b/src/GF/Data/Operations.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/05/30 18:39:44 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.20 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.21 $
--
-- some auxiliary GF operations. AR 19\/6\/1998 -- 6\/2\/2001
--
@@ -100,6 +100,7 @@ data Err a = Ok a | Bad String
instance Monad Err where
return = Ok
+ fail = Bad
Ok a >>= f = f a
Bad s >>= f = Bad s
@@ -302,19 +303,19 @@ isInBinTree :: (Ord a) => a -> BinTree a b -> Bool
isInBinTree x = err (const False) (const True) . justLookupTree x
-- isInBinTree = elemFM
-justLookupTree :: (Ord a) => a -> BinTree a b -> Err b
+justLookupTree :: (Monad m,Ord a) => a -> BinTree a b -> m b
justLookupTree = lookupTree (const [])
-lookupTree :: (Ord a) => (a -> String) -> a -> BinTree a b -> Err b
+lookupTree :: (Monad m,Ord a) => (a -> String) -> a -> BinTree a b -> m b
lookupTree pr x tree = case tree of
- NT -> Bad ("no occurrence of element" +++ pr x)
+ NT -> fail ("no occurrence of element" +++ pr x)
BT (a,b) left right
| x < a -> lookupTree pr x left
| x > a -> lookupTree pr x right
| x == a -> return b
--lookupTree pr x tree = case lookupFM tree x of
-- Just y -> return y
--- _ -> Bad ("no occurrence of element" +++ pr x)
+-- _ -> fail ("no occurrence of element" +++ pr x)
lookupTreeMany :: Ord a => (a -> String) -> [BinTree a b] -> a -> Err b
lookupTreeMany pr (t:ts) x = case lookupTree pr x t of
diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs
index e2216ce64..7daac5816 100644
--- a/src/GF/Shell/HelpFile.hs
+++ b/src/GF/Shell/HelpFile.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 19:02:35 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.18 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.19 $
--
-- Help on shell commands. Generated from HelpFile by 'make help'.
-- PLEASE DON'T EDIT THIS FILE.
@@ -575,6 +575,7 @@ txtHelpFile =
"\n -printer=probs show probabilities of all functions" ++
"\n -printer=gsl Nuance GSL speech recognition grammar" ++
"\n -printer=jsgf Java Speech Grammar Format" ++
+ "\n -printer=srgs_xml SRGS XML format" ++
"\n -printer=slf a finite automaton in the HTK SLF format" ++
"\n -printer=slf_graphviz the same automaton as in SLF, but in Graphviz format" ++
"\n -printer=fa_graphviz a finite automaton with labelled edges" ++
diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs
index 4f245a328..34e2620bb 100644
--- a/src/GF/Speech/PrGSL.hs
+++ b/src/GF/Speech/PrGSL.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/14 15:17:29 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.21 $
+-- > CVS $Revision: 1.22 $
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
@@ -26,13 +26,14 @@ import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
import Data.Char (toUpper,toLower)
gslPrinter :: Ident -- ^ Grammar name
- -> Options -> CGrammar -> String
-gslPrinter name opts cfg = prGSL srg ""
- where srg = makeSRG name opts cfg
+ -> Options -> Maybe Probs -> CGrammar -> String
+gslPrinter name opts probs cfg = prGSL srg ""
+ where srg = makeSRG name opts probs cfg
prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -46,8 +47,9 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
prRule (SRGRule cat origCat rhs) =
showString "; " . prtS origCat . nl
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
- prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
- where rhs' = rmPunct rhs
+ -- FIXME: use the probability
+ prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
+ where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\""
-- GSL requires an upper case letter in category names
diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs
index 5d0b0a211..0eab36828 100644
--- a/src/GF/Speech/PrJSGF.hs
+++ b/src/GF/Speech/PrJSGF.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/14 15:17:29 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.15 $
+-- > CVS $Revision: 1.16 $
--
-- This module prints a CFG as a JSGF grammar.
--
@@ -26,12 +26,13 @@ import GF.Formalism.Utilities (Symbol(..))
import GF.Infra.Ident
import GF.Infra.Print
import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SRG
jsgfPrinter :: Ident -- ^ Grammar name
- -> Options -> CGrammar -> String
-jsgfPrinter name opts cfg = prJSGF srg ""
- where srg = makeSRG name opts cfg
+ -> Options -> Maybe Probs -> CGrammar -> String
+jsgfPrinter name opts probs cfg = prJSGF srg ""
+ where srg = makeSRG name opts probs cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -47,9 +48,11 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
prRule (SRGRule cat origCat rhs) =
comments [origCat] . nl
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
- prAlt rhs | null rhs' = showString "<NULL>"
- | otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
- where rhs' = rmPunct rhs
+ -- FIXME: use the probability
+ prAlt (SRGAlt mp rhs)
+ | null rhs' = showString "<NULL>"
+ | otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
+ where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
prCat c = showChar '<' . showString c . showChar '>'
diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs
index dc4e1c126..3604f34cd 100644
--- a/src/GF/Speech/PrSRGS.hs
+++ b/src/GF/Speech/PrSRGS.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 16:48:08 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.1 $
+-- > CVS $Revision: 1.2 $
--
-- This module prints a CFG as an SRGS XML grammar.
--
@@ -26,6 +26,7 @@ import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
import Data.Char (toUpper,toLower)
@@ -35,9 +36,11 @@ data XML = Data String | Tag String [Attr] [XML] | Comment String
type Attr = (String,String)
srgsXmlPrinter :: Ident -- ^ Grammar name
- -> Options -> CGrammar -> String
-srgsXmlPrinter name opts cfg = prSrgsXml srg ""
- where srg = makeSRG name opts cfg
+ -> Options
+ -> Maybe Probs
+ -> CGrammar -> String
+srgsXmlPrinter name opts probs cfg = prSrgsXml srg ""
+ where srg = makeSRG name opts probs cfg
prSrgsXml :: SRG -> ShowS
prSrgsXml (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -50,13 +53,12 @@ prSrgsXml (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
"Generated by GF",
"Original start category: " ++ origStart]
++ map ruleToXML rs)
- ruleToXML (SRGRule cat origCat rhss) =
- rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs rhss])
- prRhs rhss = oneOf (map item (map prAlt rhss))
- -- FIXME: don't use one-of if there is only one
- prAlt rhs = map prSymbol rhs
+ ruleToXML (SRGRule cat origCat alts) =
+ rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts])
+ prRhs rhss = oneOf (map prAlt rhss)
+ prAlt (SRGAlt p rhs) = item p (map prSymbol rhs)
prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] []
- prSymbol (Tok t) = item [Data (showToken t)]
+ prSymbol (Tok t) = item Nothing [Data (showToken t)]
prCat c = c -- FIXME: escape something?
showToken t = t -- FIXME: escape something?
@@ -64,9 +66,13 @@ rule :: String -- ^ id
-> [XML] -> XML
rule i = Tag "rule" [("id",i)]
-item :: [XML] -> XML
-item [x@(Tag "item" _ _)] = x
-item xs = Tag "item" [] xs
+item :: Maybe Double -> [XML] -> XML
+-- FIXME: what is the weight called?
+item mp xs = Tag "item" as cs
+ where as = maybe [] (\p -> [("weight", show p)]) mp
+ cs = case xs of
+ [Tag "item" [] xs'] -> xs'
+ _ -> xs
oneOf :: [XML] -> XML
oneOf [x] = x
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index 11d326fff..51c01df01 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/26 17:14:03 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.19 $
+-- > CVS $Revision: 1.20 $
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
@@ -20,6 +20,7 @@
module GF.Speech.SRG where
+import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -28,6 +29,7 @@ import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
import GF.Infra.Option
+import GF.Probabilistic.Probabilistic (Probs)
import Data.List
import Data.Maybe (fromMaybe)
@@ -38,9 +40,15 @@ data SRG = SRG { grammarName :: String -- ^ grammar name
, origStartCat :: String -- ^ original start category name
, rules :: [SRGRule]
}
+ deriving (Eq,Show)
+
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
-- and productions
-type SRGAlt = [Symbol String Token]
+ deriving (Eq,Show)
+
+-- | maybe a probability, and a list of symbols
+data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token]
+ deriving (Eq,Show)
-- | SRG category name and original name
type CatName = (String,String)
@@ -49,27 +57,42 @@ type CatNames = FiniteMap String String
makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
+ -> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
-makeSRG i opts gr = SRG { grammarName = name,
- startCat = lookupFM_ names origStart,
- origStartCat = origStart,
- rules = map (cfgRulesToSRGRule names) cfgRules }
+makeSRG i opts probs gr
+ = SRG { grammarName = name,
+ startCat = lookupFM_ names origStart,
+ origStartCat = origStart,
+ rules = rs }
where
name = prIdent i
origStart = getStartCat opts
gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr
(cats,cfgRules) = unzip gr'
names = mkCatNames name cats
+ rs = map (cfgRulesToSRGRule names probs) cfgRules
+
-cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
-cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
+-- FIXME: probabilities get larger than 1.0 when new rules are
+-- introduced
+-- FIXME: merge alternatives with same rhs but different probabilities
+cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule
+cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
where origCat = lhsCat r
cat = lookupFM_ names origCat
- rhs = nub $ map (map renameCat . ruleRhs) rs
+ rhs = nub $ map ruleToAlt rs
+ ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r))
renameCat (Cat c) = Cat (lookupFM_ names c)
renameCat t = t
+ruleProb :: Maybe Probs -> CFRule_ -> Maybe Double
+ruleProb mp r = mp >>= \probs -> lookupProb probs (ruleFun r)
+
+-- FIXME: move to GF.Probabilistic.Probabilistic?
+lookupProb :: Probs -> Ident -> Maybe Double
+lookupProb probs i = lookupTree prIdent i probs
+
mkCatNames :: String -- ^ Category name prefix
-> [String] -- ^ Original category names
-> FiniteMap String String -- ^ Maps original names to SRG names
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index d12d06628..39060206b 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/26 18:47:16 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.23 $
+-- > CVS $Revision: 1.24 $
--
-- This module does some useful transformations on CFGs.
--
@@ -26,7 +26,8 @@ module GF.Speech.TransformCFG {- (CFRule_, CFRules,
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
-import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
+import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
+ NameProfile(..), name2fun)
import GF.Infra.Ident
import GF.Infra.Option
import GF.Infra.Print
@@ -75,6 +76,7 @@ removeEmptyCats = fix removeEmptyCats'
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
-- | Remove rules which are identical, not caring about the rule names.
+-- FIXME: this messes up probabilities
removeIdenticalRules :: CFRules -> CFRules
removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g]
where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
@@ -87,7 +89,11 @@ removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds r
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
- -- FIXME: this will give multiple rules with the same name
+ -- FIXME: for non-recursive categories, this changes
+ -- the grammar unneccessarily, maybe we can use mutRecCats
+ -- to make this less invasive
+ -- FIXME: this will give multiple rules with the same name,
+ -- which may mess up the probabilities.
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
handleProd r = [r]
@@ -124,6 +130,8 @@ lhsCat (CFRule c _ _) = c
ruleRhs :: CFRule c n t -> [Symbol c t]
ruleRhs (CFRule _ ss _) = ss
+ruleFun :: CFRule_ -> Fun
+ruleFun (CFRule _ _ n) = name2fun n
-- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Eq c => Symbol c t -> [c] -> Bool
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 67ed388f8..c13b5c91c 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/10/31 19:02:35 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.80 $
+-- > CVS $Date: 2005/11/01 20:09:04 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.81 $
--
-- A database for customizable GF shell commands.
--
@@ -240,13 +240,17 @@ customGrammarPrinter =
,(strCI "srg", prSRG . stateCF)
,(strCI "gsl", \s -> let opts = stateOptions s
name = cncId s
- in gslPrinter name opts $ stateCFG s)
+ in gslPrinter name opts Nothing $ stateCFG s)
,(strCI "jsgf", \s -> let opts = stateOptions s
name = cncId s
- in jsgfPrinter name opts $ stateCFG s)
+ in jsgfPrinter name opts Nothing $ stateCFG s)
,(strCI "srgs_xml", \s -> let opts = stateOptions s
name = cncId s
- in srgsXmlPrinter name opts $ stateCFG s)
+ in srgsXmlPrinter name opts Nothing $ stateCFG s)
+ ,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s
+ name = cncId s
+ probs = stateProbs s
+ in srgsXmlPrinter name opts (Just probs) $ stateCFG s)
,(strCI "slf", \s -> let opts = stateOptions s
name = cncId s
in slfPrinter name opts $ stateCFG s)
diff --git a/src/HelpFile b/src/HelpFile
index 573191204..68f23aee1 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -546,6 +546,7 @@ q, quit: q
-printer=probs show probabilities of all functions
-printer=gsl Nuance GSL speech recognition grammar
-printer=jsgf Java Speech Grammar Format
+ -printer=srgs_xml SRGS XML format
-printer=slf a finite automaton in the HTK SLF format
-printer=slf_graphviz the same automaton as in SLF, but in Graphviz format
-printer=fa_graphviz a finite automaton with labelled edges