summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <unknown>2004-10-01 08:43:59 +0000
committerbringert <unknown>2004-10-01 08:43:59 +0000
commitf64502d4a7016864990432fd7135157c1d29ef0b (patch)
tree9ed90c4246229f6a8695694c5468d5647dea3bb7 /src
parent653c903dc782ac39ea4e42daee0154f1d9fc9e8d (diff)
Generalized Speech Recognition Grammar generation. Added JSGF grammar printer.
Diffstat (limited to 'src')
-rw-r--r--src/GF/Speech/PrGSL.hs117
-rw-r--r--src/GF/Speech/PrJSGF.hs67
-rw-r--r--src/GF/Speech/SRG.hs125
-rw-r--r--src/GF/UseGrammar/Custom.hs4
4 files changed, 212 insertions, 101 deletions
diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs
index c7cb283c6..0d7cb6c39 100644
--- a/src/GF/Speech/PrGSL.hs
+++ b/src/GF/Speech/PrGSL.hs
@@ -11,96 +11,45 @@
Created : September 13, 2004
- Modified :
+ Modified : October 1, 2004
**************************************************************
-}
--- FIXME: this modules should not be in cfgm, but where?
-
--- FIXME: remove left-recursion
-
--- FIXME: remove empty rules
-
--- FIXME: remove categories with no RHS
-
-- FIXME: remove / warn / fail if there are int / string literal
-- categories in the grammar
--- FIXME: figure out name prefix from grammar name
-
module PrGSL (gslPrinter) where
+import SRG
import Ident
import CFGrammar
import Parser (Symbol(..))
import GrammarTypes
import PrintParser
-import TransformCFG
import Option
-import Data.List
-import Data.Maybe (fromMaybe)
-import Data.FiniteMap
-
-
-data GSLGrammar = GSLGrammar String -- ^ grammar name
- String -- ^ start category name
- [GSLRule]
-data GSLRule = GSLRule String [GSLAlt]
-type GSLAlt = [Symbol String Token]
-
-type CatNames = FiniteMap String String
-
gslPrinter :: Ident -- ^ Grammar name
-> Options -> CFGrammar -> String
-gslPrinter name opts = prGSL (prIdent name) start
- where mstart = getOptVal opts gStartCat
- start = fromMaybe "S" mstart ++ "{}.s"
-
-prGSL :: String -- ^ Grammar name
- -> String -- ^ startcat
- -> CFGrammar -> String
-prGSL name start cfg = prGSLGrammar names gsl ""
- where
- cfg' = makeNice cfg
- gsl = cfgToGSL name start cfg'
- names = mkCatNames gsl
-
-cfgToGSL :: String -- ^ grammar name
- -> String -- ^ start category
- -> [CFRule_] -> GSLGrammar
-cfgToGSL name start =
- GSLGrammar name start . map cfgRulesToGSLRule . sortAndGroupBy ruleCat
- where
- ruleCat (Rule c _ _) = c
- ruleRhs (Rule _ r _) = r
- cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs)
-
-mkCatNames :: GSLGrammar -> CatNames
-mkCatNames (GSLGrammar name start rules) = listToFM (zip lhsCats names)
- where names = [name ++ "_" ++ show x | x <- [0..]]
- lhsCats = [ c | GSLRule c _ <- rules]
+gslPrinter name opts cfg = prGSL srg ""
+ where srg = makeSRG name opts cfg
-prGSLGrammar :: CatNames -> GSLGrammar -> ShowS
-prGSLGrammar names (GSLGrammar name start g) =
- header . mainCat . unlinesS (map prGSLrule g)
+prGSL :: SRG -> ShowS
+prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
+ = header . mainCat . unlinesS (map prRule rs)
where
header = showString ";GSL2.0" . nl
. comments ["Nuance speech recognition grammar for " ++ name,
"Generated by GF"] . nl . nl
- mainCat = showString ("; Start category: " ++ start) . nl
- . showString ".MAIN " . prGSLCat start . nl . nl
- prGSLrule (GSLRule cat rhs) =
- showString "; " . prtS cat . nl
- . prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl
- prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")"
+ mainCat = showString ("; Start category: " ++ origStart) . nl
+ . showString ".MAIN " . prCat start . nl . nl
+ prRule (SRGRule cat origCat rhs) =
+ showString "; " . prtS origCat . nl
+ . prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
+ prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
- prGSLSymbol (Cat c) = prGSLCat c
- prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\""
- prGSLCat c = showString n
- where n = case lookupFM names c of
- Nothing -> error $ "Unknown category: " ++ c
- Just x -> x
+ prSymbol (Cat c) = prCat c
+ prSymbol (Tok t) = wrap "\"" (prtS t) "\""
+ prCat c = showString c
rmPunct :: [Symbol String Token] -> [Symbol String Token]
rmPunct [] = []
@@ -112,37 +61,3 @@ isPunct c = c `elem` "-_.;.,?!"
comments :: [String] -> ShowS
comments = unlinesS . map (showString . ("; " ++))
-
---
--- * Utils
---
-
-nl :: ShowS
-nl = showChar '\n'
-
-sp :: ShowS
-sp = showChar ' '
-
-wrap :: String -> ShowS -> String -> ShowS
-wrap o s c = showString o . s . showString c
-
-concatS :: [ShowS] -> ShowS
-concatS = foldr (.) id
-
-unwordsS :: [ShowS] -> ShowS
-unwordsS = concatS . intersperse sp
-
-unlinesS :: [ShowS] -> ShowS
-unlinesS = concatS . intersperse nl
-
-sortAndGroupBy :: Ord b =>
- (a -> b) -- ^ Gets the value to sort and group by
- -> [a]
- -> [[a]]
-sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
-
-both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
-both f g x y = f (g x) (g y)
-
-prtS :: Print a => a -> ShowS
-prtS = showString . prt \ No newline at end of file
diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs
new file mode 100644
index 000000000..4c849e8c8
--- /dev/null
+++ b/src/GF/Speech/PrJSGF.hs
@@ -0,0 +1,67 @@
+{-
+ **************************************************************
+ GF Module
+
+ Description : This module prints a CFG as a JSGF grammar.
+
+ Author : Björn Bringert (bringert@cs.chalmers.se)
+
+ License : GPL (GNU General Public License)
+
+ Created : October 1, 2004
+
+ Modified :
+ **************************************************************
+-}
+
+-- FIXME: remove / warn / fail if there are int / string literal
+-- categories in the grammar
+
+-- FIXME: convert to UTF-8
+
+module PrJSGF (jsgfPrinter) where
+
+import SRG
+import Ident
+import CFGrammar
+import Parser (Symbol(..))
+import GrammarTypes
+import PrintParser
+import Option
+
+jsgfPrinter :: Ident -- ^ Grammar name
+ -> Options -> CFGrammar -> String
+jsgfPrinter name opts cfg = prJSGF srg ""
+ where srg = makeSRG name opts cfg
+
+prJSGF :: SRG -> ShowS
+prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
+ = header . mainCat . unlinesS (map prRule rs)
+ where
+ header = showString "#JSGF V1.0 UTF-8;" . nl
+ . comments ["JSGF speech recognition grammar for " ++ name,
+ "Generated by GF"] . nl
+ . showString ("grammar " ++ name ++ ";") . nl
+ . nl
+ mainCat = comments ["Start category: " ++ origStart] . nl
+ . showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
+ prRule (SRGRule cat origCat rhs) =
+ comments [origCat] . nl
+ . prCat cat . showString " = " . join " | " (map prAlt rhs) . nl
+ prAlt rhs | null rhs' = showString "<NULL>"
+ | otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
+ where rhs' = rmPunct rhs
+ prSymbol (Cat c) = prCat c
+ prSymbol (Tok t) = wrap "\"" (prtS t) "\""
+ prCat c = showChar '<' . showString c . showChar '>'
+
+rmPunct :: [Symbol String Token] -> [Symbol String Token]
+rmPunct [] = []
+rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
+rmPunct (s:ss) = s : rmPunct ss
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!"
+
+comments :: [String] -> ShowS
+comments = unlinesS . map (showString . ("// " ++))
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
new file mode 100644
index 000000000..0fc7349f3
--- /dev/null
+++ b/src/GF/Speech/SRG.hs
@@ -0,0 +1,125 @@
+{-
+ **************************************************************
+ GF Module
+
+ Description : Representation of, conversion to, and
+ utilities for printing of a
+ general Speech Recognition Grammar.
+
+ Author : Björn Bringert (bringert@cs.chalmers.se)
+
+ License : GPL (GNU General Public License)
+
+ Created : October 1, 2004
+
+ Modified :
+ **************************************************************
+-}
+
+-- FIXME: remove / warn / fail if there are int / string literal
+-- categories in the grammar
+
+-- FIXME: figure out name prefix from grammar name
+
+module SRG where
+
+import Ident
+import CFGrammar
+import Parser (Symbol(..))
+import GrammarTypes
+import PrintParser
+import TransformCFG
+import Option
+
+import Data.List
+import Data.Maybe (fromMaybe)
+import Data.FiniteMap
+
+
+data SRG = SRG { grammarName :: String -- ^ grammar name
+ , startCat :: String -- ^ start category name
+ , origStartCat :: String -- ^ original start category name
+ , rules :: [SRGRule]
+ }
+data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
+ -- and productions
+type SRGAlt = [Symbol String Token]
+type CatName = (String,String) -- ^ SRG category name and original name
+
+type CatNames = FiniteMap String String
+
+makeSRG :: Ident -- ^ Grammar name
+ -> Options -- ^ Grammar options
+ -> CFGrammar -- ^ A context-free grammar
+ -> SRG
+makeSRG i opts gr = SRG { grammarName = name,
+ startCat = start,
+ origStartCat = origStart,
+ rules = rs }
+ where
+ name = prIdent i
+ origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
+ start = lookupFM_ names origStart
+ gr' = makeNice gr
+ names = mkCatNames name (nub $ map ruleCat gr')
+ rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr')
+
+cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
+cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
+ where origCat = ruleCat r
+ cat = lookupFM_ names origCat
+ rhs = nub $ map (map renameCat . ruleRhs) rs
+ renameCat (Cat c) = Cat (lookupFM_ names c)
+ renameCat t = t
+
+ruleCat :: Rule n c t -> c
+ruleCat (Rule c _ _) = c
+
+ruleRhs :: Rule n c t -> [Symbol c t]
+ruleRhs (Rule _ r _) = r
+
+mkCatNames :: String -- ^ Category name prefix
+ -> [String] -- ^ Original category names
+ -> FiniteMap String String -- ^ Maps original names to SRG names
+mkCatNames prefix origNames = listToFM (zip origNames names)
+ where names = [prefix ++ "_" ++ show x | x <- [0..]]
+
+--
+-- * Utilities for building and printing SRGs
+--
+
+nl :: ShowS
+nl = showChar '\n'
+
+sp :: ShowS
+sp = showChar ' '
+
+wrap :: String -> ShowS -> String -> ShowS
+wrap o s c = showString o . s . showString c
+
+concatS :: [ShowS] -> ShowS
+concatS = foldr (.) id
+
+unwordsS :: [ShowS] -> ShowS
+unwordsS = join " "
+
+unlinesS :: [ShowS] -> ShowS
+unlinesS = join "\n"
+
+join :: String -> [ShowS] -> ShowS
+join glue = concatS . intersperse (showString glue)
+
+sortAndGroupBy :: Ord b =>
+ (a -> b) -- ^ Gets the value to sort and group by
+ -> [a]
+ -> [[a]]
+sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
+
+both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
+both f g x y = f (g x) (g y)
+
+prtS :: Print a => a -> ShowS
+prtS = showString . prt
+
+lookupFM_ :: (Ord key, Show key) => FiniteMap key elt -> key -> elt
+lookupFM_ fm k = lookupWithDefaultFM fm (error $ "Key not found: " ++ show k) k
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 9119b8f36..dfffd2b2a 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -28,6 +28,7 @@ import PrOld
import MkGFC
import CFtoSRG
import PrGSL (gslPrinter)
+import PrJSGF (jsgfPrinter)
import Zipper
@@ -194,6 +195,9 @@ customGrammarPrinter =
,(strCI "gsl", \s -> let opts = stateOptions s
name = cncId s
in gslPrinter name opts $ Cnv.cfg $ statePInfo s)
+ ,(strCI "jsgf", \s -> let opts = stateOptions s
+ name = cncId s
+ in jsgfPrinter name opts $ Cnv.cfg $ statePInfo s)
,(strCI "plbnf", prLBNF True)
,(strCI "lbnf", prLBNF False)
,(strCI "bnf", prBNF False)