From 4369e679986fb602180b03f461105b9b3a2fdce2 Mon Sep 17 00:00:00 2001 From: bjorn Date: Thu, 12 Jun 2008 18:39:02 +0000 Subject: Get JSGF generation to compile. Still untested. --- src-3.0/GF/Compile/Export.hs | 3 + src-3.0/GF/Speech/JSGF.hs | 114 ++++++++++++++++++++++++++++++++++ src-3.0/GF/Speech/PrJSGF.hs | 145 ------------------------------------------- src-3.0/GFC.hs | 1 + 4 files changed, 118 insertions(+), 145 deletions(-) create mode 100644 src-3.0/GF/Speech/JSGF.hs delete mode 100644 src-3.0/GF/Speech/PrJSGF.hs diff --git a/src-3.0/GF/Compile/Export.hs b/src-3.0/GF/Compile/Export.hs index 22b248159..d5f9e33ae 100644 --- a/src-3.0/GF/Compile/Export.hs +++ b/src-3.0/GF/Compile/Export.hs @@ -10,6 +10,7 @@ import GF.Infra.Option import GF.Speech.CFG import GF.Speech.PGFToCFG import GF.Speech.SRGS_XML +import GF.Speech.JSGF import GF.Speech.VoiceXML import GF.Text.UTF8 @@ -27,9 +28,11 @@ prPGF fmt gr name = case fmt of FmtHaskell_GADT -> grammar2haskellGADT gr name FmtBNF -> prCFG $ pgfToCFG gr (outputConcr gr) FmtSRGS_XML -> srgsXmlPrinter Nothing gr (outputConcr gr) + FmtJSGF -> jsgfPrinter Nothing gr (outputConcr gr) FmtVoiceXML -> grammar2vxml gr (outputConcr gr) + -- | Get the name of the concrete syntax to generate output from. -- FIXME: there should be an option to change this. outputConcr :: PGF -> CId diff --git a/src-3.0/GF/Speech/JSGF.hs b/src-3.0/GF/Speech/JSGF.hs new file mode 100644 index 000000000..53a40ffd4 --- /dev/null +++ b/src-3.0/GF/Speech/JSGF.hs @@ -0,0 +1,114 @@ +---------------------------------------------------------------------- +-- | +-- Module : GF.Speech.JSGF +-- +-- This module prints a CFG as a JSGF grammar. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +-- +-- FIXME: convert to UTF-8 +----------------------------------------------------------------------------- + +module GF.Speech.JSGF (jsgfPrinter) where + +import GF.Data.Utilities +import GF.Speech.CFG +import GF.Speech.RegExp +import GF.Speech.SISR +import GF.Speech.SRG +import PGF.CId +import PGF.Data + +import Data.Char +import Data.List +import Data.Maybe +import Text.PrettyPrint.HughesPJ +import Debug.Trace + +width :: Int +width = 75 + +jsgfPrinter :: Maybe SISRFormat + -> PGF + -> CId -> String +jsgfPrinter sisr pgf cnc = renderStyle st $ prJSGF sisr $ makeSimpleSRG pgf cnc + where st = style { lineLength = width } + +prJSGF :: Maybe SISRFormat -> SRG -> Doc +prJSGF sisr srg + = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg)) + where + header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$ + comment ("JSGF speech recognition grammar for " ++ srgName srg) $$ + comment "Generated by GF" $$ + text ("grammar " ++ srgName srg ++ ";") + lang = maybe empty text (srgLanguage srg) + mainCat = rule True "MAIN" [prCat (srgStartCat srg)] + prRule (SRGRule cat rhs) = rule (isExternalCat srg cat) cat (map prAlt rhs) + prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] + where initTag | isEmpty t = empty + | otherwise = text "" <+> t + where t = tag sisr (profileInitSISR n) + finalTag = tag sisr (profileFinalSISR n) + p = if isEmpty initTag && isEmpty finalTag then id else parens + +catFormId :: String -> String +catFormId = (++ "_cat") + +prCat :: Cat -> Doc +prCat c = char '<' <> text c <> char '>' + +prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc +prItem sisr t = f 0 + where + f _ (REUnion []) = text "" + f p (REUnion xs) + | not (null es) = brackets (f 0 (REUnion nes)) + | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) + where (es,nes) = partition isEpsilon xs + f _ (REConcat []) = text "" + f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) + f p (RERepeat x) = f 3 x <> char '*' + f _ (RESymbol s) = prSymbol sisr t s + +prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc +prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) +prSymbol _ cn (Terminal t) | all isPunct t = empty -- removes punctuation + | otherwise = text t -- FIXME: quote if there is whitespace or odd chars + +tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc +tag Nothing _ = empty +tag (Just fmt) t = case t fmt of + [] -> empty + ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}' + where e [] = [] + e ('}':xs) = '\\':'}':e xs + e ('\n':xs) = ' ' : e (dropWhile isSpace xs) + e (x:xs) = x:e xs + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +comment :: String -> Doc +comment s = text "//" <+> text s + +alts :: [Doc] -> Doc +alts = fsep . prepunctuate (text "| ") + +rule :: Bool -> Cat -> [Doc] -> Doc +rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' + where p = if pub then text "public" else empty + +-- Pretty-printing utilities + +emptyLine :: Doc +emptyLine = text "" + +prepunctuate :: Doc -> [Doc] -> [Doc] +prepunctuate _ [] = [] +prepunctuate p (x:xs) = x : map (p <>) xs + +($++$) :: Doc -> Doc -> Doc +x $++$ y = x $$ emptyLine $$ y + diff --git a/src-3.0/GF/Speech/PrJSGF.hs b/src-3.0/GF/Speech/PrJSGF.hs deleted file mode 100644 index 037a4f4e2..000000000 --- a/src-3.0/GF/Speech/PrJSGF.hs +++ /dev/null @@ -1,145 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PrJSGF --- Maintainer : BB --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/01 20:09:04 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.16 $ --- --- This module prints a CFG as a JSGF grammar. --- --- FIXME: remove \/ warn \/ fail if there are int \/ string literal --- categories in the grammar --- --- FIXME: convert to UTF-8 ------------------------------------------------------------------------------ - -module GF.Speech.PrJSGF (jsgfPrinter) where - -import GF.Conversion.Types -import GF.Data.Utilities -import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats) -import GF.Infra.Ident -import GF.Infra.Print -import GF.Infra.Option -import GF.Probabilistic.Probabilistic (Probs) -import GF.Speech.SISR -import GF.Speech.SRG -import GF.Speech.RegExp -import GF.Compile.ShellState (StateGrammar) - -import Data.Char -import Data.List -import Data.Maybe -import Text.PrettyPrint.HughesPJ -import Debug.Trace - -width :: Int -width = 75 - -jsgfPrinter :: Maybe SISRFormat - -> Options - -> StateGrammar -> String -jsgfPrinter sisr opts s = renderStyle st $ prJSGF sisr $ makeSimpleSRG opts s - where st = style { lineLength = width } - -prJSGF :: Maybe SISRFormat -> SRG -> Doc -prJSGF sisr srg@(SRG{grammarName=name,grammarLanguage=ml, - startCat=start,origStartCat=origStart,rules=rs}) - = header $++$ mainCat $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs) - where - header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$ - comment ("JSGF speech recognition grammar for " ++ name) $$ - comment "Generated by GF" $$ - text ("grammar " ++ name ++ ";") - lang = maybe empty text ml - mainCat = comment ("Start category: " ++ origStart) $$ - case cfgCatToGFCat origStart of - Just c -> rule True "MAIN" [prCat (catFormId c)] - Nothing -> empty - prRule (SRGRule cat origCat rhs) = - comment origCat $$ - rule False cat (map prAlt rhs) --- rule False cat (map prAlt rhs) - -- FIXME: use the probability - prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag] --- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag - where initTag | isEmpty t = empty - | otherwise = text "" <+> t - where t = tag sisr (profileInitSISR n) - finalTag = tag sisr (profileFinalSISR n) - p = if isEmpty initTag && isEmpty finalTag then id else parens - - topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg] - where it i c = prCat c <+> tag sisr (topCatSISR c) - -catFormId :: String -> String -catFormId = (++ "_cat") - -prCat :: SRGCat -> Doc -prCat c = char '<' <> text c <> char '>' - -prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc -prItem sisr t = f 0 - where - f _ (REUnion []) = text "" - f p (REUnion xs) - | not (null es) = brackets (f 0 (REUnion nes)) - | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs)) - where (es,nes) = partition isEpsilon xs - f _ (REConcat []) = text "" - f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs)) - f p (RERepeat x) = f 3 x <> char '*' - f _ (RESymbol s) = prSymbol sisr t s - -{- -prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc -prItem _ _ [] = text "" -prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss - where paren = if length ss == 1 then id else parens --} - -prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc -prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n) -prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation - | otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars - -tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc -tag Nothing _ = empty -tag (Just fmt) t = case t fmt of - [] -> empty - ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}' - where e [] = [] - e ('}':xs) = '\\':'}':e xs - e ('\n':xs) = ' ' : e (dropWhile isSpace xs) - e (x:xs) = x:e xs - -isPunct :: Char -> Bool -isPunct c = c `elem` "-_.;.,?!" - -comment :: String -> Doc -comment s = text "//" <+> text s - -alts :: [Doc] -> Doc -alts = fsep . prepunctuate (text "| ") - -rule :: Bool -> SRGCat -> [Doc] -> Doc -rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';' - where p = if pub then text "public" else empty - --- Pretty-printing utilities - -emptyLine :: Doc -emptyLine = text "" - -prepunctuate :: Doc -> [Doc] -> [Doc] -prepunctuate _ [] = [] -prepunctuate p (x:xs) = x : map (p <>) xs - -($++$) :: Doc -> Doc -> Doc -x $++$ y = x $$ emptyLine $$ y - diff --git a/src-3.0/GFC.hs b/src-3.0/GFC.hs index f8ae6e8e3..73fb6f9f9 100644 --- a/src-3.0/GFC.hs +++ b/src-3.0/GFC.hs @@ -47,6 +47,7 @@ fmtExtension FmtHaskell = "hs" fmtExtension FmtHaskell_GADT = "hs" fmtExtension FmtBNF = "bnf" fmtExtension FmtSRGS_XML = "grxml" +fmtExtension FmtJSGF = "jsgf" fmtExtension FmtVoiceXML = "vxml" writeOutputFile :: FilePath -> String -> IOE () -- cgit v1.2.3