blob: 26421d36c22edf574ce177990e614b45049227cd (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
----------------------------------------------------------------------
-- |
-- Module : PrJSGF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- This module prints a CFG as a JSGF grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
--
-- FIXME: convert to UTF-8
-----------------------------------------------------------------------------
module GF.Speech.PrJSGF (jsgfPrinter) where
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..))
import GF.Infra.Ident
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SISR
import GF.Speech.SRG
import GF.Speech.RegExp
import Debug.Trace
jsgfPrinter :: Ident -- ^ Grammar name
-> String -- ^ Start category
-> Options
-> Maybe SISRFormat
-> Maybe Probs -> CGrammar -> String
jsgfPrinter name start opts sisr probs cfg = trace (show srg) $ prJSGF srg sisr ""
where srg = makeSimpleSRG name start opts probs cfg
prJSGF :: SRG -> Maybe SISRFormat -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
= header . nl
. mainCat . nl
. unlinesS topCatRules . nl
. unlinesS (map prRule rs)
where
header = showString "#JSGF V1.0 UTF-8;" . nl
. comment ("JSGF speech recognition grammar for " ++ name)
. comment "Generated by GF"
. showString ("grammar " ++ name ++ ";") . nl
mainCat = comment ("Start category: " ++ origStart)
. rule True "MAIN" [prCat start]
prRule (SRGRule cat origCat rhs) =
comment origCat
. rule False cat (map prAlt (ebnfSRGAlts rhs))
-- FIXME: use the probability
prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats]
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs]
it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
catFormId :: String -> String
catFormId = (++ "_cat")
catFieldId :: String -> String
catFieldId = (++ "_field")
prCat :: SRGCat -> ShowS
prCat c = showChar '<' . showString c . showChar '>'
prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
prItem sisr = f 1
where
f _ (REUnion []) = showString "<VOID>"
f p (REUnion xs) = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
f _ (REConcat []) = showString "<NULL>"
f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
f p (RERepeat x) = f 3 x . showString "*"
f _ (RESymbol s) = prSymbol sisr s
prSymbol :: Maybe SISRFormat -> Symbol SRGNT Token -> ShowS
prSymbol sisr (Cat n@(c,_)) = prCat c . tag sisr (catSISR n)
prSymbol _ (Tok t) | all isPunct (prt t) = id -- removes punctuation
| otherwise = prtS t -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> [SISRExpr] -> ShowS
tag Nothing _ = id
tag _ [] = id
tag (Just fmt) t = showString "{" . showString (prSISR fmt t) . showString "}"
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comment :: String -> ShowS
comment s = showString "// " . showString s . nl
paren f = wrap "(" f ")"
rule :: Bool -> SRGCat -> [ShowS] -> ShowS
rule pub c xs = p . prCat c . showString " = " . joinS " | " xs . showChar ';' . nl
where p = if pub then showString "public " else id
|