summaryrefslogtreecommitdiff
path: root/src/GF/Speech/PrSLF.hs
blob: 9bc02555832784e5a1c769611dbdcabe9f6b2719 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
----------------------------------------------------------------------
-- |
-- Module      : PrSLF
-- Maintainer  : BB
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.12 $
--
-- This module converts a CFG to an SLF finite-state network
-- for use with the ATK recognizer. The SLF format is described
-- in the HTK manual, and an example for use in ATK is shown
-- in the ATK manual.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
-----------------------------------------------------------------------------

module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,
                        slfSubPrinter,slfSubGraphvizPrinter) where

import GF.Data.Utilities
import GF.Conversion.Types
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol)
import GF.Infra.Ident
import GF.Infra.Option (Options)
import GF.Infra.Print
import GF.Speech.CFGToFiniteState
import GF.Speech.FiniteState
import GF.Speech.TransformCFG
import qualified GF.Visualization.Graphviz as Dot
import GF.Compile.ShellState (StateGrammar)

import Control.Monad
import qualified Control.Monad.State as STM
import Data.Char (toUpper)
import Data.List
import Data.Maybe

data SLFs = SLFs [(String,SLF)] SLF

data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }

data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
             | SLFSubLat { nId :: Int, nLat :: String }

-- | An SLF word is a word, or the empty string.
type SLFWord = Maybe String

data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }

type SLF_FA = FA State (Maybe (MFALabel String)) ()

mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)])
mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
  where MFA start subs = {- renameSubs $ -} cfgToMFA opts s
        main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa        

slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
             . moveLabelsToNodes . dfa2nfa

-- | Give sequential names to subnetworks.
renameSubs :: MFA String -> MFA String
renameSubs (MFA start subs) = MFA (newName start) subs'
  where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
        newName s = lookup' s newNames
        subs' = [(newName s,renameLabels n) | (s,n) <- subs]
        renameLabels = mapTransitions (mapSymbol newName id)

--
-- * SLF graphviz printing (without sub-networks)
--

slfGraphvizPrinter :: Options -> StateGrammar -> String
slfGraphvizPrinter opts s
    = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s
  where 
  gvFA = mapStates (fromMaybe "") . mapTransitions (const "")

--
-- * SLF graphviz printing (with sub-networks)
--

slfSubGraphvizPrinter :: Options -> StateGrammar -> String
slfSubGraphvizPrinter opts s = Dot.prGraphviz g
  where (main, subs) = mkFAs opts s
        g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..] 
        ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
        m = gvSLFFA Nothing main

gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
gvSLFFA n fa = 
    liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv) 
            . mapTransitions (const "")) (rename fa)
  where mfaLabelToGv = symbol ("#"++) id
        mkCluster Nothing = id
        mkCluster (Just x) 
            = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
        rename fa = do
                    names <- STM.get
                    let fa' = renameStates names fa
                        names' = unusedNames fa'
                    STM.put names'
                    return fa'

--
-- * SLF printing (without sub-networks)
--

slfPrinter :: Options -> StateGrammar -> String
slfPrinter opts s 
    = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s

--
-- * SLF printing (with sub-networks)
--

-- | Make a network with subnetworks in SLF
slfSubPrinter :: Options -> StateGrammar -> String
slfSubPrinter opts s = prSLFs slfs
  where 
  (main,subs) = mkFAs opts s
  slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
  faToSLF = automatonToSLF mfaNodeToSLFNode

automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
  where ns = map (uncurry mkNode) (states fa)
        es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)

mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
mfaNodeToSLFNode i l = case l of
                              Nothing -> mkSLFNode i Nothing
                              Just (Tok x) -> mkSLFNode i (Just x)
                              Just (Cat s) -> mkSLFSubLat i s

mkSLFNode :: Int -> Maybe String -> SLFNode
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
mkSLFNode i (Just w)
    | isNonWord w = SLFNode { nId = i, 
                              nWord = Nothing, 
                              nTag = Just w }
    | otherwise = SLFNode { nId = i, 
                            nWord = Just (map toUpper w), 
                            nTag = Just w }

mkSLFSubLat :: Int -> String -> SLFNode
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }

mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }

prSLFs :: SLFs -> String
prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
  where prSub (n,s) = showString "SUBLAT=" . shows n 
                      . nl . prOneSLF s . showString "." . nl

prSLF :: SLF -> String
prSLF slf = prOneSLF slf ""

prOneSLF :: SLF -> ShowS
prOneSLF (SLF { slfNodes = ns, slfEdges = es}) 
    = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
    where
    header = prFields [("N",show (length ns)),("L", show (length es))] . nl
    prNode (SLFNode { nId = i, nWord = w, nTag = t })
            = prFields $ [("I",show i),("W",showWord w)] 
                         ++ maybe [] (\t -> [("s",t)]) t
    prNode (SLFSubLat { nId = i, nLat = l }) 
            = prFields [("I",show i),("L",show l)]
    prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]

-- | Check if a word should not correspond to a word in the SLF file.
isNonWord :: String -> Bool
isNonWord = any isPunct

isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!()[]{}"

showWord :: SLFWord -> String
showWord Nothing = "!NULL"
showWord (Just w) | null w = "!NULL"
                  | otherwise = w

prFields :: [(String,String)] -> ShowS
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]