summaryrefslogtreecommitdiff
path: root/src-3.0
diff options
context:
space:
mode:
authorbjorn <bjorn@bringert.net>2008-06-22 11:31:06 +0000
committerbjorn <bjorn@bringert.net>2008-06-22 11:31:06 +0000
commit08df93d892428fd259670635798e873cb46dcb84 (patch)
treef0565332ebfedbb4ca5b123f91b6ec7759627457 /src-3.0
parentff5ddf447fa6eea6367e98e7af46736334ef3470 (diff)
Added old GF.Speech.PrSLF back in
Diffstat (limited to 'src-3.0')
-rw-r--r--src-3.0/GF/Speech/SLF.hs190
1 files changed, 190 insertions, 0 deletions
diff --git a/src-3.0/GF/Speech/SLF.hs b/src-3.0/GF/Speech/SLF.hs
new file mode 100644
index 000000000..9bc025558
--- /dev/null
+++ b/src-3.0/GF/Speech/SLF.hs
@@ -0,0 +1,190 @@
+----------------------------------------------------------------------
+-- |
+-- Module : PrSLF
+-- Maintainer : BB
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/10 16:43:44 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.12 $
+--
+-- This module converts a CFG to an SLF finite-state network
+-- for use with the ATK recognizer. The SLF format is described
+-- in the HTK manual, and an example for use in ATK is shown
+-- in the ATK manual.
+--
+-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
+-- categories in the grammar
+-----------------------------------------------------------------------------
+
+module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,
+ slfSubPrinter,slfSubGraphvizPrinter) where
+
+import GF.Data.Utilities
+import GF.Conversion.Types
+import GF.Formalism.CFG
+import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol)
+import GF.Infra.Ident
+import GF.Infra.Option (Options)
+import GF.Infra.Print
+import GF.Speech.CFGToFiniteState
+import GF.Speech.FiniteState
+import GF.Speech.TransformCFG
+import qualified GF.Visualization.Graphviz as Dot
+import GF.Compile.ShellState (StateGrammar)
+
+import Control.Monad
+import qualified Control.Monad.State as STM
+import Data.Char (toUpper)
+import Data.List
+import Data.Maybe
+
+data SLFs = SLFs [(String,SLF)] SLF
+
+data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
+
+data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
+ | SLFSubLat { nId :: Int, nLat :: String }
+
+-- | An SLF word is a word, or the empty string.
+type SLFWord = Maybe String
+
+data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
+
+type SLF_FA = FA State (Maybe (MFALabel String)) ()
+
+mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)])
+mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
+ where MFA start subs = {- renameSubs $ -} cfgToMFA opts s
+ main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa
+
+slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
+slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
+ . moveLabelsToNodes . dfa2nfa
+
+-- | Give sequential names to subnetworks.
+renameSubs :: MFA String -> MFA String
+renameSubs (MFA start subs) = MFA (newName start) subs'
+ where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
+ newName s = lookup' s newNames
+ subs' = [(newName s,renameLabels n) | (s,n) <- subs]
+ renameLabels = mapTransitions (mapSymbol newName id)
+
+--
+-- * SLF graphviz printing (without sub-networks)
+--
+
+slfGraphvizPrinter :: Options -> StateGrammar -> String
+slfGraphvizPrinter opts s
+ = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s
+ where
+ gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
+
+--
+-- * SLF graphviz printing (with sub-networks)
+--
+
+slfSubGraphvizPrinter :: Options -> StateGrammar -> String
+slfSubGraphvizPrinter opts s = Dot.prGraphviz g
+ where (main, subs) = mkFAs opts s
+ g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
+ ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
+ m = gvSLFFA Nothing main
+
+gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
+gvSLFFA n fa =
+ liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
+ . mapTransitions (const "")) (rename fa)
+ where mfaLabelToGv = symbol ("#"++) id
+ mkCluster Nothing = id
+ mkCluster (Just x)
+ = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
+ rename fa = do
+ names <- STM.get
+ let fa' = renameStates names fa
+ names' = unusedNames fa'
+ STM.put names'
+ return fa'
+
+--
+-- * SLF printing (without sub-networks)
+--
+
+slfPrinter :: Options -> StateGrammar -> String
+slfPrinter opts s
+ = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s
+
+--
+-- * SLF printing (with sub-networks)
+--
+
+-- | Make a network with subnetworks in SLF
+slfSubPrinter :: Options -> StateGrammar -> String
+slfSubPrinter opts s = prSLFs slfs
+ where
+ (main,subs) = mkFAs opts s
+ slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
+ faToSLF = automatonToSLF mfaNodeToSLFNode
+
+automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
+automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
+ where ns = map (uncurry mkNode) (states fa)
+ es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
+
+mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
+mfaNodeToSLFNode i l = case l of
+ Nothing -> mkSLFNode i Nothing
+ Just (Tok x) -> mkSLFNode i (Just x)
+ Just (Cat s) -> mkSLFSubLat i s
+
+mkSLFNode :: Int -> Maybe String -> SLFNode
+mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
+mkSLFNode i (Just w)
+ | isNonWord w = SLFNode { nId = i,
+ nWord = Nothing,
+ nTag = Just w }
+ | otherwise = SLFNode { nId = i,
+ nWord = Just (map toUpper w),
+ nTag = Just w }
+
+mkSLFSubLat :: Int -> String -> SLFNode
+mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
+
+mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
+mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
+
+prSLFs :: SLFs -> String
+prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
+ where prSub (n,s) = showString "SUBLAT=" . shows n
+ . nl . prOneSLF s . showString "." . nl
+
+prSLF :: SLF -> String
+prSLF slf = prOneSLF slf ""
+
+prOneSLF :: SLF -> ShowS
+prOneSLF (SLF { slfNodes = ns, slfEdges = es})
+ = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
+ where
+ header = prFields [("N",show (length ns)),("L", show (length es))] . nl
+ prNode (SLFNode { nId = i, nWord = w, nTag = t })
+ = prFields $ [("I",show i),("W",showWord w)]
+ ++ maybe [] (\t -> [("s",t)]) t
+ prNode (SLFSubLat { nId = i, nLat = l })
+ = prFields [("I",show i),("L",show l)]
+ prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
+
+-- | Check if a word should not correspond to a word in the SLF file.
+isNonWord :: String -> Bool
+isNonWord = any isPunct
+
+isPunct :: Char -> Bool
+isPunct c = c `elem` "-_.;.,?!()[]{}"
+
+showWord :: SLFWord -> String
+showWord Nothing = "!NULL"
+showWord (Just w) | null w = "!NULL"
+ | otherwise = w
+
+prFields :: [(String,String)] -> ShowS
+prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]