summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/GF/Speech/FiniteState.hs105
-rw-r--r--src/GF/Speech/PrGSL.hs7
-rw-r--r--src/GF/Speech/PrJSGF.hs7
-rw-r--r--src/GF/Speech/PrSLF.hs32
-rw-r--r--src/GF/Speech/SRG.hs31
-rw-r--r--src/GF/Speech/TransformCFG.hs82
-rw-r--r--src/Makefile2
7 files changed, 191 insertions, 75 deletions
diff --git a/src/GF/Speech/FiniteState.hs b/src/GF/Speech/FiniteState.hs
new file mode 100644
index 000000000..bdbd21b67
--- /dev/null
+++ b/src/GF/Speech/FiniteState.hs
@@ -0,0 +1,105 @@
+module GF.Speech.FiniteState (FA, State,
+ startState, finalStates,
+ states, transitions,
+ moveLabelsToNodes) where
+
+import Data.Graph.Inductive
+import Data.List (nub,partition)
+import Data.Maybe (fromJust)
+
+import Debug.Trace
+
+data FA a b = FA (Gr a b) Node [Node]
+
+type State = Node
+
+startState :: FA a b -> State
+startState (FA _ s _) = s
+
+finalStates :: FA a b -> [State]
+finalStates (FA _ _ ss) = ss
+
+states :: FA a b -> [(State,a)]
+states (FA g _ _) = labNodes g
+
+transitions :: FA a b -> [(State,State,b)]
+transitions (FA g _ _) = labEdges g
+
+onGraph :: (Gr a b -> Gr c d) -> FA a b -> FA c d
+onGraph f (FA g s ss) = FA (f g) s ss
+
+newState :: a -> FA a b -> (FA a b, State)
+newState x (FA g s ss) = (FA g' s ss, n)
+ where (g',n) = addNode x g
+
+newEdge :: Node -> Node -> b -> FA a b -> FA a b
+newEdge f t l = onGraph (insEdge (f,t,l))
+
+addNode :: DynGraph gr => a -> gr a b -> (gr a b, Node)
+addNode x g = let s = freshNode g in (insNode (s,x) g, s)
+
+freshNode :: Graph gr => gr a b -> Node
+freshNode = succ . snd . nodeRange
+
+-- | Get an infinte supply of new nodes.
+freshNodes :: Graph gr => gr a b -> [Node]
+freshNodes g = [snd (nodeRange g)+1..]
+
+-- | Transform a standard finite automaton with labelled edges
+-- to one where the labels are on the nodes instead. This can add
+-- up to one extra node per edge.
+moveLabelsToNodes :: Eq a => FA () (Maybe a) -> FA (Maybe a) ()
+moveLabelsToNodes = onGraph moveLabelsToNodes_
+
+moveLabelsToNodes_ :: (DynGraph gr, Eq a) => gr () (Maybe a) -> gr (Maybe a) ()
+moveLabelsToNodes_ g = gmap f g'
+ where g' = sameLabelIncoming g
+ f (to,n,(),fr) = (removeAdjLabels to, n, l, removeAdjLabels fr)
+ where l | not (allEqual ls)
+ = error $ "moveLabelsToNodes: not all incoming labels are equal"
+ | null ls = Nothing
+ | otherwise = head ls
+ ls = map snd $ lpre g' n
+ removeAdjLabels = map (\ (_,n) -> ((),n))
+
+-- | Add the extra nodes needed to make sure that all edges to a node
+-- have the same label.
+sameLabelIncoming :: (DynGraph gr, Eq b) => gr () (Maybe b) -> gr () (Maybe b)
+sameLabelIncoming gr = foldr fixIncoming gr (nodes gr)
+
+fixIncoming :: (DynGraph gr, Eq b) => Node -> gr () (Maybe b) -> gr () (Maybe b)
+fixIncoming n gr | allLabelsEqual to' = gr
+ | otherwise = addContexts newContexts $ delNode n gr
+ where (to,_,_,fr) = context gr n
+ -- move cyclic edges to the list of incoming edges
+ (cyc,fr') = partition (\ (_,t) -> t == n) fr
+ to' = to ++ cyc
+ -- make new nodes for each unique label
+ newNodes = zip (nub $ map fst to') (freshNodes gr)
+ -- for each cyclic edge, add an edge to the node for
+ -- that label (could be the current node).
+ fr'' = fr' ++ [ (l',fromJust (lookup l' newNodes)) | (l',f) <- to', f == n ]
+ -- keep all incoming non-cyclic edges with the right label.
+ to'' l = [ e | e@(l',f) <- to', l'==l, f /= n ]
+ newContexts = [ (to'' l,n',(),fr'') | (l,n') <- newNodes]
+
+allLabelsEqual :: Eq b => Adj b -> Bool
+allLabelsEqual = allEqual . map fst
+
+edgeLabel :: LEdge b -> b
+edgeLabel (_,_,l) = l
+
+ledgeToEdge :: LEdge b -> Edge
+ledgeToEdge (f,t,_) = (f,t)
+
+addContexts :: DynGraph gr => [Context a b] -> gr a b -> gr a b
+addContexts cs gr = foldr (&) gr cs
+
+--
+-- * Utilities
+--
+
+allEqual :: Eq a => [a] -> Bool
+allEqual [] = True
+allEqual (x:xs) = all (==x) xs
+
diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs
index 8ddf0a521..b98339914 100644
--- a/src/GF/Speech/PrGSL.hs
+++ b/src/GF/Speech/PrGSL.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/06/17 12:46:04 $
+-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.19 $
+-- > CVS $Revision: 1.20 $
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
@@ -18,7 +18,6 @@
module GF.Speech.PrGSL (gslPrinter) where
import GF.Speech.SRG
-import GF.Speech.TransformCFG
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -32,7 +31,7 @@ import Data.Char (toUpper,toLower)
gslPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
gslPrinter name opts cfg = prGSL srg ""
- where srg = makeSRG name opts (makeNice cfg)
+ where srg = makeSRG name opts cfg
prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs
index 8b73a080a..052299329 100644
--- a/src/GF/Speech/PrJSGF.hs
+++ b/src/GF/Speech/PrJSGF.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/06/17 12:46:05 $
+-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.13 $
+-- > CVS $Revision: 1.14 $
--
-- This module prints a CFG as a JSGF grammar.
--
@@ -20,7 +20,6 @@
module GF.Speech.PrJSGF (jsgfPrinter) where
import GF.Speech.SRG
-import GF.Speech.TransformCFG
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..))
@@ -31,7 +30,7 @@ import GF.Infra.Option
jsgfPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
jsgfPrinter name opts cfg = prJSGF srg ""
- where srg = makeSRG name opts (makeNice cfg)
+ where srg = makeSRG name opts cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
diff --git a/src/GF/Speech/PrSLF.hs b/src/GF/Speech/PrSLF.hs
index cec2b89fd..9fe7d20ee 100644
--- a/src/GF/Speech/PrSLF.hs
+++ b/src/GF/Speech/PrSLF.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/02 15:47:46 $
+-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.2 $
+-- > CVS $Revision: 1.3 $
--
-- This module converts a CFG to an SLF finite-state network
-- for use with the ATK recognizer. The SLF format is described
@@ -22,6 +22,7 @@ module GF.Speech.PrSLF (slfPrinter) where
import GF.Speech.SRG
import GF.Speech.TransformCFG
+import GF.Speech.FiniteState
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -31,24 +32,35 @@ import GF.Infra.Print
import GF.Infra.Option
import Data.Char (toUpper,toLower)
+import Data.Maybe (fromMaybe)
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord }
-- | An SLF word is a word, or the empty string.
-type SLFWord = String
+type SLFWord = Maybe String
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
slfPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
-slfPrinter name opts cfg = prSLF slf ""
- where slf = srg2slf $ makeSRG name opts $ makeRegular $ makeNice cfg
+slfPrinter name opts cfg = prSLF (regularToSLF start rgr) ""
+ where start = getStartCat opts
+ rgr = makeRegular $ removeEmptyCats $ cfgToCFRules cfg
+
+regularToSLF :: String -> CFRules -> SLF
+regularToSLF s rs = automatonToSLF $ compileAutomaton s rs
+
+automatonToSLF :: FA () (Maybe String) -> SLF
+automatonToSLF fa =
+ SLF { slfNodes = map mkSLFNode (states fa'),
+ slfEdges = zipWith mkSLFEdge [0..] (transitions fa') }
+ where fa' = moveLabelsToNodes fa
+ mkSLFNode (i,w) = SLFNode { nId = i, nWord = w }
+ mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
-srg2slf :: SRG -> SLF
-srg2slf = undefined -- should use TransformCFG.compileAutomaton
prSLF :: SLF -> ShowS
prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns) . unlinesS (map prEdge es)
@@ -60,8 +72,10 @@ prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns)
showWord :: SLFWord -> String
-showWord "" = "!NULL"
-showWord w = w -- FIXME: convert words to upper case
+showWord Nothing = "!NULL"
+showWord (Just w) = w -- FIXME: convert words to upper case
+ -- FIXME: could this be the empty string? if so, print as !NULL
prFields :: [(String,String)] -> ShowS
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
+
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index 2594e0a3e..9f4919ed2 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/06/17 12:46:05 $
+-- > CVS $Date: 2005/09/07 14:21:30 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.14 $
+-- > CVS $Revision: 1.15 $
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
@@ -21,10 +21,6 @@
module GF.Speech.SRG where
import GF.Infra.Ident
--- import GF.OldParsing.CFGrammar
--- import GF.OldParsing.Utilities (Symbol(..))
--- import GF.OldParsing.GrammarTypes
--- import GF.Printing.PrintParser
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
@@ -53,18 +49,18 @@ type CatNames = FiniteMap String String
makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
- -> [CFRule_] -- ^ A context-free grammar
+ -> CGrammar -- ^ A context-free grammar
-> SRG
makeSRG i opts gr = SRG { grammarName = name,
- startCat = start,
+ startCat = lookupFM_ names origStart,
origStartCat = origStart,
- rules = rs }
+ rules = map (cfgRulesToSRGRule names) cfgRules }
where
name = prIdent i
- origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
- start = lookupFM_ names origStart
- names = mkCatNames name (nub $ map ruleCat gr)
- rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr)
+ origStart = getStartCat opts
+ gr' = removeLeftRecursion $ removeEmptyCats $ cfgToCFRules gr
+ (cats,cfgRules) = unzip gr'
+ names = mkCatNames name cats
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
@@ -111,15 +107,6 @@ unlinesS = join "\n"
join :: String -> [ShowS] -> ShowS
join glue = concatS . intersperse (showString glue)
-sortAndGroupBy :: Ord b =>
- (a -> b) -- ^ Gets the value to sort and group by
- -> [a]
- -> [[a]]
-sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
-
-both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
-both f g x y = f (g x) (g y)
-
prtS :: Print a => a -> ShowS
prtS = showString . prt
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index 10f84bd79..9c3ed2c06 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/06 08:06:42 $
+-- > CVS $Date: 2005/09/07 14:21:31 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.15 $
+-- > CVS $Revision: 1.16 $
--
-- This module does some useful transformations on CFGs.
--
@@ -16,18 +16,25 @@
-- peb thinks: most of this module should be moved to GF.Conversion...
-----------------------------------------------------------------------------
-module GF.Speech.TransformCFG (makeNice, CFRule_, makeRegular) where
+module GF.Speech.TransformCFG (CFRule_, CFRules,
+ cfgToCFRules, getStartCat,
+ removeLeftRecursion,
+ removeEmptyCats,
+ makeRegular,
+ compileAutomaton) where
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types
import GF.Infra.Print
+import GF.Infra.Option
+import GF.Speech.FiniteState
import Control.Monad
import Data.FiniteMap
import Data.List
-import Data.Maybe (fromJust)
+import Data.Maybe (fromJust, fromMaybe)
import Debug.Trace
@@ -36,33 +43,33 @@ import Debug.Trace
type CFRule_ = CFRule Cat_ Name Token
type Cat_ = String
-type CFRules = FiniteMap Cat_ [CFRule_]
+type CFRules = [(Cat_,[CFRule_])]
--- | Remove left-recursion and categories with no productions
--- from a context-free grammar.
-makeNice :: CGrammar -> [CFRule_]
-makeNice = ungroupProds . makeNice' . groupProds . cfgToCFRules
- where makeNice' = removeLeftRecursion . removeEmptyCats
-
-cfgToCFRules :: CGrammar -> [CFRule_]
-cfgToCFRules cfg = [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg]
+cfgToCFRules :: CGrammar -> CFRules
+cfgToCFRules cfg = groupProds [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg]
where symb = mapSymbol catToString id
-- symb (Cat c) = Cat (catToString c)
-- symb (Tok t) = Tok t
catToString = prt
+getStartCat :: Options -> String
+getStartCat opts = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
+
-- | Group productions by their lhs categories
groupProds :: [CFRule_] -> CFRules
-groupProds = addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
+groupProds = fmToList . addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
ungroupProds :: CFRules -> [CFRule_]
-ungroupProds = concat . eltsFM
+ungroupProds = concat . map snd
+
+catRules :: CFRules -> Cat_ -> [CFRule_]
+catRules rs c = fromMaybe [] (lookup c rs)
-- | Remove productions which use categories which have no productions
removeEmptyCats :: CFRules -> CFRules
-removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
+removeEmptyCats = fix removeEmptyCats'
where
- removeEmptyCats' :: [(Cat_,[CFRule_])] -> [(Cat_,[CFRule_])]
+ removeEmptyCats' :: CFRules -> CFRules
removeEmptyCats' rs = k'
where
keep = filter (not . null . snd) rs
@@ -71,16 +78,16 @@ removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
removeLeftRecursion :: CFRules -> CFRules
-removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs
+removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
-- FIXME: this will give multiple rules with the same name
- [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookupFM rs aj)]
+ [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookup aj rs)]
handleProd r = [r]
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
- -> [(Cat_,[CFRule_])]
+ -> CFRules
removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)]
| otherwise = [(a, as), (a', a's)]
where
@@ -100,16 +107,14 @@ isDirectLeftRecursive _ = False
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
-- grammar
-makeRegular :: [CFRule_] -> [CFRule_]
-makeRegular g = concatMap trSet (mutRecCats g)
+makeRegular :: CFRules -> CFRules
+makeRegular g = groupProds $ concatMap trSet (mutRecCats g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat cs
where rs = concatMap (catRules g) cs
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
- ++ concatMap (makeRightLinearRules c) crs
- -- FIXME: add more rules here, see pg 255, item 2
- where crs = catRules rs c
- c' = newCat c
+ ++ concatMap (makeRightLinearRules c) (catRules g c)
+ where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
@@ -119,27 +124,29 @@ makeRegular g = concatMap trSet (mutRecCats g)
newCat c = c ++ "$"
--- | Check if all the rules are right-linear, or all the rules are
--- left-linear, with respect to given categories.
-allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
-allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-
-- | Get the sets of mutually recursive non-terminals for a grammar.
-mutRecCats :: Eq c => [CFRule c n t] -> [[c]]
+mutRecCats :: CFRules -> [[Cat_]]
mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r
- where r = nub [(c,c') | CFRule c ss _ <- g, Cat c' <- ss]
- allCats = nub [c | CFRule c _ _ <- g]
+ where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
+ allCats = map fst g
+
+
-- Convert a strongly regular grammar to a finite automaton.
--- compileAutomaton ::
+compileAutomaton :: Cat_ -- ^ Start category
+ -> CFRules
+ -> FA () (Maybe Token)
+compileAutomaton s g = undefined
--
-- * CFG rule utilities
--
+{-
-- | Get all the rules for a given category.
catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t]
catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c]
+-}
-- | Gets the set of LHS categories of a set of rules.
lhsCats :: Eq c => [CFRule c n t] -> [c]
@@ -148,6 +155,11 @@ lhsCats = nub . map lhsCat
lhsCat :: CFRule c n t -> c
lhsCat (CFRule c _ _) = c
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
diff --git a/src/Makefile b/src/Makefile
index 678d6cd6b..dea6000a7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -3,7 +3,7 @@ include config.mk
GHMAKE=$(GHC) --make
GHCXMAKE=ghcxmake
-GHCFLAGS+= -fglasgow-exts
+GHCFLAGS+= -fglasgow-exts -package fgl
GHCOPTFLAGS=-O2
GHCFUDFLAG=
JAVAFLAGS=-target 1.4 -source 1.4