summary refs log tree commit diff
path: root/src/GF/Speech/TransformCFG.hs
diff options
context:
space:
mode:
author bringert <unknown> 2005-09-12 14:46:44 +0000
committer bringert <unknown> 2005-09-12 14:46:44 +0000
commit ddda900d53ee3b8fa968bc8acb49f035f9ef860c (patch)
tree b83a52f978fbeffda4ed95d936b55a91b9f6c535 /src/GF/Speech/TransformCFG.hs
parent f882f97a22c9ed16c6f1735930698b8fba162351 (diff)
Completed unoptimized SLF generation.
Diffstat (limited to 'src/GF/Speech/TransformCFG.hs')
-rw-r--r-- src/GF/Speech/TransformCFG.hs 150
1 files changed, 23 insertions, 127 deletions
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index 5f1e4fb97..57d8ec87b 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/08 15:45:17 $
+-- > CVS $Date: 2005/09/12 15:46:44 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.19 $
+-- > CVS $Revision: 1.20 $
--
-- This module does some useful transformations on CFGs.
--
@@ -16,12 +16,12 @@
-- peb thinks: most of this module should be moved to GF.Conversion...
-----------------------------------------------------------------------------
-module GF.Speech.TransformCFG (CFRule_, CFRules,
+-- FIXME: lots of this stuff is used by CFGToFiniteState, thus
+-- the missing explicit export list.
+module GF.Speech.TransformCFG {- (CFRule_, CFRules,
cfgToCFRules, getStartCat,
removeLeftRecursion,
- removeEmptyCats,
- makeRegular,
- compileAutomaton) where
+ removeEmptyCats, removeIdenticalRules) -} where
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -62,8 +62,6 @@ groupProds = fmToList . addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
ungroupProds :: CFRules -> [CFRule_]
ungroupProds = concat . map snd
-catRules :: CFRules -> Cat_ -> [CFRule_]
-catRules rs c = fromMaybe [] (lookup c rs)
-- | Remove productions which use categories which have no productions
removeEmptyCats :: CFRules -> CFRules
@@ -77,13 +75,18 @@ removeEmptyCats = fix removeEmptyCats'
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
+-- | Remove rules which are identical, not caring about the rule names.
+removeIdenticalRules :: CFRules -> CFRules
+removeIdenticalRules g = [(c,nubBy sameCatAndRhs rs) | (c,rs) <- g]
+ where sameCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) = c1 == c2 && ss1 == ss2
+
removeLeftRecursion :: CFRules -> CFRules
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
-- FIXME: this will give multiple rules with the same name
- [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookup aj rs)]
+ [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
handleProd r = [r]
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
@@ -103,92 +106,22 @@ isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
isDirectLeftRecursive _ = False
--- Use the transformation algorithm from \"Regular Approximation of Context-free
--- Grammars through Approximation\", Mohri and Nederhof, 2000
--- to create an over-generating regular frammar for a context-free
--- grammar
-makeRegular :: CFRules -> CFRules
-makeRegular g = groupProds $ concatMap trSet (mutRecCats g)
- where trSet cs | allXLinear cs rs = rs
- | otherwise = concatMap handleCat cs
- where rs = concatMap (catRules g) cs
- handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
- ++ concatMap (makeRightLinearRules c) (catRules g c)
- where c' = newCat c
- makeRightLinearRules b' (CFRule c ss n) =
- case ys of
- [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
- (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
- : makeRightLinearRules (newCat b) (CFRule c zs n)
- where (xs,ys) = break (`catElem` cs) ss
- newCat c = c ++ "$"
-
-
--- | Get the sets of mutually recursive non-terminals for a grammar.
-mutRecCats :: CFRules -> [[Cat_]]
-mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r
- where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
- allCats = map fst g
-
-
-
--- Convert a strongly regular grammar to a finite automaton.
-compileAutomaton :: Cat_ -- ^ Start category
- -> CFRules
- -> FA () (Maybe Token)
-compileAutomaton start g = make_fa s [Cat start] f g fa''
- where fa = newFA ()
- s = startState fa
- (fa',f) = newState () fa
- fa'' = addFinalState f fa'
-
--- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
-make_fa :: State -> [Symbol Cat_ Token] -> State
- -> CFRules -> FA () (Maybe Token) -> FA () (Maybe Token)
-make_fa q0 a q1 g fa =
- case a of
- [] -> newTrans q0 q1 Nothing fa
- [Tok t] -> newTrans q0 q1 (Just t) fa
- [Cat c] -> undefined
- (x:beta) -> let (fa',q) = newState () fa
- fa'' = make_fa q0 [x] q g fa'
- fa''' = make_fa q beta q1 g fa''
- in fa'''
-
--
-- * CFG rule utilities
--
-{-
--- | Get all the rules for a given category.
-catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t]
-catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c]
--}
+catRules :: CFRules -> Cat_ -> [CFRule_]
+catRules rs c = fromMaybe [] (lookup c rs)
--- | Gets the set of LHS categories of a set of rules.
-lhsCats :: Eq c => [CFRule c n t] -> [c]
-lhsCats = nub . map lhsCat
+catSetRules :: CFRules -> [Cat_] -> [CFRule_]
+catSetRules g s = concatMap (catRules g) s
lhsCat :: CFRule c n t -> c
lhsCat (CFRule c _ _) = c
--- | Check if all the rules are right-linear, or all the rules are
--- left-linear, with respect to given categories.
-allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
-allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+ruleRhs :: CFRule c n t -> [Symbol c t]
+ruleRhs (CFRule _ ss _) = ss
--- | Checks if a context-free rule is right-linear.
-isRightLinear :: Eq c => [c] -- ^ The categories to consider
- -> CFRule c n t -- ^ The rule to check for right-linearity
- -> Bool
-isRightLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (safeInit ss)
-
--- | Checks if a context-free rule is left-linear.
-isLeftLinear :: Eq c => [c] -- ^ The categories to consider
- -> CFRule c n t -- ^ The rule to check for right-linearity
- -> Bool
-isLeftLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (drop 1 ss)
-- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Eq c => Symbol c t -> [c] -> Bool
@@ -202,37 +135,14 @@ anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkName :: String -> Name
mkName n = Name (IC n) []
---
--- * Relations
---
-
--- FIXME: these could use a more efficent data structures and algorithms.
-
-isRelatedTo :: Eq a => [(a,a)] -> a -> a -> Bool
-isRelatedTo r x y = (x,y) `elem` r
-
-transitiveClosure :: Eq a => [(a,a)] -> [(a,a)]
-transitiveClosure r = fix (\r -> r `union` [ (x,w) | (x,y) <- r, (z,w) <- r, y == z ]) r
-
-reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
- -> [(a,a)] -> [(a,a)]
-reflexiveClosure u r = [(x,x) | x <- u] `union` r
-
-symmetricSubrelation :: Eq a => [(a,a)] -> [(a,a)]
-symmetricSubrelation r = [p | p@(x,y) <- r, (y,x) `elem` r]
-
--- | Get the equivalence classes from an equivalence relation. Since
--- the relation is relexive, the set can be recoved from the relation.
-equivalenceClasses :: Eq a => [(a,a)] -> [[a]]
-equivalenceClasses r = equivalenceClasses_ (nub (map fst r)) r
- where equivalenceClasses_ [] _ = []
- equivalenceClasses_ (x:xs) r = (x:ys):equivalenceClasses_ zs r
- where (ys,zs) = partition (isRelatedTo r x) xs
--
-- * Utilities
--
+findSet :: Eq c => c -> [[c]] -> Maybe [c]
+findSet x = find (x `elem`)
+
fix :: Eq a => (a -> a) -> a -> a
fix f x = let x' = f x in if x' == x then x else fix f x'
@@ -240,26 +150,12 @@ nothingOrNull :: Maybe [a] -> Bool
nothingOrNull Nothing = True
nothingOrNull (Just xs) = null xs
-safeInit :: [a] -> [a]
-safeInit [] = []
-safeInit xs = init xs
-
unionAll :: Eq a => [[a]] -> [a]
unionAll = nub . concat
whenMP :: MonadPlus m => Bool -> a -> m a
whenMP b x = if b then return x else mzero
---
--- * Testing stuff, can be removed
---
-
-c --> ss = CFRule c ss (mkName "")
-
-prGr g = putStrLn $ showGr g
-
-showGr g = unlines $ map showRule g
-
-showRule (CFRule c ss _) = c ++ " --> " ++ unwords (map showSym ss)
+lookup' :: Eq a => a -> [(a,b)] -> b
+lookup' x = fromJust . lookup x
-showSym s = symbol id show s \ No newline at end of file