diff options
| author | bringert <unknown> | 2005-09-12 14:46:44 +0000 |
|---|---|---|
| committer | bringert <unknown> | 2005-09-12 14:46:44 +0000 |
| commit | ddda900d53ee3b8fa968bc8acb49f035f9ef860c (patch) | |
| tree | b83a52f978fbeffda4ed95d936b55a91b9f6c535 /src/GF/Speech/TransformCFG.hs | |
| parent | f882f97a22c9ed16c6f1735930698b8fba162351 (diff) | |
Completed unoptimized SLF generation.
Diffstat (limited to 'src/GF/Speech/TransformCFG.hs')
| -rw-r--r-- | src/GF/Speech/TransformCFG.hs | 150 |
1 files changed, 23 insertions, 127 deletions
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 5f1e4fb97..57d8ec87b 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/09/08 15:45:17 $ +-- > CVS $Date: 2005/09/12 15:46:44 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.19 $ +-- > CVS $Revision: 1.20 $ -- -- This module does some useful transformations on CFGs. -- @@ -16,12 +16,12 @@ -- peb thinks: most of this module should be moved to GF.Conversion... ----------------------------------------------------------------------------- -module GF.Speech.TransformCFG (CFRule_, CFRules, +-- FIXME: lots of this stuff is used by CFGToFiniteState, thus +-- the missing explicit export list. +module GF.Speech.TransformCFG {- (CFRule_, CFRules, cfgToCFRules, getStartCat, removeLeftRecursion, - removeEmptyCats, - makeRegular, - compileAutomaton) where + removeEmptyCats, removeIdenticalRules) -} where import GF.Infra.Ident import GF.Formalism.CFG @@ -62,8 +62,6 @@ groupProds = fmToList . addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r])) ungroupProds :: CFRules -> [CFRule_] ungroupProds = concat . map snd -catRules :: CFRules -> Cat_ -> [CFRule_] -catRules rs c = fromMaybe [] (lookup c rs) -- | Remove productions which use categories which have no productions removeEmptyCats :: CFRules -> CFRules @@ -77,13 +75,18 @@ removeEmptyCats = fix removeEmptyCats' emptyCats = filter (nothingOrNull . flip lookup rs) allCats k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep +-- | Remove rules which are identical, not caring about the rule names. 
+removeIdenticalRules :: CFRules -> CFRules +removeIdenticalRules g = [(c,nubBy sameCatAndRhs rs) | (c,rs) <- g] + where sameCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) = c1 == c2 && ss1 == ss2 + removeLeftRecursion :: CFRules -> CFRules removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs where handleProds (c, r) = (c, concatMap handleProd r) handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai = -- FIXME: this will give multiple rules with the same name - [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookup aj rs)] + [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs] handleProd r = [r] removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category @@ -103,92 +106,22 @@ isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c' isDirectLeftRecursive _ = False --- Use the transformation algorithm from \"Regular Approximation of Context-free --- Grammars through Approximation\", Mohri and Nederhof, 2000 --- to create an over-generating regular frammar for a context-free --- grammar -makeRegular :: CFRules -> CFRules -makeRegular g = groupProds $ concatMap trSet (mutRecCats g) - where trSet cs | allXLinear cs rs = rs - | otherwise = concatMap handleCat cs - where rs = concatMap (catRules g) cs - handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e - ++ concatMap (makeRightLinearRules c) (catRules g c) - where c' = newCat c - makeRightLinearRules b' (CFRule c ss n) = - case ys of - [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left - (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n - : makeRightLinearRules (newCat b) (CFRule c zs n) - where (xs,ys) = break (`catElem` cs) ss - newCat c = c ++ "$" - - --- | Get the sets of mutually recursive non-terminals for a grammar. 
-mutRecCats :: CFRules -> [[Cat_]] -mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r - where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] - allCats = map fst g - - - --- Convert a strongly regular grammar to a finite automaton. -compileAutomaton :: Cat_ -- ^ Start category - -> CFRules - -> FA () (Maybe Token) -compileAutomaton start g = make_fa s [Cat start] f g fa'' - where fa = newFA () - s = startState fa - (fa',f) = newState () fa - fa'' = addFinalState f fa' - --- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", --- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. -make_fa :: State -> [Symbol Cat_ Token] -> State - -> CFRules -> FA () (Maybe Token) -> FA () (Maybe Token) -make_fa q0 a q1 g fa = - case a of - [] -> newTrans q0 q1 Nothing fa - [Tok t] -> newTrans q0 q1 (Just t) fa - [Cat c] -> undefined - (x:beta) -> let (fa',q) = newState () fa - fa'' = make_fa q0 [x] q g fa' - fa''' = make_fa q beta q1 g fa'' - in fa''' - -- -- * CFG rule utilities -- -{- --- | Get all the rules for a given category. -catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t] -catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c] --} +catRules :: CFRules -> Cat_ -> [CFRule_] +catRules rs c = fromMaybe [] (lookup c rs) --- | Gets the set of LHS categories of a set of rules. -lhsCats :: Eq c => [CFRule c n t] -> [c] -lhsCats = nub . map lhsCat +catSetRules :: CFRules -> [Cat_] -> [CFRule_] +catSetRules g s = concatMap (catRules g) s lhsCat :: CFRule c n t -> c lhsCat (CFRule c _ _) = c --- | Check if all the rules are right-linear, or all the rules are --- left-linear, with respect to given categories. 
-allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool -allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs +ruleRhs :: CFRule c n t -> [Symbol c t] +ruleRhs (CFRule _ ss _) = ss --- | Checks if a context-free rule is right-linear. -isRightLinear :: Eq c => [c] -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity - -> Bool -isRightLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (safeInit ss) - --- | Checks if a context-free rule is left-linear. -isLeftLinear :: Eq c => [c] -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity - -> Bool -isLeftLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (drop 1 ss) -- | Checks if a symbol is a non-terminal of one of the given categories. catElem :: Eq c => Symbol c t -> [c] -> Bool @@ -202,37 +135,14 @@ anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) mkName :: String -> Name mkName n = Name (IC n) [] --- --- * Relations --- - --- FIXME: these could use a more efficent data structures and algorithms. - -isRelatedTo :: Eq a => [(a,a)] -> a -> a -> Bool -isRelatedTo r x y = (x,y) `elem` r - -transitiveClosure :: Eq a => [(a,a)] -> [(a,a)] -transitiveClosure r = fix (\r -> r `union` [ (x,w) | (x,y) <- r, (z,w) <- r, y == z ]) r - -reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined. - -> [(a,a)] -> [(a,a)] -reflexiveClosure u r = [(x,x) | x <- u] `union` r - -symmetricSubrelation :: Eq a => [(a,a)] -> [(a,a)] -symmetricSubrelation r = [p | p@(x,y) <- r, (y,x) `elem` r] - --- | Get the equivalence classes from an equivalence relation. Since --- the relation is relexive, the set can be recoved from the relation. 
-equivalenceClasses :: Eq a => [(a,a)] -> [[a]] -equivalenceClasses r = equivalenceClasses_ (nub (map fst r)) r - where equivalenceClasses_ [] _ = [] - equivalenceClasses_ (x:xs) r = (x:ys):equivalenceClasses_ zs r - where (ys,zs) = partition (isRelatedTo r x) xs -- -- * Utilities -- +findSet :: Eq c => c -> [[c]] -> Maybe [c] +findSet x = find (x `elem`) + fix :: Eq a => (a -> a) -> a -> a fix f x = let x' = f x in if x' == x then x else fix f x' @@ -240,26 +150,12 @@ nothingOrNull :: Maybe [a] -> Bool nothingOrNull Nothing = True nothingOrNull (Just xs) = null xs -safeInit :: [a] -> [a] -safeInit [] = [] -safeInit xs = init xs - unionAll :: Eq a => [[a]] -> [a] unionAll = nub . concat whenMP :: MonadPlus m => Bool -> a -> m a whenMP b x = if b then return x else mzero --- --- * Testing stuff, can be removed --- - -c --> ss = CFRule c ss (mkName "") - -prGr g = putStrLn $ showGr g - -showGr g = unlines $ map showRule g - -showRule (CFRule c ss _) = c ++ " --> " ++ unwords (map showSym ss) +lookup' :: Eq a => a -> [(a,b)] -> b +lookup' x = fromJust . lookup x -showSym s = symbol id show s
\ No newline at end of file |
