summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <unknown>2005-09-06 07:06:42 +0000
committerbringert <unknown>2005-09-06 07:06:42 +0000
commit7bbdc172110f1b7139ecca48c3249940264da10a (patch)
treed697779c3157e58ce4a39b68e319d793cef13cd9 /src
parent1a9a2ceaea22bca18424755becd4cb40e751a22b (diff)
Finished untested function for making context-free grammars regular.
Diffstat (limited to 'src')
-rw-r--r--src/GF/Speech/TransformCFG.hs120
1 files changed, 97 insertions, 23 deletions
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index df2a787f4..10f84bd79 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/09/02 15:47:47 $
+-- > CVS $Date: 2005/09/06 08:06:42 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.14 $
+-- > CVS $Revision: 1.15 $
--
-- This module does some useful transformations on CFGs.
--
@@ -20,10 +20,11 @@ module GF.Speech.TransformCFG (makeNice, CFRule_, makeRegular) where
import GF.Infra.Ident
import GF.Formalism.CFG
-import GF.Formalism.Utilities (Symbol(..), mapSymbol)
+import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types
import GF.Infra.Print
+import Control.Monad
import Data.FiniteMap
import Data.List
import Data.Maybe (fromJust)
@@ -52,8 +53,7 @@ cfgToCFRules cfg = [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg]
-- | Group productions by their lhs categories
groupProds :: [CFRule_] -> CFRules
-groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs]))
- where ruleCat (CFRule c _ _) = c
+groupProds = addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
ungroupProds :: CFRules -> [CFRule_]
ungroupProds = concat . eltsFM
@@ -101,49 +101,103 @@ isDirectLeftRecursive _ = False
-- to create an over-generating regular frammar for a context-free
-- grammar
makeRegular :: [CFRule_] -> [CFRule_]
-makeRegular = undefined
+makeRegular g = concatMap trSet (mutRecCats g)
+ where trSet cs | allXLinear cs rs = rs
+ | otherwise = concatMap handleCat cs
+ where rs = concatMap (catRules g) cs
+ handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
+ ++ concatMap (makeRightLinearRules c) crs
+ -- FIXME: add more rules here, see pg 255, item 2
+ where crs = catRules rs c
+ c' = newCat c
+ makeRightLinearRules b' (CFRule c ss n) =
+ case ys of
+ [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
+ (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
+ : makeRightLinearRules (newCat b) (CFRule c zs n)
+ where (xs,ys) = break (`catElem` cs) ss
+ newCat c = c ++ "$"
+
+
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-{-
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Eq c => [CFRule c n t] -> [[c]]
-mutRecCats =
--}
-
-{-
--- | Get a map of categories to all categories which can occur in
--- the result of rewriting each category.
-allCatsTrans :: CFRules -> FinitMap
-allCatsTrans g c =
--}
+mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r
+ where r = nub [(c,c') | CFRule c ss _ <- g, Cat c' <- ss]
+ allCats = nub [c | CFRule c _ _ <- g]
-- Convert a strongly regular grammar to a finite automaton.
-- compileAutomaton ::
--
--- CFG rule utilities
+-- * CFG rule utilities
--
+-- | Get all the rules for a given category.
+catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t]
+catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c]
+
+-- | Gets the set of LHS categories of a set of rules.
+lhsCats :: Eq c => [CFRule c n t] -> [c]
+lhsCats = nub . map lhsCat
+
+lhsCat :: CFRule c n t -> c
+lhsCat (CFRule c _ _) = c
+
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
-> Bool
-isRightLinear cs (CFRule _ ss _) = all (not . catElem cs) (safeInit ss)
+isRightLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (safeInit ss)
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
-> Bool
-isLeftLinear cs (CFRule _ ss _) = all (not . catElem cs) (drop 1 ss)
+isLeftLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (drop 1 ss)
-- | Checks if a symbol is a non-terminal of one of the given categories.
-catElem :: Eq c => [c] -> Symbol c t -> Bool
-catElem cs (Tok _) = False
-catElem cs (Cat c) = c `elem` cs
+catElem :: Eq c => Symbol c t -> [c] -> Bool
+catElem s cs = symbol (`elem` cs) (const False) s
-- | Check if any of the categories used on the right-hand side
-- are in the given list of categories.
anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
-anyUsedBy cs (CFRule _ ss _) = any (catElem cs) ss
+anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
+
+mkName :: String -> Name
+mkName n = Name (IC n) []
+
+--
+-- * Relations
+--
+
+-- FIXME: these could use a more efficent data structures and algorithms.
+
+isRelatedTo :: Eq a => [(a,a)] -> a -> a -> Bool
+isRelatedTo r x y = (x,y) `elem` r
+
+transitiveClosure :: Eq a => [(a,a)] -> [(a,a)]
+transitiveClosure r = fix (\r -> r `union` [ (x,w) | (x,y) <- r, (z,w) <- r, y == z ]) r
+
+reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
+ -> [(a,a)] -> [(a,a)]
+reflexiveClosure u r = [(x,x) | x <- u] `union` r
+
+symmetricSubrelation :: Eq a => [(a,a)] -> [(a,a)]
+symmetricSubrelation r = [p | p@(x,y) <- r, (y,x) `elem` r]
+
+-- | Get the equivalence classes from an equivalence relation. Since
+-- the relation is relexive, the set can be recoved from the relation.
+equivalenceClasses :: Eq a => [(a,a)] -> [[a]]
+equivalenceClasses r = equivalenceClasses_ (nub (map fst r)) r
+ where equivalenceClasses_ [] _ = []
+ equivalenceClasses_ (x:xs) r = (x:ys):equivalenceClasses_ zs r
+ where (ys,zs) = partition (isRelatedTo r x) xs
--
-- * Utilities
@@ -159,3 +213,23 @@ nothingOrNull (Just xs) = null xs
safeInit :: [a] -> [a]
safeInit [] = []
safeInit xs = init xs
+
+unionAll :: Eq a => [[a]] -> [a]
+unionAll = nub . concat
+
+whenMP :: MonadPlus m => Bool -> a -> m a
+whenMP b x = if b then return x else mzero
+
+--
+-- * Testing stuff, can be removed
+--
+
+c --> ss = CFRule c ss (mkName "")
+
+prGr g = putStrLn $ showGr g
+
+showGr g = unlines $ map showRule g
+
+showRule (CFRule c ss _) = c ++ " --> " ++ unwords (map showSym ss)
+
+showSym s = symbol id show s \ No newline at end of file