summaryrefslogtreecommitdiff
path: root/src/GF/Speech/TransformCFG.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
commitb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch)
tree0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/Speech/TransformCFG.hs
parentfe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff)
removed src for 2.9
Diffstat (limited to 'src/GF/Speech/TransformCFG.hs')
-rw-r--r--src/GF/Speech/TransformCFG.hs378
1 files changed, 0 insertions, 378 deletions
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
deleted file mode 100644
index 3d7ebd809..000000000
--- a/src/GF/Speech/TransformCFG.hs
+++ /dev/null
@@ -1,378 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : TransformCFG
--- Maintainer : BB
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/11/01 20:09:04 $
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.24 $
---
--- This module does some useful transformations on CFGs.
---
--- peb thinks: most of this module should be moved to GF.Conversion...
------------------------------------------------------------------------------
-
-module GF.Speech.TransformCFG where
-
-import GF.Canon.CanonToGFCC (canon2gfcc)
-import qualified GF.GFCC.CId as C
-import GF.GFCC.Macros (lookType,catSkeleton)
-import GF.GFCC.DataGFCC (GFCC)
-import GF.Conversion.Types
-import GF.CF.PPrCF (prCFCat)
-import GF.Data.Utilities
-import GF.Formalism.CFG
-import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
- NameProfile(..), Profile(..), name2fun, forestName)
-import GF.Infra.Ident
-import GF.Infra.Option
-import GF.Infra.Print
-import GF.Speech.Relation
-import GF.Compile.ShellState (StateGrammar, stateCFG, stateGrammarST, startCatStateOpts, stateOptions)
-
-import Control.Monad
-import Control.Monad.State (State, get, put, evalState)
-import Data.Map (Map)
-import qualified Data.Map as Map
-import Data.List
-import Data.Maybe (fromMaybe)
-import Data.Monoid (mconcat)
-import Data.Set (Set)
-import qualified Data.Set as Set
-
--- not very nice to replace the structured CFCat type with a simple string
-type CFRule_ = CFRule Cat_ CFTerm Token
-
-data CFTerm
- = CFObj Fun [CFTerm] -- ^ an abstract syntax function with arguments
- | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
- | CFApp CFTerm CFTerm -- ^ Application
- | CFRes Int -- ^ The result of the n:th (0-based) non-terminal
- | CFVar Int -- ^ A lambda-bound variable
- | CFMeta String -- ^ A metavariable
- deriving (Eq,Ord,Show)
-
-type Cat_ = String
-type CFSymbol_ = Symbol Cat_ Token
-
-type CFRules = Map Cat_ (Set CFRule_)
-
-
-cfgToCFRules :: StateGrammar -> CFRules
-cfgToCFRules s =
- groupProds [CFRule (catToString c) (map symb r) (nameToTerm n)
- | CFRule c r n <- cfg]
- where cfg = stateCFG s
- symb = mapSymbol catToString id
- catToString = prt
- gfcc = stateGFCC s
- nameToTerm (Name IW [Unify [n]]) = CFRes n
- nameToTerm (Name f@(IC c) prs) =
- CFObj f (zipWith profileToTerm args prs)
- where (args,_) = catSkeleton $ lookType gfcc (C.CId c)
- nameToTerm n = error $ "cfgToCFRules.nameToTerm" ++ show n
- profileToTerm (C.CId t) (Unify []) = CFMeta t
- profileToTerm _ (Unify xs) = CFRes (last xs) -- FIXME: unify
- profileToTerm (C.CId t) (Constant f) = maybe (CFMeta t) (\x -> CFObj x []) (forestName f)
-
-getStartCat :: Options -> StateGrammar -> String
-getStartCat opts sgr = prCFCat (startCatStateOpts opts' sgr)
- where opts' = addOptions opts (stateOptions sgr)
-
-getStartCatCF :: Options -> StateGrammar -> String
-getStartCatCF opts sgr = getStartCat opts sgr ++ "{}.s"
-
-stateGFCC :: StateGrammar -> GFCC
-stateGFCC = canon2gfcc noOptions . stateGrammarST
-
--- * Grammar filtering
-
--- | Removes all directly and indirectly cyclic productions.
--- FIXME: this may be too aggressive, only one production
--- needs to be removed to break a given cycle. But which
--- one should we pick?
--- FIXME: Does not (yet) remove productions which are cyclic
--- because of empty productions.
-removeCycles :: CFRules -> CFRules
-removeCycles = groupProds . f . allRules
- where f rs = filter (not . isCycle) rs
- where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [Cat c'] _ <- rs]
- isCycle (CFRule c [Cat c'] _) = isRelatedTo alias c' c
- isCycle _ = False
-
--- | Better bottom-up filter that also removes categories which contain no finite
--- strings.
-bottomUpFilter :: CFRules -> CFRules
-bottomUpFilter gr = fix grow Map.empty
- where grow g = g `unionCFRules` filterCFRules (all (okSym g) . ruleRhs) gr
- okSym g = symbol (`elem` allCats g) (const True)
-
--- | Removes categories which are not reachable from the start category.
-topDownFilter :: Cat_ -> CFRules -> CFRules
-topDownFilter start rules = filterCFRulesCats (isRelatedTo uses start) rules
- where
- rhsCats = [ (lhsCat r, c') | r <- allRules rules, c' <- filterCats (ruleRhs r) ]
- uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats
-
--- | Merges categories with identical right-hand-sides.
--- FIXME: handle probabilities
-mergeIdentical :: CFRules -> CFRules
-mergeIdentical g = groupProds $ map subst $ allRules g
- where
- -- maps categories to their replacement
- m = Map.fromList [(y,concat (intersperse "+" xs))
- | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList g], y <- xs]
- -- build data to compare for each category: a set of name,rhs pairs
- rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
- subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
- substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
-
--- * Removing left recursion
-
--- The LC_LR algorithm from
--- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
-removeLeftRecursion :: Cat_ -> CFRules -> CFRules
-removeLeftRecursion start gr
- = groupProds $ concat [scheme1, scheme2, scheme3, scheme4]
- where
- scheme1 = [CFRule a [x,Cat a_x] n' |
- a <- retainedLeftRecursive,
- x <- properLeftCornersOf a,
- not (isLeftRecursive x),
- let a_x = mkCat (Cat a) x,
- -- this is an extension of LC_LR to avoid generating
- -- A-X categories for which there are no productions:
- a_x `Set.member` newCats,
- let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
- (\_ -> CFRes 0) x]
- scheme2 = [CFRule a_x (beta++[Cat a_b]) n' |
- a <- retainedLeftRecursive,
- b@(Cat b') <- properLeftCornersOf a,
- isLeftRecursive b,
- CFRule _ (x:beta) n <- catRules gr b',
- let a_x = mkCat (Cat a) x,
- let a_b = mkCat (Cat a) b,
- let i = length $ filterCats beta,
- let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
- (\_ -> CFApp (CFRes i) n) x]
- scheme3 = [CFRule a_x beta n' |
- a <- retainedLeftRecursive,
- x <- properLeftCornersOf a,
- CFRule _ (x':beta) n <- catRules gr a,
- x == x',
- let a_x = mkCat (Cat a) x,
- let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
- (\_ -> n) x]
- scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats
-
- newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
-
- shiftTerm :: CFTerm -> CFTerm
- shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
- shiftTerm (CFRes 0) = CFVar 1
- shiftTerm (CFRes n) = CFRes (n-1)
- shiftTerm t = t
- -- note: the rest don't occur in the original grammar
-
- cats = allCats gr
- rules = allRules gr
-
- directLeftCorner = mkRel [(Cat c,t) | CFRule c (t:_) _ <- allRules gr]
- leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner
- properLeftCorner = transitiveClosure directLeftCorner
- properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat
- isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
-
- leftRecursive = reflexiveElements properLeftCorner
- isLeftRecursive = (`Set.member` leftRecursive)
-
- retained = start `Set.insert`
- Set.fromList [a | r <- allRules (filterCFRulesCats (not . isLeftRecursive . Cat) gr),
- Cat a <- ruleRhs r]
- isRetained = (`Set.member` retained)
-
- retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained
-
-mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_
-mkCat x y = showSymbol x ++ "-" ++ showSymbol y
- where showSymbol = symbol id show
-
-{-
-
--- Paull's algorithm, see
--- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
-removeLeftRecursion :: Cat_ -> CFRules -> CFRules
-removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs
- where
- handleProds (c, r) = (c, concatMap handleProd r)
- handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
- -- FIXME: for non-recursive categories, this changes
- -- the grammar unneccessarily, maybe we can use mutRecCats
- -- to make this less invasive
- -- FIXME: this will give multiple rules with the same name,
- -- which may mess up the probabilities.
- [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
- handleProd r = [r]
-
-removeDirectLeftRecursions :: CFRules -> CFRules
-removeDirectLeftRecursions = concat . flip evalState 0 . mapM removeDirectLeftRecursion
-
-removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
- -> State Int CFRules
-removeDirectLeftRecursion (a,rs)
- | null dr = return [(a,rs)]
- | otherwise =
- do
- a' <- fresh a
- let as = maybeEndWithA' nr
- is = [CFRule a' (tail r) n | CFRule _ r n <- dr]
- a's = maybeEndWithA' is
- -- the not null constraint here avoids creating new
- -- left recursive (cyclic) rules.
- maybeEndWithA' xs = xs ++ [CFRule c (r++[Cat a']) n | CFRule c r n <- xs,
- not (null r)]
- return [(a, as), (a', a's)]
- where
- (dr,nr) = partition isDirectLeftRecursive rs
- fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n }
-
-isDirectLeftRecursive :: CFRule_ -> Bool
-isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
-isDirectLeftRecursive _ = False
-
--}
-
--- | Get the sets of mutually recursive non-terminals for a grammar.
-mutRecCats :: Bool -- ^ If true, all categories will be in some set.
- -- If false, only recursive categories will be included.
- -> CFRules -> [Set Cat_]
-mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
- where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, Cat c' <- ss]
- refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
-
---
--- * Approximate context-free grammars with regular grammars.
---
-
--- Use the transformation algorithm from \"Regular Approximation of Context-free
--- Grammars through Approximation\", Mohri and Nederhof, 2000
--- to create an over-generating regular frammar for a context-free
--- grammar
-makeRegular :: CFRules -> CFRules
-makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
- where trSet cs | allXLinear cs rs = rs
- | otherwise = concatMap handleCat csl
- where csl = Set.toList cs
- rs = catSetRules g cs
- handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
- ++ concatMap (makeRightLinearRules c) (catRules g c)
- where c' = newCat c
- makeRightLinearRules b' (CFRule c ss n) =
- case ys of
- [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
- (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
- ++ makeRightLinearRules (newCat b) (CFRule c zs n)
- where (xs,ys) = break (`catElem` cs) ss
- -- don't add rules on the form A -> A
- newRule c rhs n | rhs == [Cat c] = []
- | otherwise = [CFRule c rhs n]
- newCat c = c ++ "$"
-
---
--- * CFG rule utilities
---
-
--- | Group productions by their lhs categories
-groupProds :: [CFRule_] -> CFRules
-groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
-
-allRules :: CFRules -> [CFRule_]
-allRules = concat . map Set.toList . Map.elems
-
-allRulesGrouped :: CFRules -> [(Cat_,[CFRule_])]
-allRulesGrouped = Map.toList . Map.map Set.toList
-
-allCats :: CFRules -> [Cat_]
-allCats = Map.keys
-
-catRules :: CFRules -> Cat_ -> [CFRule_]
-catRules rs c = Set.toList $ Map.findWithDefault Set.empty c rs
-
-catSetRules :: CFRules -> Set Cat_ -> [CFRule_]
-catSetRules g cs = allRules $ Map.filterWithKey (\c _ -> c `Set.member` cs) g
-
-cleanCFRules :: CFRules -> CFRules
-cleanCFRules = Map.filter (not . Set.null)
-
-unionCFRules :: CFRules -> CFRules -> CFRules
-unionCFRules = Map.unionWith Set.union
-
-filterCFRules :: (CFRule_ -> Bool) -> CFRules -> CFRules
-filterCFRules p = cleanCFRules . Map.map (Set.filter p)
-
-filterCFRulesCats :: (Cat_ -> Bool) -> CFRules -> CFRules
-filterCFRulesCats p = Map.filterWithKey (\c _ -> p c)
-
-countCats :: CFRules -> Int
-countCats = Map.size . cleanCFRules
-
-countRules :: CFRules -> Int
-countRules = length . allRules
-
-lhsCat :: CFRule c n t -> c
-lhsCat (CFRule c _ _) = c
-
-ruleRhs :: CFRule c n t -> [Symbol c t]
-ruleRhs (CFRule _ ss _) = ss
-
-ruleFun :: CFRule_ -> Fun
-ruleFun (CFRule _ _ t) = f t
- where f (CFObj n _) = n
- f (CFApp _ x) = f x
- f (CFAbs _ x) = f x
- f _ = IC ""
-
--- | Checks if a symbol is a non-terminal of one of the given categories.
-catElem :: Ord c => Symbol c t -> Set c -> Bool
-catElem s cs = symbol (`Set.member` cs) (const False) s
-
--- | Check if any of the categories used on the right-hand side
--- are in the given list of categories.
-anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
-anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
-
-mkCFTerm :: String -> CFTerm
-mkCFTerm n = CFObj (IC n) []
-
-ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
-ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
-
-noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
-noCatsInSet cs = not . any (`catElem` cs)
-
--- | Check if all the rules are right-linear, or all the rules are
--- left-linear, with respect to given categories.
-allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
-allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-
--- | Checks if a context-free rule is right-linear.
-isRightLinear :: Ord c =>
- Set c -- ^ The categories to consider
- -> CFRule c n t -- ^ The rule to check for right-linearity
- -> Bool
-isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-
--- | Checks if a context-free rule is left-linear.
-isLeftLinear :: Ord c =>
- Set c -- ^ The categories to consider
- -> CFRule c n t -- ^ The rule to check for left-linearity
- -> Bool
-isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
-
-prCFRules :: CFRules -> String
-prCFRules = unlines . map prRule . allRules
- where
- prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
- prSym = symbol id (\t -> "\""++ t ++"\"")