diff options
| author | bringert <unknown> | 2005-09-12 14:46:44 +0000 |
|---|---|---|
| committer | bringert <unknown> | 2005-09-12 14:46:44 +0000 |
| commit | ddda900d53ee3b8fa968bc8acb49f035f9ef860c (patch) | |
| tree | b83a52f978fbeffda4ed95d936b55a91b9f6c535 /src/GF/Speech/CFGToFiniteState.hs | |
| parent | f882f97a22c9ed16c6f1735930698b8fba162351 (diff) | |
Completed unoptimized SLF generation.
Diffstat (limited to 'src/GF/Speech/CFGToFiniteState.hs')
| -rw-r--r-- | src/GF/Speech/CFGToFiniteState.hs | 171 |
1 files changed, 171 insertions, 0 deletions
----------------------------------------------------------------------
-- |
-- Module      : CFGToFiniteState
-- Maintainer  : BB
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/12 15:46:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.1 $
--
-- Approximates CFGs with finite state networks.
-----------------------------------------------------------------------------

module GF.Speech.CFGToFiniteState (cfgToFA) where

import Data.List

import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types
import GF.Infra.Ident (Ident)
import GF.Infra.Option (Options)

import GF.Speech.FiniteState
import GF.Speech.TransformCFG

-- | Build a finite automaton that approximates a context-free grammar.
--
-- The grammar is converted to CF rules ('cfgToCFRules'), cleaned up with
-- 'removeEmptyCats' and 'removeIdenticalRules', made regular with
-- 'makeRegular', compiled with 'compileAutomaton' starting from the
-- category returned by 'getStartCat', and finally passed to 'minimize'.
cfgToFA :: Ident    -- ^ Grammar name (not used by the current implementation)
        -> Options  -- ^ Options from which the start category is read
        -> CGrammar -- ^ The grammar to approximate
        -> FA () (Maybe String)
cfgToFA _name opts cfg = minimize $ compileAutomaton start rgr
  where
    start = getStartCat opts
    rgr   = makeRegular $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules cfg


-- | Use the transformation algorithm from \"Regular Approximation of
-- Context-free Grammars through Transformation\", Mohri and Nederhof, 2000,
-- to create an over-generating regular grammar for a context-free
-- grammar.
--
-- Each set of mutually recursive categories is handled separately: if
-- all of its rules are already right-linear or all left-linear (see
-- 'allXLinear') they are kept as they are, otherwise they are rewritten
-- into right-linear rules over the original categories and fresh
-- categories named by appending @$@.
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
  where
    trSet cs
        | allXLinear cs rs = rs
        | otherwise        = concatMap handleCat cs
      where
        rs = catSetRules g cs

        -- For a category A: introduce A' -> e, then the right-linear
        -- rules derived from every rule of A.
        handleCat c =
            CFRule (newCat c) [] (mkName (c ++ "-empty"))
            : concatMap (makeRightLinearRules c) (catRules g c)

        -- Split the right-hand side at the first symbol that belongs to
        -- the current set and emit one right-linear rule per segment.
        makeRightLinearRules b' (CFRule c ss n) =
            case ys of
              -- no non-terminals from the set left
              []           -> [CFRule b' (xs ++ [Cat (newCat c)]) n]
              (Cat b : zs) -> CFRule b' (xs ++ [Cat b]) n
                              : makeRightLinearRules (newCat b) (CFRule c zs n)
              -- 'break' stopped on something that is not a category; the
              -- original code had no alternative for this and would die
              -- with an anonymous pattern-match failure.
              _            -> error "makeRegular: symbol matched by catElem is not a category"
          where
            (xs, ys) = break (`catElem` cs) ss

    newCat c = c ++ "$"


-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool    -- ^ If true, all categories will be in some set.
                      --   If false, only recursive categories will be included.
           -> CFRules
           -> [[Cat_]]
mutRecCats incAll g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure r'
  where
    -- (c,c') is in the relation when c' occurs on the right-hand side
    -- of some rule for c.
    r       = nub [ (c, c') | (_, rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss ]
    allCats = map fst g
    r' | incAll    = reflexiveClosure allCats r
       | otherwise = r

-- | Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: Cat_ -- ^ Start category
                 -> CFRules
                 -> FA () (Maybe Token)
compileAutomaton start g = make_fa s [Cat start] f fa''
  where
    fa       = newFA ()
    s        = startState fa
    (fa', f) = newState () fa
    fa''     = addFinalState f fa'
    ns       = mutRecCats False g

    -- The make_fa algorithm from \"Regular approximation of CFLs: a
    -- grammatical view\", Mark-Jan Nederhof. International Workshop on
    -- Parsing Technologies, 1997.
    --
    -- Extends the automaton so that the symbol sequence @alpha@ leads
    -- from state @q0@ to state @q1@.
    make_fa :: State -> [Symbol Cat_ Token] -> State
            -> FA () (Maybe Token) -> FA () (Maybe Token)
    make_fa q0 alpha q1 aut =
        case alpha of
          []      -> newTransition q0 q1 Nothing aut
          [Tok t] -> newTransition q0 q1 (Just t) aut
          [Cat a] ->
              case findSet a ns of
                -- a is recursive
                Just ni ->
                    let (aut1, ss) = addStatesForCats ni aut
                        getState x = lookup' x ss
                        niRules    = catSetRules g ni
                        (nrs, rs)  = partition (ruleIsNonRecursive ni) niRules
                    in if all (isRightLinear ni) niRules
                         then
                           -- the set Ni is right-recursive or cyclic
                           let aut2 = foldFuns [ make_fa (getState c) xs q1
                                               | CFRule c xs _ <- nrs ] aut1
                               aut3 = foldFuns [ make_fa (getState c) xs (getState d)
                                               | CFRule c rhs _ <- rs
                                               , let (xs, d) = splitFinalCat rhs ] aut2
                           in newTransition q0 (getState a) Nothing aut3
                         else
                           -- the set Ni is left-recursive; rules in rs that
                           -- do not start with a category are skipped by the
                           -- pattern in the generator
                           let aut2 = foldFuns [ make_fa q0 xs (getState c)
                                               | CFRule c xs _ <- nrs ] aut1
                               aut3 = foldFuns [ make_fa (getState d) xs (getState c)
                                               | CFRule c (Cat d : xs) _ <- rs ] aut2
                           in newTransition (getState a) q1 Nothing aut3
                -- a is not recursive
                Nothing ->
                    let rs = catRules g a
                    in foldr (\ (CFRule _ b _) -> make_fa q0 b q1) aut rs
          (x:beta) ->
              let (aut1, q) = newState () aut
              in make_fa q beta q1 $ make_fa q0 [x] q aut1

    -- Split a right-hand side into everything but the last symbol and the
    -- category named by the last symbol. Replaces the former lazy pattern
    -- @(xs, Cat d) = (init ss, last ss)@, which failed without a useful
    -- message when the last symbol was missing or not a category.
    splitFinalCat rhs =
        case reverse rhs of
          Cat d : revFront -> (reverse revFront, d)
          _ -> error "compileAutomaton: recursive rule in a right-linear set does not end in a category"

    -- Allocate one fresh state per category and return the association list.
    addStatesForCats [] aut = (aut, [])
    addStatesForCats (c:cs) aut =
        let (aut1, st)   = newState () aut
            (aut2, rest) = addStatesForCats cs aut1
        in (aut2, (c, st) : rest)

    ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs


-- | True if no symbol in the list satisfies @(`catElem` cs)@.
noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)

-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs

-- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c]  -- ^ The categories to consider
              -> CFRule c n t -- ^ The rule to check for right-linearity
              -> Bool
isRightLinear cats rule = noCatsInSet cats (safeInit (ruleRhs rule))

-- | Checks if a context-free rule is left-linear: every right-hand side
-- symbol after the first one must pass 'noCatsInSet' for the given
-- categories.
isLeftLinear :: Eq c => [c]  -- ^ The categories to consider
             -> CFRule c n t -- ^ The rule to check for left-linearity
             -> Bool
isLeftLinear cats rule = noCatsInSet cats (drop 1 (ruleRhs rule))


--
-- * Relations
--

-- FIXME: these could use more efficient data structures and algorithms.

-- | A binary relation, represented as a list of related pairs.
type Rel a = [(a,a)]

-- | @isRelatedTo rel a b@ holds when the pair @(a,b)@ is listed in @rel@.
isRelatedTo :: Eq a => Rel a -> a -> a -> Bool
isRelatedTo rel a b = elem (a, b) rel

-- | Transitive closure of a relation.
--
-- NOTE(review): 'fix' is not defined in this file; presumably it
-- re-applies the step function until the relation stops changing --
-- confirm against its definition.
transitiveClosure :: Eq a => Rel a -> Rel a
transitiveClosure rel = fix grow rel
  where
    -- Add every pair obtained by chaining two existing pairs.
    grow cur = cur `union` [ (a, d) | (a, b) <- cur, (c, d) <- cur, b == c ]

-- | Add the pair @(x,x)@ for every element of the given set.
reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
                 -> Rel a -> Rel a
reflexiveClosure universe rel = map (\x -> (x, x)) universe `union` rel

-- | Keep only the pairs whose mirror image is also in the relation.
symmetricSubrelation :: Eq a => Rel a -> Rel a
symmetricSubrelation rel = filter hasMirror rel
  where
    hasMirror (a, b) = (b, a) `elem` rel

-- | Get the equivalence classes from an equivalence relation. Since
-- the relation is reflexive, the set can be recovered from the relation.
equivalenceClasses :: Eq a => Rel a -> [[a]]
equivalenceClasses rel = classes (nub (map fst rel))
  where
    classes []             = []
    classes (rep:pending)  = (rep : same) : classes others
      where
        -- Everything related to the representative joins its class.
        (same, others) = partition (isRelatedTo rel rep) pending

--
-- * Utilities
--

-- | Apply a list of functions to a value; the last function in the
-- list is applied first.
foldFuns :: [a -> a] -> a -> a
foldFuns fs seed = foldr (\f acc -> f acc) seed fs

-- | Like 'init', but returns the empty list for the empty list.
safeInit :: [a] -> [a]
safeInit xs
    | null xs   = []
    | otherwise = init xs
\ No newline at end of file |
