summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2007-06-25 13:38:40 +0000
committerbringert <bringert@cs.chalmers.se>2007-06-25 13:38:40 +0000
commit2b63a895690e6f4eb57c0a1b95692b640b9d9e2c (patch)
tree8006e803c44c86ba70473a7820fbb296345f8fa4
parentf081dc0d6bb73d5439420569c352e88b0f096a7f (diff)
Some refactorings needed for recursion removal.
-rw-r--r--src/GF/Speech/CFGToFiniteState.hs50
-rw-r--r--src/GF/Speech/RegExp.hs20
-rw-r--r--src/GF/Speech/SRG.hs6
-rw-r--r--src/GF/Speech/TransformCFG.hs56
4 files changed, 73 insertions, 59 deletions
diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs
index efc4c562e..a8eb4e1de 100644
--- a/src/GF/Speech/CFGToFiniteState.hs
+++ b/src/GF/Speech/CFGToFiniteState.hs
@@ -68,33 +68,6 @@ makeSimpleRegular opts s = makeRegular $ cfgToCFRules s
preprocess = fix (topDownFilter start . bottomUpFilter)
. removeCycles
---
--- * Approximate context-free grammars with regular grammars.
---
-
--- Use the transformation algorithm from \"Regular Approximation of Context-free
--- Grammars through Approximation\", Mohri and Nederhof, 2000
--- to create an over-generating regular frammar for a context-free
--- grammar
-makeRegular :: CFRules -> CFRules
-makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
- where trSet cs | allXLinear cs rs = rs
- | otherwise = concatMap handleCat csl
- where csl = Set.toList cs
- rs = catSetRules g cs
- handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
- ++ concatMap (makeRightLinearRules c) (catRules g c)
- where c' = newCat c
- makeRightLinearRules b' (CFRule c ss n) =
- case ys of
- [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
- (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
- ++ makeRightLinearRules (newCat b) (CFRule c zs n)
- where (xs,ys) = break (`catElem` cs) ss
- -- don't add rules on the form A -> A
- newRule c rhs n | rhs == [Cat c] = []
- | otherwise = [CFRule c rhs n]
- newCat c = c ++ "$"
--
-- * Compile strongly regular grammars to NFAs
@@ -300,26 +273,3 @@ addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns))
-
-ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
-ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
-
-noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
-noCatsInSet cs = not . any (`catElem` cs)
-
--- | Check if all the rules are right-linear, or all the rules are
--- left-linear, with respect to given categories.
-allXLinear :: Set Cat_ -> [CFRule_] -> Bool
-allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-
--- | Checks if a context-free rule is right-linear.
-isRightLinear :: Set Cat_ -- ^ The categories to consider
- -> CFRule_ -- ^ The rule to check for right-linearity
- -> Bool
-isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-
--- | Checks if a context-free rule is left-linear.
-isLeftLinear :: Set Cat_ -- ^ The categories to consider
- -> CFRule_ -- ^ The rule to check for right-linearity
- -> Bool
-isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs
index 120a43d26..1842780ee 100644
--- a/src/GF/Speech/RegExp.hs
+++ b/src/GF/Speech/RegExp.hs
@@ -3,7 +3,8 @@ module GF.Speech.RegExp (RE(..),
isEpsilon, isNull,
unionRE, concatRE, seqRE,
repeatRE, minimizeRE,
- mapRE, joinRE,
+ mapRE, mapRE', joinRE,
+ symbolsRE,
dfa2re, prRE) where
import Data.List
@@ -107,10 +108,13 @@ firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b
-mapRE f (REConcat xs) = REConcat (map (mapRE f) xs)
-mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
-mapRE f (RERepeat xs) = RERepeat (mapRE f xs)
-mapRE f (RESymbol s) = RESymbol (f s)
+mapRE f = mapRE' (RESymbol . f)
+
+mapRE' :: (a -> RE b) -> RE a -> RE b
+mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
+mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
+mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
+mapRE' f (RESymbol s) = f s
joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
@@ -118,6 +122,12 @@ joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss
+symbolsRE :: RE a -> [a]
+symbolsRE (REConcat xs) = concatMap symbolsRE xs
+symbolsRE (REUnion xs) = concatMap symbolsRE xs
+symbolsRE (RERepeat x) = symbolsRE x
+symbolsRE (RESymbol x) = [x]
+
-- Debugging
prRE :: RE String -> String
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index 8370f130a..43969ab0d 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -49,7 +49,7 @@ import Data.Set (Set)
import qualified Data.Set as Set
data SRG = SRG { grammarName :: String -- ^ grammar name
- , startCat :: String -- ^ start category name
+ , startCat :: SRGCat -- ^ start category name
, origStartCat :: String -- ^ original start category name
, grammarLanguage :: Maybe String -- ^ The language for which the grammar
-- is intended, e.g. en-UK
@@ -61,7 +61,7 @@ data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original c
-- and productions
deriving (Eq,Show)
--- | maybe a probability, a rule name and a list of symbols
+-- | maybe a probability, a rule name and an EBNF right-hand side
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
deriving (Eq,Show)
@@ -163,6 +163,8 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
srgItem :: [[Symbol SRGNT Token]] -> SRGItem
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
+-- non-optimizing version:
+--srgItem = unionRE . map seqRE
-- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index a94cf3817..c640caa0f 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -257,6 +257,33 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
allCats = map fst g
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
+--
+-- * Approximate context-free grammars with regular grammars.
+--
+
+-- Use the transformation algorithm from \"Regular Approximation of Context-free
+-- Grammars through Approximation\", Mohri and Nederhof, 2000
+-- to create an over-generating regular frammar for a context-free
+-- grammar
+makeRegular :: CFRules -> CFRules
+makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
+ where trSet cs | allXLinear cs rs = rs
+ | otherwise = concatMap handleCat csl
+ where csl = Set.toList cs
+ rs = catSetRules g cs
+ handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
+ ++ concatMap (makeRightLinearRules c) (catRules g c)
+ where c' = newCat c
+ makeRightLinearRules b' (CFRule c ss n) =
+ case ys of
+ [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
+ (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
+ ++ makeRightLinearRules (newCat b) (CFRule c zs n)
+ where (xs,ys) = break (`catElem` cs) ss
+ -- don't add rules on the form A -> A
+ newRule c rhs n | rhs == [Cat c] = []
+ | otherwise = [CFRule c rhs n]
+ newCat c = c ++ "$"
--
-- * CFG rule utilities
@@ -292,7 +319,7 @@ ruleFun (CFRule _ _ t) = f t
f _ = IC ""
-- | Checks if a symbol is a non-terminal of one of the given categories.
-catElem :: Symbol Cat_ t -> Set Cat_ -> Bool
+catElem :: Ord c => Symbol c t -> Set c -> Bool
catElem s cs = symbol (`Set.member` cs) (const False) s
-- | Check if any of the categories used on the right-hand side
@@ -301,4 +328,29 @@ anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm
-mkCFTerm n = CFObj (IC n) [] \ No newline at end of file
+mkCFTerm n = CFObj (IC n) []
+
+ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
+ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+
+noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
+noCatsInSet cs = not . any (`catElem` cs)
+
+-- | Check if all the rules are right-linear, or all the rules are
+-- left-linear, with respect to given categories.
+allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
+allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
+
+-- | Checks if a context-free rule is right-linear.
+isRightLinear :: Ord c =>
+ Set c -- ^ The categories to consider
+ -> CFRule c n t -- ^ The rule to check for right-linearity
+ -> Bool
+isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
+
+-- | Checks if a context-free rule is left-linear.
+isLeftLinear :: Ord c =>
+ Set c -- ^ The categories to consider
+ -> CFRule c n t -- ^ The rule to check for left-linearity
+ -> Bool
+isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs