diff options
Diffstat (limited to 'src/compiler/GF/Speech')
| -rw-r--r-- | src/compiler/GF/Speech/CFG.hs | 370 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/CFGToFA.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/GSL.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/JSGF.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/PGFToCFG.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/PrRegExp.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SISR.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SLF.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SRG.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SRGS_ABNF.hs | 2 | ||||
| -rw-r--r-- | src/compiler/GF/Speech/SRGS_XML.hs | 2 |
11 files changed, 10 insertions, 380 deletions
diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Speech/CFG.hs deleted file mode 100644 index 1a252139e..000000000 --- a/src/compiler/GF/Speech/CFG.hs +++ /dev/null @@ -1,370 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : GF.Speech.CFG --- --- Context-free grammar representation and manipulation. ----------------------------------------------------------------------- -module GF.Speech.CFG where - -import GF.Data.Utilities -import PGF ---import GF.Infra.Option -import GF.Data.Relation - ---import Control.Monad ---import Control.Monad.State (State, get, put, evalState) -import Data.Map (Map) -import qualified Data.Map as Map -import Data.List ---import Data.Maybe (fromMaybe) ---import Data.Monoid (mconcat) -import Data.Set (Set) -import qualified Data.Set as Set - --- --- * Types --- - -type Cat = String - -data Symbol c t = NonTerminal c | Terminal t - deriving (Eq, Ord, Show) - -type CFSymbol = Symbol Cat Token - -data CFRule = CFRule { - lhsCat :: Cat, - ruleRhs :: [CFSymbol], - ruleName :: CFTerm - } - deriving (Eq, Ord, Show) - -data CFTerm - = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments - | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id. - | CFApp CFTerm CFTerm -- ^ Application - | CFRes Int -- ^ The result of the n:th (0-based) non-terminal - | CFVar Int -- ^ A lambda-bound variable - | CFMeta CId -- ^ A metavariable - deriving (Eq, Ord, Show) - -data CFG = CFG { cfgStartCat :: Cat, - cfgExternalCats :: Set Cat, - cfgRules :: Map Cat (Set CFRule) } - deriving (Eq, Ord, Show) - --- --- * Grammar filtering --- - --- | Removes all directly and indirectly cyclic productions. --- FIXME: this may be too aggressive, only one production --- needs to be removed to break a given cycle. But which --- one should we pick? --- FIXME: Does not (yet) remove productions which are cyclic --- because of empty productions. -removeCycles :: CFG -> CFG -removeCycles = onRules f - where f rs = filter (not . isCycle) rs - where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs] - isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c - isCycle _ = False - --- | Better bottom-up filter that also removes categories which contain no finite --- strings. -bottomUpFilter :: CFG -> CFG -bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty }) - where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr - okSym g = symbol (`elem` allCats g) (const True) - --- | Removes categories which are not reachable from any external category. -topDownFilter :: CFG -> CFG -topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg - where - rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ] - uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats - keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg - --- | Merges categories with identical right-hand-sides. --- FIXME: handle probabilities -mergeIdentical :: CFG -> CFG -mergeIdentical g = onRules (map subst) g - where - -- maps categories to their replacement - m = Map.fromList [(y,concat (intersperse "+" xs)) - | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs] - -- build data to compare for each category: a set of name,rhs pairs - rulesKey = Set.map (\ (CFRule _ r n) -> (n,r)) - subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n - substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m - --- | Keeps only the start category as an external category. -purgeExternalCats :: CFG -> CFG -purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) } - --- --- * Removing left recursion --- - --- The LC_LR algorithm from --- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf -removeLeftRecursion :: CFG -> CFG -removeLeftRecursion gr - = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] } - where - scheme1 = [CFRule a [x,NonTerminal a_x] n' | - a <- retainedLeftRecursive, - x <- properLeftCornersOf a, - not (isLeftRecursive x), - let a_x = mkCat (NonTerminal a) x, - -- this is an extension of LC_LR to avoid generating - -- A-X categories for which there are no productions: - a_x `Set.member` newCats, - let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0)) - (\_ -> CFRes 0) x] - scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' | - a <- retainedLeftRecursive, - b@(NonTerminal b') <- properLeftCornersOf a, - isLeftRecursive b, - CFRule _ (x:beta) n <- catRules gr b', - let a_x = mkCat (NonTerminal a) x, - let a_b = mkCat (NonTerminal a) b, - let i = length $ filterCats beta, - let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n))) - (\_ -> CFApp (CFRes i) n) x] - scheme3 = [CFRule a_x beta n' | - a <- retainedLeftRecursive, - x <- properLeftCornersOf a, - CFRule _ (x':beta) n <- catRules gr a, - x == x', - let a_x = mkCat (NonTerminal a) x, - let n' = symbol (\_ -> CFAbs 1 (shiftTerm n)) - (\_ -> n) x] - scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats - - newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3)) - - shiftTerm :: CFTerm -> CFTerm - shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts) - shiftTerm (CFRes 0) = CFVar 1 - shiftTerm (CFRes n) = CFRes (n-1) - shiftTerm t = t - -- note: the rest don't occur in the original grammar - - cats = allCats gr - rules = allRules gr - - directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr] - leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner - properLeftCorner = transitiveClosure directLeftCorner - properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal - isProperLeftCornerOf = flip (isRelatedTo properLeftCorner) - - leftRecursive = reflexiveElements properLeftCorner - isLeftRecursive = (`Set.member` leftRecursive) - - retained = cfgStartCat gr `Set.insert` - Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr), - NonTerminal a <- ruleRhs r] - isRetained = (`Set.member` retained) - - retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained - - mkCat :: CFSymbol -> CFSymbol -> Cat - mkCat x y = showSymbol x ++ "-" ++ showSymbol y - where showSymbol = symbol id show - --- | Get the sets of mutually recursive non-terminals for a grammar. -mutRecCats :: Bool -- ^ If true, all categories will be in some set. - -- If false, only recursive categories will be included. - -> CFG -> [Set Cat] -mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r - where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss] - refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation - --- --- * Approximate context-free grammars with regular grammars. --- - -makeSimpleRegular :: CFG -> CFG -makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles - --- Use the transformation algorithm from \"Regular Approximation of Context-free --- Grammars through Approximation\", Mohri and Nederhof, 2000 --- to create an over-generating regular grammar for a context-free --- grammar -makeRegular :: CFG -> CFG -makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) } - where trSet cs | allXLinear cs rs = rs - | otherwise = concatMap handleCat (Set.toList cs) - where rs = catSetRules g cs - handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e - ++ concatMap (makeRightLinearRules c) (catRules g c) - where c' = newCat c - makeRightLinearRules b' (CFRule c ss n) = - case ys of - [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left - (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n - ++ makeRightLinearRules (newCat b) (CFRule c zs n) - where (xs,ys) = break (`catElem` cs) ss - -- don't add rules on the form A -> A - newRule c rhs n | rhs == [NonTerminal c] = [] - | otherwise = [CFRule c rhs n] - newCat c = c ++ "$" - --- --- * CFG Utilities --- - -mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG -mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs } - -groupProds :: [CFRule] -> Map Cat (Set CFRule) -groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) - --- | Gets all rules in a CFG. -allRules :: CFG -> [CFRule] -allRules = concat . map Set.toList . Map.elems . cfgRules - --- | Gets all rules in a CFG, grouped by their LHS categories. -allRulesGrouped :: CFG -> [(Cat,[CFRule])] -allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules - --- | Gets all categories which have rules. -allCats :: CFG -> [Cat] -allCats = Map.keys . cfgRules - --- | Gets all categories which have rules or occur in a RHS. -allCats' :: CFG -> [Cat] -allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union` - Set.fromList [c | rs <- Map.elems (cfgRules cfg), - r <- Set.toList rs, - NonTerminal c <- ruleRhs r]) - --- | Gets all rules for the given category. -catRules :: CFG -> Cat -> [CFRule] -catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr) - --- | Gets all rules for categories in the given set. -catSetRules :: CFG -> Set Cat -> [CFRule] -catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr - -mapCFGCats :: (Cat -> Cat) -> CFG -> CFG -mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg)) - (Set.map f (cfgExternalCats cfg)) - [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg] - -onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG -onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) } - -onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG -onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg } - --- | Clean up CFG after rules have been removed. -cleanCFG :: CFG -> CFG -cleanCFG = onCFG (Map.filter (not . Set.null)) - --- | Combine two CFGs. -unionCFG :: CFG -> CFG -> CFG -unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x - -filterCFG :: (CFRule -> Bool) -> CFG -> CFG -filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p)) - -filterCFGCats :: (Cat -> Bool) -> CFG -> CFG -filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c)) - -countCats :: CFG -> Int -countCats = Map.size . cfgRules . cleanCFG - -countRules :: CFG -> Int -countRules = length . allRules - -prCFG :: CFG -> String -prCFG = prProductions . map prRule . allRules - where - prRule r = (lhsCat r, unwords (map prSym (ruleRhs r))) - prSym = symbol id (\t -> "\""++ t ++"\"") - -prProductions :: [(Cat,String)] -> String -prProductions prods = - unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods] - where - maxLHSWidth = maximum $ 0:(map (length . fst) prods) - rpad n s = s ++ replicate (n - length s) ' ' - -prCFTerm :: CFTerm -> String -prCFTerm = pr 0 - where - pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")") - pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t) - pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")") - pr _ (CFRes i) = "$" ++ show i - pr _ (CFVar i) = "x" ++ show i - pr _ (CFMeta c) = "?" ++ showCId c - paren 0 x = x - paren 1 x = "(" ++ x ++ ")" - --- --- * CFRule Utilities --- - -ruleFun :: CFRule -> CId -ruleFun (CFRule _ _ t) = f t - where f (CFObj n _) = n - f (CFApp _ x) = f x - f (CFAbs _ x) = f x - f _ = mkCId "" - --- | Check if any of the categories used on the right-hand side --- are in the given list of categories. -anyUsedBy :: [Cat] -> CFRule -> Bool -anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) - -mkCFTerm :: String -> CFTerm -mkCFTerm n = CFObj (mkCId n) [] - -ruleIsNonRecursive :: Set Cat -> CFRule -> Bool -ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs - --- | Check if all the rules are right-linear, or all the rules are --- left-linear, with respect to given categories. -allXLinear :: Set Cat -> [CFRule] -> Bool -allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs - --- | Checks if a context-free rule is right-linear. -isRightLinear :: Set Cat -- ^ The categories to consider - -> CFRule -- ^ The rule to check for right-linearity - -> Bool -isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs - --- | Checks if a context-free rule is left-linear. -isLeftLinear :: Set Cat -- ^ The categories to consider - -> CFRule -- ^ The rule to check for left-linearity - -> Bool -isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs - - --- --- * Symbol utilities --- - -symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a -symbol fc ft (NonTerminal cat) = fc cat -symbol fc ft (Terminal tok) = ft tok - -mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t' -mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft) - -filterCats :: [Symbol c t] -> [c] -filterCats syms = [ cat | NonTerminal cat <- syms ] - -filterToks :: [Symbol c t] -> [t] -filterToks syms = [ tok | Terminal tok <- syms ] - --- | Checks if a symbol is a non-terminal of one of the given categories. -catElem :: Ord c => Symbol c t -> Set c -> Bool -catElem s cs = symbol (`Set.member` cs) (const False) s - -noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool -noCatsInSet cs = not . any (`catElem` cs) diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs index 4f5e3621e..330c763e5 100644 --- a/src/compiler/GF/Speech/CFGToFA.hs +++ b/src/compiler/GF/Speech/CFGToFA.hs @@ -17,7 +17,7 @@ import qualified Data.Set as Set --import PGF.CId import PGF.Data import GF.Data.Utilities -import GF.Speech.CFG +import GF.Grammar.CFG --import GF.Speech.PGFToCFG --import GF.Infra.Ident (Ident) diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs index 3557ff21f..3eb4c20a7 100644 --- a/src/compiler/GF/Speech/GSL.hs +++ b/src/compiler/GF/Speech/GSL.hs @@ -9,7 +9,7 @@ module GF.Speech.GSL (gslPrinter) where --import GF.Data.Utilities -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.SRG import GF.Speech.RegExp import GF.Infra.Option diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs index 921108e11..6a4935a7f 100644 --- a/src/compiler/GF/Speech/JSGF.hs +++ b/src/compiler/GF/Speech/JSGF.hs @@ -14,7 +14,7 @@ module GF.Speech.JSGF (jsgfPrinter) where --import GF.Data.Utilities import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.RegExp import GF.Speech.SISR import GF.Speech.SRG diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index 5c13ca471..d70a74fe7 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -10,7 +10,7 @@ import PGF(showCId) import PGF.Data as PGF import PGF.Macros --import GF.Infra.Ident -import GF.Speech.CFG hiding (Symbol) +import GF.Grammar.CFG hiding (Symbol) import Data.Array.IArray as Array --import Data.List diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs index 0fc35d541..2829839f3 100644 --- a/src/compiler/GF/Speech/PrRegExp.hs +++ b/src/compiler/GF/Speech/PrRegExp.hs @@ -7,7 +7,7 @@ module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.CFGToFA import GF.Speech.PGFToCFG import GF.Speech.RegExp diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs index 8417fb203..5f9161547 100644 --- a/src/compiler/GF/Speech/SISR.hs +++ b/src/compiler/GF/Speech/SISR.hs @@ -13,7 +13,7 @@ import Data.List --import GF.Data.Utilities --import GF.Infra.Ident import GF.Infra.Option (SISRFormat(..)) -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.SRG (SRGNT) import PGF(showCId) diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs index 7785f2382..d93d1b362 100644 --- a/src/compiler/GF/Speech/SLF.hs +++ b/src/compiler/GF/Speech/SLF.hs @@ -13,7 +13,7 @@ module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter, slfSubPrinter,slfSubGraphvizPrinter) where import GF.Data.Utilities -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.FiniteState --import GF.Speech.CFG import GF.Speech.CFGToFA diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs index 4e5508de0..d5bedc797 100644 --- a/src/compiler/GF/Speech/SRG.hs +++ b/src/compiler/GF/Speech/SRG.hs @@ -21,7 +21,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol import GF.Data.Utilities --import GF.Infra.Ident import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.PGFToCFG --import GF.Data.Relation --import GF.Speech.FiniteState diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs index 5d07762bb..a359b2c38 100644 --- a/src/compiler/GF/Speech/SRGS_ABNF.hs +++ b/src/compiler/GF/Speech/SRGS_ABNF.hs @@ -21,7 +21,7 @@ module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where --import GF.Data.Utilities import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.SISR as SISR import GF.Speech.SRG import GF.Speech.RegExp diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs index fe973c2e6..397bfb739 100644 --- a/src/compiler/GF/Speech/SRGS_XML.hs +++ b/src/compiler/GF/Speech/SRGS_XML.hs @@ -9,7 +9,7 @@ module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where --import GF.Data.Utilities import GF.Data.XML import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.RegExp import GF.Speech.SISR as SISR import GF.Speech.SRG |
