summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Speech
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler/GF/Speech')
-rw-r--r--src/compiler/GF/Speech/CFG.hs370
-rw-r--r--src/compiler/GF/Speech/CFGToFA.hs2
-rw-r--r--src/compiler/GF/Speech/GSL.hs2
-rw-r--r--src/compiler/GF/Speech/JSGF.hs2
-rw-r--r--src/compiler/GF/Speech/PGFToCFG.hs2
-rw-r--r--src/compiler/GF/Speech/PrRegExp.hs2
-rw-r--r--src/compiler/GF/Speech/SISR.hs2
-rw-r--r--src/compiler/GF/Speech/SLF.hs2
-rw-r--r--src/compiler/GF/Speech/SRG.hs2
-rw-r--r--src/compiler/GF/Speech/SRGS_ABNF.hs2
-rw-r--r--src/compiler/GF/Speech/SRGS_XML.hs2
11 files changed, 10 insertions, 380 deletions
diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Speech/CFG.hs
deleted file mode 100644
index 1a252139e..000000000
--- a/src/compiler/GF/Speech/CFG.hs
+++ /dev/null
@@ -1,370 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : GF.Speech.CFG
---
--- Context-free grammar representation and manipulation.
-----------------------------------------------------------------------
-module GF.Speech.CFG where
-
-import GF.Data.Utilities
-import PGF
---import GF.Infra.Option
-import GF.Data.Relation
-
---import Control.Monad
---import Control.Monad.State (State, get, put, evalState)
-import Data.Map (Map)
-import qualified Data.Map as Map
-import Data.List
---import Data.Maybe (fromMaybe)
---import Data.Monoid (mconcat)
-import Data.Set (Set)
-import qualified Data.Set as Set
-
---
--- * Types
---
-
-type Cat = String
-
-data Symbol c t = NonTerminal c | Terminal t
- deriving (Eq, Ord, Show)
-
-type CFSymbol = Symbol Cat Token
-
-data CFRule = CFRule {
- lhsCat :: Cat,
- ruleRhs :: [CFSymbol],
- ruleName :: CFTerm
- }
- deriving (Eq, Ord, Show)
-
-data CFTerm
- = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
- | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
- | CFApp CFTerm CFTerm -- ^ Application
- | CFRes Int -- ^ The result of the n:th (0-based) non-terminal
- | CFVar Int -- ^ A lambda-bound variable
- | CFMeta CId -- ^ A metavariable
- deriving (Eq, Ord, Show)
-
-data CFG = CFG { cfgStartCat :: Cat,
- cfgExternalCats :: Set Cat,
- cfgRules :: Map Cat (Set CFRule) }
- deriving (Eq, Ord, Show)
-
---
--- * Grammar filtering
---
-
--- | Removes all directly and indirectly cyclic productions.
--- FIXME: this may be too aggressive, only one production
--- needs to be removed to break a given cycle. But which
--- one should we pick?
--- FIXME: Does not (yet) remove productions which are cyclic
--- because of empty productions.
-removeCycles :: CFG -> CFG
-removeCycles = onRules f
- where f rs = filter (not . isCycle) rs
- where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
- isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
- isCycle _ = False
-
--- | Better bottom-up filter that also removes categories which contain no finite
--- strings.
-bottomUpFilter :: CFG -> CFG
-bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
- where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
- okSym g = symbol (`elem` allCats g) (const True)
-
--- | Removes categories which are not reachable from any external category.
-topDownFilter :: CFG -> CFG
-topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg
- where
- rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
- uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
- keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg
-
--- | Merges categories with identical right-hand-sides.
--- FIXME: handle probabilities
-mergeIdentical :: CFG -> CFG
-mergeIdentical g = onRules (map subst) g
- where
- -- maps categories to their replacement
- m = Map.fromList [(y,concat (intersperse "+" xs))
- | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
- -- build data to compare for each category: a set of name,rhs pairs
- rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
- subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
- substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
-
--- | Keeps only the start category as an external category.
-purgeExternalCats :: CFG -> CFG
-purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) }
-
---
--- * Removing left recursion
---
-
--- The LC_LR algorithm from
--- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
-removeLeftRecursion :: CFG -> CFG
-removeLeftRecursion gr
- = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
- where
- scheme1 = [CFRule a [x,NonTerminal a_x] n' |
- a <- retainedLeftRecursive,
- x <- properLeftCornersOf a,
- not (isLeftRecursive x),
- let a_x = mkCat (NonTerminal a) x,
- -- this is an extension of LC_LR to avoid generating
- -- A-X categories for which there are no productions:
- a_x `Set.member` newCats,
- let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
- (\_ -> CFRes 0) x]
- scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
- a <- retainedLeftRecursive,
- b@(NonTerminal b') <- properLeftCornersOf a,
- isLeftRecursive b,
- CFRule _ (x:beta) n <- catRules gr b',
- let a_x = mkCat (NonTerminal a) x,
- let a_b = mkCat (NonTerminal a) b,
- let i = length $ filterCats beta,
- let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
- (\_ -> CFApp (CFRes i) n) x]
- scheme3 = [CFRule a_x beta n' |
- a <- retainedLeftRecursive,
- x <- properLeftCornersOf a,
- CFRule _ (x':beta) n <- catRules gr a,
- x == x',
- let a_x = mkCat (NonTerminal a) x,
- let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
- (\_ -> n) x]
- scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
-
- newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
-
- shiftTerm :: CFTerm -> CFTerm
- shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
- shiftTerm (CFRes 0) = CFVar 1
- shiftTerm (CFRes n) = CFRes (n-1)
- shiftTerm t = t
- -- note: the rest don't occur in the original grammar
-
- cats = allCats gr
- rules = allRules gr
-
- directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
- leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
- properLeftCorner = transitiveClosure directLeftCorner
- properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
- isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
-
- leftRecursive = reflexiveElements properLeftCorner
- isLeftRecursive = (`Set.member` leftRecursive)
-
- retained = cfgStartCat gr `Set.insert`
- Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
- NonTerminal a <- ruleRhs r]
- isRetained = (`Set.member` retained)
-
- retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
-
- mkCat :: CFSymbol -> CFSymbol -> Cat
- mkCat x y = showSymbol x ++ "-" ++ showSymbol y
- where showSymbol = symbol id show
-
--- | Get the sets of mutually recursive non-terminals for a grammar.
-mutRecCats :: Bool -- ^ If true, all categories will be in some set.
- -- If false, only recursive categories will be included.
- -> CFG -> [Set Cat]
-mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
- where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
- refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
-
---
--- * Approximate context-free grammars with regular grammars.
---
-
-makeSimpleRegular :: CFG -> CFG
-makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
-
--- Use the transformation algorithm from \"Regular Approximation of Context-free
--- Grammars through Approximation\", Mohri and Nederhof, 2000
--- to create an over-generating regular grammar for a context-free
--- grammar
-makeRegular :: CFG -> CFG
-makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
- where trSet cs | allXLinear cs rs = rs
- | otherwise = concatMap handleCat (Set.toList cs)
- where rs = catSetRules g cs
- handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
- ++ concatMap (makeRightLinearRules c) (catRules g c)
- where c' = newCat c
- makeRightLinearRules b' (CFRule c ss n) =
- case ys of
- [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
- (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
- ++ makeRightLinearRules (newCat b) (CFRule c zs n)
- where (xs,ys) = break (`catElem` cs) ss
- -- don't add rules on the form A -> A
- newRule c rhs n | rhs == [NonTerminal c] = []
- | otherwise = [CFRule c rhs n]
- newCat c = c ++ "$"
-
---
--- * CFG Utilities
---
-
-mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
-mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
-
-groupProds :: [CFRule] -> Map Cat (Set CFRule)
-groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
-
--- | Gets all rules in a CFG.
-allRules :: CFG -> [CFRule]
-allRules = concat . map Set.toList . Map.elems . cfgRules
-
--- | Gets all rules in a CFG, grouped by their LHS categories.
-allRulesGrouped :: CFG -> [(Cat,[CFRule])]
-allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
-
--- | Gets all categories which have rules.
-allCats :: CFG -> [Cat]
-allCats = Map.keys . cfgRules
-
--- | Gets all categories which have rules or occur in a RHS.
-allCats' :: CFG -> [Cat]
-allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union`
- Set.fromList [c | rs <- Map.elems (cfgRules cfg),
- r <- Set.toList rs,
- NonTerminal c <- ruleRhs r])
-
--- | Gets all rules for the given category.
-catRules :: CFG -> Cat -> [CFRule]
-catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
-
--- | Gets all rules for categories in the given set.
-catSetRules :: CFG -> Set Cat -> [CFRule]
-catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
-
-mapCFGCats :: (Cat -> Cat) -> CFG -> CFG
-mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg))
- (Set.map f (cfgExternalCats cfg))
- [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg]
-
-onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
-onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
-
-onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
-onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
-
--- | Clean up CFG after rules have been removed.
-cleanCFG :: CFG -> CFG
-cleanCFG = onCFG (Map.filter (not . Set.null))
-
--- | Combine two CFGs.
-unionCFG :: CFG -> CFG -> CFG
-unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
-
-filterCFG :: (CFRule -> Bool) -> CFG -> CFG
-filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
-
-filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
-filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
-
-countCats :: CFG -> Int
-countCats = Map.size . cfgRules . cleanCFG
-
-countRules :: CFG -> Int
-countRules = length . allRules
-
-prCFG :: CFG -> String
-prCFG = prProductions . map prRule . allRules
- where
- prRule r = (lhsCat r, unwords (map prSym (ruleRhs r)))
- prSym = symbol id (\t -> "\""++ t ++"\"")
-
-prProductions :: [(Cat,String)] -> String
-prProductions prods =
- unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods]
- where
- maxLHSWidth = maximum $ 0:(map (length . fst) prods)
- rpad n s = s ++ replicate (n - length s) ' '
-
-prCFTerm :: CFTerm -> String
-prCFTerm = pr 0
- where
- pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")")
- pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t)
- pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")")
- pr _ (CFRes i) = "$" ++ show i
- pr _ (CFVar i) = "x" ++ show i
- pr _ (CFMeta c) = "?" ++ showCId c
- paren 0 x = x
- paren 1 x = "(" ++ x ++ ")"
-
---
--- * CFRule Utilities
---
-
-ruleFun :: CFRule -> CId
-ruleFun (CFRule _ _ t) = f t
- where f (CFObj n _) = n
- f (CFApp _ x) = f x
- f (CFAbs _ x) = f x
- f _ = mkCId ""
-
--- | Check if any of the categories used on the right-hand side
--- are in the given list of categories.
-anyUsedBy :: [Cat] -> CFRule -> Bool
-anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
-
-mkCFTerm :: String -> CFTerm
-mkCFTerm n = CFObj (mkCId n) []
-
-ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
-ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
-
--- | Check if all the rules are right-linear, or all the rules are
--- left-linear, with respect to given categories.
-allXLinear :: Set Cat -> [CFRule] -> Bool
-allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-
--- | Checks if a context-free rule is right-linear.
-isRightLinear :: Set Cat -- ^ The categories to consider
- -> CFRule -- ^ The rule to check for right-linearity
- -> Bool
-isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-
--- | Checks if a context-free rule is left-linear.
-isLeftLinear :: Set Cat -- ^ The categories to consider
- -> CFRule -- ^ The rule to check for left-linearity
- -> Bool
-isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
-
-
---
--- * Symbol utilities
---
-
-symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
-symbol fc ft (NonTerminal cat) = fc cat
-symbol fc ft (Terminal tok) = ft tok
-
-mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
-mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
-
-filterCats :: [Symbol c t] -> [c]
-filterCats syms = [ cat | NonTerminal cat <- syms ]
-
-filterToks :: [Symbol c t] -> [t]
-filterToks syms = [ tok | Terminal tok <- syms ]
-
--- | Checks if a symbol is a non-terminal of one of the given categories.
-catElem :: Ord c => Symbol c t -> Set c -> Bool
-catElem s cs = symbol (`Set.member` cs) (const False) s
-
-noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
-noCatsInSet cs = not . any (`catElem` cs)
diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs
index 4f5e3621e..330c763e5 100644
--- a/src/compiler/GF/Speech/CFGToFA.hs
+++ b/src/compiler/GF/Speech/CFGToFA.hs
@@ -17,7 +17,7 @@ import qualified Data.Set as Set
--import PGF.CId
import PGF.Data
import GF.Data.Utilities
-import GF.Speech.CFG
+import GF.Grammar.CFG
--import GF.Speech.PGFToCFG
--import GF.Infra.Ident (Ident)
diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs
index 3557ff21f..3eb4c20a7 100644
--- a/src/compiler/GF/Speech/GSL.hs
+++ b/src/compiler/GF/Speech/GSL.hs
@@ -9,7 +9,7 @@
module GF.Speech.GSL (gslPrinter) where
--import GF.Data.Utilities
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Infra.Option
diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs
index 921108e11..6a4935a7f 100644
--- a/src/compiler/GF/Speech/JSGF.hs
+++ b/src/compiler/GF/Speech/JSGF.hs
@@ -14,7 +14,7 @@ module GF.Speech.JSGF (jsgfPrinter) where
--import GF.Data.Utilities
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.RegExp
import GF.Speech.SISR
import GF.Speech.SRG
diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs
index 5c13ca471..d70a74fe7 100644
--- a/src/compiler/GF/Speech/PGFToCFG.hs
+++ b/src/compiler/GF/Speech/PGFToCFG.hs
@@ -10,7 +10,7 @@ import PGF(showCId)
import PGF.Data as PGF
import PGF.Macros
--import GF.Infra.Ident
-import GF.Speech.CFG hiding (Symbol)
+import GF.Grammar.CFG hiding (Symbol)
import Data.Array.IArray as Array
--import Data.List
diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs
index 0fc35d541..2829839f3 100644
--- a/src/compiler/GF/Speech/PrRegExp.hs
+++ b/src/compiler/GF/Speech/PrRegExp.hs
@@ -7,7 +7,7 @@
module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.CFGToFA
import GF.Speech.PGFToCFG
import GF.Speech.RegExp
diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs
index 8417fb203..5f9161547 100644
--- a/src/compiler/GF/Speech/SISR.hs
+++ b/src/compiler/GF/Speech/SISR.hs
@@ -13,7 +13,7 @@ import Data.List
--import GF.Data.Utilities
--import GF.Infra.Ident
import GF.Infra.Option (SISRFormat(..))
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.SRG (SRGNT)
import PGF(showCId)
diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs
index 7785f2382..d93d1b362 100644
--- a/src/compiler/GF/Speech/SLF.hs
+++ b/src/compiler/GF/Speech/SLF.hs
@@ -13,7 +13,7 @@ module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter,
slfSubPrinter,slfSubGraphvizPrinter) where
import GF.Data.Utilities
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.FiniteState
--import GF.Speech.CFG
import GF.Speech.CFGToFA
diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs
index 4e5508de0..d5bedc797 100644
--- a/src/compiler/GF/Speech/SRG.hs
+++ b/src/compiler/GF/Speech/SRG.hs
@@ -21,7 +21,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol
import GF.Data.Utilities
--import GF.Infra.Ident
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.PGFToCFG
--import GF.Data.Relation
--import GF.Speech.FiniteState
diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs
index 5d07762bb..a359b2c38 100644
--- a/src/compiler/GF/Speech/SRGS_ABNF.hs
+++ b/src/compiler/GF/Speech/SRGS_ABNF.hs
@@ -21,7 +21,7 @@ module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
--import GF.Data.Utilities
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.SISR as SISR
import GF.Speech.SRG
import GF.Speech.RegExp
diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs
index fe973c2e6..397bfb739 100644
--- a/src/compiler/GF/Speech/SRGS_XML.hs
+++ b/src/compiler/GF/Speech/SRGS_XML.hs
@@ -9,7 +9,7 @@ module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
--import GF.Data.Utilities
import GF.Data.XML
import GF.Infra.Option
-import GF.Speech.CFG
+import GF.Grammar.CFG
import GF.Speech.RegExp
import GF.Speech.SISR as SISR
import GF.Speech.SRG