summaryrefslogtreecommitdiff
path: root/src/GF/Speech/TransformCFG.hs
diff options
context:
space:
mode:
authorbringert <unknown>2004-09-14 08:36:57 +0000
committerbringert <unknown>2004-09-14 08:36:57 +0000
commit87eec27336bd9c2f123cffe56a67cc919c8da09b (patch)
tree06e98973088af6b0ea48168c964951205fbfb40d /src/GF/Speech/TransformCFG.hs
parent318379f73a4f6beae40687e7122ac476abe526f1 (diff)
Added GSL speech recognition grammar generation.
Diffstat (limited to 'src/GF/Speech/TransformCFG.hs')
-rw-r--r--src/GF/Speech/TransformCFG.hs102
1 files changed, 102 insertions, 0 deletions
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
new file mode 100644
index 000000000..322888da5
--- /dev/null
+++ b/src/GF/Speech/TransformCFG.hs
@@ -0,0 +1,102 @@
+{-
+ **************************************************************
+ GF Module
+
+ Description : This module does some useful transformations
+ on CFGs.
+
+ Author : Björn Bringert (bringert@cs.chalmers.se)
+
+ License : GPL (GNU General Public License)
+
+ Created : September 13, 2004
+
+ Modified :
+ **************************************************************
+-}
+
+module TransformCFG where
+
+import Ident
+import CFGrammar
+import Parser (Symbol(..))
+import GrammarTypes
+import PrintParser
+
+import Data.FiniteMap
+import Data.List
+import Data.Maybe (fromJust)
+
+import Debug.Trace
+
+-- FIXME: remove cycles
+
+
+-- not very nice to get replace the structured CFCat type with a simple string
+type CFRule_ = Rule CFName String Token
+
+type CFRules = FiniteMap String [CFRule_]
+
+makeNice :: CFGrammar -> [CFRule_]
+makeNice = concat . eltsFM . makeNice' . groupProds . cfgToCFRules
+ where makeNice' = removeLeftRecursion . removeEmptyCats
+
+cfgToCFRules :: CFGrammar -> [CFRule_]
+cfgToCFRules cfg = [Rule (catToString c) (map symb r) n | Rule c r n <- cfg]
+ where symb (Cat c) = Cat (catToString c)
+ symb (Tok t) = Tok t
+ catToString = prt
+
+-- | Group productions by their lhs categories
+groupProds :: [CFRule_] -> CFRules
+groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs]))
+ where ruleCat (Rule c _ _) = c
+
+-- | Remove productions which use categories which have no productions
+removeEmptyCats :: CFRules -> CFRules
+removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
+ where
+ removeEmptyCats' :: [(String,[CFRule_])] -> [(String,[CFRule_])]
+ removeEmptyCats' rs = k'
+ where
+ keep = filter (not . null . snd) rs
+ allCats = nub [c | (_,r) <- rs, Rule _ rhs _ <- r, Cat c <- rhs]
+ emptyCats = filter (nothingOrNull . flip lookup rs) allCats
+ k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
+
+anyUsedBy :: [String] -> CFRule_ -> Bool
+anyUsedBy ss (Rule _ r _) = or [c `elem` ss | Cat c <- r]
+
+removeLeftRecursion :: CFRules -> CFRules
+removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs
+ where
+ handleProds (c, r) = (c, concatMap handleProd r)
+ handleProd (Rule ai (Cat aj:alpha) n) | aj < ai =
+ -- FIXME: this will give multiple rules with the same name
+ [Rule ai (beta ++ alpha) n | Rule _ beta _ <- fromJust (lookupFM rs aj)]
+ handleProd r = [r]
+
+removeDirectLeftRecursion :: (String,[CFRule_]) -- ^ All productions for a category
+ -> [(String,[CFRule_])]
+removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)]
+ | otherwise = [(a, as), (a', a's)]
+ where
+ a' = a ++ "'" -- FIXME: this might not be unique
+ (dr,nr) = partition isDirectLeftRecursive rs
+ as = maybeEndWithA' nr
+ is = [Rule a' (tail r) n | Rule _ r n <- dr]
+ a's = maybeEndWithA' is
+ maybeEndWithA' xs = xs ++ [Rule c (r++[Cat a']) n | Rule c r n <- xs]
+
+isDirectLeftRecursive :: CFRule_ -> Bool
+isDirectLeftRecursive (Rule c (Cat c':_) _) = c == c'
+isDirectLeftRecursive _ = False
+
+
+
+fix :: Eq a => (a -> a) -> a -> a
+fix f x = let x' = f x in if x' == x then x else fix f x'
+
+nothingOrNull :: Maybe [a] -> Bool
+nothingOrNull Nothing = True
+nothingOrNull (Just xs) = null xs \ No newline at end of file