summaryrefslogtreecommitdiff
path: root/src/GF/Speech/TransformCFG.hs
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2007-03-24 23:29:38 +0000
committerbringert <bringert@cs.chalmers.se>2007-03-24 23:29:38 +0000
commited1e7f448574662d8b54fe92b3c08ed4942f575d (patch)
tree53ff156e69c13ba5449dc3ba3bde68af852a676a /src/GF/Speech/TransformCFG.hs
parent436ddd5ebf531c2693af9402236a5a0c462dc5b7 (diff)
SRG generation: merge categories with identical set of productions. The LC_LR algorithm produces lots of those, especially when there is little inflection.
Diffstat (limited to 'src/GF/Speech/TransformCFG.hs')
-rw-r--r--src/GF/Speech/TransformCFG.hs12
1 files changed, 12 insertions, 0 deletions
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index dcb666bf3..93f2a1be9 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -112,6 +112,18 @@ topDownFilter start rules = filter ((`Set.member` keep) . fst) rules
uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats
keep = allRelated uses start
+-- | Merges categories with identical right-hand-sides.
+-- FIXME: handle probabilities
+mergeIdentical :: CFRules -> CFRules
+mergeIdentical g = sortNubBy (compareBy fst) [(substCat c, map subst rs) | (c,rs) <- g]
+ where
+ -- maps categories to their replacement
+ m = Map.fromList [(y,concat (intersperse "+" xs)) | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- g], y <- xs]
+ -- build data to compare for each category: a set of name,rhs pairs
+ rulesKey rs = Set.fromList [(n,r) | CFRule _ r n <- rs]
+ subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
+ substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
+
-- * Removing left recursion
-- The LC_LR algorithm from