summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2007-03-24 23:29:38 +0000
committerbringert <bringert@cs.chalmers.se>2007-03-24 23:29:38 +0000
commited1e7f448574662d8b54fe92b3c08ed4942f575d (patch)
tree53ff156e69c13ba5449dc3ba3bde68af852a676a /src
parent436ddd5ebf531c2693af9402236a5a0c462dc5b7 (diff)
SRG generation: merge categories with identical set of productions. The LC_LR algorithm produces lots of those, especially when there is little inflection.
Diffstat (limited to 'src')
-rw-r--r--src/GF/Speech/SRG.hs3
-rw-r--r--src/GF/Speech/TransformCFG.hs12
2 files changed, 14 insertions, 1 deletions
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index 8c3c5e02c..054cf62f6 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -95,7 +95,8 @@ makeSimpleSRG opt s =
probs = stateProbs s
l = fmap (replace '_' '-') $ getOptVal opts speechLanguage
(cats,cfgRules) = unzip $ preprocess $ cfgToCFRules s
- preprocess = removeLeftRecursion origStart
+ preprocess = mergeIdentical
+ . removeLeftRecursion origStart
. fix (topDownFilter origStart . bottomUpFilter)
. removeCycles
names = mkCatNames name cats
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index dcb666bf3..93f2a1be9 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -112,6 +112,18 @@ topDownFilter start rules = filter ((`Set.member` keep) . fst) rules
uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats
keep = allRelated uses start
+-- | Merges categories with identical right-hand-sides.
+-- FIXME: handle probabilities
+mergeIdentical :: CFRules -> CFRules
+mergeIdentical g = sortNubBy (compareBy fst) [(substCat c, map subst rs) | (c,rs) <- g]
+ where
+ -- maps categories to their replacement
+ m = Map.fromList [(y,concat (intersperse "+" xs)) | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- g], y <- xs]
+ -- build data to compare for each category: a set of name,rhs pairs
+ rulesKey rs = Set.fromList [(n,r) | CFRule _ r n <- rs]
+ subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
+ substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
+
-- * Removing left recursion
-- The LC_LR algorithm from