summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2007-01-07 23:16:32 +0000
committerbringert <bringert@cs.chalmers.se>2007-01-07 23:16:32 +0000
commit1b8bc71b28997f7902d2809ce34254ac0168514f (patch)
tree064401e495161d13e3e555fff2b2ddd4080ad524 /src
parentd18ccbf02ef5a7d9ea98775f3e64c10e0105c7f0 (diff)
Fixed bug in SRG EBNF generation. Before it assumed that all variation came from variants, and overgenerated if this was not true.
Diffstat (limited to 'src')
-rw-r--r--src/GF/Speech/RegExp.hs35
-rw-r--r--src/GF/Speech/SRG.hs21
2 files changed, 46 insertions, 10 deletions
diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs
index 1eb6efa4d..6c787b714 100644
--- a/src/GF/Speech/RegExp.hs
+++ b/src/GF/Speech/RegExp.hs
@@ -2,7 +2,8 @@ module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE,
isEpsilon, isNull,
unionRE, concatRE, seqRE,
- repeatRE,
+ repeatRE, minimizeRE,
+ mapRE, joinRE,
dfa2re, prRE) where
import Data.List
@@ -85,6 +86,38 @@ finalRE fa = concatRE [repeatRE r1, r2,
r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
+reverseRE :: RE a -> RE a
+reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
+reverseRE (REUnion xs) = REUnion (map reverseRE xs)
+reverseRE (RERepeat x) = RERepeat (reverseRE x)
+reverseRE x = x
+
+minimizeRE :: Ord a => RE a -> RE a
+minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
+
+mergeForward :: Ord a => RE a -> RE a
+mergeForward (REUnion xs) =
+ unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
+mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
+mergeForward (RERepeat r) = repeatRE (mergeForward r)
+mergeForward r = r
+
+firstRE :: RE a -> (RE a, RE a)
+firstRE (REConcat (x:xs)) = (x, REConcat xs)
+firstRE r = (r,epsilonRE)
+
+mapRE :: (a -> b) -> RE a -> RE b
+mapRE f (REConcat xs) = REConcat (map (mapRE f) xs)
+mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
+mapRE f (RERepeat xs) = RERepeat (mapRE f xs)
+mapRE f (RESymbol s) = RESymbol (f s)
+
+joinRE :: RE (RE a) -> RE a
+joinRE (REConcat xs) = REConcat (map joinRE xs)
+joinRE (REUnion xs) = REUnion (map joinRE xs)
+joinRE (RERepeat xs) = RERepeat (joinRE xs)
+joinRE (RESymbol ss) = ss
+
-- Debugging
prRE :: Show a => RE a -> String
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index cc03cdca6..e0a347480 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -190,17 +190,20 @@ ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
--- ^ Merges a list of right-hand sides which all have the same
+-- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
---mergeItems = unionRE . map seqRE
-mergeItems [] = nullRE
-mergeItems sss | any null rss = t
- | otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
- where (tss,rss) = unzip $ map (span isToken) sss
- t = unionRE (map seqRE tss)
- (cs,nss) = unzip $ map (splitAt 1) rss
- isToken = symbol (const False) (const True)
+mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
+
+groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
+groupTokens [] = []
+groupTokens (Tok t:ss) = case groupTokens ss of
+ Tok ts:ss' -> Tok (t:ts):ss'
+ ss' -> Tok [t]:ss'
+groupTokens (Cat c:ss) = Cat c : groupTokens ss
+
+ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
+ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
--
-- * Utilities for building and printing SRGs