diff options
Diffstat (limited to 'src/GF/Speech/RegExp.hs')
| -rw-r--r-- | src/GF/Speech/RegExp.hs | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs new file mode 100644 index 000000000..5ee40828e --- /dev/null +++ b/src/GF/Speech/RegExp.hs @@ -0,0 +1,143 @@ +module GF.Speech.RegExp (RE(..), + epsilonRE, nullRE, + isEpsilon, isNull, + unionRE, concatRE, seqRE, + repeatRE, minimizeRE, + mapRE, mapRE', joinRE, + symbolsRE, + dfa2re, prRE) where + +import Data.List + +import GF.Data.Utilities +import GF.Speech.FiniteState + +data RE a = + REUnion [RE a] -- ^ REUnion [] is null + | REConcat [RE a] -- ^ REConcat [] is epsilon + | RERepeat (RE a) + | RESymbol a + deriving (Eq,Ord,Show) + + +dfa2re :: (Ord a) => DFA a -> RE a +dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops + . oneFinalState () epsilonRE . mapTransitions RESymbol + where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa + merge es = [(f,t,unionRE ls) + | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]] + +elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a) +elimStates fa = + case [s | (s,_) <- states fa, isInternal fa s] of + [] -> fa + sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa + where sAs = nonLoopTransitionsTo sE fa + sBs = nonLoopTransitionsFrom sE fa + r2 = unionRE $ loops sE fa + ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs] + r r1 r3 = concatRE [r1, repeatRE r2, r3] + +epsilonRE :: RE a +epsilonRE = REConcat [] + +nullRE :: RE a +nullRE = REUnion [] + +isNull :: RE a -> Bool +isNull (REUnion []) = True +isNull _ = False + +isEpsilon :: RE a -> Bool +isEpsilon (REConcat []) = True +isEpsilon _ = False + +unionRE :: Ord a => [RE a] -> RE a +unionRE = unionOrId . sortNub . concatMap toList + where + toList (REUnion xs) = xs + toList x = [x] + unionOrId [r] = r + unionOrId rs = REUnion rs + +concatRE :: [RE a] -> RE a +concatRE xs | any isNull xs = nullRE + | otherwise = case concatMap toList xs of + [r] -> r + rs -> REConcat rs + where + toList (REConcat xs) = xs + toList x = [x] + +seqRE :: [a] -> RE a +seqRE = concatRE . map RESymbol + +repeatRE :: RE a -> RE a +repeatRE x | isNull x || isEpsilon x = epsilonRE + | otherwise = RERepeat x + +finalRE :: Ord a => DFA (RE a) -> RE a +finalRE fa = concatRE [repeatRE r1, r2, + repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])] + where + s0 = startState fa + [sF] = finalStates fa + r1 = unionRE $ loops s0 fa + r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa + r3 = unionRE $ loops sF fa + r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa + +reverseRE :: RE a -> RE a +reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs +reverseRE (REUnion xs) = REUnion (map reverseRE xs) +reverseRE (RERepeat x) = RERepeat (reverseRE x) +reverseRE x = x + +minimizeRE :: Ord a => RE a -> RE a +minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward + +mergeForward :: Ord a => RE a -> RE a +mergeForward (REUnion xs) = + unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)] +mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)] +mergeForward (RERepeat r) = repeatRE (mergeForward r) +mergeForward r = r + +firstRE :: RE a -> (RE a, RE a) +firstRE (REConcat (x:xs)) = (x, REConcat xs) +firstRE r = (r,epsilonRE) + +mapRE :: (a -> b) -> RE a -> RE b +mapRE f = mapRE' (RESymbol . f) + +mapRE' :: (a -> RE b) -> RE a -> RE b +mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs) +mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs) +mapRE' f (RERepeat x) = RERepeat (mapRE' f x) +mapRE' f (RESymbol s) = f s + +joinRE :: RE (RE a) -> RE a +joinRE (REConcat xs) = REConcat (map joinRE xs) +joinRE (REUnion xs) = REUnion (map joinRE xs) +joinRE (RERepeat xs) = RERepeat (joinRE xs) +joinRE (RESymbol ss) = ss + +symbolsRE :: RE a -> [a] +symbolsRE (REConcat xs) = concatMap symbolsRE xs +symbolsRE (REUnion xs) = concatMap symbolsRE xs +symbolsRE (RERepeat x) = symbolsRE x +symbolsRE (RESymbol x) = [x] + +-- Debugging + +prRE :: RE String -> String +prRE = prRE' 0 + +prRE' _ (REUnion []) = "<NULL>" +prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs))) +prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs)) +prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*" +prRE' _ (RESymbol s) = s + +p n m s | n >= m = "(" ++ s ++ ")" + | True = s |
