summaryrefslogtreecommitdiff
path: root/src/GF
diff options
context:
space:
mode:
author bringert <bringert@cs.chalmers.se> 2005-12-22 23:16:15 +0000
committer bringert <bringert@cs.chalmers.se> 2005-12-22 23:16:15 +0000
commit e32472a9bb40468bd77db839d683bd15e89db2e1 (patch)
tree e2756756fc5b75f577a500438e34922c3b7b6d5f /src/GF
parent 29ba681242acd62a28e2facdad733619bf8d994f (diff)
Some performance improvements in the FA generation.
Diffstat (limited to 'src/GF')
-rw-r--r-- src/GF/Data/Utilities.hs 5
-rw-r--r-- src/GF/Speech/CFGToFiniteState.hs 116
-rw-r--r-- src/GF/Speech/FiniteState.hs 7
-rw-r--r-- src/GF/Speech/Graph.hs 7
-rw-r--r-- src/GF/Speech/Relation.hs 4
-rw-r--r-- src/GF/Speech/TransformCFG.hs 6
6 files changed, 92 insertions, 53 deletions
diff --git a/src/GF/Data/Utilities.hs b/src/GF/Data/Utilities.hs
index d7e6be2f0..50d1f5195 100644
--- a/src/GF/Data/Utilities.hs
+++ b/src/GF/Data/Utilities.hs
@@ -34,11 +34,6 @@ lookupList a [] = []
lookupList a (p:ps) | a == fst p = snd p : lookupList a ps
| otherwise = lookupList a ps
--- | Find the first list in a list of lists
--- which contains the argument.
-findSet :: Eq c => c -> [[c]] -> Maybe [c]
-findSet x = find (x `elem`)
-
split :: [a] -> ([a], [a])
split (x : y : as) = (x:xs, y:ys)
where (xs, ys) = split as
diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs
index 89ec88872..25790786a 100644
--- a/src/GF/Speech/CFGToFiniteState.hs
+++ b/src/GF/Speech/CFGToFiniteState.hs
@@ -15,6 +15,10 @@
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
import Data.List
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
import GF.Data.Utilities
import GF.Formalism.CFG
@@ -27,13 +31,19 @@ import GF.Speech.FiniteState
import GF.Speech.Relation
import GF.Speech.TransformCFG
-import Debug.Trace
+data MutRecSet = MutRecSet {
+ mrCats :: [Cat_],
+ mrNonRecRules :: [CFRule_],
+ mrRecRules :: [CFRule_],
+ mrIsRightRec :: Bool
+ }
+
+
+type MutRecSets = Map Cat_ MutRecSet
cfgToFA :: Options -> CGrammar -> DFA String
cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
---cfgToFA opts = trfa "minimal" . minimize . trfa "initial" . compileAutomaton start . makeSimpleRegular
where start = getStartCat opts
- trfa s fa = trace (s ++ ", states: " ++ show (length (states fa)) ++ ", transitions: " ++ show (length (transitions fa))) fa
makeSimpleRegular :: CGrammar -> CFRules
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
@@ -45,8 +55,9 @@ makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgTo
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs
- | otherwise = concatMap handleCat cs
- where rs = catSetRules g cs
+ | otherwise = concatMap handleCat csl
+ where csl = Set.toList cs
+ rs = catSetRules g csl
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
@@ -62,7 +73,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
- -> CFRules -> [[Cat_]]
+ -> CFRules -> [Set Cat_]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
allCats = map fst g
@@ -72,67 +83,88 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
compileAutomaton :: Cat_ -- ^ Start category
-> CFRules
-> NFA Token
-compileAutomaton start g = make_fa s [Cat start] f fa''
+compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
where
fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
- ns = mutRecCats False g
- -- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
- -- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
- make_fa :: State -> [Symbol Cat_ Token] -> State
+ ns = mutRecSets g $ mutRecCats False g
+
+mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
+mutRecSets g = Map.fromList . concatMap mkMutRecSet
+ where
+ mkMutRecSet cs = [ (c,ms) | c <- csl ]
+ where csl = Set.toList cs
+ rs = catSetRules g csl
+ (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
+ ms = MutRecSet {
+ mrCats = csl,
+ mrNonRecRules = nrs,
+ mrRecRules = rrs,
+ mrIsRightRec = all (isRightLinear cs) rrs
+ }
+
+-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
+-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
+make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
-> NFA Token -> NFA Token
- make_fa q0 alpha q1 fa =
+make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[Tok t] -> newTransition q0 q1 (Just t) fa
- [Cat a] -> case findSet a ns of
+ [Cat a] -> case Map.lookup a ns of
-- a is recursive
- Just ni -> let (fa',ss) = addStatesForCats ni fa
- getState x = lookup' x ss
- niRules = catSetRules g ni
- (nrs,rs) = partition (ruleIsNonRecursive ni) niRules
- in if all (isRightLinear ni) niRules
- then
- -- the set Ni is right-recursive or cyclic
- let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa'
- fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs,
- let (xs,Cat d) = (init ss,last ss)] fa''
- in newTransition q0 (getState a) Nothing fa'''
- else
- -- the set Ni is left-recursive
- let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa'
- fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa''
- in newTransition (getState a) q1 Nothing fa'''
+ Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
+ if mrIsRightRec n
+ then
+ -- the set Ni is right-recursive or cyclic
+ let fa'' = foldl (\ f (CFRule c xs _) -> make_fa_ (getState c) xs q1 f) fa' nrs
+ fa''' = foldl (\ f (CFRule c ss _) ->
+ let (xs,Cat d) = (init ss,last ss)
+ in make_fa_ (getState c) xs (getState d) f) fa'' rs
+ in newTransition q0 (getState a) Nothing fa'''
+ else
+ -- the set Ni is left-recursive
+ let fa'' = foldl (\f (CFRule c xs _) -> make_fa_ q0 xs (getState c) f) fa' nrs
+ fa''' = foldl (\f (CFRule c (Cat d:xs) _) -> make_fa_ (getState d) xs (getState c) f) fa'' rs
+ in newTransition (getState a) q1 Nothing fa'''
+ where
+ (fa',ss) = addStatesForCats ni fa
+ getState x = lookup' x ss
-- a is not recursive
Nothing -> let rs = catRules g a
- in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs
+ in foldl (\fa -> \ (CFRule _ b _) -> make_fa_ q0 b q1 fa) fa rs
(x:beta) -> let (fa',q) = newState () fa
- in make_fa q beta q1 $ make_fa q0 [x] q fa'
- addStatesForCats [] fa = (fa,[])
- addStatesForCats (c:cs) fa = let (fa',s) = newState () fa
- (fa'',ss) = addStatesForCats cs fa'
- in (fa'',(c,s):ss)
- ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+ in make_fa_ q beta q1 $! make_fa_ q0 [x] q fa'
+ where
+ make_fa_ = make_fa c
+
+addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, [(Cat_,State)])
+addStatesForCats cs fa = (fa', zip cs (map fst ns))
+ where (fa', ns) = newStates (replicate (length cs) ()) fa
+
+ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
+ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
+
-noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
+noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
-allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
+allXLinear :: Set Cat_ -> [CFRule_] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
-isRightLinear :: Eq c => [c] -- ^ The categories to consider
- -> CFRule c n t -- ^ The rule to check for right-linearity
+isRightLinear :: Set Cat_ -- ^ The categories to consider
+ -> CFRule_ -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
-isLeftLinear :: Eq c => [c] -- ^ The categories to consider
- -> CFRule c n t -- ^ The rule to check for right-linearity
+isLeftLinear :: Set Cat_ -- ^ The categories to consider
+ -> CFRule_ -- ^ The rule to check for left-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
diff --git a/src/GF/Speech/FiniteState.hs b/src/GF/Speech/FiniteState.hs
index dfd5d941d..a2f8952cb 100644
--- a/src/GF/Speech/FiniteState.hs
+++ b/src/GF/Speech/FiniteState.hs
@@ -16,7 +16,8 @@ module GF.Speech.FiniteState (FA, State, NFA, DFA,
states, transitions,
newFA,
addFinalState,
- newState, newTransition,
+ newState, newStates,
+ newTransition,
mapStates, mapTransitions,
oneFinalState,
moveLabelsToNodes, minimize,
@@ -65,6 +66,10 @@ newState :: a -> FA n a b -> (FA n a b, n)
newState x (FA g s ss) = (FA g' s ss, n)
where (g',n) = newNode x g
+newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
+newStates xs (FA g s ss) = (FA g' s ss, ns)
+ where (g',ns) = newNodes xs g
+
newTransition :: n -> n -> b -> FA n a b -> FA n a b
newTransition f t l = onGraph (newEdge (f,t,l))
diff --git a/src/GF/Speech/Graph.hs b/src/GF/Speech/Graph.hs
index 2f80ef0ca..7ebecdc34 100644
--- a/src/GF/Speech/Graph.hs
+++ b/src/GF/Speech/Graph.hs
@@ -13,7 +13,7 @@
-----------------------------------------------------------------------------
module GF.Speech.Graph ( Graph(..), Node, Edge, Incoming, Outgoing
, newGraph, nodes, edges
- , nmap, emap, newNode, newEdge, newEdges
+ , nmap, emap, newNode, newNodes, newEdge, newEdges
, incoming, outgoing, getOutgoing
, getFrom, getTo, getLabel
, reverseGraph, renameNodes
@@ -52,6 +52,11 @@ emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
newNode :: a -> Graph n a b -> (Graph n a b,n)
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
+newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
+newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
+ where (xs,cs') = splitAt (length ls) cs
+ ns' = zip xs ls
+
newEdge :: Edge n b -> Graph n a b -> Graph n a b
newEdge e (Graph c ns es) = Graph c ns (e:es)
diff --git a/src/GF/Speech/Relation.hs b/src/GF/Speech/Relation.hs
index 2ad3faccb..c66a07d10 100644
--- a/src/GF/Speech/Relation.hs
+++ b/src/GF/Speech/Relation.hs
@@ -100,10 +100,10 @@ purgeEmpty r = Map.filter (not . Set.null) r
-- | Get the equivalence classes from an equivalence relation.
-equivalenceClasses :: Ord a => Rel a -> [[a]]
+equivalenceClasses :: Ord a => Rel a -> [Set a]
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
where equivalenceClasses_ [] _ = []
- equivalenceClasses_ (x:xs) r = Set.toList ys:equivalenceClasses_ zs r
+ equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
where ys = allRelated r x
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index 39060206b..2c920cbda 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -37,6 +37,8 @@ import Control.Monad
import Data.FiniteMap
import Data.List
import Data.Maybe (fromJust, fromMaybe)
+import Data.Set (Set)
+import qualified Data.Set as Set
-- | not very nice to replace the structured CFCat type with a simple string
@@ -134,8 +136,8 @@ ruleFun :: CFRule_ -> Fun
ruleFun (CFRule _ _ n) = name2fun n
-- | Checks if a symbol is a non-terminal of one of the given categories.
-catElem :: Eq c => Symbol c t -> [c] -> Bool
-catElem s cs = symbol (`elem` cs) (const False) s
+catElem :: Symbol Cat_ t -> Set Cat_ -> Bool
+catElem s cs = symbol (`Set.member` cs) (const False) s
-- | Check if any of the categories used on the right-hand side
-- are in the given list of categories.