summaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
authorpeter.ljunglof <peter.ljunglof@gu.se>2012-08-29 21:45:10 +0000
committerpeter.ljunglof <peter.ljunglof@gu.se>2012-08-29 21:45:10 +0000
commita7de16c34b7ccffc5ae0ac4fd004dfc155b4f546 (patch)
treea39529476f339e06ba4b9283ef9073c9b5dcaf75 /src/compiler
parente2ecdfed1fea8dcc77ab3c8d83f74fc577908f5b (diff)
Added an O(n log n) version of nub
The new nub is called nub', and it replaces the old sortNub which was not lazy and did not retain the order between the elements.
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/GF/Compile/GeneratePMCFG.hs2
-rw-r--r--src/compiler/GF/Data/Utilities.hs22
-rw-r--r--src/compiler/GF/Speech/RegExp.hs2
3 files changed, 15 insertions, 11 deletions
diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs
index 0dcb8c8cd..bb4c5b549 100644
--- a/src/compiler/GF/Compile/GeneratePMCFG.hs
+++ b/src/compiler/GF/Compile/GeneratePMCFG.hs
@@ -22,7 +22,7 @@ import GF.Grammar.Lookup
import GF.Grammar.Predef
import GF.Data.BacktrackM
import GF.Data.Operations
-import GF.Data.Utilities (updateNthM, updateNth, sortNub)
+import GF.Data.Utilities (updateNthM, updateNth)
import System.IO
import qualified Data.Map as Map
diff --git a/src/compiler/GF/Data/Utilities.hs b/src/compiler/GF/Data/Utilities.hs
index 74d3ef81e..50269bef1 100644
--- a/src/compiler/GF/Data/Utilities.hs
+++ b/src/compiler/GF/Data/Utilities.hs
@@ -17,6 +17,7 @@ module GF.Data.Utilities where
import Data.Maybe
import Data.List
import Control.Monad (MonadPlus(..),liftM)
+import qualified Data.Set as Set
-- * functions on lists
@@ -67,15 +68,18 @@ safeInit :: [a] -> [a]
safeInit [] = []
safeInit xs = init xs
--- | Like 'nub', but more efficient as it uses sorting internally.
-sortNub :: Ord a => [a] -> [a]
-sortNub = map head . group . sort
-
--- | Like 'nubBy', but more efficient as it uses sorting internally.
-sortNubBy :: (a -> a -> Ordering) -> [a] -> [a]
-sortNubBy f = map head . sortGroupBy f
-
--- | Sorts and then groups elements given and ordering of the
+-- | Like 'nub', but O(n log n) instead of O(n^2), since it uses a set to lookup previous things.
+-- The result list is stable (the elements are returned in the order they occur), and lazy.
+-- Requires that the list elements can be compared by Ord.
+-- Code ruthlessly taken from http://hpaste.org/54411
+nub' :: Ord a => [a] -> [a]
+nub' = loop Set.empty
+ where loop _ [] = []
+ loop seen (x : xs)
+ | Set.member x seen = loop seen xs
+ | otherwise = x : loop (Set.insert x seen) xs
+
+-- | Sorts and then groups elements given an ordering of the
-- elements.
sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]]
sortGroupBy f = groupBy (compareEq f) . sortBy f
diff --git a/src/compiler/GF/Speech/RegExp.hs b/src/compiler/GF/Speech/RegExp.hs
index 2592b3d57..bf9f89cb3 100644
--- a/src/compiler/GF/Speech/RegExp.hs
+++ b/src/compiler/GF/Speech/RegExp.hs
@@ -53,7 +53,7 @@ isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: Ord a => [RE a] -> RE a
-unionRE = unionOrId . sortNub . concatMap toList
+unionRE = unionOrId . nub' . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]