**diff options**

author | MatthewFarkasDyck <> | 2020-07-30 08:33:00 (GMT) |
---|---|---|

committer | hdiff <hdiff@hdiff.luite.com> | 2020-07-30 08:33:00 (GMT) |

commit | 81a016b6d9b7fc69d28f7b98914a572b9e3cc303 (patch) | |

tree | a2df67e05447e54e8262eff6f9f3cf112f927558 | |

parent | 836b04bb1242616feb9dee672855a4ec8ca84186 (diff) |

version 0.1.4.00.1.4.0

-rw-r--r-- | Data/Filtrable.hs | 47 | ||||

-rw-r--r-- | Data/Set/Private.hs | 312 | ||||

-rw-r--r-- | filtrable.cabal | 22 |

3 files changed, 372 insertions, 9 deletions

diff --git a/Data/Filtrable.hs b/Data/Filtrable.hs index 40311c4..e6bfac5 100644 --- a/Data/Filtrable.hs +++ b/Data/Filtrable.hs @@ -1,9 +1,14 @@ -module Data.Filtrable (Filtrable (..), (<$?>), (<*?>)) where +module Data.Filtrable + ( Filtrable (..) + , (<$?>), (<*?>) + , nub, nubBy, nubOrd, nubOrdBy + ) where import Prelude hiding (filter) import Control.Applicative import Control.Monad +import qualified Control.Monad.Trans.State as M import Data.Bool (bool) import Data.Functor.Compose import Data.Functor.Product @@ -11,7 +16,11 @@ import Data.Functor.Sum import Data.Proxy import Data.Traversable --- | Laws: +import qualified Data.Set.Private as Set + +-- | Class of filtrable containers, i.e. containers we can map over while selectively dropping elements. +-- +-- Laws: -- -- * @'mapMaybe' 'Just' = id@ -- @@ -29,29 +38,37 @@ import Data.Traversable class Functor f => Filtrable f where {-# MINIMAL mapMaybe | catMaybes #-} + -- | Map the container with the given function, dropping the elements for which it returns 'Nothing'. mapMaybe :: (a -> Maybe b) -> f a -> f b mapMaybe f = catMaybes . fmap f + -- | @'catMaybes' = 'mapMaybe' 'id'@ catMaybes :: f (Maybe a) -> f a catMaybes = mapMaybe id + -- | Drop the elements for which the given predicate is 'False'. filter :: (a -> Bool) -> f a -> f a filter f = mapMaybe ((<$) <*> guard . f) + -- | Traverse the container with the given function, dropping the elements for which it returns 'Nothing'. mapMaybeA :: (Traversable f, Applicative p) => (a -> p (Maybe b)) -> f a -> p (f b) mapMaybeA f xs = catMaybes <$> traverse f xs + -- | Drop the elements for which the given predicate is 'False'. filterA :: (Traversable f, Applicative p) => (a -> p Bool) -> f a -> p (f a) filterA f = mapMaybeA (\ x -> (x <$) . guard <$> f x) + -- | Map the container with the given function, collecting the 'Left's and the 'Right's separately. mapEither :: (a -> Either b c) -> f a -> (f b, f c) mapEither f = (,) <$> mapMaybe (either Just (pure Nothing) . f) <*> mapMaybe (either (pure Nothing) Just . f) + -- | Traverse the container with the given function, collecting the 'Left's and the 'Right's separately. mapEitherA :: (Traversable f, Applicative p) => (a -> p (Either b c)) -> f a -> p (f b, f c) mapEitherA f = liftA2 (,) <$> mapMaybeA (fmap (Just `either` pure Nothing) . f) <*> mapMaybeA (fmap (pure Nothing `either` Just) . f) + -- | @'partitionEithers' = 'mapEither' 'id'@ partitionEithers :: f (Either a b) -> (f a, f b) partitionEithers = mapEither id @@ -88,3 +105,29 @@ infixl 4 <$?>, <*?> (<*?>) :: (Applicative p, Filtrable p) => p (a -> Maybe b) -> p a -> p b f <*?> a = catMaybes (f <*> a) + +-- | \(\mathcal{O}(n^2)\) +-- Delete all but the first copy of each element, special case of 'nubBy'. +nub :: (Filtrable f, Traversable f, Eq a) => f a -> f a +nub = nubBy (==) + +-- | \(\mathcal{O}(n^2)\) +-- Delete all but the first copy of each element, with the given relation. +nubBy :: (Filtrable f, Traversable f) => (a -> a -> Bool) -> f a -> f a +nubBy eq = fmap (flip M.evalState []) . filterA $ \ a -> do + as <- M.get + let b = all (not . eq a) as + b <$ when b (M.modify (a:)) + +-- | \(\mathcal{O}(n^2)\) +-- Delete all but the first copy of each element, special case of 'nubOrdBy'. +nubOrd :: (Filtrable f, Traversable f, Ord a) => f a -> f a +nubOrd = nubOrdBy compare + +-- | \(\mathcal{O}(n^2)\) +-- Delete all but the first copy of each element, with the given relation. +nubOrdBy :: (Filtrable f, Traversable f) => (a -> a -> Ordering) -> f a -> f a +nubOrdBy compare = fmap (flip M.evalState Set.empty) . filterA $ \ a -> M.state $ \ as -> + case Set.insertBy' compare a as of + Nothing -> (False, as) + Just as' -> (True, as') diff --git a/Data/Set/Private.hs b/Data/Set/Private.hs new file mode 100644 index 0000000..f033839 --- /dev/null +++ b/Data/Set/Private.hs @@ -0,0 +1,312 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE BangPatterns #-} +#if !defined(TESTING) && defined(__GLASGOW_HASKELL__) +{-# LANGUAGE Trustworthy #-} +#endif + +{-# OPTIONS_HADDOCK not-home #-} + +----------------------------------------------------------------------------- +-- | +-- Copyright : (c) Daan Leijen 2002 +-- License : BSD-style +-- +-- = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- This contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- +-- An efficient implementation of sets. +-- +-- These modules are intended to be imported qualified, to avoid name +-- clashes with Prelude functions, e.g. +-- +-- > import Data.Set (Set) +-- > import qualified Data.Set as Set +-- +-- The implementation of 'Set' is based on /size balanced/ binary trees (or +-- trees of /bounded balance/) as described by: +-- +-- * Stephen Adams, \"/Efficient sets: a balancing act/\", +-- Journal of Functional Programming 3(4):553-562, October 1993, +-- <http://www.swiss.ai.mit.edu/~adams/BB/>. +-- * J. Nievergelt and E.M. Reingold, +-- \"/Binary search trees of bounded balance/\", +-- SIAM journal of computing 2(1), March 1973. +-- +-- Bounds for 'union', 'intersection', and 'difference' are as given +-- by +-- +-- * Guy Blelloch, Daniel Ferizovic, and Yihan Sun, +-- \"/Just Join for Parallel Ordered Sets/\", +-- <https://arxiv.org/abs/1602.02120v3>. +-- +-- Note that the implementation is /left-biased/ -- the elements of a +-- first argument are always preferred to the second, for example in +-- 'union' or 'insert'. Of course, left-biasing can only be observed +-- when equality is an equivalence relation instead of structural +-- equality. +-- +-- /Warning/: The size of the set must not exceed @maxBound::Int@. Violation of +-- this condition is not detected and if the size limit is exceeded, the +-- behavior of the set is completely undefined. +-- +-- @since 0.5.9 +----------------------------------------------------------------------------- + +-- [Note: Using INLINABLE] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- It is crucial to the performance that the functions specialize on the Ord +-- type when possible. GHC 7.0 and higher does this by itself when it sees th +-- unfolding of a function -- that is why all public functions are marked +-- INLINABLE (that exposes the unfolding). + + +-- [Note: Using INLINE] +-- ~~~~~~~~~~~~~~~~~~~~ +-- For other compilers and GHC pre 7.0, we mark some of the functions INLINE. +-- We mark the functions that just navigate down the tree (lookup, insert, +-- delete and similar). That navigation code gets inlined and thus specialized +-- when possible. There is a price to pay -- code growth. The code INLINED is +-- therefore only the tree navigation, all the real work (rebalancing) is not +-- INLINED by using a NOINLINE. +-- +-- All methods marked INLINE have to be nonrecursive -- a 'go' function doing +-- the real work is provided. + + +-- [Note: Type of local 'go' function] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- If the local 'go' function uses an Ord class, it sometimes heap-allocates +-- the Ord dictionary when the 'go' function does not have explicit type. +-- In that case we give 'go' explicit type. But this slightly decrease +-- performance, as the resulting 'go' function can float out to top level. + + +-- [Note: Local 'go' functions and capturing] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- As opposed to IntSet, when 'go' function captures an argument, increased +-- heap-allocation can occur: sometimes in a polymorphic function, the 'go' +-- floats out of its enclosing function and then it heap-allocates the +-- dictionary and the argument. Maybe it floats out too late and strictness +-- analyzer cannot see that these could be passed on stack. + +-- [Note: Order of constructors] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- The order of constructors of Set matters when considering performance. +-- Currently in GHC 7.0, when type has 2 constructors, a forward conditional +-- jump is made when successfully matching second constructor. Successful match +-- of first constructor results in the forward jump not taken. +-- On GHC 7.0, reordering constructors from Tip | Bin to Bin | Tip +-- improves the benchmark by up to 10% on x86. + +module Data.Set.Private ( + -- * Set type + Set(..) -- instance Eq,Ord,Show,Read,Data,Typeable + , Size + , insertBy' + , empty + ) where + +import Prelude hiding (filter,foldl,foldr,null,map,take,drop,splitAt) +import Control.Monad (join) + +#if __GLASGOW_HASKELL__ +import GHC.Exts ( lazy ) +#endif + +{-------------------------------------------------------------------- + Sets are size balanced trees +--------------------------------------------------------------------} +-- | A set of values @a@. + +-- See Note: Order of constructors +data Set a = Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a) | Tip + +type Size = Int + +{-------------------------------------------------------------------- + Query +--------------------------------------------------------------------} +-- | /O(1)/. The number of elements in the set. +size :: Set a -> Int +size Tip = 0 +size (Bin sz _ _ _) = sz + +{-------------------------------------------------------------------- + Construction +--------------------------------------------------------------------} +-- | /O(1)/. The empty set. +empty :: Set a +empty = Tip + +-- | /O(1)/. Create a singleton set. +singleton :: a -> Set a +singleton x = Bin 1 x Tip Tip + +{-------------------------------------------------------------------- + Insertion, Deletion +--------------------------------------------------------------------} +-- | /O(log n)/. Insert an element in a set. +-- If the set already contains an element equal to the given value, +-- it is replaced with the new value. + +-- See Note: Type of local 'go' function +-- See Note: Avoiding worker/wrapper (in Data.Map.Internal) +insertBy' :: (a -> a -> Ordering) -> a -> Set a -> Maybe (Set a) +insertBy' compare = join go + where + go orig !_ Tip = Just $! singleton (lazy orig) + go orig !x (Bin _ y l r) = case compare x y of + LT -> (\ !l' -> balanceL y l' r) <$!> go orig x l + GT -> (\ !r' -> balanceR y l r') <$!> go orig x r + EQ -> Nothing +#if __GLASGOW_HASKELL__ +{-# INLINABLE insertBy' #-} +#else +{-# INLINE insertBy' #-} +#endif + +infixl 4 <$!> +(<$!>) :: (a -> b) -> Maybe a -> Maybe b +(<$!>) f = \ case + Nothing -> Nothing + Just a -> Just $! f a + +#ifndef __GLASGOW_HASKELL__ +lazy :: a -> a +lazy a = a +#endif + +{-------------------------------------------------------------------- + [balance x l r] balances two trees with value x. + The sizes of the trees should balance after decreasing the + size of one of them. (a rotation). + + [delta] is the maximal relative difference between the sizes of + two trees, it corresponds with the [w] in Adams' paper. + [ratio] is the ratio between an outer and inner sibling of the + heavier subtree in an unbalanced setting. It determines + whether a double or single rotation should be performed + to restore balance. It is correspondes with the inverse + of $\alpha$ in Adam's article. + + Note that according to the Adam's paper: + - [delta] should be larger than 4.646 with a [ratio] of 2. + - [delta] should be larger than 3.745 with a [ratio] of 1.534. + + But the Adam's paper is errorneous: + - it can be proved that for delta=2 and delta>=5 there does + not exist any ratio that would work + - delta=4.5 and ratio=2 does not work + + That leaves two reasonable variants, delta=3 and delta=4, + both with ratio=2. + + - A lower [delta] leads to a more 'perfectly' balanced tree. + - A higher [delta] performs less rebalancing. + + In the benchmarks, delta=3 is faster on insert operations, + and delta=4 has slightly better deletes. As the insert speedup + is larger, we currently use delta=3. + +--------------------------------------------------------------------} +delta,ratio :: Int +delta = 3 +ratio = 2 + +-- The balance function is equivalent to the following: +-- +-- balance :: a -> Set a -> Set a -> Set a +-- balance x l r +-- | sizeL + sizeR <= 1 = Bin sizeX x l r +-- | sizeR > delta*sizeL = rotateL x l r +-- | sizeL > delta*sizeR = rotateR x l r +-- | otherwise = Bin sizeX x l r +-- where +-- sizeL = size l +-- sizeR = size r +-- sizeX = sizeL + sizeR + 1 +-- +-- rotateL :: a -> Set a -> Set a -> Set a +-- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r +-- | otherwise = doubleL x l r +-- rotateR :: a -> Set a -> Set a -> Set a +-- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r +-- | otherwise = doubleR x l r +-- +-- singleL, singleR :: a -> Set a -> Set a -> Set a +-- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3 +-- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3) +-- +-- doubleL, doubleR :: a -> Set a -> Set a -> Set a +-- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4) +-- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4) +-- +-- It is only written in such a way that every node is pattern-matched only once. +-- +-- Only balanceL and balanceR are needed at the moment, so balance is not here anymore. +-- In case it is needed, it can be found in Data.Map. + +-- Functions balanceL and balanceR are specialised versions of balance. +-- balanceL only checks whether the left subtree is too big, +-- balanceR only checks whether the right subtree is too big. + +-- balanceL is called when left subtree might have been inserted to or when +-- right subtree might have been deleted from. +balanceL :: a -> Set a -> Set a -> Set a +balanceL x l r = case r of + Tip -> case l of + Tip -> Bin 1 x Tip Tip + (Bin _ _ Tip Tip) -> Bin 2 x l Tip + (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip) + (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip) + (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr)) + | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip) + | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip) + + (Bin rs _ _ _) -> case l of + Tip -> Bin (1+rs) x Tip r + + (Bin ls lx ll lr) + | ls > delta*rs -> case (ll, lr) of + (Bin lls _ _ _, Bin lrs lrx lrl lrr) + | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r) + | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r) + (_, _) -> error "Failure in Data.Map.balanceL" + | otherwise -> Bin (1+ls+rs) x l r +{-# NOINLINE balanceL #-} + +-- balanceR is called when right subtree might have been inserted to or when +-- left subtree might have been deleted from. +balanceR :: a -> Set a -> Set a -> Set a +balanceR x l r = case l of + Tip -> case r of + Tip -> Bin 1 x Tip Tip + (Bin _ _ Tip Tip) -> Bin 2 x Tip r + (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr + (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip) + (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _)) + | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr + | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr) + + (Bin ls _ _ _) -> case r of + Tip -> Bin (1+ls) x l Tip + + (Bin rs rx rl rr) + | rs > delta*ls -> case (rl, rr) of + (Bin rls rlx rll rlr, Bin rrs _ _ _) + | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr + | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr) + (_, _) -> error "Failure in Data.Map.balanceR" + | otherwise -> Bin (1+ls+rs) x l r +{-# NOINLINE balanceR #-} diff --git a/filtrable.cabal b/filtrable.cabal index 6d76794..270faca 100644 --- a/filtrable.cabal +++ b/filtrable.cabal @@ -1,5 +1,5 @@ name: filtrable -version: 0.1.3.0 +version: 0.1.4.0 synopsis: Class of filtrable containers homepage: https://github.com/strake/filtrable.hs license: BSD3 @@ -9,16 +9,24 @@ maintainer: strake888@gmail.com category: Data build-type: Simple cabal-version: >=1.10 -tested-with: GHC ==7.8.*, - GHC ==7.10.*, - GHC ==7.12.*, - GHC ==8.0.* +tested-with: GHC ==8.0.* + GHC ==8.2.* + GHC ==8.4.* + GHC ==8.6.* + GHC ==8.8.* +description: + See "Data.Filtrable". library exposed-modules: Data.Filtrable - build-depends: base >=4.7 && <5 + other-modules: + Data.Set.Private + build-depends: base >=4.9 && <5 + , transformers >=0.5 && <0.6 default-language: Haskell2010 - default-extensions: ConstrainedClassMethods + default-extensions: + LambdaCase + ConstrainedClassMethods ghc-options: -Wall -Wcompat -Wredundant-constraints -Wno-name-shadowing -Wincomplete-record-updates -Wincomplete-uni-patterns -Werror=incomplete-patterns |