summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthewFarkasDyck <>2020-07-30 08:33:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2020-07-30 08:33:00 (GMT)
commit81a016b6d9b7fc69d28f7b98914a572b9e3cc303 (patch)
treea2df67e05447e54e8262eff6f9f3cf112f927558
parent836b04bb1242616feb9dee672855a4ec8ca84186 (diff)
version 0.1.4.00.1.4.0
-rw-r--r--Data/Filtrable.hs47
-rw-r--r--Data/Set/Private.hs312
-rw-r--r--filtrable.cabal22
3 files changed, 372 insertions, 9 deletions
diff --git a/Data/Filtrable.hs b/Data/Filtrable.hs
index 40311c4..e6bfac5 100644
--- a/Data/Filtrable.hs
+++ b/Data/Filtrable.hs
@@ -1,9 +1,14 @@
-module Data.Filtrable (Filtrable (..), (<$?>), (<*?>)) where
+module Data.Filtrable
+ ( Filtrable (..)
+ , (<$?>), (<*?>)
+ , nub, nubBy, nubOrd, nubOrdBy
+ ) where
import Prelude hiding (filter)
import Control.Applicative
import Control.Monad
+import qualified Control.Monad.Trans.State as M
import Data.Bool (bool)
import Data.Functor.Compose
import Data.Functor.Product
@@ -11,7 +16,11 @@ import Data.Functor.Sum
import Data.Proxy
import Data.Traversable
--- | Laws:
+import qualified Data.Set.Private as Set
+
+-- | Class of filtrable containers, i.e. containers we can map over while selectively dropping elements.
+--
+-- Laws:
--
-- * @'mapMaybe' 'Just' = id@
--
@@ -29,29 +38,37 @@ import Data.Traversable
class Functor f => Filtrable f where
{-# MINIMAL mapMaybe | catMaybes #-}
+ -- | Map the container with the given function, dropping the elements for which it returns 'Nothing'.
mapMaybe :: (a -> Maybe b) -> f a -> f b
mapMaybe f = catMaybes . fmap f
+ -- | @'catMaybes' = 'mapMaybe' 'id'@
catMaybes :: f (Maybe a) -> f a
catMaybes = mapMaybe id
+ -- | Drop the elements for which the given predicate is 'False'.
filter :: (a -> Bool) -> f a -> f a
filter f = mapMaybe ((<$) <*> guard . f)
+ -- | Traverse the container with the given function, dropping the elements for which it returns 'Nothing'.
mapMaybeA :: (Traversable f, Applicative p) => (a -> p (Maybe b)) -> f a -> p (f b)
mapMaybeA f xs = catMaybes <$> traverse f xs
+ -- | Drop the elements for which the given predicate is 'False'.
filterA :: (Traversable f, Applicative p) => (a -> p Bool) -> f a -> p (f a)
filterA f = mapMaybeA (\ x -> (x <$) . guard <$> f x)
+ -- | Map the container with the given function, collecting the 'Left's and the 'Right's separately.
mapEither :: (a -> Either b c) -> f a -> (f b, f c)
mapEither f = (,) <$> mapMaybe (either Just (pure Nothing) . f)
<*> mapMaybe (either (pure Nothing) Just . f)
+ -- | Traverse the container with the given function, collecting the 'Left's and the 'Right's separately.
mapEitherA :: (Traversable f, Applicative p) => (a -> p (Either b c)) -> f a -> p (f b, f c)
mapEitherA f = liftA2 (,) <$> mapMaybeA (fmap (Just `either` pure Nothing) . f)
<*> mapMaybeA (fmap (pure Nothing `either` Just) . f)
+ -- | @'partitionEithers' = 'mapEither' 'id'@
partitionEithers :: f (Either a b) -> (f a, f b)
partitionEithers = mapEither id
@@ -88,3 +105,29 @@ infixl 4 <$?>, <*?>
(<*?>) :: (Applicative p, Filtrable p) => p (a -> Maybe b) -> p a -> p b
f <*?> a = catMaybes (f <*> a)
+
+-- | \(\mathcal{O}(n^2)\)
+-- Delete all but the first copy of each element, special case of 'nubBy'.
+nub :: (Filtrable f, Traversable f, Eq a) => f a -> f a
+nub = nubBy (==)
+
+-- | \(\mathcal{O}(n^2)\)
+-- Delete all but the first copy of each element, with the given relation.
+nubBy :: (Filtrable f, Traversable f) => (a -> a -> Bool) -> f a -> f a
+nubBy eq = fmap (flip M.evalState []) . filterA $ \ a -> do
+ as <- M.get
+ let b = all (not . eq a) as
+ b <$ when b (M.modify (a:))
+
+-- | \(\mathcal{O}(n^2)\)
+-- Delete all but the first copy of each element, special case of 'nubOrdBy'.
+nubOrd :: (Filtrable f, Traversable f, Ord a) => f a -> f a
+nubOrd = nubOrdBy compare
+
+-- | \(\mathcal{O}(n^2)\)
+-- Delete all but the first copy of each element, with the given relation.
+nubOrdBy :: (Filtrable f, Traversable f) => (a -> a -> Ordering) -> f a -> f a
+nubOrdBy compare = fmap (flip M.evalState Set.empty) . filterA $ \ a -> M.state $ \ as ->
+ case Set.insertBy' compare a as of
+ Nothing -> (False, as)
+ Just as' -> (True, as')
diff --git a/Data/Set/Private.hs b/Data/Set/Private.hs
new file mode 100644
index 0000000..f033839
--- /dev/null
+++ b/Data/Set/Private.hs
@@ -0,0 +1,312 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE BangPatterns #-}
+#if !defined(TESTING) && defined(__GLASGOW_HASKELL__)
+{-# LANGUAGE Trustworthy #-}
+#endif
+
+{-# OPTIONS_HADDOCK not-home #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Copyright : (c) Daan Leijen 2002
+-- License : BSD-style
+--
+-- = WARNING
+--
+-- This module is considered __internal__.
+--
+-- The Package Versioning Policy __does not apply__.
+--
+-- This contents of this module may change __in any way whatsoever__
+-- and __without any warning__ between minor versions of this package.
+--
+-- Authors importing this module are expected to track development
+-- closely.
+--
+-- = Description
+--
+-- An efficient implementation of sets.
+--
+-- These modules are intended to be imported qualified, to avoid name
+-- clashes with Prelude functions, e.g.
+--
+-- > import Data.Set (Set)
+-- > import qualified Data.Set as Set
+--
+-- The implementation of 'Set' is based on /size balanced/ binary trees (or
+-- trees of /bounded balance/) as described by:
+--
+-- * Stephen Adams, \"/Efficient sets: a balancing act/\",
+-- Journal of Functional Programming 3(4):553-562, October 1993,
+-- <http://www.swiss.ai.mit.edu/~adams/BB/>.
+-- * J. Nievergelt and E.M. Reingold,
+-- \"/Binary search trees of bounded balance/\",
+-- SIAM journal of computing 2(1), March 1973.
+--
+-- Bounds for 'union', 'intersection', and 'difference' are as given
+-- by
+--
+-- * Guy Blelloch, Daniel Ferizovic, and Yihan Sun,
+-- \"/Just Join for Parallel Ordered Sets/\",
+-- <https://arxiv.org/abs/1602.02120v3>.
+--
+-- Note that the implementation is /left-biased/ -- the elements of a
+-- first argument are always preferred to the second, for example in
+-- 'union' or 'insert'. Of course, left-biasing can only be observed
+-- when equality is an equivalence relation instead of structural
+-- equality.
+--
+-- /Warning/: The size of the set must not exceed @maxBound::Int@. Violation of
+-- this condition is not detected and if the size limit is exceeded, the
+-- behavior of the set is completely undefined.
+--
+-- @since 0.5.9
+-----------------------------------------------------------------------------
+
+-- [Note: Using INLINABLE]
+-- ~~~~~~~~~~~~~~~~~~~~~~~
+-- It is crucial to the performance that the functions specialize on the Ord
+-- type when possible. GHC 7.0 and higher does this by itself when it sees th
+-- unfolding of a function -- that is why all public functions are marked
+-- INLINABLE (that exposes the unfolding).
+
+
+-- [Note: Using INLINE]
+-- ~~~~~~~~~~~~~~~~~~~~
+-- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
+-- We mark the functions that just navigate down the tree (lookup, insert,
+-- delete and similar). That navigation code gets inlined and thus specialized
+-- when possible. There is a price to pay -- code growth. The code INLINED is
+-- therefore only the tree navigation, all the real work (rebalancing) is not
+-- INLINED by using a NOINLINE.
+--
+-- All methods marked INLINE have to be nonrecursive -- a 'go' function doing
+-- the real work is provided.
+
+
+-- [Note: Type of local 'go' function]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- If the local 'go' function uses an Ord class, it sometimes heap-allocates
+-- the Ord dictionary when the 'go' function does not have explicit type.
+-- In that case we give 'go' explicit type. But this slightly decrease
+-- performance, as the resulting 'go' function can float out to top level.
+
+
+-- [Note: Local 'go' functions and capturing]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- As opposed to IntSet, when 'go' function captures an argument, increased
+-- heap-allocation can occur: sometimes in a polymorphic function, the 'go'
+-- floats out of its enclosing function and then it heap-allocates the
+-- dictionary and the argument. Maybe it floats out too late and strictness
+-- analyzer cannot see that these could be passed on stack.
+
+-- [Note: Order of constructors]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- The order of constructors of Set matters when considering performance.
+-- Currently in GHC 7.0, when type has 2 constructors, a forward conditional
+-- jump is made when successfully matching second constructor. Successful match
+-- of first constructor results in the forward jump not taken.
+-- On GHC 7.0, reordering constructors from Tip | Bin to Bin | Tip
+-- improves the benchmark by up to 10% on x86.
+
+module Data.Set.Private (
+ -- * Set type
+ Set(..) -- instance Eq,Ord,Show,Read,Data,Typeable
+ , Size
+ , insertBy'
+ , empty
+ ) where
+
+import Prelude hiding (filter,foldl,foldr,null,map,take,drop,splitAt)
+import Control.Monad (join)
+
+#if __GLASGOW_HASKELL__
+import GHC.Exts ( lazy )
+#endif
+
+{--------------------------------------------------------------------
+ Sets are size balanced trees
+--------------------------------------------------------------------}
+-- | A set of values @a@.
+
+-- See Note: Order of constructors
+data Set a = Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a) | Tip
+
+type Size = Int
+
+{--------------------------------------------------------------------
+ Query
+--------------------------------------------------------------------}
+-- | /O(1)/. The number of elements in the set.
+size :: Set a -> Int
+size Tip = 0
+size (Bin sz _ _ _) = sz
+
+{--------------------------------------------------------------------
+ Construction
+--------------------------------------------------------------------}
+-- | /O(1)/. The empty set.
+empty :: Set a
+empty = Tip
+
+-- | /O(1)/. Create a singleton set.
+singleton :: a -> Set a
+singleton x = Bin 1 x Tip Tip
+
+{--------------------------------------------------------------------
+ Insertion, Deletion
+--------------------------------------------------------------------}
+-- | /O(log n)/. Insert an element in a set.
+-- If the set already contains an element equal to the given value,
+-- it is replaced with the new value.
+
+-- See Note: Type of local 'go' function
+-- See Note: Avoiding worker/wrapper (in Data.Map.Internal)
+insertBy' :: (a -> a -> Ordering) -> a -> Set a -> Maybe (Set a)
+insertBy' compare = join go
+ where
+ go orig !_ Tip = Just $! singleton (lazy orig)
+ go orig !x (Bin _ y l r) = case compare x y of
+ LT -> (\ !l' -> balanceL y l' r) <$!> go orig x l
+ GT -> (\ !r' -> balanceR y l r') <$!> go orig x r
+ EQ -> Nothing
+#if __GLASGOW_HASKELL__
+{-# INLINABLE insertBy' #-}
+#else
+{-# INLINE insertBy' #-}
+#endif
+
+infixl 4 <$!>
+(<$!>) :: (a -> b) -> Maybe a -> Maybe b
+(<$!>) f = \ case
+ Nothing -> Nothing
+ Just a -> Just $! f a
+
+#ifndef __GLASGOW_HASKELL__
+lazy :: a -> a
+lazy a = a
+#endif
+
+{--------------------------------------------------------------------
+ [balance x l r] balances two trees with value x.
+ The sizes of the trees should balance after decreasing the
+ size of one of them. (a rotation).
+
+ [delta] is the maximal relative difference between the sizes of
+ two trees, it corresponds with the [w] in Adams' paper.
+ [ratio] is the ratio between an outer and inner sibling of the
+ heavier subtree in an unbalanced setting. It determines
+ whether a double or single rotation should be performed
+ to restore balance. It is correspondes with the inverse
+ of $\alpha$ in Adam's article.
+
+ Note that according to the Adam's paper:
+ - [delta] should be larger than 4.646 with a [ratio] of 2.
+ - [delta] should be larger than 3.745 with a [ratio] of 1.534.
+
+ But the Adam's paper is errorneous:
+ - it can be proved that for delta=2 and delta>=5 there does
+ not exist any ratio that would work
+ - delta=4.5 and ratio=2 does not work
+
+ That leaves two reasonable variants, delta=3 and delta=4,
+ both with ratio=2.
+
+ - A lower [delta] leads to a more 'perfectly' balanced tree.
+ - A higher [delta] performs less rebalancing.
+
+ In the benchmarks, delta=3 is faster on insert operations,
+ and delta=4 has slightly better deletes. As the insert speedup
+ is larger, we currently use delta=3.
+
+--------------------------------------------------------------------}
+delta,ratio :: Int
+delta = 3
+ratio = 2
+
+-- The balance function is equivalent to the following:
+--
+-- balance :: a -> Set a -> Set a -> Set a
+-- balance x l r
+-- | sizeL + sizeR <= 1 = Bin sizeX x l r
+-- | sizeR > delta*sizeL = rotateL x l r
+-- | sizeL > delta*sizeR = rotateR x l r
+-- | otherwise = Bin sizeX x l r
+-- where
+-- sizeL = size l
+-- sizeR = size r
+-- sizeX = sizeL + sizeR + 1
+--
+-- rotateL :: a -> Set a -> Set a -> Set a
+-- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
+-- | otherwise = doubleL x l r
+-- rotateR :: a -> Set a -> Set a -> Set a
+-- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
+-- | otherwise = doubleR x l r
+--
+-- singleL, singleR :: a -> Set a -> Set a -> Set a
+-- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
+-- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
+--
+-- doubleL, doubleR :: a -> Set a -> Set a -> Set a
+-- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
+-- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
+--
+-- It is only written in such a way that every node is pattern-matched only once.
+--
+-- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
+-- In case it is needed, it can be found in Data.Map.
+
+-- Functions balanceL and balanceR are specialised versions of balance.
+-- balanceL only checks whether the left subtree is too big,
+-- balanceR only checks whether the right subtree is too big.
+
+-- balanceL is called when left subtree might have been inserted to or when
+-- right subtree might have been deleted from.
+balanceL :: a -> Set a -> Set a -> Set a
+balanceL x l r = case r of
+ Tip -> case l of
+ Tip -> Bin 1 x Tip Tip
+ (Bin _ _ Tip Tip) -> Bin 2 x l Tip
+ (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
+ (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
+ (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
+ | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
+ | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
+
+ (Bin rs _ _ _) -> case l of
+ Tip -> Bin (1+rs) x Tip r
+
+ (Bin ls lx ll lr)
+ | ls > delta*rs -> case (ll, lr) of
+ (Bin lls _ _ _, Bin lrs lrx lrl lrr)
+ | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
+ | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
+ (_, _) -> error "Failure in Data.Map.balanceL"
+ | otherwise -> Bin (1+ls+rs) x l r
+{-# NOINLINE balanceL #-}
+
+-- balanceR is called when right subtree might have been inserted to or when
+-- left subtree might have been deleted from.
+balanceR :: a -> Set a -> Set a -> Set a
+balanceR x l r = case l of
+ Tip -> case r of
+ Tip -> Bin 1 x Tip Tip
+ (Bin _ _ Tip Tip) -> Bin 2 x Tip r
+ (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
+ (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
+ (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
+ | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
+ | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
+
+ (Bin ls _ _ _) -> case r of
+ Tip -> Bin (1+ls) x l Tip
+
+ (Bin rs rx rl rr)
+ | rs > delta*ls -> case (rl, rr) of
+ (Bin rls rlx rll rlr, Bin rrs _ _ _)
+ | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
+ | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
+ (_, _) -> error "Failure in Data.Map.balanceR"
+ | otherwise -> Bin (1+ls+rs) x l r
+{-# NOINLINE balanceR #-}
diff --git a/filtrable.cabal b/filtrable.cabal
index 6d76794..270faca 100644
--- a/filtrable.cabal
+++ b/filtrable.cabal
@@ -1,5 +1,5 @@
name: filtrable
-version: 0.1.3.0
+version: 0.1.4.0
synopsis: Class of filtrable containers
homepage: https://github.com/strake/filtrable.hs
license: BSD3
@@ -9,16 +9,24 @@ maintainer: strake888@gmail.com
category: Data
build-type: Simple
cabal-version: >=1.10
-tested-with: GHC ==7.8.*,
- GHC ==7.10.*,
- GHC ==7.12.*,
- GHC ==8.0.*
+tested-with: GHC ==8.0.*
+ GHC ==8.2.*
+ GHC ==8.4.*
+ GHC ==8.6.*
+ GHC ==8.8.*
+description:
+ See "Data.Filtrable".
library
exposed-modules: Data.Filtrable
- build-depends: base >=4.7 && <5
+ other-modules:
+ Data.Set.Private
+ build-depends: base >=4.9 && <5
+ , transformers >=0.5 && <0.6
default-language: Haskell2010
- default-extensions: ConstrainedClassMethods
+ default-extensions:
+ LambdaCase
+ ConstrainedClassMethods
ghc-options: -Wall -Wcompat -Wredundant-constraints -Wno-name-shadowing
-Wincomplete-record-updates -Wincomplete-uni-patterns
-Werror=incomplete-patterns