author    Bryan O'Sullivan <>    2010-03-29 21:39:43 (GMT)
committer Luite Stegeman <luite@luite.com>    2010-03-29 21:39:43 (GMT)
commit    e63ca987d0ef138a01dcc8de83e1c699423a285b (patch)
tree      6ab8b20748f4e590e22745d8c2c6f44f3b81d0ef
parent    600429b61b8b38cef2d7a9a80af566022c020421 (diff)
version 0.5.0.0
-rw-r--r--  Statistics/Autocorrelation.hs              22
-rw-r--r--  Statistics/Distribution/Binomial.hs         4
-rw-r--r--  Statistics/Distribution/Hypergeometric.hs   4
-rw-r--r--  Statistics/Distribution/Normal.hs           9
-rw-r--r--  Statistics/Distribution/Poisson.hs          6
-rw-r--r--  Statistics/Function.hs                     41
-rw-r--r--  Statistics/KernelDensity.hs                34
-rw-r--r--  Statistics/Math.hs                         45
-rw-r--r--  Statistics/Quantile.hs                     25
-rw-r--r--  Statistics/Resampling.hs                   34
-rw-r--r--  Statistics/Resampling/Bootstrap.hs         13
-rw-r--r--  Statistics/Sample.hs                       98
-rw-r--r--  Statistics/Sample/Powers.hs                43
-rw-r--r--  Statistics/Types.hs                        10
-rw-r--r--  statistics.cabal                            8
15 files changed, 228 insertions, 168 deletions
diff --git a/Statistics/Autocorrelation.hs b/Statistics/Autocorrelation.hs
index 4cc69e9..d22ef5c 100644
--- a/Statistics/Autocorrelation.hs
+++ b/Statistics/Autocorrelation.hs
@@ -16,31 +16,31 @@ module Statistics.Autocorrelation
, autocorrelation
) where
-import Data.Array.Vector
+import qualified Data.Vector.Unboxed as U
import Statistics.Sample (Sample, mean)
-- | Compute the autocovariance of a sample, i.e. the covariance of
-- the sample against a shifted version of itself.
-autocovariance :: Sample -> UArr Double
-autocovariance a = mapU f . enumFromToU 0 $ l-2
+autocovariance :: Sample -> U.Vector Double
+autocovariance a = U.map f . U.enumFromTo 0 $ l-2
where
- f k = sumU (zipWithU (*) (takeU (l-k) c) (sliceU c k (l-k)))
+ f k = U.sum (U.zipWith (*) (U.take (l-k) c) (U.slice k (l-k) c))
/ fromIntegral l
- c = mapU (subtract (mean a)) a
- l = lengthU a
+ c = U.map (subtract (mean a)) a
+ l = U.length a
-- | Compute the autocorrelation function of a sample, and the upper
-- and lower bounds of confidence intervals for each element.
--
-- /Note/: The calculation of the 95% confidence interval assumes a
-- stationary Gaussian process.
-autocorrelation :: Sample -> (UArr Double, UArr Double, UArr Double)
+autocorrelation :: Sample -> (U.Vector Double, U.Vector Double, U.Vector Double)
autocorrelation a = (r, ci (-), ci (+))
where
- r = mapU (/ headU c) c
+ r = U.map (/ U.head c) c
where c = autocovariance a
- dllse = mapU f . scanl1U (+) . mapU square $ r
+ dllse = U.map f . U.scanl1 (+) . U.map square $ r
where f v = 1.96 * sqrt ((v * 2 + 1) / l)
- l = fromIntegral (lengthU a)
- ci f = consU 1 . tailU . mapU (f (-1/l)) $ dllse
+ l = fromIntegral (U.length a)
+ ci f = U.cons 1 . U.tail . U.map (f (-1/l)) $ dllse
square x = x * x
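A note on the migration above: U.slice takes the starting index and length before the vector, whereas uvector's sliceU took the vector first. A minimal self-contained sketch of the migrated autocovariance pipeline (not part of the patch; the sample data is made up, and the mean is inlined so the sketch needs no other module):

import qualified Data.Vector.Unboxed as U

-- Autocovariance at lags 0..l-2, mirroring the patched code.
autocovSketch :: U.Vector Double -> U.Vector Double
autocovSketch a = U.map f (U.enumFromTo 0 (l - 2))
  where
    f k = U.sum (U.zipWith (*) (U.take (l - k) c) (U.slice k (l - k) c))
          / fromIntegral l
    c = U.map (subtract m) a
    m = U.sum a / fromIntegral l
    l = U.length a

main :: IO ()
main = print (autocovSketch (U.fromList [1, 2, 3, 4, 5]))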
diff --git a/Statistics/Distribution/Binomial.hs b/Statistics/Distribution/Binomial.hs
index 6a3bbe9..2d21bfb 100644
--- a/Statistics/Distribution/Binomial.hs
+++ b/Statistics/Distribution/Binomial.hs
@@ -24,7 +24,7 @@ module Statistics.Distribution.Binomial
) where
import Control.Exception (assert)
-import Data.Array.Vector
+import qualified Data.Vector.Unboxed as U
import Data.Int (Int64)
import Data.Typeable (Typeable)
import Statistics.Constants (m_epsilon)
@@ -76,7 +76,7 @@ density (BD n p) x
cumulative :: BinomialDistribution -> Double -> Double
cumulative d x
- | isIntegral x = sumU . mapU (density d . fromIntegral) . enumFromToU (0::Int) . floor $ x
+ | isIntegral x = U.sum . U.map (density d . fromIntegral) . U.enumFromTo (0::Int) . floor $ x
| otherwise = integralError "cumulative"
isIntegral :: Double -> Bool
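The cumulative function above follows the general pattern for discrete distributions: the CDF at x is the partial sum of the probability mass function from 0 to floor x. A sketch with a stand-in mass function (uniform on 0..5, purely illustrative, not the library's binomial density):

import qualified Data.Vector.Unboxed as U

-- CDF as a partial sum of a PMF over 0..floor x.
cdfFromPmf :: (Int -> Double) -> Double -> Double
cdfFromPmf pmf x = U.sum . U.map pmf . U.enumFromTo 0 . floor $ x

main :: IO ()
main = print (cdfFromPmf (\k -> if k <= 5 then 1/6 else 0) 3.0)  -- 4/6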
diff --git a/Statistics/Distribution/Hypergeometric.hs b/Statistics/Distribution/Hypergeometric.hs
index 43beb1b..ad2fad6 100644
--- a/Statistics/Distribution/Hypergeometric.hs
+++ b/Statistics/Distribution/Hypergeometric.hs
@@ -28,7 +28,7 @@ module Statistics.Distribution.Hypergeometric
) where
import Control.Exception (assert)
-import Data.Array.Vector
+import qualified Data.Vector.Unboxed as U
import Data.Typeable (Typeable)
import Statistics.Math (choose, logFactorial)
import Statistics.Constants (m_max_exp)
@@ -99,7 +99,7 @@ cumulative d@(HD m l k) x
where
imin = max 0 (k - l + m)
imax = min k m
- r = sumU . mapU (density d . fromIntegral) . enumFromToU imin . floor $ x
+ r = U.sum . U.map (density d . fromIntegral) . U.enumFromTo imin . floor $ x
{-# INLINE cumulative #-}
quantile :: HypergeometricDistribution -> Double -> Double
diff --git a/Statistics/Distribution/Normal.hs b/Statistics/Distribution/Normal.hs
index 4b6125a..df2389c 100644
--- a/Statistics/Distribution/Normal.hs
+++ b/Statistics/Distribution/Normal.hs
@@ -46,6 +46,7 @@ instance D.Variance NormalDistribution where
instance D.Mean NormalDistribution where
mean = mean
+-- | Standard normal distribution with mean equal to 0 and variance equal to 1
standard :: NormalDistribution
standard = ND {
mean = 0.0
@@ -54,7 +55,10 @@ standard = ND {
, ndCdfDenom = m_sqrt_2
}
-fromParams :: Double -> Double -> NormalDistribution
+-- | Create normal distribution from parameters
+fromParams :: Double -- ^ Mean of distribution
+ -> Double -- ^ Variance of distribution
+ -> NormalDistribution
fromParams m v = assert (v > 0)
ND {
mean = m
@@ -64,6 +68,9 @@ fromParams m v = assert (v > 0)
}
where sv = sqrt v
+-- | Create a distribution using parameters estimated from a sample.
+-- The variance is estimated with the maximum-likelihood method
+-- (a biased estimate).
fromSample :: S.Sample -> NormalDistribution
fromSample a = fromParams (S.mean a) (S.variance a)
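A usage sketch for the newly documented constructors (not part of the patch; it assumes the exports shown above, plus cumulative from Statistics.Distribution, which the Bootstrap module below also imports):

import qualified Data.Vector.Unboxed as U
import Statistics.Distribution (cumulative)
import Statistics.Distribution.Normal (fromSample, standard)

main :: IO ()
main = do
  print (cumulative standard 0)                    -- 0.5 at the mean
  let d = fromSample (U.fromList [1, 2, 3, 4, 5])  -- biased MLE variance
  print (cumulative d 3)                           -- sample mean is 3, so 0.5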
diff --git a/Statistics/Distribution/Poisson.hs b/Statistics/Distribution/Poisson.hs
index 5c77a1e..205f7d1 100644
--- a/Statistics/Distribution/Poisson.hs
+++ b/Statistics/Distribution/Poisson.hs
@@ -21,8 +21,8 @@ module Statistics.Distribution.Poisson
-- , fromSample
) where
-import Data.Array.Vector
import Data.Typeable (Typeable)
+import qualified Data.Vector.Unboxed as U
import qualified Statistics.Distribution as D
import Statistics.Constants (m_huge)
import Statistics.Math (logGamma)
@@ -53,8 +53,8 @@ density (PD l) x = exp (x * log l - l - logGamma x)
{-# INLINE density #-}
cumulative :: PoissonDistribution -> Double -> Double
-cumulative d = sumU . mapU (density d . fromIntegral) .
- enumFromToU (0::Int) . floor
+cumulative d = U.sum . U.map (density d . fromIntegral) .
+ U.enumFromTo (0::Int) . floor
{-# INLINE cumulative #-}
quantile :: PoissonDistribution -> Double -> Double
diff --git a/Statistics/Function.hs b/Statistics/Function.hs
index 03612a5..80ee7e0 100644
--- a/Statistics/Function.hs
+++ b/Statistics/Function.hs
@@ -23,55 +23,56 @@ module Statistics.Function
import Control.Exception (assert)
import Control.Monad.ST (ST, unsafeIOToST, unsafeSTToIO)
-import Data.Array.Vector.Algorithms.Combinators (apply)
-import Data.Array.Vector
-import qualified Data.Array.Vector.Algorithms.Intro as I
+import Data.Vector.Algorithms.Combinators (apply)
+import qualified Data.Vector.Unboxed as U
+import Data.Vector.Generic (unsafeFreeze)
+import qualified Data.Vector.Unboxed.Mutable as MU
+import qualified Data.Vector.Algorithms.Intro as I
-- | Sort an array.
-sort :: (UA e, Ord e) => UArr e -> UArr e
+sort :: (U.Unbox e, Ord e) => U.Vector e -> U.Vector e
sort = apply I.sort
{-# INLINE sort #-}
-- | Partially sort an array, such that the least /k/ elements will be
-- at the front.
-partialSort :: (UA e, Ord e) =>
+partialSort :: (U.Unbox e, Ord e) =>
Int -- ^ The number /k/ of least elements.
- -> UArr e
- -> UArr e
+ -> U.Vector e
+ -> U.Vector e
partialSort k = apply (\a -> I.partialSort a k)
{-# INLINE partialSort #-}
-- | Return the indices of an array.
-indices :: (UA a) => UArr a -> UArr Int
-indices a = enumFromToU 0 (lengthU a - 1)
+indices :: (U.Unbox a) => U.Vector a -> U.Vector Int
+indices a = U.enumFromTo 0 (U.length a - 1)
{-# INLINE indices #-}
data MM = MM {-# UNPACK #-} !Double {-# UNPACK #-} !Double
-- | Compute the minimum and maximum of an array in one pass.
-minMax :: UArr Double -> Double :*: Double
-minMax = fini . foldlU go (MM (1/0) (-1/0))
+minMax :: U.Vector Double -> (Double , Double)
+minMax = fini . U.foldl go (MM (1/0) (-1/0))
where
go (MM lo hi) k = MM (min lo k) (max hi k)
- fini (MM lo hi) = lo :*: hi
+ fini (MM lo hi) = (lo , hi)
{-# INLINE minMax #-}
-- | Create an array, using the given 'ST' action to populate each
-- element.
-createU :: (UA e) => forall s. Int -> (Int -> ST s e) -> ST s (UArr e)
+createU :: (U.Unbox e) => forall s. Int -> (Int -> ST s e) -> ST s (U.Vector e)
createU size itemAt = assert (size >= 0) $
- newMU size >>= loop 0
+ MU.new size >>= loop 0
where
- loop k arr | k >= size = unsafeFreezeAllMU arr
- | otherwise = do
- r <- itemAt k
- writeMU arr k r
- loop (k+1) arr
+ loop k arr | k >= size = unsafeFreeze arr
+ | otherwise = do r <- itemAt k
+ MU.write arr k r
+ loop (k+1) arr
{-# INLINE createU #-}
-- | Create an array, using the given 'IO' action to populate each
-- element.
-createIO :: (UA e) => Int -> (Int -> IO e) -> IO (UArr e)
+createIO :: (U.Unbox e) => Int -> (Int -> IO e) -> IO (U.Vector e)
createIO size itemAt =
unsafeSTToIO $ createU size (unsafeIOToST . itemAt)
{-# INLINE createIO #-}
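The createU rewrite above is the general mutable-fill pattern under the vector package: allocate with MU.new, write each slot, then unsafeFreeze exactly once after the last write. A standalone sketch (the function name fillWith is hypothetical):

import Control.Monad.ST (ST, runST)
import Data.Vector.Generic (unsafeFreeze)
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as MU

-- Fill a new vector of the given size using a per-index ST action.
fillWith :: Int -> (Int -> ST s Double) -> ST s (U.Vector Double)
fillWith size itemAt = do
  arr <- MU.new size
  let loop k
        | k >= size = unsafeFreeze arr
        | otherwise = do r <- itemAt k
                         MU.write arr k r
                         loop (k + 1)
  loop 0

main :: IO ()
main = print (runST (fillWith 5 (return . fromIntegral)))  -- [0..4]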
diff --git a/Statistics/KernelDensity.hs b/Statistics/KernelDensity.hs
index e0ecdc3..124d37f 100644
--- a/Statistics/KernelDensity.hs
+++ b/Statistics/KernelDensity.hs
@@ -37,7 +37,7 @@ module Statistics.KernelDensity
, simplePDF
) where
-import Data.Array.Vector ((:*:)(..), UArr, enumFromToU, lengthU, mapU, sumU)
+import qualified Data.Vector.Unboxed as U
import Statistics.Function (minMax)
import Statistics.Sample (stdDev)
import Statistics.Constants (m_1_sqrt_2, m_2_sqrt_pi)
@@ -45,7 +45,7 @@ import Statistics.Types (Sample)
-- | Points from the range of a 'Sample'.
newtype Points = Points {
- fromPoints :: UArr Double
+ fromPoints :: U.Vector Double
} deriving (Eq, Show)
-- | Bandwidth estimator for an Epanechnikov kernel.
@@ -64,7 +64,7 @@ type Bandwidth = Double
bandwidth :: (Double -> Bandwidth)
-> Sample
-> Bandwidth
-bandwidth kern values = stdDev values * kern (fromIntegral $ lengthU values)
+bandwidth kern values = stdDev values * kern (fromIntegral $ U.length values)
-- | Choose a uniform range of points at which to estimate a sample's
-- probability density function.
@@ -78,13 +78,13 @@ choosePoints :: Int -- ^ Number of points to select, /n/
-> Double -- ^ Sample bandwidth, /h/
-> Sample -- ^ Input data
-> Points
-choosePoints n h sample = Points . mapU f $ enumFromToU 0 n'
- where lo = a - h
- hi = z + h
- a :*: z = minMax sample
- d = (hi - lo) / fromIntegral n'
- f i = lo + fromIntegral i * d
- n' = n - 1
+choosePoints n h sample = Points . U.map f $ U.enumFromTo 0 n'
+ where lo = a - h
+ hi = z + h
+ (a, z) = minMax sample
+ d = (hi - lo) / fromIntegral n'
+ f i = lo + fromIntegral i * d
+ n' = n - 1
-- | The convolution kernel. Its parameters are as follows:
--
@@ -120,14 +120,14 @@ estimatePDF :: Kernel -- ^ Kernel function
-> Bandwidth -- ^ Bandwidth, /h/
-> Sample -- ^ Sample data
-> Points -- ^ Points at which to estimate
- -> UArr Double
+ -> U.Vector Double
estimatePDF kernel h sample
| n < 2 = errorShort "estimatePDF"
- | otherwise = mapU k . fromPoints
+ | otherwise = U.map k . fromPoints
where
- k p = sumU . mapU (kernel f h p) $ sample
+ k p = U.sum . U.map (kernel f h p) $ sample
f = 1 / (h * fromIntegral n)
- n = lengthU sample
+ n = U.length sample
{-# INLINE estimatePDF #-}
-- | A helper for creating a simple kernel density estimation function
@@ -137,7 +137,7 @@ simplePDF :: (Double -> Double) -- ^ Bandwidth function
-> Double -- ^ Bandwidth scaling factor (3 for a Gaussian kernel, 1 for all others)
-> Int -- ^ Number of points at which to estimate
-> Sample -- ^ Sample data
- -> (Points, UArr Double)
+ -> (Points, U.Vector Double)
simplePDF fbw fpdf k numPoints sample =
(points, estimatePDF fpdf bw sample points)
where points = choosePoints numPoints (bw*k) sample
@@ -149,7 +149,7 @@ simplePDF fbw fpdf k numPoints sample =
-- function was estimated, and the estimates at those points.
epanechnikovPDF :: Int -- ^ Number of points at which to estimate
-> Sample
- -> (Points, UArr Double)
+ -> (Points, U.Vector Double)
epanechnikovPDF = simplePDF epanechnikovBW epanechnikovKernel 1
-- | Simple Gaussian kernel density estimator. Returns the uniformly
@@ -157,7 +157,7 @@ epanechnikovPDF = simplePDF epanechnikovBW epanechnikovKernel 1
-- was estimated, and the estimates at those points.
gaussianPDF :: Int -- ^ Number of points at which to estimate
-> Sample
- -> (Points, UArr Double)
+ -> (Points, U.Vector Double)
gaussianPDF = simplePDF gaussianBW gaussianKernel 3
errorShort :: String -> a
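A usage sketch for the migrated estimators (not part of the patch; it assumes epanechnikovPDF and the Points selector fromPoints are exported, as the record syntax above suggests):

import qualified Data.Vector.Unboxed as U
import Statistics.KernelDensity (epanechnikovPDF, fromPoints)

main :: IO ()
main = do
  let sample      = U.fromList [1, 1.5, 2, 2.5, 3, 4, 5]
      (pts, ests) = epanechnikovPDF 16 sample  -- estimate at 16 points
  print (U.zip (fromPoints pts) ests)          -- (point, density) pairs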
diff --git a/Statistics/Math.hs b/Statistics/Math.hs
index 2c65b37..dafd92e 100644
--- a/Statistics/Math.hs
+++ b/Statistics/Math.hs
@@ -26,7 +26,8 @@ module Statistics.Math
-- $references
) where
-import Data.Array.Vector
+import qualified Data.Vector.Unboxed as U
+import Data.Vector.Unboxed ((!))
import Data.Word (Word64)
import Statistics.Constants (m_sqrt_2_pi)
import Statistics.Distribution (cumulative)
@@ -37,12 +38,12 @@ data C = C {-# UNPACK #-} !Double {-# UNPACK #-} !Double {-# UNPACK #-} !Double
-- | Evaluate a series of Chebyshev polynomials. Uses Clenshaw's
-- algorithm.
chebyshev :: Double -- ^ Parameter of each function.
- -> UArr Double -- ^ Coefficients of each polynomial
+ -> U.Vector Double -- ^ Coefficients of each polynomial
-- term, in increasing order.
-> Double
-chebyshev x a = fini . foldlU step (C 0 0 0) .
- enumFromThenToU (lengthU a - 1) (-1) $ 0
- where step (C u v w) k = C (x2 * v - w + indexU a k) u v
+chebyshev x a = fini . U.foldl step (C 0 0 0) .
+ U.enumFromThenTo (U.length a - 1) (-1) $ 0
+ where step (C u v w) k = C (x2 * v - w + (a ! k)) u v
fini (C u _ w) = (u - w) / 2
x2 = x * 2
@@ -52,7 +53,7 @@ chebyshev x a = fini . foldlU step (C 0 0 0) .
choose :: Int -> Int -> Double
n `choose` k
| k > n = 0
- | k < 30 = foldlU go 1 . enumFromToU 1 $ k'
+ | k < 30 = U.foldl go 1 . U.enumFromTo 1 $ k'
| otherwise = exp $ lg (n+1) - lg (k+1) - lg (n-k+1)
where go a i = a * (nk + j) / j
where j = fromIntegral i :: Double
@@ -71,13 +72,13 @@ factorial :: Int -> Double
factorial n
| n < 0 = error "Statistics.Math.factorial: negative input"
| n <= 1 = 0
- | n <= 14 = fini . foldlU goLong (F 1 1) $ ns
- | otherwise = foldlU goDouble 1 $ ns
+ | n <= 14 = fini . U.foldl goLong (F 1 1) $ ns
+ | otherwise = U.foldl goDouble 1 $ ns
where goDouble t k = t * fromIntegral k
goLong (F z x) _ = F (z * x') x'
where x' = x + 1
fini (F z _) = fromIntegral z
- ns = enumFromToU 2 n
+ ns = U.enumFromTo 2 n
{-# INLINE factorial #-}
-- | Compute the natural logarithm of the factorial function. Gives
@@ -163,9 +164,9 @@ logGamma x
((r4_2 * x2 + r4_1) * x2 + r4_0) /
((x2 + r4_4) * x2 + r4_3)
where
- a :*: b :*: c
- | x < 0.5 = -y :*: x + 1 :*: x
- | otherwise = 0 :*: x :*: x - 1
+ (a , b , c)
+ | x < 0.5 = (-y , x + 1 , x)
+ | otherwise = (0 , x , x - 1)
y = log x
k = x * (y-1) - 0.5 * y + alr2pi
@@ -204,20 +205,20 @@ data L = L {-# UNPACK #-} !Double {-# UNPACK #-} !Double
logGammaL :: Double -> Double
logGammaL x
| x <= 0 = 1/0
- | otherwise = fini . foldlU go (L 0 (x+7)) $ a
+ | otherwise = fini . U.foldl go (L 0 (x+7)) $ a
where fini (L l _) = log (l+a0) + log m_sqrt_2_pi - x65 + (x-0.5) * log x65
go (L l t) k = L (l + k / t) (t-1)
x65 = x + 6.5
a0 = 0.9999999999995183
- a = toU [ 0.1659470187408462e-06
- , 0.9934937113930748e-05
- , -0.1385710331296526
- , 12.50734324009056
- , -176.6150291498386
- , 771.3234287757674
- , -1259.139216722289
- , 676.5203681218835
- ]
+ a = U.fromList [ 0.1659470187408462e-06
+ , 0.9934937113930748e-05
+ , -0.1385710331296526
+ , 12.50734324009056
+ , -176.6150291498386
+ , 771.3234287757674
+ , -1259.139216722289
+ , 676.5203681218835
+ ]
-- $references
--
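To see this file's Clenshaw recurrence in isolation: the variant above halves the leading coefficient, i.e. it evaluates a0/2 + sum over k >= 1 of a_k * T_k(x) (an observation from the code, worth checking against the library's documentation). A standalone sketch that folds over the reversed coefficient vector instead of enumerating indices downward:

import qualified Data.Vector.Unboxed as U

data C = C !Double !Double !Double

-- Clenshaw's algorithm: f(x) = a0/2 + sum_{k>=1} a_k * T_k(x).
cheby :: Double -> U.Vector Double -> Double
cheby x a = fini (U.foldl step (C 0 0 0) (U.reverse a))
  where
    step (C u v w) ak = C (x2 * v - w + ak) u v
    fini (C u _ w)    = (u - w) / 2
    x2 = x * 2

main :: IO ()
main = do
  print (cheby 0.5 (U.fromList [0, 1]))     -- T1(x) = x       ==> 0.5
  print (cheby 0.3 (U.fromList [0, 0, 1]))  -- T2(x) = 2x^2-1  ==> -0.82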
diff --git a/Statistics/Quantile.hs b/Statistics/Quantile.hs
index 858b106..cbfd8dc 100644
--- a/Statistics/Quantile.hs
+++ b/Statistics/Quantile.hs
@@ -38,7 +38,8 @@ module Statistics.Quantile
) where
import Control.Exception (assert)
-import Data.Array.Vector (allU, indexU, lengthU)
+import qualified Data.Vector.Unboxed as U
+import Data.Vector.Unboxed ((!))
import Statistics.Constants (m_epsilon)
import Statistics.Function (partialSort)
import Statistics.Types (Sample)
@@ -53,14 +54,14 @@ weightedAvg k q x =
assert (q >= 2) .
assert (k >= 0) .
assert (k < q) .
- assert (allU (not . isNaN) x) $
+ assert (U.all (not . isNaN) x) $
xj + g * (xj1 - xj)
where
j = floor idx
- idx = fromIntegral (lengthU x - 1) * fromIntegral k / fromIntegral q
+ idx = fromIntegral (U.length x - 1) * fromIntegral k / fromIntegral q
g = idx - fromIntegral j
- xj = indexU sx j
- xj1 = indexU sx (j+1)
+ xj = sx ! j
+ xj1 = sx ! (j+1)
sx = partialSort (j+2) x
{-# INLINE weightedAvg #-}
@@ -80,7 +81,7 @@ continuousBy (ContParam a b) k q x =
assert (q >= 2) .
assert (k >= 0) .
assert (k <= q) .
- assert (allU (not . isNaN) x) $
+ assert (U.all (not . isNaN) x) $
(1-h) * item (j-1) + h * item j
where
j = floor (t + eps)
@@ -90,8 +91,8 @@ continuousBy (ContParam a b) k q x =
| otherwise = r
where r = t - fromIntegral j
eps = m_epsilon * 4
- n = lengthU x
- item = indexU sx . bracket
+ n = U.length x
+ item = (sx !) . bracket
sx = partialSort (bracket j + 1) x
bracket m = min (max m 0) (n - 1)
{-# INLINE continuousBy #-}
@@ -103,14 +104,14 @@ continuousBy (ContParam a b) k q x =
-- For instance, the interquartile range (IQR) can be estimated as
-- follows:
--
--- > midspread medianUnbiased 4 (toU [1,1,2,2,3])
+-- > midspread medianUnbiased 4 (U.fromList [1,1,2,2,3])
-- > ==> 1.333333
midspread :: ContParam -- ^ Parameters /a/ and /b/.
-> Int -- ^ /q/, the number of quantiles.
-> Sample -- ^ /x/, the sample data.
-> Double
midspread (ContParam a b) k x =
- assert (allU (not . isNaN) x) .
+ assert (U.all (not . isNaN) x) .
assert (k > 0) $
quantile (1-frac) - quantile frac
where
@@ -121,8 +122,8 @@ midspread (ContParam a b) k x =
| otherwise = r
where r = t i - fromIntegral (j i)
eps = m_epsilon * 4
- n = lengthU x
- item = indexU sx . bracket
+ n = U.length x
+ item = (sx !) . bracket
sx = partialSort (bracket (j (1-frac)) + 1) x
bracket m = min (max m 0) (n - 1)
frac = 1 / fromIntegral k
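A usage sketch for the quantile estimators above (not part of the patch; it assumes continuousBy, midspread, and medianUnbiased are exported as in the released module):

import qualified Data.Vector.Unboxed as U
import Statistics.Quantile (continuousBy, medianUnbiased, midspread)

main :: IO ()
main = do
  let xs = U.fromList [1, 1, 2, 2, 3]
  print (continuousBy medianUnbiased 2 4 xs)  -- 2nd quartile = median ==> 2.0
  print (midspread medianUnbiased 4 xs)       -- IQR ==> 1.333... per the docs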
diff --git a/Statistics/Resampling.hs b/Statistics/Resampling.hs
index 6979c1c..1493240 100644
--- a/Statistics/Resampling.hs
+++ b/Statistics/Resampling.hs
@@ -18,8 +18,11 @@ module Statistics.Resampling
import Control.Monad (forM_)
import Control.Monad.ST (ST)
-import Data.Array.Vector
-import Data.Array.Vector.Algorithms.Intro (sort)
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Unboxed.Mutable as MU
+import Data.Vector.Unboxed ((!))
+import Data.Vector.Generic (unsafeFreeze)
+import Data.Vector.Algorithms.Intro (sort)
import Statistics.Function (createU, indices)
import System.Random.MWC (Gen, uniform)
import Statistics.Types (Estimator, Sample)
@@ -28,37 +31,40 @@ import Statistics.Types (Estimator, Sample)
-- points. Distinct from a normal array to make it harder for your
-- humble author's brain to go wrong.
newtype Resample = Resample {
- fromResample :: UArr Double
+ fromResample :: U.Vector Double
} deriving (Eq, Show)
-- | Resample a data set repeatedly, with replacement, computing each
-- estimate over the resampled data.
resample :: Gen s -> [Estimator] -> Int -> Sample -> ST s [Resample]
resample gen ests numResamples samples = do
- results <- mapM (const (newMU numResamples)) $ ests
+ results <- mapM (const (MU.new numResamples)) $ ests
loop 0 (zip ests results)
mapM_ sort results
- mapM (fmap Resample . unsafeFreezeAllMU) results
+ mapM (fmap Resample . unsafeFreeze) results
where
loop k ers | k >= numResamples = return ()
| otherwise = do
re <- createU n $ \_ -> do
r <- uniform gen
- return (indexU samples (abs r `mod` n))
+ return (samples ! (abs r `mod` n))
forM_ ers $ \(est,arr) ->
- writeMU arr k . est $ re
+ MU.write arr k . est $ re
loop (k+1) ers
- n = lengthU samples
+ n = U.length samples
-- | Compute a statistical estimate repeatedly over a sample, each
-- time omitting a successive element.
-jackknife :: Estimator -> Sample -> UArr Double
-jackknife est sample = mapU f . indices $ sample
+jackknife :: Estimator -> Sample -> U.Vector Double
+jackknife est sample = U.map f . indices $ sample
where f i = est (dropAt i sample)
{-# INLINE jackknife #-}
+-- Reimplementation of indexed
+indexed :: U.Unbox e => U.Vector e -> U.Vector (Int,e)
+indexed a = U.zip (U.enumFromN 0 (U.length a)) a
+
-- | Drop the /k/th element of a vector.
-dropAt :: UA e => Int -> UArr e -> UArr e
-dropAt n = mapU sndT . filterU notN . indexedU
- where notN (i :*: _) = i /= n
- sndT (_ :*: k) = k
+dropAt :: U.Unbox e => Int -> U.Vector e -> U.Vector e
+dropAt n = U.map snd . U.filter notN . indexed
+ where notN (i , _) = i /= n
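A standalone sketch of the leave-one-out step that jackknife relies on, mirroring the indexed helper and dropAt above:

import qualified Data.Vector.Unboxed as U

indexed :: U.Unbox e => U.Vector e -> U.Vector (Int, e)
indexed a = U.zip (U.enumFromN 0 (U.length a)) a

-- Drop the element at index n by filtering on paired indices.
dropAt :: U.Unbox e => Int -> U.Vector e -> U.Vector e
dropAt n = U.map snd . U.filter ((/= n) . fst) . indexed

main :: IO ()
main = print (dropAt 2 (U.fromList [10, 20, 30, 40 :: Int]))  -- [10,20,40]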
diff --git a/Statistics/Resampling/Bootstrap.hs b/Statistics/Resampling/Bootstrap.hs
index 7cc9d05..2a00f4c 100644
--- a/Statistics/Resampling/Bootstrap.hs
+++ b/Statistics/Resampling/Bootstrap.hs
@@ -18,7 +18,8 @@ module Statistics.Resampling.Bootstrap
) where
import Control.Exception (assert)
-import Data.Array.Vector (foldlU, filterU, indexU, lengthU)
+import qualified Data.Vector.Unboxed as U
+import Data.Vector.Unboxed ((!))
import Statistics.Distribution.Normal
import Statistics.Distribution (cumulative, quantile)
import Statistics.Resampling (Resample(..), jackknife)
@@ -64,9 +65,9 @@ bootstrapBCA confidenceLevel sample =
zipWith e
where
e est (Resample resample)
- | lengthU sample == 1 = estimate pt pt pt confidenceLevel
+ | U.length sample == 1 = estimate pt pt pt confidenceLevel
| otherwise =
- estimate pt (indexU resample lo) (indexU resample hi) confidenceLevel
+ estimate pt (resample ! lo) (resample ! hi) confidenceLevel
where
pt = est sample
lo = max (cumn a1) 0
@@ -78,11 +79,11 @@ bootstrapBCA confidenceLevel sample =
z1 = quantile standard ((1 - confidenceLevel) / 2)
cumn = round . (*n) . cumulative standard
bias = quantile standard (probN / n)
- where probN = fromIntegral . lengthU . filterU (<pt) $ resample
- ni = lengthU resample
+ where probN = fromIntegral . U.length . U.filter (<pt) $ resample
+ ni = U.length resample
n = fromIntegral ni
accel = sumCubes / (6 * (sumSquares ** 1.5))
- where (sumSquares :< sumCubes) = foldlU f (0 :< 0) jack
+ where (sumSquares :< sumCubes) = U.foldl f (0 :< 0) jack
f (s :< c) j = s + d2 :< c + d2 * d
where d = jackMean - j
d2 = d * d
diff --git a/Statistics/Sample.hs b/Statistics/Sample.hs
index 203a186..aaea36b 100644
--- a/Statistics/Sample.hs
+++ b/Statistics/Sample.hs
@@ -20,6 +20,7 @@ module Statistics.Sample
-- * Statistics of location
, mean
+ , meanWeighted
, harmonicMean
, geometricMean
@@ -37,6 +38,7 @@ module Statistics.Sample
, variance
, varianceUnbiased
, stdDev
+ , varianceWeighted
-- ** Single-pass functions (faster, less safe)
-- $cancellation
@@ -48,19 +50,20 @@ module Statistics.Sample
-- $references
) where
-import Data.Array.Vector
+import qualified Data.Vector.Unboxed as U
import Statistics.Function (minMax)
-import Statistics.Types (Sample)
+import Statistics.Types (Sample,WeightedSample)
+
range :: Sample -> Double
range s = hi - lo
- where lo :*: hi = minMax s
+ where (lo , hi) = minMax s
{-# INLINE range #-}
-- | Arithmetic mean. This uses Welford's algorithm to provide
-- numerical stability, using a single pass over the sample data.
mean :: Sample -> Double
-mean = fini . foldlU go (T 0 0)
+mean = fini . U.foldl go (T 0 0)
where
fini (T a _) = a
go (T m n) x = T m' n'
@@ -68,10 +71,21 @@ mean = fini . foldlU go (T 0 0)
n' = n + 1
{-# INLINE mean #-}
+-- | Arithmetic mean for a weighted sample. Uses an algorithm
+-- analogous to the one in 'mean'.
+meanWeighted :: WeightedSample -> Double
+meanWeighted = fini . U.foldl go (V 0 0)
+ where
+ fini (V a _) = a
+ go (V m w) (x,xw) = V m' w'
+ where m' = m + xw * (x - m) / w'
+ w' = w + xw
+{-# INLINE meanWeighted #-}
+
-- | Harmonic mean. This algorithm performs a single pass over the
-- sample.
harmonicMean :: Sample -> Double
-harmonicMean = fini . foldlU go (T 0 0)
+harmonicMean = fini . U.foldl go (T 0 0)
where
fini (T b a) = fromIntegral a / b
go (T x y) n = T (x + (1/n)) (y+1)
@@ -79,7 +93,7 @@ harmonicMean = fini . foldlU go (T 0 0)
-- | Geometric mean of a sample containing no negative values.
geometricMean :: Sample -> Double
-geometricMean = fini . foldlU go (T 1 0)
+geometricMean = fini . U.foldl go (T 1 0)
where
fini (T p n) = p ** (1 / fromIntegral n)
go (T p n) a = T (p * a) (n + 1)
@@ -98,7 +112,7 @@ centralMoment a xs
| a < 0 = error "Statistics.Sample.centralMoment: negative input"
| a == 0 = 1
| a == 1 = 0
- | otherwise = sumU (mapU go xs) / fromIntegral (lengthU xs)
+ | otherwise = U.sum (U.map go xs) / fromIntegral (U.length xs)
where
go x = (x-m) ^ a
m = mean xs
@@ -111,15 +125,15 @@ centralMoment a xs
--
-- For samples containing many values very close to the mean, this
-- function is subject to inaccuracy due to catastrophic cancellation.
-centralMoments :: Int -> Int -> Sample -> Double :*: Double
+centralMoments :: Int -> Int -> Sample -> (Double, Double)
centralMoments a b xs
- | a < 2 || b < 2 = centralMoment a xs :*: centralMoment b xs
- | otherwise = fini . foldlU go (V 0 0) $ xs
+ | a < 2 || b < 2 = (centralMoment a xs , centralMoment b xs)
+ | otherwise = fini . U.foldl go (V 0 0) $ xs
where go (V i j) x = V (i + d^a) (j + d^b)
where d = x - m
- fini (V i j) = i / n :*: j / n
+ fini (V i j) = (i / n , j / n)
m = mean xs
- n = fromIntegral (lengthU xs)
+ n = fromIntegral (U.length xs)
{-# INLINE centralMoments #-}
-- | Compute the skewness of a sample. This is a measure of the
@@ -129,12 +143,12 @@ centralMoments a b xs
-- its mass is on the right of the distribution, with the tail on the
-- left.
--
--- > skewness $ toU [1,100,101,102,103]
+-- > skewness $ U.fromList [1,100,101,102,103]
-- > ==> -1.497681449918257
--
-- A sample with positive skew is said to be /right-skewed/.
--
--- > skewness $ toU [1,2,3,4,100]
+-- > skewness $ U.fromList [1,2,3,4,100]
-- > ==> 1.4975367033335198
--
-- A sample's skewness is not defined if its 'variance' is zero.
@@ -146,7 +160,7 @@ centralMoments a b xs
-- function is subject to inaccuracy due to catastrophic cancellation.
skewness :: Sample -> Double
skewness xs = c3 * c2 ** (-1.5)
- where c3 :*: c2 = centralMoments 3 2 xs
+ where (c3 , c2) = centralMoments 3 2 xs
{-# INLINE skewness #-}
-- | Compute the excess kurtosis of a sample. This is a measure of
@@ -164,7 +178,7 @@ skewness xs = c3 * c2 ** (-1.5)
-- function is subject to inaccuracy due to catastrophic cancellation.
kurtosis :: Sample -> Double
kurtosis xs = c4 / (c2 * c2) - 3
- where c4 :*: c2 = centralMoments 4 2 xs
+ where (c4 , c2) = centralMoments 4 2 xs
{-# INLINE kurtosis #-}
-- $variance
@@ -183,31 +197,32 @@ kurtosis xs = c4 / (c2 * c2) - 3
data V = V {-# UNPACK #-} !Double {-# UNPACK #-} !Double
-robustVar :: Sample -> T
-robustVar samp = fini . foldlU go (V 0 0) $ samp
- where
- go (V s c) x = V (s + d * d) (c + d)
- where d = x - m
- fini (V s c) = T (s - (c * c) / fromIntegral n) n
- n = lengthU samp
- m = mean samp
+sqr :: Double -> Double
+sqr x = x * x
+
+robustSumVar :: Sample -> Double
+robustSumVar samp = U.sum . U.map (sqr . subtract m) $ samp
+ where
+ m = mean samp
-- | Maximum likelihood estimate of a sample's variance. Also known
-- as the population variance, where the denominator is /n/.
variance :: Sample -> Double
-variance = fini . robustVar
- where fini (T v n)
- | n > 1 = v / fromIntegral n
- | otherwise = 0
+variance samp
+ | n > 1 = robustSumVar samp / fromIntegral n
+ | otherwise = 0
+ where
+ n = U.length samp
{-# INLINE variance #-}
-- | Unbiased estimate of a sample's variance. Also known as the
-- sample variance, where the denominator is /n/-1.
varianceUnbiased :: Sample -> Double
-varianceUnbiased = fini . robustVar
- where fini (T v n)
- | n > 1 = v / fromIntegral (n-1)
- | otherwise = 0
+varianceUnbiased samp
+ | n > 1 = robustSumVar samp / fromIntegral (n-1)
+ | otherwise = 0
+ where
+ n = U.length samp
{-# INLINE varianceUnbiased #-}
-- | Standard deviation. This is simply the square root of the
@@ -215,6 +230,23 @@ varianceUnbiased = fini . robustVar
stdDev :: Sample -> Double
stdDev = sqrt . varianceUnbiased
+
+robustSumVarWeighted :: WeightedSample -> V
+robustSumVarWeighted samp = U.foldl go (V 0 0) samp
+ where
+ go (V s w) (x,xw) = V (s + xw*d*d) (w + xw)
+ where d = x - m
+ m = meanWeighted samp
+
+-- | Weighted variance. This is a biased estimate.
+varianceWeighted :: WeightedSample -> Double
+varianceWeighted samp
+ | U.length samp > 1 = fini $ robustSumVarWeighted samp
+ | otherwise = 0
+ where
+ fini (V s w) = s / w
+{-# INLINE varianceWeighted #-}
+
-- $cancellation
--
-- The functions prefixed with the name @fast@ below perform a single
@@ -227,7 +259,7 @@ stdDev = sqrt . varianceUnbiased
-- catastrophic cancellation.
fastVar :: Sample -> T1
-fastVar = foldlU go (T1 0 0 0)
+fastVar = U.foldl go (T1 0 0 0)
where
go (T1 n m s) x = T1 n' m' s'
where n' = n + 1
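A usage sketch for the new weighted functions (not part of the patch; it assumes the exports added above, with each element pairing a value and its weight):

import qualified Data.Vector.Unboxed as U
import Statistics.Sample (meanWeighted, varianceWeighted)

main :: IO ()
main = do
  let ws = U.fromList [(1, 1), (2, 1), (3, 2)]  -- (value, weight) pairs
  print (meanWeighted ws)      -- (1*1 + 2*1 + 3*2) / 4 ==> 2.25
  print (varianceWeighted ws)  -- biased, weight-normalized ==> 0.6875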
diff --git a/Statistics/Sample/Powers.hs b/Statistics/Sample/Powers.hs
index 66b6a97..3bec94a 100644
--- a/Statistics/Sample/Powers.hs
+++ b/Statistics/Sample/Powers.hs
@@ -48,15 +48,18 @@ module Statistics.Sample.Powers
) where
import Control.Monad.ST (unsafeSTToIO)
-import Data.Array.Vector
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Unboxed.Mutable as MU
+import Data.Vector.Generic (unsafeFreeze)
+import Data.Vector.Unboxed ((!))
import Prelude hiding (sum)
import Statistics.Internal (inlinePerformIO)
import Statistics.Math (choose)
import Statistics.Types (Sample)
import System.IO.Unsafe (unsafePerformIO)
-newtype Powers = Powers (UArr Double)
- deriving (Eq, Read, Show)
+newtype Powers = Powers (U.Vector Double)
+ deriving (Eq, Show)
-- | O(/n/) Collect the /n/ simple powers of a sample.
--
@@ -78,25 +81,29 @@ powers :: Int -- ^ /n/, the number of powers, where /n/ >= 2.
-> Powers
powers k
| k < 2 = error "Statistics.Sample.powers: too few powers"
- | otherwise = fini . foldlU go (unsafePerformIO . unsafeSTToIO $ create)
+ | otherwise = fini . U.foldl go (unsafePerformIO . unsafeSTToIO $ create)
where
go ms x = inlinePerformIO . unsafeSTToIO $ loop 0 1
where loop !i !xk | i == l = return ms
| otherwise = do
- readMU ms i >>= writeMU ms i . (+ xk)
+ MU.read ms i >>= MU.write ms i . (+ xk)
loop (i+1) (xk*x)
- fini = Powers . unsafePerformIO . unsafeSTToIO . unsafeFreezeAllMU
- create = newMU l >>= fill 0
+ fini = Powers . unsafePerformIO . unsafeSTToIO . unsafeFreeze
+ create = MU.new l >>= fill 0
where fill !i ms | i == l = return ms
- | otherwise = writeMU ms i 0 >> fill (i+1) ms
+ | otherwise = MU.write ms i 0 >> fill (i+1) ms
l = k + 1
{-# INLINE powers #-}
-- | The order (number) of simple powers collected from a 'Sample'.
order :: Powers -> Int
-order (Powers pa) = lengthU pa - 1
+order (Powers pa) = U.length pa - 1
{-# INLINE order #-}
+-- Reimplementation of indexed
+indexed :: U.Unbox e => U.Vector e -> U.Vector (Int,e)
+indexed a = U.zip (U.enumFromN 0 (U.length a)) a
+
-- | Compute the /k/th central moment of a 'Sample'. The central
-- moment is also known as the moment about the mean.
centralMoment :: Int -> Powers -> Double
@@ -105,10 +112,10 @@ centralMoment k p@(Powers pa)
error ("Statistics.Sample.Powers.centralMoment: "
++ "invalid argument")
| k == 0 = 1
- | otherwise = (/n) . sumU . mapU go . indexedU . takeU (k+1) $ pa
+ | otherwise = (/n) . U.sum . U.map go . indexed . U.take (k+1) $ pa
where
- go (i :*: e) = (k `choose` i) * ((-m) ^ (k-i)) * e
- n = indexU pa 0
+ go (i , e) = (k `choose` i) * ((-m) ^ (k-i)) * e
+ n = U.head pa
m = mean p
{-# INLINE centralMoment #-}
@@ -139,7 +146,7 @@ varianceUnbiased :: Powers -> Double
varianceUnbiased p@(Powers pa)
| n > 1 = variance p * n / (n-1)
| otherwise = 0
- where n = indexU pa 0
+ where n = U.head pa
{-# INLINE varianceUnbiased #-}
-- | Compute the skewness of a sample. This is a measure of the
@@ -149,12 +156,12 @@ varianceUnbiased p@(Powers pa)
-- its mass is on the right of the distribution, with the tail on the
-- left.
--
--- > skewness . powers 3 $ toU [1,100,101,102,103]
+-- > skewness . powers 3 $ U.fromList [1,100,101,102,103]
-- > ==> -1.497681449918257
--
-- A sample with positive skew is said to be /right-skewed/.
--
--- > skewness . powers 3 $ toU [1,2,3,4,100]
+-- > skewness . powers 3 $ U.fromList [1,2,3,4,100]
-- > ==> 1.4975367033335198
--
-- A sample's skewness is not defined if its 'variance' is zero.
@@ -181,13 +188,13 @@ kurtosis p = centralMoment 4 p / (v * v) - 3
-- | The number of elements in the original 'Sample'. This is the
-- sample's zeroth simple power.
count :: Powers -> Int
-count (Powers pa) = floor $ indexU pa 0
+count (Powers pa) = floor $ U.head pa
{-# INLINE count #-}
-- | The sum of elements in the original 'Sample'. This is the
-- sample's first simple power.
sum :: Powers -> Double
-sum (Powers pa) = indexU pa 1
+sum (Powers pa) = pa ! 1
{-# INLINE sum #-}
-- | The arithmetic mean of elements in the original 'Sample'.
@@ -199,7 +206,7 @@ mean :: Powers -> Double
mean p@(Powers pa)
| n == 0 = 0
| otherwise = sum p / n
- where n = indexU pa 0
+ where n = U.head pa
{-# INLINE mean #-}
-- $references
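A usage sketch for the migrated Powers API (not part of the patch; function names as shown in this file's diff):

import qualified Data.Vector.Unboxed as U
import qualified Statistics.Sample.Powers as P

main :: IO ()
main = do
  let p = P.powers 4 (U.fromList [1, 2, 3, 4, 5])  -- simple powers 0..4
  print (P.mean p)      -- 3.0
  print (P.variance p)  -- population variance ==> 2.0
  print (P.kurtosis p)  -- excess kurtosis ==> -1.3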
diff --git a/Statistics/Types.hs b/Statistics/Types.hs
index 882e5ff..562f06e 100644
--- a/Statistics/Types.hs
+++ b/Statistics/Types.hs
@@ -13,17 +13,21 @@ module Statistics.Types
(
Estimator
, Sample
+ , WeightedSample
, Weights
) where
-import Data.Array.Vector (UArr)
+import qualified Data.Vector.Unboxed as U (Vector)
-- | Sample data.
-type Sample = UArr Double
+type Sample = U.Vector Double
+
+-- | Sample with weights. The first element of each pair is the
+-- value, the second its weight.
+type WeightedSample = U.Vector (Double,Double)
-- | A function that estimates a property of a sample, such as its
-- 'mean'.
type Estimator = Sample -> Double
-- | Weights for affecting the importance of elements of a sample.
-type Weights = UArr Double
+type Weights = U.Vector Double
diff --git a/statistics.cabal b/statistics.cabal
index 347b301..671c754 100644
--- a/statistics.cabal
+++ b/statistics.cabal
@@ -1,5 +1,5 @@
name: statistics
-version: 0.4.1
+version: 0.5.0.0
synopsis: A library of statistical types, data, and functions
description:
This library provides a number of common functions and types useful
@@ -55,10 +55,10 @@ library
build-depends:
base < 5,
erf,
- mwc-random,
+ mwc-random >= 0.5.0.0,
time,
- uvector >= 0.1.0.4,
- uvector-algorithms >= 0.2
+ vector >= 0.5,
+ vector-algorithms >= 0.3
if impl(ghc >= 6.10)
build-depends:
base >= 4