summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexeyKhudyakov <>2018-10-11 21:14:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2018-10-11 21:14:00 (GMT)
commitd83adc3c5e34952beca8001f00997d887844d33e (patch)
tree46ce6b48e8396c38a9a1bdcebd4425a9159a6f2b
parent10738e76080db9e40be02f0e3d6c0da2cccc3178 (diff)
version 0.15.0.00.15.0.0
-rw-r--r--Statistics/Correlation/Kendall.hs2
-rw-r--r--Statistics/Distribution.hs10
-rw-r--r--Statistics/Distribution/CauchyLorentz.hs2
-rw-r--r--Statistics/Distribution/DiscreteUniform.hs2
-rw-r--r--Statistics/Distribution/Exponential.hs2
-rw-r--r--Statistics/Distribution/Laplace.hs2
-rw-r--r--Statistics/Distribution/Uniform.hs2
-rw-r--r--Statistics/Function.hs3
-rw-r--r--Statistics/Function/Comparison.hs18
-rw-r--r--Statistics/Math/RootFinding.hs148
-rw-r--r--Statistics/Matrix.hs270
-rw-r--r--Statistics/Matrix/Algorithms.hs42
-rw-r--r--Statistics/Matrix/Mutable.hs86
-rw-r--r--Statistics/Matrix/Types.hs64
-rw-r--r--Statistics/Quantile.hs387
-rw-r--r--Statistics/Regression.hs24
-rw-r--r--Statistics/Resampling.hs6
-rw-r--r--Statistics/Resampling/Bootstrap.hs4
-rw-r--r--Statistics/Sample.hs2
-rw-r--r--Statistics/Sample/KernelDensity.hs7
-rw-r--r--Statistics/Sample/Normalize.hs43
-rw-r--r--Statistics/Test/KolmogorovSmirnov.hs30
-rw-r--r--Statistics/Test/KruskalWallis.hs2
-rw-r--r--Statistics/Test/MannWhitneyU.hs4
-rw-r--r--Statistics/Test/StudentT.hs2
-rw-r--r--Statistics/Test/Types.hs2
-rw-r--r--Statistics/Test/WilcoxonT.hs4
-rw-r--r--changelog.md25
-rw-r--r--statistics.cabal96
-rw-r--r--tests/Tests/ApproxEq.hs4
-rw-r--r--tests/Tests/Correlation.hs2
-rw-r--r--tests/Tests/Distribution.hs3
-rw-r--r--tests/Tests/Matrix/Types.hs2
-rw-r--r--tests/Tests/NonParametric.hs2
-rw-r--r--tests/Tests/Orphanage.hs26
-rw-r--r--tests/Tests/Parametric.hs1
-rw-r--r--tests/Tests/Quantile.hs100
-rw-r--r--tests/tests.hs36
38 files changed, 623 insertions, 844 deletions
diff --git a/Statistics/Correlation/Kendall.hs b/Statistics/Correlation/Kendall.hs
index 4cd7de3..3d1b3d0 100644
--- a/Statistics/Correlation/Kendall.hs
+++ b/Statistics/Correlation/Kendall.hs
@@ -5,7 +5,7 @@
-- Fast O(NlogN) implementation of
-- <http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient Kendall's tau>.
--
--- This module implementes Kendall's tau form b which allows ties in the data.
+-- This module implements Kendall's tau form b which allows ties in the data.
-- This is the same formula used by other statistical packages, e.g., R, matlab.
--
-- > \tau = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}}
diff --git a/Statistics/Distribution.hs b/Statistics/Distribution.hs
index 0b27313..c448d4f 100644
--- a/Statistics/Distribution.hs
+++ b/Statistics/Distribution.hs
@@ -57,7 +57,7 @@ class Distribution d where
-- > cumulative d -∞ = 0
cumulative :: d -> Double -> Double
- -- | One's complement of cumulative distibution:
+ -- | One's complement of cumulative distribution:
--
-- > complCumulative d x = 1 - cumulative d x
--
@@ -79,7 +79,7 @@ class Distribution d => DiscreteDistr d where
logProbability d = log . probability d
--- | Continuous probability distributuion.
+-- | Continuous probability distribution.
--
-- Minimal complete definition is 'quantile' and either 'density' or
-- 'logDensity'.
@@ -111,7 +111,7 @@ class Distribution d => ContDistr d where
class Distribution d => MaybeMean d where
maybeMean :: d -> Maybe Double
--- | Type class for distributions with mean. If distribution have
+-- | Type class for distributions with mean. If a distribution has
-- finite mean for all valid values of parameters it should be
-- instance of this type class.
class MaybeMean d => Mean d where
@@ -130,7 +130,7 @@ class MaybeMean d => MaybeVariance d where
maybeStdDev :: d -> Maybe Double
maybeStdDev = fmap sqrt . maybeVariance
--- | Type class for distributions with variance. If distibution have
+-- | Type class for distributions with variance. If distribution have
-- finite variance for all valid parameter values it should be
-- instance of this type class.
--
@@ -228,6 +228,6 @@ findRoot d prob = loop 0 1
-- | Sum probabilities in inclusive interval.
sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double
sumProbabilities d low hi =
- -- Return value is forced to be less than 1 to guard againist roundoff errors.
+ -- Return value is forced to be less than 1 to guard against roundoff errors.
-- ATTENTION! this check should be removed for testing or it could mask bugs.
min 1 . sum . U.map (probability d) $ U.enumFromTo low hi
diff --git a/Statistics/Distribution/CauchyLorentz.hs b/Statistics/Distribution/CauchyLorentz.hs
index 9ff472c..da2cda7 100644
--- a/Statistics/Distribution/CauchyLorentz.hs
+++ b/Statistics/Distribution/CauchyLorentz.hs
@@ -88,7 +88,7 @@ errMsg _ s
= "Statistics.Distribution.CauchyLorentz.cauchyDistribution: FWHM must be positive. Got "
++ show s
--- | Standard Cauchy distribution. It's centered at 0 and and have 1 FWHM
+-- | Standard Cauchy distribution. It's centered at 0 and have 1 FWHM
standardCauchy :: CauchyDistribution
standardCauchy = CD 0 1
diff --git a/Statistics/Distribution/DiscreteUniform.hs b/Statistics/Distribution/DiscreteUniform.hs
index 6f3d7c8..0b33c9e 100644
--- a/Statistics/Distribution/DiscreteUniform.hs
+++ b/Statistics/Distribution/DiscreteUniform.hs
@@ -13,7 +13,7 @@
-- inclusive interval {1, ..., n}. This is parametrized with n only,
-- where p_1, ..., p_n = 1/n. ('discreteUniform').
--
--- The second parametrizaton is the uniform distribution on {a, ..., b} with
+-- The second parametrization is the uniform distribution on {a, ..., b} with
-- probabilities p_a, ..., p_b = 1/(a-b+1). This is parametrized with
-- /a/ and /b/. ('discreteUniformAB')
diff --git a/Statistics/Distribution/Exponential.hs b/Statistics/Distribution/Exponential.hs
index e5a3e4f..357e5ff 100644
--- a/Statistics/Distribution/Exponential.hs
+++ b/Statistics/Distribution/Exponential.hs
@@ -10,7 +10,7 @@
-- Stability : experimental
-- Portability : portable
--
--- The exponential distribution. This is the continunous probability
+-- The exponential distribution. This is the continuous probability
-- distribution of the times between events in a poisson process, in
-- which events occur continuously and independently at a constant
-- average rate.
diff --git a/Statistics/Distribution/Laplace.hs b/Statistics/Distribution/Laplace.hs
index cf1fdde..d9b303d 100644
--- a/Statistics/Distribution/Laplace.hs
+++ b/Statistics/Distribution/Laplace.hs
@@ -159,5 +159,5 @@ instance D.FromSample LaplaceDistribution Double where
| G.null xs = Nothing
| otherwise = Just $! LD s l
where
- s = Q.continuousBy Q.medianUnbiased 1 2 xs
+ s = Q.median Q.medianUnbiased xs
l = S.mean $ G.map (\x -> abs $ x - s) xs
diff --git a/Statistics/Distribution/Uniform.hs b/Statistics/Distribution/Uniform.hs
index e1aa45c..4c9fb2b 100644
--- a/Statistics/Distribution/Uniform.hs
+++ b/Statistics/Distribution/Uniform.hs
@@ -69,7 +69,7 @@ uniformDistrE a b
| b < a = Just $ UniformDistribution b a
| a < b = Just $ UniformDistribution a b
| otherwise = Nothing
--- NOTE: failure is in default branch to guard againist NaNs.
+-- NOTE: failure is in default branch to guard against NaNs.
errMsg :: String
errMsg = "Statistics.Distribution.Uniform.uniform: wrong parameters"
diff --git a/Statistics/Function.hs b/Statistics/Function.hs
index b0300f1..a2a9434 100644
--- a/Statistics/Function.hs
+++ b/Statistics/Function.hs
@@ -1,8 +1,5 @@
{-# LANGUAGE BangPatterns, CPP, FlexibleContexts, Rank2Types #-}
-#if __GLASGOW_HASKELL__ >= 704
{-# OPTIONS_GHC -fsimpl-tick-factor=200 #-}
-#endif
-
-- |
-- Module : Statistics.Function
-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
diff --git a/Statistics/Function/Comparison.hs b/Statistics/Function/Comparison.hs
deleted file mode 100644
index c714f47..0000000
--- a/Statistics/Function/Comparison.hs
+++ /dev/null
@@ -1,18 +0,0 @@
--- |
--- Module : Statistics.Function.Comparison
--- Copyright : (c) 2011 Bryan O'Sullivan
--- License : BSD3
---
--- Maintainer : bos@serpentine.com
--- Stability : experimental
--- Portability : portable
---
--- Approximate floating point comparison, based on Bruce Dawson's
--- \"Comparing floating point numbers\":
--- <http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm>
-module Statistics.Function.Comparison
- {-# DEPRECATED "Use Numeric.MathFunctions.Comparison from math-functions" #-}
- (
- within
- ) where
-import Numeric.MathFunctions.Comparison (within)
diff --git a/Statistics/Math/RootFinding.hs b/Statistics/Math/RootFinding.hs
deleted file mode 100644
index c23f1f4..0000000
--- a/Statistics/Math/RootFinding.hs
+++ /dev/null
@@ -1,148 +0,0 @@
-{-# LANGUAGE BangPatterns, DeriveDataTypeable, DeriveGeneric #-}
-
--- |
--- Module : Statistics.Math.RootFinding
--- Copyright : (c) 2011 Bryan O'Sullivan
--- License : BSD3
---
--- Maintainer : bos@serpentine.com
--- Stability : experimental
--- Portability : portable
---
--- Haskell functions for finding the roots of mathematical functions.
-
-module Statistics.Math.RootFinding
- (
- Root(..)
- , fromRoot
- , ridders
- -- * References
- -- $references
- ) where
-
-import Data.Aeson (FromJSON, ToJSON)
-import Control.Applicative (Alternative(..), Applicative(..))
-import Control.Monad (MonadPlus(..), ap)
-import Data.Binary (Binary)
-import Data.Binary (put, get)
-import Data.Binary.Get (getWord8)
-import Data.Binary.Put (putWord8)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
-import Numeric.MathFunctions.Comparison (within)
-
-
--- | The result of searching for a root of a mathematical function.
-data Root a = NotBracketed
- -- ^ The function does not have opposite signs when
- -- evaluated at the lower and upper bounds of the search.
- | SearchFailed
- -- ^ The search failed to converge to within the given
- -- error tolerance after the given number of iterations.
- | Root a
- -- ^ A root was successfully found.
- deriving (Eq, Read, Show, Typeable, Data, Generic)
-
-instance (FromJSON a) => FromJSON (Root a)
-instance (ToJSON a) => ToJSON (Root a)
-
-instance (Binary a) => Binary (Root a) where
- put NotBracketed = putWord8 0
- put SearchFailed = putWord8 1
- put (Root a) = putWord8 2 >> put a
-
- get = do
- i <- getWord8
- case i of
- 0 -> return NotBracketed
- 1 -> return SearchFailed
- 2 -> fmap Root get
- _ -> fail $ "Root.get: Invalid value: " ++ show i
-
-instance Functor Root where
- fmap _ NotBracketed = NotBracketed
- fmap _ SearchFailed = SearchFailed
- fmap f (Root a) = Root (f a)
-
-instance Monad Root where
- NotBracketed >>= _ = NotBracketed
- SearchFailed >>= _ = SearchFailed
- Root a >>= m = m a
-
- return = Root
-
-instance MonadPlus Root where
- mzero = SearchFailed
-
- r@(Root _) `mplus` _ = r
- _ `mplus` p = p
-
-instance Applicative Root where
- pure = Root
- (<*>) = ap
-
-instance Alternative Root where
- empty = SearchFailed
-
- r@(Root _) <|> _ = r
- _ <|> p = p
-
--- | Returns either the result of a search for a root, or the default
--- value if the search failed.
-fromRoot :: a -- ^ Default value.
- -> Root a -- ^ Result of search for a root.
- -> a
-fromRoot _ (Root a) = a
-fromRoot a _ = a
-
-
--- | Use the method of Ridders to compute a root of a function.
---
--- The function must have opposite signs when evaluated at the lower
--- and upper bounds of the search (i.e. the root must be bracketed).
-ridders :: Double -- ^ Absolute error tolerance.
- -> (Double,Double) -- ^ Lower and upper bounds for the search.
- -> (Double -> Double) -- ^ Function to find the roots of.
- -> Root Double
-ridders tol (lo,hi) f
- | flo == 0 = Root lo
- | fhi == 0 = Root hi
- | flo*fhi > 0 = NotBracketed -- root is not bracketed
- | otherwise = go lo flo hi fhi 0
- where
- go !a !fa !b !fb !i
- -- Root is bracketed within 1 ulp. No improvement could be made
- | within 1 a b = Root a
- -- Root is found. Check that f(m) == 0 is nessesary to ensure
- -- that root is never passed to 'go'
- | fm == 0 = Root m
- | fn == 0 = Root n
- | d < tol = Root n
- -- Too many iterations performed. Fail
- | i >= (100 :: Int) = SearchFailed
- -- Ridder's approximation coincide with one of old
- -- bounds. Revert to bisection
- | n == a || n == b = case () of
- _| fm*fa < 0 -> go a fa m fm (i+1)
- | otherwise -> go m fm b fb (i+1)
- -- Proceed as usual
- | fn*fm < 0 = go n fn m fm (i+1)
- | fn*fa < 0 = go a fa n fn (i+1)
- | otherwise = go n fn b fb (i+1)
- where
- d = abs (b - a)
- dm = (b - a) * 0.5
- !m = a + dm
- !fm = f m
- !dn = signum (fb - fa) * dm * fm / sqrt(fm*fm - fa*fb)
- !n = m - signum dn * min (abs dn) (abs dm - 0.5 * tol)
- !fn = f n
- !flo = f lo
- !fhi = f hi
-
-
--- $references
---
--- * Ridders, C.F.J. (1979) A new algorithm for computing a single
--- root of a real continuous function.
--- /IEEE Transactions on Circuits and Systems/ 26:979&#8211;980.
diff --git a/Statistics/Matrix.hs b/Statistics/Matrix.hs
deleted file mode 100644
index 2b01607..0000000
--- a/Statistics/Matrix.hs
+++ /dev/null
@@ -1,270 +0,0 @@
-{-# LANGUAGE PatternGuards #-}
--- |
--- Module : Statistics.Matrix
--- Copyright : 2011 Aleksey Khudyakov, 2014 Bryan O'Sullivan
--- License : BSD3
---
--- Basic matrix operations.
---
--- There isn't a widely used matrix package for Haskell yet, so
--- we implement the necessary minimum here.
-
-module Statistics.Matrix
- ( -- * Data types
- Matrix(..)
- , Vector
- -- * Conversion from/to lists/vectors
- , fromVector
- , fromList
- , fromRowLists
- , fromRows
- , fromColumns
- , toVector
- , toList
- , toRows
- , toColumns
- , toRowLists
- -- * Other
- , generate
- , generateSym
- , ident
- , diag
- , dimension
- , center
- , multiply
- , multiplyV
- , transpose
- , power
- , norm
- , column
- , row
- , map
- , for
- , unsafeIndex
- , hasNaN
- , bounds
- , unsafeBounds
- ) where
-
-import Prelude hiding (exponent, map, sum)
-import Control.Applicative ((<$>))
-import Control.Monad.ST
-import qualified Data.Vector.Unboxed as U
-import Data.Vector.Unboxed ((!))
-import qualified Data.Vector.Unboxed.Mutable as UM
-
-import Statistics.Function (for, square)
-import Statistics.Matrix.Types
-import Statistics.Matrix.Mutable (unsafeNew,unsafeWrite,unsafeFreeze)
-import Statistics.Sample.Internal (sum)
-
-
-----------------------------------------------------------------
--- Conversion to/from vectors/lists
-----------------------------------------------------------------
-
--- | Convert from a row-major list.
-fromList :: Int -- ^ Number of rows.
- -> Int -- ^ Number of columns.
- -> [Double] -- ^ Flat list of values, in row-major order.
- -> Matrix
-fromList r c = fromVector r c . U.fromList
-
--- | create a matrix from a list of lists, as rows
-fromRowLists :: [[Double]] -> Matrix
-fromRowLists = fromRows . fmap U.fromList
-
--- | Convert from a row-major vector.
-fromVector :: Int -- ^ Number of rows.
- -> Int -- ^ Number of columns.
- -> U.Vector Double -- ^ Flat list of values, in row-major order.
- -> Matrix
-fromVector r c v
- | r*c /= len = error "input size mismatch"
- | otherwise = Matrix r c 0 v
- where len = U.length v
-
--- | create a matrix from a list of vectors, as rows
-fromRows :: [Vector] -> Matrix
-fromRows xs
- | [] <- xs = error "Statistics.Matrix.fromRows: empty list of rows!"
- | any (/=nCol) ns = error "Statistics.Matrix.fromRows: row sizes do not match"
- | nCol == 0 = error "Statistics.Matrix.fromRows: zero columns in matrix"
- | otherwise = fromVector nRow nCol (U.concat xs)
- where
- nCol:ns = U.length <$> xs
- nRow = length xs
-
-
--- | create a matrix from a list of vectors, as columns
-fromColumns :: [Vector] -> Matrix
-fromColumns = transpose . fromRows
-
--- | Convert to a row-major flat vector.
-toVector :: Matrix -> U.Vector Double
-toVector (Matrix _ _ _ v) = v
-
--- | Convert to a row-major flat list.
-toList :: Matrix -> [Double]
-toList = U.toList . toVector
-
--- | Convert to a list of lists, as rows
-toRowLists :: Matrix -> [[Double]]
-toRowLists (Matrix _ nCol _ v)
- = chunks $ U.toList v
- where
- chunks [] = []
- chunks xs = case splitAt nCol xs of
- (rowE,rest) -> rowE : chunks rest
-
-
--- | Convert to a list of vectors, as rows
-toRows :: Matrix -> [Vector]
-toRows (Matrix _ nCol _ v) = chunks v
- where
- chunks xs
- | U.null xs = []
- | otherwise = case U.splitAt nCol xs of
- (rowE,rest) -> rowE : chunks rest
-
--- | Convert to a list of vectors, as columns
-toColumns :: Matrix -> [Vector]
-toColumns = toRows . transpose
-
-
-
-----------------------------------------------------------------
--- Other
-----------------------------------------------------------------
-
--- | Generate matrix using function
-generate :: Int -- ^ Number of rows
- -> Int -- ^ Number of columns
- -> (Int -> Int -> Double)
- -- ^ Function which takes /row/ and /column/ as argument.
- -> Matrix
-generate nRow nCol f
- = Matrix nRow nCol 0 $ U.generate (nRow*nCol) $ \i ->
- let (r,c) = i `quotRem` nCol in f r c
-
--- | Generate symmetric square matrix using function
-generateSym
- :: Int -- ^ Number of rows and columns
- -> (Int -> Int -> Double)
- -- ^ Function which takes /row/ and /column/ as argument. It must
- -- be symmetric in arguments: @f i j == f j i@
- -> Matrix
-generateSym n f = runST $ do
- m <- unsafeNew n n
- for 0 n $ \r -> do
- unsafeWrite m r r (f r r)
- for (r+1) n $ \c -> do
- let x = f r c
- unsafeWrite m r c x
- unsafeWrite m c r x
- unsafeFreeze m
-
-
--- | Create the square identity matrix with given dimensions.
-ident :: Int -> Matrix
-ident n = diag $ U.replicate n 1.0
-
--- | Create a square matrix with given diagonal, other entries default to 0
-diag :: Vector -> Matrix
-diag v
- = Matrix n n 0 $ U.create $ do
- arr <- UM.replicate (n*n) 0
- for 0 n $ \i ->
- UM.unsafeWrite arr (i*n + i) (v ! i)
- return arr
- where
- n = U.length v
-
--- | Return the dimensions of this matrix, as a (row,column) pair.
-dimension :: Matrix -> (Int, Int)
-dimension (Matrix r c _ _) = (r, c)
-
--- | Avoid overflow in the matrix.
-avoidOverflow :: Matrix -> Matrix
-avoidOverflow m@(Matrix r c e v)
- | center m > 1e140 = Matrix r c (e + 140) (U.map (* 1e-140) v)
- | otherwise = m
-
--- | Matrix-matrix multiplication. Matrices must be of compatible
--- sizes (/note: not checked/).
-multiply :: Matrix -> Matrix -> Matrix
-multiply m1@(Matrix r1 _ e1 _) m2@(Matrix _ c2 e2 _) =
- Matrix r1 c2 (e1 + e2) $ U.generate (r1*c2) go
- where
- go t = sum $ U.zipWith (*) (row m1 i) (column m2 j)
- where (i,j) = t `quotRem` c2
-
--- | Matrix-vector multiplication.
-multiplyV :: Matrix -> Vector -> Vector
-multiplyV m v
- | cols m == c = U.generate (rows m) (sum . U.zipWith (*) v . row m)
- | otherwise = error $ "matrix/vector unconformable " ++ show (cols m,c)
- where c = U.length v
-
--- | Raise matrix to /n/th power. Power must be positive
--- (/note: not checked).
-power :: Matrix -> Int -> Matrix
-power mat 1 = mat
-power mat n = avoidOverflow res
- where
- mat2 = power mat (n `quot` 2)
- pow = multiply mat2 mat2
- res | odd n = multiply pow mat
- | otherwise = pow
-
--- | Element in the center of matrix (not corrected for exponent).
-center :: Matrix -> Double
-center mat@(Matrix r c _ _) =
- unsafeBounds U.unsafeIndex mat (r `quot` 2) (c `quot` 2)
-
--- | Calculate the Euclidean norm of a vector.
-norm :: Vector -> Double
-norm = sqrt . sum . U.map square
-
--- | Return the given column.
-column :: Matrix -> Int -> Vector
-column (Matrix r c _ v) i = U.backpermute v $ U.enumFromStepN i c r
-{-# INLINE column #-}
-
--- | Return the given row.
-row :: Matrix -> Int -> Vector
-row (Matrix _ c _ v) i = U.slice (c*i) c v
-
-unsafeIndex :: Matrix
- -> Int -- ^ Row.
- -> Int -- ^ Column.
- -> Double
-unsafeIndex = unsafeBounds U.unsafeIndex
-
--- | Apply function to every element of matrix
-map :: (Double -> Double) -> Matrix -> Matrix
-map f (Matrix r c e v) = Matrix r c e (U.map f v)
-
--- | Indicate whether any element of the matrix is @NaN@.
-hasNaN :: Matrix -> Bool
-hasNaN = U.any isNaN . toVector
-
--- | Given row and column numbers, calculate the offset into the flat
--- row-major vector.
-bounds :: (Vector -> Int -> r) -> Matrix -> Int -> Int -> r
-bounds k (Matrix rs cs _ v) r c
- | r < 0 || r >= rs = error "row out of bounds"
- | c < 0 || c >= cs = error "column out of bounds"
- | otherwise = k v $! r * cs + c
-{-# INLINE bounds #-}
-
--- | Given row and column numbers, calculate the offset into the flat
--- row-major vector, without checking.
-unsafeBounds :: (Vector -> Int -> r) -> Matrix -> Int -> Int -> r
-unsafeBounds k (Matrix _ cs _ v) r c = k v $! r * cs + c
-{-# INLINE unsafeBounds #-}
-
-transpose :: Matrix -> Matrix
-transpose m@(Matrix r0 c0 e _) = Matrix c0 r0 e . U.generate (r0*c0) $ \i ->
- let (r,c) = i `quotRem` r0
- in unsafeIndex m c r
diff --git a/Statistics/Matrix/Algorithms.hs b/Statistics/Matrix/Algorithms.hs
deleted file mode 100644
index 5feef22..0000000
--- a/Statistics/Matrix/Algorithms.hs
+++ /dev/null
@@ -1,42 +0,0 @@
--- |
--- Module : Statistics.Matrix.Algorithms
--- Copyright : 2014 Bryan O'Sullivan
--- License : BSD3
---
--- Useful matrix functions.
-
-module Statistics.Matrix.Algorithms
- (
- qr
- ) where
-
-import Control.Applicative ((<$>), (<*>))
-import Control.Monad.ST (ST, runST)
-import Prelude hiding (sum, replicate)
-import Statistics.Matrix (Matrix, column, dimension, for, norm)
-import qualified Statistics.Matrix.Mutable as M
-import Statistics.Sample.Internal (sum)
-import qualified Data.Vector.Unboxed as U
-
--- | /O(r*c)/ Compute the QR decomposition of a matrix.
--- The result returned is the matrices (/q/,/r/).
-qr :: Matrix -> (Matrix, Matrix)
-qr mat = runST $ do
- let (m,n) = dimension mat
- r <- M.replicate n n 0
- a <- M.thaw mat
- for 0 n $ \j -> do
- cn <- M.immutably a $ \aa -> norm (column aa j)
- M.unsafeWrite r j j cn
- for 0 m $ \i -> M.unsafeModify a i j (/ cn)
- for (j+1) n $ \jj -> do
- p <- innerProduct a j jj
- M.unsafeWrite r j jj p
- for 0 m $ \i -> do
- aij <- M.unsafeRead a i j
- M.unsafeModify a i jj $ subtract (p * aij)
- (,) <$> M.unsafeFreeze a <*> M.unsafeFreeze r
-
-innerProduct :: M.MMatrix s -> Int -> Int -> ST s Double
-innerProduct mmat j k = M.immutably mmat $ \mat ->
- sum $ U.zipWith (*) (column mat j) (column mat k)
diff --git a/Statistics/Matrix/Mutable.hs b/Statistics/Matrix/Mutable.hs
deleted file mode 100644
index 24f58be..0000000
--- a/Statistics/Matrix/Mutable.hs
+++ /dev/null
@@ -1,86 +0,0 @@
--- |
--- Module : Statistics.Matrix.Mutable
--- Copyright : (c) 2014 Bryan O'Sullivan
--- License : BSD3
---
--- Basic mutable matrix operations.
-
-module Statistics.Matrix.Mutable
- (
- MMatrix(..)
- , MVector
- , replicate
- , thaw
- , bounds
- , unsafeNew
- , unsafeFreeze
- , unsafeRead
- , unsafeWrite
- , unsafeModify
- , immutably
- , unsafeBounds
- ) where
-
-import Control.Applicative ((<$>))
-import Control.DeepSeq (NFData(..))
-import Control.Monad.ST (ST)
-import Statistics.Matrix.Types (Matrix(..), MMatrix(..), MVector)
-import qualified Data.Vector.Unboxed as U
-import qualified Data.Vector.Unboxed.Mutable as M
-import Prelude hiding (replicate)
-
-replicate :: Int -> Int -> Double -> ST s (MMatrix s)
-replicate r c k = MMatrix r c 0 <$> M.replicate (r*c) k
-
-thaw :: Matrix -> ST s (MMatrix s)
-thaw (Matrix r c e v) = MMatrix r c e <$> U.thaw v
-
-unsafeFreeze :: MMatrix s -> ST s Matrix
-unsafeFreeze (MMatrix r c e mv) = Matrix r c e <$> U.unsafeFreeze mv
-
--- | Allocate new matrix. Matrix content is not initialized hence unsafe.
-unsafeNew :: Int -- ^ Number of row
- -> Int -- ^ Number of columns
- -> ST s (MMatrix s)
-unsafeNew r c
- | r < 0 = error "Statistics.Matrix.Mutable.unsafeNew: negative number of rows"
- | c < 0 = error "Statistics.Matrix.Mutable.unsafeNew: negative number of columns"
- | otherwise = do
- vec <- M.new (r*c)
- return $ MMatrix r c 0 vec
-
-unsafeRead :: MMatrix s -> Int -> Int -> ST s Double
-unsafeRead mat r c = unsafeBounds mat r c M.unsafeRead
-{-# INLINE unsafeRead #-}
-
-unsafeWrite :: MMatrix s -> Int -> Int -> Double -> ST s ()
-unsafeWrite mat row col k = unsafeBounds mat row col $ \v i ->
- M.unsafeWrite v i k
-{-# INLINE unsafeWrite #-}
-
-unsafeModify :: MMatrix s -> Int -> Int -> (Double -> Double) -> ST s ()
-unsafeModify mat row col f = unsafeBounds mat row col $ \v i -> do
- k <- M.unsafeRead v i
- M.unsafeWrite v i (f k)
-{-# INLINE unsafeModify #-}
-
--- | Given row and column numbers, calculate the offset into the flat
--- row-major vector.
-bounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r
-bounds (MMatrix rs cs _ mv) r c k
- | r < 0 || r >= rs = error "row out of bounds"
- | c < 0 || c >= cs = error "column out of bounds"
- | otherwise = k mv $! r * cs + c
-{-# INLINE bounds #-}
-
--- | Given row and column numbers, calculate the offset into the flat
--- row-major vector, without checking.
-unsafeBounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r
-unsafeBounds (MMatrix _ cs _ mv) r c k = k mv $! r * cs + c
-{-# INLINE unsafeBounds #-}
-
-immutably :: NFData a => MMatrix s -> (Matrix -> a) -> ST s a
-immutably mmat f = do
- k <- f <$> unsafeFreeze mmat
- rnf k `seq` return k
-{-# INLINE immutably #-}
diff --git a/Statistics/Matrix/Types.hs b/Statistics/Matrix/Types.hs
deleted file mode 100644
index 90fc625..0000000
--- a/Statistics/Matrix/Types.hs
+++ /dev/null
@@ -1,64 +0,0 @@
--- |
--- Module : Statistics.Matrix.Types
--- Copyright : 2014 Bryan O'Sullivan
--- License : BSD3
---
--- Basic matrix operations.
---
--- There isn't a widely used matrix package for Haskell yet, so
--- we implement the necessary minimum here.
-
-module Statistics.Matrix.Types
- (
- Vector
- , MVector
- , Matrix(..)
- , MMatrix(..)
- , debug
- ) where
-
-import Data.Char (isSpace)
-import Numeric (showFFloat)
-import qualified Data.Vector.Unboxed as U
-import qualified Data.Vector.Unboxed.Mutable as M
-
-type Vector = U.Vector Double
-type MVector s = M.MVector s Double
-
--- | Two-dimensional matrix, stored in row-major order.
-data Matrix = Matrix {
- rows :: {-# UNPACK #-} !Int -- ^ Rows of matrix.
- , cols :: {-# UNPACK #-} !Int -- ^ Columns of matrix.
- , exponent :: {-# UNPACK #-} !Int
- -- ^ In order to avoid overflows during matrix multiplication, a
- -- large exponent is stored separately.
- , _vector :: !Vector -- ^ Matrix data.
- } deriving (Eq)
-
--- | Two-dimensional mutable matrix, stored in row-major order.
-data MMatrix s = MMatrix
- {-# UNPACK #-} !Int
- {-# UNPACK #-} !Int
- {-# UNPACK #-} !Int
- !(MVector s)
-
--- The Show instance is useful only for debugging.
-instance Show Matrix where
- show = debug
-
-debug :: Matrix -> String
-debug (Matrix r c _ vs) = unlines $ zipWith (++) (hdr0 : repeat hdr) rrows
- where
- rrows = map (cleanEnd . unwords) . split $ zipWith (++) ldone tdone
- hdr0 = show (r,c) ++ " "
- hdr = replicate (length hdr0) ' '
- pad plus k xs = replicate (k - length xs) ' ' `plus` xs
- ldone = map (pad (++) (longest lstr)) lstr
- tdone = map (pad (flip (++)) (longest tstr)) tstr
- (lstr, tstr) = unzip . map (break (=='.') . render) . U.toList $ vs
- longest = maximum . map length
- render k = reverse . dropWhile (=='.') . dropWhile (=='0') . reverse .
- showFFloat (Just 4) k $ ""
- split [] = []
- split xs = i : split rest where (i, rest) = splitAt c xs
- cleanEnd = reverse . dropWhile isSpace . reverse
diff --git a/Statistics/Quantile.hs b/Statistics/Quantile.hs
index 06eacfb..53cc2a5 100644
--- a/Statistics/Quantile.hs
+++ b/Statistics/Quantile.hs
@@ -1,4 +1,10 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
-- |
-- Module : Statistics.Quantile
-- Copyright : (c) 2009 Bryan O'Sullivan
@@ -20,39 +26,62 @@
module Statistics.Quantile
(
-- * Quantile estimation functions
- weightedAvg
- , ContParam(..)
- , continuousBy
- , midspread
-
- -- * Parameters for the continuous sample method
+ -- $cont_quantiles
+ ContParam(..)
+ , Default(..)
+ , quantile
+ , quantiles
+ , quantilesVec
+ -- ** Parameters for the continuous sample method
, cadpw
, hazen
- , s
, spss
+ , s
, medianUnbiased
, normalUnbiased
-
+ -- * Other algorithms
+ , weightedAvg
+ -- * Median & other specializations
+ , median
+ , mad
+ , midspread
+ -- * Deprecated
+ , continuousBy
-- * References
-- $references
) where
-import Data.Vector.Generic ((!))
-import Numeric.MathFunctions.Constants (m_epsilon)
+import Data.Binary (Binary)
+import Data.Aeson (ToJSON,FromJSON)
+import Data.Data (Data,Typeable)
+import Data.Default.Class
+import Data.Functor
+import qualified Data.Foldable as F
+import Data.Vector.Generic ((!))
+import qualified Data.Vector as V
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Storable as S
+import GHC.Generics (Generic)
+
import Statistics.Function (partialSort)
-import qualified Data.Vector as V
-import qualified Data.Vector.Generic as G
-import qualified Data.Vector.Unboxed as U
--- | O(/n/ log /n/). Estimate the /k/th /q/-quantile of a sample,
--- using the weighted average method.
+
+----------------------------------------------------------------
+-- Quantile estimation
+----------------------------------------------------------------
+
+-- | O(/n/·log /n/). Estimate the /k/th /q/-quantile of a sample,
+-- using the weighted average method. Up to rounding errors it's same
+-- as @quantile s@.
+--
+-- The following properties should hold otherwise an error will be thrown.
--
--- The following properties should hold:
-- * the length of the input is greater than @0@
+--
-- * the input does not contain @NaN@
--- * k ≥ 0 and k ≤ q
--
--- otherwise an error will be thrown.
+-- * k ≥ 0 and k ≤ q
weightedAvg :: G.Vector v Double =>
Int -- ^ /k/, the desired quantile.
-> Int -- ^ /q/, the number of quantiles.
@@ -76,101 +105,208 @@ weightedAvg k q x
n = G.length x
{-# SPECIALIZE weightedAvg :: Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE weightedAvg :: Int -> Int -> V.Vector Double -> Double #-}
+{-# SPECIALIZE weightedAvg :: Int -> Int -> S.Vector Double -> Double #-}
--- | Parameters /a/ and /b/ to the 'continuousBy' function.
-data ContParam = ContParam {-# UNPACK #-} !Double {-# UNPACK #-} !Double
--- | O(/n/ log /n/). Estimate the /k/th /q/-quantile of a sample /x/,
--- using the continuous sample method with the given parameters. This
--- is the method used by most statistical software, such as R,
+----------------------------------------------------------------
+-- Quantiles continuous algorithm
+----------------------------------------------------------------
+
+-- $cont_quantiles
+--
+-- Below is family of functions which use same algorithm for estimation
+-- of sample quantiles. It approximates empirical CDF as continuous
+-- piecewise function which interpolates linearly between points
+-- \((X_k,p_k)\) where \(X_k\) is k-th order statistics (k-th smallest
+-- element) and \(p_k\) is probability corresponding to
+-- it. 'ContParam' determines how \(p_k\) is chosen. For more detailed
+-- explanation see [Hyndman1996].
+--
+-- This is the method used by most statistical software, such as R,
-- Mathematica, SPSS, and S.
-continuousBy :: G.Vector v Double =>
- ContParam -- ^ Parameters /a/ and /b/.
- -> Int -- ^ /k/, the desired quantile.
- -> Int -- ^ /q/, the number of quantiles.
- -> v Double -- ^ /x/, the sample data.
- -> Double
-continuousBy (ContParam a b) k q x
- | q < 2 = modErr "continuousBy" "At least 2 quantiles is needed"
- | k < 0 || k > q = modErr "continuousBy" "Wrong quantile number"
- | G.any isNaN x = modErr "continuousBy" "Sample contains NaNs"
- | otherwise = (1-h) * item (j-1) + h * item j
+
+
+-- | Parameters /α/ and /β/ to the 'continuousBy' function. Exact
+-- meaning of parameters is described in [Hyndman1996] in section
+-- \"Piecewise linear functions\"
+data ContParam = ContParam {-# UNPACK #-} !Double {-# UNPACK #-} !Double
+ deriving (Show,Eq,Ord,Data,Typeable,Generic)
+
+-- | We use 's' as default value which is same as R's default.
+instance Default ContParam where
+ def = s
+
+instance Binary ContParam
+instance ToJSON ContParam
+instance FromJSON ContParam
+
+-- | O(/n/·log /n/). Estimate the /k/th /q/-quantile of a sample /x/,
+-- using the continuous sample method with the given parameters.
+--
+-- The following properties should hold, otherwise an error will be thrown.
+--
+-- * input sample must be nonempty
+--
+-- * the input does not contain @NaN@
+--
+-- * 0 ≤ k ≤ q
+quantile :: G.Vector v Double
+ => ContParam -- ^ Parameters /α/ and /β/.
+ -> Int -- ^ /k/, the desired quantile.
+ -> Int -- ^ /q/, the number of quantiles.
+ -> v Double -- ^ /x/, the sample data.
+ -> Double
+quantile param q nQ xs
+ | nQ < 2 = modErr "continuousBy" "At least 2 quantiles is needed"
+ | badQ nQ q = modErr "continuousBy" "Wrong quantile number"
+ | G.any isNaN xs = modErr "continuousBy" "Sample contains NaNs"
+ | otherwise = estimateQuantile sortedXs pk
where
- j = floor (t + eps)
- t = a + p * (fromIntegral n + 1 - a - b)
- p = fromIntegral k / fromIntegral q
- h | abs r < eps = 0
- | otherwise = r
- where r = t - fromIntegral j
- eps = m_epsilon * 4
- n = G.length x
- item = (sx !) . bracket
- sx = partialSort (bracket j + 1) x
- bracket m = min (max m 0) (n - 1)
+ pk = toPk param n q nQ
+ sortedXs = psort xs $ floor pk + 1
+ n = G.length xs
+{-# INLINABLE quantile #-}
+{-# SPECIALIZE
+ quantile :: ContParam -> Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE
- continuousBy :: ContParam -> Int -> Int -> U.Vector Double -> Double #-}
+ quantile :: ContParam -> Int -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE
- continuousBy :: ContParam -> Int -> Int -> V.Vector Double -> Double #-}
+ quantile :: ContParam -> Int -> Int -> S.Vector Double -> Double #-}
--- | O(/n/ log /n/). Estimate the range between /q/-quantiles 1 and
--- /q/-1 of a sample /x/, using the continuous sample method with the
--- given parameters.
+-- | O(/k·n/·log /n/). Estimate set of the /k/th /q/-quantile of a
+-- sample /x/, using the continuous sample method with the given
+-- parameters. This is faster than calling quantile repeatedly since
+-- sample should be sorted only once
--
--- For instance, the interquartile range (IQR) can be estimated as
--- follows:
+-- The following properties should hold, otherwise an error will be thrown.
--
--- > midspread medianUnbiased 4 (U.fromList [1,1,2,2,3])
--- > ==> 1.333333
-midspread :: G.Vector v Double =>
- ContParam -- ^ Parameters /a/ and /b/.
- -> Int -- ^ /q/, the number of quantiles.
- -> v Double -- ^ /x/, the sample data.
- -> Double
-midspread (ContParam a b) k x
- | G.any isNaN x = modErr "midspread" "Sample contains NaNs"
- | k <= 0 = modErr "midspread" "Nonpositive number of quantiles"
- | otherwise = quantile (1-frac) - quantile frac
+-- * input sample must be nonempty
+--
+-- * the input does not contain @NaN@
+--
+-- * for every k in set of quantiles 0 ≤ k ≤ q
+quantiles :: (G.Vector v Double, F.Foldable f, Functor f)
+ => ContParam
+ -> f Int
+ -> Int
+ -> v Double
+ -> f Double
+quantiles param qs nQ xs
+ | nQ < 2 = modErr "quantiles" "At least 2 quantiles is needed"
+ | F.any (badQ nQ) qs = modErr "quantiles" "Wrong quantile number"
+ | G.any isNaN xs = modErr "quantiles" "Sample contains NaNs"
+ -- Doesn't matter what we put into empty container
+ | fnull qs = 0 <$ qs
+ | otherwise = fmap (estimateQuantile sortedXs) ks'
where
- quantile i = (1-h i) * item (j i-1) + h i * item (j i)
- j i = floor (t i + eps) :: Int
- t i = a + i * (fromIntegral n + 1 - a - b)
- h i | abs r < eps = 0
- | otherwise = r
- where r = t i - fromIntegral (j i)
- eps = m_epsilon * 4
- n = G.length x
- item = (sx !) . bracket
- sx = partialSort (bracket (j (1-frac)) + 1) x
- bracket m = min (max m 0) (n - 1)
- frac = 1 / fromIntegral k
-{-# SPECIALIZE midspread :: ContParam -> Int -> U.Vector Double -> Double #-}
-{-# SPECIALIZE midspread :: ContParam -> Int -> V.Vector Double -> Double #-}
+ ks' = fmap (\q -> toPk param n q nQ) qs
+ sortedXs = psort xs $ floor (F.maximum ks') + 1
+ n = G.length xs
+{-# INLINABLE quantiles #-}
+{-# SPECIALIZE quantiles
+ :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> V.Vector Double -> f Double #-}
+{-# SPECIALIZE quantiles
+ :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> U.Vector Double -> f Double #-}
+{-# SPECIALIZE quantiles
+ :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> S.Vector Double -> f Double #-}
+
+-- COMPAT
+fnull :: F.Foldable f => f a -> Bool
+#if !MIN_VERSION_base(4,8,0)
+fnull = F.foldr (\_ _ -> False) True
+#else
+fnull = null
+#endif
+
+-- | O(/k·n/·log /n/). Same as quantiles but uses 'G.Vector' container
+-- instead of 'Foldable' one.
+quantilesVec :: (G.Vector v Double, G.Vector v Int)
+ => ContParam
+ -> v Int
+ -> Int
+ -> v Double
+ -> v Double
+quantilesVec param qs nQ xs
+ | nQ < 2 = modErr "quantilesVec" "At least 2 quantiles is needed"
+ | G.any (badQ nQ) qs = modErr "quantilesVec" "Wrong quantile number"
+ | G.any isNaN xs = modErr "quantilesVec" "Sample contains NaNs"
+ | G.null qs = G.empty
+ | otherwise = G.map (estimateQuantile sortedXs) ks'
+ where
+ ks' = G.map (\q -> toPk param n q nQ) qs
+ sortedXs = psort xs $ floor (G.maximum ks') + 1
+ n = G.length xs
+{-# INLINABLE quantilesVec #-}
+{-# SPECIALIZE quantilesVec
+ :: ContParam -> V.Vector Int -> Int -> V.Vector Double -> V.Vector Double #-}
+{-# SPECIALIZE quantilesVec
+ :: ContParam -> U.Vector Int -> Int -> U.Vector Double -> U.Vector Double #-}
+{-# SPECIALIZE quantilesVec
+ :: ContParam -> S.Vector Int -> Int -> S.Vector Double -> S.Vector Double #-}
+
+
+-- Returns True if quantile number is out of range
+badQ :: Int -> Int -> Bool
+badQ nQ q = q < 0 || q > nQ
+
+-- Obtain k from equation for p_k [Hyndman1996] p.363. Note that
+-- equation defines p_k for integer k but we calculate it as real
+-- value and will use fractional part for linear interpolation. This
+-- is correct since equation is linear.
+toPk
+ :: ContParam
+ -> Int -- ^ /n/ number of elements
+ -> Int -- ^ /k/, the desired quantile.
+ -> Int -- ^ /q/, the number of quantiles.
+ -> Double
+toPk (ContParam a b) (fromIntegral -> n) q nQ
+ = a + p * (n + 1 - a - b)
+ where
+ p = fromIntegral q / fromIntegral nQ
+
+-- Estimate quantile for given k (including fractional part)
+estimateQuantile :: G.Vector v Double => v Double -> Double -> Double
+{-# INLINE estimateQuantile #-}
+estimateQuantile sortedXs k'
+ = (1-g) * item (k-1) + g * item k
+ where
+ (k,g) = properFraction k'
+ item = (sortedXs !) . clamp
+ --
+ clamp = max 0 . min (n - 1)
+ n = G.length sortedXs
--- | California Department of Public Works definition, /a/=0, /b/=1.
+psort :: G.Vector v Double => v Double -> Int -> v Double
+psort xs k = partialSort (max 0 $ min (G.length xs - 1) k) xs
+{-# INLINE psort #-}
+
+
+-- | California Department of Public Works definition, /α/=0, /β/=1.
-- Gives a linear interpolation of the empirical CDF. This
-- corresponds to method 4 in R and Mathematica.
cadpw :: ContParam
cadpw = ContParam 0 1
--- | Hazen's definition, /a/=0.5, /b/=0.5. This is claimed to be
+-- | Hazen's definition, /α/=0.5, /β/=0.5. This is claimed to be
-- popular among hydrologists. This corresponds to method 5 in R and
-- Mathematica.
hazen :: ContParam
hazen = ContParam 0.5 0.5
--- | Definition used by the SPSS statistics application, with /a/=0,
--- /b/=0 (also known as Weibull's definition). This corresponds to
+-- | Definition used by the SPSS statistics application, with /α/=0,
+-- /β/=0 (also known as Weibull's definition). This corresponds to
-- method 6 in R and Mathematica.
spss :: ContParam
spss = ContParam 0 0
--- | Definition used by the S statistics application, with /a/=1,
--- /b/=1. The interpolation points divide the sample range into @n-1@
--- intervals. This corresponds to method 7 in R and Mathematica.
+-- | Definition used by the S statistics application, with /α/=1,
+-- /β/=1. The interpolation points divide the sample range into @n-1@
+-- intervals. This corresponds to method 7 in R and Mathematica and
+-- is default in R.
s :: ContParam
s = ContParam 1 1
--- | Median unbiased definition, /a/=1\/3, /b/=1\/3. The resulting
+-- | Median unbiased definition, /α/=1\/3, /β/=1\/3. The resulting
-- quantile estimates are approximately median unbiased regardless of
-- the distribution of /x/. This corresponds to method 8 in R and
-- Mathematica.
@@ -178,7 +314,7 @@ medianUnbiased :: ContParam
medianUnbiased = ContParam third third
where third = 1/3
--- | Normal unbiased definition, /a/=3\/8, /b/=3\/8. An approximately
+-- | Normal unbiased definition, /α/=3\/8, /β/=3\/8. An approximately
-- unbiased estimate if the empirical distribution approximates the
-- normal distribution. This corresponds to method 9 in R and
-- Mathematica.
@@ -189,11 +325,86 @@ normalUnbiased = ContParam ta ta
modErr :: String -> String -> a
modErr f err = error $ "Statistics.Quantile." ++ f ++ ": " ++ err
+
+----------------------------------------------------------------
+-- Specializations
+----------------------------------------------------------------
+
+-- | O(/n/·log /n/) Estimate median of sample
+median :: G.Vector v Double
+ => ContParam -- ^ Parameters /α/ and /β/.
+ -> v Double -- ^ /x/, the sample data.
+ -> Double
+{-# INLINE median #-}
+median p = quantile p 1 2
+
+-- | O(/n/·log /n/). Estimate the range between /q/-quantiles 1 and
+-- /q/-1 of a sample /x/, using the continuous sample method with the
+-- given parameters.
+--
+-- For instance, the interquartile range (IQR) can be estimated as
+-- follows:
+--
+-- > midspread medianUnbiased 4 (U.fromList [1,1,2,2,3])
+-- > ==> 1.333333
+midspread :: G.Vector v Double =>
+ ContParam -- ^ Parameters /α/ and /β/.
+ -> Int -- ^ /q/, the number of quantiles.
+ -> v Double -- ^ /x/, the sample data.
+ -> Double
+midspread param k x
+ | G.any isNaN x = modErr "midspread" "Sample contains NaNs"
+ | k <= 0 = modErr "midspread" "Nonpositive number of quantiles"
+ | otherwise = let Pair x1 x2 = quantiles param (Pair 1 (k-1)) k x
+ in x2 - x1
+{-# INLINABLE midspread #-}
+{-# SPECIALIZE midspread :: ContParam -> Int -> U.Vector Double -> Double #-}
+{-# SPECIALIZE midspread :: ContParam -> Int -> V.Vector Double -> Double #-}
+{-# SPECIALIZE midspread :: ContParam -> Int -> S.Vector Double -> Double #-}
+
+data Pair a = Pair !a !a
+ deriving (Functor, F.Foldable)
+
+
+-- | O(/n/·log /n/). Estimate the median absolute deviation (MAD) of a
+-- sample /x/ using 'continuousBy'. It's robust estimate of
+-- variability in sample and defined as:
+--
+-- \[
+-- MAD = \operatorname{median}(| X_i - \operatorname{median}(X) |)
+-- \]
+mad :: G.Vector v Double
+ => ContParam -- ^ Parameters /α/ and /β/.
+ -> v Double -- ^ /x/, the sample data.
+ -> Double
+mad p xs
+ = median p $ G.map (abs . subtract med) xs
+ where
+ med = median p xs
+{-# INLINABLE mad #-}
+{-# SPECIALIZE mad :: ContParam -> U.Vector Double -> Double #-}
+{-# SPECIALIZE mad :: ContParam -> V.Vector Double -> Double #-}
+{-# SPECIALIZE mad :: ContParam -> S.Vector Double -> Double #-}
+
+
+----------------------------------------------------------------
+-- Deprecated
+----------------------------------------------------------------
+
+continuousBy :: G.Vector v Double =>
+ ContParam -- ^ Parameters /α/ and /β/.
+ -> Int -- ^ /k/, the desired quantile.
+ -> Int -- ^ /q/, the number of quantiles.
+ -> v Double -- ^ /x/, the sample data.
+ -> Double
+continuousBy = quantile
+{-# DEPRECATED continuousBy "Use quantile instead" #-}
+
-- $references
--
-- * Weisstein, E.W. Quantile. /MathWorld/.
-- <http://mathworld.wolfram.com/Quantile.html>
--
--- * Hyndman, R.J.; Fan, Y. (1996) Sample quantiles in statistical
+-- * [Hyndman1996] Hyndman, R.J.; Fan, Y. (1996) Sample quantiles in statistical
-- packages. /American Statistician/
-- 50(4):361&#8211;365. <http://www.jstor.org/stable/2684934>
diff --git a/Statistics/Regression.hs b/Statistics/Regression.hs
index f119c25..d886dcb 100644
--- a/Statistics/Regression.hs
+++ b/Statistics/Regression.hs
@@ -17,7 +17,8 @@ import Control.Applicative ((<$>))
import Control.Concurrent (forkIO)
import Control.Concurrent.Chan (newChan, readChan, writeChan)
import Control.DeepSeq (rnf)
-import Control.Monad (forM_, replicateM)
+import Control.Monad (forM_, replicateM, when)
+import Data.List (nub)
import GHC.Conc (getNumCapabilities)
import Prelude hiding (pred, sum)
import Statistics.Function as F
@@ -88,7 +89,7 @@ solve r b
rfor n 0 $ \i -> do
si <- (/ unsafeIndex r i i) <$> M.unsafeRead s i
M.unsafeWrite s i si
- for 0 i $ \j -> F.unsafeModify s j $ subtract (unsafeIndex r j i * si)
+ F.for 0 i $ \j -> F.unsafeModify s j $ subtract (unsafeIndex r j i * si)
return s
where n = rows r
l = U.length b
@@ -123,6 +124,20 @@ bootstrapRegress gen0 numResamples cl rgrss preds0 resp0
| numResamples < 1 = error $ "bootstrapRegress: number of resamples " ++
"must be positive"
| otherwise = do
+
+ -- some error checks so that we do not run into vector index out of bounds.
+ case nub (map U.length preds0) of
+ [] -> error "bootstrapRegress: predictor vectors must not be empty"
+ [plen] -> do
+ let rlen = U.length resp0
+ when (plen /= rlen) $
+ error $ "bootstrapRegress: responder vector length ["
+ ++ show rlen
+ ++ "] must be the same as predictor vectors' length ["
+ ++ show plen ++ "]"
+ xs -> error $ "bootstrapRegress: all predictor vectors must be of the same \
+ \length, lengths provided are: " ++ show xs
+
caps <- getNumCapabilities
gens <- splitGen caps gen0
done <- newChan
@@ -141,8 +156,9 @@ bootstrapRegress gen0 numResamples cl rgrss preds0 resp0
(coeffss, r2s) = rgrss preds0 resp0
est s v = estimateFromInterval s (w G.! lo, w G.! hi) cl
where w = F.sort v
- lo = round c
- hi = truncate (n - c)
+ bounded i = min (U.length w - 1) (max 0 i)
+ lo = bounded $ round c
+ hi = bounded $ truncate (n - c)
n = fromIntegral numResamples
c = n * (significanceLevel cl / 2)
return (coeffs, r2)
diff --git a/Statistics/Resampling.hs b/Statistics/Resampling.hs
index 8d96681..4ad98a4 100644
--- a/Statistics/Resampling.hs
+++ b/Statistics/Resampling.hs
@@ -242,7 +242,9 @@ jackknifeVariance_ c samp
-- | /O(n)/ Compute the unbiased jackknife variance of a sample.
jackknifeVarianceUnb :: Sample -> U.Vector Double
-jackknifeVarianceUnb = jackknifeVariance_ 1
+jackknifeVarianceUnb samp
+ | G.length samp == 2 = singletonErr "jackknifeVariance"
+ | otherwise = jackknifeVariance_ 1 samp
-- | /O(n)/ Compute the jackknife variance of a sample.
jackknifeVariance :: Sample -> U.Vector Double
@@ -264,7 +266,7 @@ dropAt n v = U.slice 0 n v U.++ U.slice (n+1) (U.length v - n - 1) v
singletonErr :: String -> a
singletonErr func = error $
- "Statistics.Resampling." ++ func ++ ": singleton input"
+ "Statistics.Resampling." ++ func ++ ": not enough elements in sample"
-- | Split a generator into several that can run independently.
splitGen :: Int -> GenIO -> IO [GenIO]
diff --git a/Statistics/Resampling/Bootstrap.hs b/Statistics/Resampling/Bootstrap.hs
index 0ad2cf0..0fc5cae 100644
--- a/Statistics/Resampling/Bootstrap.hs
+++ b/Statistics/Resampling/Bootstrap.hs
@@ -57,10 +57,10 @@ bootstrapBCA confidenceLevel sample resampledData
estimateFromInterval pt (resample ! lo, resample ! hi) confidenceLevel
where
-- Quantile estimates for given CL
- lo = max (cumn a1) 0
+ lo = min (max (cumn a1) 0) (ni - 1)
where a1 = bias + b1 / (1 - accel * b1)
b1 = bias + z1
- hi = min (cumn a2) (ni - 1)
+ hi = max (min (cumn a2) (ni - 1)) 0
where a2 = bias + b2 / (1 - accel * b2)
b2 = bias - z1
-- Number of resamples
diff --git a/Statistics/Sample.hs b/Statistics/Sample.hs
index 8fd7808..f6bda63 100644
--- a/Statistics/Sample.hs
+++ b/Statistics/Sample.hs
@@ -66,7 +66,7 @@ import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
--- Operator ^ will be overriden
+-- Operator ^ will be overridden
import Prelude hiding ((^), sum)
-- | /O(n)/ Range. The difference between the largest and smallest
diff --git a/Statistics/Sample/KernelDensity.hs b/Statistics/Sample/KernelDensity.hs
index bfee05e..d340af2 100644
--- a/Statistics/Sample/KernelDensity.hs
+++ b/Statistics/Sample/KernelDensity.hs
@@ -25,10 +25,11 @@ module Statistics.Sample.KernelDensity
-- $references
) where
+import Data.Default.Class
import Numeric.MathFunctions.Constants (m_sqrt_2_pi)
+import Numeric.RootFinding (fromRoot, ridders, RiddersParam(..), Tolerance(..))
import Prelude hiding (const, min, max, sum)
import Statistics.Function (minMax, nextHighestPowerOfTwo)
-import Statistics.Math.RootFinding (fromRoot, ridders)
import Statistics.Sample.Histogram (histogram_)
import Statistics.Sample.Internal (sum)
import Statistics.Transform (CD, dct, idct)
@@ -98,8 +99,8 @@ kde_ n0 min max xs
a = dct . G.map (/ sum h) $ h
where h = G.map (/ len) $ histogram_ ni min max xs
!len = fromIntegral (G.length xs)
- !t_star = fromRoot (0.28 * len ** (-0.4)) . ridders 1e-14 (0,0.1) $ \x ->
- x - (len * (2 * sqrt pi) * go 6 (f 7 x)) ** (-0.4)
+ !t_star = fromRoot (0.28 * len ** (-0.4)) . ridders def{ riddersTol = AbsTol 1e-14 } (0,0.1)
+ $ \x -> x - (len * (2 * sqrt pi) * go 6 (f 7 x)) ** (-0.4)
where
f q t = 2 * pi ** (q*2) * sum (G.zipWith g iv a2v)
where g i a2 = i ** q * a2 * exp ((-i) * sqr pi * t)
diff --git a/Statistics/Sample/Normalize.hs b/Statistics/Sample/Normalize.hs
new file mode 100644
index 0000000..0d81a8b
--- /dev/null
+++ b/Statistics/Sample/Normalize.hs
@@ -0,0 +1,43 @@
+{-# LANGUAGE FlexibleContexts #-}
+
+-- |
+-- Module : Statistics.Sample.Normalize
+-- Copyright : (c) 2017 Gregory W. Schwartz
+-- License : BSD3
+--
+-- Maintainer : gsch@mail.med.upenn.edu
+-- Stability : experimental
+-- Portability : portable
+--
+-- Functions for normalizing samples.
+
+module Statistics.Sample.Normalize
+ (
+ standardize
+ ) where
+
+import Statistics.Sample
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector as V
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Storable as S
+
+-- | /O(n)/ Normalize a sample using standard scores:
+--
+-- \[ z = \frac{x - \mu}{\sigma} \]
+--
+-- Where μ is sample mean and σ is standard deviation computed from
+-- unbiased variance estimation. If sample to small to compute σ or
+-- it's equal to 0 @Nothing@ is returned.
+standardize :: (G.Vector v Double) => v Double -> Maybe (v Double)
+standardize xs
+ | G.length xs < 2 = Nothing
+ | sigma == 0 = Nothing
+ | otherwise = Just $ G.map (\x -> (x - mu) / sigma) xs
+ where
+ mu = mean xs
+ sigma = stdDev xs
+{-# INLINABLE standardize #-}
+{-# SPECIALIZE standardize :: V.Vector Double -> Maybe (V.Vector Double) #-}
+{-# SPECIALIZE standardize :: U.Vector Double -> Maybe (U.Vector Double) #-}
+{-# SPECIALIZE standardize :: S.Vector Double -> Maybe (S.Vector Double) #-}
diff --git a/Statistics/Test/KolmogorovSmirnov.hs b/Statistics/Test/KolmogorovSmirnov.hs
index 2f56e54..c4370e0 100644
--- a/Statistics/Test/KolmogorovSmirnov.hs
+++ b/Statistics/Test/KolmogorovSmirnov.hs
@@ -32,7 +32,8 @@ import Control.Monad (when)
import Prelude hiding (exponent, sum)
import Statistics.Distribution (Distribution(..))
import Statistics.Function (gsort, unsafeModify)
-import Statistics.Matrix (center, exponent, for, fromVector, power)
+import Statistics.Matrix (center, for, fromVector)
+import qualified Statistics.Matrix as Mat
import Statistics.Test.Types
import Statistics.Types (mkPValue)
import qualified Data.Vector as V
@@ -61,7 +62,7 @@ kolmogorovSmirnovTest d
= kolmogorovSmirnovTestCdf (cumulative d)
--- | Variant of 'kolmogorovSmirnovTest' which uses CFD in form of
+-- | Variant of 'kolmogorovSmirnovTest' which uses CDF in form of
-- function.
kolmogorovSmirnovTestCdf :: (G.Vector v Double)
=> (Double -> Double) -- ^ CDF of distribution
@@ -213,7 +214,7 @@ kolmogorovSmirnovProbability n d
-- Avoid potentially lengthy calculations for large N and D > 0.999
| s > 7.24 || (s > 3.76 && n > 99) = 1 - 2 * exp( -(2.000071 + 0.331 / sqrt n' + 1.409 / n') * s)
-- Exact computation
- | otherwise = fini $ matrix `power` n
+ | otherwise = fini $ KSMatrix 0 matrix `power` n
where
s = n' * d * d
n' = fromIntegral n
@@ -249,7 +250,7 @@ kolmogorovSmirnovProbability n d
return mat
in fromVector size size m
-- Last calculation
- fini m = loop 1 (center m) (exponent m)
+ fini (KSMatrix e m) = loop 1 (center m) e
where
loop i ss eQ
| i > n = ss * 10 ^^ eQ
@@ -257,6 +258,27 @@ kolmogorovSmirnovProbability n d
| otherwise = loop (i+1) ss' eQ
where ss' = ss * fromIntegral i / fromIntegral n
+data KSMatrix = KSMatrix Int Mat.Matrix
+
+
+multiply :: KSMatrix -> KSMatrix -> KSMatrix
+multiply (KSMatrix e1 m1) (KSMatrix e2 m2) = KSMatrix (e1+e2) (Mat.multiply m1 m2)
+
+power :: KSMatrix -> Int -> KSMatrix
+power mat 1 = mat
+power mat n = avoidOverflow res
+ where
+ mat2 = power mat (n `quot` 2)
+ pow = multiply mat2 mat2
+ res | odd n = multiply pow mat
+ | otherwise = pow
+
+avoidOverflow :: KSMatrix -> KSMatrix
+avoidOverflow ksm@(KSMatrix e m)
+ | center m > 1e140 = KSMatrix (e + 140) (Mat.map (* 1e-140) m)
+ | otherwise = ksm
+
+
----------------------------------------------------------------
-- $references
diff --git a/Statistics/Test/KruskalWallis.hs b/Statistics/Test/KruskalWallis.hs
index d84af3a..664c637 100644
--- a/Statistics/Test/KruskalWallis.hs
+++ b/Statistics/Test/KruskalWallis.hs
@@ -78,7 +78,7 @@ kruskalWallis samples = (nTot - 1) * numerator / denominator
-- significance. For additional information check 'kruskalWallis'. This is just
-- a helper function.
--
--- It uses /Chi-Squared/ distribution for aproximation as long as the sizes are
+-- It uses /Chi-Squared/ distribution for approximation as long as the sizes are
-- larger than 5. Otherwise the test returns 'Nothing'.
kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
kruskalWallisTest [] = Nothing
diff --git a/Statistics/Test/MannWhitneyU.hs b/Statistics/Test/MannWhitneyU.hs
index 41c2e4c..3732251 100644
--- a/Statistics/Test/MannWhitneyU.hs
+++ b/Statistics/Test/MannWhitneyU.hs
@@ -8,7 +8,7 @@
-- Portability : portable
--
-- Mann-Whitney U test (also know as Mann-Whitney-Wilcoxon and
--- Wilcoxon rank sum test) is a non-parametric test for assesing
+-- Wilcoxon rank sum test) is a non-parametric test for assessing
-- whether two samples of independent observations have different
-- mean.
module Statistics.Test.MannWhitneyU (
@@ -109,7 +109,7 @@ mannWhitneyUCriticalValue
-> Maybe Int -- ^ The critical value (of U).
mannWhitneyUCriticalValue (m, n) p
| m < 1 || n < 1 = Nothing -- Sample must be nonempty
- | p' <= 1 = Nothing -- p-value is too small. Null hypothesys couln't be disproved
+ | p' <= 1 = Nothing -- p-value is too small. Null hypothesis couldn't be disproved
| otherwise = findIndex (>= p')
$ take (m*n)
$ tail
diff --git a/Statistics/Test/StudentT.hs b/Statistics/Test/StudentT.hs
index f68a3cc..580d00a 100644
--- a/Statistics/Test/StudentT.hs
+++ b/Statistics/Test/StudentT.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-}
--- | Student's T-test is for assesing whether two samples have
+-- | Student's T-test is for assessing whether two samples have
-- different mean. This module contain several variations of
-- T-test. It's a parametric tests and assumes that samples are
-- normally distributed.
diff --git a/Statistics/Test/Types.hs b/Statistics/Test/Types.hs
index c03d35c..aa3e88d 100644
--- a/Statistics/Test/Types.hs
+++ b/Statistics/Test/Types.hs
@@ -87,7 +87,7 @@ instance FromJSON PositionTest
instance ToJSON PositionTest
instance NFData PositionTest
--- | significant if parameter is 'True', not significant otherwiser
+-- | significant if parameter is 'True', not significant otherwise
significant :: Bool -> TestResult
significant True = Significant
significant False = NotSignificant
diff --git a/Statistics/Test/WilcoxonT.hs b/Statistics/Test/WilcoxonT.hs
index aed5fb0..18f2139 100644
--- a/Statistics/Test/WilcoxonT.hs
+++ b/Statistics/Test/WilcoxonT.hs
@@ -37,7 +37,7 @@ module Statistics.Test.WilcoxonT (
-- function in this module to get a meaningful result.
-- ranks of the differences where the first parameter is higher) whereas T- is
-- the sum of negative ranks (the ranks of the differences where the second parameter is higher).
--- to the the length of the shorter sample.
+-- to the length of the shorter sample.
import Control.Applicative ((<$>))
import Data.Function (on)
@@ -207,7 +207,7 @@ normalApprox ni
-- | The Wilcoxon matched-pairs signed-rank test. The samples are
-- zipped together: if one is longer than the other, both are
--- truncated to the the length of the shorter sample.
+-- truncated to the length of the shorter sample.
--
-- For one-tailed test it tests whether first sample is significantly
-- greater than the second. For two-tailed it checks whether they
diff --git a/changelog.md b/changelog.md
index 9ff3228..a856cc0 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,3 +1,26 @@
+## Changes in 0.15.0.0
+
+ * Modules `Statistics.Matrix.*` are split into new package
+ `dense-linear-algebra` and exponent field is removed from `Matrix` data type.
+
+ * Module `Statistics.Normalize` which contains functions for normalization of
+ samples
+
+ * Module `Statistics.Quantile` reworked:
+
+ - `ContParam` given `Default` instance
+ - `quantile` should be used instead of `continuousBy`
+ - `median` and `mad` are added
+ - `quantiles` and `quantilesVec` functions for computation of set of
+ quantiles added.
+
+ * Modules `Statistics.Function.Comparison` and `Statistics.Math.RootFinding`
+ are removed. Corresponding functionality could be found in `math-functions`
+ package.
+
+ * Fix vector index out of bounds in `bootstrapBCA` and `bootstrapRegress`
+ (see issue #149)
+
## Changes in 0.14.0.2
* Compatibility fixes with older GHC
@@ -185,7 +208,7 @@ p-value. Also API for statistical tests is changed.
* Accesors for uniform distribution are added.
- * ContGen instances for all continous distribtuions are added.
+ * ContGen instances for all continuous distribtuions are added.
* Beta distribution is added.
diff --git a/statistics.cabal b/statistics.cabal
index ef52b84..3638361 100644
--- a/statistics.cabal
+++ b/statistics.cabal
@@ -1,5 +1,5 @@
name: statistics
-version: 0.14.0.2
+version: 0.15.0.0
synopsis: A library of statistical types, data, and functions
description:
This library provides a number of common functions and types useful
@@ -26,8 +26,8 @@ license: BSD2
license-file: LICENSE
homepage: https://github.com/bos/statistics
bug-reports: https://github.com/bos/statistics/issues
-author: Bryan O'Sullivan <bos@serpentine.com>
-maintainer: Bryan O'Sullivan <bos@serpentine.com>
+author: Bryan O'Sullivan <bos@serpentine.com>, Alexey Khudaykov <alexey.skladnoy@gmail.com>
+maintainer: Bryan O'Sullivan <bos@serpentine.com>, Alexey Khudaykov <alexey.skladnoy@gmail.com>
copyright: 2009-2014 Bryan O'Sullivan
category: Math, Statistics
build-type: Simple
@@ -44,8 +44,8 @@ extra-source-files:
tests/Tests/Math/gen.py
tests/utils/Makefile
tests/utils/fftw.c
-tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.1
-
+tested-with: GHC==7.4.2, GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2, GHC==8.2.2, GHC==8.4.3
+
library
exposed-modules:
Statistics.Autocorrelation
@@ -70,19 +70,16 @@ library
Statistics.Distribution.Transform
Statistics.Distribution.Uniform
Statistics.Function
- Statistics.Math.RootFinding
- Statistics.Matrix
- Statistics.Matrix.Algorithms
- Statistics.Matrix.Mutable
- Statistics.Matrix.Types
Statistics.Quantile
Statistics.Regression
Statistics.Resampling
Statistics.Resampling.Bootstrap
Statistics.Sample
+ Statistics.Sample.Internal
Statistics.Sample.Histogram
Statistics.Sample.KernelDensity
Statistics.Sample.KernelDensity.Simple
+ Statistics.Sample.Normalize
Statistics.Sample.Powers
Statistics.Test.ChiSquared
Statistics.Test.KolmogorovSmirnov
@@ -96,33 +93,29 @@ library
Statistics.Types
other-modules:
Statistics.Distribution.Poisson.Internal
- Statistics.Function.Comparison
Statistics.Internal
- Statistics.Sample.Internal
Statistics.Test.Internal
Statistics.Types.Internal
- build-depends:
- aeson >= 0.6.0.0,
- base >= 4.5 && < 5,
- base-orphans >= 0.6 && <0.7,
- binary >= 0.5.1.0,
- deepseq >= 1.1.0.2,
- erf,
- math-functions >= 0.1.7,
- monad-par >= 0.3.4,
- mwc-random >= 0.13.0.0,
- primitive >= 0.3,
- vector >= 0.10,
- vector-algorithms >= 0.4,
- vector-th-unbox,
- vector-binary-instances >= 0.2.1
+ build-depends: base >= 4.5 && < 5
+ , base-orphans >= 0.6 && <0.9
+ --
+ , math-functions >= 0.3
+ , mwc-random >= 0.13.0.0
+ --
+ , aeson >= 0.6.0.0
+ , deepseq >= 1.1.0.2
+ , binary >= 0.5.1.0
+ , monad-par >= 0.3.4
+ , primitive >= 0.3
+ , dense-linear-algebra >= 0.1 && <0.2
+ , vector >= 0.10
+ , vector-algorithms >= 0.4
+ , vector-th-unbox
+ , vector-binary-instances >= 0.2.1
+ , data-default-class >= 0.1.2
if impl(ghc < 7.6)
build-depends:
ghc-prim
-
- -- gather extensive profiling data for now
- ghc-prof-options: -auto-all
-
ghc-options: -O2 -Wall -fwarn-tabs -funbox-strict-fields
test-suite tests
@@ -144,32 +137,27 @@ test-suite tests
Tests.Parametric
Tests.Serialization
Tests.Transform
-
+ Tests.Quantile
ghc-options:
-Wall -threaded -rtsopts -fsimpl-tick-factor=500
-
- build-depends:
- HUnit,
- QuickCheck >= 2.7.5,
- base,
- binary,
- erf,
- aeson,
- ieee754 >= 0.7.3,
- math-functions,
- mwc-random,
- primitive,
- statistics,
- test-framework,
- test-framework-hunit,
- test-framework-quickcheck2,
- vector,
- vector-algorithms
+ build-depends: base
+ , statistics
+ , dense-linear-algebra
+ , HUnit
+ , QuickCheck >= 2.7.5
+ , binary
+ , erf
+ , aeson
+ , ieee754 >= 0.7.3
+ , math-functions
+ , mwc-random
+ , primitive
+ , test-framework
+ , test-framework-hunit
+ , test-framework-quickcheck2
+ , vector
+ , vector-algorithms
source-repository head
type: git
location: https://github.com/bos/statistics
-
-source-repository head
- type: mercurial
- location: https://bitbucket.org/bos/statistics
diff --git a/tests/Tests/ApproxEq.hs b/tests/Tests/ApproxEq.hs
index ba8ee0b..f20369a 100644
--- a/tests/Tests/ApproxEq.hs
+++ b/tests/Tests/ApproxEq.hs
@@ -78,8 +78,8 @@ instance ApproxEq (V.Vector Double) where
instance ApproxEq Matrix where
type Bounds Matrix = Double
- eq eps (Matrix r1 c1 e1 v1) (Matrix r2 c2 e2 v2) =
- (r1,c1,e1) == (r2,c2,e2) && eq eps v1 v2
+ eq eps (Matrix r1 c1 v1) (Matrix r2 c2 v2) =
+ (r1,c1) == (r2,c2) && eq eps v1 v2
(=~) = eq m_epsilon
eql eps a b = eqll dimension M.toList (`quotRem` cols a) eps a b
(==~) = eql m_epsilon
diff --git a/tests/Tests/Correlation.hs b/tests/Tests/Correlation.hs
index be85390..0e50278 100644
--- a/tests/Tests/Correlation.hs
+++ b/tests/Tests/Correlation.hs
@@ -11,7 +11,7 @@ import Test.QuickCheck ((==>),Property,counterexample)
import Test.Framework
import Test.Framework.Providers.QuickCheck2
import Test.Framework.Providers.HUnit
-import Test.HUnit (Assertion, (@=?), assertBool)
+import Test.HUnit (Assertion, (@=?))
import Tests.ApproxEq
diff --git a/tests/Tests/Distribution.hs b/tests/Tests/Distribution.hs
index 083912b..070485d 100644
--- a/tests/Tests/Distribution.hs
+++ b/tests/Tests/Distribution.hs
@@ -268,9 +268,6 @@ logProbabilityCheck _ d x
logP = logProbability d x
-instance QC.Arbitrary DiscreteUniform where
- arbitrary = discreteUniformAB <$> QC.choose (1,1000) <*> QC.choose(1,1000)
-
-- Parameters for distribution testing. Some distribution require
-- relaxing parameters a bit
class Param a where
diff --git a/tests/Tests/Matrix/Types.hs b/tests/Tests/Matrix/Types.hs
index 82e8049..ad2fc9a 100644
--- a/tests/Tests/Matrix/Types.hs
+++ b/tests/Tests/Matrix/Types.hs
@@ -23,7 +23,7 @@ fromMat :: Mat Double -> Matrix
fromMat (Mat r c xs) = fromList r c (concat xs)
toMat :: Matrix -> Mat Double
-toMat (Matrix r c _ v) = Mat r c . split . U.toList $ v
+toMat (Matrix r c v) = Mat r c . split . U.toList $ v
where split xs@(_:_) = let (h,t) = splitAt c xs
in h : split t
split [] = []
diff --git a/tests/Tests/NonParametric.hs b/tests/Tests/NonParametric.hs
index e4e53d6..e7086f5 100644
--- a/tests/Tests/NonParametric.hs
+++ b/tests/Tests/NonParametric.hs
@@ -8,7 +8,7 @@ import Statistics.Test.KolmogorovSmirnov
import Statistics.Test.MannWhitneyU
import Statistics.Test.KruskalWallis
import Statistics.Test.WilcoxonT
-import Statistics.Types (PValue,pValue,cl95,mkPValue)
+import Statistics.Types (PValue,pValue,mkPValue)
import Test.Framework (testGroup)
import Test.Framework.Providers.HUnit
diff --git a/tests/Tests/Orphanage.hs b/tests/Tests/Orphanage.hs
index c3f7980..c3fe664 100644
--- a/tests/Tests/Orphanage.hs
+++ b/tests/Tests/Orphanage.hs
@@ -6,21 +6,22 @@
module Tests.Orphanage where
import Control.Applicative
-import Statistics.Distribution.Beta (BetaDistribution, betaDistr)
-import Statistics.Distribution.Binomial (BinomialDistribution, binomial)
+import Statistics.Distribution.Beta (BetaDistribution, betaDistr)
+import Statistics.Distribution.Binomial (BinomialDistribution, binomial)
import Statistics.Distribution.CauchyLorentz
-import Statistics.Distribution.ChiSquared (ChiSquared, chiSquared)
-import Statistics.Distribution.Exponential (ExponentialDistribution, exponential)
-import Statistics.Distribution.FDistribution (FDistribution, fDistribution)
-import Statistics.Distribution.Gamma (GammaDistribution, gammaDistr)
+import Statistics.Distribution.ChiSquared (ChiSquared, chiSquared)
+import Statistics.Distribution.Exponential (ExponentialDistribution, exponential)
+import Statistics.Distribution.FDistribution (FDistribution, fDistribution)
+import Statistics.Distribution.Gamma (GammaDistribution, gammaDistr)
import Statistics.Distribution.Geometric
import Statistics.Distribution.Hypergeometric
-import Statistics.Distribution.Laplace (LaplaceDistribution, laplace)
-import Statistics.Distribution.Normal (NormalDistribution, normalDistr)
-import Statistics.Distribution.Poisson (PoissonDistribution, poisson)
+import Statistics.Distribution.Laplace (LaplaceDistribution, laplace)
+import Statistics.Distribution.Normal (NormalDistribution, normalDistr)
+import Statistics.Distribution.Poisson (PoissonDistribution, poisson)
import Statistics.Distribution.StudentT
-import Statistics.Distribution.Transform (LinearTransform, scaleAround)
-import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr)
+import Statistics.Distribution.Transform (LinearTransform, scaleAround)
+import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr)
+import Statistics.Distribution.DiscreteUniform (DiscreteUniform, discreteUniformAB)
import Statistics.Types
import Test.QuickCheck as QC
@@ -101,3 +102,6 @@ instance (Arbitrary a) => Arbitrary (UpperLimit a) where
instance (Arbitrary a) => Arbitrary (LowerLimit a) where
arbitrary = liftA2 LowerLimit arbitrary arbitrary
+
+instance QC.Arbitrary DiscreteUniform where
+ arbitrary = discreteUniformAB <$> QC.choose (1,1000) <*> QC.choose(1,1000)
diff --git a/tests/Tests/Parametric.hs b/tests/Tests/Parametric.hs
index 19c166e..11fb2e4 100644
--- a/tests/Tests/Parametric.hs
+++ b/tests/Tests/Parametric.hs
@@ -2,7 +2,6 @@ module Tests.Parametric (tests) where
import Data.Maybe (fromJust)
import Statistics.Test.StudentT
-import Statistics.Test.Types
import Statistics.Types
import qualified Data.Vector.Unboxed as U
import Test.Framework (testGroup)
diff --git a/tests/Tests/Quantile.hs b/tests/Tests/Quantile.hs
new file mode 100644
index 0000000..12a310f
--- /dev/null
+++ b/tests/Tests/Quantile.hs
@@ -0,0 +1,100 @@
+{-# LANGUAGE ViewPatterns #-}
+-- |
+-- Tests for quantile
+module Tests.Quantile (tests) where
+
+import Control.Exception
+import qualified Data.Vector.Unboxed as U
+import Test.Framework
+import Test.Framework.Providers.HUnit
+import Test.Framework.Providers.QuickCheck2
+import Test.HUnit (Assertion,assertEqual,assertFailure)
+import Test.QuickCheck hiding (sample)
+import Numeric.MathFunctions.Comparison (ulpDelta,ulpDistance)
+import Statistics.Quantile
+
+tests :: Test
+tests = testGroup "Quantiles"
+ [ testCase "R alg. 4" $ compareWithR cadpw (0.00, 0.50, 2.50, 8.25, 10.00)
+ , testCase "R alg. 5" $ compareWithR hazen (0.00, 1.00, 5.00, 9.00, 10.00)
+ , testCase "R alg. 6" $ compareWithR spss (0.00, 0.75, 5.00, 9.25, 10.00)
+ , testCase "R alg. 7" $ compareWithR s (0.000, 1.375, 5.000, 8.625,10.00)
+ , testCase "R alg. 8" $ compareWithR medianUnbiased
+ (0.0, 0.9166666666666667, 5.000000000000003, 9.083333333333334, 10.0)
+ , testCase "R alg. 9" $ compareWithR normalUnbiased
+ (0.0000, 0.9375, 5.0000, 9.0625, 10.0000)
+ , testProperty "alg 7." propWeigtedAverage
+ -- Test failures
+ , testCase "weightedAvg should throw errors" $ do
+ let xs = U.fromList [1,2,3]
+ xs0 = U.fromList []
+ shouldError "Empty sample" $ weightedAvg 1 4 xs0
+ shouldError "N=0" $ weightedAvg 1 0 xs
+ shouldError "N=1" $ weightedAvg 1 1 xs
+ shouldError "k<0" $ weightedAvg (-1) 4 xs
+ shouldError "k>N" $ weightedAvg 5 4 xs
+ , testCase "quantile should throw errors" $ do
+ let xs = U.fromList [1,2,3]
+ xs0 = U.fromList []
+ shouldError "Empty xs" $ quantile s 1 4 xs0
+ shouldError "N=0" $ quantile s 1 0 xs
+ shouldError "N=1" $ quantile s 1 1 xs
+ shouldError "k<0" $ quantile s (-1) 4 xs
+ shouldError "k>N" $ quantile s 5 4 xs
+ --
+ , testProperty "quantiles are OK" propQuantiles
+ , testProperty "quantilesVec are OK" propQuantilesVec
+ ]
+
+sample :: U.Vector Double
+sample = U.fromList [0, 1, 2.5, 7.5, 9, 10]
+
+-- Compare quantiles implementation with reference R implementation
+compareWithR :: ContParam -> (Double,Double,Double,Double,Double) -> Assertion
+compareWithR p (q0,q1,q2,q3,q4) = do
+ assertEqual "Q 0" q0 $ quantile p 0 4 sample
+ assertEqual "Q 1" q1 $ quantile p 1 4 sample
+ assertEqual "Q 2" q2 $ quantile p 2 4 sample
+ assertEqual "Q 3" q3 $ quantile p 3 4 sample
+ assertEqual "Q 4" q4 $ quantile p 4 4 sample
+
+propWeigtedAverage :: Positive Int -> Positive Int -> Property
+propWeigtedAverage (Positive k) (Positive q) =
+ (q >= 2 && k <= q) ==> let q1 = weightedAvg k q sample
+ q2 = quantile s k q sample
+ in counterexample ("weightedAvg = " ++ show q1)
+ $ counterexample ("quantile = " ++ show q2)
+ $ counterexample ("delta in ulps = " ++ show (ulpDelta q1 q2))
+ $ ulpDistance q1 q2 <= 16
+
+propQuantiles :: Positive Int -> Int -> Int -> NonEmptyList Double -> Property
+propQuantiles (Positive n)
+ ((`mod` n) -> k1)
+ ((`mod` n) -> k2)
+ (NonEmpty xs)
+ = n >= 2
+ ==> [x1,x2] == quantiles s [k1,k2] n rndXs
+ where
+ rndXs = U.fromList xs
+ x1 = quantile s k1 n rndXs
+ x2 = quantile s k2 n rndXs
+
+propQuantilesVec :: Positive Int -> Int -> Int -> NonEmptyList Double -> Property
+propQuantilesVec (Positive n)
+ ((`mod` n) -> k1)
+ ((`mod` n) -> k2)
+ (NonEmpty xs)
+ = n >= 2
+ ==> U.fromList [x1,x2] == quantilesVec s (U.fromList [k1,k2]) n rndXs
+ where
+ rndXs = U.fromList xs
+ x1 = quantile s k1 n rndXs
+ x2 = quantile s k2 n rndXs
+
+
+shouldError :: String -> a -> Assertion
+shouldError nm x = do
+ r <- try (evaluate x)
+ case r of
+ Left (ErrorCall{}) -> return ()
+ Right _ -> assertFailure ("Should call error: " ++ nm)
diff --git a/tests/tests.hs b/tests/tests.hs
index 039b8bb..7c8e395 100644
--- a/tests/tests.hs
+++ b/tests/tests.hs
@@ -1,21 +1,25 @@
import Test.Framework (defaultMain)
-import qualified Tests.Distribution as Distribution
-import qualified Tests.Function as Function
-import qualified Tests.KDE as KDE
-import qualified Tests.Matrix as Matrix
-import qualified Tests.NonParametric as NonParametric
-import qualified Tests.Parametric as Parametric
-import qualified Tests.Transform as Transform
-import qualified Tests.Correlation as Correlation
+
+import qualified Tests.Distribution
+import qualified Tests.Function
+import qualified Tests.KDE
+import qualified Tests.Matrix
+import qualified Tests.NonParametric
+import qualified Tests.Parametric
+import qualified Tests.Transform
+import qualified Tests.Correlation
import qualified Tests.Serialization
+import qualified Tests.Quantile
+
main :: IO ()
-main = defaultMain [ Distribution.tests
- , Function.tests
- , KDE.tests
- , Matrix.tests
- , NonParametric.tests
- , Parametric.tests
- , Transform.tests
- , Correlation.tests
+main = defaultMain [ Tests.Distribution.tests
+ , Tests.Function.tests
+ , Tests.KDE.tests
+ , Tests.Matrix.tests
+ , Tests.NonParametric.tests
+ , Tests.Parametric.tests
+ , Tests.Transform.tests
+ , Tests.Correlation.tests
, Tests.Serialization.tests
+ , Tests.Quantile.tests
]