summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexeyKhudyakov <>2017-05-11 19:17:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2017-05-11 19:17:00 (GMT)
commit6d08dca5993fefbb429cacd41f6331e33d60ba47 (patch)
tree2b1374ba62a379a65b223d3d8227f57924b74ad4
parent209598757c0acd2982e94d80cedc6d89e7a9da76 (diff)
version 0.14.0.00.14.0.0
-rw-r--r--Statistics/ConfidenceInt.hs85
-rw-r--r--Statistics/Constants.hs20
-rw-r--r--Statistics/Correlation.hs3
-rw-r--r--Statistics/Distribution.hs33
-rw-r--r--Statistics/Distribution/Beta.hs118
-rw-r--r--Statistics/Distribution/Binomial.hs85
-rw-r--r--Statistics/Distribution/CauchyLorentz.hs69
-rw-r--r--Statistics/Distribution/ChiSquared.hs91
-rw-r--r--Statistics/Distribution/DiscreteUniform.hs112
-rw-r--r--Statistics/Distribution/Exponential.hs83
-rw-r--r--Statistics/Distribution/FDistribution.hs85
-rw-r--r--Statistics/Distribution/Gamma.hs87
-rw-r--r--Statistics/Distribution/Geometric.hs106
-rw-r--r--Statistics/Distribution/Hypergeometric.hs97
-rw-r--r--Statistics/Distribution/Laplace.hs84
-rw-r--r--Statistics/Distribution/Normal.hs107
-rw-r--r--Statistics/Distribution/Poisson.hs55
-rw-r--r--Statistics/Distribution/Poisson/Internal.hs36
-rw-r--r--Statistics/Distribution/StudentT.hs69
-rw-r--r--Statistics/Distribution/Transform.hs9
-rw-r--r--Statistics/Distribution/Uniform.hs59
-rw-r--r--Statistics/Function.hs2
-rw-r--r--Statistics/Function/Comparison.hs26
-rw-r--r--Statistics/Internal.hs118
-rw-r--r--Statistics/Math/RootFinding.hs2
-rw-r--r--Statistics/Quantile.hs15
-rw-r--r--Statistics/Regression.hs34
-rw-r--r--Statistics/Resampling.hs137
-rw-r--r--Statistics/Resampling/Bootstrap.hs134
-rw-r--r--Statistics/Sample.hs10
-rw-r--r--Statistics/Sample/Histogram.hs5
-rw-r--r--Statistics/Sample/Powers.hs54
-rw-r--r--Statistics/Test/ChiSquared.hs58
-rw-r--r--Statistics/Test/Internal.hs4
-rw-r--r--Statistics/Test/KolmogorovSmirnov.hs162
-rw-r--r--Statistics/Test/KruskalWallis.hs51
-rw-r--r--Statistics/Test/MannWhitneyU.hs98
-rw-r--r--Statistics/Test/StudentT.hs149
-rw-r--r--Statistics/Test/Types.hs70
-rw-r--r--Statistics/Test/WilcoxonT.hs175
-rw-r--r--Statistics/Types.hs509
-rw-r--r--Statistics/Types/Internal.hs24
-rw-r--r--changelog.md134
-rw-r--r--statistics.cabal17
-rw-r--r--tests/Tests/ApproxEq.hs3
-rw-r--r--tests/Tests/Correlation.hs1
-rw-r--r--tests/Tests/Distribution.hs174
-rw-r--r--tests/Tests/Helpers.hs17
-rw-r--r--tests/Tests/NonParametric.hs67
-rw-r--r--tests/Tests/Orphanage.hs103
-rw-r--r--tests/Tests/Parametric.hs103
-rw-r--r--tests/Tests/Serialization.hs82
-rw-r--r--tests/Tests/Transform.hs27
-rw-r--r--tests/tests.hs5
54 files changed, 3076 insertions, 987 deletions
diff --git a/Statistics/ConfidenceInt.hs b/Statistics/ConfidenceInt.hs
new file mode 100644
index 0000000..020f23f
--- /dev/null
+++ b/Statistics/ConfidenceInt.hs
@@ -0,0 +1,85 @@
+{-# LANGUAGE ViewPatterns #-}
+-- | Calculation of confidence intervals
+module Statistics.ConfidenceInt (
+ poissonCI
+ , poissonNormalCI
+ , binomialCI
+ , naiveBinomialCI
+ -- * References
+ -- $references
+ ) where
+
+import Statistics.Distribution
+import Statistics.Distribution.ChiSquared
+import Statistics.Distribution.Beta
+import Statistics.Types
+
+
+
+-- | Calculate confidence intervals for Poisson-distributed value
+-- using normal approximation
+poissonNormalCI :: Int -> Estimate NormalErr Double
+poissonNormalCI n
+ | n < 0 = error "Statistics.ConfidenceInt.poissonNormalCI negative number of trials"
+ | otherwise = estimateNormErr n' (sqrt n')
+ where
+ n' = fromIntegral n
+
+-- | Calculate confidence intervals for Poisson-distributed value for
+-- single measurement. These are exact confidence intervals
+poissonCI :: CL Double -> Int -> Estimate ConfInt Double
+poissonCI cl@(significanceLevel -> p) n
+ | n < 0 = error "Statistics.ConfidenceInt.poissonCI: negative number of trials"
+ | n == 0 = estimateFromInterval m (0 ,m2) cl
+ | otherwise = estimateFromInterval m (m1,m2) cl
+ where
+ m = fromIntegral n
+ m1 = 0.5 * quantile (chiSquared (2*n )) (p/2)
+ m2 = 0.5 * complQuantile (chiSquared (2*n+2)) (p/2)
+
+-- | Calculate confidence interval using normal approximation. Note
+-- that this approximation breaks down when /p/ is either close to 0
+-- or to 1. In particular if @np < 5@ or @n(1-p) < 5@ this
+-- approximation shouldn't be used.
+naiveBinomialCI :: Int -- ^ Number of trials
+ -> Int -- ^ Number of successes
+ -> Estimate NormalErr Double
+naiveBinomialCI n k
+ | n <= 0 || k < 0 = error "Statistics.ConfidenceInt.naiveBinomialCI: negative number of events"
+ | k > n = error "Statistics.ConfidenceInt.naiveBinomialCI: more successes than trials"
+ | otherwise = estimateNormErr p σ
+ where
+ p = fromIntegral k / fromIntegral n
+ σ = sqrt $ p * (1 - p) / fromIntegral n
+
+
+-- | Clopper-Pearson confidence interval also known as exact
+-- confidence intervals.
+binomialCI :: CL Double
+ -> Int -- ^ Number of trials
+ -> Int -- ^ Number of successes
+ -> Estimate ConfInt Double
+binomialCI cl@(significanceLevel -> p) ni ki
+ | ni <= 0 || ki < 0 = error "Statistics.ConfidenceInt.binomialCI: negative number of events"
+ | ki > ni = error "Statistics.ConfidenceInt.binomialCI: more successes than trials"
+ | ki == 0 = estimateFromInterval eff (0, ub) cl
+ | ni == ki = estimateFromInterval eff (lb,1 ) cl
+ | otherwise = estimateFromInterval eff (lb,ub) cl
+ where
+ k = fromIntegral ki
+ n = fromIntegral ni
+ eff = k / n
+ lb = quantile (betaDistr k (n - k + 1)) (p/2)
+ ub = complQuantile (betaDistr (k + 1) (n - k) ) (p/2)
+
+
+-- $references
+--
+-- * Clopper, C.; Pearson, E. S. (1934). "The use of confidence or
+-- fiducial limits illustrated in the case of the
+-- binomial". Biometrika 26: 404–413. doi:10.1093/biomet/26.4.404
+--
+-- * Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban
+-- (2001). "Interval Estimation for a Binomial Proportion". Statistical
+-- Science 16 (2): 101–133. doi:10.1214/ss/1009213286. MR 1861069.
+-- Zbl 02068924.
diff --git a/Statistics/Constants.hs b/Statistics/Constants.hs
deleted file mode 100644
index 702caf9..0000000
--- a/Statistics/Constants.hs
+++ /dev/null
@@ -1,20 +0,0 @@
--- |
--- Module : Statistics.Constants
--- Copyright : (c) 2009, 2011 Bryan O'Sullivan
--- License : BSD3
---
--- Maintainer : bos@serpentine.com
--- Stability : experimental
--- Portability : portable
---
--- Constant values common to much statistics code.
---
--- DEPRECATED: use module 'Numeric.MathFunctions.Constants' from
--- math-functions.
-
-module Statistics.Constants
-{-# DEPRECATED "use module Numeric.MathFunctions.Constants from math-functions" #-}
- ( module Numeric.MathFunctions.Constants
- ) where
-
-import Numeric.MathFunctions.Constants
diff --git a/Statistics/Correlation.hs b/Statistics/Correlation.hs
index 218274d..15ced2b 100644
--- a/Statistics/Correlation.hs
+++ b/Statistics/Correlation.hs
@@ -23,7 +23,8 @@ import Statistics.Test.Internal (rankUnsorted)
-- Pearson
----------------------------------------------------------------
--- | Pearson correlation for sample of pairs.
+-- | Pearson correlation for sample of pairs. Exactly the same as
+-- 'Statistics.Sample.correlation'
pearson :: (G.Vector v (Double, Double), G.Vector v Double)
=> v (Double, Double) -> Double
pearson = correlation
diff --git a/Statistics/Distribution.hs b/Statistics/Distribution.hs
index bde1dc1..0b27313 100644
--- a/Statistics/Distribution.hs
+++ b/Statistics/Distribution.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
-- |
-- Module : Statistics.Distribution
@@ -23,9 +24,11 @@ module Statistics.Distribution
, Variance(..)
, MaybeEntropy(..)
, Entropy(..)
+ , FromSample(..)
-- ** Random number generation
, ContGen(..)
, DiscreteGen(..)
+ , genContinuous
, genContinous
-- * Helper functions
, findRoot
@@ -39,10 +42,11 @@ import Statistics.Function (square)
import Statistics.Sample.Internal (sum)
import System.Random.MWC (Gen, uniform)
import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Generic as G
-- | Type class common to all distributions. Only c.d.f. could be
--- defined for both discrete and continous distributions.
+-- defined for both discrete and continuous distributions.
class Distribution d where
-- | Cumulative distribution function. The probability that a
-- random variable /X/ is less or equal than /x/,
@@ -91,6 +95,12 @@ class Distribution d => ContDistr d where
-- of [0,1] range function should call 'error'
quantile :: d -> Double -> Double
+ -- | 1-complement of @quantile@:
+ --
+ -- > complQuantile x ≡ quantile (1 - x)
+ complQuantile :: d -> Double -> Double
+ complQuantile d x = quantile d (1 - x)
+
-- | Natural logarithm of density.
logDensity :: d -> Double -> Double
logDensity d = log . density d
@@ -159,13 +169,28 @@ class Distribution d => ContGen d where
class (DiscreteDistr d, ContGen d) => DiscreteGen d where
genDiscreteVar :: PrimMonad m => d -> Gen (PrimState m) -> m Int
--- | Generate variates from continous distribution using inverse
+-- | Estimate distribution from sample. First parameter in sample is
+-- distribution type and second is element type.
+class FromSample d a where
+ -- | Estimate distribution from sample. Returns nothing is there's
+ -- not enough data to estimate or sample clearly doesn't come from
+ -- distribution in question. For example if there's negative
+ -- samples in exponential distribution.
+ fromSample :: G.Vector v a => v a -> Maybe d
+
+
+-- | Generate variates from continuous distribution using inverse
-- transform rule.
-genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double
-genContinous d gen = do
+genContinuous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double
+genContinuous d gen = do
x <- uniform gen
return $! quantile d x
+-- | Backwards compatibility with genContinuous.
+genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double
+genContinous = genContinuous
+{-# DEPRECATED genContinous "Use genContinuous" #-}
+
data P = P {-# UNPACK #-} !Double {-# UNPACK #-} !Double
-- | Approximate the value of /X/ for which P(/x/>/X/)=/p/.
diff --git a/Statistics/Distribution/Beta.hs b/Statistics/Distribution/Beta.hs
index 595f830..01ce39e 100644
--- a/Statistics/Distribution/Beta.hs
+++ b/Statistics/Distribution/Beta.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-----------------------------------------------------------------------------
-- |
@@ -14,22 +15,25 @@ module Statistics.Distribution.Beta
( BetaDistribution
-- * Constructor
, betaDistr
+ , betaDistrE
, improperBetaDistr
+ , improperBetaDistrE
-- * Accessors
, bdAlpha
, bdBeta
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import Numeric.SpecFunctions (
- incompleteBeta, invIncompleteBeta, logBeta, digamma)
-import Numeric.MathFunctions.Constants (m_NaN)
+ incompleteBeta, invIncompleteBeta, logBeta, digamma, log1p)
+import Numeric.MathFunctions.Constants (m_NaN,m_neg_inf)
import qualified Statistics.Distribution as D
-import Data.Binary (put, get)
-import Control.Applicative ((<$>), (<*>))
+import Statistics.Internal
+
-- | The beta distribution
data BetaDistribution = BD
@@ -37,39 +41,92 @@ data BetaDistribution = BD
-- ^ Alpha shape parameter
, bdBeta :: {-# UNPACK #-} !Double
-- ^ Beta shape parameter
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show BetaDistribution where
+ showsPrec n (BD a b) = defaultShow2 "improperBetaDistr" a b n
+instance Read BetaDistribution where
+ readPrec = defaultReadPrecM2 "improperBetaDistr" improperBetaDistrE
-instance FromJSON BetaDistribution
instance ToJSON BetaDistribution
+instance FromJSON BetaDistribution where
+ parseJSON (Object v) = do
+ a <- v .: "bdAlpha"
+ b <- v .: "bdBeta"
+ maybe (fail $ errMsgI a b) return $ improperBetaDistrE a b
+ parseJSON _ = empty
instance Binary BetaDistribution where
- put (BD x y) = put x >> put y
- get = BD <$> get <*> get
+ put (BD a b) = put a >> put b
+ get = do
+ a <- get
+ b <- get
+ maybe (fail $ errMsgI a b) return $ improperBetaDistrE a b
+
-- | Create beta distribution. Both shape parameters must be positive.
betaDistr :: Double -- ^ Shape parameter alpha
-> Double -- ^ Shape parameter beta
-> BetaDistribution
-betaDistr a b
- | a > 0 && b > 0 = improperBetaDistr a b
- | otherwise =
- error $ "Statistics.Distribution.Beta.betaDistr: "
- ++ "shape parameters must be positive. Got a = "
- ++ show a
- ++ " b = "
- ++ show b
-
--- | Create beta distribution. This construtor doesn't check parameters.
+betaDistr a b = maybe (error $ errMsg a b) id $ betaDistrE a b
+
+-- | Create beta distribution. Both shape parameters must be positive.
+betaDistrE :: Double -- ^ Shape parameter alpha
+ -> Double -- ^ Shape parameter beta
+ -> Maybe BetaDistribution
+betaDistrE a b
+ | a > 0 && b > 0 = Just (BD a b)
+ | otherwise = Nothing
+
+errMsg :: Double -> Double -> String
+errMsg a b = "Statistics.Distribution.Beta.betaDistr: "
+ ++ "shape parameters must be positive. Got a = "
+ ++ show a
+ ++ " b = "
+ ++ show b
+
+
+-- | Create beta distribution. Both shape parameters must be
+-- non-negative. So it allows to construct improper beta distribution
+-- which could be used as improper prior.
improperBetaDistr :: Double -- ^ Shape parameter alpha
-> Double -- ^ Shape parameter beta
-> BetaDistribution
-improperBetaDistr = BD
+improperBetaDistr a b
+ = maybe (error $ errMsgI a b) id $ improperBetaDistrE a b
+
+-- | Create beta distribution. Both shape parameters must be
+-- non-negative. So it allows to construct improper beta distribution
+-- which could be used as improper prior.
+improperBetaDistrE :: Double -- ^ Shape parameter alpha
+ -> Double -- ^ Shape parameter beta
+ -> Maybe BetaDistribution
+improperBetaDistrE a b
+ | a >= 0 && b >= 0 = Just (BD a b)
+ | otherwise = Nothing
+
+errMsgI :: Double -> Double -> String
+errMsgI a b
+ = "Statistics.Distribution.Beta.betaDistr: "
+ ++ "shape parameters must be non-negative. Got a = " ++ show a
+ ++ " b = " ++ show b
+
+
instance D.Distribution BetaDistribution where
cumulative (BD a b) x
| x <= 0 = 0
| x >= 1 = 1
| otherwise = incompleteBeta a b x
+ complCumulative (BD a b) x
+ | x <= 0 = 1
+ | x >= 1 = 0
+ -- For small x we use direct computation to avoid precision loss
+ -- when computing (1-x)
+ | x < 0.5 = 1 - incompleteBeta a b x
+ -- Otherwise we use property of incomplete beta:
+ -- > I(x,a,b) = 1 - I(1-x,b,a)
+ | otherwise = incompleteBeta b a (1-x)
instance D.Mean BetaDistribution where
mean (BD a b) = a / (a + b)
@@ -96,10 +153,15 @@ instance D.MaybeEntropy BetaDistribution where
instance D.ContDistr BetaDistribution where
density (BD a b) x
- | a <= 0 || b <= 0 = m_NaN
- | x <= 0 = 0
- | x >= 1 = 0
- | otherwise = exp $ (a-1)*log x + (b-1)*log (1-x) - logBeta a b
+ | a <= 0 || b <= 0 = m_NaN
+ | x <= 0 = 0
+ | x >= 1 = 0
+ | otherwise = exp $ (a-1)*log x + (b-1) * log1p (-x) - logBeta a b
+ logDensity (BD a b) x
+ | a <= 0 || b <= 0 = m_NaN
+ | x <= 0 = m_neg_inf
+ | x >= 1 = m_neg_inf
+ | otherwise = (a-1)*log x + (b-1)*log1p (-x) - logBeta a b
quantile (BD a b) p
| p == 0 = 0
@@ -109,4 +171,4 @@ instance D.ContDistr BetaDistribution where
error $ "Statistics.Distribution.Gamma.quantile: p must be in [0,1] range. Got: "++show p
instance D.ContGen BetaDistribution where
- genContVar = D.genContinous
+ genContVar = D.genContinuous
diff --git a/Statistics/Distribution/Binomial.hs b/Statistics/Distribution/Binomial.hs
index eab8e9a..cdae867 100644
--- a/Statistics/Distribution/Binomial.hs
+++ b/Statistics/Distribution/Binomial.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Binomial
@@ -18,21 +19,23 @@ module Statistics.Distribution.Binomial
BinomialDistribution
-- * Constructors
, binomial
+ , binomialE
-- * Accessors
, bdTrials
, bdProbability
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
+import Numeric.SpecFunctions (choose,logChoose,incompleteBeta,log1p)
+import Numeric.MathFunctions.Constants (m_epsilon)
+
import qualified Statistics.Distribution as D
import qualified Statistics.Distribution.Poisson.Internal as I
-import Numeric.SpecFunctions (choose,incompleteBeta)
-import Numeric.MathFunctions.Constants (m_epsilon)
-import Data.Binary (put, get)
-import Control.Applicative ((<$>), (<*>))
+import Statistics.Internal
-- | The binomial distribution.
@@ -41,20 +44,36 @@ data BinomialDistribution = BD {
-- ^ Number of trials.
, bdProbability :: {-# UNPACK #-} !Double
-- ^ Probability.
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show BinomialDistribution where
+ showsPrec i (BD n p) = defaultShow2 "binomial" n p i
+instance Read BinomialDistribution where
+ readPrec = defaultReadPrecM2 "binomial" binomialE
-instance FromJSON BinomialDistribution
instance ToJSON BinomialDistribution
+instance FromJSON BinomialDistribution where
+ parseJSON (Object v) = do
+ n <- v .: "bdTrials"
+ p <- v .: "bdProbability"
+ maybe (fail $ errMsg n p) return $ binomialE n p
+ parseJSON _ = empty
instance Binary BinomialDistribution where
- put (BD x y) = put x >> put y
- get = BD <$> get <*> get
+ put (BD x y) = put x >> put y
+ get = do
+ n <- get
+ p <- get
+ maybe (fail $ errMsg n p) return $ binomialE n p
+
+
instance D.Distribution BinomialDistribution where
cumulative = cumulative
instance D.DiscreteDistr BinomialDistribution where
- probability = probability
+ probability = probability
+ logProbability = logProbability
instance D.Mean BinomialDistribution where
mean = mean
@@ -83,7 +102,22 @@ probability :: BinomialDistribution -> Int -> Double
probability (BD n p) k
| k < 0 || k > n = 0
| n == 0 = 1
- | otherwise = choose n k * p^k * (1-p)^(n-k)
+ -- choose could overflow Double for n >= 1030 so we switch to
+ -- log-domain to calculate probability
+ | n < 1000 = choose n k * p^k * (1-p)^(n-k)
+ | otherwise = exp $ logChoose n k + log p * k' + log1p (-p) * nk'
+ where
+ k' = fromIntegral k
+ nk' = fromIntegral $ n - k
+
+logProbability :: BinomialDistribution -> Int -> Double
+logProbability (BD n p) k
+ | k < 0 || k > n = (-1)/0
+ | n == 0 = 0
+ | otherwise = logChoose n k + log p * k' + log1p (-p) * nk'
+ where
+ k' = fromIntegral k
+ nk' = fromIntegral $ n - k
-- Summation from different sides required to reduce roundoff errors
cumulative :: BinomialDistribution -> Double -> Double
@@ -114,10 +148,19 @@ directEntropy d@(BD n _) =
binomial :: Int -- ^ Number of trials.
-> Double -- ^ Probability.
-> BinomialDistribution
-binomial n p
- | n < 0 =
- error $ msg ++ "number of trials must be non-negative. Got " ++ show n
- | p < 0 || p > 1 =
- error $ msg++"probability must be in [0,1] range. Got " ++ show p
- | otherwise = BD n p
- where msg = "Statistics.Distribution.Binomial.binomial: "
+binomial n p = maybe (error $ errMsg n p) id $ binomialE n p
+
+-- | Construct binomial distribution. Number of trials must be
+-- non-negative and probability must be in [0,1] range
+binomialE :: Int -- ^ Number of trials.
+ -> Double -- ^ Probability.
+ -> Maybe BinomialDistribution
+binomialE n p
+ | n < 0 = Nothing
+ | p >= 0 && p <= 1 = Just (BD n p)
+ | otherwise = Nothing
+
+errMsg :: Int -> Double -> String
+errMsg n p
+ = "Statistics.Distribution.Binomial.binomial: n=" ++ show n
+ ++ " p=" ++ show p ++ " but n>=0 and p in [0,1]"
diff --git a/Statistics/Distribution/CauchyLorentz.hs b/Statistics/Distribution/CauchyLorentz.hs
index 4dc4d19..9ff472c 100644
--- a/Statistics/Distribution/CauchyLorentz.hs
+++ b/Statistics/Distribution/CauchyLorentz.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.CauchyLorentz
@@ -18,16 +19,18 @@ module Statistics.Distribution.CauchyLorentz (
, cauchyDistribScale
-- * Constructors
, cauchyDistribution
+ , cauchyDistributionE
, standardCauchy
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Maybe (fromMaybe)
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import qualified Statistics.Distribution as D
-import Data.Binary (put, get)
-import Control.Applicative ((<$>), (<*>))
+import Statistics.Internal
-- | Cauchy-Lorentz distribution.
data CauchyDistribution = CD {
@@ -40,24 +43,52 @@ data CauchyDistribution = CD {
-- maximum (HWHM).
, cauchyDistribScale :: {-# UNPACK #-} !Double
}
- deriving (Eq, Show, Read, Typeable, Data, Generic)
+ deriving (Eq, Typeable, Data, Generic)
-instance FromJSON CauchyDistribution
-instance ToJSON CauchyDistribution
+instance Show CauchyDistribution where
+ showsPrec i (CD m s) = defaultShow2 "cauchyDistribution" m s i
+instance Read CauchyDistribution where
+ readPrec = defaultReadPrecM2 "cauchyDistribution" cauchyDistributionE
+
+instance ToJSON CauchyDistribution
+instance FromJSON CauchyDistribution where
+ parseJSON (Object v) = do
+ m <- v .: "cauchyDistribMedian"
+ s <- v .: "cauchyDistribScale"
+ maybe (fail $ errMsg m s) return $ cauchyDistributionE m s
+ parseJSON _ = empty
instance Binary CauchyDistribution where
- put (CD x y) = put x >> put y
- get = CD <$> get <*> get
+ put (CD m s) = put m >> put s
+ get = do
+ m <- get
+ s <- get
+ maybe (error $ errMsg m s) return $ cauchyDistributionE m s
+
-- | Cauchy distribution
cauchyDistribution :: Double -- ^ Central point
-> Double -- ^ Scale parameter (FWHM)
-> CauchyDistribution
cauchyDistribution m s
- | s > 0 = CD m s
- | otherwise =
- error $ "Statistics.Distribution.CauchyLorentz.cauchyDistribution: FWHM must be positive. Got " ++ show s
+ = fromMaybe (error $ errMsg m s)
+ $ cauchyDistributionE m s
+
+-- | Cauchy distribution
+cauchyDistributionE :: Double -- ^ Central point
+ -> Double -- ^ Scale parameter (FWHM)
+ -> Maybe CauchyDistribution
+cauchyDistributionE m s
+ | s > 0 = Just (CD m s)
+ | otherwise = Nothing
+
+errMsg :: Double -> Double -> String
+errMsg _ s
+ = "Statistics.Distribution.CauchyLorentz.cauchyDistribution: FWHM must be positive. Got "
+ ++ show s
+
+-- | Standard Cauchy distribution. It's centered at 0 and has FWHM of 1
standardCauchy :: CauchyDistribution
standardCauchy = CD 0 1
@@ -73,10 +104,16 @@ instance D.ContDistr CauchyDistribution where
| p == 0 = -1 / 0
| p == 1 = 1 / 0
| otherwise =
- error $ "Statistics.Distribution.CauchyLorentz..quantile: p must be in [0,1] range. Got: "++show p
+ error $ "Statistics.Distribution.CauchyLorentz.quantile: p must be in [0,1] range. Got: "++show p
+ complQuantile (CD m s) p
+ | p > 0 && p < 1 = m + s * tan( pi * (0.5 - p) )
+ | p == 0 = 1 / 0
+ | p == 1 = -1 / 0
+ | otherwise =
+ error $ "Statistics.Distribution.CauchyLorentz.complQuantile: p must be in [0,1] range. Got: "++show p
instance D.ContGen CauchyDistribution where
- genContVar = D.genContinous
+ genContVar = D.genContinuous
instance D.Entropy CauchyDistribution where
entropy (CD _ s) = log s + log (4*pi)
diff --git a/Statistics/Distribution/ChiSquared.hs b/Statistics/Distribution/ChiSquared.hs
index a6ccedb..ac47e54 100644
--- a/Statistics/Distribution/ChiSquared.hs
+++ b/Statistics/Distribution/ChiSquared.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.ChiSquared
@@ -13,51 +14,86 @@
-- distributions. It's commonly used in statistical tests
module Statistics.Distribution.ChiSquared (
ChiSquared
- -- Constructors
- , chiSquared
, chiSquaredNDF
+ -- * Constructors
+ , chiSquared
+ , chiSquaredE
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
-import Numeric.SpecFunctions (
- incompleteGamma,invIncompleteGamma,logGamma,digamma)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
+import Numeric.SpecFunctions ( incompleteGamma,invIncompleteGamma,logGamma,digamma)
+import Numeric.MathFunctions.Constants (m_neg_inf)
+import qualified System.Random.MWC.Distributions as MWC
import qualified Statistics.Distribution as D
-import qualified System.Random.MWC.Distributions as MWC
-import Data.Binary (put, get)
+import Statistics.Internal
+
-- | Chi-squared distribution
-newtype ChiSquared = ChiSquared Int
- deriving (Eq, Read, Show, Typeable, Data, Generic)
+newtype ChiSquared = ChiSquared
+ { chiSquaredNDF :: Int
+ -- ^ Get number of degrees of freedom
+ }
+ deriving (Eq, Typeable, Data, Generic)
+
+instance Show ChiSquared where
+ showsPrec i (ChiSquared n) = defaultShow1 "chiSquared" n i
+instance Read ChiSquared where
+ readPrec = defaultReadPrecM1 "chiSquared" chiSquaredE
-instance FromJSON ChiSquared
instance ToJSON ChiSquared
+instance FromJSON ChiSquared where
+ parseJSON (Object v) = do
+ n <- v .: "chiSquaredNDF"
+ maybe (fail $ errMsg n) return $ chiSquaredE n
+ parseJSON _ = empty
instance Binary ChiSquared where
- get = fmap ChiSquared get
- put (ChiSquared x) = put x
+ put (ChiSquared x) = put x
+ get = do n <- get
+ maybe (fail $ errMsg n) return $ chiSquaredE n
--- | Get number of degrees of freedom
-chiSquaredNDF :: ChiSquared -> Int
-chiSquaredNDF (ChiSquared ndf) = ndf
-- | Construct chi-squared distribution. Number of degrees of freedom
-- must be positive.
chiSquared :: Int -> ChiSquared
-chiSquared n
- | n <= 0 = error $
- "Statistics.Distribution.ChiSquared.chiSquared: N.D.F. must be positive. Got " ++ show n
- | otherwise = ChiSquared n
+chiSquared n = maybe (error $ errMsg n) id $ chiSquaredE n
+
+-- | Construct chi-squared distribution. Number of degrees of freedom
+-- must be positive.
+chiSquaredE :: Int -> Maybe ChiSquared
+chiSquaredE n
+ | n <= 0 = Nothing
+ | otherwise = Just (ChiSquared n)
+
+errMsg :: Int -> String
+errMsg n = "Statistics.Distribution.ChiSquared.chiSquared: N.D.F. must be positive. Got " ++ show n
instance D.Distribution ChiSquared where
cumulative = cumulative
instance D.ContDistr ChiSquared where
- density = density
+ density chi x
+ | x <= 0 = 0
+ | otherwise = exp $ log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2
+ where
+ ndf = fromIntegral $ chiSquaredNDF chi
+ ndf2 = ndf/2
+ x2 = x/2
+
+ logDensity chi x
+ | x <= 0 = m_neg_inf
+ | otherwise = log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2
+ where
+ ndf = fromIntegral $ chiSquaredNDF chi
+ ndf2 = ndf/2
+ x2 = x/2
+
quantile = quantile
instance D.Mean ChiSquared where
@@ -95,15 +131,6 @@ cumulative chi x
where
ndf = fromIntegral $ chiSquaredNDF chi
-density :: ChiSquared -> Double -> Double
-density chi x
- | x <= 0 = 0
- | otherwise = exp $ log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2
- where
- ndf = fromIntegral $ chiSquaredNDF chi
- ndf2 = ndf/2
- x2 = x/2
-
quantile :: ChiSquared -> Double -> Double
quantile (ChiSquared ndf) p
| p == 0 = 0
diff --git a/Statistics/Distribution/DiscreteUniform.hs b/Statistics/Distribution/DiscreteUniform.hs
new file mode 100644
index 0000000..6f3d7c8
--- /dev/null
+++ b/Statistics/Distribution/DiscreteUniform.hs
@@ -0,0 +1,112 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings #-}
+-- |
+-- Module : Statistics.Distribution.DiscreteUniform
+-- Copyright : (c) 2016 André Szabolcs Szelp
+-- License : BSD3
+--
+-- Maintainer : a.sz.szelp@gmail.com
+-- Stability : experimental
+-- Portability : portable
+--
+-- The discrete uniform distribution. There are two parametrizations of
+-- this distribution. First is the probability distribution on an
+-- inclusive interval {1, ..., n}. This is parametrized with n only,
+-- where p_1, ..., p_n = 1/n. ('discreteUniform').
+--
+-- The second parametrizaton is the uniform distribution on {a, ..., b} with
+-- probabilities p_a, ..., p_b = 1/(a-b+1). This is parametrized with
+-- /a/ and /b/. ('discreteUniformAB')
+
+module Statistics.Distribution.DiscreteUniform
+ (
+ DiscreteUniform
+ -- * Constructors
+ , discreteUniform
+ , discreteUniformAB
+ -- * Accessors
+ , rangeFrom
+ , rangeTo
+ ) where
+
+import Control.Applicative ((<$>), (<*>), empty)
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
+
+import qualified Statistics.Distribution as D
+import Statistics.Internal
+
+
+
+-- | The discrete uniform distribution.
+data DiscreteUniform = U {
+ rangeFrom :: {-# UNPACK #-} !Int
+ -- ^ /a/, the lower bound of the support {a, ..., b}
+ , rangeTo :: {-# UNPACK #-} !Int
+ -- ^ /b/, the upper bound of the support {a, ..., b}
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show DiscreteUniform where
+ showsPrec i (U a b) = defaultShow2 "discreteUniformAB" a b i
+instance Read DiscreteUniform where
+ readPrec = defaultReadPrecM2 "discreteUniformAB" (\a b -> Just (discreteUniformAB a b))
+
+instance ToJSON DiscreteUniform
+instance FromJSON DiscreteUniform where
+ parseJSON (Object v) = do
+ a <- v .: "uniformA"
+ b <- v .: "uniformB"
+ return $ discreteUniformAB a b
+ parseJSON _ = empty
+
+instance Binary DiscreteUniform where
+ put (U a b) = put a >> put b
+ get = discreteUniformAB <$> get <*> get
+
+instance D.Distribution DiscreteUniform where
+ cumulative (U a b) x
+ | x < fromIntegral a = 0
+ | x > fromIntegral b = 1
+ | otherwise = fromIntegral (floor x - a + 1) / fromIntegral (b - a + 1)
+
+instance D.DiscreteDistr DiscreteUniform where
+ probability (U a b) k
+ | k >= a && k <= b = 1 / fromIntegral (b - a + 1)
+ | otherwise = 0
+
+instance D.Mean DiscreteUniform where
+ mean (U a b) = fromIntegral (a+b)/2
+
+instance D.Variance DiscreteUniform where
+ variance (U a b) = (fromIntegral (b - a + 1)^(2::Int) - 1) / 12
+
+instance D.MaybeMean DiscreteUniform where
+ maybeMean = Just . D.mean
+
+instance D.MaybeVariance DiscreteUniform where
+ maybeStdDev = Just . D.stdDev
+ maybeVariance = Just . D.variance
+
+instance D.Entropy DiscreteUniform where
+ entropy (U a b) = log $ fromIntegral $ b - a + 1
+
+instance D.MaybeEntropy DiscreteUniform where
+ maybeEntropy = Just . D.entropy
+
+-- | Construct discrete uniform distribution on support {1, ..., n}.
+-- Range /n/ must be >0.
+discreteUniform :: Int -- ^ Range
+ -> DiscreteUniform
+discreteUniform n
+ | n < 1 = error $ msg ++ "range must be > 0. Got " ++ show n
+ | otherwise = U 1 n
+ where msg = "Statistics.Distribution.DiscreteUniform.discreteUniform: "
+
+-- | Construct discrete uniform distribution on support {a, ..., b}.
+discreteUniformAB :: Int -- ^ Lower boundary (inclusive)
+ -> Int -- ^ Upper boundary (inclusive)
+ -> DiscreteUniform
+discreteUniformAB a b
+ | b < a = U b a
+ | otherwise = U a b
diff --git a/Statistics/Distribution/Exponential.hs b/Statistics/Distribution/Exponential.hs
index e367547..e5a3e4f 100644
--- a/Statistics/Distribution/Exponential.hs
+++ b/Statistics/Distribution/Exponential.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Exponential
@@ -18,33 +20,48 @@ module Statistics.Distribution.Exponential
ExponentialDistribution
-- * Constructors
, exponential
- , exponentialFromSample
+ , exponentialE
-- * Accessors
, edLambda
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..),ToJSON,Value(..),(.:))
+import Data.Binary (Binary, put, get)
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
+import Numeric.SpecFunctions (log1p)
import Numeric.MathFunctions.Constants (m_neg_inf)
+import qualified System.Random.MWC.Distributions as MWC
+import qualified Data.Vector.Generic as G
+
import qualified Statistics.Distribution as D
import qualified Statistics.Sample as S
-import qualified System.Random.MWC.Distributions as MWC
-import Statistics.Types (Sample)
-import Data.Binary (put, get)
+import Statistics.Internal
+
newtype ExponentialDistribution = ED {
edLambda :: Double
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show ExponentialDistribution where
+ showsPrec n (ED l) = defaultShow1 "exponential" l n
+instance Read ExponentialDistribution where
+ readPrec = defaultReadPrecM1 "exponential" exponentialE
-instance FromJSON ExponentialDistribution
instance ToJSON ExponentialDistribution
+instance FromJSON ExponentialDistribution where
+ parseJSON (Object v) = do
+ l <- v .: "edLambda"
+ maybe (fail $ errMsg l) return $ exponentialE l
+ parseJSON _ = empty
instance Binary ExponentialDistribution where
- put = put . edLambda
- get = fmap ED get
+ put = put . edLambda
+ get = do
+ l <- get
+ maybe (fail $ errMsg l) return $ exponentialE l
instance D.Distribution ExponentialDistribution where
cumulative = cumulative
@@ -57,7 +74,8 @@ instance D.ContDistr ExponentialDistribution where
logDensity (ED l) x
| x < 0 = m_neg_inf
| otherwise = log l + (-l * x)
- quantile = quantile
+ quantile = quantile
+ complQuantile = complQuantile
instance D.Mean ExponentialDistribution where
mean (ED l) = 1 / l
@@ -92,20 +110,37 @@ complCumulative (ED l) x | x <= 0 = 1
quantile :: ExponentialDistribution -> Double -> Double
quantile (ED l) p
- | p == 1 = 1 / 0
- | p >= 0 && p < 1 = -log (1 - p) / l
+ | p >= 0 && p <= 1 = - log1p(-p) / l
+ | otherwise =
+ error $ "Statistics.Distribution.Exponential.quantile: p must be in [0,1] range. Got: "++show p
+
+complQuantile :: ExponentialDistribution -> Double -> Double
+complQuantile (ED l) p
+ | p == 1 = 0
+ | p >= 0 && p < 1 = -log p / l
| otherwise =
+ error $ "Statistics.Distribution.Exponential.complQuantile: p must be in [0,1] range. Got: "++show p
-- | Create an exponential distribution.
exponential :: Double -- ^ Rate parameter.
-> ExponentialDistribution
-exponential l
- | l <= 0 =
- error $ "Statistics.Distribution.Exponential.exponential: scale parameter must be positive. Got " ++ show l
- | otherwise = ED l
-
--- | Create exponential distribution from sample. No tests are made to
--- check whether it truly is exponential.
-exponentialFromSample :: Sample -> ExponentialDistribution
-exponentialFromSample = ED . S.mean
+exponential l = maybe (error $ errMsg l) id $ exponentialE l
+
+-- | Create an exponential distribution.
+exponentialE :: Double -- ^ Rate parameter.
+ -> Maybe ExponentialDistribution
+exponentialE l
+ | l > 0 = Just (ED l)
+ | otherwise = Nothing
+
+errMsg :: Double -> String
+errMsg l = "Statistics.Distribution.Exponential.exponential: rate parameter must be positive. Got " ++ show l
+
+-- | Create exponential distribution from sample. Returns @Nothing@ if
+-- sample is empty or contains negative elements. No other tests are
+-- made to check whether it truly is exponential.
+instance D.FromSample ExponentialDistribution Double where
+ fromSample xs
+ | G.null xs = Nothing
+ | G.any (< 0) xs = Nothing
+ | otherwise = Just $! ED (1 / S.mean xs)
diff --git a/Statistics/Distribution/FDistribution.hs b/Statistics/Distribution/FDistribution.hs
index 56ef912..5df26bc 100644
--- a/Statistics/Distribution/FDistribution.hs
+++ b/Statistics/Distribution/FDistribution.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.FDistribution
@@ -11,49 +12,90 @@
-- Fisher F distribution
module Statistics.Distribution.FDistribution (
FDistribution
+ -- * Constructors
, fDistribution
+ , fDistributionE
+ , fDistributionReal
+ , fDistributionRealE
+ -- * Accessors
, fDistributionNDF1
, fDistributionNDF2
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
+import Numeric.SpecFunctions (
+ logBeta, incompleteBeta, invIncompleteBeta, digamma)
import Numeric.MathFunctions.Constants (m_neg_inf)
-import GHC.Generics (Generic)
+
import qualified Statistics.Distribution as D
import Statistics.Function (square)
-import Numeric.SpecFunctions (
- logBeta, incompleteBeta, invIncompleteBeta, digamma)
-import Data.Binary (put, get)
-import Control.Applicative ((<$>), (<*>))
+import Statistics.Internal
+
-- | F distribution
data FDistribution = F { fDistributionNDF1 :: {-# UNPACK #-} !Double
, fDistributionNDF2 :: {-# UNPACK #-} !Double
, _pdfFactor :: {-# UNPACK #-} !Double
}
- deriving (Eq, Show, Read, Typeable, Data, Generic)
+ deriving (Eq, Typeable, Data, Generic)
+
+instance Show FDistribution where
+ showsPrec i (F n m _) = defaultShow2 "fDistributionReal" n m i
+instance Read FDistribution where
+ readPrec = defaultReadPrecM2 "fDistributionReal" fDistributionRealE
-instance FromJSON FDistribution
instance ToJSON FDistribution
+instance FromJSON FDistribution where
+ parseJSON (Object v) = do
+ n <- v .: "fDistributionNDF1"
+ m <- v .: "fDistributionNDF2"
+ maybe (fail $ errMsgR n m) return $ fDistributionRealE n m
+ parseJSON _ = empty
instance Binary FDistribution where
- get = F <$> get <*> get <*> get
- put (F x y z) = put x >> put y >> put z
+ put (F n m _) = put n >> put m
+ get = do
+ n <- get
+ m <- get
+ maybe (fail $ errMsgR n m) return $ fDistributionRealE n m
fDistribution :: Int -> Int -> FDistribution
-fDistribution n m
+fDistribution n m = maybe (error $ errMsg n m) id $ fDistributionE n m
+
+fDistributionReal :: Double -> Double -> FDistribution
+fDistributionReal n m = maybe (error $ errMsgR n m) id $ fDistributionRealE n m
+
+fDistributionE :: Int -> Int -> Maybe FDistribution
+fDistributionE n m
| n > 0 && m > 0 =
let n' = fromIntegral n
m' = fromIntegral m
f' = 0.5 * (log m' * m' + log n' * n') - logBeta (0.5*n') (0.5*m')
- in F n' m' f'
- | otherwise =
- error "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom"
+ in Just $ F n' m' f'
+ | otherwise = Nothing
+
+fDistributionRealE :: Double -> Double -> Maybe FDistribution
+fDistributionRealE n m
+ | n > 0 && m > 0 =
+ let f' = 0.5 * (log m * m + log n * n) - logBeta (0.5*n) (0.5*m)
+ in Just $ F n m f'
+ | otherwise = Nothing
+
+errMsg :: Int -> Int -> String
+errMsg _ _ = "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom"
+
+errMsgR :: Double -> Double -> String
+errMsgR _ _ = "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom"
+
+
instance D.Distribution FDistribution where
- cumulative = cumulative
+ cumulative = cumulative
+ complCumulative = complCumulative
instance D.ContDistr FDistribution where
density d x
@@ -70,6 +112,13 @@ cumulative (F n m _) x
| isInfinite x = 1 -- Only matches +∞
| otherwise = let y = n*x in incompleteBeta (0.5 * n) (0.5 * m) (y / (m + y))
+complCumulative :: FDistribution -> Double -> Double
+complCumulative (F n m _) x
+ | x <= 0 = 1
+ | isInfinite x = 0 -- Only matches +∞
+ | otherwise = let y = n*x
+ in incompleteBeta (0.5 * m) (0.5 * n) (m / (m + y))
+
logDensity :: FDistribution -> Double -> Double
logDensity (F n m fac) x
= fac + log x * (0.5 * n - 1) - log(m + n*x) * 0.5 * (n + m)
@@ -106,4 +155,4 @@ instance D.MaybeEntropy FDistribution where
maybeEntropy = Just . D.entropy
instance D.ContGen FDistribution where
- genContVar = D.genContinous
+ genContVar = D.genContinuous
diff --git a/Statistics/Distribution/Gamma.hs b/Statistics/Distribution/Gamma.hs
index d2cee28..4c41b8b 100644
--- a/Statistics/Distribution/Gamma.hs
+++ b/Statistics/Distribution/Gamma.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Gamma
@@ -19,36 +20,55 @@ module Statistics.Distribution.Gamma
GammaDistribution
-- * Constructors
, gammaDistr
+ , gammaDistrE
, improperGammaDistr
+ , improperGammaDistrE
-- * Accessors
, gdShape
, gdScale
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Control.Applicative ((<$>), (<*>))
-import Data.Binary (Binary)
-import Data.Binary (put, get)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import Numeric.MathFunctions.Constants (m_pos_inf, m_NaN, m_neg_inf)
import Numeric.SpecFunctions (incompleteGamma, invIncompleteGamma, logGamma, digamma)
+import qualified System.Random.MWC.Distributions as MWC
+
import Statistics.Distribution.Poisson.Internal as Poisson
import qualified Statistics.Distribution as D
-import qualified System.Random.MWC.Distributions as MWC
+import Statistics.Internal
+
-- | The gamma distribution.
data GammaDistribution = GD {
gdShape :: {-# UNPACK #-} !Double -- ^ Shape parameter, /k/.
, gdScale :: {-# UNPACK #-} !Double -- ^ Scale parameter, &#977;.
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show GammaDistribution where
+ showsPrec i (GD k theta) = defaultShow2 "improperGammaDistr" k theta i
+instance Read GammaDistribution where
+ readPrec = defaultReadPrecM2 "improperGammaDistr" improperGammaDistrE
+
-instance FromJSON GammaDistribution
instance ToJSON GammaDistribution
+instance FromJSON GammaDistribution where
+ parseJSON (Object v) = do
+ k <- v .: "gdShape"
+ theta <- v .: "gdScale"
+ maybe (fail $ errMsgI k theta) return $ improperGammaDistrE k theta
+ parseJSON _ = empty
instance Binary GammaDistribution where
- put (GD x y) = put x >> put y
- get = GD <$> get <*> get
+ put (GD x y) = put x >> put y
+ get = do
+ k <- get
+ theta <- get
+ maybe (fail $ errMsgI k theta) return $ improperGammaDistrE k theta
+
-- | Create gamma distribution. Both shape and scale parameters must
-- be positive.
@@ -56,17 +76,48 @@ gammaDistr :: Double -- ^ Shape parameter. /k/
-> Double -- ^ Scale parameter, &#977;.
-> GammaDistribution
gammaDistr k theta
- | k <= 0 = error $ msg ++ "shape must be positive. Got " ++ show k
- | theta <= 0 = error $ msg ++ "scale must be positive. Got " ++ show theta
- | otherwise = improperGammaDistr k theta
- where msg = "Statistics.Distribution.Gamma.gammaDistr: "
+ = maybe (error $ errMsg k theta) id $ gammaDistrE k theta
+
+errMsg :: Double -> Double -> String
+errMsg k theta
+ = "Statistics.Distribution.Gamma.gammaDistr: "
+ ++ "k=" ++ show k
+ ++ " theta=" ++ show theta
+ ++ " but must be positive"
--- | Create gamma distribution. This constructor do not check whether
--- parameters are valid
+-- | Create gamma distribution. Both shape and scale parameters must
+-- be positive.
+gammaDistrE :: Double -- ^ Shape parameter. /k/
+ -> Double -- ^ Scale parameter, &#977;.
+ -> Maybe GammaDistribution
+gammaDistrE k theta
+ | k > 0 && theta > 0 = Just (GD k theta)
+ | otherwise = Nothing
+
+
+-- | Create gamma distribution. Both shape and scale parameters must
+-- be non-negative.
improperGammaDistr :: Double -- ^ Shape parameter. /k/
-> Double -- ^ Scale parameter, &#977;.
-> GammaDistribution
-improperGammaDistr = GD
+improperGammaDistr k theta
+ = maybe (error $ errMsgI k theta) id $ improperGammaDistrE k theta
+
+errMsgI :: Double -> Double -> String
+errMsgI k theta
+ = "Statistics.Distribution.Gamma.improperGammaDistr: "
+ ++ "k=" ++ show k
+ ++ " theta=" ++ show theta
+ ++ " but must be non-negative"
+
+-- | Create gamma distribution. Both shape and scale parameters must
+-- be non-negative.
+improperGammaDistrE :: Double -- ^ Shape parameter. /k/
+ -> Double -- ^ Scale parameter, &#977;.
+ -> Maybe GammaDistribution
+improperGammaDistrE k theta
+ | k >= 0 && theta >= 0 = Just (GD k theta)
+ | otherwise = Nothing
instance D.Distribution GammaDistribution where
cumulative = cumulative
diff --git a/Statistics/Distribution/Geometric.hs b/Statistics/Distribution/Geometric.hs
index bf2a767..30f4c3f 100644
--- a/Statistics/Distribution/Geometric.hs
+++ b/Statistics/Distribution/Geometric.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Geometric
@@ -24,36 +25,53 @@ module Statistics.Distribution.Geometric
, GeometricDistribution0
-- * Constructors
, geometric
+ , geometricE
, geometric0
+ , geometric0E
-- ** Accessors
, gdSuccess
, gdSuccess0
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Control.Applicative ((<$>))
-import Control.Monad (liftM)
-import Data.Binary (Binary)
-import Data.Binary (put, get)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Control.Monad (liftM)
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import Numeric.MathFunctions.Constants (m_pos_inf, m_neg_inf)
-import qualified Statistics.Distribution as D
import qualified System.Random.MWC.Distributions as MWC
+import qualified Statistics.Distribution as D
+import Statistics.Internal
+
+
+
----------------------------------------------------------------
--- Distribution over [1..]
+-- | Distribution over [1..]
newtype GeometricDistribution = GD {
gdSuccess :: Double
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show GeometricDistribution where
+ showsPrec i (GD x) = defaultShow1 "geometric" x i
+instance Read GeometricDistribution where
+ readPrec = defaultReadPrecM1 "geometric" geometricE
-instance FromJSON GeometricDistribution
instance ToJSON GeometricDistribution
+instance FromJSON GeometricDistribution where
+ parseJSON (Object v) = do
+ x <- v .: "gdSuccess"
+ maybe (fail $ errMsg x) return $ geometricE x
+ parseJSON _ = empty
instance Binary GeometricDistribution where
- get = GD <$> get
- put (GD x) = put x
+ put (GD x) = put x
+ get = do
+ x <- get
+ maybe (fail $ errMsg x) return $ geometricE x
+
instance D.Distribution GeometricDistribution where
cumulative = cumulative
@@ -95,14 +113,6 @@ instance D.DiscreteGen GeometricDistribution where
instance D.ContGen GeometricDistribution where
genContVar d g = fromIntegral `liftM` D.genDiscreteVar d g
--- | Create geometric distribution.
-geometric :: Double -- ^ Success rate
- -> GeometricDistribution
-geometric x
- | x >= 0 && x <= 1 = GD x
- | otherwise =
- error $ "Statistics.Distribution.Geometric.geometric: probability must be in [0,1] range. Got " ++ show x
-
cumulative :: GeometricDistribution -> Double -> Double
cumulative (GD s) x
| x < 1 = 0
@@ -111,19 +121,47 @@ cumulative (GD s) x
| otherwise = 1 - (1-s) ^ (floor x :: Int)
+-- | Create geometric distribution.
+geometric :: Double -- ^ Success rate
+ -> GeometricDistribution
+geometric x = maybe (error $ errMsg x) id $ geometricE x
+
+-- | Create geometric distribution.
+geometricE :: Double -- ^ Success rate
+ -> Maybe GeometricDistribution
+geometricE x
+ | x >= 0 && x <= 1 = Just (GD x)
+ | otherwise = Nothing
+
+errMsg :: Double -> String
+errMsg x = "Statistics.Distribution.Geometric.geometric: probability must be in [0,1] range. Got " ++ show x
+
+
----------------------------------------------------------------
--- Distribution over [0..]
+-- | Distribution over [0..]
newtype GeometricDistribution0 = GD0 {
gdSuccess0 :: Double
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show GeometricDistribution0 where
+ showsPrec i (GD0 x) = defaultShow1 "geometric0" x i
+instance Read GeometricDistribution0 where
+ readPrec = defaultReadPrecM1 "geometric0" geometric0E
-instance FromJSON GeometricDistribution0
instance ToJSON GeometricDistribution0
+instance FromJSON GeometricDistribution0 where
+ parseJSON (Object v) = do
+ x <- v .: "gdSuccess0"
+ maybe (fail $ errMsg x) return $ geometric0E x
+ parseJSON _ = empty
instance Binary GeometricDistribution0 where
- get = GD0 <$> get
- put (GD0 x) = put x
+ put (GD0 x) = put x
+ get = do
+ x <- get
+ maybe (fail $ errMsg x) return $ geometric0E x
+
instance D.Distribution GeometricDistribution0 where
cumulative (GD0 s) x = cumulative (GD s) (x + 1)
@@ -157,10 +195,18 @@ instance D.DiscreteGen GeometricDistribution0 where
instance D.ContGen GeometricDistribution0 where
genContVar d g = fromIntegral `liftM` D.genDiscreteVar d g
+
-- | Create geometric distribution.
geometric0 :: Double -- ^ Success rate
-> GeometricDistribution0
-geometric0 x
- | x >= 0 && x <= 1 = GD0 x
- | otherwise =
- error $ "Statistics.Distribution.Geometric.geometric: probability must be in [0,1] range. Got " ++ show x
+geometric0 x = maybe (error $ errMsg0 x) id $ geometric0E x
+
+-- | Create geometric distribution.
+geometric0E :: Double -- ^ Success rate
+ -> Maybe GeometricDistribution0
+geometric0E x
+ | x >= 0 && x <= 1 = Just (GD0 x)
+ | otherwise = Nothing
+
+errMsg0 :: Double -> String
+errMsg0 x = "Statistics.Distribution.Geometric.geometric0: probability must be in [0,1] range. Got " ++ show x
diff --git a/Statistics/Distribution/Hypergeometric.hs b/Statistics/Distribution/Hypergeometric.hs
index 8bc31e2..7951c37 100644
--- a/Statistics/Distribution/Hypergeometric.hs
+++ b/Statistics/Distribution/Hypergeometric.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Hypergeometric
@@ -21,40 +22,59 @@ module Statistics.Distribution.Hypergeometric
HypergeometricDistribution
-- * Constructors
, hypergeometric
+ , hypergeometricE
-- ** Accessors
, hdM
, hdL
, hdK
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
-import Numeric.MathFunctions.Constants (m_epsilon)
-import Numeric.SpecFunctions (choose)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
+import Numeric.MathFunctions.Constants (m_epsilon,m_neg_inf)
+import Numeric.SpecFunctions (choose,logChoose)
+
import qualified Statistics.Distribution as D
-import Data.Binary (put, get)
-import Control.Applicative ((<$>), (<*>))
+import Statistics.Internal
+
data HypergeometricDistribution = HD {
hdM :: {-# UNPACK #-} !Int
, hdL :: {-# UNPACK #-} !Int
, hdK :: {-# UNPACK #-} !Int
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show HypergeometricDistribution where
+ showsPrec i (HD m l k) = defaultShow3 "hypergeometric" m l k i
+instance Read HypergeometricDistribution where
+ readPrec = defaultReadPrecM3 "hypergeometric" hypergeometricE
-instance FromJSON HypergeometricDistribution
instance ToJSON HypergeometricDistribution
+instance FromJSON HypergeometricDistribution where
+ parseJSON (Object v) = do
+ m <- v .: "hdM"
+ l <- v .: "hdL"
+ k <- v .: "hdK"
+ maybe (fail $ errMsg m l k) return $ hypergeometricE m l k
+ parseJSON _ = empty
instance Binary HypergeometricDistribution where
- get = HD <$> get <*> get <*> get
- put (HD x y z) = put x >> put y >> put z
+ put (HD m l k) = put m >> put l >> put k
+ get = do
+ m <- get
+ l <- get
+ k <- get
+ maybe (fail $ errMsg m l k) return $ hypergeometricE m l k
instance D.Distribution HypergeometricDistribution where
cumulative = cumulative
instance D.DiscreteDistr HypergeometricDistribution where
- probability = probability
+ probability = probability
+ logProbability = logProbability
instance D.Mean HypergeometricDistribution where
mean = mean
@@ -86,11 +106,11 @@ mean :: HypergeometricDistribution -> Double
mean (HD m l k) = fromIntegral k * fromIntegral m / fromIntegral l
directEntropy :: HypergeometricDistribution -> Double
-directEntropy d@(HD m _ _) =
- negate . sum $
- takeWhile (< negate m_epsilon) $
- dropWhile (not . (< negate m_epsilon)) $
- [ let x = probability d n in x * log x | n <- [0..m]]
+directEntropy d@(HD m _ _)
+ = negate . sum
+ $ takeWhile (< negate m_epsilon)
+ $ dropWhile (not . (< negate m_epsilon))
+ [ let x = probability d n in x * log x | n <- [0..m]]
hypergeometric :: Int -- ^ /m/
@@ -98,19 +118,44 @@ hypergeometric :: Int -- ^ /m/
-> Int -- ^ /k/
-> HypergeometricDistribution
hypergeometric m l k
- | not (l > 0) = error $ msg ++ "l must be positive"
- | not (m >= 0 && m <= l) = error $ msg ++ "m must lie in [0,l] range"
- | not (k > 0 && k <= l) = error $ msg ++ "k must lie in (0,l] range"
- | otherwise = HD m l k
- where
- msg = "Statistics.Distribution.Hypergeometric.hypergeometric: "
+ = maybe (error $ errMsg m l k) id $ hypergeometricE m l k
+
+hypergeometricE :: Int -- ^ /m/
+ -> Int -- ^ /l/
+ -> Int -- ^ /k/
+ -> Maybe HypergeometricDistribution
+hypergeometricE m l k
+ | not (l > 0) = Nothing
+ | not (m >= 0 && m <= l) = Nothing
+ | not (k > 0 && k <= l) = Nothing
+ | otherwise = Just (HD m l k)
+
+
+errMsg :: Int -> Int -> Int -> String
+errMsg m l k
+ = "Statistics.Distribution.Hypergeometric.hypergeometric: "
+ ++ "m=" ++ show m
+ ++ " l=" ++ show l
+ ++ " k=" ++ show k
+ ++ " should hold: l>0 & m in [0,l] & k in (0,l]"
-- Naive implementation
probability :: HypergeometricDistribution -> Int -> Double
probability (HD mi li ki) n
| n < max 0 (mi+ki-li) || n > min mi ki = 0
- | otherwise =
- choose mi n * choose (li - mi) (ki - n) / choose li ki
+ -- No overflow
+ | li < 1000 = choose mi n * choose (li - mi) (ki - n)
+ / choose li ki
+ | otherwise = exp $ logChoose mi n
+ + logChoose (li - mi) (ki - n)
+ - logChoose li ki
+
+logProbability :: HypergeometricDistribution -> Int -> Double
+logProbability (HD mi li ki) n
+ | n < max 0 (mi+ki-li) || n > min mi ki = m_neg_inf
+ | otherwise = logChoose mi n
+ + logChoose (li - mi) (ki - n)
+ - logChoose li ki
cumulative :: HypergeometricDistribution -> Double -> Double
cumulative d@(HD mi li ki) x
diff --git a/Statistics/Distribution/Laplace.hs b/Statistics/Distribution/Laplace.hs
index b9d10af..cf1fdde 100644
--- a/Statistics/Distribution/Laplace.hs
+++ b/Statistics/Distribution/Laplace.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Laplace
@@ -15,28 +17,27 @@
-- recognition and least absolute deviations method (Laplace's first
-- law of errors, giving a robust regression method)
--
-
module Statistics.Distribution.Laplace
(
LaplaceDistribution
-- * Constructors
, laplace
- , laplaceFromSample
+ , laplaceE
-- * Accessors
, ldLocation
, ldScale
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary(..))
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import qualified Data.Vector.Generic as G
import qualified Statistics.Distribution as D
import qualified Statistics.Quantile as Q
import qualified Statistics.Sample as S
-import Statistics.Types (Sample)
-import Control.Applicative ((<$>), (<*>))
+import Statistics.Internal
data LaplaceDistribution = LD {
@@ -44,14 +45,27 @@ data LaplaceDistribution = LD {
-- ^ Location.
, ldScale :: {-# UNPACK #-} !Double
-- ^ Scale.
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show LaplaceDistribution where
+ showsPrec i (LD l s) = defaultShow2 "laplace" l s i
+instance Read LaplaceDistribution where
+ readPrec = defaultReadPrecM2 "laplace" laplaceE
-instance FromJSON LaplaceDistribution
instance ToJSON LaplaceDistribution
+instance FromJSON LaplaceDistribution where
+ parseJSON (Object v) = do
+ l <- v .: "ldLocation"
+ s <- v .: "ldScale"
+ maybe (fail $ errMsg l s) return $ laplaceE l s
+ parseJSON _ = empty
instance Binary LaplaceDistribution where
- put (LD l s) = put l >> put s
- get = LD <$> get <*> get
+ put (LD l s) = put l >> put s
+ get = do
+ l <- get
+ s <- get
+ maybe (fail $ errMsg l s) return $ laplaceE l s
instance D.Distribution LaplaceDistribution where
cumulative = cumulative
@@ -60,7 +74,8 @@ instance D.Distribution LaplaceDistribution where
instance D.ContDistr LaplaceDistribution where
density (LD l s) x = exp (- abs (x - l) / s) / (2 * s)
logDensity (LD l s) x = - abs (x - l) / s - log 2 - log s
- quantile = quantile
+ quantile = quantile
+ complQuantile = complQuantile
instance D.Mean LaplaceDistribution where
mean (LD l _) = l
@@ -82,7 +97,7 @@ instance D.MaybeEntropy LaplaceDistribution where
maybeEntropy = Just . D.entropy
instance D.ContGen LaplaceDistribution where
- genContVar = D.genContinous
+ genContVar = D.genContinuous
cumulative :: LaplaceDistribution -> Double -> Double
cumulative (LD l s) x
@@ -106,20 +121,43 @@ quantile (LD l s) p
where
inf = 1 / 0
+complQuantile :: LaplaceDistribution -> Double -> Double
+complQuantile (LD l s) p
+ | p == 0 = inf
+ | p == 1 = -inf
+ | p == 0.5 = l
+ | p > 0 && p < 0.5 = l - s * log (2 * p)
+ | p > 0.5 && p < 1 = l + s * log (2 - 2 * p)
+ | otherwise =
+ error $ "Statistics.Distribution.Laplace.complQuantile: p must be in [0,1] range. Got: "++show p
+ where
+ inf = 1 / 0
+
-- | Create an Laplace distribution.
laplace :: Double -- ^ Location
-> Double -- ^ Scale
-> LaplaceDistribution
-laplace l s
- | s <= 0 =
- error $ "Statistics.Distribution.Laplace.laplace: scale parameter must be positive. Got " ++ show s
- | otherwise = LD l s
+laplace l s = maybe (error $ errMsg l s) id $ laplaceE l s
+
+-- | Create an Laplace distribution.
+laplaceE :: Double -- ^ Location
+ -> Double -- ^ Scale
+ -> Maybe LaplaceDistribution
+laplaceE l s
+ | s > 0 = Just (LD l s)
+ | otherwise = Nothing
+
+errMsg :: Double -> Double -> String
+errMsg _ s = "Statistics.Distribution.Laplace.laplace: scale parameter must be positive. Got " ++ show s
+
-- | Create Laplace distribution from sample. No tests are made to
-- check whether it truly is Laplace. Location of distribution
-- estimated as median of sample.
-laplaceFromSample :: Sample -> LaplaceDistribution
-laplaceFromSample xs = LD s l
- where
- s = Q.continuousBy Q.medianUnbiased 1 2 xs
- l = S.mean $ G.map (\x -> abs $ x - s) xs
+instance D.FromSample LaplaceDistribution Double where
+ fromSample xs
+ | G.null xs = Nothing
+ | otherwise = Just $! LD s l
+ where
+ s = Q.continuousBy Q.medianUnbiased 1 2 xs
+ l = S.mean $ G.map (\x -> abs $ x - s) xs
diff --git a/Statistics/Distribution/Normal.hs b/Statistics/Distribution/Normal.hs
index f4a444e..e2c6ebe 100644
--- a/Statistics/Distribution/Normal.hs
+++ b/Statistics/Distribution/Normal.hs
@@ -1,4 +1,6 @@
-{-# LANGUAGE BangPatterns, DeriveDataTypeable, DeriveGeneric #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Normal
-- Copyright : (c) 2009 Bryan O'Sullivan
@@ -16,21 +18,24 @@ module Statistics.Distribution.Normal
NormalDistribution
-- * Constructors
, normalDistr
- , normalFromSample
+ , normalDistrE
, standard
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Control.Applicative ((<$>), (<*>))
-import Data.Binary (Binary)
-import Data.Binary (put, get)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import Numeric.MathFunctions.Constants (m_sqrt_2, m_sqrt_2_pi)
import Numeric.SpecFunctions (erfc, invErfc)
+import qualified System.Random.MWC.Distributions as MWC
+import qualified Data.Vector.Generic as G
+
import qualified Statistics.Distribution as D
import qualified Statistics.Sample as S
-import qualified System.Random.MWC.Distributions as MWC
+import Statistics.Internal
+
-- | The normal distribution.
data NormalDistribution = ND {
@@ -38,22 +43,36 @@ data NormalDistribution = ND {
, stdDev :: {-# UNPACK #-} !Double
, ndPdfDenom :: {-# UNPACK #-} !Double
, ndCdfDenom :: {-# UNPACK #-} !Double
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show NormalDistribution where
+ showsPrec i (ND m s _ _) = defaultShow2 "normalDistr" m s i
+instance Read NormalDistribution where
+ readPrec = defaultReadPrecM2 "normalDistr" normalDistrE
-instance FromJSON NormalDistribution
instance ToJSON NormalDistribution
+instance FromJSON NormalDistribution where
+ parseJSON (Object v) = do
+ m <- v .: "mean"
+ sd <- v .: "stdDev"
+ maybe (fail $ errMsg m sd) return $ normalDistrE m sd
+ parseJSON _ = empty
instance Binary NormalDistribution where
- put (ND w x y z) = put w >> put x >> put y >> put z
- get = ND <$> get <*> get <*> get <*> get
+ put (ND m sd _ _) = put m >> put sd
+ get = do
+ m <- get
+ sd <- get
+ maybe (fail $ errMsg m sd) return $ normalDistrE m sd
instance D.Distribution NormalDistribution where
cumulative = cumulative
complCumulative = complCumulative
instance D.ContDistr NormalDistribution where
- logDensity = logDensity
- quantile = quantile
+ logDensity = logDensity
+ quantile = quantile
+ complQuantile = complQuantile
instance D.MaybeMean NormalDistribution where
maybeMean = Just . D.mean
@@ -92,23 +111,38 @@ standard = ND { mean = 0.0
normalDistr :: Double -- ^ Mean of distribution
-> Double -- ^ Standard deviation of distribution
-> NormalDistribution
-normalDistr m sd
- | sd > 0 = ND { mean = m
- , stdDev = sd
- , ndPdfDenom = log $ m_sqrt_2_pi * sd
- , ndCdfDenom = m_sqrt_2 * sd
- }
- | otherwise =
- error $ "Statistics.Distribution.Normal.normalDistr: standard deviation must be positive. Got " ++ show sd
-
--- | Create distribution using parameters estimated from
--- sample. Variance is estimated using maximum likelihood method
+normalDistr m sd = maybe (error $ errMsg m sd) id $ normalDistrE m sd
+
+-- | Create normal distribution from parameters.
+--
+-- IMPORTANT: prior to 0.10 release second parameter was variance not
+-- standard deviation.
+normalDistrE :: Double -- ^ Mean of distribution
+ -> Double -- ^ Standard deviation of distribution
+ -> Maybe NormalDistribution
+normalDistrE m sd
+ | sd > 0 = Just ND { mean = m
+ , stdDev = sd
+ , ndPdfDenom = log $ m_sqrt_2_pi * sd
+ , ndCdfDenom = m_sqrt_2 * sd
+ }
+ | otherwise = Nothing
+
+errMsg :: Double -> Double -> String
+errMsg _ sd = "Statistics.Distribution.Normal.normalDistr: standard deviation must be positive. Got " ++ show sd
+
+-- | Variance is estimated using maximum likelihood method
-- (biased estimation).
-normalFromSample :: S.Sample -> NormalDistribution
-normalFromSample xs
- = normalDistr m (sqrt v)
- where
- (m,v) = S.meanVariance xs
+--
+-- Returns @Nothing@ if sample contains less than one element or
+-- variance is zero (all elements are equal)
+instance D.FromSample NormalDistribution Double where
+ fromSample xs
+ | G.length xs <= 1 = Nothing
+ | v == 0 = Nothing
+ | otherwise = Just $! normalDistr m (sqrt v)
+ where
+ (m,v) = S.meanVariance xs
logDensity :: NormalDistribution -> Double -> Double
logDensity d x = (-xm * xm / (2 * sd * sd)) - ndPdfDenom d
@@ -131,3 +165,14 @@ quantile d p
error $ "Statistics.Distribution.Normal.quantile: p must be in [0,1] range. Got: "++show p
where x = - invErfc (2 * p)
inf = 1/0
+
+complQuantile :: NormalDistribution -> Double -> Double
+complQuantile d p
+ | p == 0 = inf
+ | p == 1 = -inf
+ | p == 0.5 = mean d
+ | p > 0 && p < 1 = x * ndCdfDenom d + mean d
+ | otherwise =
+ error $ "Statistics.Distribution.Normal.complQuantile: p must be in [0,1] range. Got: "++show p
+ where x = invErfc (2 * p)
+ inf = 1/0
diff --git a/Statistics/Distribution/Poisson.hs b/Statistics/Distribution/Poisson.hs
index b545249..ba1441d 100644
--- a/Statistics/Distribution/Poisson.hs
+++ b/Statistics/Distribution/Poisson.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Poisson
@@ -18,33 +19,48 @@ module Statistics.Distribution.Poisson
PoissonDistribution
-- * Constructors
, poisson
+ , poissonE
-- * Accessors
, poissonLambda
-- * References
-- $references
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
-import qualified Statistics.Distribution as D
-import qualified Statistics.Distribution.Poisson.Internal as I
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import Numeric.SpecFunctions (incompleteGamma,logFactorial)
import Numeric.MathFunctions.Constants (m_neg_inf)
-import Data.Binary (put, get)
+
+import qualified Statistics.Distribution as D
+import qualified Statistics.Distribution.Poisson.Internal as I
+import Statistics.Internal
+
newtype PoissonDistribution = PD {
poissonLambda :: Double
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show PoissonDistribution where
+ showsPrec i (PD l) = defaultShow1 "poisson" l i
+instance Read PoissonDistribution where
+ readPrec = defaultReadPrecM1 "poisson" poissonE
-instance FromJSON PoissonDistribution
instance ToJSON PoissonDistribution
+instance FromJSON PoissonDistribution where
+ parseJSON (Object v) = do
+ l <- v .: "poissonLambda"
+ maybe (fail $ errMsg l) return $ poissonE l
+ parseJSON _ = empty
instance Binary PoissonDistribution where
- get = fmap PD get
- put = put . poissonLambda
+ put = put . poissonLambda
+ get = do
+ l <- get
+ maybe (fail $ errMsg l) return $ poissonE l
instance D.Distribution PoissonDistribution where
cumulative (PD lambda) x
@@ -79,11 +95,18 @@ instance D.MaybeEntropy PoissonDistribution where
-- | Create Poisson distribution.
poisson :: Double -> PoissonDistribution
-poisson l
- | l >= 0 = PD l
- | otherwise = error $
- "Statistics.Distribution.Poisson.poisson: lambda must be non-negative. Got "
- ++ show l
+poisson l = maybe (error $ errMsg l) id $ poissonE l
+
+-- | Create Poisson distribution.
+poissonE :: Double -> Maybe PoissonDistribution
+poissonE l
+ | l >= 0 = Just (PD l)
+ | otherwise = Nothing
+
+errMsg :: Double -> String
+errMsg l = "Statistics.Distribution.Poisson.poisson: lambda must be non-negative. Got "
+ ++ show l
+
-- $references
--
diff --git a/Statistics/Distribution/Poisson/Internal.hs b/Statistics/Distribution/Poisson/Internal.hs
index 214c641..15b9d4b 100644
--- a/Statistics/Distribution/Poisson/Internal.hs
+++ b/Statistics/Distribution/Poisson/Internal.hs
@@ -16,7 +16,7 @@ module Statistics.Distribution.Poisson.Internal
import Data.List (unfoldr)
import Numeric.MathFunctions.Constants (m_sqrt_2_pi, m_tiny, m_epsilon)
-import Numeric.SpecFunctions (logGamma, stirlingError, choose, logFactorial)
+import Numeric.SpecFunctions (logGamma, stirlingError {-, choose, logFactorial -})
import Numeric.SpecFunctions.Extra (bd0)
-- | An unchecked, non-integer-valued version of Loader's saddle point
@@ -32,23 +32,23 @@ probability lambda x
| otherwise = exp (-(stirlingError x) - bd0 x lambda) /
(m_sqrt_2_pi * sqrt x)
--- | Compute entropy using Theorem 1 from "Sharp Bounds on the Entropy
--- of the Poisson Law". This function is unused because 'directEntorpy'
--- is just as accurate and is faster by about a factor of 4.
-alyThm1 :: Double -> Double
-alyThm1 lambda =
- sum (takeWhile (\x -> abs x >= m_epsilon * lll) alySeries) + lll
- where lll = lambda * (1 - log lambda)
- alySeries =
- [ alyc k * exp (fromIntegral k * log lambda - logFactorial k)
- | k <- [2..] ]
-
-alyc :: Int -> Double
-alyc k =
- sum [ parity j * choose (k-1) j * log (fromIntegral j+1) | j <- [0..k-1] ]
- where parity j
- | even (k-j) = -1
- | otherwise = 1
+-- -- | Compute entropy using Theorem 1 from "Sharp Bounds on the Entropy
+-- -- of the Poisson Law". This function is unused because 'directEntropy'
+-- -- is just as accurate and is faster by about a factor of 4.
+-- alyThm1 :: Double -> Double
+-- alyThm1 lambda =
+-- sum (takeWhile (\x -> abs x >= m_epsilon * lll) alySeries) + lll
+-- where lll = lambda * (1 - log lambda)
+-- alySeries =
+-- [ alyc k * exp (fromIntegral k * log lambda - logFactorial k)
+-- | k <- [2..] ]
+
+-- alyc :: Int -> Double
+-- alyc k =
+-- sum [ parity j * choose (k-1) j * log (fromIntegral j+1) | j <- [0..k-1] ]
+-- where parity j
+-- | even (k-j) = -1
+-- | otherwise = 1
-- | Returns [x, x^2, x^3, x^4, ...]
powers :: Double -> [Double]
diff --git a/Statistics/Distribution/StudentT.hs b/Statistics/Distribution/StudentT.hs
index 22f88f7..c982152 100644
--- a/Statistics/Distribution/StudentT.hs
+++ b/Statistics/Distribution/StudentT.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.StudentT
@@ -11,40 +12,66 @@
-- Student-T distribution
module Statistics.Distribution.StudentT (
StudentT
+ -- * Constructors
, studentT
- , studentTndf
+ , studentTE
, studentTUnstandardized
+ -- * Accessors
+ , studentTndf
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
-import qualified Statistics.Distribution as D
-import Statistics.Distribution.Transform (LinearTransform (..))
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import Numeric.SpecFunctions (
logBeta, incompleteBeta, invIncompleteBeta, digamma)
-import Data.Binary (put, get)
+
+import qualified Statistics.Distribution as D
+import Statistics.Distribution.Transform (LinearTransform (..))
+import Statistics.Internal
+
-- | Student-T distribution
newtype StudentT = StudentT { studentTndf :: Double }
- deriving (Eq, Show, Read, Typeable, Data, Generic)
+ deriving (Eq, Typeable, Data, Generic)
+
+instance Show StudentT where
+ showsPrec i (StudentT ndf) = defaultShow1 "studentT" ndf i
+instance Read StudentT where
+ readPrec = defaultReadPrecM1 "studentT" studentTE
-instance FromJSON StudentT
instance ToJSON StudentT
+instance FromJSON StudentT where
+ parseJSON (Object v) = do
+ ndf <- v .: "studentTndf"
+ maybe (fail $ errMsg ndf) return $ studentTE ndf
+ parseJSON _ = empty
instance Binary StudentT where
- put = put . studentTndf
- get = fmap StudentT get
+ put = put . studentTndf
+ get = do
+ ndf <- get
+ maybe (fail $ errMsg ndf) return $ studentTE ndf
-- | Create Student-T distribution. Number of parameters must be positive.
studentT :: Double -> StudentT
-studentT ndf
- | ndf > 0 = StudentT ndf
- | otherwise = modErr "studentT" "non-positive number of degrees of freedom"
+studentT ndf = maybe (error $ errMsg ndf) id $ studentTE ndf
+
+-- | Create Student-T distribution. Number of degrees of freedom must be positive.
+studentTE :: Double -> Maybe StudentT
+studentTE ndf
+ | ndf > 0 = Just (StudentT ndf)
+ | otherwise = Nothing
+
+errMsg :: Double -> String
+errMsg _ = modErr "studentT" "non-positive number of degrees of freedom"
+
instance D.Distribution StudentT where
- cumulative = cumulative
+ cumulative = cumulative
+ complCumulative = complCumulative
instance D.ContDistr StudentT where
density d@(StudentT ndf) x = exp (logDensityUnscaled d x) / sqrt ndf
@@ -58,6 +85,14 @@ cumulative (StudentT ndf) x
where
ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x))
+complCumulative :: StudentT -> Double -> Double
+complCumulative (StudentT ndf) x
+ | x > 0 = 0.5 * ibeta
+ | otherwise = 1 - 0.5 * ibeta
+ where
+ ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x))
+
+
logDensityUnscaled :: StudentT -> Double -> Double
logDensityUnscaled (StudentT ndf) x =
log (ndf / (ndf + x*x)) * (0.5 * (1 + ndf)) - logBeta 0.5 (0.5 * ndf)
@@ -90,7 +125,7 @@ instance D.MaybeEntropy StudentT where
maybeEntropy = Just . D.entropy
instance D.ContGen StudentT where
- genContVar = D.genContinous
+ genContVar = D.genContinuous
-- | Create an unstandardized Student-t distribution.
studentTUnstandardized :: Double -- ^ Number of degrees of freedom
diff --git a/Statistics/Distribution/Transform.hs b/Statistics/Distribution/Transform.hs
index e904e6e..d0f4643 100644
--- a/Statistics/Distribution/Transform.hs
+++ b/Statistics/Distribution/Transform.hs
@@ -66,7 +66,8 @@ instance D.Distribution d => D.Distribution (LinearTransform d) where
instance D.ContDistr d => D.ContDistr (LinearTransform d) where
density (LinearTransform loc sc dist) x = D.density dist ((x-loc) / sc) / sc
logDensity (LinearTransform loc sc dist) x = D.logDensity dist ((x-loc) / sc) - log sc
- quantile (LinearTransform loc sc dist) p = loc + sc * D.quantile dist p
+ quantile (LinearTransform loc sc dist) p = loc + sc * D.quantile dist p
+ complQuantile (LinearTransform loc sc dist) p = loc + sc * D.complQuantile dist p
instance D.MaybeMean d => D.MaybeMean (LinearTransform d) where
maybeMean (LinearTransform loc _ dist) = (+loc) <$> D.maybeMean dist
@@ -82,12 +83,10 @@ instance (D.Variance d) => D.Variance (LinearTransform d) where
variance (LinearTransform _ sc dist) = sc * sc * D.variance dist
stdDev (LinearTransform _ sc dist) = sc * D.stdDev dist
-instance (D.MaybeEntropy d, D.DiscreteDistr d)
- => D.MaybeEntropy (LinearTransform d) where
+instance (D.MaybeEntropy d) => D.MaybeEntropy (LinearTransform d) where
maybeEntropy (LinearTransform _ _ dist) = D.maybeEntropy dist
-instance (D.Entropy d, D.DiscreteDistr d)
- => D.Entropy (LinearTransform d) where
+instance (D.Entropy d) => D.Entropy (LinearTransform d) where
entropy (LinearTransform _ _ dist) = D.entropy dist
instance D.ContGen d => D.ContGen (LinearTransform d) where
diff --git a/Statistics/Distribution/Uniform.hs b/Statistics/Distribution/Uniform.hs
index 731d585..e1aa45c 100644
--- a/Statistics/Distribution/Uniform.hs
+++ b/Statistics/Distribution/Uniform.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Uniform
@@ -14,42 +15,66 @@ module Statistics.Distribution.Uniform
UniformDistribution
-- * Constructors
, uniformDistr
+ , uniformDistrE
-- ** Accessors
, uniformA
, uniformB
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Data (Data, Typeable)
-import GHC.Generics (Generic)
-import qualified Statistics.Distribution as D
+import Control.Applicative
+import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:))
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import GHC.Generics (Generic)
import qualified System.Random.MWC as MWC
-import Data.Binary (put, get)
-import Control.Applicative ((<$>), (<*>))
+
+import qualified Statistics.Distribution as D
+import Statistics.Internal
+
-- | Uniform distribution from A to B
data UniformDistribution = UniformDistribution {
uniformA :: {-# UNPACK #-} !Double -- ^ Low boundary of distribution
, uniformB :: {-# UNPACK #-} !Double -- ^ Upper boundary of distribution
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
+ } deriving (Eq, Typeable, Data, Generic)
+
+instance Show UniformDistribution where
+ showsPrec i (UniformDistribution a b) = defaultShow2 "uniformDistr" a b i
+instance Read UniformDistribution where
+ readPrec = defaultReadPrecM2 "uniformDistr" uniformDistrE
-instance FromJSON UniformDistribution
instance ToJSON UniformDistribution
+instance FromJSON UniformDistribution where
+ parseJSON (Object v) = do
+ a <- v .: "uniformA"
+ b <- v .: "uniformB"
+ maybe (fail errMsg) return $ uniformDistrE a b
+ parseJSON _ = empty
instance Binary UniformDistribution where
- put (UniformDistribution x y) = put x >> put y
- get = UniformDistribution <$> get <*> get
+ put (UniformDistribution x y) = put x >> put y
+ get = do
+ a <- get
+ b <- get
+ maybe (fail errMsg) return $ uniformDistrE a b
-- | Create uniform distribution.
uniformDistr :: Double -> Double -> UniformDistribution
-uniformDistr a b
- | b < a = uniformDistr b a
- | a < b = UniformDistribution a b
- | otherwise = error "Statistics.Distribution.Uniform.uniform: wrong parameters"
+uniformDistr a b = maybe (error errMsg) id $ uniformDistrE a b
+
+-- | Create uniform distribution.
+uniformDistrE :: Double -> Double -> Maybe UniformDistribution
+uniformDistrE a b
+ | b < a = Just $ UniformDistribution b a
+ | a < b = Just $ UniformDistribution a b
+ | otherwise = Nothing
-- NOTE: failure is in default branch to guard againist NaNs.
+errMsg :: String
+errMsg = "Statistics.Distribution.Uniform.uniform: wrong parameters"
+
+
instance D.Distribution UniformDistribution where
cumulative (UniformDistribution a b) x
| x < a = 0
@@ -65,6 +90,10 @@ instance D.ContDistr UniformDistribution where
| p >= 0 && p <= 1 = a + (b - a) * p
| otherwise =
error $ "Statistics.Distribution.Uniform.quantile: p must be in [0,1] range. Got: "++show p
+ complQuantile (UniformDistribution a b) p
+ | p >= 0 && p <= 1 = b + (a - b) * p
+ | otherwise =
+ error $ "Statistics.Distribution.Uniform.complQuantile: p must be in [0,1] range. Got: "++show p
instance D.Mean UniformDistribution where
mean (UniformDistribution a b) = 0.5 * (a + b)
diff --git a/Statistics/Function.hs b/Statistics/Function.hs
index c63d1b8..b0300f1 100644
--- a/Statistics/Function.hs
+++ b/Statistics/Function.hs
@@ -47,7 +47,7 @@ import qualified Data.Vector.Algorithms.Intro as I
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
-import Statistics.Function.Comparison (within)
+import Numeric.MathFunctions.Comparison (within)
-- | Sort a vector.
sort :: U.Vector Double -> U.Vector Double
diff --git a/Statistics/Function/Comparison.hs b/Statistics/Function/Comparison.hs
index 68eb140..c714f47 100644
--- a/Statistics/Function/Comparison.hs
+++ b/Statistics/Function/Comparison.hs
@@ -10,31 +10,9 @@
-- Approximate floating point comparison, based on Bruce Dawson's
-- \"Comparing floating point numbers\":
-- <http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm>
-
module Statistics.Function.Comparison
+ {-# DEPRECATED "Use Numeric.MathFunctions.Comparison from math-functions" #-}
(
within
) where
-
-import Control.Monad.ST (runST)
-import Data.Primitive.ByteArray (newByteArray, readByteArray, writeByteArray)
-import Data.Word (Word64)
-
--- | Compare two 'Double' values for approximate equality, using
--- Dawson's method.
---
--- The required accuracy is specified in ULPs (units of least
--- precision). If the two numbers differ by the given number of ULPs
--- or less, this function returns @True@.
-within :: Int -- ^ Number of ULPs of accuracy desired.
- -> Double -> Double -> Bool
-within ulps a b = runST $ do
- buf <- newByteArray 8
- ai0 <- writeByteArray buf 0 a >> readByteArray buf 0
- bi0 <- writeByteArray buf 0 b >> readByteArray buf 0
- let big = 0x8000000000000000 :: Word64
- ai | ai0 < 0 = big - ai0
- | otherwise = ai0
- bi | bi0 < 0 = big - bi0
- | otherwise = bi0
- return $ abs (ai - bi) <= fromIntegral ulps
+import Numeric.MathFunctions.Comparison (within)
diff --git a/Statistics/Internal.hs b/Statistics/Internal.hs
index c376728..30fbdbe 100644
--- a/Statistics/Internal.hs
+++ b/Statistics/Internal.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
-- |
-- Module : Statistics.Internal
-- Copyright : (c) 2009 Bryan O'Sullivan
@@ -8,34 +7,89 @@
-- Stability : experimental
-- Portability : portable
--
--- Scary internal functions.
-
-module Statistics.Internal
- (
- inlinePerformIO
- ) where
-
-#if __GLASGOW_HASKELL__ >= 611
-import GHC.IO (IO(IO))
-#else
-import GHC.IOBase (IO(IO))
-#endif
-import GHC.Base (realWorld#)
-#if !defined(__GLASGOW_HASKELL__)
-import System.IO.Unsafe (unsafePerformIO)
-#endif
-
--- Lifted from Data.ByteString.Internal so we don't introduce an
--- otherwise unnecessary dependency on the bytestring package.
-
--- | Just like unsafePerformIO, but we inline it. Big performance
--- gains as it exposes lots of things to further inlining. /Very
--- unsafe/. In particular, you should do no memory allocation inside
--- an 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-#if defined(__GLASGOW_HASKELL__)
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-#else
-inlinePerformIO = unsafePerformIO
-#endif
+--
+module Statistics.Internal (
+ -- * Default definitions for Show
+ defaultShow1
+ , defaultShow2
+ , defaultShow3
+ -- * Default definitions for Read
+ , defaultReadPrecM1
+ , defaultReadPrecM2
+ , defaultReadPrecM3
+ -- * Reexports
+ , Show(..)
+ , Read(..)
+ ) where
+
+import Control.Applicative
+import Control.Monad
+import Text.Read
+
+
+
+----------------------------------------------------------------
+-- Default show implementations
+----------------------------------------------------------------
+
+defaultShow1 :: (Show a) => String -> a -> Int -> ShowS
+defaultShow1 con a n
+ = showParen (n >= 11)
+ ( showString con
+ . showChar ' '
+ . showsPrec 11 a
+ )
+
+defaultShow2 :: (Show a, Show b) => String -> a -> b -> Int -> ShowS
+defaultShow2 con a b n
+ = showParen (n >= 11)
+ ( showString con
+ . showChar ' '
+ . showsPrec 11 a
+ . showChar ' '
+ . showsPrec 11 b
+ )
+
+defaultShow3 :: (Show a, Show b, Show c)
+ => String -> a -> b -> c -> Int -> ShowS
+defaultShow3 con a b c n
+ = showParen (n >= 11)
+ ( showString con
+ . showChar ' '
+ . showsPrec 11 a
+ . showChar ' '
+ . showsPrec 11 b
+ . showChar ' '
+ . showsPrec 11 c
+ )
+
+----------------------------------------------------------------
+-- Default read implementations
+----------------------------------------------------------------
+
+defaultReadPrecM1 :: (Read a) => String -> (a -> Maybe r) -> ReadPrec r
+defaultReadPrecM1 con f = parens $ prec 10 $ do
+ expect con
+ a <- readPrec
+ maybe empty return $ f a
+
+defaultReadPrecM2 :: (Read a, Read b) => String -> (a -> b -> Maybe r) -> ReadPrec r
+defaultReadPrecM2 con f = parens $ prec 10 $ do
+ expect con
+ a <- readPrec
+ b <- readPrec
+ maybe empty return $ f a b
+
+defaultReadPrecM3 :: (Read a, Read b, Read c)
+ => String -> (a -> b -> c -> Maybe r) -> ReadPrec r
+defaultReadPrecM3 con f = parens $ prec 10 $ do
+ expect con
+ a <- readPrec
+ b <- readPrec
+ c <- readPrec
+ maybe empty return $ f a b c
+
+expect :: String -> ReadPrec ()
+expect str = do
+ Ident s <- lexP
+ guard (s == str)
diff --git a/Statistics/Math/RootFinding.hs b/Statistics/Math/RootFinding.hs
index 491cbdc..c23f1f4 100644
--- a/Statistics/Math/RootFinding.hs
+++ b/Statistics/Math/RootFinding.hs
@@ -29,7 +29,7 @@ import Data.Binary.Get (getWord8)
import Data.Binary.Put (putWord8)
import Data.Data (Data, Typeable)
import GHC.Generics (Generic)
-import Statistics.Function.Comparison (within)
+import Numeric.MathFunctions.Comparison (within)
-- | The result of searching for a root of a mathematical function.
diff --git a/Statistics/Quantile.hs b/Statistics/Quantile.hs
index 209ee4c..06eacfb 100644
--- a/Statistics/Quantile.hs
+++ b/Statistics/Quantile.hs
@@ -15,7 +15,7 @@
-- The number of quantiles is described below by the variable /q/, so
-- with /q/=4, a 4-quantile (also known as a /quartile/) has 4
-- intervals, and contains 5 points. The parameter /k/ describes the
--- desired point, where 0 &#8804; /k/ &#8804; /q/.
+-- desired point, where 0 ≤ /k/ ≤ /q/.
module Statistics.Quantile
(
@@ -46,6 +46,13 @@ import qualified Data.Vector.Unboxed as U
-- | O(/n/ log /n/). Estimate the /k/th /q/-quantile of a sample,
-- using the weighted average method.
+--
+-- The following properties should hold:
+-- * the length of the input is greater than @0@
+-- * the input does not contain @NaN@
+-- * k ≥ 0 and k ≤ q
+--
+-- otherwise an error will be thrown.
weightedAvg :: G.Vector v Double =>
Int -- ^ /k/, the desired quantile.
-> Int -- ^ /q/, the number of quantiles.
@@ -53,10 +60,12 @@ weightedAvg :: G.Vector v Double =>
-> Double
weightedAvg k q x
| G.any isNaN x = modErr "weightedAvg" "Sample contains NaNs"
+ | n == 0 = modErr "weightedAvg" "Sample is empty"
| n == 1 = G.head x
| q < 2 = modErr "weightedAvg" "At least 2 quantiles is needed"
- | k < 0 || k >= q = modErr "weightedAvg" "Wrong quantile number"
- | otherwise = xj + g * (xj1 - xj)
+ | k == q = G.maximum x
+ | k >= 0 || k < q = xj + g * (xj1 - xj)
+ | otherwise = modErr "weightedAvg" "Wrong quantile number"
where
j = floor idx
idx = fromIntegral (n - 1) * fromIntegral k / fromIntegral q
diff --git a/Statistics/Regression.hs b/Statistics/Regression.hs
index b3b9f27..f119c25 100644
--- a/Statistics/Regression.hs
+++ b/Statistics/Regression.hs
@@ -24,7 +24,7 @@ import Statistics.Function as F
import Statistics.Matrix hiding (map)
import Statistics.Matrix.Algorithms (qr)
import Statistics.Resampling (splitGen)
-import Statistics.Resampling.Bootstrap (Estimate(..))
+import Statistics.Types (Estimate(..),ConfInt,CL,estimateFromInterval,significanceLevel)
import Statistics.Sample (mean)
import Statistics.Sample.Internal (sum)
import System.Random.MWC (GenIO, uniformR)
@@ -88,7 +88,7 @@ solve r b
rfor n 0 $ \i -> do
si <- (/ unsafeIndex r i i) <$> M.unsafeRead s i
M.unsafeWrite s i si
- for 0 i $ \j -> F.unsafeModify s j $ subtract ((unsafeIndex r j i) * si)
+ for 0 i $ \j -> F.unsafeModify s j $ subtract (unsafeIndex r j i * si)
return s
where n = rows r
l = U.length b
@@ -110,25 +110,23 @@ rSquare pred resp coeff = 1 - r / t
-- | Bootstrap a regression function. Returns both the results of the
-- regression and the requested confidence interval values.
-bootstrapRegress :: GenIO
- -> Int -- ^ Number of resamples to compute.
- -> Double -- ^ Confidence interval.
- -> ([Vector] -> Vector -> (Vector, Double))
- -- ^ Regression function.
- -> [Vector] -- ^ Predictor vectors.
- -> Vector -- ^ Responder vector.
- -> IO (V.Vector Estimate, Estimate)
-bootstrapRegress gen0 numResamples ci rgrss preds0 resp0
+bootstrapRegress
+ :: GenIO
+ -> Int -- ^ Number of resamples to compute.
+ -> CL Double -- ^ Confidence level.
+ -> ([Vector] -> Vector -> (Vector, Double))
+ -- ^ Regression function.
+ -> [Vector] -- ^ Predictor vectors.
+ -> Vector -- ^ Responder vector.
+ -> IO (V.Vector (Estimate ConfInt Double), Estimate ConfInt Double)
+bootstrapRegress gen0 numResamples cl rgrss preds0 resp0
| numResamples < 1 = error $ "bootstrapRegress: number of resamples " ++
"must be positive"
- | ci <= 0 || ci >= 1 = error $ "bootstrapRegress: confidence interval " ++
- "must lie between 0 and 1"
| otherwise = do
caps <- getNumCapabilities
gens <- splitGen caps gen0
done <- newChan
- forM_ (zip gens (balance caps numResamples)) $ \(gen,count) -> do
- forkIO $ do
+ forM_ (zip gens (balance caps numResamples)) $ \(gen,count) -> forkIO $ do
v <- V.replicateM count $ do
let n = U.length resp0
ixs <- U.replicateM n $ uniformR (0,n-1) gen
@@ -138,15 +136,15 @@ bootstrapRegress gen0 numResamples ci rgrss preds0 resp0
rnf v `seq` writeChan done v
(coeffsv, r2v) <- (G.unzip . V.concat) <$> replicateM caps (readChan done)
let coeffs = flip G.imap (G.convert coeffss) $ \i x ->
- est x . U.generate numResamples $ \k -> ((coeffsv G.! k) G.! i)
+ est x . U.generate numResamples $ \k -> (coeffsv G.! k) G.! i
r2 = est r2s (G.convert r2v)
(coeffss, r2s) = rgrss preds0 resp0
- est s v = Estimate s (w G.! lo) (w G.! hi) ci
+ est s v = estimateFromInterval s (w G.! lo, w G.! hi) cl
where w = F.sort v
lo = round c
hi = truncate (n - c)
n = fromIntegral numResamples
- c = n * ((1 - ci) / 2)
+ c = n * (significanceLevel cl / 2)
return (coeffs, r2)
-- | Balance units of work across workers.
diff --git a/Statistics/Resampling.hs b/Statistics/Resampling.hs
index c1c7192..4eb0116 100644
--- a/Statistics/Resampling.hs
+++ b/Statistics/Resampling.hs
@@ -1,4 +1,7 @@
-{-# LANGUAGE BangPatterns, DeriveDataTypeable, DeriveGeneric #-}
+{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE BangPatterns, DeriveDataTypeable, DeriveGeneric, FlexibleContexts #-}
-- |
-- Module : Statistics.Resampling
@@ -12,37 +15,54 @@
-- Resampling statistics.
module Statistics.Resampling
- (
+ ( -- * Data types
Resample(..)
+ , Bootstrap(..)
+ , Estimator(..)
+ , estimate
+ -- * Resampling
+ , resampleST
+ , resample
+ , resampleVector
+ -- * Jackknife
, jackknife
, jackknifeMean
, jackknifeVariance
, jackknifeVarianceUnb
, jackknifeStdDev
- , resample
- , estimate
+ -- * Helper functions
, splitGen
) where
import Data.Aeson (FromJSON, ToJSON)
+import Control.Applicative
import Control.Concurrent (forkIO, newChan, readChan, writeChan)
-import Control.Monad (forM_, liftM, replicateM, replicateM_)
+import Control.Monad (forM_, forM, replicateM, replicateM_)
+import Control.Monad.Primitive (PrimMonad(..))
import Data.Binary (Binary(..))
import Data.Data (Data, Typeable)
import Data.Vector.Algorithms.Intro (sort)
import Data.Vector.Binary ()
-import Data.Vector.Generic (unsafeFreeze)
+import Data.Vector.Generic (unsafeFreeze,unsafeThaw)
import Data.Word (Word32)
+import qualified Data.Foldable as T
+import qualified Data.Traversable as T
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Unboxed.Mutable as MU
+
import GHC.Conc (numCapabilities)
import GHC.Generics (Generic)
import Numeric.Sum (Summation(..), kbn)
import Statistics.Function (indices)
import Statistics.Sample (mean, stdDev, variance, varianceUnbiased)
-import Statistics.Types (Estimator(..), Sample)
-import System.Random.MWC (GenIO, initialize, uniform, uniformVector)
-import qualified Data.Vector.Generic as G
-import qualified Data.Vector.Unboxed as U
-import qualified Data.Vector.Unboxed.Mutable as MU
+import Statistics.Types (Sample)
+import System.Random.MWC (Gen, GenIO, initialize, uniformR, uniformVector)
+
+
+----------------------------------------------------------------
+-- Data types
+----------------------------------------------------------------
-- | A resample drawn randomly, with replacement, from a set of data
-- points. Distinct from a normal array to make it harder for your
@@ -58,6 +78,62 @@ instance Binary Resample where
put = put . fromResample
get = fmap Resample get
+data Bootstrap v a = Bootstrap
+ { fullSample :: !a
+ , resamples :: v a
+ }
+ deriving (Eq, Read, Show, Typeable, Data, Generic, Functor, T.Foldable, T.Traversable)
+
+instance (Binary a, Binary (v a)) => Binary (Bootstrap v a)
+instance (FromJSON a, FromJSON (v a)) => FromJSON (Bootstrap v a)
+instance (ToJSON a, ToJSON (v a)) => ToJSON (Bootstrap v a)
+
+
+
+-- | An estimator of a property of a sample, such as its 'mean'.
+--
+-- The use of an algebraic data type here allows functions such as
+-- 'jackknife' and 'bootstrapBCA' to use more efficient algorithms
+-- when possible.
+data Estimator = Mean
+ | Variance
+ | VarianceUnbiased
+ | StdDev
+ | Function (Sample -> Double)
+
+-- | Run an 'Estimator' over a sample.
+estimate :: Estimator -> Sample -> Double
+estimate Mean = mean
+estimate Variance = variance
+estimate VarianceUnbiased = varianceUnbiased
+estimate StdDev = stdDev
+estimate (Function est) = est
+
+
+----------------------------------------------------------------
+-- Resampling
+----------------------------------------------------------------
+
+-- | Single threaded and deterministic version of resample.
+resampleST :: PrimMonad m
+ => Gen (PrimState m)
+ -> [Estimator] -- ^ Estimation functions.
+ -> Int -- ^ Number of resamples to compute.
+ -> U.Vector Double -- ^ Original sample.
+ -> m [Bootstrap U.Vector Double]
+resampleST gen ests numResamples sample = do
+ -- Generate resamples
+ res <- forM ests $ \e -> U.replicateM numResamples $ do
+ v <- resampleVector gen sample
+ return $! estimate e v
+ -- Sort resamples
+ resM <- mapM unsafeThaw res
+ mapM_ sort resM
+ resSorted <- mapM unsafeFreeze resM
+ return $ zipWith Bootstrap [estimate e sample | e <- ests]
+ resSorted
+
+
-- | /O(e*r*s)/ Resample a data set repeatedly, with replacement,
-- computing each estimate over the resampled data.
--
@@ -73,41 +149,48 @@ instance Binary Resample where
resample :: GenIO
-> [Estimator] -- ^ Estimation functions.
-> Int -- ^ Number of resamples to compute.
- -> Sample -- ^ Original sample.
- -> IO [Resample]
+ -> U.Vector Double -- ^ Original sample.
+ -> IO [(Estimator, Bootstrap U.Vector Double)]
resample gen ests numResamples samples = do
- let !numSamples = U.length samples
- ixs = scanl (+) 0 $
+ let ixs = scanl (+) 0 $
zipWith (+) (replicate numCapabilities q)
(replicate r 1 ++ repeat 0)
where (q,r) = numResamples `quotRem` numCapabilities
results <- mapM (const (MU.new numResamples)) ests
done <- newChan
gens <- splitGen numCapabilities gen
- forM_ (zip3 ixs (tail ixs) gens) $ \ (start,!end,gen') -> do
+ forM_ (zip3 ixs (tail ixs) gens) $ \ (start,!end,gen') ->
forkIO $ do
let loop k ers | k >= end = writeChan done ()
| otherwise = do
- re <- U.replicateM numSamples $ do
- r <- uniform gen'
- return (U.unsafeIndex samples (r `mod` numSamples))
+ re <- resampleVector gen' samples
forM_ ers $ \(est,arr) ->
MU.write arr k . est $ re
loop (k+1) ers
loop start (zip ests' results)
replicateM_ numCapabilities $ readChan done
mapM_ sort results
- mapM (liftM Resample . unsafeFreeze) results
+ -- Build resamples
+ res <- mapM unsafeFreeze results
+ return $ zip ests
+ $ zipWith Bootstrap [estimate e samples | e <- ests]
+ res
where
ests' = map estimate ests
--- | Run an 'Estimator' over a sample.
-estimate :: Estimator -> Sample -> Double
-estimate Mean = mean
-estimate Variance = variance
-estimate VarianceUnbiased = varianceUnbiased
-estimate StdDev = stdDev
-estimate (Function est) = est
+-- | Create a new vector by sampling elements with replacement from the given vector.
+resampleVector :: (PrimMonad m, G.Vector v a)
+ => Gen (PrimState m) -> v a -> m (v a)
+resampleVector gen v
+ = G.replicateM n $ do i <- uniformR (0,n-1) gen
+ return $! G.unsafeIndex v i
+ where
+ n = G.length v
+
+
+----------------------------------------------------------------
+-- Jackknife
+----------------------------------------------------------------
-- | /O(n) or O(n^2)/ Compute a statistical estimate repeatedly over a
-- sample, each time omitting a successive element.
diff --git a/Statistics/Resampling/Bootstrap.hs b/Statistics/Resampling/Bootstrap.hs
index e1eef10..0ad2cf0 100644
--- a/Statistics/Resampling/Bootstrap.hs
+++ b/Statistics/Resampling/Bootstrap.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings,
- RecordWildCards #-}
-
-- |
-- Module : Statistics.Resampling.Bootstrap
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
@@ -13,109 +10,67 @@
-- The bootstrap method for statistical inference.
module Statistics.Resampling.Bootstrap
- (
- Estimate(..)
- , bootstrapBCA
- , scale
+ ( bootstrapBCA
+ , basicBootstrap
-- * References
-- $references
) where
-import Control.Applicative ((<$>), (<*>))
-import Control.DeepSeq (NFData)
-import Control.Exception (assert)
import Control.Monad.Par (parMap, runPar)
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary)
-import Data.Binary (put, get)
-import Data.Data (Data)
-import Data.Typeable (Typeable)
-import Data.Vector.Unboxed ((!))
-import GHC.Generics (Generic)
+import Data.Vector.Generic ((!))
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Generic as G
+
import Statistics.Distribution (cumulative, quantile)
import Statistics.Distribution.Normal
-import Statistics.Resampling (Resample(..), jackknife)
+import Statistics.Resampling (Bootstrap(..), jackknife)
import Statistics.Sample (mean)
-import Statistics.Types (Estimator, Sample)
-import qualified Data.Vector.Unboxed as U
-import qualified Statistics.Resampling as R
+import Statistics.Types (Sample, CL, Estimate, ConfInt, estimateFromInterval,
+ estimateFromErr, CL, significanceLevel)
+import Statistics.Function (gsort)
--- | A point and interval estimate computed via an 'Estimator'.
-data Estimate = Estimate {
- estPoint :: {-# UNPACK #-} !Double
- -- ^ Point estimate.
- , estLowerBound :: {-# UNPACK #-} !Double
- -- ^ Lower bound of the estimate interval (i.e. the lower bound of
- -- the confidence interval).
- , estUpperBound :: {-# UNPACK #-} !Double
- -- ^ Upper bound of the estimate interval (i.e. the upper bound of
- -- the confidence interval).
- , estConfidenceLevel :: {-# UNPACK #-} !Double
- -- ^ Confidence level of the confidence intervals.
- } deriving (Eq, Read, Show, Typeable, Data, Generic)
-
-instance FromJSON Estimate
-instance ToJSON Estimate
-
-instance Binary Estimate where
- put (Estimate w x y z) = put w >> put x >> put y >> put z
- get = Estimate <$> get <*> get <*> get <*> get
-instance NFData Estimate
-
--- | Multiply the point, lower bound, and upper bound in an 'Estimate'
--- by the given value.
-scale :: Double -- ^ Value to multiply by.
- -> Estimate -> Estimate
-scale f e@Estimate{..} = e {
- estPoint = f * estPoint
- , estLowerBound = f * estLowerBound
- , estUpperBound = f * estUpperBound
- }
+import qualified Statistics.Resampling as R
-estimate :: Double -> Double -> Double -> Double -> Estimate
-estimate pt lb ub cl =
- assert (lb <= ub) .
- assert (cl > 0 && cl < 1) $
- Estimate { estPoint = pt
- , estLowerBound = lb
- , estUpperBound = ub
- , estConfidenceLevel = cl
- }
data T = {-# UNPACK #-} !Double :< {-# UNPACK #-} !Double
infixl 2 :<
-- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both
--- bias and skewness in the resampled distribution.
-bootstrapBCA :: Double -- ^ Confidence level
- -> Sample -- ^ Sample data
- -> [Estimator] -- ^ Estimators
- -> [Resample] -- ^ Resampled data
- -> [Estimate]
-bootstrapBCA confidenceLevel sample estimators resamples
- | confidenceLevel > 0 && confidenceLevel < 1
- = runPar $ parMap (uncurry e) (zip estimators resamples)
- | otherwise = error "Statistics.Resampling.Bootstrap.bootstrapBCA: confidence level outside (0,1) range"
+-- bias and skewness in the resampled distribution.
+--
+-- BCA algorithm is described in ch. 5 of Davison, Hinkley "Confidence
+-- intervals" in section 5.3 "Percentile method"
+bootstrapBCA
+ :: CL Double -- ^ Confidence level
+ -> Sample -- ^ Full data sample
+ -> [(R.Estimator, Bootstrap U.Vector Double)]
+ -- ^ Pairs of the estimator and the bootstrap estimates obtained
+ -- from resampled data using it.
+ -> [Estimate ConfInt Double]
+bootstrapBCA confidenceLevel sample resampledData
+ = runPar $ parMap e resampledData
where
- e est (Resample resample)
+ e (est, Bootstrap pt resample)
| U.length sample == 1 || isInfinite bias =
- estimate pt pt pt confidenceLevel
+ estimateFromErr pt (0,0) confidenceLevel
| otherwise =
- estimate pt (resample ! lo) (resample ! hi) confidenceLevel
+ estimateFromInterval pt (resample ! lo, resample ! hi) confidenceLevel
where
- pt = R.estimate est sample
+ -- Quantile estimates for given CL
lo = max (cumn a1) 0
where a1 = bias + b1 / (1 - accel * b1)
b1 = bias + z1
hi = min (cumn a2) (ni - 1)
where a2 = bias + b2 / (1 - accel * b2)
b2 = bias - z1
- z1 = quantile standard ((1 - confidenceLevel) / 2)
+ -- Number of resamples
+ ni = U.length resample
+ n = fromIntegral ni
+ -- Corrections
+ z1 = quantile standard (significanceLevel confidenceLevel / 2)
cumn = round . (*n) . cumulative standard
bias = quantile standard (probN / n)
where probN = fromIntegral . U.length . U.filter (<pt) $ resample
- ni = U.length resample
- n = fromIntegral ni
accel = sumCubes / (6 * (sumSquares ** 1.5))
where (sumSquares :< sumCubes) = U.foldl' f (0 :< 0) jack
f (s :< c) j = s + d2 :< c + d2 * d
@@ -124,6 +79,29 @@ bootstrapBCA confidenceLevel sample estimators resamples
jackMean = mean jack
jack = jackknife est sample
+
+-- | Basic bootstrap. This method simply uses empirical quantiles for
+-- confidence interval.
+basicBootstrap
+ :: (G.Vector v a, Ord a, Num a)
+ => CL Double -- ^ Confidence level
+ -> Bootstrap v a -- ^ Estimate from full sample and vector of
+ -- estimates obtained from resamples
+ -> Estimate ConfInt a
+{-# INLINE basicBootstrap #-}
+basicBootstrap cl (Bootstrap e ests)
+ = estimateFromInterval e (sorted ! lo, sorted ! hi) cl
+ where
+ sorted = gsort ests
+ n = fromIntegral $ G.length ests
+ c = n * (significanceLevel cl / 2)
+ -- FIXME: can we obtain better quantile estimates in the case when p
+ -- is not a multiple of 1/N?
+ --
+ -- FIXME: we could have undercoverage here
+ lo = round c
+ hi = truncate (n - c)
+
-- $references
--
-- * Davison, A.C; Hinkley, D.V. (1997) Bootstrap methods and their
diff --git a/Statistics/Sample.hs b/Statistics/Sample.hs
index f4ffb09..8fd7808 100644
--- a/Statistics/Sample.hs
+++ b/Statistics/Sample.hs
@@ -43,6 +43,7 @@ module Statistics.Sample
, meanVarianceUnb
, stdDev
, varianceWeighted
+ , stdErrMean
-- ** Single-pass functions (faster, less safe)
-- $cancellation
@@ -60,7 +61,7 @@ module Statistics.Sample
import Statistics.Function (minMax)
import Statistics.Sample.Internal (robustSumVar, sum)
-import Statistics.Types (Sample,WeightedSample)
+import Statistics.Types.Internal (Sample,WeightedSample)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
@@ -284,6 +285,13 @@ stdDev = sqrt . varianceUnbiased
{-# SPECIALIZE stdDev :: U.Vector Double -> Double #-}
{-# SPECIALIZE stdDev :: V.Vector Double -> Double #-}
+-- | Standard error of the mean. This is the standard deviation
+-- divided by the square root of the sample size.
+stdErrMean :: (G.Vector v Double) => v Double -> Double
+stdErrMean samp = stdDev samp / (sqrt . fromIntegral . G.length) samp
+{-# SPECIALIZE stdErrMean :: U.Vector Double -> Double #-}
+{-# SPECIALIZE stdErrMean :: V.Vector Double -> Double #-}
+
robustSumVarWeighted :: (G.Vector v (Double,Double)) => v (Double,Double) -> V
robustSumVarWeighted samp = G.foldl' go (V 0 0) samp
where
diff --git a/Statistics/Sample/Histogram.hs b/Statistics/Sample/Histogram.hs
index e639de6..7579286 100644
--- a/Statistics/Sample/Histogram.hs
+++ b/Statistics/Sample/Histogram.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts, BangPatterns #-}
-- |
-- Module : Statistics.Sample.Histogram
@@ -71,8 +71,9 @@ histogram_ numBins lo hi xs0 = G.create (GM.replicate numBins 0 >>= bin xs0)
| otherwise = do
let x = xs `G.unsafeIndex` i
b = truncate $ (x - lo) / d
- GM.write bins b . (+1) =<< GM.read bins b
+ write' bins b . (+1) =<< GM.read bins b
go (i+1)
+ write' bins b !e = GM.write bins b e
len = G.length xs
d = ((hi - lo) * (1 + realToFrac m_epsilon)) / fromIntegral numBins
{-# INLINE histogram_ #-}
diff --git a/Statistics/Sample/Powers.hs b/Statistics/Sample/Powers.hs
index 14459f6..352dba9 100644
--- a/Statistics/Sample/Powers.hs
+++ b/Statistics/Sample/Powers.hs
@@ -47,23 +47,22 @@ module Statistics.Sample.Powers
-- $references
) where
-import Data.Aeson (FromJSON, ToJSON)
-import Data.Binary (Binary(..))
-import Data.Data (Data, Typeable)
-import Data.Vector.Binary ()
-import Data.Vector.Generic (unsafeFreeze)
-import Data.Vector.Unboxed ((!))
-import GHC.Generics (Generic)
+import Control.Monad.ST
+import Data.Aeson (FromJSON, ToJSON)
+import Data.Binary (Binary(..))
+import Data.Data (Data, Typeable)
+import Data.Vector.Binary ()
+import Data.Vector.Unboxed ((!))
+import GHC.Generics (Generic)
import Numeric.SpecFunctions (choose)
import Prelude hiding (sum)
-import Statistics.Function (indexed)
-import Statistics.Internal (inlinePerformIO)
-import System.IO.Unsafe (unsafePerformIO)
-import qualified Data.Vector as V
-import qualified Data.Vector.Generic as G
-import qualified Data.Vector.Unboxed as U
+import Statistics.Function (indexed)
+import qualified Data.Vector as V
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector.Storable as SV
+import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as MU
-import qualified Statistics.Sample.Internal as S
+import qualified Statistics.Sample.Internal as S
newtype Powers = Powers (U.Vector Double)
deriving (Eq, Read, Show, Typeable, Data, Generic)
@@ -94,19 +93,22 @@ powers :: G.Vector v Double =>
Int -- ^ /n/, the number of powers, where /n/ >= 2.
-> v Double
-> Powers
-powers k
- | k < 2 = error "Statistics.Sample.powers: too few powers"
- | otherwise = fini . G.foldl' go (unsafePerformIO $ MU.replicate l 0)
+powers k sample
+ | k < 2 = error "Statistics.Sample.powers: too few powers"
+ | otherwise = runST $ do
+ acc <- MU.replicate l 0
+ G.forM_ sample $ \x ->
+ let loop !i !xk
+ | i == l = return ()
+ | otherwise = do MU.write acc i . (+ xk) =<< MU.read acc i
+ loop (i+1) (xk * x)
+ in loop 0 1
+ fmap Powers $ U.unsafeFreeze acc
where
- go ms x = inlinePerformIO $ loop 0 1
- where loop !i !xk | i == l = return ms
- | otherwise = do
- MU.read ms i >>= MU.write ms i . (+ xk)
- loop (i+1) (xk*x)
- fini = Powers . unsafePerformIO . unsafeFreeze
- l = k + 1
-{-# SPECIALIZE powers :: Int -> U.Vector Double -> Powers #-}
-{-# SPECIALIZE powers :: Int -> V.Vector Double -> Powers #-}
+ l = k + 1
+{-# SPECIALIZE powers :: Int -> U.Vector Double -> Powers #-}
+{-# SPECIALIZE powers :: Int -> V.Vector Double -> Powers #-}
+{-# SPECIALIZE powers :: Int -> SV.Vector Double -> Powers #-}
-- | The order (number) of simple powers collected from a 'sample'.
order :: Powers -> Int
diff --git a/Statistics/Test/ChiSquared.hs b/Statistics/Test/ChiSquared.hs
index 9cd3b82..a8df688 100644
--- a/Statistics/Test/ChiSquared.hs
+++ b/Statistics/Test/ChiSquared.hs
@@ -2,44 +2,74 @@
-- | Pearson's chi squared test.
module Statistics.Test.ChiSquared (
chi2test
- -- * Data types
- , TestType(..)
- , TestResult(..)
+ , chi2testCont
+ , module Statistics.Test.Types
) where
import Prelude hiding (sum)
+
import Statistics.Distribution
import Statistics.Distribution.ChiSquared
-import Statistics.Function (square)
+import Statistics.Function (square)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Types
+import Statistics.Types
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
+
-- | Generic form of Pearson chi squared tests for binned data. Data
-- sample is supplied in form of tuples (observed quantity,
-- expected number of events). Both must be positive.
+--
+-- This test should be used only if all bins have expected values of
+-- at least 5.
chi2test :: (G.Vector v (Int,Double), G.Vector v Double)
- => Double -- ^ p-value
- -> Int -- ^ Number of additional degrees of
+ => Int -- ^ Number of additional degrees of
-- freedom. One degree of freedom
-- is due to the fact that the are
-- N observation in total and
-- accounted for automatically.
-> v (Int,Double) -- ^ Observation and expectation.
- -> TestResult
-chi2test p ndf vec
- | ndf < 0 = error $ "Statistics.Test.ChiSquare.chi2test: negative NDF " ++ show ndf
- | n < 0 = error $ "Statistics.Test.ChiSquare.chi2test: too short data sample"
- | p > 0 && p < 1 = significant $ complCumulative d chi2 < p
- | otherwise = error $ "Statistics.Test.ChiSquare.chi2test: bad p-value: " ++ show p
+ -> Maybe (Test ChiSquared)
+chi2test ndf vec
+ | ndf < 0 = error $ "Statistics.Test.ChiSquare.chi2test: negative NDF " ++ show ndf
+ | n > 0 = Just Test
+ { testSignificance = mkPValue $ complCumulative d chi2
+ , testStatistics = chi2
+ , testDistribution = chiSquared ndf
+ }
+ | otherwise = Nothing
where
n = G.length vec - ndf - 1
chi2 = sum $ G.map (\(o,e) -> square (fromIntegral o - e) / e) vec
d = chiSquared n
+{-# INLINABLE chi2test #-}
{-# SPECIALIZE
- chi2test :: Double -> Int -> U.Vector (Int,Double) -> TestResult #-}
+ chi2test :: Int -> U.Vector (Int,Double) -> Maybe (Test ChiSquared) #-}
{-# SPECIALIZE
- chi2test :: Double -> Int -> V.Vector (Int,Double) -> TestResult #-}
+ chi2test :: Int -> V.Vector (Int,Double) -> Maybe (Test ChiSquared) #-}
+
+
+-- | Chi squared test for data with normal errors. Data is supplied
+-- in the form of pairs (observation with error, expectation).
+chi2testCont
+ :: (G.Vector v (Estimate NormalErr Double, Double), G.Vector v Double)
+ => Int -- ^ Number of additional
+ -- degrees of freedom.
+ -> v (Estimate NormalErr Double, Double) -- ^ Observation and expectation.
+ -> Maybe (Test ChiSquared)
+chi2testCont ndf vec
+ | ndf < 0 = error $ "Statistics.Test.ChiSquare.chi2testCont: negative NDF " ++ show ndf
+ | n > 0 = Just Test
+ { testSignificance = mkPValue $ complCumulative d chi2
+ , testStatistics = chi2
+ , testDistribution = chiSquared ndf
+ }
+ | otherwise = Nothing
+ where
+ n = G.length vec - ndf - 1
+ chi2 = sum $ G.map (\(Estimate o (NormalErr s),e) -> square (o - e) / s) vec
+ d = chiSquared n
diff --git a/Statistics/Test/Internal.hs b/Statistics/Test/Internal.hs
index bef70d5..225cfda 100644
--- a/Statistics/Test/Internal.hs
+++ b/Statistics/Test/Internal.hs
@@ -23,6 +23,10 @@ data Rank v a = Rank {
-- | Calculate rank of every element of sample. In case of ties ranks
-- are averaged. Sample should be already sorted in ascending order.
--
+-- Rank is the index of an element in the sample; numbering starts
+-- from 1. In case of ties the average of the ranks of the equal
+-- elements is assigned to each of them.
+--
-- >>> rank (==) (fromList [10,20,30::Int])
-- > fromList [1.0,2.0,3.0]
--
diff --git a/Statistics/Test/KolmogorovSmirnov.hs b/Statistics/Test/KolmogorovSmirnov.hs
index e2a618c..2f56e54 100644
--- a/Statistics/Test/KolmogorovSmirnov.hs
+++ b/Statistics/Test/KolmogorovSmirnov.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts #-}
-- |
-- Module : Statistics.Test.KolmogorovSmirnov
-- Copyright : (c) 2011 Aleksey Khudyakov
@@ -7,10 +8,10 @@
-- Stability : experimental
-- Portability : portable
--
--- Kolmogov-Smirnov tests are non-parametric tests for assesing
+-- Kolmogov-Smirnov tests are non-parametric tests for assessing
-- whether given sample could be described by distribution or whether
-- two samples have the same distribution. It's only applicable to
--- continous distributions.
+-- continuous distributions.
module Statistics.Test.KolmogorovSmirnov (
-- * Kolmogorov-Smirnov test
kolmogorovSmirnovTest
@@ -22,21 +23,23 @@ module Statistics.Test.KolmogorovSmirnov (
, kolmogorovSmirnov2D
-- * Probablities
, kolmogorovSmirnovProbability
- -- * Data types
- , TestType(..)
- , TestResult(..)
-- * References
-- $references
+ , module Statistics.Test.Types
) where
import Control.Monad (when)
import Prelude hiding (exponent, sum)
import Statistics.Distribution (Distribution(..))
-import Statistics.Function (sort, unsafeModify)
+import Statistics.Function (gsort, unsafeModify)
import Statistics.Matrix (center, exponent, for, fromVector, power)
-import Statistics.Test.Types (TestResult(..), TestType(..), significant)
-import Statistics.Types (Sample)
-import qualified Data.Vector.Unboxed as U
+import Statistics.Test.Types
+import Statistics.Types (mkPValue)
+import qualified Data.Vector as V
+import qualified Data.Vector.Storable as S
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Generic as G
+import Data.Vector.Generic ((!))
import qualified Data.Vector.Unboxed.Mutable as M
@@ -44,58 +47,75 @@ import qualified Data.Vector.Unboxed.Mutable as M
-- Test
----------------------------------------------------------------
--- | Check that sample could be described by
--- distribution. 'Significant' means distribution is not compatible
--- with data for given p-value.
+-- | Check that sample could be described by distribution. Returns
+-- @Nothing@ if sample is empty
--
--- This test uses Marsaglia-Tsang-Wang exact alogorithm for
+-- This test uses Marsaglia-Tsang-Wang exact algorithm for
-- calculation of p-value.
-kolmogorovSmirnovTest :: Distribution d
- => d -- ^ Distribution
- -> Double -- ^ p-value
- -> Sample -- ^ Data sample
- -> TestResult
-kolmogorovSmirnovTest d = kolmogorovSmirnovTestCdf (cumulative d)
+kolmogorovSmirnovTest :: (Distribution d, G.Vector v Double)
+ => d -- ^ Distribution
+ -> v Double -- ^ Data sample
+ -> Maybe (Test ())
+{-# INLINE kolmogorovSmirnovTest #-}
+kolmogorovSmirnovTest d
+ = kolmogorovSmirnovTestCdf (cumulative d)
+
-- | Variant of 'kolmogorovSmirnovTest' which uses CFD in form of
-- function.
-kolmogorovSmirnovTestCdf :: (Double -> Double) -- ^ CDF of distribution
- -> Double -- ^ p-value
- -> Sample -- ^ Data sample
- -> TestResult
-kolmogorovSmirnovTestCdf cdf p sample
- | p > 0 && p < 1 = significant $ 1 - prob < p
- | otherwise = error "Statistics.Test.KolmogorovSmirnov.kolmogorovSmirnovTestCdf:bad p-value"
+kolmogorovSmirnovTestCdf :: (G.Vector v Double)
+ => (Double -> Double) -- ^ CDF of distribution
+ -> v Double -- ^ Data sample
+ -> Maybe (Test ())
+{-# INLINE kolmogorovSmirnovTestCdf #-}
+kolmogorovSmirnovTestCdf cdf sample
+ | G.null sample = Nothing
+ | otherwise = Just Test
+ { testSignificance = mkPValue $ 1 - prob
+ , testStatistics = d
+ , testDistribution = ()
+ }
where
d = kolmogorovSmirnovCdfD cdf sample
- prob = kolmogorovSmirnovProbability (U.length sample) d
+ prob = kolmogorovSmirnovProbability (G.length sample) d
+
-- | Two sample Kolmogorov-Smirnov test. It tests whether two data
-- samples could be described by the same distribution without
--- making any assumptions about it.
+-- making any assumptions about it. If either of the samples is
+-- empty, returns @Nothing@.
--
--- This test uses approxmate formula for computing p-value.
-kolmogorovSmirnovTest2 :: Double -- ^ p-value
- -> Sample -- ^ Sample 1
- -> Sample -- ^ Sample 2
- -> TestResult
-kolmogorovSmirnovTest2 p xs1 xs2
- | p > 0 && p < 1 = significant $ 1 - prob( d*(en + 0.12 + 0.11/en) ) < p
- | otherwise = error "Statistics.Test.KolmogorovSmirnov.kolmogorovSmirnovTest2:bad p-value"
+-- This test uses approximate formula for computing p-value.
+kolmogorovSmirnovTest2 :: (G.Vector v Double)
+ => v Double -- ^ Sample 1
+ -> v Double -- ^ Sample 2
+ -> Maybe (Test ())
+kolmogorovSmirnovTest2 xs1 xs2
+ | G.null xs1 || G.null xs2 = Nothing
+ | otherwise = Just Test
+ { testSignificance = mkPValue $ 1 - prob d
+ , testStatistics = d
+ , testDistribution = ()
+ }
where
d = kolmogorovSmirnov2D xs1 xs2
+ * (en + 0.12 + 0.11/en)
-- Effective number of data points
- n1 = fromIntegral (U.length xs1)
- n2 = fromIntegral (U.length xs2)
+ n1 = fromIntegral (G.length xs1)
+ n2 = fromIntegral (G.length xs2)
en = sqrt $ n1 * n2 / (n1 + n2)
--
prob z
| z < 0 = error "kolmogorovSmirnov2D: internal error"
- | z == 0 = 1
+ | z == 0 = 0
| z < 1.18 = let y = exp( -1.23370055013616983 / (z*z) )
- in 2.25675833419102515 * sqrt( -log(y) ) * (y + y**9 + y**25 + y**49)
+ in 2.25675833419102515 * sqrt( -log y ) * (y + y**9 + y**25 + y**49)
| otherwise = let x = exp(-2 * z * z)
in 1 - 2*(x - x**4 + x**9)
+{-# INLINABLE kolmogorovSmirnovTest2 #-}
+{-# SPECIALIZE kolmogorovSmirnovTest2 :: U.Vector Double -> U.Vector Double -> Maybe (Test ()) #-}
+{-# SPECIALIZE kolmogorovSmirnovTest2 :: V.Vector Double -> V.Vector Double -> Maybe (Test ()) #-}
+{-# SPECIALIZE kolmogorovSmirnovTest2 :: S.Vector Double -> S.Vector Double -> Maybe (Test ()) #-}
-- FIXME: Find source for approximation for D
@@ -107,64 +127,76 @@ kolmogorovSmirnovTest2 p xs1 xs2
-- | Calculate Kolmogorov's statistic /D/ for given cumulative
-- distribution function (CDF) and data sample. If sample is empty
-- returns 0.
-kolmogorovSmirnovCdfD :: (Double -> Double) -- ^ CDF function
- -> Sample -- ^ Sample
+kolmogorovSmirnovCdfD :: G.Vector v Double
+ => (Double -> Double) -- ^ CDF function
+ -> v Double -- ^ Sample
-> Double
kolmogorovSmirnovCdfD cdf sample
- | U.null sample = 0
- | otherwise = U.maximum
- $ U.zipWith3 (\p a b -> abs (p-a) `max` abs (p-b))
- ps steps (U.tail steps)
+ | G.null sample = 0
+ | otherwise = G.maximum
+ $ G.zipWith3 (\p a b -> abs (p-a) `max` abs (p-b))
+ ps steps (G.tail steps)
where
- xs = sort sample
- n = U.length xs
+ xs = gsort sample
+ n = G.length xs
--
- ps = U.map cdf xs
- steps = U.map ((/ fromIntegral n) . fromIntegral)
- $ U.generate (n+1) id
+ ps = G.map cdf xs
+ steps = G.map (/ fromIntegral n)
+ $ G.generate (n+1) fromIntegral
+{-# INLINABLE kolmogorovSmirnovCdfD #-}
+{-# SPECIALIZE kolmogorovSmirnovCdfD :: (Double -> Double) -> U.Vector Double -> Double #-}
+{-# SPECIALIZE kolmogorovSmirnovCdfD :: (Double -> Double) -> V.Vector Double -> Double #-}
+{-# SPECIALIZE kolmogorovSmirnovCdfD :: (Double -> Double) -> S.Vector Double -> Double #-}
-- | Calculate Kolmogorov's statistic /D/ for given cumulative
-- distribution function (CDF) and data sample. If sample is empty
-- returns 0.
-kolmogorovSmirnovD :: (Distribution d)
+kolmogorovSmirnovD :: (Distribution d, G.Vector v Double)
=> d -- ^ Distribution
- -> Sample -- ^ Sample
+ -> v Double -- ^ Sample
-> Double
kolmogorovSmirnovD d = kolmogorovSmirnovCdfD (cumulative d)
+{-# INLINE kolmogorovSmirnovD #-}
+
-- | Calculate Kolmogorov's statistic /D/ for two data samples. If
-- either of samples is empty returns 0.
-kolmogorovSmirnov2D :: Sample -- ^ First sample
- -> Sample -- ^ Second sample
+kolmogorovSmirnov2D :: (G.Vector v Double)
+ => v Double -- ^ First sample
+ -> v Double -- ^ Second sample
-> Double
kolmogorovSmirnov2D sample1 sample2
- | U.null sample1 || U.null sample2 = 0
+ | G.null sample1 || G.null sample2 = 0
| otherwise = worker 0 0 0
where
- xs1 = sort sample1
- xs2 = sort sample2
- n1 = U.length xs1
- n2 = U.length xs2
+ xs1 = gsort sample1
+ xs2 = gsort sample2
+ n1 = G.length xs1
+ n2 = G.length xs2
en1 = fromIntegral n1
en2 = fromIntegral n2
-- Find new index
skip x i xs = go (i+1)
- where go n | n >= U.length xs = n
- | xs U.! n == x = go (n+1)
+ where go n | n >= G.length xs = n
+ | xs ! n == x = go (n+1)
| otherwise = n
-- Main loop
worker d i1 i2
| i1 >= n1 || i2 >= n2 = d
| otherwise = worker d' i1' i2'
where
- d1 = xs1 U.! i1
- d2 = xs2 U.! i2
+ d1 = xs1 ! i1
+ d2 = xs2 ! i2
i1' | d1 <= d2 = skip d1 i1 xs1
| otherwise = i1
i2' | d2 <= d1 = skip d2 i2 xs2
| otherwise = i2
d' = max d (abs $ fromIntegral i1' / en1 - fromIntegral i2' / en2)
+{-# INLINABLE kolmogorovSmirnov2D #-}
+{-# SPECIALIZE kolmogorovSmirnov2D :: U.Vector Double -> U.Vector Double -> Double #-}
+{-# SPECIALIZE kolmogorovSmirnov2D :: V.Vector Double -> V.Vector Double -> Double #-}
+{-# SPECIALIZE kolmogorovSmirnov2D :: S.Vector Double -> S.Vector Double -> Double #-}
@@ -178,7 +210,7 @@ kolmogorovSmirnovProbability :: Int -- ^ Size of the sample
-> Double -- ^ D value
-> Double
kolmogorovSmirnovProbability n d
- -- Avoid potencially lengthy calculations for large N and D > 0.999
+ -- Avoid potentially lengthy calculations for large N and D > 0.999
| s > 7.24 || (s > 3.76 && n > 99) = 1 - 2 * exp( -(2.000071 + 0.331 / sqrt n' + 1.409 / n') * s)
-- Exact computation
| otherwise = fini $ matrix `power` n
diff --git a/Statistics/Test/KruskalWallis.hs b/Statistics/Test/KruskalWallis.hs
index 1f44815..d84af3a 100644
--- a/Statistics/Test/KruskalWallis.hs
+++ b/Statistics/Test/KruskalWallis.hs
@@ -8,19 +8,22 @@
-- Portability : portable
--
module Statistics.Test.KruskalWallis
- ( kruskalWallisRank
+ ( -- * Kruskal-Wallis test
+ kruskalWallisTest
+ -- ** Building blocks
+ , kruskalWallisRank
, kruskalWallis
- , kruskalWallisSignificant
- , kruskalWallisTest
+ , module Statistics.Test.Types
) where
import Data.Ord (comparing)
import Data.Foldable (foldMap)
import qualified Data.Vector.Unboxed as U
import Statistics.Function (sort, sortBy, square)
-import Statistics.Distribution (quantile)
+import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared)
-import Statistics.Test.Types (TestResult(..), significant)
+import Statistics.Types
+import Statistics.Test.Types
import Statistics.Test.Internal (rank)
import Statistics.Sample
import qualified Statistics.Sample.Internal as Sample(sum)
@@ -32,7 +35,7 @@ import qualified Statistics.Sample.Internal as Sample(sum)
--
-- The samples and values need not to be ordered but the values in the result
-- are ordered. Assigned ranks (ties are given their average rank).
-kruskalWallisRank :: [Sample] -> [Sample]
+kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double]
kruskalWallisRank samples = groupByTags
. sortBy (comparing fst)
. U.zip tags
@@ -54,7 +57,7 @@ kruskalWallisRank samples = groupByTags
--
-- In textbooks the output value is usually represented by 'K' or 'H'. This
-- function already does the ranking.
-kruskalWallis :: [Sample] -> Double
+kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double
kruskalWallis samples = (nTot - 1) * numerator / denominator
where
-- Total number of elements in all samples
@@ -71,29 +74,25 @@ kruskalWallis samples = (nTot - 1) * numerator / denominator
rsamples = kruskalWallisRank samples
--- | Calculates whether the Kruskal-Wallis test is significant.
+-- | Perform Kruskal-Wallis Test for the given samples and required
+-- significance. For additional information check 'kruskalWallis'. This is just
+-- a helper function.
--
-- It uses /Chi-Squared/ distribution for aproximation as long as the sizes are
-- larger than 5. Otherwise the test returns 'Nothing'.
-kruskalWallisSignificant ::
- [Int] -- ^ The samples' size
- -> Double -- ^ The p-value at which to test (e.g. 0.05)
- -> Double -- ^ K value from 'kruskallWallis'
- -> Maybe TestResult
-kruskalWallisSignificant ns p k
- -- Use chi-squared approximation
- | all (>4) ns = Just . significant $ k > x
- -- TODO: Implement critical value calculation: kruskalWallisCriticalValue
- | otherwise = Nothing
+kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
+kruskalWallisTest [] = Nothing
+kruskalWallisTest samples
+ -- We use chi-squared approximation here
+ | all (>4) ns = Just Test { testSignificance = mkPValue $ complCumulative d k
+ , testStatistics = k
+ , testDistribution = ()
+ }
+ | otherwise = Nothing
where
- x = quantile (chiSquared (length ns - 1)) (1 - p)
-
--- | Perform Kruskal-Wallis Test for the given samples and required
--- significance. For additional information check 'kruskalWallis'. This is just
--- a helper function.
-kruskalWallisTest :: Double -> [Sample] -> Maybe TestResult
-kruskalWallisTest p samples =
- kruskalWallisSignificant (map U.length samples) p $ kruskalWallis samples
+ k = kruskalWallis samples
+ ns = map U.length samples
+ d = chiSquared (length ns - 1)
-- * Helper functions
diff --git a/Statistics/Test/MannWhitneyU.hs b/Statistics/Test/MannWhitneyU.hs
index a5be291..41c2e4c 100644
--- a/Statistics/Test/MannWhitneyU.hs
+++ b/Statistics/Test/MannWhitneyU.hs
@@ -19,9 +19,7 @@ module Statistics.Test.MannWhitneyU (
, mannWhitneyUSignificant
-- ** Wilcoxon rank sum test
, wilcoxonRankSums
- -- * Data types
- , TestType(..)
- , TestResult(..)
+ , module Statistics.Test.Types
-- * References
-- $references
) where
@@ -36,22 +34,22 @@ import Statistics.Distribution.Normal (standard)
import Statistics.Function (sortBy)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Internal (rank, splitByTags)
-import Statistics.Test.Types (TestResult(..), TestType(..), significant)
-import Statistics.Types (Sample)
+import Statistics.Test.Types (TestResult(..), PositionTest(..), significant)
+import Statistics.Types (PValue,pValue)
import qualified Data.Vector.Unboxed as U
-- | The Wilcoxon Rank Sums Test.
--
--- This test calculates the sum of ranks for the given two samples. The samples
--- are ordered, and assigned ranks (ties are given their average rank), then these
--- ranks are summed for each sample.
+-- This test calculates the sum of ranks for the given two samples.
+-- The samples are ordered, and assigned ranks (ties are given their
+-- average rank), then these ranks are summed for each sample.
--
--- The return value is (W&#8321;, W&#8322;) where W&#8321; is the sum of ranks of the first sample
--- and W&#8322; is the sum of ranks of the second sample. This test is trivially transformed
+-- The return value is (W₁, W₂) where W₁ is the sum of ranks of the first sample
+-- and W₂ is the sum of ranks of the second sample. This test is trivially transformed
-- into the Mann-Whitney U test. You will probably want to use 'mannWhitneyU'
-- and the related functions for testing significance, but this function is exposed
-- for completeness.
-wilcoxonRankSums :: Sample -> Sample -> (Double, Double)
+wilcoxonRankSums :: (Ord a, U.Unbox a) => U.Vector a -> U.Vector a -> (Double, Double)
wilcoxonRankSums xs1 xs2 = (sum ranks1, sum ranks2)
where
-- Ranks for each sample
@@ -61,7 +59,7 @@ wilcoxonRankSums xs1 xs2 = (sum ranks1, sum ranks2)
$ sortBy (comparing snd)
$ tagSample True xs1 U.++ tagSample False xs2
-- Add tag to a sample
- tagSample t = U.map ((,) t)
+ tagSample t = U.map (\x -> (t,x))
@@ -72,19 +70,19 @@ wilcoxonRankSums xs1 xs2 = (sum ranks1, sum ranks2)
-- the Wilcoxon's rank sum test (which is provided as 'wilcoxonRankSums').
-- The Mann-Whitney U is a simple transform of Wilcoxon's rank sum test.
--
--- Again confusingly, different sources state reversed definitions for U&#8321;
--- and U&#8322;, so it is worth being explicit about what this function returns.
--- Given two samples, the first, xs&#8321;, of size n&#8321; and the second, xs&#8322;,
--- of size n&#8322;, this function returns (U&#8321;, U&#8322;)
--- where U&#8321; = W&#8321; - (n&#8321;(n&#8321;+1))\/2
--- and U&#8322; = W&#8322; - (n&#8322;(n&#8322;+1))\/2,
--- where (W&#8321;, W&#8322;) is the return value of @wilcoxonRankSums xs1 xs2@.
+-- Again confusingly, different sources state reversed definitions for U₁
+-- and U₂, so it is worth being explicit about what this function returns.
+-- Given two samples, the first, xs₁, of size n₁ and the second, xs₂,
+-- of size n₂, this function returns (U₁, U₂)
+-- where U₁ = W₁ - (n₁(n₁+1))\/2
+-- and U₂ = W₂ - (n₂(n₂+1))\/2,
+-- where (W₁, W₂) is the return value of @wilcoxonRankSums xs1 xs2@.
--
--- Some sources instead state that U&#8321; and U&#8322; should be the other way round, often
--- expressing this using U&#8321;' = n&#8321;n&#8322; - U&#8321; (since U&#8321; + U&#8322; = n&#8321;n&#8322;).
+-- Some sources instead state that U₁ and U₂ should be the other way round, often
+-- expressing this using U₁' = n₁n₂ - U₁ (since U₁ + U₂ = n₁n₂).
--
-- All of which you probably don't care about if you just feed this into 'mannWhitneyUSignificant'.
-mannWhitneyU :: Sample -> Sample -> (Double, Double)
+mannWhitneyU :: (Ord a, U.Unbox a) => U.Vector a -> U.Vector a -> (Double, Double)
mannWhitneyU xs1 xs2
= (fst summedRanks - (n1*(n1 + 1))/2
,snd summedRanks - (n2*(n2 + 1))/2)
@@ -105,12 +103,12 @@ mannWhitneyU xs1 xs2
-- The algorithm to generate these values is a faster, memoised version of the
-- simple unoptimised generating function given in section 2 of \"The Mann Whitney
-- Wilcoxon Distribution Using Linked Lists\"
-mannWhitneyUCriticalValue :: (Int, Int) -- ^ The sample size
- -> Double -- ^ The p-value (e.g. 0.05) for which you want the critical value.
- -> Maybe Int -- ^ The critical value (of U).
+mannWhitneyUCriticalValue
+ :: (Int, Int) -- ^ The sample size
+ -> PValue Double -- ^ The p-value (e.g. 0.05) for which you want the critical value.
+ -> Maybe Int -- ^ The critical value (of U).
mannWhitneyUCriticalValue (m, n) p
| m < 1 || n < 1 = Nothing -- Sample must be nonempty
- | p >= 1 = Nothing -- Nonsensical p-value
| p' <= 1 = Nothing -- p-value is too small. Null hypothesys couln't be disproved
| otherwise = findIndex (>= p')
$ take (m*n)
@@ -118,7 +116,7 @@ mannWhitneyUCriticalValue (m, n) p
$ alookup !! (m+n-2) !! (min m n - 1)
where
mnCn = (m+n) `choose` n
- p' = mnCn * p
+ p' = mnCn * pValue p
{-
@@ -181,31 +179,34 @@ alookup = gen 2 [1 : repeat 2]
--
-- If you use a one-tailed test, the test indicates whether the first sample is
-- significantly larger than the second. If you want the opposite, simply reverse
--- the order in both the sample size and the (U₁, U₂) pairs.
-mannWhitneyUSignificant ::
- TestType -- ^ Perform one-tailed test (see description above).
- -> (Int, Int) -- ^ The samples' size from which the (U₁,U₂) values were derived.
- -> Double -- ^ The p-value at which to test (e.g. 0.05)
- -> (Double, Double) -- ^ The (U₁, U₂) values from 'mannWhitneyU'.
+-- the order in both the sample size and the (U₁, U₂) pairs.
+mannWhitneyUSignificant
+ :: PositionTest -- ^ Perform one-tailed test (see description above).
+ -> (Int, Int) -- ^ The samples' size from which the (U₁,U₂) values were derived.
+ -> PValue Double -- ^ The p-value at which to test (e.g. 0.05)
+ -> (Double, Double) -- ^ The (U₁, U₂) values from 'mannWhitneyU'.
-> Maybe TestResult -- ^ Return 'Nothing' if the sample was too
-- small to make a decision.
-mannWhitneyUSignificant test (in1, in2) p (u1, u2)
- --Use normal approximation
+mannWhitneyUSignificant test (in1, in2) pVal (u1, u2)
+ -- Use normal approximation
| in1 > 20 || in2 > 20 =
- let mean = n1 * n2 / 2
+ let mean = n1 * n2 / 2 -- (u1+u2) / 2
sigma = sqrt $ n1*n2*(n1 + n2 + 1) / 12
z = (mean - u1) / sigma
in Just $ case test of
- OneTailed -> significant $ z < quantile standard p
- TwoTailed -> significant $ abs z > abs (quantile standard (p/2))
+ AGreater -> significant $ z < quantile standard p
+ BGreater -> significant $ (-z) < quantile standard p
+ SamplesDiffer -> significant $ abs z > abs (quantile standard (p/2))
-- Use exact critical value
- | otherwise = do crit <- fromIntegral <$> mannWhitneyUCriticalValue (in1, in2) p
+ | otherwise = do crit <- fromIntegral <$> mannWhitneyUCriticalValue (in1, in2) pVal
return $ case test of
- OneTailed -> significant $ u2 <= crit
- TwoTailed -> significant $ min u1 u2 <= crit
+ AGreater -> significant $ u2 <= crit
+ BGreater -> significant $ u1 <= crit
+ SamplesDiffer -> significant $ min u1 u2 <= crit
where
n1 = fromIntegral in1
n2 = fromIntegral in2
+ p = pValue pVal
-- | Perform Mann-Whitney U Test for two samples and required
@@ -215,13 +216,14 @@ mannWhitneyUSignificant test (in1, in2) p (u1, u2)
--
-- One-tailed test checks whether first sample is significantly larger
-- than second. Two-tailed whether they are significantly different.
-mannWhitneyUtest :: TestType -- ^ Perform one-tailed test (see description above).
- -> Double -- ^ The p-value at which to test (e.g. 0.05)
- -> Sample -- ^ First sample
- -> Sample -- ^ Second sample
- -> Maybe TestResult
- -- ^ Return 'Nothing' if the sample was too small to
- -- make a decision.
+mannWhitneyUtest
+ :: (Ord a, U.Unbox a)
+ => PositionTest -- ^ Perform one-tailed test (see description above).
+ -> PValue Double -- ^ The p-value at which to test (e.g. 0.05)
+ -> U.Vector a -- ^ First sample
+ -> U.Vector a -- ^ Second sample
+ -> Maybe TestResult -- ^ Return 'Nothing' if the sample was too small to
+ -- make a decision.
mannWhitneyUtest ontTail p smp1 smp2 =
mannWhitneyUSignificant ontTail (n1,n2) p $ mannWhitneyU smp1 smp2
where
diff --git a/Statistics/Test/StudentT.hs b/Statistics/Test/StudentT.hs
new file mode 100644
index 0000000..f68a3cc
--- /dev/null
+++ b/Statistics/Test/StudentT.hs
@@ -0,0 +1,149 @@
+{-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-}
+-- | Student's T-test is for assessing whether two samples have
+-- different means. This module contains several variations of
+-- the T-test. These are parametric tests and assume that the
+-- samples are normally distributed.
+module Statistics.Test.StudentT
+ (
+ studentTTest
+ , welchTTest
+ , pairedTTest
+ , module Statistics.Test.Types
+ ) where
+
+import Statistics.Distribution hiding (mean)
+import Statistics.Distribution.StudentT
+import Statistics.Sample (mean, varianceUnbiased)
+import Statistics.Test.Types
+import Statistics.Types (mkPValue,PValue)
+import Statistics.Function (square)
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector.Unboxed as U
+import qualified Data.Vector.Storable as S
+import qualified Data.Vector as V
+
+
+
+-- | Two-sample Student's t-test. It assumes that both samples are
+-- normally distributed and have same variance. Returns @Nothing@ if
+-- sample sizes are not sufficient.
+studentTTest :: (G.Vector v Double)
+ => PositionTest -- ^ one- or two-tailed test
+ -> v Double -- ^ Sample A
+ -> v Double -- ^ Sample B
+ -> Maybe (Test StudentT)
+studentTTest test sample1 sample2
+ | G.length sample1 < 2 || G.length sample2 < 2 = Nothing
+ | otherwise = Just Test
+ { testSignificance = significance test t ndf
+ , testStatistics = t
+ , testDistribution = studentT ndf
+ }
+ where
+ (t, ndf) = tStatistics True sample1 sample2
+{-# INLINABLE studentTTest #-}
+{-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
+{-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
+{-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}
+
+-- | Two-sample Welch's t-test. It assumes that both samples are
+-- normally distributed but doesn't assume that they have same
+-- variance. Returns @Nothing@ if sample sizes are not sufficient.
+welchTTest :: (G.Vector v Double)
+ => PositionTest -- ^ one- or two-tailed test
+ -> v Double -- ^ Sample A
+ -> v Double -- ^ Sample B
+ -> Maybe (Test StudentT)
+welchTTest test sample1 sample2
+ | G.length sample1 < 2 || G.length sample2 < 2 = Nothing
+ | otherwise = Just Test
+ { testSignificance = significance test t ndf
+ , testStatistics = t
+ , testDistribution = studentT ndf
+ }
+ where
+ (t, ndf) = tStatistics False sample1 sample2
+{-# INLINABLE welchTTest #-}
+{-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
+{-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
+{-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}
+
+-- | Paired two-sample t-test. Two samples are paired in a
+-- within-subject design. Returns @Nothing@ if sample size is not
+-- sufficient.
+pairedTTest :: forall v. (G.Vector v (Double, Double), G.Vector v Double)
+ => PositionTest -- ^ one- or two-tailed test
+ -> v (Double, Double) -- ^ paired samples
+ -> Maybe (Test StudentT)
+pairedTTest test sample
+ | G.length sample < 2 = Nothing
+ | otherwise = Just Test
+ { testSignificance = significance test t ndf
+ , testStatistics = t
+ , testDistribution = studentT ndf
+ }
+ where
+ (t, ndf) = tStatisticsPaired sample
+{-# INLINABLE pairedTTest #-}
+{-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-}
+{-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-}
+
+
+-------------------------------------------------------------------------------
+
+significance :: PositionTest -- ^ one- or two-tailed
+ -> Double -- ^ t statistics
+ -> Double -- ^ degree of freedom
+ -> PValue Double -- ^ p-value
+significance test t df =
+ case test of
+ -- Here we exploit symmetry of T-distribution and calculate small tail
+ SamplesDiffer -> mkPValue $ 2 * tailArea (negate (abs t))
+ AGreater -> mkPValue $ tailArea (negate t)
+ BGreater -> mkPValue $ tailArea t
+ where
+ tailArea = cumulative (studentT df)
+
+
+-- Calculate T statistics for two samples
+tStatistics :: (G.Vector v Double)
+ => Bool -- variance equality
+ -> v Double
+ -> v Double
+ -> (Double, Double)
+{-# INLINE tStatistics #-}
+tStatistics varequal sample1 sample2 = (t, ndf)
+ where
+ -- t-statistics
+ t = (m1 - m2) / sqrt (
+ if varequal
+ then ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2) * (1 / n1 + 1 / n2)
+ else s1 / n1 + s2 / n2)
+
+ -- degree of freedom
+ ndf | varequal = n1 + n2 - 2
+ | otherwise = square (s1 / n1 + s2 / n2)
+ / (square s1 / (square n1 * (n1 - 1)) + square s2 / (square n2 * (n2 - 1)))
+ -- statistics of two samples
+ n1 = fromIntegral $ G.length sample1
+ n2 = fromIntegral $ G.length sample2
+ m1 = mean sample1
+ m2 = mean sample2
+ s1 = varianceUnbiased sample1
+ s2 = varianceUnbiased sample2
+
+
+-- Calculate T-statistics for paired sample
+tStatisticsPaired :: (G.Vector v (Double, Double), G.Vector v Double)
+ => v (Double, Double)
+ -> (Double, Double)
+{-# INLINE tStatisticsPaired #-}
+tStatisticsPaired sample = (t, ndf)
+ where
+ -- t-statistics
+ t = let d = G.map (uncurry (-)) sample
+ sumd = G.sum d
+ in sumd / sqrt ((n * G.sum (G.map square d) - square sumd) / ndf)
+ -- degree of freedom
+ ndf = n - 1
+ n = fromIntegral $ G.length sample
diff --git a/Statistics/Test/Types.hs b/Statistics/Test/Types.hs
index 52a5cd0..9dfa8eb 100644
--- a/Statistics/Test/Types.hs
+++ b/Statistics/Test/Types.hs
@@ -1,34 +1,76 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
+{-# LANGUAGE DeriveFunctor, DeriveDataTypeable,DeriveGeneric #-}
module Statistics.Test.Types (
- TestType(..)
+ Test(..)
+ , isSignificant
, TestResult(..)
, significant
+ , PositionTest(..)
) where
-import Data.Aeson (FromJSON, ToJSON)
+import Control.DeepSeq (NFData(..))
+import Data.Aeson (FromJSON, ToJSON)
+import Data.Binary (Binary)
import Data.Data (Typeable, Data)
import GHC.Generics
+import Statistics.Types (PValue)
--- | Test type. Exact meaning depends on a specific test. But
--- generally it's tested whether some statistics is too big (small)
--- for 'OneTailed' or whether it too big or too small for 'TwoTailed'
-data TestType = OneTailed
- | TwoTailed
- deriving (Eq,Ord,Show,Typeable,Data,Generic)
-
-instance FromJSON TestType
-instance ToJSON TestType
-- | Result of hypothesis testing
data TestResult = Significant -- ^ Null hypothesis should be rejected
| NotSignificant -- ^ Data is compatible with hypothesis
deriving (Eq,Ord,Show,Typeable,Data,Generic)
+instance Binary TestResult
instance FromJSON TestResult
-instance ToJSON TestResult
+instance ToJSON TestResult
+instance NFData TestResult
+
+
+
+-- | Result of statistical test.
+data Test distr = Test
+ { testSignificance :: !(PValue Double)
+ -- ^ Probability of getting value of test statistics at least as
+ -- extreme as measured.
+ , testStatistics :: !Double
+ -- ^ Statistic used for test.
+ , testDistribution :: distr
+ -- ^ Distribution of test statistics if null hypothesis is correct.
+ }
+ deriving (Eq,Ord,Show,Typeable,Data,Generic,Functor)
+
+instance (Binary d) => Binary (Test d)
+instance (FromJSON d) => FromJSON (Test d)
+instance (ToJSON d) => ToJSON (Test d)
+instance (NFData d) => NFData (Test d) where
+ rnf (Test _ _ a) = rnf a
+
+-- | Check whether test is significant for given p-value.
+isSignificant :: PValue Double -> Test d -> TestResult
+isSignificant p t
+ = significant $ p >= testSignificance t
+
+
+-- | Test type for test which compare positional (mean,median etc.)
+-- information of samples.
+data PositionTest
+ = SamplesDiffer
+ -- ^ Test whether samples differ in position. Null hypothesis is
+ -- samples are not different
+ | AGreater
+ -- ^ Test if first sample (A) is larger than second (B). Null
+ -- hypothesis is first sample is not larger than second.
+ | BGreater
+ -- ^ Test if second sample is larger than first.
+ deriving (Eq,Ord,Show,Typeable,Data,Generic)
+
+instance Binary PositionTest
+instance FromJSON PositionTest
+instance ToJSON PositionTest
+instance NFData PositionTest
--- | Significant if parameter is 'True', not significant otherwiser
+-- | Significant if the parameter is 'True', not significant otherwise
significant :: Bool -> TestResult
significant True = Significant
significant False = NotSignificant
diff --git a/Statistics/Test/WilcoxonT.hs b/Statistics/Test/WilcoxonT.hs
index 11fa382..aed5fb0 100644
--- a/Statistics/Test/WilcoxonT.hs
+++ b/Statistics/Test/WilcoxonT.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ViewPatterns #-}
-- |
-- Module : Statistics.Test.WilcoxonT
-- Copyright : (c) 2010 Neil Brown
@@ -8,22 +9,20 @@
-- Portability : portable
--
-- The Wilcoxon matched-pairs signed-rank test is non-parametric test
--- which could be used to whether two related samples have different
--- means.
---
--- WARNING: current implementation contain serious bug and couldn't be
--- used with samples larger than 1023.
--- <https://github.com/bos/statistics/issues/18>
+-- which could be used to test whether two related samples have
+-- different means.
module Statistics.Test.WilcoxonT (
-- * Wilcoxon signed-rank matched-pair test
+ -- ** Test
wilcoxonMatchedPairTest
+ -- ** Building blocks
, wilcoxonMatchedPairSignedRank
, wilcoxonMatchedPairSignificant
, wilcoxonMatchedPairSignificance
, wilcoxonMatchedPairCriticalValue
- -- * Data types
- , TestType(..)
- , TestResult(..)
+ , module Statistics.Test.Types
+ -- * References
+ -- $references
) where
@@ -44,24 +43,36 @@ import Control.Applicative ((<$>))
import Data.Function (on)
import Data.List (findIndex)
import Data.Ord (comparing)
+import qualified Data.Vector.Unboxed as U
import Prelude hiding (sum)
import Statistics.Function (sortBy)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Internal (rank, splitByTags)
-import Statistics.Test.Types (TestResult(..), TestType(..), significant)
-import Statistics.Types (Sample)
-import qualified Data.Vector.Unboxed as U
+import Statistics.Test.Types
+import Statistics.Types -- (CL,pValue,getPValue)
+import Statistics.Distribution
+import Statistics.Distribution.Normal
-wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double)
-wilcoxonMatchedPairSignedRank a b = (sum ranks1, negate (sum ranks2))
+
+-- | Calculate (n,T⁺,T⁻) values for both samples, where /n/ is the size
+-- of the reduced sample from which equal pairs have been removed.
+wilcoxonMatchedPairSignedRank :: (Ord a, Num a, U.Unbox a) => U.Vector (a,a) -> (Int, Double, Double)
+wilcoxonMatchedPairSignedRank ab
+ = (nRed, sum ranks1, negate (sum ranks2))
where
+ -- Positive and negative ranks
(ranks1, ranks2) = splitByTags
$ U.zip tags (rank ((==) `on` abs) diffs)
+ -- Sorted list of differences
+ diffsSorted = sortBy (comparing abs) -- Sort the differences by absolute difference
+ $ U.filter (/= 0) -- Remove equal elements
+ $ U.map (uncurry (-)) ab -- Work out differences
+ nRed = U.length diffsSorted
+ -- Sign tags and differences
(tags,diffs) = U.unzip
- $ U.map (\x -> (x>0 , x)) -- Attack tags to distribution elements
- $ U.filter (/= 0.0) -- Remove equal elements
- $ sortBy (comparing abs) -- Sort the differences by absolute difference
- $ U.zipWith (-) a b -- Work out differences
+ $ U.map (\x -> (x>0 , x)) -- Attach tags to distribution elements
+ $ diffsSorted
+
-- | The coefficients for x^0, x^1, x^2, etc, in the expression
@@ -92,6 +103,8 @@ summedCoefficients n
| n > 1023 = error "Statistics.Test.WilcoxonT.summedCoefficients: sample is too large (see bug #18)"
| otherwise = map fromIntegral $ scanl1 (+) $ coefficients n
+
+
-- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs test
-- is significant at the given level.
--
@@ -105,23 +118,32 @@ summedCoefficients n
-- in the opposite direction, you can either pass the parameters in a different
-- order to 'wilcoxonMatchedPairSignedRank', or simply swap the values in the resulting
-- pair before passing them to this function.
-wilcoxonMatchedPairSignificant ::
- TestType -- ^ Perform one- or two-tailed test (see description below).
- -> Int -- ^ The sample size from which the (T+,T-) values were derived.
- -> Double -- ^ The p-value at which to test (e.g. 0.05)
- -> (Double, Double) -- ^ The (T+, T-) values from 'wilcoxonMatchedPairSignedRank'.
- -> Maybe TestResult -- ^ Return 'Nothing' if the sample was too
- -- small to make a decision.
-wilcoxonMatchedPairSignificant test sampleSize p (tPlus, tMinus) =
+wilcoxonMatchedPairSignificant
+ :: PositionTest -- ^ How to compare two samples
+ -> PValue Double -- ^ The p-value at which to test (e.g. @mkPValue 0.05@)
+ -> (Int, Double, Double) -- ^ The (n,T⁺, T⁻) values from 'wilcoxonMatchedPairSignedRank'.
+ -> Maybe TestResult -- ^ Return 'Nothing' if the sample was too
+ -- small to make a decision.
+wilcoxonMatchedPairSignificant test pVal (sampleSize, tPlus, tMinus) =
case test of
-- According to my nearest book (Understanding Research Methods and Statistics
-- by Gary W. Heiman, p590), to check that the first sample is bigger you must
-- use the absolute value of T- for a one-tailed check:
- OneTailed -> (significant . (abs tMinus <=) . fromIntegral) <$> wilcoxonMatchedPairCriticalValue sampleSize p
+ AGreater -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize pVal
+ return $ significant $ abs tMinus <= fromIntegral crit
+ BGreater -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize pVal
+ return $ significant $ abs tPlus <= fromIntegral crit
-- Otherwise you must use the value of T+ and T- with the smallest absolute value:
- TwoTailed -> (significant . (t <=) . fromIntegral) <$> wilcoxonMatchedPairCriticalValue sampleSize (p/2)
+ --
+ -- Note that in the absence of ties the sum of |T+| and |T-| is
+ -- constant, so by selecting the minimal one we are performing a
+ -- two-tailed test, looking at both tails of the distribution of T.
+ SamplesDiffer -> do crit <- wilcoxonMatchedPairCriticalValue sampleSize (mkPValue $ p/2)
+ return $ significant $ t <= fromIntegral crit
where
t = min (abs tPlus) (abs tMinus)
+ p = pValue pVal
+
-- | Obtains the critical value of T to compare against, given a sample size
-- and a p-value (significance level). Your T value must be less than or
@@ -134,35 +156,54 @@ wilcoxonMatchedPairSignificant test sampleSize p (tPlus, tMinus) =
-- However, this function is useful, for example, for generating lookup tables
-- for Wilcoxon signed rank critical values.
--
--- The return values of this function are generated using the method detailed in
--- the paper \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
--- Mitic, The Mathematica Journal, volume 6, issue 3, 1996, which can be found
--- here: <http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>.
--- According to that paper, the results may differ from other published lookup tables, but
--- (Mitic claims) the values obtained by this function will be the correct ones.
+-- The return values of this function are generated using the method
+-- detailed in the Mitic's paper. According to that paper, the results
+-- may differ from other published lookup tables, but (Mitic claims)
+-- the values obtained by this function will be the correct ones.
wilcoxonMatchedPairCriticalValue ::
Int -- ^ The sample size
- -> Double -- ^ The p-value (e.g. 0.05) for which you want the critical value.
+ -> PValue Double -- ^ The p-value (e.g. @mkPValue 0.05@) for which you want the critical value.
-> Maybe Int -- ^ The critical value (of T), or Nothing if
-- the sample is too small to make a decision.
-wilcoxonMatchedPairCriticalValue sampleSize p
- = case critical of
- Just n | n < 0 -> Nothing
- | otherwise -> Just n
- Nothing -> Just maxBound -- shouldn't happen: beyond end of list
+wilcoxonMatchedPairCriticalValue n pVal
+ | n < 100 =
+ case subtract 1 <$> findIndex (> m) (summedCoefficients n) of
+ Just k | k < 0 -> Nothing
+ | otherwise -> Just k
+ Nothing -> error "Statistics.Test.WilcoxonT.wilcoxonMatchedPairCriticalValue: impossible happened"
+ | otherwise =
+ case quantile (normalApprox n) p of
+ z | z < 0 -> Nothing
+ | otherwise -> Just (round z)
where
- m = (2 ** fromIntegral sampleSize) * p
- critical = subtract 1 <$> findIndex (> m) (summedCoefficients sampleSize)
+ p = pValue pVal
+ m = (2 ** fromIntegral n) * p
+
-- | Works out the significance level (p-value) of a T value, given a sample
-- size and a T value from the Wilcoxon signed-rank matched-pairs test.
--
-- See the notes on 'wilcoxonCriticalValue' for how this is calculated.
-wilcoxonMatchedPairSignificance :: Int -- ^ The sample size
- -> Double -- ^ The value of T for which you want the significance.
- -> Double -- ^ The significance (p-value).
-wilcoxonMatchedPairSignificance sampleSize rnk
- = (summedCoefficients sampleSize !! floor rnk) / 2 ** fromIntegral sampleSize
+wilcoxonMatchedPairSignificance
+ :: Int -- ^ The sample size
+ -> Double -- ^ The value of T for which you want the significance.
+ -> PValue Double -- ^ The significance (p-value).
+wilcoxonMatchedPairSignificance n t
+ = mkPValue p
+ where
+ p | n < 100 = (summedCoefficients n !! floor t) / 2 ** fromIntegral n
+ | otherwise = cumulative (normalApprox n) t
+
+
+-- | Normal approximation for Wilcoxon T statistics
+normalApprox :: Int -> NormalDistribution
+normalApprox ni
+ = normalDistr m s
+ where
+ m = n * (n + 1) / 4
+ s = sqrt $ (n * (n + 1) * (2*n + 1)) / 24
+ n = fromIntegral ni
+
-- | The Wilcoxon matched-pairs signed-rank test. The samples are
-- zipped together: if one is longer than the other, both are
@@ -174,16 +215,32 @@ wilcoxonMatchedPairSignificance sampleSize rnk
--
-- Check 'wilcoxonMatchedPairSignedRank' and
-- 'wilcoxonMatchedPairSignificant' for additional information.
-wilcoxonMatchedPairTest :: TestType -- ^ Perform one-tailed test.
- -> Double -- ^ The p-value at which to test (e.g. 0.05)
- -> Sample -- ^ First sample
- -> Sample -- ^ Second sample
- -> Maybe TestResult
- -- ^ Return 'Nothing' if the sample was too
- -- small to make a decision.
-wilcoxonMatchedPairTest test p smp1 smp2 =
- wilcoxonMatchedPairSignificant test (min n1 n2) p
- $ wilcoxonMatchedPairSignedRank smp1 smp2
+wilcoxonMatchedPairTest
+ :: (Ord a, Num a, U.Unbox a)
+ => PositionTest -- ^ Perform one-tailed test.
+ -> U.Vector (a,a) -- ^ Sample of pairs
+ -> Test () -- ^ Result of the test. (Unlike the pre-0.14
+ -- @Maybe TestResult@ version, this cannot fail.)
+wilcoxonMatchedPairTest test pairs =
+ Test { testSignificance = pVal
+ , testStatistics = t
+ , testDistribution = ()
+ }
where
- n1 = U.length smp1
- n2 = U.length smp2
+ (n,tPlus,tMinus) = wilcoxonMatchedPairSignedRank pairs
+ (t,pVal) = case test of
+ AGreater -> (abs tMinus, wilcoxonMatchedPairSignificance n (abs tMinus))
+ BGreater -> (abs tPlus, wilcoxonMatchedPairSignificance n (abs tPlus ))
+ -- Since we take minimum of T+,T- we can't get more
+ -- that p=0.5 and can multiply it by 2 without risk
+ -- of error.
+ SamplesDiffer -> let t' = min (abs tMinus) (abs tPlus)
+ p = wilcoxonMatchedPairSignificance n t'
+ in (t', mkPValue $ min 1 $ 2 * pValue p)
+
+
+-- $references
+--
+-- * \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
+-- Mitic, The Mathematica Journal, volume 6, issue 3, 1996
+-- (<http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>)
diff --git a/Statistics/Types.hs b/Statistics/Types.hs
index b7b87e3..bd7d3ca 100644
--- a/Statistics/Types.hs
+++ b/Statistics/Types.hs
@@ -1,3 +1,9 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Types
-- Copyright : (c) 2009 Bryan O'Sullivan
@@ -7,34 +13,497 @@
-- Stability : experimental
-- Portability : portable
--
--- Types for working with statistics.
-
+-- Data types common used in statistics
module Statistics.Types
- (
- Estimator(..)
+ ( -- * Confidence level
+ CL
+ -- ** Accessors
+ , confidenceLevel
+ , significanceLevel
+ -- ** Constructors
+ , mkCL
+ , mkCLE
+ , mkCLFromSignificance
+ , mkCLFromSignificanceE
+ -- ** Constants and conversion to nσ
+ , cl90
+ , cl95
+ , cl99
+ -- *** Normal approximation
+ , nSigma
+ , nSigma1
+ , getNSigma
+ , getNSigma1
+ -- * p-value
+ , PValue
+ -- ** Accessors
+ , pValue
+ -- ** Constructors
+ , mkPValue
+ , mkPValueE
+ -- * Estimates and upper/lower limits
+ , Estimate(..)
+ , NormalErr(..)
+ , ConfInt(..)
+ , UpperLimit(..)
+ , LowerLimit(..)
+ -- ** Constructors
+ , estimateNormErr
+ , (±)
+ , estimateFromInterval
+ , estimateFromErr
+ -- ** Accessors
+ , confidenceInterval
+ , asymErrors
+ , Scale(..)
+ -- * Other
, Sample
, WeightedSample
, Weights
) where
-import qualified Data.Vector.Unboxed as U (Vector)
+import Control.Monad ((<=<))
+import Control.DeepSeq (NFData(..))
+import Data.Aeson (FromJSON(..), ToJSON)
+import Data.Binary (Binary(..))
+import Data.Data (Data,Typeable)
+import Data.Maybe (fromMaybe)
+import Data.Vector.Unboxed (Unbox)
+import Data.Vector.Unboxed.Deriving (derivingUnbox)
+import GHC.Generics (Generic)
+
+import Statistics.Internal
+import Statistics.Types.Internal
+import Statistics.Distribution
+import Statistics.Distribution.Normal
+
+
+----------------------------------------------------------------
+-- Data type for confidence level
+----------------------------------------------------------------
+
+-- |
+-- Confidence level. In the context of confidence intervals it's the
+-- probability of the interval covering the true value of the measured
+-- quantity. In the context of statistical tests it's @1-α@ where α is
+-- the significance level of the test.
+--
+-- Since confidence levels are usually close to 1 they are stored as
+-- @1-CL@ internally. There are two smart constructors for @CL@:
+-- 'mkCL' and 'mkCLFromSignificance' (and corresponding variants
+-- returning @Maybe@). The first creates a @CL@ from the confidence
+-- level and the second from @1 - CL@, i.e. the significance level.
+--
+-- >>> cl95
+-- mkCLFromSignificance 0.05
+--
+-- Prior to 0.14 confidence levels were passed to function as plain
+-- @Doubles@. Use 'mkCL' to convert them to @CL@.
+newtype CL a = CL a
+ deriving (Eq, Typeable, Data, Generic)
+
+instance Show a => Show (CL a) where
+ showsPrec n (CL p) = defaultShow1 "mkCLFromSignificance" p n
+instance (Num a, Ord a, Read a) => Read (CL a) where
+ readPrec = defaultReadPrecM1 "mkCLFromSignificance" mkCLFromSignificanceE
+
+instance (Binary a, Num a, Ord a) => Binary (CL a) where
+ put (CL p) = put p
+ get = maybe (fail errMkCL) return . mkCLFromSignificanceE =<< get
+
+instance (ToJSON a) => ToJSON (CL a)
+instance (FromJSON a, Num a, Ord a) => FromJSON (CL a) where
+ parseJSON = maybe (fail errMkCL) return . mkCLFromSignificanceE <=< parseJSON
+
+instance NFData a => NFData (CL a) where
+ rnf (CL a) = rnf a
+
+-- |
+-- >>> cl95 > cl90
+-- True
+instance Ord a => Ord (CL a) where
+ CL a < CL b = a > b
+ CL a <= CL b = a >= b
+ CL a > CL b = a < b
+ CL a >= CL b = a <= b
+ max (CL a) (CL b) = CL (min a b)
+ min (CL a) (CL b) = CL (max a b)
+
+
+-- | Create confidence level from probability β, i.e. the probability
+-- that the confidence interval contains the true value of the
+-- estimate. Will throw an exception if the parameter is outside [0,1].
+--
+-- >>> mkCL 0.95 -- same as cl95
+-- mkCLFromSignificance 0.05
+mkCL :: (Ord a, Num a) => a -> CL a
+mkCL
+ = fromMaybe (error "Statistics.Types.mkCL: probability is out if [0,1] range")
+ . mkCLE
+
+-- | Same as 'mkCL' but returns @Nothing@ instead of error if
+-- parameter is out of [0,1] range
+--
+-- >>> mkCLE 0.95 -- same as cl95
+-- Just (mkCLFromSignificance 0.05)
+mkCLE :: (Ord a, Num a) => a -> Maybe (CL a)
+mkCLE p
+ | p >= 0 && p <= 1 = Just $ CL (1 - p)
+ | otherwise = Nothing
+
+-- | Create confidence level from probability α or probability that
+-- confidence interval does not contain true value of estimate. Will
+-- throw exception if parameter is out of [0,1] range
+--
+-- >>> mkCLFromSignificance 0.05 -- same as cl95
+-- mkCLFromSignificance 0.05
+mkCLFromSignificance :: (Ord a, Num a) => a -> CL a
+mkCLFromSignificance = fromMaybe (error errMkCL) . mkCLFromSignificanceE
+
+-- | Same as 'mkCLFromSignificance' but returns @Nothing@ instead of error if
+-- parameter is out of [0,1] range
+--
+-- >>> mkCLFromSignificanceE 0.05 -- same as cl95
+-- Just (mkCLFromSignificance 0.05)
+mkCLFromSignificanceE :: (Ord a, Num a) => a -> Maybe (CL a)
+mkCLFromSignificanceE p
+ | p >= 0 && p <= 1 = Just $ CL p
+ | otherwise = Nothing
+
+errMkCL :: String
+errMkCL = "Statistics.Types.mkPValCL: probability is out if [0,1] range"
+
+
+-- | Get confidence level. This function is subject to rounding
+-- errors. If @1 - CL@ is needed use 'significanceLevel' instead
+confidenceLevel :: (Num a) => CL a -> a
+confidenceLevel (CL p) = 1 - p
--- | Sample data.
-type Sample = U.Vector Double
+-- | Get significance level.
+significanceLevel :: CL a -> a
+significanceLevel (CL p) = p
--- | Sample with weights. First element of sample is data, second is weight
-type WeightedSample = U.Vector (Double,Double)
--- | An estimator of a property of a sample, such as its 'mean'.
+
+-- | 90% confidence level
+cl90 :: Fractional a => CL a
+cl90 = CL 0.10
+
+-- | 95% confidence level
+cl95 :: Fractional a => CL a
+cl95 = CL 0.05
+
+-- | 99% confidence level
+cl99 :: Fractional a => CL a
+cl99 = CL 0.01
+
+
+
+----------------------------------------------------------------
+-- Data type for p-value
+----------------------------------------------------------------
+
+-- | Newtype wrapper for p-value.
+newtype PValue a = PValue a
+ deriving (Eq,Ord, Typeable, Data, Generic)
+
+instance Show a => Show (PValue a) where
+ showsPrec n (PValue p) = defaultShow1 "mkPValue" p n
+instance (Num a, Ord a, Read a) => Read (PValue a) where
+ readPrec = defaultReadPrecM1 "mkPValue" mkPValueE
+
+instance (Binary a, Num a, Ord a) => Binary (PValue a) where
+ put (PValue p) = put p
+ get = maybe (fail errMkPValue) return . mkPValueE =<< get
+
+instance (ToJSON a) => ToJSON (PValue a)
+instance (FromJSON a, Num a, Ord a) => FromJSON (PValue a) where
+ parseJSON = maybe (fail errMkPValue) return . mkPValueE <=< parseJSON
+
+instance NFData a => NFData (PValue a) where
+ rnf (PValue a) = rnf a
+
+
+-- | Construct PValue. Throws error if argument is out of [0,1] range.
--
--- The use of an algebraic data type here allows functions such as
--- 'jackknife' and 'bootstrapBCA' to use more efficient algorithms
--- when possible.
-data Estimator = Mean
- | Variance
- | VarianceUnbiased
- | StdDev
- | Function (Sample -> Double)
+mkPValue :: (Ord a, Num a) => a -> PValue a
+mkPValue = fromMaybe (error errMkPValue) . mkPValueE
+
+-- | Construct PValue. Returns @Nothing@ if argument is out of [0,1] range.
+mkPValueE :: (Ord a, Num a) => a -> Maybe (PValue a)
+mkPValueE p
+ | p >= 0 && p <= 1 = Just $ PValue p
+ | otherwise = Nothing
+
+-- | Get p-value
+pValue :: PValue a -> a
+pValue (PValue p) = p
+
+
+-- | P-value expressed in sigma. This is a convention widely used in
+-- experimental physics. An N-sigma confidence level corresponds to
+-- the probability of lying within N sigma of a normal distribution.
+--
+-- Note that this correspondence holds for the normal distribution
+-- only; other distributions will have a different relation. Also,
+-- experimental distributions are usually only approximately normal
+-- (especially at the extreme tails).
+nSigma :: Double -> PValue Double
+nSigma n
+ | n > 0 = PValue $ 2 * cumulative standard (-n)
+ | otherwise = error "Statistics.Extra.Error.nSigma: non-positive number of sigma"
+
+-- | P-value expressed in sigma for one-tail hypothesis. This correspond to
+-- probability of obtaining value less than @N·σ@.
+nSigma1 :: Double -> PValue Double
+nSigma1 n
+ | n > 0 = PValue $ cumulative standard (-n)
+ | otherwise = error "Statistics.Extra.Error.nSigma1: non-positive number of sigma"
+
+-- | Express confidence level in sigmas
+getNSigma :: PValue Double -> Double
+getNSigma (PValue p) = negate $ quantile standard (p / 2)
+
+-- | Express confidence level in sigmas for one-tailed hypothesis.
+getNSigma1 :: PValue Double -> Double
+getNSigma1 (PValue p) = negate $ quantile standard p
+
+
+
+errMkPValue :: String
+errMkPValue = "Statistics.Types.mkPValue: probability is out if [0,1] range"
+
+
+
+----------------------------------------------------------------
+-- Point estimates
+----------------------------------------------------------------
+
+-- |
+-- A point estimate and its confidence interval. It's parametrized by
+-- both error type @e@ and value type @a@. This module provides two
+-- types of error: 'NormalErr' for normally distributed errors and
+-- 'ConfInt' for generic (possibly asymmetric) confidence intervals. See
+-- their documentation for more details.
+--
+-- For example @144 ± 5@ (assuming normality) could be expressed as
+--
+-- > Estimate { estPoint = 144
+-- > , estError = NormalErr 5
+-- > }
+--
+-- Or if we want to express @144 + 6 - 4@ at CL95 we could write:
+--
+-- > Estimate { estPoint = 144
+-- > , estError = ConfInt
+-- > { confIntLDX = 4
+-- > , confIntUDX = 6
+-- > , confIntCL = cl95
+-- > }
+--
+-- Prior to statistics 0.14 @Estimate@ data type used following definition:
+--
+-- > data Estimate = Estimate {
+-- > estPoint :: {-# UNPACK #-} !Double
+-- > , estLowerBound :: {-# UNPACK #-} !Double
+-- > , estUpperBound :: {-# UNPACK #-} !Double
+-- > , estConfidenceLevel :: {-# UNPACK #-} !Double
+-- > }
+--
+-- Now type @Estimate ConfInt Double@ should be used instead. Function
+-- 'estimateFromInterval' allows easy construction of an estimate from the same inputs.
+data Estimate e a = Estimate
+ { estPoint :: !a
+ -- ^ Point estimate.
+ , estError :: !(e a)
+ -- ^ Confidence interval for estimate.
+ } deriving (Eq, Read, Show, Typeable, Data, Generic)
+
+instance (Binary (e a), Binary a) => Binary (Estimate e a)
+instance (FromJSON (e a), FromJSON a) => FromJSON (Estimate e a)
+instance (ToJSON (e a), ToJSON a) => ToJSON (Estimate e a)
+instance (NFData (e a), NFData a) => NFData (Estimate e a) where
+ rnf (Estimate x dx) = rnf x `seq` rnf dx
+
+
+
+-- |
+-- Normal errors. They are stored as 1σ errors which corresponds to
+-- 68.27% CL. Since we can recalculate them to any confidence level if
+-- needed we don't store it.
+newtype NormalErr a = NormalErr
+ { normalError :: a
+ }
+ deriving (Eq, Read, Show, Typeable, Data, Generic)
+
+instance Binary a => Binary (NormalErr a)
+instance FromJSON a => FromJSON (NormalErr a)
+instance ToJSON a => ToJSON (NormalErr a)
+instance NFData a => NFData (NormalErr a) where
+ rnf (NormalErr x) = rnf x
+
+
+-- | Confidence interval. It assumes that the confidence interval forms
+-- a single interval and isn't a set of disjoint intervals.
+data ConfInt a = ConfInt
+ { confIntLDX :: !a
+ -- ^ Lower error estimate, or distance between point estimate and
+ -- lower bound of confidence interval.
+ , confIntUDX :: !a
+ -- ^ Upper error estimate, or distance between point estimate and
+ -- upper bound of confidence interval.
+ , confIntCL :: !(CL Double)
+ -- ^ Confidence level corresponding to given confidence interval.
+ }
+ deriving (Read,Show,Eq,Typeable,Data,Generic)
+
+instance Binary a => Binary (ConfInt a)
+instance FromJSON a => FromJSON (ConfInt a)
+instance ToJSON a => ToJSON (ConfInt a)
+instance NFData a => NFData (ConfInt a) where
+ rnf (ConfInt x y _) = rnf x `seq` rnf y
+
+
+
+----------------------------------------
+-- Constructors
+
+-- | Create estimate with normal errors
+estimateNormErr :: a -- ^ Point estimate
+ -> a -- ^ 1σ error
+ -> Estimate NormalErr a
+estimateNormErr x dx = Estimate x (NormalErr dx)
+
+-- | Synonym for 'estimateNormErr'
+(±) :: a -- ^ Point estimate
+ -> a -- ^ 1σ error
+ -> Estimate NormalErr a
+(±) = estimateNormErr
+
+-- | Create estimate with asymmetric error.
+estimateFromErr
+ :: a -- ^ Central estimate
+ -> (a,a) -- ^ Lower and upper errors. Both should be
+ -- positive but it's not checked.
+ -> CL Double -- ^ Confidence level for interval
+ -> Estimate ConfInt a
+estimateFromErr x (ldx,udx) cl = Estimate x (ConfInt ldx udx cl)
+
+-- | Create estimate with asymmetric error.
+estimateFromInterval
+ :: Num a
+ => a -- ^ Point estimate. Should lie within
+ -- interval but it's not checked.
+ -> (a,a) -- ^ Lower and upper bounds of interval
+ -> CL Double -- ^ Confidence level for interval
+ -> Estimate ConfInt a
+estimateFromInterval x (lx,ux) cl
+ = Estimate x (ConfInt (x-lx) (ux-x) cl)
+
+
+----------------------------------------
+-- Accessors
+
+-- | Get confidence interval
+confidenceInterval :: Num a => Estimate ConfInt a -> (a,a)
+confidenceInterval (Estimate x (ConfInt ldx udx _))
+ = (x - ldx, x + udx)
+
+-- | Get asymmetric errors
+asymErrors :: Estimate ConfInt a -> (a,a)
+asymErrors (Estimate _ (ConfInt ldx udx _)) = (ldx,udx)
+
+
+
+-- | Data types which could be multiplied by constant.
+class Scale e where
+ scale :: (Ord a, Num a) => a -> e a -> e a
+
+instance Scale NormalErr where
+ scale a (NormalErr e) = NormalErr (abs a * e)
+
+instance Scale ConfInt where
+ scale a (ConfInt l u cl) | a >= 0 = ConfInt (a*l) (a*u) cl
+ | otherwise = ConfInt (-a*u) (-a*l) cl
+
+instance Scale e => Scale (Estimate e) where
+ scale a (Estimate x dx) = Estimate (a*x) (scale a dx)
+
+
+
+----------------------------------------------------------------
+-- Upper/lower limit
+----------------------------------------------------------------
+
+-- | Upper limit. They are usually given for small non-negative values
+-- when it's not possible to detect a difference from zero.
+data UpperLimit a = UpperLimit
+ { upperLimit :: !a
+ -- ^ Upper limit
+ , ulConfidenceLevel :: !(CL Double)
+ -- ^ Confidence level for which limit was calculated
+ } deriving (Eq, Read, Show, Typeable, Data, Generic)
+
+
+instance Binary a => Binary (UpperLimit a)
+instance FromJSON a => FromJSON (UpperLimit a)
+instance ToJSON a => ToJSON (UpperLimit a)
+instance NFData a => NFData (UpperLimit a) where
+ rnf (UpperLimit x cl) = rnf x `seq` rnf cl
+
+
+
+-- | Lower limit. They are usually given for large quantities when
+-- it's not possible to measure them. For example: proton half-life
+data LowerLimit a = LowerLimit {
+ lowerLimit :: !a
+ -- ^ Lower limit
+ , llConfidenceLevel :: !(CL Double)
+ -- ^ Confidence level for which limit was calculated
+ } deriving (Eq, Read, Show, Typeable, Data, Generic)
+
+instance Binary a => Binary (LowerLimit a)
+instance FromJSON a => FromJSON (LowerLimit a)
+instance ToJSON a => ToJSON (LowerLimit a)
+instance NFData a => NFData (LowerLimit a) where
+ rnf (LowerLimit x cl) = rnf x `seq` rnf cl
+
+
+----------------------------------------------------------------
+-- Deriving unbox instances
+----------------------------------------------------------------
+
+derivingUnbox "CL"
+ [t| forall a. Unbox a => CL a -> a |]
+ [| \(CL a) -> a |]
+ [| CL |]
+
+derivingUnbox "PValue"
+ [t| forall a. Unbox a => PValue a -> a |]
+ [| \(PValue a) -> a |]
+ [| PValue |]
+
+derivingUnbox "Estimate"
+ [t| forall a e. (Unbox a, Unbox (e a)) => Estimate e a -> (a, e a) |]
+ [| \(Estimate x dx) -> (x,dx) |]
+ [| \(x,dx) -> (Estimate x dx) |]
+
+derivingUnbox "NormalErr"
+ [t| forall a. Unbox a => NormalErr a -> a |]
+ [| \(NormalErr a) -> a |]
+ [| NormalErr |]
+
+derivingUnbox "ConfInt"
+ [t| forall a. Unbox a => ConfInt a -> (a, a, CL Double) |]
+ [| \(ConfInt a b c) -> (a,b,c) |]
+ [| \(a,b,c) -> ConfInt a b c |]
+
+derivingUnbox "UpperLimit"
+ [t| forall a. Unbox a => UpperLimit a -> (a, CL Double) |]
+ [| \(UpperLimit a b) -> (a,b) |]
+ [| \(a,b) -> UpperLimit a b |]
--- | Weights for affecting the importance of elements of a sample.
-type Weights = U.Vector Double
+derivingUnbox "LowerLimit"
+ [t| forall a. Unbox a => LowerLimit a -> (a, CL Double) |]
+ [| \(LowerLimit a b) -> (a,b) |]
+ [| \(a,b) -> LowerLimit a b |]
diff --git a/Statistics/Types/Internal.hs b/Statistics/Types/Internal.hs
new file mode 100644
index 0000000..1d66643
--- /dev/null
+++ b/Statistics/Types/Internal.hs
@@ -0,0 +1,24 @@
+-- |
+-- Module : Statistics.Types.Internal
+-- Copyright : (c) 2009 Bryan O'Sullivan
+-- License : BSD3
+--
+-- Maintainer : bos@serpentine.com
+-- Stability : experimental
+-- Portability : portable
+--
+-- Types for working with statistics.
+module Statistics.Types.Internal where
+
+
+import qualified Data.Vector.Unboxed as U (Vector)
+
+-- | Sample data.
+type Sample = U.Vector Double
+
+-- | Sample with weights. First element of sample is data, second is weight
+type WeightedSample = U.Vector (Double,Double)
+
+-- | Weights for affecting the importance of elements of a sample.
+type Weights = U.Vector Double
+
diff --git a/changelog.md b/changelog.md
index c69138c..082c69b 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,8 +1,106 @@
-Changes in 0.13.0.0
+## Changes in 0.14.0.0
+
+Breaking update. It seriously changes parts of API. It adds new data types for
+dealing with estimates, confidence intervals, confidence levels and
+p-value. Also API for statistical tests is changed.
+
+ * Module `Statistics.Types` now contains new data types for estimates,
+ upper/lower bounds, confidence level, and p-value.
+
+ - `CL` for representing confidence level
+ - `PValue` for representing p-values
+   - `Estimate` data type moved here from `Statistics.Resampling.Bootstrap` and
+ now parametrized by type of error.
+ - `NormalError` — represents normal error.
+ - `ConfInt` — generic confidence interval
+ - `UpperLimit`,`LowerLimit` for upper/lower limits.
+
+ * New API for statistical tests. Instead of simply returning significant/not
+ significant it returns p-value, test statistics and distribution of test
+ statistics if it's available. Tests also return `Nothing` instead of throwing
+ error if sample size is not sufficient. Fixes #25.
+
+ * `Statistics.Tests.Types.TestType` data type dropped
+
+ * New smart constructors for distributions are added. They return `Nothing` if
+ parameters are outside of allowed range.
+
+ * Serialization instances (`Show/Read, Binary, ToJSON/FromJSON`) for
+ distributions no longer allows to create data types with invalid
+ parameters. They will fail to parse. Cached values are not serialized either
+ so `Binary` instances changed normal and F-distributions.
+
+ Encoding to JSON changed for Normal, F-distribution, and χ²
+ distributions. However data created using older statistics will be
+ successfully decoded.
+
+ Fixes #59.
+
+ * Statistics.Resampling.Bootstrap uses new data types for central estimates.
+
+ * Function for calculation of confidence intervals for Poisson and binomial
+ distribution added in `Statistics.ConfidenceInt`
+
+ * Tests of position now allow to ask whether first sample on average larger
+ than second, second larger than first or whether they differ significantly.
+ Affects Wilcoxon-T, Mann-Whitney-U, and Student-T tests.
+
+ * API for bootstrap changed. New data types added.
+
+ * Bug fixes for #74, #81, #83, #92, #94
+
+ * `complCumulative` added for many distributions.
+
+
+
+## Changes in 0.13.3.0
+
+ * Kernel density estimation and FFT use generic versions now.
+
+ * Code for calculation of Spearman and Pearson correlation added. Modules
+ `Statistics.Correlation.Spearman` and `Statistics.Correlation.Pearson`.
+
+ * Function for calculation covariance added in `Statistics.Sample`.
+
+ * `Statistics.Function.pair` added. It zips vector and check that lengths are
+ equal.
+
+ * New functions added to `Statistics.Matrix`
+
+ * Laplace distribution added.
+
+
+## Changes in 0.13.2.3
+
+ * Vector dependency restored to >=0.10
+
+
+## Changes in 0.13.2.2
+
+ * Vector dependency lowered to >=0.9
+
+
+## Changes in 0.13.2.1
+
+ * Vector dependency bumped to >=0.10
+
+
+## Changes in 0.13.2.0
+
+ * Support for regression bootstrap added
+
+
+## Changes in 0.13.1.1
+
+ * Fix for out of bound access in bootstrap (see `bos/criterion#52`)
+
+
+## Changes in 0.13.1.0
* All types now support JSON encoding and decoding.
-Changes in 0.12.0.0
+
+## Changes in 0.12.0.0
* The `Statistics.Math` module has been removed, after being
deprecated for several years. Use the
@@ -20,7 +118,7 @@ Changes in 0.12.0.0
* Added the Kruskal-Wallis test.
-Changes in 0.11.0.3
+## Changes in 0.11.0.3
* Fixed a subtle bug in calculation of the jackknifed unbiased variance.
@@ -29,7 +127,7 @@ Changes in 0.11.0.3
* We now calculate quantiles for normal distribution in a more
numerically stable way (bug #64).
-Changes in 0.10.6.0
+## Changes in 0.10.6.0
* The Estimator type has become an algebraic data type. This allows
the jackknife function to potentially use more efficient jackknife
@@ -43,35 +141,35 @@ Changes in 0.10.6.0
implementation of mean has better numerical accuracy in almost all
cases.
-Changes in 0.10.5.2
+## Changes in 0.10.5.2
* histogram correctly chooses range when all elements in the sample are same
(bug #57)
-Changes in 0.10.5.1
+## Changes in 0.10.5.1
* Bug fix for S.Distributions.Normal.standard introduced in 0.10.5.0 (Bug #56)
-Changes in 0.10.5.0
+## Changes in 0.10.5.0
* Enthropy type class for distributions is added.
* Probability and probability density of distribution is given in
log domain too.
-Changes in 0.10.4.0
+## Changes in 0.10.4.0
* Support for versions of GHC older than 7.2 is discontinued.
* All datatypes now support 'Data.Binary' and 'GHC.Generics'.
-Changes in 0.10.3.0
+## Changes in 0.10.3.0
* Bug fixes
-Changes in 0.10.2.0
+## Changes in 0.10.2.0
* Bugs in DCT and IDCT are fixed.
@@ -91,7 +189,7 @@ Changes in 0.10.2.0
* Bug in 'ContGen' instance for normal distribution is fixed.
-Changes in 0.10.1.0
+## Changes in 0.10.1.0
* Kolmogorov-Smirnov nonparametric test added.
@@ -101,16 +199,16 @@ Changes in 0.10.1.0
is added.
* Modules 'Statistics.Math' and 'Statistics.Constants' are moved to
- the @math-functions@ package. They are still available but marked
+ the `math-functions` package. They are still available but marked
as deprecated.
-Changed in 0.10.0.1
+## Changes in 0.10.0.1
- * @dct@ and @idct@ now have type @Vector Double -> Vector Double@
+ * `dct` and `idct` now have type `Vector Double -> Vector Double`
-Changes in 0.10.0.0
+## Changes in 0.10.0.0
* The type classes Mean and Variance are split in two. This is
required for distributions which do not have finite variance or
@@ -154,12 +252,12 @@ Changes in 0.10.0.0
* One- and two-tailed tests in S.Tests.NonParametric are selected
with sum types instead of Bool.
- * Test results returned as enumeration instead of @Bool@.
+ * Test results returned as enumeration instead of `Bool`.
* Performance improvements for Mann-Whitney U and Wilcoxon tests.
- * Module @S.Tests.NonParamtric@ is split into @S.Tests.MannWhitneyU@
- and @S.Tests.WilcoxonT@
+ * Module `S.Tests.NonParamtric` is split into `S.Tests.MannWhitneyU`
+ and `S.Tests.WilcoxonT`
* sortBy is added to S.Function.
diff --git a/statistics.cabal b/statistics.cabal
index eda3465..056ce96 100644
--- a/statistics.cabal
+++ b/statistics.cabal
@@ -1,5 +1,5 @@
name: statistics
-version: 0.13.3.0
+version: 0.14.0.0
synopsis: A library of statistical types, data, and functions
description:
This library provides a number of common functions and types useful
@@ -22,7 +22,7 @@ description:
* Common statistical tests for significant differences between
samples.
-license: BSD3
+license: BSD2
license-file: LICENSE
homepage: https://github.com/bos/statistics
bug-reports: https://github.com/bos/statistics/issues
@@ -48,7 +48,7 @@ extra-source-files:
library
exposed-modules:
Statistics.Autocorrelation
- Statistics.Constants
+ Statistics.ConfidenceInt
Statistics.Correlation
Statistics.Correlation.Kendall
Statistics.Distribution
@@ -56,6 +56,7 @@ library
Statistics.Distribution.Binomial
Statistics.Distribution.CauchyLorentz
Statistics.Distribution.ChiSquared
+ Statistics.Distribution.DiscreteUniform
Statistics.Distribution.Exponential
Statistics.Distribution.FDistribution
Statistics.Distribution.Gamma
@@ -86,6 +87,8 @@ library
Statistics.Test.KolmogorovSmirnov
Statistics.Test.KruskalWallis
Statistics.Test.MannWhitneyU
+-- Statistics.Test.Runs
+ Statistics.Test.StudentT
Statistics.Test.Types
Statistics.Test.WilcoxonT
Statistics.Transform
@@ -96,18 +99,20 @@ library
Statistics.Internal
Statistics.Sample.Internal
Statistics.Test.Internal
+ Statistics.Types.Internal
build-depends:
aeson >= 0.6.0.0,
base >= 4.4 && < 5,
binary >= 0.5.1.0,
deepseq >= 1.1.0.2,
erf,
- math-functions >= 0.1.5.2,
+ math-functions >= 0.1.7,
monad-par >= 0.3.4,
mwc-random >= 0.13.0.0,
primitive >= 0.3,
vector >= 0.10,
vector-algorithms >= 0.4,
+ vector-th-unbox,
vector-binary-instances >= 0.2.1
if impl(ghc < 7.6)
build-depends:
@@ -133,6 +138,9 @@ test-suite tests
Tests.Matrix.Types
Tests.NonParametric
Tests.NonParametric.Table
+ Tests.Orphanage
+ Tests.Parametric
+ Tests.Serialization
Tests.Transform
ghc-options:
@@ -144,6 +152,7 @@ test-suite tests
base,
binary,
erf,
+ aeson,
ieee754 >= 0.7.3,
math-functions,
mwc-random,
diff --git a/tests/Tests/ApproxEq.hs b/tests/Tests/ApproxEq.hs
index a5483c2..ba8ee0b 100644
--- a/tests/Tests/ApproxEq.hs
+++ b/tests/Tests/ApproxEq.hs
@@ -24,7 +24,8 @@ class (Eq a, Show a) => ApproxEq a where
eql eps a b = counterexample (show a ++ " /=~ " ++ show b) (eq eps a b)
(=~) :: a -> a -> Bool
- (==~) :: ApproxEq a => a -> a -> Property
+
+ (==~) :: a -> a -> Property
a ==~ b = counterexample (show a ++ " /=~ " ++ show b) (a =~ b)
instance ApproxEq Double where
diff --git a/tests/Tests/Correlation.hs b/tests/Tests/Correlation.hs
index a473170..be85390 100644
--- a/tests/Tests/Correlation.hs
+++ b/tests/Tests/Correlation.hs
@@ -5,7 +5,6 @@ module Tests.Correlation
import Control.Arrow (Arrow(..))
import qualified Data.Vector as V
-import Statistics.Matrix hiding (map)
import Statistics.Correlation
import Statistics.Correlation.Kendall
import Test.QuickCheck ((==>),Property,counterexample)
diff --git a/tests/Tests/Distribution.hs b/tests/Tests/Distribution.hs
index 836513c..083912b 100644
--- a/tests/Tests/Distribution.hs
+++ b/tests/Tests/Distribution.hs
@@ -1,39 +1,41 @@
-{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE FlexibleInstances, OverlappingInstances, ScopedTypeVariables,
ViewPatterns #-}
module Tests.Distribution (tests) where
import Control.Applicative ((<$), (<$>), (<*>))
-import Data.Binary (Binary, decode, encode)
+import qualified Control.Exception as E
import Data.List (find)
import Data.Typeable (Typeable)
+import qualified Numeric.IEEE as IEEE
+import Numeric.MathFunctions.Constants (m_tiny,m_epsilon)
+import Numeric.MathFunctions.Comparison
import Statistics.Distribution
-import Statistics.Distribution.Beta (BetaDistribution, betaDistr)
-import Statistics.Distribution.Binomial (BinomialDistribution, binomial)
+import Statistics.Distribution.Beta (BetaDistribution)
+import Statistics.Distribution.Binomial (BinomialDistribution)
import Statistics.Distribution.CauchyLorentz
-import Statistics.Distribution.ChiSquared (ChiSquared, chiSquared)
-import Statistics.Distribution.Exponential (ExponentialDistribution, exponential)
-import Statistics.Distribution.FDistribution (FDistribution, fDistribution)
-import Statistics.Distribution.Gamma (GammaDistribution, gammaDistr)
+import Statistics.Distribution.ChiSquared (ChiSquared)
+import Statistics.Distribution.Exponential (ExponentialDistribution)
+import Statistics.Distribution.FDistribution (FDistribution,fDistribution)
+import Statistics.Distribution.Gamma (GammaDistribution,gammaDistr)
import Statistics.Distribution.Geometric
import Statistics.Distribution.Hypergeometric
-import Statistics.Distribution.Laplace (LaplaceDistribution, laplace)
-import Statistics.Distribution.Normal (NormalDistribution, normalDistr)
-import Statistics.Distribution.Poisson (PoissonDistribution, poisson)
+import Statistics.Distribution.Laplace (LaplaceDistribution)
+import Statistics.Distribution.Normal (NormalDistribution)
+import Statistics.Distribution.Poisson (PoissonDistribution)
import Statistics.Distribution.StudentT
-import Statistics.Distribution.Transform (LinearTransform, linTransDistr)
-import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr)
+import Statistics.Distribution.Transform (LinearTransform, linTransDistr)
+import Statistics.Distribution.Uniform (UniformDistribution)
+import Statistics.Distribution.DiscreteUniform (DiscreteUniform, discreteUniformAB)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.QuickCheck as QC
import Test.QuickCheck.Monadic as QC
-import Tests.ApproxEq (ApproxEq(..))
-import Tests.Helpers (T(..), testAssertion, typeName)
-import Tests.Helpers (monotonicallyIncreasesIEEE)
import Text.Printf (printf)
-import qualified Control.Exception as E
-import qualified Numeric.IEEE as IEEE
+import Tests.ApproxEq (ApproxEq(..))
+import Tests.Helpers (T(..), Double01(..), testAssertion, typeName)
+import Tests.Helpers (monotonicallyIncreasesIEEE,isDenorm)
+import Tests.Orphanage ()
-- | Tests for all distributions
tests :: Test
@@ -47,7 +49,7 @@ tests = testGroup "Tests for all distributions"
, contDistrTests (T :: T NormalDistribution )
, contDistrTests (T :: T UniformDistribution )
, contDistrTests (T :: T StudentT )
- , contDistrTests (T :: T (LinearTransform StudentT) )
+ , contDistrTests (T :: T (LinearTransform NormalDistribution))
, contDistrTests (T :: T FDistribution )
, discreteDistrTests (T :: T BinomialDistribution )
@@ -55,6 +57,7 @@ tests = testGroup "Tests for all distributions"
, discreteDistrTests (T :: T GeometricDistribution0 )
, discreteDistrTests (T :: T HypergeometricDistribution )
, discreteDistrTests (T :: T PoissonDistribution )
+ , discreteDistrTests (T :: T DiscreteUniform )
, unitTests
]
@@ -63,18 +66,19 @@ tests = testGroup "Tests for all distributions"
-- Tests
----------------------------------------------------------------
--- Tests for continous distribution
-contDistrTests :: (Param d, ContDistr d, QC.Arbitrary d, Typeable d, Show d, Binary d, Eq d) => T d -> Test
+-- Tests for continuous distribution
+contDistrTests :: (Param d, ContDistr d, QC.Arbitrary d, Typeable d, Show d) => T d -> Test
contDistrTests t = testGroup ("Tests for: " ++ typeName t) $
cdfTests t ++
[ testProperty "PDF sanity" $ pdfSanityCheck t
, testProperty "Quantile is CDF inverse" $ quantileIsInvCDF t
, testProperty "quantile fails p<0||p>1" $ quantileShouldFail t
, testProperty "log density check" $ logDensityCheck t
+ , testProperty "complQuantile" $ complQuantileCheck t
]
-- Tests for discrete distribution
-discreteDistrTests :: (Param d, DiscreteDistr d, QC.Arbitrary d, Typeable d, Show d, Binary d, Eq d) => T d -> Test
+discreteDistrTests :: (Param d, DiscreteDistr d, QC.Arbitrary d, Typeable d, Show d) => T d -> Test
discreteDistrTests t = testGroup ("Tests for: " ++ typeName t) $
cdfTests t ++
[ testProperty "Prob. sanity" $ probSanityCheck t
@@ -84,7 +88,7 @@ discreteDistrTests t = testGroup ("Tests for: " ++ typeName t) $
]
-- Tests for distributions which have CDF
-cdfTests :: (Param d, Distribution d, QC.Arbitrary d, Show d, Binary d, Eq d) => T d -> [Test]
+cdfTests :: (Param d, Distribution d, QC.Arbitrary d, Show d) => T d -> [Test]
cdfTests t =
[ testProperty "C.D.F. sanity" $ cdfSanityCheck t
, testProperty "CDF limit at +inf" $ cdfLimitAtPosInfinity t
@@ -93,7 +97,6 @@ cdfTests t =
, testProperty "CDF at -inf = 1" $ cdfAtNegInfinity t
, testProperty "CDF is nondecreasing" $ cdfIsNondecreasing t
, testProperty "1-CDF is correct" $ cdfComplementIsCorrect t
- , testProperty "Binary OK" $ p_binary t
]
@@ -109,12 +112,12 @@ cdfIsNondecreasing :: (Distribution d) => T d -> d -> Double -> Double -> Bool
cdfIsNondecreasing _ d = monotonicallyIncreasesIEEE $ cumulative d
-- cumulative d +∞ = 1
-cdfAtPosInfinity :: (Param d, Distribution d) => T d -> d -> Bool
+cdfAtPosInfinity :: (Distribution d) => T d -> d -> Bool
cdfAtPosInfinity _ d
= cumulative d (1/0) == 1
-- cumulative d - ∞ = 0
-cdfAtNegInfinity :: (Param d, Distribution d) => T d -> d -> Bool
+cdfAtNegInfinity :: (Distribution d) => T d -> d -> Bool
cdfAtNegInfinity _ d
= cumulative d (-1/0) == 0
@@ -165,14 +168,15 @@ cdfDiscreteIsCorrect _ d
logDensityCheck :: (ContDistr d) => T d -> d -> Double -> Property
logDensityCheck _ d x
- = counterexample (printf "density = %g" p)
- $ counterexample (printf "logDensity = %g" logP)
- $ counterexample (printf "log p = %g" (log p))
- $ counterexample (printf "eps = %g" (abs (logP - log p) / max (abs (log p)) (abs logP)))
- $ or [ p == 0 && logP == (-1/0)
- , p < 1e-308 && logP < 609
- , eq 1e-14 (log p) logP
- ]
+ = not (isDenorm x)
+ ==> ( counterexample (printf "density = %g" p)
+ $ counterexample (printf "logDensity = %g" logP)
+ $ counterexample (printf "log p = %g" (log p))
+ $ counterexample (printf "eps = %g" (abs (logP - log p) / max (abs (log p)) (abs logP)))
+ $ or [ p == 0 && logP == (-1/0)
+ , p <= m_tiny && logP < log m_tiny
+ , eq 1e-14 (log p) logP
+ ])
where
p = density d x
logP = logDensity d x
@@ -182,18 +186,41 @@ pdfSanityCheck :: (ContDistr d) => T d -> d -> Double -> Bool
pdfSanityCheck _ d x = p >= 0
where p = density d x
+complQuantileCheck :: (ContDistr d) => T d -> d -> Double01 -> Property
+complQuantileCheck _ d (Double01 p) =
+ -- We avoid extreme tails of distributions
+ --
+ -- FIXME: all parameters are arbitrary at the moment
+ p > 0.01 && p < 0.99 ==> (abs (x1 - x0) < 1e-6)
+ where
+ x0 = quantile d (1 - p)
+ x1 = complQuantile d p
+
-- Quantile is inverse of CDF
-quantileIsInvCDF :: (Param d, ContDistr d) => T d -> d -> Double -> Property
-quantileIsInvCDF _ d (snd . properFraction -> p) =
- p > 0 && p < 1 ==> ( counterexample (printf "Quantile = %g" q )
- $ counterexample (printf "Probability = %g" p )
- $ counterexample (printf "Probability' = %g" p')
- $ counterexample (printf "Error = %e" (abs $ p - p'))
- $ abs (p - p') < invQuantilePrec d
- )
+quantileIsInvCDF :: (ContDistr d) => T d -> d -> Double01 -> Property
+quantileIsInvCDF _ d (Double01 p) =
+ and [ p > 1e-250
+ , p < 1
+ , x > m_tiny
+ , dens > 0
+ ] ==>
+ ( counterexample (printf "Quantile = %g" x )
+ $ counterexample (printf "Probability = %g" p )
+ $ counterexample (printf "Probability' = %g" p')
+ $ counterexample (printf "Expected err. = %g" err)
+ $ counterexample (printf "Rel. error = %g" (relativeError p p'))
+ $ counterexample (printf "Abs. error = %e" (abs $ p - p'))
+ $ eqRelErr err p p'
+ )
where
- q = quantile d p
- p' = cumulative d q
+ -- Algorithm for error estimation is taken from here
+ --
+ -- http://sepulcarium.org/posts/2012-07-19-rounding_effect_on_inverse.html
+ dens = density d x
+ err = 64 * m_epsilon * (1 + abs (x / p) * dens)
+ --
+ x = quantile d p
+ p' = cumulative d x
-- Test that quantile fails if p<0 or p>1
quantileShouldFail :: (ContDistr d) => T d -> d -> Double -> Property
@@ -241,61 +268,8 @@ logProbabilityCheck _ d x
logP = logProbability d x
-p_binary :: (Eq a, Show a, Binary a) => T a -> a -> Bool
-p_binary _ a = a == (decode . encode) a
-
-
-
-----------------------------------------------------------------
--- Arbitrary instances for ditributions
-----------------------------------------------------------------
-
-instance QC.Arbitrary BinomialDistribution where
- arbitrary = binomial <$> QC.choose (1,100) <*> QC.choose (0,1)
-instance QC.Arbitrary ExponentialDistribution where
- arbitrary = exponential <$> QC.choose (0,100)
-instance QC.Arbitrary LaplaceDistribution where
- arbitrary = laplace <$> QC.choose (-10,10) <*> QC.choose (0, 2)
-instance QC.Arbitrary GammaDistribution where
- arbitrary = gammaDistr <$> QC.choose (0.1,10) <*> QC.choose (0.1,10)
-instance QC.Arbitrary BetaDistribution where
- arbitrary = betaDistr <$> QC.choose (1e-3,10) <*> QC.choose (1e-3,10)
-instance QC.Arbitrary GeometricDistribution where
- arbitrary = geometric <$> QC.choose (0,1)
-instance QC.Arbitrary GeometricDistribution0 where
- arbitrary = geometric0 <$> QC.choose (0,1)
-instance QC.Arbitrary HypergeometricDistribution where
- arbitrary = do l <- QC.choose (1,20)
- m <- QC.choose (0,l)
- k <- QC.choose (1,l)
- return $ hypergeometric m l k
-instance QC.Arbitrary NormalDistribution where
- arbitrary = normalDistr <$> QC.choose (-100,100) <*> QC.choose (1e-3, 1e3)
-instance QC.Arbitrary PoissonDistribution where
- arbitrary = poisson <$> QC.choose (0,1)
-instance QC.Arbitrary ChiSquared where
- arbitrary = chiSquared <$> QC.choose (1,100)
-instance QC.Arbitrary UniformDistribution where
- arbitrary = do a <- QC.arbitrary
- b <- QC.arbitrary `suchThat` (/= a)
- return $ uniformDistr a b
-instance QC.Arbitrary CauchyDistribution where
- arbitrary = cauchyDistribution
- <$> arbitrary
- <*> ((abs <$> arbitrary) `suchThat` (> 0))
-instance QC.Arbitrary StudentT where
- arbitrary = studentT <$> ((abs <$> arbitrary) `suchThat` (>0))
-instance QC.Arbitrary (LinearTransform StudentT) where
- arbitrary = studentTUnstandardized
- <$> ((abs <$> arbitrary) `suchThat` (>0))
- <*> ((abs <$> arbitrary))
- <*> ((abs <$> arbitrary) `suchThat` (>0))
-instance QC.Arbitrary FDistribution where
- arbitrary = fDistribution
- <$> ((abs <$> arbitrary) `suchThat` (>0))
- <*> ((abs <$> arbitrary) `suchThat` (>0))
-
-
+instance QC.Arbitrary DiscreteUniform where
+ arbitrary = discreteUniformAB <$> QC.choose (1,1000) <*> QC.choose(1,1000)
-- Parameters for distribution testing. Some distribution require
-- relaxing parameters a bit
@@ -330,7 +304,7 @@ instance Param FDistribution where
unitTests :: Test
unitTests = testGroup "Unit tests"
[ testAssertion "density (gammaDistr 150 1/150) 1 == 4.883311" $
- 4.883311418525483 =~ (density (gammaDistr 150 (1/150)) 1)
+ 4.883311418525483 =~ density (gammaDistr 150 (1/150)) 1
-- Student-T
, testStudentPDF 0.3 1.34 0.0648215 -- PDF
, testStudentPDF 1 0.42 0.27058
diff --git a/tests/Tests/Helpers.hs b/tests/Tests/Helpers.hs
index 86eb081..eb35115 100644
--- a/tests/Tests/Helpers.hs
+++ b/tests/Tests/Helpers.hs
@@ -1,8 +1,12 @@
+{-# LANGUAGE ScopedTypeVariables #-}
-- | Helpers for testing
module Tests.Helpers (
-- * helpers
T(..)
, typeName
+ , Double01(..)
+ -- * IEEE 754
+ , isDenorm
-- * Generic QC tests
, monotonicallyIncreases
, monotonicallyIncreasesIEEE
@@ -16,6 +20,7 @@ module Tests.Helpers (
) where
import Data.Typeable
+import Numeric.MathFunctions.Constants (m_tiny)
import Test.Framework
import Test.Framework.Providers.HUnit
import Test.QuickCheck
@@ -32,6 +37,18 @@ typeName = show . typeOf . typeParam
typeParam :: T a -> a
typeParam _ = undefined
+-- | Check if Double denormalized
+isDenorm :: Double -> Bool
+isDenorm x = let ax = abs x in ax > 0 && ax < m_tiny
+
+-- | Generates Doubles in range [0,1]
+newtype Double01 = Double01 Double
+ deriving (Show)
+instance Arbitrary Double01 where
+ arbitrary = do
+ (_::Int, x) <- fmap properFraction arbitrary
+ return $ Double01 x
+
----------------------------------------------------------------
-- Generic QC
----------------------------------------------------------------
diff --git a/tests/Tests/NonParametric.hs b/tests/Tests/NonParametric.hs
index c4651b7..e4e53d6 100644
--- a/tests/Tests/NonParametric.hs
+++ b/tests/Tests/NonParametric.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
-- Tests for Statistics.Test.NonParametric
module Tests.NonParametric (tests) where
@@ -6,8 +8,11 @@ import Statistics.Test.KolmogorovSmirnov
import Statistics.Test.MannWhitneyU
import Statistics.Test.KruskalWallis
import Statistics.Test.WilcoxonT
-import Test.Framework (Test, testGroup)
+import Statistics.Types (PValue,pValue,cl95,mkPValue)
+
+import Test.Framework (testGroup)
import Test.Framework.Providers.HUnit
+import qualified Test.Framework as Tst
import Test.HUnit (assertEqual)
import Tests.ApproxEq (eq)
import Tests.Helpers (testAssertion, testEquality)
@@ -15,7 +20,7 @@ import Tests.NonParametric.Table (tableKSD, tableKS2D)
import qualified Data.Vector.Unboxed as U
-tests :: Test
+tests :: Tst.Test
tests = testGroup "Nonparametric tests"
$ concat [ mannWhitneyTests
, wilcoxonSumTests
@@ -27,20 +32,20 @@ tests = testGroup "Nonparametric tests"
----------------------------------------------------------------
-mannWhitneyTests :: [Test]
+mannWhitneyTests :: [Tst.Test]
mannWhitneyTests = zipWith test [(0::Int)..] testData ++
[ testEquality "Mann-Whitney U Critical Values, m=1"
(replicate (20*3) Nothing)
- [mannWhitneyUCriticalValue (1,x) p | x <- [1..20], p <- [0.005,0.01,0.025]]
+ [mannWhitneyUCriticalValue (1,x) (mkPValue p) | x <- [1..20], p <- [0.005,0.01,0.025]]
, testEquality "Mann-Whitney U Critical Values, m=2, p=0.025"
(replicate 7 Nothing ++ map Just [0,0,0,0,1,1,1,1,1,2,2,2,2])
- [mannWhitneyUCriticalValue (2,x) 0.025 | x <- [1..20]]
+ [mannWhitneyUCriticalValue (2,x) (mkPValue 0.025) | x <- [1..20]]
, testEquality "Mann-Whitney U Critical Values, m=6, p=0.05"
(replicate 1 Nothing ++ map Just [0, 2,3,5,7,8,10,12,14,16,17,19,21,23,25,26,28,30,32])
- [mannWhitneyUCriticalValue (6,x) 0.05 | x <- [1..20]]
+ [mannWhitneyUCriticalValue (6,x) (mkPValue 0.05) | x <- [1..20]]
, testEquality "Mann-Whitney U Critical Values, m=20, p=0.025"
(replicate 1 Nothing ++ map Just [2,8,14,20,27,34,41,48,55,62,69,76,83,90,98,105,112,119,127])
- [mannWhitneyUCriticalValue (20,x) 0.025 | x <- [1..20]]
+ [mannWhitneyUCriticalValue (20,x) (mkPValue 0.025) | x <- [1..20]]
]
where
test n (a, b, c, d)
@@ -49,7 +54,7 @@ mannWhitneyTests = zipWith test [(0::Int)..] testData ++
assertEqual ("Mann-Whitney U Sig " ++ show n) d ss
where
us = mannWhitneyU (U.fromList a) (U.fromList b)
- ss = mannWhitneyUSignificant TwoTailed (length a, length b) 0.05 us
+ ss = mannWhitneyUSignificant SamplesDiffer (length a, length b) p005 us
-- List of (Sample A, Sample B, (Positive Rank, Negative Rank))
testData :: [([Double], [Double], (Double, Double), Maybe TestResult)]
testData = [ ( [3,4,2,6,2,5]
@@ -84,7 +89,7 @@ mannWhitneyTests = zipWith test [(0::Int)..] testData ++
)
]
-wilcoxonSumTests :: [Test]
+wilcoxonSumTests :: [Tst.Test]
wilcoxonSumTests = zipWith test [(0::Int)..] testData
where
test n (a, b, c) = testCase "Wilcoxon Sum"
@@ -101,62 +106,64 @@ wilcoxonSumTests = zipWith test [(0::Int)..] testData
)
]
-wilcoxonPairTests :: [Test]
+wilcoxonPairTests :: [Tst.Test]
wilcoxonPairTests = zipWith test [(0::Int)..] testData ++
-- Taken from the Mitic paper:
[ testAssertion "Sig 16, 35" (to4dp 0.0467 $ wilcoxonMatchedPairSignificance 16 35)
, testAssertion "Sig 16, 36" (to4dp 0.0523 $ wilcoxonMatchedPairSignificance 16 36)
, testEquality "Wilcoxon critical values, p=0.05"
(replicate 4 Nothing ++ map Just [0,2,3,5,8,10,13,17,21,25,30,35,41,47,53,60,67,75,83,91,100,110,119])
- [wilcoxonMatchedPairCriticalValue x 0.05 | x <- [1..27]]
+ [wilcoxonMatchedPairCriticalValue x (mkPValue 0.05) | x <- [1..27]]
, testEquality "Wilcoxon critical values, p=0.025"
(replicate 5 Nothing ++ map Just [0,2,3,5,8,10,13,17,21,25,29,34,40,46,52,58,65,73,81,89,98,107])
- [wilcoxonMatchedPairCriticalValue x 0.025 | x <- [1..27]]
+ [wilcoxonMatchedPairCriticalValue x (mkPValue 0.025) | x <- [1..27]]
, testEquality "Wilcoxon critical values, p=0.01"
(replicate 6 Nothing ++ map Just [0,1,3,5,7,9,12,15,19,23,27,32,37,43,49,55,62,69,76,84,92])
- [wilcoxonMatchedPairCriticalValue x 0.01 | x <- [1..27]]
+ [wilcoxonMatchedPairCriticalValue x (mkPValue 0.01) | x <- [1..27]]
, testEquality "Wilcoxon critical values, p=0.005"
(replicate 7 Nothing ++ map Just [0,1,3,5,7,9,12,15,19,23,27,32,37,42,48,54,61,68,75,83])
- [wilcoxonMatchedPairCriticalValue x 0.005 | x <- [1..27]]
+ [wilcoxonMatchedPairCriticalValue x (mkPValue 0.005) | x <- [1..27]]
]
where
test n (a, b, c) = testEquality ("Wilcoxon Paired " ++ show n) c res
- where res = (wilcoxonMatchedPairSignedRank (U.fromList a) (U.fromList b))
+ where res = wilcoxonMatchedPairSignedRank (U.zip (U.fromList a) (U.fromList b))
-- List of (Sample A, Sample B, (Positive Rank, Negative Rank))
- testData :: [([Double], [Double], (Double, Double))]
- testData = [ ([1..10], [1..10], (0, 0 ))
- , ([1..5], [6..10], (0, 5*(-3)))
+ testData :: [([Double], [Double], (Int,Double, Double))]
+ testData = [ ([1..10], [1..10], (0, 0, 0 ))
+ , ([1..5], [6..10], (5, 0, 5*(-3)))
-- Worked example from the Internet:
, ( [125,115,130,140,140,115,140,125,140,135]
, [110,122,125,120,140,124,123,137,135,145]
- , ( sum $ filter (> 0) [7,-3,1.5,9,0,-4,8,-6,1.5,-5]
+ , ( 9
+ , sum $ filter (> 0) [7,-3,1.5,9,0,-4,8,-6,1.5,-5]
, sum $ filter (< 0) [7,-3,1.5,9,0,-4,8,-6,1.5,-5]
)
)
-- Worked examples from books/papers:
, ( [2.4,1.9,2.3,1.9,2.4,2.5]
, [2.0,2.1,2.0,2.0,1.8,2.0]
- , (18, -3)
+ , (6, 18, -3)
)
, ( [130,170,125,170,130,130,145,160]
, [120,163,120,135,143,136,144,120]
- , (27, -9)
+ , (8, 27, -9)
)
, ( [540,580,600,680,430,740,600,690,605,520]
, [760,710,1105,880,500,990,1050,640,595,520]
- , (3, -42)
+ , (9, 3, -42)
)
]
- to4dp tgt x = x >= tgt - 0.00005 && x < tgt + 0.00005
+ to4dp tgt (pValue -> x) = x >= tgt - 0.00005 && x < tgt + 0.00005
----------------------------------------------------------------
-kruskalWallisRankTests :: [Test]
+kruskalWallisRankTests :: [Tst.Test]
kruskalWallisRankTests = zipWith test [(0::Int)..] testData
where
test n (a, b) = testCase "Kruskal-Wallis Ranking"
$ assertEqual ("Kruskal-Wallis " ++ show n) (map U.fromList b) (kruskalWallisRank $ map U.fromList a)
+ testData :: [([[Int]],[[Double]])]
testData = [ ( [ [68,93,123,83,108,122]
, [119,116,101,103,113,84]
, [70,68,54,73,81,68]
@@ -170,18 +177,19 @@ kruskalWallisRankTests = zipWith test [(0::Int)..] testData
)
]
-kruskalWallisTests :: [Test]
+kruskalWallisTests :: [Tst.Test]
kruskalWallisTests = zipWith test [(0::Int)..] testData
where
test n (a, b, c) = testCase "Kruskal-Wallis" $ do
assertEqual ("Kruskal-Wallis " ++ show n) (round100 b) (round100 kw)
assertEqual ("Kruskal-Wallis Sig " ++ show n) c kwt
where
- kw = kruskalWallis $ map U.fromList a
- kwt = kruskalWallisTest 0.05 $ map U.fromList a
+ kw = kruskalWallis $ map U.fromList a
+ kwt = isSignificant p005 `fmap` kruskalWallisTest (map U.fromList a)
round100 :: Double -> Integer
round100 = round . (*100)
+ testData :: [([[Double]], Double, Maybe TestResult)]
testData = [ ( [ [68,93,123,83,108,122]
, [119,116,101,103,113,84]
, [70,68,54,73,81,68]
@@ -220,7 +228,7 @@ kruskalWallisTests = zipWith test [(0::Int)..] testData
----------------------------------------------------------------
-kolmogorovSmirnovDTest :: [Test]
+kolmogorovSmirnovDTest :: [Tst.Test]
kolmogorovSmirnovDTest =
[ testAssertion "K-S D statistics" $
and [ eq 1e-6 (kolmogorovSmirnovD standard (toU sample)) reference
@@ -291,3 +299,6 @@ kolmogorovSmirnovDTest =
, (0.392 , 30, 0.99988478803318 )
, (0.09 , 100, 0.629367974413669 )
]
+
+p005 :: PValue Double
+p005 = mkPValue 0.05
diff --git a/tests/Tests/Orphanage.hs b/tests/Tests/Orphanage.hs
new file mode 100644
index 0000000..c3f7980
--- /dev/null
+++ b/tests/Tests/Orphanage.hs
@@ -0,0 +1,103 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+-- |
+-- Orphan instances for common data types
+module Tests.Orphanage where
+
+import Control.Applicative
+import Statistics.Distribution.Beta (BetaDistribution, betaDistr)
+import Statistics.Distribution.Binomial (BinomialDistribution, binomial)
+import Statistics.Distribution.CauchyLorentz
+import Statistics.Distribution.ChiSquared (ChiSquared, chiSquared)
+import Statistics.Distribution.Exponential (ExponentialDistribution, exponential)
+import Statistics.Distribution.FDistribution (FDistribution, fDistribution)
+import Statistics.Distribution.Gamma (GammaDistribution, gammaDistr)
+import Statistics.Distribution.Geometric
+import Statistics.Distribution.Hypergeometric
+import Statistics.Distribution.Laplace (LaplaceDistribution, laplace)
+import Statistics.Distribution.Normal (NormalDistribution, normalDistr)
+import Statistics.Distribution.Poisson (PoissonDistribution, poisson)
+import Statistics.Distribution.StudentT
+import Statistics.Distribution.Transform (LinearTransform, scaleAround)
+import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr)
+import Statistics.Types
+
+import Test.QuickCheck as QC
+
+
+----------------------------------------------------------------
+-- Arbitrary instances for distributions
+----------------------------------------------------------------
+
+instance QC.Arbitrary BinomialDistribution where
+ arbitrary = binomial <$> QC.choose (1,100) <*> QC.choose (0,1)
+instance QC.Arbitrary ExponentialDistribution where
+ arbitrary = exponential <$> QC.choose (0,100)
+instance QC.Arbitrary LaplaceDistribution where
+ arbitrary = laplace <$> QC.choose (-10,10) <*> QC.choose (0, 2)
+instance QC.Arbitrary GammaDistribution where
+ arbitrary = gammaDistr <$> QC.choose (0.1,10) <*> QC.choose (0.1,10)
+instance QC.Arbitrary BetaDistribution where
+ arbitrary = betaDistr <$> QC.choose (1e-3,10) <*> QC.choose (1e-3,10)
+instance QC.Arbitrary GeometricDistribution where
+ arbitrary = geometric <$> QC.choose (0,1)
+instance QC.Arbitrary GeometricDistribution0 where
+ arbitrary = geometric0 <$> QC.choose (0,1)
+instance QC.Arbitrary HypergeometricDistribution where
+ arbitrary = do l <- QC.choose (1,20)
+ m <- QC.choose (0,l)
+ k <- QC.choose (1,l)
+ return $ hypergeometric m l k
+instance QC.Arbitrary NormalDistribution where
+ arbitrary = normalDistr <$> QC.choose (-100,100) <*> QC.choose (1e-3, 1e3)
+instance QC.Arbitrary PoissonDistribution where
+ arbitrary = poisson <$> QC.choose (0,1)
+instance QC.Arbitrary ChiSquared where
+ arbitrary = chiSquared <$> QC.choose (1,100)
+instance QC.Arbitrary UniformDistribution where
+ arbitrary = do a <- QC.arbitrary
+ b <- QC.arbitrary `suchThat` (/= a)
+ return $ uniformDistr a b
+instance QC.Arbitrary CauchyDistribution where
+ arbitrary = cauchyDistribution
+ <$> arbitrary
+ <*> ((abs <$> arbitrary) `suchThat` (> 0))
+instance QC.Arbitrary StudentT where
+ arbitrary = studentT <$> ((abs <$> arbitrary) `suchThat` (>0))
+instance QC.Arbitrary d => QC.Arbitrary (LinearTransform d) where
+ arbitrary = do
+ m <- QC.choose (-10,10)
+ s <- QC.choose (1e-1,1e1)
+ d <- arbitrary
+ return $ scaleAround m s d
+instance QC.Arbitrary FDistribution where
+ arbitrary = fDistribution
+ <$> ((abs <$> arbitrary) `suchThat` (>0))
+ <*> ((abs <$> arbitrary) `suchThat` (>0))
+
+
+instance (Arbitrary a, Ord a, RealFrac a) => Arbitrary (PValue a) where
+ arbitrary = do
+ (_::Int,x) <- properFraction <$> arbitrary
+ return $ mkPValue $ abs x
+
+instance (Arbitrary a, Ord a, RealFrac a) => Arbitrary (CL a) where
+ arbitrary = do
+ (_::Int,x) <- properFraction <$> arbitrary
+ return $ mkCLFromSignificance $ abs x
+
+instance Arbitrary a => Arbitrary (NormalErr a) where
+ arbitrary = NormalErr <$> arbitrary
+
+instance Arbitrary a => Arbitrary (ConfInt a) where
+ arbitrary = liftA3 ConfInt arbitrary arbitrary arbitrary
+
+instance (Arbitrary (e a), Arbitrary a) => Arbitrary (Estimate e a) where
+ arbitrary = liftA2 Estimate arbitrary arbitrary
+
+instance (Arbitrary a) => Arbitrary (UpperLimit a) where
+ arbitrary = liftA2 UpperLimit arbitrary arbitrary
+
+instance (Arbitrary a) => Arbitrary (LowerLimit a) where
+ arbitrary = liftA2 LowerLimit arbitrary arbitrary
diff --git a/tests/Tests/Parametric.hs b/tests/Tests/Parametric.hs
new file mode 100644
index 0000000..19c166e
--- /dev/null
+++ b/tests/Tests/Parametric.hs
@@ -0,0 +1,103 @@
+module Tests.Parametric (tests) where
+
+import Data.Maybe (fromJust)
+import Statistics.Test.StudentT
+import Statistics.Test.Types
+import Statistics.Types
+import qualified Data.Vector.Unboxed as U
+import Test.Framework (testGroup)
+import Tests.Helpers (testEquality)
+import qualified Test.Framework as Tst
+
+tests :: Tst.Test
+tests = testGroup "Parametric tests" studentTTests
+
+-- 2 samples x 20 obs data
+--
+-- Both samples are samples from normal distributions with the same variance (= 1.0),
+-- but their means are different (0.0 and 0.5, respectively).
+--
+-- You can reproduce the data with R (3.1.0) as follows:
+-- set.seed(0)
+-- sample1 = rnorm(20)
+-- sample2 = rnorm(20, 0.5)
+-- student = t.test(sample1, sample2, var.equal=T)
+-- welch = t.test(sample1, sample2)
+-- paired = t.test(sample1, sample2, paired=T)
+sample1, sample2 :: U.Vector Double
+sample1 = U.fromList [
+ 1.262954284880793e+00,
+ -3.262333607056494e-01,
+ 1.329799262922501e+00,
+ 1.272429321429405e+00,
+ 4.146414344564082e-01,
+ -1.539950041903710e+00,
+ -9.285670347135381e-01,
+ -2.947204467905602e-01,
+ -5.767172747536955e-03,
+ 2.404653388857951e+00,
+ 7.635934611404596e-01,
+ -7.990092489893682e-01,
+ -1.147657009236351e+00,
+ -2.894615736882233e-01,
+ -2.992151178973161e-01,
+ -4.115108327950670e-01,
+ 2.522234481561323e-01,
+ -8.919211272845686e-01,
+ 4.356832993557186e-01,
+ -1.237538421929958e+00]
+sample2 = U.fromList [
+ 2.757321147216907e-01,
+ 8.773956459817011e-01,
+ 6.333363608148415e-01,
+ 1.304189509744908e+00,
+ 4.428932256161913e-01,
+ 1.003607972233726e+00,
+ 1.585769362145687e+00,
+ -1.909538396968303e-01,
+ -7.845993538721883e-01,
+ 5.467261721883520e-01,
+ 2.642934435604988e-01,
+ -4.288825501025439e-02,
+ 6.668968254321778e-02,
+ -1.494716467962331e-01,
+ 1.226750747385451e+00,
+ 1.651911754087200e+00,
+ 1.492160365445798e+00,
+ 7.048689050811874e-02,
+ 1.738304100853380e+00,
+ 2.206537181457307e-01]
+
+
+testTTest :: String
+ -> PValue Double
+ -> Test d
+ -> [Tst.Test]
+testTTest name pVal test =
+ [ testEquality name (isSignificant pVal test) NotSignificant
+ , testEquality name (isSignificant (mkPValue $ pValue pVal + 1e-5) test)
+ Significant
+ ]
+
+studentTTests :: [Tst.Test]
+studentTTests = concat
+ [ -- R: t.test(sample1, sample2, alt="two.sided", var.equal=T)
+ testTTest "two-sample t-test SamplesDiffer Student"
+ (mkPValue 0.03410) (fromJust $ studentTTest SamplesDiffer sample1 sample2)
+ -- R: t.test(sample1, sample2, alt="two.sided", var.equal=F)
+ , testTTest "two-sample t-test SamplesDiffer Welch"
+ (mkPValue 0.03483) (fromJust $ welchTTest SamplesDiffer sample1 sample2)
+ -- R: t.test(sample1, sample2, alt="two.sided", paired=T)
+ , testTTest "two-sample t-test SamplesDiffer Paired"
+ (mkPValue 0.03411) (fromJust $ pairedTTest SamplesDiffer sample12)
+ -- R: t.test(sample1, sample2, alt="less", var.equal=T)
+ , testTTest "two-sample t-test BGreater Student"
+ (mkPValue 0.01705) (fromJust $ studentTTest BGreater sample1 sample2)
+ -- R: t.test(sample1, sample2, alt="less", var.equal=F)
+ , testTTest "two-sample t-test BGreater Welch"
+ (mkPValue 0.01741) (fromJust $ welchTTest BGreater sample1 sample2)
+ -- R: t.test(sample1, sample2, alt="less", paired=T)
+ , testTTest "two-sample t-test BGreater Paired"
+ (mkPValue 0.01705) (fromJust $ pairedTTest BGreater sample12)
+ ]
+ where sample12 = U.zip sample1 sample2
diff --git a/tests/Tests/Serialization.hs b/tests/Tests/Serialization.hs
new file mode 100644
index 0000000..3de21c9
--- /dev/null
+++ b/tests/Tests/Serialization.hs
@@ -0,0 +1,82 @@
+-- |
+-- Tests for data serialization instances
+module Tests.Serialization where
+
+import Data.Binary (Binary,decode,encode)
+import Data.Aeson (FromJSON,ToJSON,Result(..),toJSON,fromJSON)
+import Data.Typeable
+
+import Statistics.Distribution.Beta (BetaDistribution)
+import Statistics.Distribution.Binomial (BinomialDistribution)
+import Statistics.Distribution.CauchyLorentz
+import Statistics.Distribution.ChiSquared (ChiSquared)
+import Statistics.Distribution.Exponential (ExponentialDistribution)
+import Statistics.Distribution.FDistribution (FDistribution)
+import Statistics.Distribution.Gamma (GammaDistribution)
+import Statistics.Distribution.Geometric
+import Statistics.Distribution.Hypergeometric
+import Statistics.Distribution.Laplace (LaplaceDistribution)
+import Statistics.Distribution.Normal (NormalDistribution)
+import Statistics.Distribution.Poisson (PoissonDistribution)
+import Statistics.Distribution.StudentT
+import Statistics.Distribution.Transform (LinearTransform)
+import Statistics.Distribution.Uniform (UniformDistribution)
+import Statistics.Types
+
+import Test.Framework (Test, testGroup)
+import Test.Framework.Providers.QuickCheck2 (testProperty)
+import Test.QuickCheck as QC
+
+import Tests.Helpers
+import Tests.Orphanage ()
+
+
+tests :: Test
+tests = testGroup "Test for data serialization"
+ [ serializationTests (T :: T (CL Float))
+ , serializationTests (T :: T (CL Double))
+ , serializationTests (T :: T (PValue Float))
+ , serializationTests (T :: T (PValue Double))
+ , serializationTests (T :: T (NormalErr Double))
+ , serializationTests (T :: T (ConfInt Double))
+ , serializationTests (T :: T (Estimate NormalErr Double))
+ , serializationTests (T :: T (Estimate ConfInt Double))
+ , serializationTests (T :: T (LowerLimit Double))
+ , serializationTests (T :: T (UpperLimit Double))
+ -- Distributions
+ , serializationTests (T :: T BetaDistribution )
+ , serializationTests (T :: T CauchyDistribution )
+ , serializationTests (T :: T ChiSquared )
+ , serializationTests (T :: T ExponentialDistribution )
+ , serializationTests (T :: T GammaDistribution )
+ , serializationTests (T :: T LaplaceDistribution )
+ , serializationTests (T :: T NormalDistribution )
+ , serializationTests (T :: T UniformDistribution )
+ , serializationTests (T :: T StudentT )
+ , serializationTests (T :: T (LinearTransform NormalDistribution))
+ , serializationTests (T :: T FDistribution )
+ , serializationTests (T :: T BinomialDistribution )
+ , serializationTests (T :: T GeometricDistribution )
+ , serializationTests (T :: T GeometricDistribution0 )
+ , serializationTests (T :: T HypergeometricDistribution )
+ , serializationTests (T :: T PoissonDistribution )
+ ]
+
+
+serializationTests
+ :: (Eq a, Typeable a, Binary a, Show a, Read a, ToJSON a, FromJSON a, Arbitrary a)
+ => T a -> Test
+serializationTests t = testGroup ("Tests for: " ++ typeName t)
+ [ testProperty "show/read" (p_showRead t)
+ , testProperty "binary" (p_binary t)
+ , testProperty "aeson" (p_aeson t)
+ ]
+
+p_binary :: (Eq a, Binary a) => T a -> a -> Bool
+p_binary _ a = a == (decode . encode) a
+
+p_showRead :: (Eq a, Read a, Show a) => T a -> a -> Bool
+p_showRead _ a = a == (read . show) a
+
+p_aeson :: (Eq a, ToJSON a, FromJSON a) => T a -> a -> Bool
+p_aeson _ a = Data.Aeson.Success a == (fromJSON . toJSON) a
diff --git a/tests/Tests/Transform.hs b/tests/Tests/Transform.hs
index 40afca5..64a6b1d 100644
--- a/tests/Tests/Transform.hs
+++ b/tests/Tests/Transform.hs
@@ -14,7 +14,8 @@ import Statistics.Function (within)
import Statistics.Transform (CD, dct, fft, idct, ifft)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.QuickCheck2 (testProperty)
-import Test.QuickCheck (Positive(..), Arbitrary(..), Gen, choose, vectorOf, counterexample)
+import Test.QuickCheck ( Positive(..), Arbitrary(..), Blind(..), (==>), Gen
+ , choose, vectorOf, counterexample, forAll)
import Test.QuickCheck.Property (Property(..))
import Tests.Helpers (testAssertion)
import Text.Printf (printf)
@@ -68,8 +69,11 @@ t_impulse k (Positive m) = U.all (c_near i) (fft v)
-- If a real-valued impulse is offset from the beginning of an
-- otherwise zero vector, the sum-of-squares of each component of the
-- result should equal the square of the impulse.
-t_impulse_offset :: Double -> Positive Int -> Positive Int -> Bool
-t_impulse_offset k (Positive x) (Positive m) = U.all ok (fft v)
+t_impulse_offset :: Double -> Positive Int -> Positive Int -> Property
+t_impulse_offset k (Positive x) (Positive m)
+ -- For numbers smaller than 1e-162 their square underflows and test
+ -- fails spuriously
+ = abs k >= 1e-100 ==> U.all ok (fft v)
where v = G.concat [G.replicate xn 0, G.singleton i, G.replicate (n-xn-1) 0]
ok (re :+ im) = within ulps (re*re + im*im) (k*k)
i = k :+ 0
@@ -83,15 +87,14 @@ t_impulse_offset k (Positive x) (Positive m) = U.all ok (fft v)
-- whole are approximate equal.
t_fftInverse :: (HasNorm (U.Vector a), U.Unbox a, Num a, Show a, Arbitrary a)
=> (U.Vector a -> U.Vector a) -> Property
-t_fftInverse roundtrip = MkProperty $ do
- x <- genFftVector
- let n = G.length x
- x' = roundtrip x
- d = G.zipWith (-) x x'
- nd = vectorNorm d
- nx = vectorNorm x
- unProperty
- $ counterexample "Original vector"
+t_fftInverse roundtrip =
+ forAll (Blind <$> genFftVector) $ \(Blind x) ->
+ let n = G.length x
+ x' = roundtrip x
+ d = G.zipWith (-) x x'
+ nd = vectorNorm d
+ nx = vectorNorm x
+ in counterexample "Original vector"
$ counterexample (show x )
$ counterexample "Transformed one"
$ counterexample (show x')
diff --git a/tests/tests.hs b/tests/tests.hs
index 128ec2e..039b8bb 100644
--- a/tests/tests.hs
+++ b/tests/tests.hs
@@ -4,15 +4,18 @@ import qualified Tests.Function as Function
import qualified Tests.KDE as KDE
import qualified Tests.Matrix as Matrix
import qualified Tests.NonParametric as NonParametric
+import qualified Tests.Parametric as Parametric
import qualified Tests.Transform as Transform
import qualified Tests.Correlation as Correlation
-
+import qualified Tests.Serialization
main :: IO ()
main = defaultMain [ Distribution.tests
, Function.tests
, KDE.tests
, Matrix.tests
, NonParametric.tests
+ , Parametric.tests
, Transform.tests
, Correlation.tests
+ , Tests.Serialization.tests
]