summaryrefslogtreecommitdiff
path: root/lib/BenchShow/Analysis.hs
blob: a61ad5db9bf3d026a40d2483e200ed1d3b002eae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}

-- |
-- Module      : BenchShow.Analysis
-- Copyright   : (c) 2009-2014 Bryan O'Sullivan
--               (c) 2018 Composewell Technologies
--
-- License     : BSD-style
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
-- Portability : GHC

module BenchShow.Analysis
    ( OutlierEffect(..)
    , OutlierVariance(..)
    , countOutliers
    , Estimator(..)
    , AnalyzedField(..)
    , getAnalyzedValue
    , BenchmarkMatrix(..)
    , BenchmarkIterMatrix(..)
    , foldBenchmark
    , filterSamples
    , isMaxField
    ) where

import Control.Applicative
import Data.Char (toLower)
import Data.Data (Data, Typeable)
import Data.Int (Int64)
import Data.List (elemIndex, transpose)
import Data.Maybe (fromMaybe)
import Data.Traversable
import GHC.Generics (Generic)
import Statistics.Function (sort)
import Statistics.Quantile (weightedAvg)
import Statistics.Regression (bootstrapRegress, olsRegress)
import Statistics.Resampling (resample)
import Statistics.Resampling.Bootstrap (bootstrapBCA)
import Statistics.Sample (mean, stdDev)
import Statistics.Sample.KernelDensity (kde)
import Statistics.Types (Sample, Estimate(..), ConfInt(..), cl95, CL)
import System.Random.MWC (GenIO, createSystemRandom)
import Prelude hiding (sequence, mapM)

import qualified Statistics.Resampling as St
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U

-------------------------------------------------------------------------------
-- Outliers
-------------------------------------------------------------------------------

-- | Outliers from sample data, calculated using the boxplot
-- technique.
data Outliers = Outliers {
      samplesSeen :: !Int64
    , lowSevere   :: !Int64
    -- ^ More than 3 times the interquartile range (IQR) below the
    -- first quartile.
    , lowMild     :: !Int64
    -- ^ Between 1.5 and 3 times the IQR below the first quartile.
    , highMild    :: !Int64
    -- ^ Between 1.5 and 3 times the IQR above the third quartile.
    , highSevere  :: !Int64
    -- ^ More than 3 times the IQR above the third quartile.
    } deriving (Eq, Show, Typeable, Data, Generic)

-- | A description of the extent to which outliers in the sample data
-- affect the sample mean and standard deviation.
data OutlierEffect = Unaffected -- ^ Less than 1% effect.
                   | Slight     -- ^ Between 1% and 10%.
                   | Moderate   -- ^ Between 10% and 50%.
                   | Severe     -- ^ Above 50% (i.e. measurements
                                -- are useless).
                     deriving (Eq, Ord, Show, Typeable, Data, Generic)

outliersEmpty :: Outliers
outliersEmpty = Outliers 0 0 0 0 0

addOutliers :: Outliers -> Outliers -> Outliers
addOutliers (Outliers s a b c d) (Outliers t w x y z) =
    Outliers (s+t) (a+w) (b+x) (c+y) (d+z)
{-# INLINE addOutliers #-}

-- | Analysis of the extent to which outliers in a sample affect its
-- standard deviation (and to some extent, its mean).
data OutlierVariance = OutlierVariance {
      ovEffect   :: !OutlierEffect
    -- ^ Qualitative description of effect.
    , ovDesc     :: !String
    -- ^ Brief textual description of effect.
    , ovFraction :: !Double
    -- ^ Quantitative description of effect (a fraction between 0 and 1).
    } deriving (Eq, Show, Typeable, Data, Generic)

-- | Classify outliers in a data set, using the boxplot technique.
classifyOutliers :: Sample -> Outliers
classifyOutliers sa = U.foldl' ((. outlier) . addOutliers) outliersEmpty ssa
    where outlier e = Outliers
                { samplesSeen = 1
                , lowSevere = if e <= loS && e < hiM then 1 else 0
                , lowMild = if e > loS && e <= loM then 1 else 0
                , highMild = if e >= hiM && e < hiS then 1 else 0
                , highSevere = if e >= hiS && e > loM then 1 else 0
                }
          !loS = q1 - (iqr * 3)
          !loM = q1 - (iqr * 1.5)
          !hiM = q3 + (iqr * 1.5)
          !hiS = q3 + (iqr * 3)
          q1   = weightedAvg 1 4 ssa
          q3   = weightedAvg 3 4 ssa
          ssa  = sort sa
          iqr  = q3 - q1

-- | Compute the extent to which outliers in the sample data affect
-- the sample mean and standard deviation.
outlierVariance
  :: Double -- ^ mean
  -> Double -- ^ standard deviation.
  -> Double -- ^ Number of original iterations.
  -> OutlierVariance
outlierVariance µ σ a = OutlierVariance effect desc varOutMin
    where
    µa    = µ / a
    µgMin = µa / 2
    σg2   = σg * σg where σg = min (µgMin / 4) (σ / sqrt a)
    σ2    = σ * σ
    varOut c  = (ac / a) * (σ2 - ac * σg2) where ac = a - c
    cMax x    = fromIntegral (floor (-2 * k0 / (k1 + sqrt det)) :: Int)
        where
        ad = a * d
            where
            d = k * k
            k = µa - x
        k0    = -a * ad
        k1    = σ2 - a * σg2 + ad
        det   = k1 * k1 - 4 * σg2 * k0

    minBy f q r = min (f q) (f r)
    varOutMin = if σ2 == 0
                then 0
                else (minBy varOut 1 (minBy cMax 0 µgMin)) / σ2

    (effect, desc) | varOutMin < 0.01 = (Unaffected, "no")
                   | varOutMin < 0.1  = (Slight,     "slight")
                   | varOutMin < 0.5  = (Moderate,   "moderate")
                   | otherwise        = (Severe,     "severe")

-- | Count the total number of outliers in a sample.
countOutliers :: Outliers -> Int64
countOutliers (Outliers _ a b c d) = a + b + c + d
{-# INLINE countOutliers #-}

-------------------------------------------------------------------------------
-- Linear regression
-------------------------------------------------------------------------------

useRegression :: Bool
useRegression = True

useBootstrap :: Bool
useBootstrap = True

resampleCount :: Int
resampleCount = 1000

confidence :: CL Double
confidence = cl95

regress
    :: GenIO
    -> Int  -- index of the iters field, we return the coefficient of only the
            -- iters field
    -> [String] -- responder column names
    -> [([Double], [Double])]
    -> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
regress randGen i rcols samples = do
    -- perform ordinary least squares regression for each field
    -- the main predictor is the number of iterations
    let predVectors = map U.fromList $ transpose $ map fst samples
        regressWithIters = mapM (bootstrapRegress randGen resampleCount
                                confidence olsRegress predVectors)

    let avoidMaxFields name vec =
            if isMaxField name
            then Nothing
            else Just vec
    let respVectors = map U.fromList $ transpose $ map snd samples
    res <- mapM regressWithIters (zipWith avoidMaxFields rcols respVectors)
    return $ map (fmap (\(v,r2) -> ((G.toList v) !! i, r2))) res

-------------------------------------------------------------------------------
-- Mean and std deviation by boostrap resampling
-------------------------------------------------------------------------------

estimateMeanAndStdDev
    :: GenIO
    -> [U.Vector Double]
    -> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
estimateMeanAndStdDev randGen vectors = do
    let resamp = resample randGen [St.Mean, St.StdDev] resampleCount
    res <- mapM resamp vectors
    return $ fmap (\[mn,dev] -> (mn, dev))
        $ getZipList
        $ bootstrapBCA confidence
            <$> ZipList vectors
            <*> ZipList res

-------------------------------------------------------------------------------
-- Statistical analysis of benchmark iterations
-------------------------------------------------------------------------------

-- By default the fields are considered "scaled" fields that is
-- they scale by iterations. However in case of maxrss field it is
-- a max value across the experiment and does not scale by
-- iterations, in this case we just need to take a mean or max
-- without scaling.
isMaxField :: String -> Bool
isMaxField fieldName = map toLower fieldName == "maxrss"

rescaleIteration :: Int -> [String] -> ([Double], [Double]) -> [Double]
rescaleIteration idx rcols (pvals, vals) =
    let iter = pvals !! idx
    in zipWith ($) (map ($ iter) foldFields) vals

    where

    getMeanOrMax fname i val =
        if isMaxField fname
        then val
        else val / i

    foldFields = map getMeanOrMax rcols

data AnalyzedField = AnalyzedField
    { analyzedMean       :: !Double
    , analyzedStdDev     :: !Double

    , analyzedMedian     :: !Double
    , analyzedOutliers   :: !Outliers
    , analyzedOutlierVar :: !OutlierVariance
    , analyzedKDE        :: !(U.Vector Double, U.Vector Double)

    , analyzedRegCoeff   :: Maybe (Estimate ConfInt Double)
    , analyzedRegRSq     :: Maybe (Estimate ConfInt Double)
    } deriving Show

-- | The statistical estimator used to arrive at a single value for a
-- benchmark when samples from multiple experiments are available.
--
-- @since 0.2.0
data Estimator =
      Median        -- ^ Report the median, outliers and outlier variance using
                    -- box-plot method. This is the most robust indicator
                    -- with respect to outliers when successive runs of
                    -- benchmarks are compared.
    | Mean          -- ^ Report the mean and the standard deviation from the
                    -- mean. This is less robust than median but more precise.
    | Regression    -- ^ Report the coefficient of regression, discarding the
                    -- constant factor, arrived at by linear regression using
                    -- ordinary least square method.  The R-square
                    -- goodness-of-fit estimate is also reported.  It works
                    -- better when larger number of samples are taken.  This
                    -- cannot be used when the number of samples is less than
                    -- 2, in that case a mean value is reported instead.
    deriving (Eq, Show, Read)

getAnalyzedValue :: Estimator -> AnalyzedField -> Double
getAnalyzedValue estimator AnalyzedField{..} =
    case estimator of
        Median -> analyzedMedian
        Mean -> analyzedMean
        Regression ->
            case analyzedRegCoeff of
                Nothing -> analyzedMean
                Just x -> estPoint x

-- | Perform an analysis of a measurement.
analyzeBenchmark :: GenIO
                 -> [String]
                 -> [String]
                 -> [([Double], [Double])]
                 -> IO [AnalyzedField]
analyzeBenchmark randGen pcols rcols samples = do
    let sampleCnt = length samples
        i = fromMaybe (error "bug") $ elemIndex "iters" pcols
        vectors = map U.fromList
            $ transpose
            $ map (rescaleIteration i rcols) samples

    (coeffs, r2s) <-
        -- olsRegress fails if there are fewer samples than predictors
        if useRegression && length samples >= (length pcols + 1)
        then do
            let f (Just (x, y)) = (Just x, Just y)
                f Nothing = (Nothing, Nothing)
            fmap (unzip . map f) $ regress randGen i rcols samples
        else do
            let n = length rcols
            return (replicate n Nothing, replicate n Nothing)

    (means, devs) <-
        if useBootstrap && length samples >= 3
        then do
            (ms, ds) <- fmap unzip $ estimateMeanAndStdDev randGen vectors
            return (map estPoint ms, map estPoint ds)
        else do
            -- Even for max fields (e.g. maxrss) we take the mean
            let ms = map mean vectors
                ds = map stdDev vectors
            return (ms, ds)

    let len = U.length $ head vectors
        median v = (sort v) U.! (len `div` 2)
        medians = map median vectors
        outliers = getZipList $ classifyOutliers <$> ZipList vectors
        outlierVars = getZipList
                $ outlierVariance
                    <$> ZipList means
                    <*> ZipList devs
                    <*> pure (fromIntegral sampleCnt)
        kdes = map (kde 128) vectors

    return $ getZipList $ AnalyzedField
        <$> ZipList means
        <*> ZipList devs

        <*> ZipList medians
        <*> ZipList outliers
        <*> ZipList outlierVars
        <*> ZipList kdes

        <*> ZipList coeffs
        <*> ZipList r2s

-- predictor matrix
data BenchmarkIterMatrix = BenchmarkIterMatrix
    { iterPredColNames  :: ![String]  -- predictor column names
    , iterRespColNames  :: ![String]  -- responder column names
    -- (Benchmark, [(predictor columns, responder columns)])
    , iterRowValues :: ![(String, [([Double], [Double])])]
    } deriving Show

-- Stored in row major order
data BenchmarkMatrix = BenchmarkMatrix
    { colNames  :: ![String]
    , rowValues :: ![(String, [AnalyzedField])] -- (Benchmark, columns)
    } deriving Show

foldBenchmark :: BenchmarkIterMatrix -> IO BenchmarkMatrix
foldBenchmark BenchmarkIterMatrix{..} = do
    randGen <- createSystemRandom
    rows <- mapM (foldIters randGen) iterRowValues
    return $ BenchmarkMatrix
        { colNames = iterRespColNames
        , rowValues = rows
        }

    where

    foldIters randGen (name, vals) = do
            vals' <- analyzeBenchmark randGen iterPredColNames
                                      iterRespColNames vals
            return (name, vals')

-- take top samples
-- XXX take equivalent iterations across multiple groups
filterSamples :: BenchmarkIterMatrix -> BenchmarkIterMatrix
filterSamples matrix@BenchmarkIterMatrix{..} =
    matrix
        {-
        {
          iterRowValues = map filterIters iterRowValues
        }

    where

    iterIndex = fromMaybe undefined
        $ elemIndex "iters" (map (map toLower) iterPredColNames)
    nivcswIndex = fromMaybe undefined
        $ elemIndex "nivcsw" (map (map toLower) iterPredColNames)
    filterIters (name, vals) =
        let vals'' = take 50 $ reverse $ sortBy (comparing ((!! iterIndex) .  fst)) vals
        let vals' = filter (\(x,_) -> x !! nivcswIndex < 10) vals
            vals'' =
                if null vals'
                then trace "null after filter" vals
                else vals'
        in (name, vals'')
        -}