| 1 | /* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
| 2 | |
| 3 | /* |
| 4 | Copyright (C) 2003 Ferdinando Ametrano |
| 5 | Copyright (C) 2003 RiskMap srl |
| 6 | |
| 7 | This file is part of QuantLib, a free-software/open-source library |
| 8 | for financial quantitative analysts and developers - http://quantlib.org/ |
| 9 | |
| 10 | QuantLib is free software: you can redistribute it and/or modify it |
| 11 | under the terms of the QuantLib license. You should have received a |
| 12 | copy of the license along with this program; if not, please email |
| 13 | <quantlib-dev@lists.sf.net>. The license is also available online at |
| 14 | <http://quantlib.org/license.shtml>. |
| 15 | |
| 16 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 17 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 18 | FOR A PARTICULAR PURPOSE. See the license for more details. |
| 19 | */ |
| 20 | |
| 21 | /*! \file generalstatistics.hpp |
| 22 | \brief statistics tool |
| 23 | */ |
| 24 | |
| 25 | #ifndef quantlib_general_statistics_hpp |
| 26 | #define quantlib_general_statistics_hpp |
| 27 | |
#include <ql/errors.hpp>
#include <ql/utilities/null.hpp>
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
| 33 | |
| 34 | namespace QuantLib { |
| 35 | |
| 36 | //! Statistics tool |
| 37 | /*! This class accumulates a set of data and returns their |
| 38 | statistics (e.g: mean, variance, skewness, kurtosis, |
| 39 | error estimation, percentile, etc.) based on the empirical |
| 40 | distribution (no gaussian assumption) |
| 41 | |
| 42 | It doesn't suffer the numerical instability problem of |
| 43 | IncrementalStatistics. The downside is that it stores all |
| 44 | samples, thus increasing the memory requirements. |
| 45 | */ |
| 46 | class GeneralStatistics { |
| 47 | public: |
| 48 | typedef Real value_type; |
| 49 | GeneralStatistics(); |
| 50 | //! \name Inspectors |
| 51 | //@{ |
| 52 | //! number of samples collected |
| 53 | Size samples() const; |
| 54 | |
| 55 | //! collected data |
| 56 | const std::vector<std::pair<Real,Real> >& data() const; |
| 57 | |
| 58 | //! sum of data weights |
| 59 | Real weightSum() const; |
| 60 | |
| 61 | /*! returns the mean, defined as |
| 62 | \f[ \langle x \rangle = \frac{\sum w_i x_i}{\sum w_i}. \f] |
| 63 | */ |
| 64 | Real mean() const; |
| 65 | |
| 66 | /*! returns the variance, defined as |
| 67 | \f[ \sigma^2 = \frac{N}{N-1} \left\langle \left( |
| 68 | x-\langle x \rangle \right)^2 \right\rangle. \f] |
| 69 | */ |
| 70 | Real variance() const; |
| 71 | |
| 72 | /*! returns the standard deviation \f$ \sigma \f$, defined as the |
| 73 | square root of the variance. |
| 74 | */ |
| 75 | Real standardDeviation() const; |
| 76 | |
| 77 | /*! returns the error estimate on the mean value, defined as |
| 78 | \f$ \epsilon = \sigma/\sqrt{N}. \f$ |
| 79 | */ |
| 80 | Real errorEstimate() const; |
| 81 | |
| 82 | /*! returns the skewness, defined as |
| 83 | \f[ \frac{N^2}{(N-1)(N-2)} \frac{\left\langle \left( |
| 84 | x-\langle x \rangle \right)^3 \right\rangle}{\sigma^3}. \f] |
| 85 | The above evaluates to 0 for a Gaussian distribution. |
| 86 | */ |
| 87 | Real skewness() const; |
| 88 | |
| 89 | /*! returns the excess kurtosis, defined as |
| 90 | \f[ \frac{N^2(N+1)}{(N-1)(N-2)(N-3)} |
| 91 | \frac{\left\langle \left(x-\langle x \rangle \right)^4 |
| 92 | \right\rangle}{\sigma^4} - \frac{3(N-1)^2}{(N-2)(N-3)}. \f] |
| 93 | The above evaluates to 0 for a Gaussian distribution. |
| 94 | */ |
| 95 | Real kurtosis() const; |
| 96 | |
| 97 | /*! returns the minimum sample value */ |
| 98 | Real min() const; |
| 99 | |
| 100 | /*! returns the maximum sample value */ |
| 101 | Real max() const; |
| 102 | |
| 103 | /*! Expectation value of a function \f$ f \f$ on a given |
| 104 | range \f$ \mathcal{R} \f$, i.e., |
| 105 | \f[ \mathrm{E}\left[f \;|\; \mathcal{R}\right] = |
| 106 | \frac{\sum_{x_i \in \mathcal{R}} f(x_i) w_i}{ |
| 107 | \sum_{x_i \in \mathcal{R}} w_i}. \f] |
| 108 | The range is passed as a boolean function returning |
| 109 | <tt>true</tt> if the argument belongs to the range |
| 110 | or <tt>false</tt> otherwise. |
| 111 | |
| 112 | The function returns a pair made of the result and |
| 113 | the number of observations in the given range. |
| 114 | */ |
| 115 | template <class Func, class Predicate> |
| 116 | std::pair<Real,Size> expectationValue(const Func& f, |
| 117 | const Predicate& inRange) const { |
| 118 | Real num = 0.0, den = 0.0; |
| 119 | Size N = 0; |
| 120 | std::vector<std::pair<Real,Real> >::const_iterator i; |
| 121 | for (i=samples_.begin(); i!=samples_.end(); ++i) { |
| 122 | Real x = i->first, w = i->second; |
| 123 | if (inRange(x)) { |
| 124 | num += f(x)*w; |
| 125 | den += w; |
| 126 | N += 1; |
| 127 | } |
| 128 | } |
| 129 | if (N == 0) |
| 130 | return std::make_pair<Real,Size>(x: Null<Real>(),y: 0); |
| 131 | else |
| 132 | return std::make_pair(x: num/den,y&: N); |
| 133 | } |
| 134 | |
| 135 | /*! Expectation value of a function \f$ f \f$ over the whole |
| 136 | set of samples; equivalent to passing the other overload |
| 137 | a range function always returning <tt>true</tt>. |
| 138 | */ |
| 139 | template <class Func> |
| 140 | std::pair<Real,Size> expectationValue(const Func& f) const { |
| 141 | return expectationValue(f, [](Real x) { return true; }); |
| 142 | } |
| 143 | |
| 144 | /*! \f$ y \f$-th percentile, defined as the value \f$ \bar{x} \f$ |
| 145 | such that |
| 146 | \f[ y = \frac{\sum_{x_i < \bar{x}} w_i}{ |
| 147 | \sum_i w_i} \f] |
| 148 | |
| 149 | \pre \f$ y \f$ must be in the range \f$ (0-1]. \f$ |
| 150 | */ |
| 151 | Real percentile(Real y) const; |
| 152 | |
| 153 | /*! \f$ y \f$-th top percentile, defined as the value |
| 154 | \f$ \bar{x} \f$ such that |
| 155 | \f[ y = \frac{\sum_{x_i > \bar{x}} w_i}{ |
| 156 | \sum_i w_i} \f] |
| 157 | |
| 158 | \pre \f$ y \f$ must be in the range \f$ (0-1]. \f$ |
| 159 | */ |
| 160 | Real topPercentile(Real y) const; |
| 161 | //@} |
| 162 | |
| 163 | //! \name Modifiers |
| 164 | //@{ |
| 165 | //! adds a datum to the set, possibly with a weight |
| 166 | void add(Real value, Real weight = 1.0); |
| 167 | //! adds a sequence of data to the set, with default weight |
| 168 | template <class DataIterator> |
| 169 | void addSequence(DataIterator begin, DataIterator end) { |
| 170 | for (;begin!=end;++begin) |
| 171 | add(value: *begin); |
| 172 | } |
| 173 | //! adds a sequence of data to the set, each with its weight |
| 174 | template <class DataIterator, class WeightIterator> |
| 175 | void addSequence(DataIterator begin, DataIterator end, |
| 176 | WeightIterator wbegin) { |
| 177 | for (;begin!=end;++begin,++wbegin) |
| 178 | add(value: *begin, weight: *wbegin); |
| 179 | } |
| 180 | |
| 181 | //! resets the data to a null set |
| 182 | void reset(); |
| 183 | |
| 184 | //! informs the internal storage of a planned increase in size |
| 185 | void reserve(Size n) const; |
| 186 | |
| 187 | //! sort the data set in increasing order |
| 188 | void sort() const; |
| 189 | //@} |
| 190 | private: |
| 191 | mutable std::vector<std::pair<Real,Real> > samples_; |
| 192 | mutable bool sorted_; |
| 193 | }; |
| 194 | |
| 195 | |
| 196 | // inline definitions |
| 197 | |
| 198 | inline GeneralStatistics::GeneralStatistics() { |
| 199 | reset(); |
| 200 | } |
| 201 | |
| 202 | inline Size GeneralStatistics::samples() const { |
| 203 | return samples_.size(); |
| 204 | } |
| 205 | |
| 206 | inline const std::vector<std::pair<Real,Real> >& |
| 207 | GeneralStatistics::data() const { |
| 208 | return samples_; |
| 209 | } |
| 210 | |
| 211 | inline Real GeneralStatistics::standardDeviation() const { |
| 212 | return std::sqrt(x: variance()); |
| 213 | } |
| 214 | |
| 215 | inline Real GeneralStatistics::errorEstimate() const { |
| 216 | return std::sqrt(x: variance()/samples()); |
| 217 | } |
| 218 | |
| 219 | inline Real GeneralStatistics::min() const { |
| 220 | QL_REQUIRE(samples() > 0, "empty sample set" ); |
| 221 | return std::min_element(first: samples_.begin(), |
| 222 | last: samples_.end())->first; |
| 223 | } |
| 224 | |
| 225 | inline Real GeneralStatistics::max() const { |
| 226 | QL_REQUIRE(samples() > 0, "empty sample set" ); |
| 227 | return std::max_element(first: samples_.begin(), |
| 228 | last: samples_.end())->first; |
| 229 | } |
| 230 | |
| 231 | /*! \pre weights must be positive or null */ |
| 232 | inline void GeneralStatistics::add(Real value, Real weight) { |
| 233 | QL_REQUIRE(weight>=0.0, "negative weight not allowed" ); |
| 234 | samples_.emplace_back(args&: value, args&: weight); |
| 235 | sorted_ = false; |
| 236 | } |
| 237 | |
| 238 | inline void GeneralStatistics::reset() { |
| 239 | samples_ = std::vector<std::pair<Real,Real> >(); |
| 240 | sorted_ = true; |
| 241 | } |
| 242 | |
| 243 | inline void GeneralStatistics::reserve(Size n) const { |
| 244 | samples_.reserve(n: n); |
| 245 | } |
| 246 | |
| 247 | inline void GeneralStatistics::sort() const { |
| 248 | if (!sorted_) { |
| 249 | std::sort(first: samples_.begin(), last: samples_.end()); |
| 250 | sorted_ = true; |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | } |
| 255 | |
| 256 | |
| 257 | #endif |
| 258 | |