| 1 | /* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
| 2 | |
| 3 | /* |
| 4 | Copyright (C) 2003, 2004, 2005, 2006, 2007 Ferdinando Ametrano |
| 5 | |
| 6 | This file is part of QuantLib, a free-software/open-source library |
| 7 | for financial quantitative analysts and developers - http://quantlib.org/ |
| 8 | |
| 9 | QuantLib is free software: you can redistribute it and/or modify it |
| 10 | under the terms of the QuantLib license. You should have received a |
| 11 | copy of the license along with this program; if not, please email |
| 12 | <quantlib-dev@lists.sf.net>. The license is also available online at |
| 13 | <http://quantlib.org/license.shtml>. |
| 14 | |
| 15 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 17 | FOR A PARTICULAR PURPOSE. See the license for more details. |
| 18 | */ |
| 19 | |
| 20 | /*! \file sequencestatistics.hpp |
| 21 | \brief Statistics tools for sequence (vector, list, array) samples |
| 22 | */ |
| 23 | |
| 24 | #ifndef quantlib_sequence_statistics_hpp |
| 25 | #define quantlib_sequence_statistics_hpp |
| 26 | |
| 27 | #include <ql/math/statistics/statistics.hpp> |
| 28 | #include <ql/math/statistics/incrementalstatistics.hpp> |
| 29 | #include <ql/math/matrix.hpp> |
| 30 | |
| 31 | namespace QuantLib { |
| 32 | |
| 33 | //! Statistics analysis of N-dimensional (sequence) data |
| 34 | /*! It provides 1-dimensional statistics as discrepancy plus |
| 35 | N-dimensional (sequence) statistics (e.g. mean, |
| 36 | variance, skewness, kurtosis, etc.) with one component for each |
| 37 | dimension of the sample space. |
| 38 | |
| 39 | For most of the statistics this class relies on |
| 40 | the StatisticsType underlying class to provide 1-D methods that |
| 41 | will be iterated for all the components of the N-D data. These |
| 42 | lifted methods are the union of all the methods that might be |
| 43 | requested to the 1-D underlying StatisticsType class, with the |
| 44 | usual compile-time checks provided by the template approach. |
| 45 | |
| 46 | \test the correctness of the returned values is tested by |
| 47 | checking them against numerical calculations. |
| 48 | */ |
| 49 | template <class StatisticsType> |
| 50 | class GenericSequenceStatistics { |
| 51 | public: |
| 52 | // typedefs |
| 53 | typedef StatisticsType statistics_type; |
| 54 | typedef std::vector<typename StatisticsType::value_type> value_type; |
| 55 | // constructor |
| 56 | GenericSequenceStatistics(Size dimension = 0); |
| 57 | //! \name inspectors |
| 58 | //@{ |
| 59 | Size size() const { return dimension_; } |
| 60 | //@} |
| 61 | //! \name covariance and correlation |
| 62 | //@{ |
| 63 | //! returns the covariance Matrix |
| 64 | Matrix covariance() const; |
| 65 | //! returns the correlation Matrix |
| 66 | Matrix correlation() const; |
| 67 | //@} |
| 68 | //! \name 1-D inspectors lifted from underlying statistics class |
| 69 | //@{ |
| 70 | Size samples() const; |
| 71 | Real weightSum() const; |
| 72 | //@} |
| 73 | //! \name N-D inspectors lifted from underlying statistics class |
| 74 | //@{ |
| 75 | // void argument list |
| 76 | std::vector<Real> mean() const; |
| 77 | std::vector<Real> variance() const; |
| 78 | std::vector<Real> standardDeviation() const; |
| 79 | std::vector<Real> downsideVariance() const; |
| 80 | std::vector<Real> downsideDeviation() const; |
| 81 | std::vector<Real> semiVariance() const; |
| 82 | std::vector<Real> semiDeviation() const; |
| 83 | std::vector<Real> errorEstimate() const; |
| 84 | std::vector<Real> skewness() const; |
| 85 | std::vector<Real> kurtosis() const; |
| 86 | std::vector<Real> min() const; |
| 87 | std::vector<Real> max() const; |
| 88 | |
| 89 | // single argument list |
| 90 | std::vector<Real> gaussianPercentile(Real y) const; |
| 91 | std::vector<Real> percentile(Real y) const; |
| 92 | |
| 93 | std::vector<Real> gaussianPotentialUpside(Real percentile) const; |
| 94 | std::vector<Real> potentialUpside(Real percentile) const; |
| 95 | |
| 96 | std::vector<Real> gaussianValueAtRisk(Real percentile) const; |
| 97 | std::vector<Real> valueAtRisk(Real percentile) const; |
| 98 | |
| 99 | std::vector<Real> gaussianExpectedShortfall(Real percentile) const; |
| 100 | std::vector<Real> expectedShortfall(Real percentile) const; |
| 101 | |
| 102 | std::vector<Real> regret(Real target) const; |
| 103 | |
| 104 | std::vector<Real> gaussianShortfall(Real target) const; |
| 105 | std::vector<Real> shortfall(Real target) const; |
| 106 | |
| 107 | std::vector<Real> gaussianAverageShortfall(Real target) const; |
| 108 | std::vector<Real> averageShortfall(Real target) const; |
| 109 | |
| 110 | //@} |
| 111 | //! \name Modifiers |
| 112 | //@{ |
| 113 | void reset(Size dimension = 0); |
| 114 | template <class Sequence> |
| 115 | void add(const Sequence& sample, |
| 116 | Real weight = 1.0) { |
| 117 | add(sample.begin(), sample.end(), weight); |
| 118 | } |
| 119 | template <class Iterator> |
| 120 | void add(Iterator begin, |
| 121 | Iterator end, |
| 122 | Real weight = 1.0) { |
| 123 | if (dimension_ == 0) { |
| 124 | // stat wasn't initialized yet |
| 125 | QL_REQUIRE(end>begin, "sample error: end<=begin" ); |
| 126 | Size dimension = std::distance(begin, end); |
| 127 | reset(dimension); |
| 128 | } |
| 129 | |
| 130 | QL_REQUIRE(std::distance(begin, end) == Integer(dimension_), |
| 131 | "sample size mismatch: " << dimension_ << |
| 132 | " required, " << std::distance(begin, end) << |
| 133 | " provided" ); |
| 134 | |
| 135 | quadraticSum_ += weight * outerProduct(begin, end, |
| 136 | begin, end); |
| 137 | |
| 138 | for (Size i=0; i<dimension_; ++begin, ++i) |
| 139 | stats_[i].add(*begin, weight); |
| 140 | |
| 141 | } |
| 142 | //@} |
| 143 | protected: |
| 144 | Size dimension_ = 0; |
| 145 | std::vector<statistics_type> stats_; |
| 146 | mutable std::vector<Real> results_; |
| 147 | Matrix quadraticSum_; |
| 148 | }; |
| 149 | |
| 150 | //! default multi-dimensional statistics tool |
| 151 | /*! \test the correctness of the returned values is tested by |
| 152 | checking them against numerical calculations. |
| 153 | */ |
| 154 | typedef GenericSequenceStatistics<Statistics> SequenceStatistics; |
| 155 | typedef GenericSequenceStatistics<IncrementalStatistics> SequenceStatisticsInc; |
| 156 | |
| 157 | // inline definitions |
| 158 | |
| 159 | template <class Stat> |
| 160 | inline GenericSequenceStatistics<Stat>::GenericSequenceStatistics(Size dimension) { |
| 161 | reset(dimension); |
| 162 | } |
| 163 | |
| 164 | template <class Stat> |
| 165 | inline Size GenericSequenceStatistics<Stat>::samples() const { |
| 166 | return (stats_.empty()) ? 0 : stats_[0].samples(); |
| 167 | } |
| 168 | |
| 169 | template <class Stat> |
| 170 | inline Real GenericSequenceStatistics<Stat>::weightSum() const { |
| 171 | return (stats_.empty()) ? 0.0 : stats_[0].weightSum(); |
| 172 | } |
| 173 | |
| 174 | |
| 175 | // macros for the implementation of the lifted methods |
| 176 | |
| 177 | // N-D methods' definition with void argument list |
| 178 | #define DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(METHOD) \ |
| 179 | template <class Stat> \ |
| 180 | std::vector<Real> \ |
| 181 | GenericSequenceStatistics<Stat>::METHOD() const { \ |
| 182 | for (Size i=0; i<dimension_; i++) \ |
| 183 | results_[i] = stats_[i].METHOD(); \ |
| 184 | return results_; \ |
| 185 | } |
| 186 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(mean) |
| 187 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(variance) |
| 188 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(standardDeviation) |
| 189 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(downsideVariance) |
| 190 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(downsideDeviation) |
| 191 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(semiVariance) |
| 192 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(semiDeviation) |
| 193 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(errorEstimate) |
| 194 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(skewness) |
| 195 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(kurtosis) |
| 196 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(min) |
| 197 | DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(max) |
| 198 | #undef DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID |
| 199 | |
| 200 | |
| 201 | // N-D methods' definition with single argument |
| 202 | #define DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(METHOD) \ |
| 203 | template <class Stat> \ |
| 204 | std::vector<Real> \ |
| 205 | GenericSequenceStatistics<Stat>::METHOD(Real x) const { \ |
| 206 | for (Size i=0; i<dimension_; i++) \ |
| 207 | results_[i] = stats_[i].METHOD(x); \ |
| 208 | return results_; \ |
| 209 | } |
| 210 | |
| 211 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianPercentile) |
| 212 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianPotentialUpside) |
| 213 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianValueAtRisk) |
| 214 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianExpectedShortfall) |
| 215 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianShortfall) |
| 216 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianAverageShortfall) |
| 217 | |
| 218 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(percentile) |
| 219 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(potentialUpside) |
| 220 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(valueAtRisk) |
| 221 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(expectedShortfall) |
| 222 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(regret) |
| 223 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(shortfall) |
| 224 | DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(averageShortfall) |
| 225 | #undef DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE |
| 226 | |
| 227 | |
| 228 | template <class Stat> |
| 229 | void GenericSequenceStatistics<Stat>::reset(Size dimension) { |
| 230 | // (re-)initialize |
| 231 | if (dimension > 0) { |
| 232 | if (dimension == dimension_) { |
| 233 | for (Size i=0; i<dimension_; ++i) |
| 234 | stats_[i].reset(); |
| 235 | } else { |
| 236 | dimension_ = dimension; |
| 237 | stats_ = std::vector<Stat>(dimension); |
| 238 | results_ = std::vector<Real>(dimension); |
| 239 | } |
| 240 | quadraticSum_ = Matrix(dimension_, dimension_, 0.0); |
| 241 | } else { |
| 242 | dimension_ = dimension; |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | template <class Stat> |
| 247 | Matrix GenericSequenceStatistics<Stat>::covariance() const { |
| 248 | Real sampleWeight = weightSum(); |
| 249 | QL_REQUIRE(sampleWeight > 0.0, |
| 250 | "sampleWeight=0, unsufficient" ); |
| 251 | |
| 252 | Real sampleNumber = static_cast<Real>(samples()); |
| 253 | QL_REQUIRE(sampleNumber > 1.0, |
| 254 | "sample number <=1, unsufficient" ); |
| 255 | |
| 256 | std::vector<Real> m = mean(); |
| 257 | Real inv = 1.0/sampleWeight; |
| 258 | |
| 259 | Matrix result = inv*quadraticSum_; |
| 260 | result -= outerProduct(v1begin: m.begin(), v1end: m.end(), |
| 261 | v2begin: m.begin(), v2end: m.end()); |
| 262 | |
| 263 | result *= (sampleNumber/(sampleNumber-1.0)); |
| 264 | return result; |
| 265 | } |
| 266 | |
| 267 | |
| 268 | template <class Stat> |
| 269 | Matrix GenericSequenceStatistics<Stat>::correlation() const { |
| 270 | Matrix correlation = covariance(); |
| 271 | Array variances = correlation.diagonal(); |
| 272 | for (Size i=0; i<dimension_; i++){ |
| 273 | for (Size j=0; j<dimension_; j++){ |
| 274 | if (i==j) { |
| 275 | if (variances[i]==0.0) { |
| 276 | correlation[i][j] = 1.0; |
| 277 | } else { |
| 278 | correlation[i][j] *= |
| 279 | 1.0/std::sqrt(x: variances[i]*variances[j]); |
| 280 | } |
| 281 | } else { |
| 282 | if (variances[i]==0.0 && variances[j]==0) { |
| 283 | correlation[i][j] = 1.0; |
| 284 | } else if (variances[i]==0.0 || variances[j]==0.0) { |
| 285 | correlation[i][j] = 0.0; |
| 286 | } else { |
| 287 | correlation[i][j] *= |
| 288 | 1.0/std::sqrt(x: variances[i]*variances[j]); |
| 289 | } |
| 290 | } |
| 291 | } // j for |
| 292 | } // i for |
| 293 | |
| 294 | return correlation; |
| 295 | } |
| 296 | |
| 297 | } |
| 298 | |
| 299 | |
| 300 | #endif |
| 301 | |