1/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3/*
4 Copyright (C) 2003, 2004, 2005, 2006, 2007 Ferdinando Ametrano
5
6 This file is part of QuantLib, a free-software/open-source library
7 for financial quantitative analysts and developers - http://quantlib.org/
8
9 QuantLib is free software: you can redistribute it and/or modify it
10 under the terms of the QuantLib license. You should have received a
11 copy of the license along with this program; if not, please email
12 <quantlib-dev@lists.sf.net>. The license is also available online at
13 <http://quantlib.org/license.shtml>.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the license for more details.
18*/
19
20/*! \file sequencestatistics.hpp
21 \brief Statistics tools for sequence (vector, list, array) samples
22*/
23
24#ifndef quantlib_sequence_statistics_hpp
25#define quantlib_sequence_statistics_hpp
26
27#include <ql/math/statistics/statistics.hpp>
28#include <ql/math/statistics/incrementalstatistics.hpp>
29#include <ql/math/matrix.hpp>
30
31namespace QuantLib {
32
33 //! Statistics analysis of N-dimensional (sequence) data
34 /*! It provides 1-dimensional statistics as discrepancy plus
35 N-dimensional (sequence) statistics (e.g. mean,
36 variance, skewness, kurtosis, etc.) with one component for each
37 dimension of the sample space.
38
39 For most of the statistics this class relies on
40 the StatisticsType underlying class to provide 1-D methods that
41 will be iterated for all the components of the N-D data. These
42 lifted methods are the union of all the methods that might be
43 requested to the 1-D underlying StatisticsType class, with the
44 usual compile-time checks provided by the template approach.
45
46 \test the correctness of the returned values is tested by
47 checking them against numerical calculations.
48 */
49 template <class StatisticsType>
50 class GenericSequenceStatistics {
51 public:
52 // typedefs
53 typedef StatisticsType statistics_type;
54 typedef std::vector<typename StatisticsType::value_type> value_type;
55 // constructor
56 GenericSequenceStatistics(Size dimension = 0);
57 //! \name inspectors
58 //@{
59 Size size() const { return dimension_; }
60 //@}
61 //! \name covariance and correlation
62 //@{
63 //! returns the covariance Matrix
64 Matrix covariance() const;
65 //! returns the correlation Matrix
66 Matrix correlation() const;
67 //@}
68 //! \name 1-D inspectors lifted from underlying statistics class
69 //@{
70 Size samples() const;
71 Real weightSum() const;
72 //@}
73 //! \name N-D inspectors lifted from underlying statistics class
74 //@{
75 // void argument list
76 std::vector<Real> mean() const;
77 std::vector<Real> variance() const;
78 std::vector<Real> standardDeviation() const;
79 std::vector<Real> downsideVariance() const;
80 std::vector<Real> downsideDeviation() const;
81 std::vector<Real> semiVariance() const;
82 std::vector<Real> semiDeviation() const;
83 std::vector<Real> errorEstimate() const;
84 std::vector<Real> skewness() const;
85 std::vector<Real> kurtosis() const;
86 std::vector<Real> min() const;
87 std::vector<Real> max() const;
88
89 // single argument list
90 std::vector<Real> gaussianPercentile(Real y) const;
91 std::vector<Real> percentile(Real y) const;
92
93 std::vector<Real> gaussianPotentialUpside(Real percentile) const;
94 std::vector<Real> potentialUpside(Real percentile) const;
95
96 std::vector<Real> gaussianValueAtRisk(Real percentile) const;
97 std::vector<Real> valueAtRisk(Real percentile) const;
98
99 std::vector<Real> gaussianExpectedShortfall(Real percentile) const;
100 std::vector<Real> expectedShortfall(Real percentile) const;
101
102 std::vector<Real> regret(Real target) const;
103
104 std::vector<Real> gaussianShortfall(Real target) const;
105 std::vector<Real> shortfall(Real target) const;
106
107 std::vector<Real> gaussianAverageShortfall(Real target) const;
108 std::vector<Real> averageShortfall(Real target) const;
109
110 //@}
111 //! \name Modifiers
112 //@{
113 void reset(Size dimension = 0);
114 template <class Sequence>
115 void add(const Sequence& sample,
116 Real weight = 1.0) {
117 add(sample.begin(), sample.end(), weight);
118 }
119 template <class Iterator>
120 void add(Iterator begin,
121 Iterator end,
122 Real weight = 1.0) {
123 if (dimension_ == 0) {
124 // stat wasn't initialized yet
125 QL_REQUIRE(end>begin, "sample error: end<=begin");
126 Size dimension = std::distance(begin, end);
127 reset(dimension);
128 }
129
130 QL_REQUIRE(std::distance(begin, end) == Integer(dimension_),
131 "sample size mismatch: " << dimension_ <<
132 " required, " << std::distance(begin, end) <<
133 " provided");
134
135 quadraticSum_ += weight * outerProduct(begin, end,
136 begin, end);
137
138 for (Size i=0; i<dimension_; ++begin, ++i)
139 stats_[i].add(*begin, weight);
140
141 }
142 //@}
143 protected:
144 Size dimension_ = 0;
145 std::vector<statistics_type> stats_;
146 mutable std::vector<Real> results_;
147 Matrix quadraticSum_;
148 };
149
150 //! default multi-dimensional statistics tool
151 /*! \test the correctness of the returned values is tested by
152 checking them against numerical calculations.
153 */
154 typedef GenericSequenceStatistics<Statistics> SequenceStatistics;
155 typedef GenericSequenceStatistics<IncrementalStatistics> SequenceStatisticsInc;
156
157 // inline definitions
158
159 template <class Stat>
160 inline GenericSequenceStatistics<Stat>::GenericSequenceStatistics(Size dimension) {
161 reset(dimension);
162 }
163
164 template <class Stat>
165 inline Size GenericSequenceStatistics<Stat>::samples() const {
166 return (stats_.empty()) ? 0 : stats_[0].samples();
167 }
168
169 template <class Stat>
170 inline Real GenericSequenceStatistics<Stat>::weightSum() const {
171 return (stats_.empty()) ? 0.0 : stats_[0].weightSum();
172 }
173
174
175 // macros for the implementation of the lifted methods
176
177 // N-D methods' definition with void argument list
178 #define DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(METHOD) \
179 template <class Stat> \
180 std::vector<Real> \
181 GenericSequenceStatistics<Stat>::METHOD() const { \
182 for (Size i=0; i<dimension_; i++) \
183 results_[i] = stats_[i].METHOD(); \
184 return results_; \
185 }
186 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(mean)
187 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(variance)
188 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(standardDeviation)
189 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(downsideVariance)
190 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(downsideDeviation)
191 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(semiVariance)
192 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(semiDeviation)
193 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(errorEstimate)
194 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(skewness)
195 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(kurtosis)
196 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(min)
197 DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(max)
198 #undef DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID
199
200
201 // N-D methods' definition with single argument
202 #define DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(METHOD) \
203 template <class Stat> \
204 std::vector<Real> \
205 GenericSequenceStatistics<Stat>::METHOD(Real x) const { \
206 for (Size i=0; i<dimension_; i++) \
207 results_[i] = stats_[i].METHOD(x); \
208 return results_; \
209 }
210
211 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianPercentile)
212 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianPotentialUpside)
213 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianValueAtRisk)
214 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianExpectedShortfall)
215 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianShortfall)
216 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianAverageShortfall)
217
218 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(percentile)
219 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(potentialUpside)
220 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(valueAtRisk)
221 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(expectedShortfall)
222 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(regret)
223 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(shortfall)
224 DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(averageShortfall)
225 #undef DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE
226
227
228 template <class Stat>
229 void GenericSequenceStatistics<Stat>::reset(Size dimension) {
230 // (re-)initialize
231 if (dimension > 0) {
232 if (dimension == dimension_) {
233 for (Size i=0; i<dimension_; ++i)
234 stats_[i].reset();
235 } else {
236 dimension_ = dimension;
237 stats_ = std::vector<Stat>(dimension);
238 results_ = std::vector<Real>(dimension);
239 }
240 quadraticSum_ = Matrix(dimension_, dimension_, 0.0);
241 } else {
242 dimension_ = dimension;
243 }
244 }
245
246 template <class Stat>
247 Matrix GenericSequenceStatistics<Stat>::covariance() const {
248 Real sampleWeight = weightSum();
249 QL_REQUIRE(sampleWeight > 0.0,
250 "sampleWeight=0, unsufficient");
251
252 Real sampleNumber = static_cast<Real>(samples());
253 QL_REQUIRE(sampleNumber > 1.0,
254 "sample number <=1, unsufficient");
255
256 std::vector<Real> m = mean();
257 Real inv = 1.0/sampleWeight;
258
259 Matrix result = inv*quadraticSum_;
260 result -= outerProduct(v1begin: m.begin(), v1end: m.end(),
261 v2begin: m.begin(), v2end: m.end());
262
263 result *= (sampleNumber/(sampleNumber-1.0));
264 return result;
265 }
266
267
268 template <class Stat>
269 Matrix GenericSequenceStatistics<Stat>::correlation() const {
270 Matrix correlation = covariance();
271 Array variances = correlation.diagonal();
272 for (Size i=0; i<dimension_; i++){
273 for (Size j=0; j<dimension_; j++){
274 if (i==j) {
275 if (variances[i]==0.0) {
276 correlation[i][j] = 1.0;
277 } else {
278 correlation[i][j] *=
279 1.0/std::sqrt(x: variances[i]*variances[j]);
280 }
281 } else {
282 if (variances[i]==0.0 && variances[j]==0) {
283 correlation[i][j] = 1.0;
284 } else if (variances[i]==0.0 || variances[j]==0.0) {
285 correlation[i][j] = 0.0;
286 } else {
287 correlation[i][j] *=
288 1.0/std::sqrt(x: variances[i]*variances[j]);
289 }
290 }
291 } // j for
292 } // i for
293
294 return correlation;
295 }
296
297}
298
299
300#endif
301

source code of quantlib/ql/math/statistics/sequencestatistics.hpp