sequencestatistics.hpp source code [quantlib/ql/math/statistics/sequencestatistics.hpp]

1	/ -- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -- /
2
3	/*
4	Copyright (C) 2003, 2004, 2005, 2006, 2007 Ferdinando Ametrano
5
6	This file is part of QuantLib, a free-software/open-source library
7	for financial quantitative analysts and developers - http://quantlib.org/
8
9	QuantLib is free software: you can redistribute it and/or modify it
10	under the terms of the QuantLib license. You should have received a
11	copy of the license along with this program; if not, please email
12	<quantlib-dev@lists.sf.net>. The license is also available online at
13	<http://quantlib.org/license.shtml>.
14
15	This program is distributed in the hope that it will be useful, but WITHOUT
16	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17	FOR A PARTICULAR PURPOSE. See the license for more details.
18	*/
19
20	/! \file sequencestatistics.hpp*
21	\brief Statistics tools for sequence (vector, list, array) samples
22	*/
23
24	#ifndef quantlib_sequence_statistics_hpp
25	#define quantlib_sequence_statistics_hpp
26
27	#include <ql/math/statistics/statistics.hpp>
28	#include <ql/math/statistics/incrementalstatistics.hpp>
29	#include <ql/math/matrix.hpp>
30
31	namespace QuantLib {
32
33	//! Statistics analysis of N-dimensional (sequence) data
34	/! It provides 1-dimensional statistics as discrepancy plus*
35	N-dimensional (sequence) statistics (e.g. mean,
36	variance, skewness, kurtosis, etc.) with one component for each
37	dimension of the sample space.
38
39	For most of the statistics this class relies on
40	the StatisticsType underlying class to provide 1-D methods that
41	will be iterated for all the components of the N-D data. These
42	lifted methods are the union of all the methods that might be
43	requested to the 1-D underlying StatisticsType class, with the
44	usual compile-time checks provided by the template approach.
45
46	\test the correctness of the returned values is tested by
47	checking them against numerical calculations.
48	*/
49	template <class StatisticsType>
50	class GenericSequenceStatistics {
51	public:
52	// typedefs
53	typedef StatisticsType statistics_type;
54	typedef std::vector<typename StatisticsType::value_type> value_type;
55	// constructor
56	GenericSequenceStatistics(Size dimension = `0`);
57	//! \name inspectors
58	//@{
59	Size size() const { return dimension_; }
60	//@}
61	//! \name covariance and correlation
62	//@{
63	//! returns the covariance Matrix
64	Matrix covariance() const;
65	//! returns the correlation Matrix
66	Matrix correlation() const;
67	//@}
68	//! \name 1-D inspectors lifted from underlying statistics class
69	//@{
70	Size samples() const;
71	Real weightSum() const;
72	//@}
73	//! \name N-D inspectors lifted from underlying statistics class
74	//@{
75	// void argument list
76	std::vector<Real> mean() const;
77	std::vector<Real> variance() const;
78	std::vector<Real> standardDeviation() const;
79	std::vector<Real> downsideVariance() const;
80	std::vector<Real> downsideDeviation() const;
81	std::vector<Real> semiVariance() const;
82	std::vector<Real> semiDeviation() const;
83	std::vector<Real> errorEstimate() const;
84	std::vector<Real> skewness() const;
85	std::vector<Real> kurtosis() const;
86	std::vector<Real> min() const;
87	std::vector<Real> max() const;
88
89	// single argument list
90	std::vector<Real> gaussianPercentile(Real y) const;
91	std::vector<Real> percentile(Real y) const;
92
93	std::vector<Real> gaussianPotentialUpside(Real percentile) const;
94	std::vector<Real> potentialUpside(Real percentile) const;
95
96	std::vector<Real> gaussianValueAtRisk(Real percentile) const;
97	std::vector<Real> valueAtRisk(Real percentile) const;
98
99	std::vector<Real> gaussianExpectedShortfall(Real percentile) const;
100	std::vector<Real> expectedShortfall(Real percentile) const;
101
102	std::vector<Real> regret(Real target) const;
103
104	std::vector<Real> gaussianShortfall(Real target) const;
105	std::vector<Real> shortfall(Real target) const;
106
107	std::vector<Real> gaussianAverageShortfall(Real target) const;
108	std::vector<Real> averageShortfall(Real target) const;
109
110	//@}
111	//! \name Modifiers
112	//@{
113	void reset(Size dimension = `0`);
114	template <class Sequence>
115	void add(const Sequence& sample,
116	Real weight = `1.0`) {
117	add(sample.begin(), sample.end(), weight);
118	}
119	template <class Iterator>
120	void add(Iterator begin,
121	Iterator end,
122	Real weight = `1.0`) {
123	if (dimension_ == `0`) {
124	// stat wasn't initialized yet
125	QL_REQUIRE(end>begin, "sample error: end<=begin");
126	Size dimension = std::distance(begin, end);
127	reset(dimension);
128	}
129
130	QL_REQUIRE(std::distance(begin, end) == Integer(dimension_),
131	"sample size mismatch: " << dimension_ <<
132	" required, " << std::distance(begin, end) <<
133	" provided");
134
135	quadraticSum_ += weight * outerProduct(begin, end,
136	begin, end);
137
138	for (Size i=`0`; i<dimension_; ++begin, ++i)
139	stats_[i].add(*begin, weight);
140
141	}
142	//@}
143	protected:
144	Size dimension_ = `0`;
145	std::vector<statistics_type> stats_;
146	mutable std::vector<Real> results_;
147	Matrix quadraticSum_;
148	};
149
150	//! default multi-dimensional statistics tool
151	/! \test the correctness of the returned values is tested by*
152	checking them against numerical calculations.
153	*/
154	typedef GenericSequenceStatistics<Statistics> SequenceStatistics;
155	typedef GenericSequenceStatistics<IncrementalStatistics> SequenceStatisticsInc;
156
157	// inline definitions
158
159	template <class Stat>
160	inline GenericSequenceStatistics<Stat>::GenericSequenceStatistics(Size dimension) {
161	reset(dimension);
162	}
163
164	template <class Stat>
165	inline Size GenericSequenceStatistics<Stat>::samples() const {
166	return (stats_.empty()) ? `0` : stats_[`0`].samples();
167	}
168
169	template <class Stat>
170	inline Real GenericSequenceStatistics<Stat>::weightSum() const {
171	return (stats_.empty()) ? `0.0` : stats_[`0`].weightSum();
172	}
173
174
175	// macros for the implementation of the lifted methods
176
177	// N-D methods' definition with void argument list
178	#define DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(METHOD) \
179	template <class Stat> \
180	std::vector<Real> \
181	GenericSequenceStatistics<Stat>::METHOD() const { \
182	for (Size i=0; i<dimension_; i++) \
183	results_[i] = stats_[i].METHOD(); \
184	return results_; \
185	}
186	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(mean)
187	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(variance)
188	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(standardDeviation)
189	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(downsideVariance)
190	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(downsideDeviation)
191	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(semiVariance)
192	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(semiDeviation)
193	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(errorEstimate)
194	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(skewness)
195	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(kurtosis)
196	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(min)
197	DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID(max)
198	#undef DEFINE_SEQUENCE_STAT_CONST_METHOD_VOID
199
200
201	// N-D methods' definition with single argument
202	#define DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(METHOD) \
203	template <class Stat> \
204	std::vector<Real> \
205	GenericSequenceStatistics<Stat>::METHOD(Real x) const { \
206	for (Size i=0; i<dimension_; i++) \
207	results_[i] = stats_[i].METHOD(x); \
208	return results_; \
209	}
210
211	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianPercentile)
212	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianPotentialUpside)
213	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianValueAtRisk)
214	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianExpectedShortfall)
215	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianShortfall)
216	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(gaussianAverageShortfall)
217
218	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(percentile)
219	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(potentialUpside)
220	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(valueAtRisk)
221	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(expectedShortfall)
222	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(regret)
223	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(shortfall)
224	DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE(averageShortfall)
225	#undef DEFINE_SEQUENCE_STAT_CONST_METHOD_DOUBLE
226
227
228	template <class Stat>
229	void GenericSequenceStatistics<Stat>::reset(Size dimension) {
230	// (re-)initialize
231	if (dimension > `0`) {
232	if (dimension == dimension_) {
233	for (Size i=`0`; i<dimension_; ++i)
234	stats_[i].reset();
235	} else {
236	dimension_ = dimension;
237	stats_ = std::vector<Stat>(dimension);
238	results_ = std::vector<Real>(dimension);
239	}
240	quadraticSum_ = Matrix (dimension_, dimension_, `0.0`);
241	} else {
242	dimension_ = dimension;
243	}
244	}
245
246	template <class Stat>
247	Matrix GenericSequenceStatistics<Stat>::covariance() const {
248	Real sampleWeight = weightSum();
249	QL_REQUIRE(sampleWeight > `0.0`,
250	"sampleWeight=0, unsufficient");
251
252	Real sampleNumber = static_cast<Real>(samples());
253	QL_REQUIRE(sampleNumber > `1.0`,
254	"sample number <=1, unsufficient");
255
256	std::vector<Real> m = mean();
257	Real inv = `1.0`/sampleWeight;
258
259	Matrix result = inv *quadraticSum_;
260	result -= outerProduct(v1begin: m.begin(), v1end: m.end(),
261	v2begin: m.begin(), v2end: m.end());
262
263	result *= (sampleNumber/(sampleNumber-`1.0`));
264	return result;
265	}
266
267
268	template <class Stat>
269	Matrix GenericSequenceStatistics<Stat>::correlation() const {
270	Matrix correlation = covariance();
271	Array variances = correlation.diagonal();
272	for (Size i=`0`; i<dimension_; i++){
273	for (Size j=`0`; j<dimension_; j++){
274	if (i==j) {
275	if (variances [i]==`0.0`) {
276	correlation [i][j] = `1.0`;
277	} else {
278	correlation [i][j] *=
279	`1.0`/std::sqrt(x: variances [i]*variances [j]);
280	}
281	} else {
282	if (variances [i]==`0.0` && variances [j]==`0`) {
283	correlation [i][j] = `1.0`;
284	} else if (variances [i]==`0.0` \|\| variances [j]==`0.0`) {
285	correlation [i][j] = `0.0`;
286	} else {
287	correlation [i][j] *=
288	`1.0`/std::sqrt(x: variances [i]*variances [j]);
289	}
290	}
291	} // j for
292	} // i for
293
294	return correlation;
295	}
296
297	}
298
299
300	#endif
301

source code of quantlib/ql/math/statistics/sequencestatistics.hpp