Skip to content

Commit ead9d80

Browse files
matthiasrichterMohammadAlTurany
authored andcommitted
Adding a simple data generator for test programs
The data generator implements an interface to standard probability distributions. The configurable range is divided into bins, and random values are sorted into those bins. The probability model implementation must provide a function to calculate the probability for each bin. Corresponding specializations for the std normal, poisson and geometric distributions have been added in the initial version. New file: Utilities/DataCompression/test/DataGenerator.h
1 parent c9fc0e6 commit ead9d80

1 file changed

Lines changed: 262 additions & 0 deletions

File tree

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
//-*- Mode: C++ -*-
2+
3+
#ifndef DATAGENERATOR_H
4+
#define DATAGENERATOR_H
5+
//****************************************************************************
6+
//* This file is free software: you can redistribute it and/or modify *
7+
//* it under the terms of the GNU General Public License as published by *
8+
//* the Free Software Foundation, either version 3 of the License, or *
9+
//* (at your option) any later version. *
10+
//* *
11+
//* Primary Authors: Matthias Richter <richterm@scieq.net> *
12+
//* *
13+
//* The authors make no claims about the suitability of this software for *
14+
//* any purpose. It is provided "as is" without express or implied warranty. *
15+
//****************************************************************************
16+
17+
// @file DataGenerator.h
18+
// @author Matthias Richter
19+
// @since 2016-12-06
20+
// @brief A simple data generator
21+
22+
#include <cmath> // exp
#include <cstddef> // std::ptrdiff_t
#include <iterator> // std::forward_iterator_tag
#include <random> // random distribution
#include <stdexcept> // exeptions, range_error
#include <utility> // std::forward
//#include <iostream> // lets see if needed, or keep class fre from output
//#include <iomanip> // lets see if needed, or keep class fre from output
28+
29+
namespace AliceO2 {
30+
namespace Test {
31+
32+
/**
 * @class DataGenerator
 * @brief A simple data generator
 *
 * Generate random numbers according to the distribution model ModelT, which
 * also has to provide the formula. Some distribution models like e.g. normal
 * distribution work on float type values. The random numbers are ordered in
 * bins between min and max with the configured step width.
 *
 * The underlying distribution model must provide an analytic function to
 * calculate the probability for every bin.
 *
 * Requirements on ModelT as used by this class:
 *  - constructible from the trailing constructor arguments
 *  - callable with the random engine, i.e. `mModel(mGenerator)`
 *  - `getProbability(value)` (only required if getProbability() is called)
 *
 * Note: the range members are const, so the defaulted copy assignment
 * operator is implicitly deleted; objects can be copy-constructed only.
 *
 * TODO:
 *  - float numbers can not serve as template parameters, but maybe there
 *    is another way to move some of the computation to compile time
 *  - number of bins: define policy for the last bin,
 *    what to do if (max-min)/step is not integral?
 *  - consider returning the bin number instead of random number
 *  - configurable seed
 *  - error policy
 */
template <typename ValueT, typename ModelT>
class DataGenerator
{
 public:
  typedef int size_type;
  typedef ValueT result_type;
  typedef DataGenerator self_type;

  /**
   * Set up the generator for the half-open range [_min, _max) with bins
   * of width _step; all remaining arguments are forwarded to the
   * constructor of the distribution model.
   *
   * @throws std::invalid_argument if _step is not positive or _max < _min
   */
  template <typename... Args>
  DataGenerator(result_type _min, result_type _max, result_type _step, Args&&... args)
    // initializers are listed in declaration order, which is the order
    // in which the members are actually initialized
    : min(_min),
      max(_max),
      step(_step),
      nbins(computeBinCount(_min, _max, _step)),
      mGenerator(),
      mModel(std::forward<Args>(args)...)
  {
  }
  ~DataGenerator() {}
  DataGenerator(const DataGenerator&) = default;
  DataGenerator& operator=(const DataGenerator&) = default;

  /// lower bound of the range (inclusive)
  const result_type min;
  /// upper bound of the range (exclusive)
  const result_type max;
  /// bin width
  const result_type step;
  /// number of bins, (max - min) / step converted to size_type
  const size_type nbins;

  typedef ValueT value_type;
  typedef std::default_random_engine random_engine;

  /**
   * Get next random value.
   *
   * Values are drawn from the model until one falls into [min, max); the
   * accepted value is then replaced by the lower edge of its bin.
   *
   * @return lower edge of the bin the random value falls into
   * @throws std::range_error if the model keeps producing values outside
   *         the configured range for more than ~1000 consecutive trials
   */
  // TODO: can it be const?
  value_type operator()()
  {
    value_type v;
    int trials = 0;
    while ((v = mModel(mGenerator)) < min || v >= max) {
      if (trials++ > 1000) {
        // this is a protection, just picked a reasonable threshold for number of trials
        throw std::range_error("random value outside configured range for too many trials");
      }
    }
    int bin = (v - min) / step;
    return min + bin * step;
  }

  /// get next random value
  // Not const: drawing a value advances the state of the random engine
  // (a const version could never call the non-const operator()).
  value_type getRandom() { return (*this)(); }

  /// get minimum value of the configured range
  value_type getMin() const { return min; }

  /// get maximum value of the configured range
  value_type getMax() const { return max; }

  /// get theoretical probability of a value, delegated to the model
  double getProbability(value_type v) const
  {
    return mModel.getProbability(v);
  }

  /// iterator traits shared by the bin iterators; replaces the
  /// std::iterator helper which is deprecated since C++17
  struct _iterator_base {
    typedef std::forward_iterator_tag iterator_category;
    typedef result_type value_type;
    typedef std::ptrdiff_t difference_type;
    typedef result_type* pointer;
    typedef result_type& reference;
  };

  /**
   * @class iterator a forward iterator to access the bins
   *
   * Dereferencing yields the center of the current bin, converted to the
   * value type of the generator. The iterator never advances beyond the
   * end position (bin count of the parent).
   *
   * TODO:
   * - check overhead by the computations in the deref operator
   */
  template <class ContainerT>
  class iterator : public _iterator_base
  {
   public:
    iterator(const ContainerT& parent, size_type count = 0) : mParent(&parent), mCount(count) {}
    ~iterator() {}

    typedef iterator self_type;
    typedef typename _iterator_base::value_type value_type;
    typedef typename _iterator_base::reference reference;

    // prefix increment, saturates at the end position
    self_type& operator++()
    {
      if (mCount < mParent->nbins) {
        mCount++;
      }
      return *this;
    }

    // postfix increment
    self_type operator++(int /*unused*/)
    {
      self_type copy(*this);
      ++*this;
      return copy;
    }

    // addition, the result is clamped to the end position
    self_type operator+(size_type n) const
    {
      self_type copy(*this);
      if (copy.mCount + n < mParent->nbins) {
        copy.mCount += n;
      } else {
        copy.mCount = mParent->nbins;
      }
      return copy;
    }

    // center of the current bin
    value_type operator*() const { return mParent->min + (mCount + .5) * mParent->step; }
    //pointer operator->() const {return &mValue;}
    //reference operator[](size_type n) const;

    // only the bin position is compared, not the parent container
    bool operator==(const self_type& other) const
    {
      return mCount == other.mCount;
    }
    bool operator!=(const self_type& other) const
    {
      return not(*this == other);
    }

   private:
    // stored as pointer (not reference) to keep the iterator assignable
    const ContainerT* mParent;
    size_type mCount;
  };

  /// return forward iterator to begin of bins
  iterator<self_type> begin() const
  {
    return iterator<self_type>(*this);
  }

  /// return forward iterator to the end of bins
  iterator<self_type> end() const
  {
    return iterator<self_type>(*this, nbins);
  }

 private:
  /// validate the range configuration and calculate the number of bins
  static size_type computeBinCount(result_type lower, result_type upper, result_type width)
  {
    // written as !(width > 0) so that a NaN step is rejected as well
    if (!(width > 0) || upper < lower) {
      throw std::invalid_argument("DataGenerator requires step > 0 and max >= min");
    }
    return static_cast<size_type>((upper - lower) / width);
  }

  random_engine mGenerator;
  ModelT mModel;
};
176+
177+
/**
 * @class normal_distribution
 * @brief specialization of std::normal_distribution which implements
 * also the analytic formula.
 *
 * Note: the public data members `mean` and `stddev` hide the accessor
 * functions of the same name inherited from the base class.
 */
template <class RealType = double, class BaseT = std::normal_distribution<RealType>>
class normal_distribution : public BaseT
{
 public:
  typedef typename BaseT::result_type result_type;

  /// @param _mean   mean of the distribution
  /// @param _stddev standard deviation of the distribution
  normal_distribution(result_type _mean, result_type _stddev)
    : BaseT(_mean, _stddev), mean(_mean), stddev(_stddev)
  {
  }

  /// sqrt(2*pi) with full double precision
  const double sqrt2pi = 2.5066282746310002;
  const result_type mean;
  const result_type stddev;

  /// get theoretical probability of a value
  // The value of the probability density function at v is returned.
  //
  // if value_type is an integral type we want to have the probability
  // that the result value is in the range [v, v+1) whereas the step
  // can be something else than 1
  // also the values outside the specified range should be excluded
  // and the probability for intervals in the range has to be scaled
  template <typename value_type>
  double getProbability(value_type v) const
  {
    const double deviation = static_cast<double>(v) - mean;
    return std::exp(-deviation * deviation / (2. * stddev * stddev)) / (stddev * sqrt2pi);
  }
};
208+
209+
/**
 * @class poisson_distribution
 * @brief specialization of std::poisson_distribution which implements
 * also the analytic formula.
 *
 * NOTE(review): the constructor takes the mean as result_type, i.e. the
 * integral type of the distribution, so a fractional mean is truncated
 * before it reaches the base class. The signature is kept for backward
 * compatibility.
 *
 * Note: the public data member `mean` hides the accessor function of the
 * same name inherited from the base class.
 */
template <class IntType = int, class BaseT = std::poisson_distribution<IntType>>
class poisson_distribution : public BaseT
{
 public:
  typedef typename BaseT::result_type result_type;

  /// @param _mean mean of the distribution
  poisson_distribution(result_type _mean) : BaseT(_mean), mean(_mean) {}
  ~poisson_distribution() = default;

  const result_type mean;

  /// n! in integer arithmetic; the int result overflows for n > 12.
  /// Kept for backward compatibility, no longer used by getProbability.
  int factorial(unsigned int n) const
  {
    return (n <= 1) ? 1 : factorial(n - 1) * n;
  }

  /// get theoretical probability of a value
  // mean^v * exp(-mean) / v! is evaluated in logarithmic form, with
  // lgamma(v + 1) = log(v!), so that neither the factorial nor the power
  // can overflow for large v. For non-integral v this is the continuous
  // extension of the formula via the gamma function.
  template <typename value_type>
  double getProbability(value_type v) const
  {
    if (v < 0) {
      return 0.;
    }
    const double k = static_cast<double>(v);
    const double lambda = static_cast<double>(mean);
    if (!(lambda > 0.)) {
      // degenerate case: log(0) is not usable; mean^v is 1 for v == 0
      // and 0 otherwise
      return (k == 0.) ? 1. : 0.;
    }
    return std::exp(k * std::log(lambda) - lambda - std::lgamma(k + 1.));
  }
};
237+
238+
/**
 * @class geometric_distribution
 * @brief specialization of std::geometric_distribution which implements
 * also the analytic formula.
 */
template <class IntType = int, class BaseT = std::geometric_distribution<IntType>>
class geometric_distribution : public BaseT
{
 public:
  /// @param _parameter parameter of the distribution, forwarded to the
  ///        base class and used as success probability in the formula
  geometric_distribution(float _parameter) : BaseT(_parameter), parameter(_parameter) {}

  const float parameter;

  /// get theoretical probability of a value
  // parameter * (1 - parameter)^v, evaluated in double precision; zero
  // for negative values
  template <typename value_type>
  double getProbability(value_type v) const
  {
    if (v < 0) {
      return 0.;
    }
    const double p = parameter;
    return p * std::pow(1. - p, static_cast<double>(v));
  }
};
259+
260+
}; // namespace test
261+
}; // namespace AliceO2
262+
#endif

0 commit comments

Comments
 (0)