Skip to content

Commit e2bf35f

Browse files
matthiasrichtersawenzel
authored andcommitted
Fix #489: correctly handling property of poisson distribution
- poisson distribution has two bins with equal probabilities at the maximum, checking now if the most abundant value is in one of those bins - bugfix: consistently calculate value in bin center While the iterator was adding half of the step size in order to return center of the bin, the random function of the DataGenerator was missing this 0.5 term - also replacing 'typedef's by 'using' - fixing compilation warnings (mostly comparison of signed and unsigned)
1 parent d7ac8a6 commit e2bf35f

8 files changed

Lines changed: 64 additions & 48 deletions

File tree

Utilities/DataCompression/include/DataCompression/DataDeflater.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ class DataDeflater {
8686
int reset() {
8787
mCurrent = 0;
8888
mFilledBits = 0;
89+
90+
return 0;
8991
}
9092

9193
/**
@@ -169,7 +171,7 @@ class DataDeflater {
169171
/// current target word
170172
target_type mCurrent;
171173
/// current bit position
172-
int mFilledBits;
174+
unsigned mFilledBits;
173175
/// codec instance
174176
Codec mCodec;
175177
};

Utilities/DataCompression/include/DataCompression/dc_primitives.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,13 @@ class ExampleAlphabet {
139139
static bool isValid(value_type v);
140140

141141
/// get index of value
142-
static int getIndex(value_type symbol);
142+
static unsigned getIndex(value_type symbol);
143143

144144
/// get symbol from index
145-
static value_type getSymbol(int index);
145+
static value_type getSymbol(unsigned index);
146+
147+
/// get the range of indices aka number of indices
148+
constexpr unsigned getIndexRange();
146149

147150
typedef std::iterator<std::forward_iterator_tag, T> _iterator_base;
148151

@@ -215,18 +218,24 @@ class ContiguousAlphabet {
215218
///
216219
/// Each alphabet has to provide a one to one mapping of symbols to
217220
/// index values used for internal storage
218-
static int getIndex(value_type symbol) {
221+
/// For performance reasons, there is no range check
222+
static unsigned getIndex(value_type symbol) {
219223
int index = symbol;
220224
if (_min < 0) index += -_min;
221225
else if (_min > 0) index -= _min;
222226
return index;
223227
}
224228

225229
/// get symbol from index
226-
static value_type getSymbol(int index) {
230+
static value_type getSymbol(unsigned index) {
227231
return _min + index;
228232
}
229233

234+
/// get the range of indices aka number of indices
235+
constexpr unsigned getIndexRange() {
236+
return _max - _min;
237+
}
238+
230239
/// get the name of the alphabet
231240
///
232241
/// name is part of the type definition, defined as a boost mpl string

Utilities/DataCompression/test/DataGenerator.h

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
// granted to it by virtue of its status as an Intergovernmental Organization
99
// or submit itself to any jurisdiction.
1010

11-
//-*- Mode: C++ -*-
12-
1311
#ifndef DATAGENERATOR_H
1412
#define DATAGENERATOR_H
1513
//****************************************************************************
@@ -64,16 +62,17 @@ template<typename ValueT
6462
, typename ModelT>
6563
class DataGenerator {
6664
public:
67-
typedef int size_type;
68-
typedef ValueT result_type;
69-
typedef DataGenerator self_type;
65+
using size_type = std::size_t;
66+
using value_type = ValueT;
67+
using result_type = value_type;
68+
using self_type = DataGenerator;
7069

7170
template<typename... Args>
7271
DataGenerator(result_type _min,
7372
result_type _max,
7473
result_type _step,
7574
Args&&... args)
76-
: mGenerator(), min(_min), max(_max), step(_step), nbins((max-min)/step), mModel(std::forward<Args>(args)...) {}
75+
: min(_min), max(_max), step(_step), nbins((max-min)/step), mGenerator(), mModel(std::forward<Args>(args)...) {}
7776
~DataGenerator() {}
7877
DataGenerator(const DataGenerator&) = default;
7978
DataGenerator& operator=(const DataGenerator&) = default;
@@ -83,8 +82,7 @@ class DataGenerator {
8382
const result_type step;
8483
const size_type nbins;
8584

86-
typedef ValueT value_type;
87-
typedef std::default_random_engine random_engine;
85+
using random_engine = std::default_random_engine;
8886

8987
/// get next random value
9088
// TODO: can it be const?
@@ -98,7 +96,7 @@ class DataGenerator {
9896
}
9997
}
10098
int bin = (v - min)/step;
101-
return min + bin * step;
99+
return min + bin * step + 0.5 * step;
102100
}
103101

104102
/// get next random value
@@ -115,7 +113,7 @@ class DataGenerator {
115113
return mModel.getProbability(v);
116114
}
117115

118-
typedef std::iterator<std::forward_iterator_tag, result_type> _iterator_base;
116+
using _iterator_base = std::iterator<std::forward_iterator_tag, result_type>;
119117

120118
/**
121119
* @class iterator a forward iterator to access the bins
@@ -129,9 +127,9 @@ class DataGenerator {
129127
iterator(const ContainerT& parent, size_type count = 0) : mParent(parent), mCount(count) {}
130128
~iterator() {}
131129

132-
typedef iterator self_type;
133-
typedef typename _iterator_base::value_type value_type;
134-
typedef typename _iterator_base::reference reference;
130+
using self_type = iterator;
131+
using value_type = typename _iterator_base::value_type;
132+
using reference = typename _iterator_base::reference;
135133

136134
// prefix increment
137135
self_type& operator++() {
@@ -153,7 +151,7 @@ class DataGenerator {
153151
return copy;
154152
}
155153

156-
value_type operator*() {return mParent.min + (mCount +.5) * mParent.step;}
154+
value_type operator*() {return mParent.min + mCount * mParent.step + .5 * mParent.step;}
157155
//pointer operator->() const {return &mValue;}
158156
//reference operator[](size_type n) const;
159157

@@ -194,7 +192,7 @@ template <class RealType = double
194192
>
195193
class normal_distribution : public _BASE {
196194
public:
197-
typedef typename _BASE::result_type result_type;
195+
using result_type = typename _BASE::result_type;
198196

199197
normal_distribution(result_type _mean,
200198
result_type _stddev
@@ -226,7 +224,7 @@ template <class IntType = int
226224
>
227225
class poisson_distribution : public _BASE {
228226
public:
229-
typedef typename _BASE::result_type result_type;
227+
using result_type = typename _BASE::result_type;
230228

231229
poisson_distribution(result_type _mean) : _BASE(_mean), mean(_mean) {}
232230
~poisson_distribution() {};

Utilities/DataCompression/test/Fifo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ template<class T
5858
class Fifo : protected _BASE {
5959
public:
6060
Fifo() : mMutex(), mFillStatus(), mStop(false) {}
61-
typedef T value_type;
61+
using value_type = T;
6262

6363
/**
6464
* Push value to the FIFO

Utilities/DataCompression/test/test_DataDeflater.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ namespace o2dc = o2::data_compression;
4646
template<typename DataContainerT, typename DeflatedDataT>
4747
bool compare(const DataContainerT& container, std::size_t bitwidth, const DeflatedDataT& targetBuffer)
4848
{
49-
int wordcount = 0;
49+
unsigned wordcount = 0;
5050
auto bufferWord = targetBuffer[wordcount];
5151
using target_type = typename DeflatedDataT::value_type;
5252
auto targetWidth = 8 * sizeof(target_type);
@@ -82,6 +82,8 @@ bool compare(const DataContainerT& container, std::size_t bitwidth, const Deflat
8282
length -= comparing;
8383
}
8484
}
85+
86+
return true;
8587
}
8688

8789
BOOST_AUTO_TEST_CASE(test_DataDeflaterRaw)

Utilities/DataCompression/test/test_DataGenerator.cxx

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,28 +41,27 @@ bool testWithDistribution(Args&&... args)
4141
o2::test::DataGenerator<value_type, DistributionType> dg(std::forward<Args>(args)...);
4242

4343
std::vector<int> throws(dg.nbins);
44-
const int nRolls = 1000000;
44+
const unsigned nRolls = 1000000;
4545

46-
for (int n = 0; n < nRolls; n++) {
46+
for (unsigned n = 0; n < nRolls; n++) {
4747
value_type v = dg();
48-
int bin = v/dg.step - dg.min;
48+
unsigned bin = v/dg.step - dg.min;
4949
BOOST_REQUIRE(bin < dg.nbins);
5050
throws[bin]++;
5151
}
5252

53-
value_type mostAbundantValue = dg.min;
53+
int mostAbundantValueBin = 0;
5454
int mostAbundantValueCount = 0;
55-
auto highestProbability = dg.getProbability(0);
56-
highestProbability = 0.;
57-
value_type mostProbableValue = dg.min;
55+
auto highestProbability = dg.getProbability(dg.min);
56+
highestProbability = 0;
5857
for (auto i : dg) {
5958
int bin = i/dg.step - dg.min;
59+
BOOST_REQUIRE(bin >= 0);
6060
if (mostAbundantValueCount < throws[bin]) {
61-
mostAbundantValue = i;
61+
mostAbundantValueBin = bin;
6262
mostAbundantValueCount = throws[bin];
6363
}
6464
if (highestProbability < dg.getProbability(i)) {
65-
mostProbableValue = i;
6665
highestProbability = dg.getProbability(i);
6766
}
6867
std::cout << std::setw(4) << std::right << i << ": "
@@ -71,7 +70,14 @@ bool testWithDistribution(Args&&... args)
7170
<< throws[bin]
7271
<< std::endl;
7372
}
74-
BOOST_CHECK(mostAbundantValue == mostProbableValue);
73+
std::vector<int> mostProbableValueBins;
74+
for (auto i : dg) {
75+
int bin = i/dg.step - dg.min;
76+
if (dg.getProbability(i) >= highestProbability) {
77+
mostProbableValueBins.push_back(bin);
78+
}
79+
}
80+
BOOST_CHECK(std::find(mostProbableValueBins.begin(), mostProbableValueBins.end(), mostAbundantValueBin) != mostProbableValueBins.end());
7581

7682
return true;
7783
}

Utilities/DataCompression/test/test_HuffmanCodec.cxx

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ BOOST_AUTO_TEST_CASE(test_HuffmanCodec)
7777
// in the range [-7, 10] including the upper bound
7878
// the first definition is a data type, then an object of this type is
7979
// defined
80-
typedef o2::test::normal_distribution<double> TestDistribution_t;
81-
typedef o2::test::DataGenerator<int16_t, TestDistribution_t> DataGenerator_t;
82-
DataGenerator_t dg(-7.5, 10.5, 1., 0., 1.);
83-
typedef ContiguousAlphabet<DataGenerator_t::value_type, -7, 10> SimpleRangeAlphabet_t;
80+
using TestDistribution_t = o2::test::normal_distribution<double>;
81+
using DataGenerator_t = o2::test::DataGenerator<int16_t, TestDistribution_t>;
82+
DataGenerator_t dg(-7, 10, 1, 0., 1.);
83+
using SimpleRangeAlphabet_t = ContiguousAlphabet<DataGenerator_t::value_type, -7, 10>;
8484
SimpleRangeAlphabet_t alphabet;
8585

8686
////////////////////////////////////////////////////////////////////////////
@@ -90,11 +90,11 @@ BOOST_AUTO_TEST_CASE(test_HuffmanCodec)
9090
// the node type is defined to be HuffmanNode specialized to bitset<16>
9191
// third template parameter determines whether code has to be decoded
9292
// MSB to LSB (true) or LSB to MSB (false)
93-
typedef o2::HuffmanModel<
93+
using HuffmanModel_t = o2::HuffmanModel<
9494
ProbabilityModel<SimpleRangeAlphabet_t>
9595
, o2::HuffmanNode<std::bitset<32> >
9696
, true
97-
> HuffmanModel_t;
97+
>;
9898
HuffmanModel_t huffmanmodel;
9999

100100
std::cout << std::endl << "Huffman probability model after initialization: " << std::endl;
@@ -104,7 +104,6 @@ BOOST_AUTO_TEST_CASE(test_HuffmanCodec)
104104
}
105105

106106
// add probabilities from data generator as weights for every symbol
107-
int x = 0;
108107
for (auto s : alphabet) {
109108
huffmanmodel.addWeight(s, dg.getProbability(s));
110109
}
@@ -123,7 +122,7 @@ BOOST_AUTO_TEST_CASE(test_HuffmanCodec)
123122
std::cout << std::endl << "Generating binary tree and Huffman codes" << std::endl;
124123
huffmanmodel.GenerateHuffmanTree();
125124
huffmanmodel.print();
126-
typedef o2::HuffmanCodec<HuffmanModel_t > Codec_t;
125+
using Codec_t = o2::HuffmanCodec<HuffmanModel_t >;
127126
Codec_t codec(huffmanmodel);
128127

129128
////////////////////////////////////////////////////////////////////////////
@@ -161,7 +160,7 @@ BOOST_AUTO_TEST_CASE(test_HuffmanCodec)
161160
o2::test::Fifo<DataGenerator_t::value_type> fifoRandvals;
162161

163162
// FIFO for encoded values
164-
typedef o2::test::Fifo<uint32_t> FifoBuffer_t;
163+
using FifoBuffer_t = o2::test::Fifo<uint32_t>;
165164
FifoBuffer_t fifoEncoded;
166165

167166
const int nRolls = 1000000;

Utilities/DataCompression/test/test_dc_primitives.cxx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE(test_dc_primitives)
6363
{
6464
// test the getmax meta program
6565
std::cout << std::endl << "Testing getmax meta program ..." << std::endl;
66-
typedef boost::mpl::vector_c<uint16_t, 0, 1, 2, 3, 4, 31, 32, 64> bitranges;
66+
using bitranges = boost::mpl::vector_c<uint16_t, 0, 1, 2, 3, 4, 31, 32, 64>;
6767
boost::mpl::for_each<bitranges, boost::type<boost::mpl::_> >(getmaxTester());
6868

6969
// test the getnofelements meta program
@@ -82,15 +82,15 @@ BOOST_AUTO_TEST_CASE(test_dc_primitives)
8282
std::cout << std::endl << "Testing alphabet template ..." << std::endl;
8383
// declare two types of alphabets: a contiguous range alphabet with symbols
8484
// between -1 and 10 and a bit-range alphabet for a 10-bit word
85-
typedef boost::mpl::string<'T','e','s','t'>::type TestAlphabetName;
86-
typedef boost::mpl::string<'1','0','-','b','i','t'>::type TenBitAlphabetName;
87-
typedef ContiguousAlphabet<int16_t, -1, 10, TestAlphabetName> TestAlphabet;
88-
typedef BitRangeContiguousAlphabet<int16_t, 10, TenBitAlphabetName> TenBitAlphabet;
85+
using TestAlphabetName = boost::mpl::string<'T','e','s','t'>::type;
86+
using TenBitAlphabetName = boost::mpl::string<'1','0','-','b','i','t'>::type;
87+
using TestAlphabet = ContiguousAlphabet<int16_t, -1, 10, TestAlphabetName>;
88+
using TenBitAlphabet = BitRangeContiguousAlphabet<int16_t, 10, TenBitAlphabetName>;
8989

9090
// now check a set of values if they are valid in each of the alphabets
9191
// the check is done at runtime on types of alphabets rather than on
9292
// actual objects
9393
std::vector<int16_t> values = {0 , 5, 15, -2, -1};
94-
typedef boost::mpl::vector<TestAlphabet, TenBitAlphabet> ParameterSet;
94+
using ParameterSet = boost::mpl::vector<TestAlphabet, TenBitAlphabet>;
9595
boost::mpl::for_each<ParameterSet>( AlphabetTester<std::vector<int16_t>>(values) );
9696
}

0 commit comments

Comments
 (0)