Skip to content

Commit 40f8cd1

Browse files
author
pradeep
committed
Fixed histogram cuda/opencl kernels for indexed arrays
Added a unit test for indexed arrays
1 parent b0c7c7b commit 40f8cd1

11 files changed

Lines changed: 63 additions & 27 deletions

File tree

src/api/c/histogram.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@ using namespace detail;
1919

2020
template<typename inType,typename outType>
2121
static inline af_array histogram(const af_array in, const unsigned &nbins,
22-
const double &minval, const double &maxval)
22+
const double &minval, const double &maxval,
23+
const bool islinear)
2324
{
24-
return getHandle(histogram<inType,outType>(getArray<inType>(in),nbins,minval,maxval));
25+
if (islinear)
26+
return getHandle(histogram<inType,outType, true>(getArray<inType>(in),nbins,minval,maxval));
27+
else
28+
return getHandle(histogram<inType,outType, false>(getArray<inType>(in),nbins,minval,maxval));
2529
}
2630

2731
af_err af_histogram(af_array *out, const af_array in,
@@ -33,12 +37,12 @@ af_err af_histogram(af_array *out, const af_array in,
3337

3438
af_array output;
3539
switch(type) {
36-
case f32: output = histogram<float , uint>(in, nbins, minval, maxval); break;
37-
case f64: output = histogram<double, uint>(in, nbins, minval, maxval); break;
38-
case b8 : output = histogram<char , uint>(in, nbins, minval, maxval); break;
39-
case s32: output = histogram<int , uint>(in, nbins, minval, maxval); break;
40-
case u32: output = histogram<uint , uint>(in, nbins, minval, maxval); break;
41-
case u8 : output = histogram<uchar , uint>(in, nbins, minval, maxval); break;
40+
case f32: output = histogram<float , uint>(in, nbins, minval, maxval, info.isLinear()); break;
41+
case f64: output = histogram<double, uint>(in, nbins, minval, maxval, info.isLinear()); break;
42+
case b8 : output = histogram<char , uint>(in, nbins, minval, maxval, info.isLinear()); break;
43+
case s32: output = histogram<int , uint>(in, nbins, minval, maxval, info.isLinear()); break;
44+
case u32: output = histogram<uint , uint>(in, nbins, minval, maxval, info.isLinear()); break;
45+
case u8 : output = histogram<uchar , uint>(in, nbins, minval, maxval, info.isLinear()); break;
4246
default : TYPE_ERROR(1, type);
4347
}
4448
std::swap(*out,output);

src/backend/cpu/histogram.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ using af::dim4;
1818
namespace cpu
1919
{
2020

21-
template<typename inType, typename outType>
21+
template<typename inType, typename outType, bool isLinear>
2222
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
2323
{
2424
float step = (maxval - minval)/(float)nbins;
@@ -36,7 +36,8 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
3636
for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
3737
for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
3838
for(dim_t i=0; i<nElems; i++) {
39-
int bin = (int)((inData[i] - minval) / step);
39+
int idx = isLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
40+
int bin = (int)((inData[idx] - minval) / step);
4041
bin = std::max(bin, 0);
4142
bin = std::min(bin, (int)(nbins - 1));
4243
outData[bin]++;
@@ -50,7 +51,8 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
5051
}
5152

5253
#define INSTANTIATE(in_t,out_t)\
53-
template Array<out_t> histogram(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
54+
template Array<out_t> histogram<in_t, out_t, true>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval); \
55+
template Array<out_t> histogram<in_t, out_t, false>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
5456

5557
INSTANTIATE(float , uint)
5658
INSTANTIATE(double, uint)

src/backend/cpu/histogram.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace cpu
1313
{
1414

15-
template<typename inType, typename outType>
15+
template<typename inType, typename outType, bool isLinear>
1616
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval);
1717

1818
}

src/backend/cuda/histogram.cu

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ using std::vector;
2222
namespace cuda
2323
{
2424

25-
template<typename inType, typename outType>
25+
template<typename inType, typename outType, bool isLinear>
2626
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
2727
{
2828

@@ -44,13 +44,14 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
4444
dim4 minmax_dims(mmNElems*2);
4545
Array<cfloat> minmax = createHostDataArray<cfloat>(minmax_dims, &h_minmax.front());
4646

47-
kernel::histogram<inType, outType>(out, in, minmax.get(), nbins);
47+
kernel::histogram<inType, outType, isLinear>(out, in, minmax.get(), nbins);
4848

4949
return out;
5050
}
5151

5252
#define INSTANTIATE(in_t,out_t)\
53-
template Array<out_t> histogram(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
53+
template Array<out_t> histogram<in_t, out_t, true>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval); \
54+
template Array<out_t> histogram<in_t, out_t, false>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
5455

5556
INSTANTIATE(float , uint)
5657
INSTANTIATE(double, uint)

src/backend/cuda/histogram.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace cuda
1313
{
1414

15-
template<typename inType, typename outType>
15+
template<typename inType, typename outType, bool isLinear>
1616
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval);
1717

1818
}

src/backend/cuda/kernel/histogram.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ __forceinline__ __device__ int minimum(int a, int b)
2828
return (a < b ? a : b);
2929
}
3030

31-
template<typename inType, typename outType>
31+
template<typename inType, typename outType, bool isLinear>
3232
static __global__
3333
void histogramKernel(Param<outType> out, CParam<inType> in,
3434
const cfloat *d_minmax, int len,
@@ -62,7 +62,8 @@ void histogramKernel(Param<outType> out, CParam<inType> in,
6262
__syncthreads();
6363

6464
for (int row = start; row < end; row += blockDim.x) {
65-
int bin = (int)((iptr[row] - min) / step);
65+
int idx = isLinear ? row : ((row % in.dims[0]) + (row / in.dims[0])*in.strides[1]);
66+
int bin = (int)((iptr[idx] - min) / step);
6667
bin = (bin < 0) ? 0 : bin;
6768
bin = (bin >= nbins) ? (nbins-1) : bin;
6869
atomicAdd((shrdMem + bin), 1);
@@ -74,7 +75,7 @@ void histogramKernel(Param<outType> out, CParam<inType> in,
7475
}
7576
}
7677

77-
template<typename inType, typename outType>
78+
template<typename inType, typename outType, bool isLinear>
7879
void histogram(Param<outType> out, CParam<inType> in, cfloat *d_minmax, int nbins)
7980
{
8081
dim3 threads(kernel::THREADS_X, 1);
@@ -86,7 +87,7 @@ void histogram(Param<outType> out, CParam<inType> in, cfloat *d_minmax, int nbin
8687

8788
int smem_size = nbins * sizeof(outType);
8889

89-
CUDA_LAUNCH_SMEM((histogramKernel<inType, outType>), blocks, threads, smem_size,
90+
CUDA_LAUNCH_SMEM((histogramKernel<inType, outType, isLinear>), blocks, threads, smem_size,
9091
out, in, d_minmax, nElems, nbins, blk_x);
9192

9293
POST_LAUNCH_CHECK();

src/backend/opencl/histogram.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ using std::vector;
2222
namespace opencl
2323
{
2424

25-
template<typename inType, typename outType>
25+
template<typename inType, typename outType, bool isLinear>
2626
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
2727
{
2828
ARG_ASSERT(1, (nbins<=kernel::MAX_BINS));
@@ -43,13 +43,14 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
4343
dim4 minmax_dims(mmNElems*2);
4444
Array<cfloat> minmax = createHostDataArray<cfloat>(minmax_dims, h_minmax.data());
4545

46-
kernel::histogram<inType, outType>(out, in, minmax, nbins);
46+
kernel::histogram<inType, outType, isLinear>(out, in, minmax, nbins);
4747

4848
return out;
4949
}
5050

5151
#define INSTANTIATE(in_t,out_t)\
52-
template Array<out_t> histogram(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
52+
template Array<out_t> histogram<in_t, out_t, true>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval); \
53+
template Array<out_t> histogram<in_t, out_t, false>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
5354

5455
INSTANTIATE(float , uint)
5556
INSTANTIATE(double, uint)

src/backend/opencl/histogram.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace opencl
1313
{
1414

15-
template<typename inType, typename outType>
15+
template<typename inType, typename outType, bool isLinear>
1616
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval);
1717

1818
}

src/backend/opencl/kernel/histogram.cl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,14 @@ void histogram(__global outType * d_dst,
4141
barrier(CLK_LOCAL_MEM_FENCE);
4242

4343
for (int row = start; row < end; row += get_local_size(0)) {
44-
int bin = (int)(((float)in[row] - minval) / dx);
44+
#if defined(IS_LINEAR)
45+
int idx = row;
46+
#else
47+
int i0 = row % iInfo.dims[0];
48+
int i1 = row / iInfo.dims[0];
49+
int idx= i0+i1*iInfo.strides[1];
50+
#endif
51+
int bin = (int)(((float)in[idx] - minval) / dx);
4552
bin = max(bin, 0);
4653
bin = min(bin, (int)nbins-1);
4754
atomic_inc((localMem + bin));

src/backend/opencl/kernel/histogram.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ static const unsigned MAX_BINS = 4000;
3030
static const int THREADS_X = 256;
3131
static const int THRD_LOAD = 16;
3232

33-
template<typename inType, typename outType>
33+
template<typename inType, typename outType, bool isLinear>
3434
void histogram(Param out, const Param in, const Param minmax, int nbins)
3535
{
3636
try {
@@ -45,7 +45,8 @@ void histogram(Param out, const Param in, const Param minmax, int nbins)
4545
options << " -D inType=" << dtype_traits<inType>::getName()
4646
<< " -D outType=" << dtype_traits<outType>::getName()
4747
<< " -D THRD_LOAD=" << THRD_LOAD;
48-
48+
if (isLinear)
49+
options << " -D IS_LINEAR";
4950
if (std::is_same<inType, double>::value ||
5051
std::is_same<inType, cdouble>::value) {
5152
options << " -D USE_DOUBLE";

0 commit comments

Comments
 (0)