Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions src/backend/cuda/kernel/fftconvolve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,13 @@ void packDataHelper(Param<convT> sig_packed,
{
dim_t *sd = sig.dims;

int sig_packed_elem = sig_packed.strides[3] * sig_packed.dims[3];
int filter_packed_elem = filter_packed.strides[3] * filter_packed.dims[3];
int sig_packed_elem = 1;
int filter_packed_elem = 1;

for (int i = 0; i < 4; i++) {
sig_packed_elem *= sig_packed.dims[i];
filter_packed_elem *= filter_packed.dims[i];
}

// Number of packed complex elements in dimension 0
int sig_half_d0 = divup(sd[0], 2);
Expand Down Expand Up @@ -292,8 +297,13 @@ void complexMultiplyHelper(Param<T> out,
CParam<T> filter,
ConvolveBatchKind kind)
{
int sig_packed_elem = sig_packed.strides[3] * sig_packed.dims[3];
int filter_packed_elem = filter_packed.strides[3] * filter_packed.dims[3];
int sig_packed_elem = 1;
int filter_packed_elem = 1;

for (int i = 0; i < 4; i++) {
sig_packed_elem *= sig_packed.dims[i];
filter_packed_elem *= filter_packed.dims[i];
}

dim3 threads(THREADS);
dim3 blocks(divup(sig_packed_elem / 2, threads.x));
Expand Down
2 changes: 1 addition & 1 deletion src/backend/cuda/kernel/ireduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ namespace kernel
template<typename T, af_op_t op>
T ireduce_all(uint *idx, CParam<T> in)
{
int in_elements = in.strides[3] * in.dims[3];
int in_elements = in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];

// FIXME: Use better heuristics to get to the optimum number
if (in_elements > 4096) {
Expand Down
2 changes: 1 addition & 1 deletion src/backend/cuda/kernel/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ namespace kernel
template<typename Ti, typename To, af_op_t op>
To reduce_all(CParam<Ti> in, bool change_nan, double nanval)
{
int in_elements = in.strides[3] * in.dims[3];
int in_elements = in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];

// FIXME: Use better heuristics to get to the optimum number
if (in_elements > 4096) {
Expand Down
6 changes: 4 additions & 2 deletions src/backend/opencl/kernel/ireduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ namespace kernel
T ireduce_all(uint *loc, Param in)
{
try {
int in_elements = in.info.dims[3] * in.info.strides[3];
int in_elements = in.info.dims[0] * in.info.dims[1] * in.info.dims[2] * in.info.dims[3];

// FIXME: Use better heuristics to get to the optimum number
if (in_elements > 4096) {
Expand Down Expand Up @@ -397,7 +397,9 @@ namespace kernel

unique_ptr<T> h_ptr(new T[in_elements]);
T* h_ptr_raw = h_ptr.get();
getQueue().enqueueReadBuffer(*in.data, CL_TRUE, 0, sizeof(T) * in_elements, h_ptr_raw);

getQueue().enqueueReadBuffer(*in.data, CL_TRUE, sizeof(T) * in.info.offset,
sizeof(T) * in_elements, h_ptr_raw);


MinMaxOp<op, T> Op(h_ptr_raw[0], 0);
Expand Down
5 changes: 3 additions & 2 deletions src/backend/opencl/kernel/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ namespace kernel
To reduce_all(Param in, int change_nan, double nanval)
{
try {
int in_elements = in.info.dims[3] * in.info.strides[3];
int in_elements = in.info.dims[0] * in.info.dims[1] * in.info.dims[2] * in.info.dims[3];

// FIXME: Use better heuristics to get to the optimum number
if (in_elements > 4096) {
Expand Down Expand Up @@ -342,7 +342,8 @@ namespace kernel
} else {

unique_ptr<Ti> h_ptr(new Ti[in_elements]);
getQueue().enqueueReadBuffer(*in.data, CL_TRUE, 0, sizeof(Ti) * in_elements, h_ptr.get());
getQueue().enqueueReadBuffer(*in.data, CL_TRUE, sizeof(Ti) * in.info.offset,
sizeof(Ti) * in_elements, h_ptr.get());

Transform<Ti, To, op> transform;
Binary<To, op> reduce;
Expand Down
45 changes: 45 additions & 0 deletions test/ireduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <af/arith.h>
#include <af/data.h>
#include <testHelpers.hpp>
#include <algorithm>

using namespace std;
using namespace af;
Expand Down Expand Up @@ -98,3 +99,47 @@ MINMAXOP(max, int)
MINMAXOP(max, uint)
MINMAXOP(max, char)
MINMAXOP(max, uchar)

// Runs the value-and-index overload of af::max on an indexed view
// a(seq(st, en)) of a random 1D array and compares the result against a
// host-side scan of the same range (treated as inclusive of both ends).
// Both the reported maximum and the reported location are verified.
TEST(ImaxAll, IndexedSmall)
{
    const int num = 1000;
    const int st = 10;
    const int en = num - 100;
    af::array a = af::randu(num);

    // Initialized so a failure to write the outputs is reported
    // deterministically instead of reading indeterminate values.
    float b = 0;
    unsigned idx = 0;
    af::max<float>(&b, &idx, a(af::seq(st, en)));

    std::vector<float> ha(num);
    a.host(&ha[0]);

    // Host reference: maximum over the same inclusive range [st, en].
    float res = ha[st];
    for (int i = st; i <= en; i++) {
        res = std::max(res, ha[i]);
    }

    ASSERT_EQ(b, res);

    // NOTE(review): assumes idx is relative to the indexed view (0 maps to
    // element st of the parent array) -- confirm against the af::max docs.
    // Checking the value at the location, rather than the exact position,
    // keeps the check valid even if the maximum occurs more than once.
    const unsigned count = static_cast<unsigned>(en - st + 1);
    ASSERT_LT(idx, count);
    ASSERT_EQ(ha[st + idx], res);
}

// Same check as the small indexed case, but on a view of 98001 elements.
// Runs the value-and-index overload of af::max on a(seq(st, en)) and compares
// both the reported maximum and the reported location against a host-side
// scan of the same range (treated as inclusive of both ends).
TEST(ImaxAll, IndexedBig)
{
    const int num = 100000;
    const int st = 1000;
    const int en = num - 1000;
    af::array a = af::randu(num);

    // Initialized so a failure to write the outputs is reported
    // deterministically instead of reading indeterminate values.
    float b = 0;
    unsigned idx = 0;
    af::max<float>(&b, &idx, a(af::seq(st, en)));

    std::vector<float> ha(num);
    a.host(&ha[0]);

    // Host reference: maximum over the same inclusive range [st, en].
    float res = ha[st];
    for (int i = st; i <= en; i++) {
        res = std::max(res, ha[i]);
    }

    ASSERT_EQ(b, res);

    // NOTE(review): assumes idx is relative to the indexed view (0 maps to
    // element st of the parent array) -- confirm against the af::max docs.
    // Checking the value at the location, rather than the exact position,
    // keeps the check valid even if the maximum occurs more than once.
    const unsigned count = static_cast<unsigned>(en - st + 1);
    ASSERT_LT(idx, count);
    ASSERT_EQ(ha[st + idx], res);
}
39 changes: 39 additions & 0 deletions test/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <iostream>
#include <string>
#include <testHelpers.hpp>
#include <algorithm>

using std::vector;
using std::string;
Expand Down Expand Up @@ -522,3 +523,41 @@ TEST(AnyAll, NaN)
ASSERT_EQ(af::anyTrue<bool>(A), true);
ASSERT_EQ(af::allTrue<bool>(A), false);
}

// Reduces an indexed view a(seq(first, last)) of a random 1D array with
// af::max and compares it with the maximum of the same inclusive range
// computed on the host copy of the full array.
TEST(MaxAll, IndexedSmall)
{
    const int total = 1000;
    const int first = 10;
    const int last  = total - 100;

    af::array a = af::randu(total);
    float b = af::max<float>(a(af::seq(first, last)));

    std::vector<float> host_vals(total);
    a.host(&host_vals[0]);

    // Half-open iterator range [first, last + 1) covers elements first..last.
    float res = *std::max_element(host_vals.begin() + first,
                                  host_vals.begin() + last + 1);

    ASSERT_EQ(b, res);
}

// Larger variant of the indexed max test (98001 elements in the view):
// reduces a(seq(first, last)) with af::max and compares it with the maximum
// of the same inclusive range computed on the host copy of the full array.
TEST(MaxAll, IndexedBig)
{
    const int total = 100000;
    const int first = 1000;
    const int last  = total - 1000;

    af::array a = af::randu(total);
    float b = af::max<float>(a(af::seq(first, last)));

    std::vector<float> host_vals(total);
    a.host(&host_vals[0]);

    // Half-open iterator range [first, last + 1) covers elements first..last.
    float res = *std::max_element(host_vals.begin() + first,
                                  host_vals.begin() + last + 1);

    ASSERT_EQ(b, res);
}