Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions src/backend/opencl/kernel/scan_dim_by_key.cl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ kernel void scanDimByKeyNonfinal(
// Hence increment ids[kDim] just after offsetting out and before offsetting
// in
tData += ids[3] * tInfo.strides[3] + ids[2] * tInfo.strides[2] +
ids[1] * tInfo.strides[1] + ids[0];
ids[1] * tInfo.strides[1] + ids[0] ;
tfData += ids[3] * tfInfo.strides[3] + ids[2] * tfInfo.strides[2] +
ids[1] * tfInfo.strides[1] + ids[0];
tiData += ids[3] * tiInfo.strides[3] + ids[2] * tiInfo.strides[2] +
Expand All @@ -45,10 +45,9 @@ kernel void scanDimByKeyNonfinal(
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0];
iData += ids[3] * iInfo.strides[3] + ids[2] * iInfo.strides[2] +
ids[1] * iInfo.strides[1] + ids[0];
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
kData += ids[3] * kInfo.strides[3] + ids[2] * kInfo.strides[2] +
ids[1] * kInfo.strides[1] + ids[0];
iData += iInfo.offset;
ids[1] * kInfo.strides[1] + ids[0] + kInfo.offset;

int id_dim = ids[kDim];
const int out_dim = oInfo.dims[kDim];
Expand Down Expand Up @@ -192,10 +191,9 @@ kernel void scanDimByKeyFinal(global To *oData, KParam oInfo,
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0];
iData += ids[3] * iInfo.strides[3] + ids[2] * iInfo.strides[2] +
ids[1] * iInfo.strides[1] + ids[0];
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
kData += ids[3] * kInfo.strides[3] + ids[2] * kInfo.strides[2] +
ids[1] * kInfo.strides[1] + ids[0];
iData += iInfo.offset;
ids[1] * kInfo.strides[1] + ids[0] + kInfo.offset;

int id_dim = ids[kDim];
const int out_dim = oInfo.dims[kDim];
Expand Down
14 changes: 7 additions & 7 deletions src/backend/opencl/kernel/scan_first_by_key.cl
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ kernel void scanFirstByKeyNonfinal(global To *oData, KParam oInfo,
yid * kInfo.strides[1] + kInfo.offset;

tData += wid * tInfo.strides[3] + zid * tInfo.strides[2] +
yid * tInfo.strides[1] + tInfo.offset;
yid * tInfo.strides[1];

tfData += wid * tfInfo.strides[3] + zid * tfInfo.strides[2] +
yid * tfInfo.strides[1] + tfInfo.offset;
yid * tfInfo.strides[1];

tiData += wid * tiInfo.strides[3] + zid * tiInfo.strides[2] +
yid * tiInfo.strides[1] + tiInfo.offset;
yid * tiInfo.strides[1];

oData += wid * oInfo.strides[3] + zid * oInfo.strides[2] +
yid * oInfo.strides[1] + oInfo.offset;
Expand Down Expand Up @@ -179,7 +179,7 @@ kernel void scanFirstByKeyFinal(global To *oData, KParam oInfo,
yid * kInfo.strides[1] + kInfo.offset;

oData += wid * oInfo.strides[3] + zid * oInfo.strides[2] +
yid * oInfo.strides[1] + oInfo.offset;
yid * oInfo.strides[1];

local To l_val0[SHARED_MEM_SIZE];
local To l_val1[SHARED_MEM_SIZE];
Expand Down Expand Up @@ -283,13 +283,13 @@ kernel void bcastFirstByKey(global To *oData, KParam oInfo,

if (cond) {
tiData += wid * tiInfo.strides[3] + zid * tiInfo.strides[2] +
yid * tiInfo.strides[1] + tiInfo.offset;
yid * tiInfo.strides[1];

tData += wid * tInfo.strides[3] + zid * tInfo.strides[2] +
yid * tInfo.strides[1] + tInfo.offset;
yid * tInfo.strides[1];

oData += wid * oInfo.strides[3] + zid * oInfo.strides[2] +
yid * oInfo.strides[1] + oInfo.offset;
yid * oInfo.strides[1];

int boundary = tiData[groupId_x];
To accum = tData[groupId_x - 1];
Expand Down
178 changes: 167 additions & 11 deletions test/arrayfire_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,16 @@ std::string readNextNonEmptyLine(std::ifstream &file) {

std::string getBackendName(bool lower) {
af::Backend backend = af::getActiveBackend();
switch(backend) {
case AF_BACKEND_CPU:
return lower ? std::string("cpu") : std::string("CPU");
case AF_BACKEND_CUDA:
return lower ? std::string("cuda") : std::string("CUDA");
case AF_BACKEND_OPENCL:
return lower ? std::string("opencl") : std::string("OpenCL");
case AF_BACKEND_ONEAPI:
return lower ? std::string("oneapi") : std::string("oneAPI");
default:
return lower ? std::string("unknown") : std::string("Unknown");
switch (backend) {
case AF_BACKEND_CPU:
return lower ? std::string("cpu") : std::string("CPU");
case AF_BACKEND_CUDA:
return lower ? std::string("cuda") : std::string("CUDA");
case AF_BACKEND_OPENCL:
return lower ? std::string("opencl") : std::string("OpenCL");
case AF_BACKEND_ONEAPI:
return lower ? std::string("oneapi") : std::string("oneAPI");
default: return lower ? std::string("unknown") : std::string("Unknown");
}
}

Expand Down Expand Up @@ -2046,6 +2045,163 @@ INSTANTIATE(std::complex<float>);
INSTANTIATE(std::complex<double>);
#undef INSTANTIATE

// Returns an array holding the same values as `in`, but stored in the
// temporary representation selected by `form`:
//   - JIT_FORMAT:       an un-evaluated operation on top of an evaluated base
//   - SUB_FORMAT_dimN:  a sub-array of a parent that is 2 longer along dimN
//   - REORDERED_FORMAT: reordered, copied and reordered back
//   - anything else:    a plain copy of `in`
af::array toTempFormat(tempFormat form, const af::array &in) {
    const af::dim4 shape = in.dims();

    // Embeds `in` at position 1 along `axis` of a randomly filled parent that
    // is 2 elements longer along that axis, and returns that sub-array.
    const auto viewInsideParent = [&](int axis) -> af::array {
        af::dim4 parentShape(shape);
        parentShape[axis] += 2;
        af::array parent = af::randu(parentShape, in.type());
        af::index sel[4] = {af::span, af::span, af::span, af::span};
        sel[axis]        = af::seq(1, shape[axis]);
        parent(sel[0], sel[1], sel[2], sel[3]) = in;
        return parent(sel[0], sel[1], sel[2], sel[3]);
    };

    switch (form) {
        case JIT_FORMAT: {
            const bool isBool = (in.type() == b8);
            // NOTE(review): for narrow integer types `in * 2` may wrap before
            // it is halved again -- confirm callers only pass small values.
            af::array jit = isBool ? !in : in * 2;
            // Evaluate now, so that the base buffer differs from the original
            // and the inverse operation below stays pending in the JIT tree.
            jit.eval();
            if (isBool) {
                jit = !jit;
            } else {
                jit /= 2;
            }
            return jit;
        }
        case SUB_FORMAT_dim0: return viewInsideParent(0);
        case SUB_FORMAT_dim1: return viewInsideParent(1);
        case SUB_FORMAT_dim2: return viewInsideParent(2);
        case SUB_FORMAT_dim3: return viewInsideParent(3);
        case REORDERED_FORMAT: {
            // perm[0] has to stay 0, to keep the same data in memory
            const dim_t perm[4] = {0, 3, 1, 2};
            dim_t inverse[4];
            for (dim_t k = 0; k < 4; ++k) { inverse[perm[k]] = k; }
            // The copy makes the reordered data linear in memory
            const af::array shuffled =
                af::reorder(in, perm[0], perm[1], perm[2], perm[3]).copy();
            // Same content as `in`, although the data is stored in a
            // different order
            return af::reorder(shuffled, inverse[0], inverse[1], inverse[2],
                               inverse[3]);
        }
        case LINEAR_FORMAT:
        default: break;
    }
    return in.copy();
}

void toTempFormat(tempFormat form, af_array *out, const af_array &in) {
dim_t dims[4];
af_get_dims(dims, dims + 1, dims + 2, dims + 3, in);
unsigned numdims;
af_get_numdims(&numdims, in);
af_dtype ty;
af_get_type(&ty, in);
switch (form) {
case JIT_FORMAT: {
// af_array one = nullptr, min_one = nullptr, res = nullptr;
af_array res = nullptr, two = nullptr;
ASSERT_SUCCESS(af_constant(&two, 2, numdims, dims, ty));
switch (ty) {
case b8: af_not(&res, in); break;
default:
// ret = in + af::constant(1, dims, in.type());
ASSERT_SUCCESS(af_mul(&res, in, two, false));
}
// Make sure that the base array is <> form original
ASSERT_SUCCESS(af_eval(res));
switch (ty) {
case b8: af_not(out, res); break;
default:
ASSERT_SUCCESS(af_div(out, res, two, false)); // NO EVAL!!
}
ASSERT_SUCCESS(af_release_array(two));
two = nullptr;
ASSERT_SUCCESS(af_release_array(res));
res = nullptr;
}; break;
case SUB_FORMAT_dim0: {
const dim_t pdims[4] = {dims[0] + 2, dims[1], dims[2], dims[3]};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(1u, numdims), pdims, ty));
const af_seq idxs[4] = {af_make_seq(1, dims[0], 1), af_span,
af_span, af_span};

ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
}; break;
case SUB_FORMAT_dim1: {
const dim_t pdims[4] = {dims[0], dims[1] + 2, dims[2], dims[3]};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(2u, numdims), pdims, ty));
const af_seq idxs[4] = {af_span, af_make_seq(1, dims[1], 1),
af_span, af_span};
ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
parent = nullptr;
}; break;
case SUB_FORMAT_dim2: {
const dim_t pdims[4] = {dims[0], dims[1], dims[2] + 2, dims[3]};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(3u, numdims), pdims, ty));
const af_seq idxs[4] = {af_span, af_span,
af_make_seq(1, dims[2], 1), af_span};
ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
parent = nullptr;
}; break;
case SUB_FORMAT_dim3: {
const dim_t pdims[4] = {dims[0], dims[1], dims[2], dims[3] + 2};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(4u, numdims), pdims, ty));
const af_seq idxs[4] = {af_span, af_span, af_span,
af_make_seq(1, dims[3], 1)};
ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
parent = nullptr;
}; break;
case REORDERED_FORMAT: {
const unsigned idxs[4] = {0, 3, 1, 2};
// idxs[0] has to be 0, to keep the same data in mem
dim_t rev_idxs[4];
for (dim_t i = 0; i < 4; ++i) { rev_idxs[idxs[i]] = i; };
af_array rev = nullptr;
ASSERT_SUCCESS(
af_reorder(&rev, in, idxs[0], idxs[1], idxs[2], idxs[3]));
ASSERT_SUCCESS(af_copy_array(out, rev));
ASSERT_SUCCESS(af_reorder(out, rev, rev_idxs[0], rev_idxs[1],
rev_idxs[2], rev_idxs[3]));
// ret has same content as in, although data is stored in
// different order
ASSERT_SUCCESS(af_release_array(rev));
rev = nullptr;
}; break;
case LINEAR_FORMAT:
default: af_copy_array(out, in);
};
}

int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
Expand Down
23 changes: 23 additions & 0 deletions test/scan_by_key.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,26 @@ TEST(ScanByKey, FixOverflowWrite) {

ASSERT_EQ(prior, valsAF(0).scalar<float>());
}

// Defines the test TEMP_FORMAT.<form>_Dim<dim>: scanByKey along dimension
// `dim` on inputs converted with toTempFormat(form, ...) has to match the
// result obtained from the plain, evaluated inputs. All keys are 0.
// UNSUPPORTED_BACKEND is applied for the oneAPI backend.
#define TEST_TEMP_FORMAT(form, dim)                                            \
    TEST(TEMP_FORMAT, form##_Dim##dim) {                                       \
        UNSUPPORTED_BACKEND(AF_BACKEND_ONEAPI);                                \
        const dim4 shape(2, 2, 2, 2);                                          \
        const array values(                                                    \
            af::moddims(range(dim4(shape.elements())), shape));                \
        values.eval();                                                         \
        const array zeroKeys(af::constant(0, shape, u32));                     \
        zeroKeys.eval();                                                       \
        const array gold = scanByKey(zeroKeys, values, dim);                   \
                                                                               \
        const array keysInFormat   = toTempFormat(form, zeroKeys);             \
        const array valuesInFormat = toTempFormat(form, values);               \
        array out = scanByKey(keysInFormat, valuesInFormat, dim);              \
        ASSERT_ARRAYS_EQ(gold, out);                                           \
    }

// Expands to one TEST_TEMP_FORMAT test per scan dimension (0 to 3) for the
// given temporary format.
#define TEST_TEMP_FORMATS(form) \
TEST_TEMP_FORMAT(form, 0) \
TEST_TEMP_FORMAT(form, 1) \
TEST_TEMP_FORMAT(form, 2) \
TEST_TEMP_FORMAT(form, 3)

// Instantiate the tests above for every format listed in FOREACH_TEMP_FORMAT.
FOREACH_TEMP_FORMAT(TEST_TEMP_FORMATS)
32 changes: 28 additions & 4 deletions test/testHelpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,10 @@ bool noHalfTests(af::dtype ty);
GTEST_SKIP() << "Device doesn't support Half"

#ifdef SKIP_UNSUPPORTED_TESTS
#define UNSUPPORTED_BACKEND(backend) \
if(backend == af::getActiveBackend()) \
GTEST_SKIP() << "Skipping unsupported function on " \
+ getBackendName() + " backend"
#define UNSUPPORTED_BACKEND(backend) \
if (backend == af::getActiveBackend()) \
GTEST_SKIP() << "Skipping unsupported function on " + getBackendName() + \
" backend"
#else
#define UNSUPPORTED_BACKEND(backend)
#endif
Expand Down Expand Up @@ -653,6 +653,30 @@ ::testing::AssertionResult assertArrayEq(std::string aName, std::string bName,
const af_array a, const af_array b,
TestOutputArrayInfo *metadata);

// Temporary array representations that toTempFormat() can produce. Every
// format holds the same values as the input array; only the way the array is
// represented differs.
enum tempFormat {
LINEAR_FORMAT, // Plain copy of the input (= default)
JIT_FORMAT, // Evaluated base array with one un-evaluated (JIT) operation on top
SUB_FORMAT_dim0, // Sub-array of a parent that is 2 elements longer along dim0
SUB_FORMAT_dim1, // Sub-array of a parent that is 2 elements longer along dim1
SUB_FORMAT_dim2, // Sub-array of a parent that is 2 elements longer along dim2
SUB_FORMAT_dim3, // Sub-array of a parent that is 2 elements longer along dim3
REORDERED_FORMAT // Reordered, copied and reordered back: same dims, data stored in a different order
};
// Invokes the macro TESTS once for every enumerator of tempFormat, passing
// the enumerator name as its only argument. Keep this list in sync with the
// tempFormat enum.
#define FOREACH_TEMP_FORMAT(TESTS) \
TESTS(LINEAR_FORMAT) \
TESTS(JIT_FORMAT) \
TESTS(SUB_FORMAT_dim0) \
TESTS(SUB_FORMAT_dim1) \
TESTS(SUB_FORMAT_dim2) \
TESTS(SUB_FORMAT_dim3) \
TESTS(REORDERED_FORMAT)

// Converts the "in" array to the representation selected by "form". The
// content remains unchanged.
// The first overload returns the converted array; the second one (C API)
// stores the handle of the converted array in *out.
af::array toTempFormat(tempFormat form, const af::array &in);
void toTempFormat(tempFormat form, af_array *out, const af_array &in);

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
Loading