Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeModules/LSANSuppression.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# This is a known leak.
leak:getKernel
#leak:libOpenCL
leak:libnvidia-ptxjitcompile
leak:tbb::internal::task_stream
leak:libnvidia-opencl.so
leak:FFTRepo::FFTRepoKey::privatizeData

# Allocated by Intel's OpenMP implementation during inverse_dense_cpu
# This is not something we can control in ArrayFire
Expand Down
11 changes: 10 additions & 1 deletion src/backend/common/ModuleInterface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ class ModuleInterface {
ModuleType mModuleHandle;

public:
/// \brief Creates an uninitialized Module
ModuleInterface() = default;

/// \brief Creates a module given a backend specific ModuleType
///
/// \param[in] mod The backend specific module
ModuleInterface(ModuleType mod) : mModuleHandle(mod) {}

/// \brief Set module
Expand All @@ -28,10 +34,13 @@ class ModuleInterface {
/// \brief Get module
///
/// \returns handle to backend specific module
inline ModuleType get() const { return mModuleHandle; }
inline const ModuleType& get() const { return mModuleHandle; }

/// \brief Unload module
virtual void unload() = 0;

/// \brief Returns true if the module handle (mModuleHandle) is initialized
virtual operator bool() const = 0;
};

} // namespace common
12 changes: 7 additions & 5 deletions src/backend/common/kernel_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ shared_timed_mutex& getCacheMutex(const int device) {
}

/// \brief Returns the module cache associated with a device
///
/// \param[in] device index into the per-device array of caches; it is used
///                   directly as an array subscript with no bounds check here
/// \returns reference to the ModuleMap stored at that index
ModuleMap& getCache(const int device) {
static ModuleMap caches[detail::DeviceManager::MAX_DEVICES];
// NOTE(review): this span comes from a diff view, so two declarations of
// `caches` appear back to back. Presumably the array above is the removed
// version and the heap allocation below is its replacement; confirm against
// the real file. Nothing in the visible code deletes the `new[]` result.
static ModuleMap* caches =
new ModuleMap[detail::DeviceManager::MAX_DEVICES];
return caches[device];
}

Expand All @@ -51,7 +52,7 @@ Module findModule(const int device, const string& key) {
auto& cache = getCache(device);
auto iter = cache.find(key);
if (iter != cache.end()) { return iter->second; }
return Module{nullptr};
return Module{};
}

Kernel getKernel(const string& kernelName, const vector<string>& sources,
Expand Down Expand Up @@ -89,9 +90,9 @@ Kernel getKernel(const string& kernelName, const vector<string>& sources,
const int device = detail::getActiveDeviceId();
Module currModule = findModule(device, moduleKey);

if (currModule.get() == nullptr) {
if (!currModule) {
currModule = loadModuleFromDisk(device, moduleKey, sourceIsJIT);
if (currModule.get() == nullptr) {
if (!currModule) {
currModule = compileModule(moduleKey, sources, options, {tInstance},
sourceIsJIT);
}
Expand All @@ -102,7 +103,8 @@ Kernel getKernel(const string& kernelName, const vector<string>& sources,
if (iter == cache.end()) {
// If not found, this thread is the first one to compile this
// kernel. Keep the generated module.
getCache(device).emplace(moduleKey, currModule);
Module mod = currModule;
getCache(device).emplace(moduleKey, mod);
} else {
currModule.unload(); // dump the current threads extra compilation
currModule = iter->second;
Expand Down
3 changes: 3 additions & 0 deletions src/backend/cuda/Module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ class Module : public common::ModuleInterface<CUmodule> {
using ModuleType = CUmodule;
using BaseClass = common::ModuleInterface<ModuleType>;

Module() = default;
Module(ModuleType mod) : BaseClass(mod) {
mInstanceMangledNames.reserve(1);
}

operator bool() const final { return get(); }

void unload() final {
CU_CHECK(cuModuleUnload(get()));
set(nullptr);
Expand Down
4 changes: 1 addition & 3 deletions src/backend/opencl/Array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,7 @@ void evalMultiple(vector<Array<T> *> arrays) {
info.strides()[3]},
0};

Param res = {array->data.get(), kInfo};

outputs.push_back(res);
outputs.emplace_back(array->data.get(), kInfo);
output_arrays.push_back(array);
nodes.push_back(array->node.get());
}
Expand Down
14 changes: 7 additions & 7 deletions src/backend/opencl/Kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,24 @@ namespace opencl {

/// Callable that launches an OpenCL kernel through cl::KernelFunctor
struct Enqueuer {
/// \brief Launch a kernel
///
/// \param[in] ker   the kernel to launch
/// \param[in] qArgs enqueue arguments passed as the first argument of the
///                  functor call
/// \param[in] args  kernel arguments, received by value
template<typename... Args>
void operator()(void* ker, const cl::EnqueueArgs& qArgs, Args... args) {
auto launchOp =
cl::KernelFunctor<Args...>(*static_cast<const cl::Kernel*>(ker));
// NOTE(review): this span comes from a diff view, so two heads of operator()
// appear back to back. Presumably the void* form above is the removed version
// and the cl::Kernel by-value form below replaces it; confirm against the
// real file.
void operator()(cl::Kernel ker, const cl::EnqueueArgs& qArgs,
Args... args) {
auto launchOp = cl::KernelFunctor<Args...>(ker);
// args are by-value parameters, so when Args is deduced std::forward<Args>
// passes each one on as an rvalue
launchOp(qArgs, std::forward<Args>(args)...);
}
};

class Kernel
: public common::KernelInterface<cl::Program*, cl::Kernel*, Enqueuer,
: public common::KernelInterface<const cl::Program*, cl::Kernel, Enqueuer,
cl::Buffer*> {
public:
using ModuleType = cl::Program*;
using KernelType = cl::Kernel*;
using ModuleType = const cl::Program*;
using KernelType = cl::Kernel;
using DevPtrType = cl::Buffer*;
using BaseClass =
common::KernelInterface<ModuleType, KernelType, Enqueuer, DevPtrType>;

Kernel() : BaseClass(nullptr, nullptr) {}
Kernel() : BaseClass(nullptr, cl::Kernel{nullptr, false}) {}
Kernel(ModuleType mod, KernelType ker) : BaseClass(mod, ker) {}

// clang-format off
Expand Down
17 changes: 11 additions & 6 deletions src/backend/opencl/Module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,22 @@
namespace opencl {

/// OpenCL backend wrapper for cl::Program object
// NOTE(review): this span comes from a diff view. Adjacent near-duplicate
// lines (cl::Program* vs. cl::Program, and the two unload() bodies) are
// presumably the removed and added versions of the same declaration; confirm
// against the real file.
class Module : public common::ModuleInterface<cl::Program*> {
class Module : public common::ModuleInterface<cl::Program> {
public:
using ModuleType = cl::Program*;
using ModuleType = cl::Program;
using BaseClass = common::ModuleInterface<ModuleType>;

/// \brief Create an uninitialized Module
Module() = default;

/// \brief Create a module given a cl::Program type
Module(ModuleType mod) : BaseClass(mod) {}

// Pointer-based unload: deletes the held cl::Program and resets the handle
void unload() final {
delete get();
set(nullptr);
}
/// \brief Returns true if the wrapped cl::Program is initialized
operator bool() const final { return get()(); }

/// \brief Unload the module by replacing the held program with a
///        default-constructed cl::Program
void unload() final { set(cl::Program()); }
};

} // namespace opencl
5 changes: 3 additions & 2 deletions src/backend/opencl/Param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ namespace opencl {
Param::Param() : data(nullptr), info{{0, 0, 0, 0}, {0, 0, 0, 0}, 0} {}
Param::Param(cl::Buffer *data_, KParam info_) : data(data_), info(info_) {}

Param makeParam(cl_mem mem, int off, const int dims[4], const int strides[4]) {
Param makeParam(cl::Buffer &mem, int off, const int dims[4],
const int strides[4]) {
Param out;
out.data = new cl::Buffer(mem);
out.data = &mem;
out.info.offset = off;
for (int i = 0; i < 4; i++) {
out.info.dims[i] = dims[i];
Expand Down
3 changes: 2 additions & 1 deletion src/backend/opencl/Param.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ struct Param {
};

// AF_DEPRECATED("Use Array<T>")
Param makeParam(cl_mem mem, int off, const int dims[4], const int strides[4]);
Param makeParam(cl::Buffer& mem, int off, const int dims[4],
const int strides[4]);
} // namespace opencl
3 changes: 1 addition & 2 deletions src/backend/opencl/clfft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ SharedPlan findPlan(clfftLayout iLayout, clfftLayout oLayout, clfftDim rank,
// thrown. This is related to
// https://github.com/arrayfire/arrayfire/pull/1899
CLFFT_CHECK(clfftDestroyPlan(p));
// NOLINTNEXTLINE(hicpp-no-malloc)
free(p);
delete p;
#endif
});
// push the plan into plan cache
Expand Down
82 changes: 42 additions & 40 deletions src/backend/opencl/compile_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,39 +25,48 @@
#include <string>
#include <vector>

using detail::Kernel;
using detail::Module;

using cl::Error;
using cl::Program;
using common::loggerFactory;
using opencl::getActiveDeviceId;
using opencl::getDevice;
using opencl::Kernel;
using opencl::Module;
using spdlog::logger;

using std::begin;
using std::end;
using std::ostringstream;
using std::shared_ptr;
using std::string;
using std::vector;
using std::chrono::duration_cast;
using std::chrono::high_resolution_clock;
using std::chrono::milliseconds;

/// \brief Returns the logger created by loggerFactory("jit")
///
/// The logger is held in a function-local static shared_ptr, so the factory is
/// invoked once and every call returns the same raw pointer.
// NOTE(review): this span comes from a diff view. The fully qualified
// head/declaration and the unqualified pair that follows are presumably the
// removed and added versions of the same two lines; confirm against the real
// file.
spdlog::logger *getLogger() {
static std::shared_ptr<spdlog::logger> logger(common::loggerFactory("jit"));
logger *getLogger() {
static shared_ptr<logger> logger(loggerFactory("jit"));
return logger.get();
}

// Prints, for each of the CL_PROGRAM_NUM_DEVICES devices reported by PROG, the
// CL_PROGRAM_BUILD_LOG and CL_PROGRAM_BUILD_OPTIONS strings via printf.
// NOTE(review): this span comes from a diff view. The `PROG->` form and the
// `PROG.` form are presumably the removed and added versions of the same
// macro; confirm against the real file.
#define SHOW_DEBUG_BUILD_INFO(PROG) \
do { \
cl_uint numDevices = PROG->getInfo<CL_PROGRAM_NUM_DEVICES>(); \
for (unsigned int i = 0; i < numDevices; ++i) { \
printf("%s\n", PROG->getBuildInfo<CL_PROGRAM_BUILD_LOG>( \
PROG->getInfo<CL_PROGRAM_DEVICES>()[i]) \
.c_str()); \
printf("%s\n", PROG->getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>( \
PROG->getInfo<CL_PROGRAM_DEVICES>()[i]) \
.c_str()); \
} \
#define SHOW_DEBUG_BUILD_INFO(PROG) \
do { \
cl_uint numDevices = PROG.getInfo<CL_PROGRAM_NUM_DEVICES>(); \
for (unsigned int i = 0; i < numDevices; ++i) { \
printf("%s\n", PROG.getBuildInfo<CL_PROGRAM_BUILD_LOG>( \
PROG.getInfo<CL_PROGRAM_DEVICES>()[i]) \
.c_str()); \
printf("%s\n", PROG.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>( \
PROG.getInfo<CL_PROGRAM_DEVICES>()[i]) \
.c_str()); \
} \
} while (0)

#if defined(NDEBUG)

#define SHOW_BUILD_INFO(PROG) \
do { \
std::string info = getEnvVar("AF_OPENCL_SHOW_BUILD_INFO"); \
string info = getEnvVar("AF_OPENCL_SHOW_BUILD_INFO"); \
if (!info.empty() && info != "0") { SHOW_DEBUG_BUILD_INFO(PROG); } \
} while (0)

Expand All @@ -67,7 +76,7 @@ spdlog::logger *getLogger() {

namespace opencl {

const static std::string DEFAULT_MACROS_STR(
const static string DEFAULT_MACROS_STR(
"\n\
#ifdef USE_DOUBLE\n\
#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\
Expand All @@ -82,36 +91,32 @@ const static std::string DEFAULT_MACROS_STR(
#endif\n \
");

cl::Program *buildProgram(const std::vector<std::string> &kernelSources,
const std::vector<std::string> &compileOpts) {
using std::begin;
using std::end;

cl::Program *retVal = nullptr;
Program buildProgram(const vector<string> &kernelSources,
const vector<string> &compileOpts) {
Program retVal;
try {
static const std::string defaults =
std::string(" -D dim_t=") +
std::string(dtype_traits<dim_t>::getName());
static const string defaults =
string(" -D dim_t=") + string(dtype_traits<dim_t>::getName());

auto device = getDevice();

const std::string cl_std =
std::string(" -cl-std=CL") +
const string cl_std =
string(" -cl-std=CL") +
device.getInfo<CL_DEVICE_OPENCL_C_VERSION>().substr(9, 3);

cl::Program::Sources sources;
Program::Sources sources;
sources.emplace_back(DEFAULT_MACROS_STR);
sources.emplace_back(KParam_hpp, KParam_hpp_len);
sources.insert(end(sources), begin(kernelSources), end(kernelSources));

retVal = new cl::Program(getContext(), sources);
retVal = Program(getContext(), sources);

ostringstream options;
for (auto &opt : compileOpts) { options << opt; }

retVal->build({device}, (cl_std + defaults + options.str()).c_str());
} catch (...) {
if (retVal) { SHOW_BUILD_INFO(retVal); }
retVal.build({device}, (cl_std + defaults + options.str()).c_str());
} catch (Error &err) {
if (err.err() == CL_BUILD_ERROR) { SHOW_BUILD_INFO(retVal); }
throw;
}
return retVal;
Expand All @@ -124,14 +129,11 @@ namespace common {
Module compileModule(const string &moduleKey, const vector<string> &sources,
const vector<string> &options,
const vector<string> &kInstances, const bool isJIT) {
using opencl::getActiveDeviceId;
using opencl::getDevice;

UNUSED(kInstances);
UNUSED(isJIT);

auto compileBegin = high_resolution_clock::now();
auto program = detail::buildProgram(sources, options);
auto program = opencl::buildProgram(sources, options);
auto compileEnd = high_resolution_clock::now();

AF_TRACE("{{{:<30} : {{ compile:{:>5} ms, {{ {} }}, {} }}}}", moduleKey,
Expand All @@ -147,13 +149,13 @@ Module loadModuleFromDisk(const int device, const string &moduleKey,
UNUSED(device);
UNUSED(moduleKey);
UNUSED(isJIT);
return {nullptr};
return {};
}

/// \brief Creates a Kernel for the named entry point of a module
///
/// \param[in] mod          module whose program contains the kernel
/// \param[in] nameExpr     kernel name, passed to the cl::Kernel constructor
/// \param[in] sourceWasJIT unused in this backend
/// \returns Kernel built from the module's program and the new cl::Kernel
Kernel getKernel(const Module &mod, const string &nameExpr,
const bool sourceWasJIT) {
UNUSED(sourceWasJIT);
// NOTE(review): this span comes from a diff view. The two returns are
// presumably the removed (heap-allocated cl::Kernel) and added (by-value
// cl::Kernel) versions of the same statement; confirm against the real file.
return {mod.get(), new cl::Kernel(*mod.get(), nameExpr.c_str())};
// NOTE(review): &mod.get() is the address of the program stored inside `mod`,
// so the returned Kernel presumably must not outlive the Module object the
// caller passed in; verify the lifetime at the call sites.
return {&mod.get(), cl::Kernel(mod.get(), nameExpr.c_str())};
}

} // namespace common
7 changes: 2 additions & 5 deletions src/backend/opencl/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[]) {

cl::Buffer* bPtrs[4];

auto buf = cl::Buffer();
std::vector<Array<uint>> idxArrs(4, createEmptyArray<uint>(dim4()));
// look through the indices to read the af_array indices
for (dim_t x = 0; x < 4; ++x) {
Expand All @@ -56,7 +57,7 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[]) {
oDims[x] = idxArrs[x].elements();
} else {
// allocate a 1-element buffer to keep OpenCL from failing
bPtrs[x] = bufferAlloc(sizeof(uint));
bPtrs[x] = &buf;
}
}

Expand All @@ -65,10 +66,6 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[]) {

kernel::index<T>(out, in, p, bPtrs);

for (dim_t x = 0; x < 4; ++x) {
if (p.isSeq[x]) { bufferFree(bPtrs[x]); }
}

return out;
}

Expand Down
Loading