Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions docs/details/device.dox
Original file line number Diff line number Diff line change
Expand Up @@ -77,25 +77,51 @@ have finished.

===============================================================================

\defgroup device_func_alloc alloc
\defgroup device_func_alloc allocV2
\ingroup device_mat

\brief Allocate memory using the ArrayFire memory manager

This function will allocate memory on the device and return a pointer
to it. The memory is allocated using ArrayFire's memory manager which
has some different characteristics to standard method of memory
allocation
will defer releasing memory to the driver and reuse the same memory
for later operations.

The object this function returns depends on the backend in use. The
interface returns a void pointer that needs to be cast to the
backend-appropriate memory type, as listed in the table below.


| function | CPU | CUDA | OpenCL |
|------------------------------|-----|------|-------------|
| af_alloc_device_v2 | T* | T* | cl_mem |
| af::allocV2 | T* | T* | cl_mem |
| af_alloc_device (deprecated) | T* | T* | cl::Buffer* |
| af::alloc (deprecated) | T* | T* | cl::Buffer* |

CPU Backend
-----------
\snippet test/memory.cpp ex_alloc_v2_cpu

CUDA Backend
------------
\snippet test/cuda.cu ex_alloc_v2_cuda

OpenCL Backend
--------------
\snippet test/ocl_ext_context.cpp ex_alloc_v2_opencl

===============================================================================

\defgroup device_func_free free
\defgroup device_func_free freeV2
\ingroup device_mat

\brief Free device memory allocated by ArrayFire's memory manager
\brief Returns memory to ArrayFire's memory manager. The memory will
return to the memory pool.

These calls free the device memory. These functions need to be called on
pointers allocated using alloc function.
Releases control of the memory allocated by af::allocV2 functions to ArrayFire's
memory manager. ArrayFire may reuse the memory for subsequent operations. This
memory should not be used by the client after this point.

===============================================================================

Expand Down
129 changes: 112 additions & 17 deletions include/af/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,40 +106,82 @@ namespace af
/// @{
/// \brief Allocates memory using ArrayFire's memory manager
///
/// \copydoc device_func_alloc
/// \param[in] elements the number of elements to allocate
/// \param[in] type is the type of the elements to allocate
/// \returns the pointer to the memory
///
/// \note The device memory returned by this function is only freed if af::free() is called explicitly

/// \returns Pointer to the device memory on the current device. This is a
/// CUDA device pointer for the CUDA backend. A cl::Buffer pointer
/// from the cl2.hpp header on the OpenCL backend and a C pointer
/// for the CPU backend
///
/// \note The device memory returned by this function is only freed if
/// af::free() is called explicitly
/// \deprecated Use allocV2 instead. allocV2 accepts number of bytes
/// instead of number of elements and returns a cl_mem object
/// instead of the cl::Buffer object for the OpenCL backend.
/// Otherwise the functionality is identical to af::alloc.
AF_DEPRECATED("Use af::allocV2 instead")
AFAPI void *alloc(const size_t elements, const dtype type);

#if AF_API_VERSION >= 38
/// \brief Allocates memory using ArrayFire's memory manager
///
/// \param[in] bytes the number of bytes to allocate
/// \returns Pointer to the device memory on the current device. This is a
/// CUDA device pointer for the CUDA backend, a cl_mem object
/// on the OpenCL backend and a C pointer for the CPU backend
///
/// \note The device memory returned by this function is only freed if
/// af::freeV2() is called explicitly
AFAPI void *allocV2(const size_t bytes);
#endif

/// \brief Allocates memory using ArrayFire's memory manager
//
/// \copydoc device_func_alloc
/// \param[in] elements the number of elements to allocate
/// \returns the pointer to the memory
/// \returns Pointer to the device memory on the current device. This is a
/// CUDA device pointer for the CUDA backend. A cl::Buffer pointer
/// from the cl2.hpp header on the OpenCL backend and a C pointer
/// for the CPU backend
///
/// \note the size of the memory allocated is the number of \p elements *
/// sizeof(type)
///
/// \note The device memory returned by this function is only freed if af::free() is called explicitly
template<typename T>
T* alloc(const size_t elements);
/// sizeof(type)
/// \note The device memory returned by this function is only freed if
/// af::free() is called explicitly
/// \deprecated Use allocV2 instead. allocV2 accepts number of bytes
/// instead of number of elements and returns a cl_mem object
/// instead of the cl::Buffer object for the OpenCL backend.
/// Otherwise the functionality is identical to af::alloc.
template <typename T>
AF_DEPRECATED("Use af::allocV2 instead")
T *alloc(const size_t elements);
/// @}

/// \ingroup device_func_free
///
/// \copydoc device_func_free
/// \param[in] ptr the memory to free
///
/// This function will free a device pointer even if it has been previously locked.
/// \param[in] ptr the memory allocated by the af::alloc function that
/// will be freed
///
/// \note This function will free a device pointer even if it has been
/// previously locked.
/// \deprecated Use af::freeV2 instead. af_alloc_device_v2 returns a
/// cl_mem object instead of the cl::Buffer object for the
/// OpenCL backend. Otherwise the functionality is identical
AF_DEPRECATED("Use af::freeV2 instead")
AFAPI void free(const void *ptr);

#if AF_API_VERSION >= 38
/// \ingroup device_func_free
/// \copydoc device_func_free
/// \param[in] ptr The pointer returned by af::allocV2
///
/// This function will free a device pointer even if it has been previously
/// locked.
AFAPI void freeV2(const void *ptr);
#endif

/// \ingroup device_func_pinned
/// @{
///
/// \copydoc device_func_pinned
///
/// \param[in] elements the number of elements to allocate
Expand Down Expand Up @@ -312,21 +354,74 @@ extern "C" {
AFAPI af_err af_sync(const int device);

/**
\brief Allocates memory using ArrayFire's memory manager
\ingroup device_func_alloc

The device memory returned by this function can only be freed using
af_free_device

\param [out] ptr Pointer to the device memory on the current device. This
is a CUDA device pointer for the CUDA backend. A
cl::Buffer pointer on the OpenCL backend and a C pointer
for the CPU backend
\param [in] bytes The number of bytes to allocate on the device

\returns AF_SUCCESS if a pointer could be allocated. AF_ERR_NO_MEM if
there is no memory
\deprecated Use af_alloc_device_v2 instead. af_alloc_device_v2 returns a
cl_mem object instead of the cl::Buffer object for the OpenCL
backend. Otherwise the functionality is identical
*/
AF_DEPRECATED("Use af_alloc_device_v2 instead")
AFAPI af_err af_alloc_device(void **ptr, const dim_t bytes);

/**
\ingroup device_func_free
\brief Returns memory to ArrayFire's memory manager.

This function will free a device pointer even if it has been previously
locked.

\param[in] ptr The pointer allocated by af_alloc_device to be freed

\deprecated Use af_free_device_v2 instead. The new function handles the
new behavior of the af_alloc_device_v2 function.
\ingroup device_func_free
*/
AF_DEPRECATED("Use af_free_device_v2 instead")
AFAPI af_err af_free_device(void *ptr);

#if AF_API_VERSION >= 38
/**
\brief Allocates memory using ArrayFire's memory manager

The device memory returned by this function can only be freed using
af_free_device_v2.

\param [out] ptr Pointer to the device memory on the current device. This
is a CUDA device pointer for the CUDA backend, a cl_mem
object on the OpenCL backend and a C pointer for the CPU
backend
\param [in] bytes The number of bytes to allocate on the device

\returns AF_SUCCESS if a pointer could be allocated. AF_ERR_NO_MEM if
there is no memory
\ingroup device_func_alloc
*/
AFAPI af_err af_alloc_device_v2(void **ptr, const dim_t bytes);

/**
\brief Returns memory to ArrayFire's memory manager.

This function will free a device pointer even if it has been previously
locked.

\param[in] ptr The pointer allocated by af_alloc_device_v2 to be freed
\note This function is not guaranteed to work on every backend for
pointers that were allocated using the af_alloc_device function
\ingroup device_func_free
*/
AFAPI af_err af_free_device_v2(void *ptr);
#endif
/**
\ingroup device_func_pinned
*/
Expand Down
2 changes: 1 addition & 1 deletion include/af/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ AFAPI af_err af_memory_manager_get_active_device_id(af_memory_manager handle,

\param[in] handle the \ref af_memory_manager handle
\param[out] ptr the pointer to the allocated buffer (for the CUDA and CPU
backends). For the OpenCL backend, this is a pointer to a cl::Buffer, which
backends). For the OpenCL backend, this is a pointer to a cl_mem, which
Comment thread
umar456 marked this conversation as resolved.
can be cast accordingly
\param[in] size the size of the pointer allocation

Expand Down
36 changes: 34 additions & 2 deletions src/api/c/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ inline void lockArray(const af_array arr) {
// Ideally we need to use .get(false), i.e. get ptr without offset
// This is however not supported in opencl
// Use getData().get() as alternative
memLock(static_cast<void *>(getArray<T>(arr).getData().get()));
memLock(getArray<T>(arr).getData().get());
}

// Thin alias: forwards `arr` to af_lock_array and returns that call's status
// code unchanged.
af_err af_lock_device_ptr(const af_array arr) {
    const af_err status = af_lock_array(arr);
    return status;
}
Expand Down Expand Up @@ -217,7 +217,7 @@ inline void unlockArray(const af_array arr) {
// Ideally we need to use .get(false), i.e. get ptr without offset
// This is however not supported in opencl
// Use getData().get() as alternative
memUnlock(static_cast<void *>(getArray<T>(arr).getData().get()));
memUnlock(getArray<T>(arr).getData().get());
}

// Thin alias: forwards `arr` to af_unlock_array and returns that call's status
// code unchanged.
af_err af_unlock_device_ptr(const af_array arr) {
    const af_err status = af_unlock_array(arr);
    return status;
}
Expand Down Expand Up @@ -257,6 +257,25 @@ af_err af_alloc_device(void **ptr, const dim_t bytes) {
return AF_SUCCESS;
}

// Allocates `bytes` bytes through memAllocUser and stores the resulting
// handle in `*ptr`.
//
// On the OpenCL build the allocator's result is treated as a heap-allocated
// cl::Buffer; the raw cl_mem it wraps is what gets written to `*ptr`, and the
// wrapper object itself is deleted before returning. On every other build the
// pointer returned by memAllocUser is stored as-is.
//
// Returns AF_SUCCESS when the try block completes; failures are routed through
// CATCHALL (macro defined outside this view).
af_err af_alloc_device_v2(void **ptr, const dim_t bytes) {
    try {
        AF_CHECK(af_init());
#ifdef AF_OPENCL
        auto *wrapper = static_cast<cl::Buffer *>(memAllocUser(bytes));
        cl_mem handle = (*wrapper)();
        *ptr          = handle;

        // Deleting the cl::Buffer wrapper runs its destructor, which
        // decrements the cl_mem reference count. Retain first so the handle
        // given to the caller stays alive after the wrapper is gone.
        clRetainMemObject(handle);
        delete wrapper;
#else
        *ptr = static_cast<void *>(memAllocUser(bytes));
#endif
    }
    CATCHALL;
    return AF_SUCCESS;
}

af_err af_alloc_pinned(void **ptr, const dim_t bytes) {
try {
AF_CHECK(af_init());
Expand All @@ -274,6 +293,19 @@ af_err af_free_device(void *ptr) {
return AF_SUCCESS;
}

// Hands memory previously obtained from af_alloc_device_v2 back through
// memFreeUser.
//
// On the OpenCL build `ptr` is interpreted as a cl_mem and wrapped in a newly
// heap-allocated cl::Buffer before being passed on; on other builds `ptr` is
// passed to memFreeUser directly.
//
// Returns AF_SUCCESS when the try block completes; failures are routed through
// CATCHALL (macro defined outside this view).
af_err af_free_device_v2(void *ptr) {
    try {
#ifdef AF_OPENCL
        // Second constructor argument is `false`: NOTE(review) presumably the
        // "retain" flag of the cl::Buffer(cl_mem, bool) constructor, so the
        // wrapper adopts the caller's reference instead of adding one —
        // confirm against the OpenCL C++ bindings header in use.
        auto *wrapper = new cl::Buffer(static_cast<cl_mem>(ptr), false);
        memFreeUser(wrapper);
#else
        memFreeUser(ptr);
#endif
    }
    CATCHALL;
    return AF_SUCCESS;
}

af_err af_free_pinned(void *ptr) {
try {
pinnedFree<char>(static_cast<char *>(ptr));
Expand Down
22 changes: 21 additions & 1 deletion src/api/cpp/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,21 @@ void sync(int device) { AF_THROW(af_sync(device)); }
// Allocates device memory for `elements` values of the given `type` by calling
// the C entry point af_alloc_device with elements * size_of(type) bytes, and
// returns the pointer that call produced.
void *alloc(const size_t elements, const af::dtype type) {
    // Requested size in bytes.
    const auto bytes = elements * size_of(type);
    void *ptr;
// Suppress -Wdeprecated-declarations for the af_alloc_device call below.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    AF_THROW(af_alloc_device(&ptr, bytes));
#pragma GCC diagnostic pop
    // FIXME: Add to map
    return ptr;
}

// Byte-based allocation: requests `bytes` bytes from af_alloc_device_v2 and
// returns the pointer it wrote. The call is wrapped in AF_THROW (macro defined
// elsewhere; presumably it raises an exception on a non-success status).
void *allocV2(const size_t bytes) {
    void *devMem;
    AF_THROW(af_alloc_device_v2(&devMem, bytes));
    return devMem;
}

// Alloc pinned memory
void *pinned(const size_t elements, const af::dtype type) {
void *ptr;
Expand All @@ -117,7 +127,14 @@ void *pinned(const size_t elements, const af::dtype type) {

void free(const void *ptr) {
// FIXME: look up map and call the right free
AF_THROW(af_free_device((void *)ptr));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
AF_THROW(af_free_device(const_cast<void *>(ptr)));
#pragma GCC diagnostic pop
}

// Releases memory through the C entry point af_free_device_v2. The call is
// wrapped in AF_THROW (macro defined elsewhere).
void freeV2(const void *ptr) {
    // The C API takes a non-const pointer, so constness is dropped here.
    void *mem = const_cast<void *>(ptr);
    AF_THROW(af_free_device_v2(mem));
}

void freePinned(const void *ptr) {
Expand Down Expand Up @@ -155,6 +172,8 @@ size_t getMemStepSize() {
return size_bytes;
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#define INSTANTIATE(T) \
template<> \
AFAPI T *alloc(const size_t elements) { \
Expand All @@ -181,5 +200,6 @@ INSTANTIATE(short)
INSTANTIATE(unsigned short)
INSTANTIATE(long long)
INSTANTIATE(unsigned long long)
#pragma GCC diagnostic pop

} // namespace af
16 changes: 15 additions & 1 deletion src/api/unified/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,28 @@ af_err af_get_device(int *device) { CALL(af_get_device, device); }
af_err af_sync(const int device) { CALL(af_sync, device); }

// Unified-API entry point for af_alloc_device: passes `ptr` and `bytes` to the
// CALL macro. CALL is defined outside this view; presumably it dispatches to
// the active backend's function of the same name and returns its status.
af_err af_alloc_device(void **ptr, const dim_t bytes) {
// Suppress -Wdeprecated-declarations around the use of af_alloc_device below.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
CALL(af_alloc_device, ptr, bytes);
#pragma GCC diagnostic pop
}

// Unified-API entry point for af_alloc_device_v2: passes `ptr` and `bytes` to
// the CALL macro (defined outside this view; presumably dispatches to the
// active backend's function of the same name and returns its status).
af_err af_alloc_device_v2(void **ptr, const dim_t bytes) {
CALL(af_alloc_device_v2, ptr, bytes);
}

// Unified-API entry point for af_alloc_pinned: passes `ptr` and `bytes` to the
// CALL macro (defined outside this view; presumably dispatches to the active
// backend's function of the same name and returns its status).
af_err af_alloc_pinned(void **ptr, const dim_t bytes) {
CALL(af_alloc_pinned, ptr, bytes);
}

af_err af_free_device(void *ptr) { CALL(af_free_device, ptr); }
// Unified-API entry point for af_free_device: passes `ptr` to the CALL macro
// (defined outside this view; presumably dispatches to the active backend's
// function of the same name and returns its status).
af_err af_free_device(void *ptr) {
// Suppress -Wdeprecated-declarations around the use of af_free_device below.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
CALL(af_free_device, ptr);
#pragma GCC diagnostic pop
}

// Unified-API entry point for af_free_device_v2: passes `ptr` to the CALL
// macro (defined outside this view; presumably dispatches to the active
// backend's function of the same name and returns its status).
af_err af_free_device_v2(void *ptr) { CALL(af_free_device_v2, ptr); }

// Unified-API entry point for af_free_pinned: passes `ptr` to the CALL macro
// (defined outside this view; presumably dispatches to the active backend's
// function of the same name and returns its status).
af_err af_free_pinned(void *ptr) { CALL(af_free_pinned, ptr); }

Expand Down
2 changes: 1 addition & 1 deletion src/backend/opencl/Array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ template<typename T>
void *getDevicePtr(const Array<T> &arr) {
const cl::Buffer *buf = arr.device();
if (!buf) { return NULL; }
memLock((T *)buf);
memLock(buf);
cl_mem mem = (*buf)();
return (void *)mem;
}
Expand Down
Loading