Skip to content

Commit 761f8e5

Browse files
committed
Create af::allocV2 and af::freeV2 which return cl_mem
* Older alloc functions were returning cl::Buffer objects. This behavior is deprecated in favor of cl_mem objects on the OpenCL backend
1 parent 09aef8f commit 761f8e5

8 files changed

Lines changed: 362 additions & 19 deletions

File tree

docs/details/device.dox

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ have finished.
7777

7878
===============================================================================
7979

80-
\defgroup device_func_alloc alloc
80+
\defgroup device_func_alloc allocV2
8181
\ingroup device_mat
8282

8383
\brief Allocate memory using the ArrayFire memory manager
@@ -92,21 +92,36 @@ interface returns a void pointer that needs to be cast to the backend
9292
appropriate memory type.
9393

9494

95-
| function | CPU | CUDA | OpenCL |
96-
|--------------------|-----|------|-------------|
97-
| af_alloc_device | T* | T* | cl::Buffer* |
98-
| af::alloc | T* | T* | cl::Buffer* |
95+
| function | CPU | CUDA | OpenCL |
96+
|------------------------------|-----|------|-------------|
97+
| af_alloc_device_v2 | T* | T* | cl_mem |
98+
| af::allocV2 | T* | T* | cl_mem |
99+
| af_alloc_device (deprecated) | T* | T* | cl::Buffer* |
100+
| af::alloc (deprecated) | T* | T* | cl::Buffer* |
101+
102+
CPU Backend
103+
-----------
104+
\snippet test/memory.cpp ex_alloc_v2_cpu
105+
106+
CUDA Backend
107+
------------
108+
\snippet test/cuda.cu ex_alloc_v2_cuda
109+
110+
OpenCL Backend
111+
--------------
112+
\snippet test/ocl_ext_context.cpp ex_alloc_v2_opencl
99113

100114
===============================================================================
101115

102-
\defgroup device_func_free free
116+
\defgroup device_func_free freeV2
103117
\ingroup device_mat
104118

105119
\brief Returns memory to ArrayFire's memory manager. The memory will
106120
return to the memory pool.
107121

108-
These calls free the device memory. These functions need to be called on
109-
pointers allocated using alloc function.
122+
Releases control of the memory allocated by af::allocV2 functions to ArrayFire's
123+
memory manager. ArrayFire may reuse the memory for subsequent operations. This
124+
memory should not be used by the client after this point.
110125

111126
===============================================================================
112127

include/af/device.h

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,26 @@ namespace af
115115
///
116116
/// \note The device memory returned by this function is only freed if
117117
/// af::free() is called explicitly
118+
/// \deprecated Use allocV2 instead. allocV2 accepts number of bytes
119+
/// instead of number of elements and returns a cl_mem object
120+
/// instead of the cl::Buffer object for the OpenCL backend.
121+
/// Otherwise the functionallity is identical to af::alloc.
122+
AF_DEPRECATED("Use af::allocV2 instead")
118123
AFAPI void *alloc(const size_t elements, const dtype type);
119124

125+
#if AF_API_VERSION >= 38
126+
/// \brief Allocates memory using ArrayFire's memory manager
127+
///
128+
/// \param[in] bytes the number of bytes to allocate
129+
/// \returns Pointer to the device memory on the current device. This is a
130+
/// CUDA device pointer for the CUDA backend. A cl_mem pointer
131+
/// on the OpenCL backend and a C pointer for the CPU backend
132+
///
133+
/// \note The device memory returned by this function is only freed if
134+
/// af::freeV2() is called explicitly
135+
AFAPI void *allocV2(const size_t bytes);
136+
#endif
137+
120138
/// \brief Allocates memory using ArrayFire's memory manager
121139
//
122140
/// \param[in] elements the number of elements to allocate
@@ -129,7 +147,13 @@ namespace af
129147
/// sizeof(type)
130148
/// \note The device memory returned by this function is only freed if
131149
/// af::free() is called explicitly
132-
template <typename T> T *alloc(const size_t elements);
150+
/// \deprecated Use allocV2 instead. allocV2 accepts number of bytes
151+
/// instead of number of elements and returns a cl_mem object
152+
/// instead of the cl::Buffer object for the OpenCL backend.
153+
/// Otherwise the functionallity is identical to af::alloc.
154+
template <typename T>
155+
AF_DEPRECATED("Use af::allocV2 instead")
156+
T *alloc(const size_t elements);
133157
/// @}
134158

135159
/// \ingroup device_func_free
@@ -140,8 +164,22 @@ namespace af
140164
///
141165
/// \note This function will free a device pointer even if it has been
142166
/// previously locked.
167+
/// \deprecated Use af::freeV2 instead. af_alloc_device_v2 returns a
168+
/// cl_mem object instead of the cl::Buffer object for the
169+
/// OpenCL backend. Otherwise the functionallity is identical
170+
AF_DEPRECATED("Use af::freeV2 instead")
143171
AFAPI void free(const void *ptr);
144172

173+
#if AF_API_VERSION >= 38
174+
/// \ingroup device_func_free
175+
/// \copydoc device_func_free
176+
/// \param[in] ptr The pointer returned by af::allocV2
177+
///
178+
/// This function will free a device pointer even if it has been previously
179+
/// locked.
180+
AFAPI void freeV2(const void *ptr);
181+
#endif
182+
145183
/// \ingroup device_func_pinned
146184
/// @{
147185
/// \copydoc device_func_pinned
@@ -330,7 +368,11 @@ extern "C" {
330368
331369
\returns AF_SUCCESS if a pointer could be allocated. AF_ERR_NO_MEM if
332370
there is no memory
371+
\deprecated Use af_alloc_device_v2 instead. af_alloc_device_v2 returns a
372+
cl_mem object instead of the cl::Buffer object for the OpenCL
373+
backend. Otherwise the functionallity is identical
333374
*/
375+
AF_DEPRECATED("Use af_alloc_device_v2 instead")
334376
AFAPI af_err af_alloc_device(void **ptr, const dim_t bytes);
335377

336378
/**
@@ -341,10 +383,45 @@ extern "C" {
341383
342384
\param[in] ptr The pointer allocated by af_alloc_device to be freed
343385
386+
\deprecated Use af_free_device_v2 instead. The new function handles the
387+
new behavior of the af_alloc_device_v2 function.
344388
\ingroup device_func_free
345389
*/
390+
AF_DEPRECATED("Use af_free_device_v2 instead")
346391
AFAPI af_err af_free_device(void *ptr);
347392

393+
#if AF_API_VERSION >= 38
394+
/**
395+
\brief Allocates memory using ArrayFire's memory manager
396+
397+
This device memory returned by this function can only be freed using
398+
af_free_device_v2.
399+
400+
\param [out] ptr Pointer to the device memory on the current device. This
401+
is a CUDA device pointer for the CUDA backend. A
402+
cl::Buffer pointer on the OpenCL backend and a C pointer
403+
for the CPU backend
404+
\param [in] bytes The number of bites to allocate on the device
405+
406+
\returns AF_SUCCESS if a pointer could be allocated. AF_ERR_NO_MEM if
407+
there is no memory
408+
\ingroup device_func_alloc
409+
*/
410+
AFAPI af_err af_alloc_device_v2(void **ptr, const dim_t bytes);
411+
412+
/**
413+
\brief Returns memory to ArrayFire's memory manager.
414+
415+
This function will free a device pointer even if it has been previously
416+
locked.
417+
418+
\param[in] ptr The pointer allocated by af_alloc_device_v2 to be freed
419+
\note this function will not work for pointers allocated using the
420+
af_alloc_device function for all backends
421+
\ingroup device_func_free
422+
*/
423+
AFAPI af_err af_free_device_v2(void *ptr);
424+
#endif
348425
/**
349426
\ingroup device_func_pinned
350427
*/

src/api/c/memory.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,25 @@ af_err af_alloc_device(void **ptr, const dim_t bytes) {
257257
return AF_SUCCESS;
258258
}
259259

260+
af_err af_alloc_device_v2(void **ptr, const dim_t bytes) {
261+
try {
262+
AF_CHECK(af_init());
263+
#ifdef AF_OPENCL
264+
auto *buf = static_cast<cl::Buffer *>(memAllocUser(bytes));
265+
*ptr = buf->operator()();
266+
267+
// Calling retain to offset the decrement the reference count by the
268+
// destructor of cl::Buffer
269+
clRetainMemObject(cl_mem(*ptr));
270+
delete buf;
271+
#else
272+
*ptr = static_cast<void *>(memAllocUser(bytes));
273+
#endif
274+
}
275+
CATCHALL;
276+
return AF_SUCCESS;
277+
}
278+
260279
af_err af_alloc_pinned(void **ptr, const dim_t bytes) {
261280
try {
262281
AF_CHECK(af_init());
@@ -274,6 +293,19 @@ af_err af_free_device(void *ptr) {
274293
return AF_SUCCESS;
275294
}
276295

296+
af_err af_free_device_v2(void *ptr) {
297+
try {
298+
#ifdef AF_OPENCL
299+
auto mem = static_cast<cl_mem>(ptr);
300+
memFreeUser(new cl::Buffer(mem, false));
301+
#else
302+
memFreeUser(ptr);
303+
#endif
304+
}
305+
CATCHALL;
306+
return AF_SUCCESS;
307+
}
308+
277309
af_err af_free_pinned(void *ptr) {
278310
try {
279311
pinnedFree<char>(static_cast<char *>(ptr));

src/api/cpp/device.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,21 @@ void sync(int device) { AF_THROW(af_sync(device)); }
102102
// Alloc device memory
103103
void *alloc(const size_t elements, const af::dtype type) {
104104
void *ptr;
105+
#pragma GCC diagnostic push
106+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
105107
AF_THROW(af_alloc_device(&ptr, elements * size_of(type)));
108+
#pragma GCC diagnostic pop
106109
// FIXME: Add to map
107110
return ptr;
108111
}
109112

113+
// Alloc device memory
114+
void *allocV2(const size_t bytes) {
115+
void *ptr;
116+
AF_THROW(af_alloc_device_v2(&ptr, bytes));
117+
return ptr;
118+
}
119+
110120
// Alloc pinned memory
111121
void *pinned(const size_t elements, const af::dtype type) {
112122
void *ptr;
@@ -117,7 +127,14 @@ void *pinned(const size_t elements, const af::dtype type) {
117127

118128
void free(const void *ptr) {
119129
// FIXME: look up map and call the right free
120-
AF_THROW(af_free_device((void *)ptr));
130+
#pragma GCC diagnostic push
131+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
132+
AF_THROW(af_free_device(const_cast<void *>(ptr)));
133+
#pragma GCC diagnostic pop
134+
}
135+
136+
void freeV2(const void *ptr) {
137+
AF_THROW(af_free_device_v2(const_cast<void *>(ptr)));
121138
}
122139

123140
void freePinned(const void *ptr) {
@@ -155,6 +172,8 @@ size_t getMemStepSize() {
155172
return size_bytes;
156173
}
157174

175+
#pragma GCC diagnostic push
176+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
158177
#define INSTANTIATE(T) \
159178
template<> \
160179
AFAPI T *alloc(const size_t elements) { \
@@ -181,5 +200,6 @@ INSTANTIATE(short)
181200
INSTANTIATE(unsigned short)
182201
INSTANTIATE(long long)
183202
INSTANTIATE(unsigned long long)
203+
#pragma GCC diagnostic pop
184204

185205
} // namespace af

src/api/unified/device.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,28 @@ af_err af_get_device(int *device) { CALL(af_get_device, device); }
7474
af_err af_sync(const int device) { CALL(af_sync, device); }
7575

7676
af_err af_alloc_device(void **ptr, const dim_t bytes) {
77+
#pragma GCC diagnostic push
78+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
7779
CALL(af_alloc_device, ptr, bytes);
80+
#pragma GCC diagnostic pop
81+
}
82+
83+
af_err af_alloc_device_v2(void **ptr, const dim_t bytes) {
84+
CALL(af_alloc_device_v2, ptr, bytes);
7885
}
7986

8087
af_err af_alloc_pinned(void **ptr, const dim_t bytes) {
8188
CALL(af_alloc_pinned, ptr, bytes);
8289
}
8390

84-
af_err af_free_device(void *ptr) { CALL(af_free_device, ptr); }
91+
af_err af_free_device(void *ptr) {
92+
#pragma GCC diagnostic push
93+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
94+
CALL(af_free_device, ptr);
95+
#pragma GCC diagnostic pop
96+
}
97+
98+
af_err af_free_device_v2(void *ptr) { CALL(af_free_device_v2, ptr); }
8599

86100
af_err af_free_pinned(void *ptr) { CALL(af_free_pinned, ptr); }
87101

test/cuda.cu

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
#include <af/array.h>
1313
#include <af/device.h>
1414

15+
using af::allocV2;
16+
using af::freeV2;
17+
18+
#pragma GCC diagnostic push
19+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1520
TEST(Memory, AfAllocDeviceCUDA) {
1621
void *ptr;
1722
ASSERT_SUCCESS(af_alloc_device(&ptr, sizeof(float)));
@@ -33,3 +38,42 @@ TEST(Memory, AfAllocDeviceCUDA) {
3338

3439
ASSERT_EQ(5, host);
3540
}
41+
#pragma GCC diagnostic pop
42+
43+
TEST(Memory, AfAllocDeviceV2CUDA) {
44+
void *ptr;
45+
ASSERT_SUCCESS(af_alloc_device_v2(&ptr, sizeof(float)));
46+
47+
/// Tests to see if the pointer returned can be used by cuda functions
48+
float gold_val = 5;
49+
float *gold = NULL;
50+
ASSERT_EQ(cudaSuccess, cudaMalloc(&gold, sizeof(float)));
51+
ASSERT_EQ(cudaSuccess, cudaMemcpy(gold, &gold_val, sizeof(float),
52+
cudaMemcpyHostToDevice));
53+
54+
ASSERT_EQ(cudaSuccess,
55+
cudaMemcpy(ptr, gold, sizeof(float), cudaMemcpyDeviceToDevice));
56+
57+
float host;
58+
ASSERT_EQ(cudaSuccess,
59+
cudaMemcpy(&host, ptr, sizeof(float), cudaMemcpyDeviceToHost));
60+
ASSERT_SUCCESS(af_free_device_v2(ptr));
61+
62+
ASSERT_EQ(5, host);
63+
}
64+
65+
TEST(Memory, SNIPPET_AllocCUDA) {
66+
//! [ex_alloc_v2_cuda]
67+
68+
void *ptr = allocV2(sizeof(float));
69+
70+
float *dptr = static_cast<float *>(ptr);
71+
float host_data = 5.0f;
72+
73+
cudaError_t error = cudaSuccess;
74+
error = cudaMemcpy(dptr, &host_data, sizeof(float), cudaMemcpyHostToDevice);
75+
freeV2(ptr);
76+
77+
//! [ex_alloc_v2_cuda]
78+
ASSERT_EQ(cudaSuccess, error);
79+
}

0 commit comments

Comments
 (0)