This repository was archived by the owner on Aug 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 89
Expand file tree
/
Copy pathsmart-pointer.cpp
More file actions
168 lines (143 loc) · 5.11 KB
/
Copy pathsmart-pointer.cpp
File metadata and controls
168 lines (143 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/***************************************************************************
*
* Copyright (C) 2016 Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Codeplay's ComputeCpp SDK
*
* smart-pointer.cpp
*
* Description:
* Sample code that shows how SYCL buffers can use host data owned by a
* std::shared_ptr, and how they can use custom allocators.
*
**************************************************************************/
#include <memory>

#include <CL/sycl.hpp>

// Custom stack allocator
#include "stack_allocator.hpp"
using namespace cl::sycl;
/* Kernel name types. They are only forward-declared here; each one is passed
 * as the template argument of one parallel_for call in main(). */
class kernel0;
class kernel1;
class kernel2;
/* Runs three small experiments that combine a SYCL buffer with host memory
 * owned by a std::shared_ptr:
 *   1. a buffer constructed directly from the shared_ptr,
 *   2. a buffer backed by a custom allocator whose final data is p,
 *   3. a buffer constructed from the shared_ptr with use_host_ptr.
 * In every experiment a kernel stores each work-item's global linear id in
 * the buffer and the host then checks the sum of the elements.
 *
 * Returns 0 if every check saw the expected sum, 1 otherwise. */
int main() {
  const unsigned int nElems = 12;
  /* Each kernel writes the ids 0 .. nElems - 1, so the elements must add up
   * to 0 + 1 + ... + 11 = 66. The nd_ranges below are 6 x 2 = 12 work-items,
   * i.e. exactly nElems. */
  const int expectedSum = static_cast<int>(nElems * (nElems - 1) / 2);

  /* The storage comes from new[], so the shared_ptr must be given an array
   * deleter: the default deleter would call plain `delete`, which is
   * undefined behaviour for memory obtained from new[]. */
  std::shared_ptr<int> p(new int[nElems], std::default_delete<int[]>());
  bool correct = true;

  for (unsigned int i = 0; i < nElems; i++) {
    p.get()[i] = 0;
  }

  queue myQueue;

  {
    /* Buffers can take a shared_ptr as a parameter, and they
     * will share ownership of the pointer.
     * Data will be copied back only if the user still keeps
     * a reference of the data. */
    {
      buffer<int, 1> buf(p, range<1>(nElems));

      myQueue.submit([&](handler& cgh) {
        auto myRange = nd_range<2>(range<2>(6, 2), range<2>(2, 1));
        auto ptr = buf.get_access<access::mode::read_write>(cgh);

        cgh.parallel_for<kernel0>(myRange, [=](nd_item<2> itemID) {
          /* The linear id is a size_t; the buffer holds int. */
          ptr[itemID.get_global_linear_id()] =
              static_cast<int>(itemID.get_global_linear_id());
        });
      });

      {
        /* The runtime will make data available via hA. It might use mapped
         * memory, temporary objects or internal allocations. It is up to
         * the implementation. The original pointer is not updated. */
        auto hA = buf.get_access<access::mode::read>();

        int sum = 0;
        for (unsigned int i = 0; i < nElems; i++) {
          sum += hA[i];
        }
        if (sum != expectedSum) {
          correct = false;
        }
      }
    }

    /* Data now available in the original pointer, because the buffer
     * has been destroyed. */
    int sum = 0;
    for (unsigned int i = 0; i < nElems; i++) {
      sum += p.get()[i];
    }
    if (sum != expectedSum) {
      correct = false;
    }
  }

  {
    /* Custom allocators can be used - here we use a stack_allocator from
     * https://github.com/charles-salvia/charles/blob/master/stack_allocator.hpp
     */
    {
      buffer<int, 1, stack_allocator<int, nElems>> buf{range<1>{nElems}};

      /* buffer::set_final_data() tells the runtime that the data should be
       * copied to p when the buffer is destroyed. */
      buf.set_final_data(p);

      myQueue.submit([&](handler& cgh) {
        auto myRange = nd_range<2>(range<2>(6, 2), range<2>(2, 1));
        auto ptr = buf.get_access<access::mode::read_write>(cgh);

        cgh.parallel_for<kernel1>(myRange, [=](nd_item<2> itemID) {
          ptr[itemID.get_global_linear_id()] =
              static_cast<int>(itemID.get_global_linear_id());
        });
      });
    }

    /* The buffer has now been destroyed, and the data copied into p. */
    int sum = 0;
    for (unsigned int i = 0; i < nElems; i++) {
      sum += p.get()[i];
    }
    if (sum != expectedSum) {
      correct = false;
    }
  }

  {
    {
      /* The property "use_host_ptr" tells the runtime that the user
       * pointer passed to the constructor should be used to store all
       * data, rather than new internal allocations. When using this,
       * all host accessors update the user-given host memory. This can
       * improve performance, though you should always profile to see
       * if it actually makes a difference. */
      buffer<int, 1> buf(p, range<1>(nElems),
                         {property::buffer::use_host_ptr()});

      myQueue.submit([&](handler& cgh) {
        auto myRange = nd_range<2>(range<2>(6, 2), range<2>(2, 1));
        auto ptr = buf.get_access<access::mode::read_write>(cgh);

        cgh.parallel_for<kernel2>(myRange, [=](nd_item<2> itemID) {
          ptr[itemID.get_global_linear_id()] =
              static_cast<int>(itemID.get_global_linear_id());
        });
      });

      {
        /* Host accessors will actually block on creation, so in this case we
         * know kernel2 has finished by the time hA is available. hA is not
         * read directly: the check deliberately goes through p to show that
         * the user-given memory is what gets updated. */
        auto hA = buf.get_access<access::mode::read>();

        int sum = 0;
        for (unsigned int i = 0; i < nElems; i++) {
          sum += p.get()[i];
        }
        if (sum != expectedSum) {
          correct = false;
        }
      }
    }
    /* Normally data is copied to the host when buffers are destroyed, which
     * would happen here, but since `use_host_ptr` and a host accessor were
     * used in concert no data copy happens (i.e., has *already* happened). */
  }

  return correct ? 0 : 1;
}