-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMain.java
More file actions
39 lines (32 loc) · 1.54 KB
/
Main.java
File metadata and controls
39 lines (32 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import CudaLib.CudaMemObj;
import static java.lang.foreign.ValueLayout.*;
import static CudaLib.CudaNumLib.*;
import static CudaLib.CudaNumLib.cudaMemcpyKind.*;
/**
 * Small driver that pushes two host arrays through the CudaLib wrappers:
 * allocate host and device buffers, copy the inputs to the device, call
 * {@code cuda_add} followed by {@code cuda__powf}, copy the result back and
 * print the first few values.
 *
 * NOTE(review): the exact semantics of the CudaLib calls are not visible in
 * this file. Presumably {@code cuda_add} writes a + b into its last argument
 * and {@code cuda__powf} raises each element to the given exponent in place;
 * confirm against CudaLib.
 */
public class Main {
    /** How many leading elements are given a value on the host and printed at the end. */
    private static final int SAMPLE_COUNT = 5;

    /**
     * Runs the add-then-power demo.
     *
     * @param args unused
     * @throws Throwable whatever the CudaLib wrappers throw; device buffers that
     *                   were already allocated are still released in that case
     */
    public static void main(String[] args) throws Throwable {
        int num_elements = 1000, threadsPerBlock = 256;

        CudaMemObj a_arr = hostMalloc(JAVA_DOUBLE, num_elements);
        CudaMemObj b_arr = hostMalloc(JAVA_DOUBLE, num_elements);
        CudaMemObj c_arr = hostMalloc(JAVA_DOUBLE, num_elements);

        // Never touch more elements than the buffers hold, even if num_elements
        // is later lowered below SAMPLE_COUNT.
        int sampleCount = Math.min(SAMPLE_COUNT, num_elements);

        // Only the first sampleCount entries are written here; the content of
        // the remaining entries is whatever hostMalloc handed back.
        for (int i = 0; i < sampleCount; i++) {
            a_arr.get_ptr().setAtIndex(JAVA_DOUBLE, i, 10.0);
            b_arr.get_ptr().setAtIndex(JAVA_DOUBLE, i, 10.0);
        }

        // byteSize() is wider than int; fail loudly instead of silently
        // truncating if the buffer ever grows past Integer.MAX_VALUE bytes.
        int byte_size_arr = Math.toIntExact(a_arr.get_ptr().byteSize());

        // Declared outside the try so the finally block can release whichever
        // allocations succeeded before a failure.
        CudaMemObj cu_a_arr = null;
        CudaMemObj cu_b_arr = null;
        CudaMemObj cu_c_arr = null;
        try {
            cu_a_arr = cudaMalloc(byte_size_arr);
            cu_b_arr = cudaMalloc(byte_size_arr);
            cu_c_arr = cudaMalloc(byte_size_arr);

            cudaMemcpy(cu_a_arr, a_arr, byte_size_arr, cudaMemcpyHostToDevice);
            cudaMemcpy(cu_b_arr, b_arr, byte_size_arr, cudaMemcpyHostToDevice);

            cuda_add(threadsPerBlock, num_elements, cu_a_arr, cu_b_arr, cu_c_arr);
            // Same buffer is passed as input and output of the power step.
            cuda__powf(threadsPerBlock, num_elements, cu_c_arr, 2.0, cu_c_arr);

            // Wait for the device before reading the result back.
            cudaDeviceSynchronize();
            cudaMemcpy(c_arr, cu_c_arr, byte_size_arr, cudaMemcpyDeviceToHost);

            for (int i = 0; i < sampleCount; i++) {
                IO.println(c_arr.get_ptr().getAtIndex(JAVA_DOUBLE, i));
            }
        } finally {
            releaseDeviceBuffers(cu_a_arr, cu_b_arr, cu_c_arr);
        }
    }

    /**
     * Frees every non-null device buffer, in the order given. A failure while
     * freeing one buffer does not stop the remaining ones from being freed; the
     * first failure is rethrown afterwards with later ones attached as suppressed.
     */
    private static void releaseDeviceBuffers(CudaMemObj... buffers) throws Throwable {
        Throwable firstFailure = null;
        for (CudaMemObj buffer : buffers) {
            if (buffer == null) {
                continue; // allocation never happened
            }
            try {
                cudaFree(buffer);
            } catch (Throwable t) {
                if (firstFailure == null) {
                    firstFailure = t;
                } else {
                    firstFailure.addSuppressed(t);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}