-
Notifications
You must be signed in to change notification settings - Fork 548
Expand file tree
/
Copy pathcudnn.hpp
More file actions
188 lines (158 loc) · 8.74 KB
/
cudnn.hpp
File metadata and controls
188 lines (158 loc) · 8.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/*******************************************************
* Copyright (c) 2018, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#pragma once
#include <common/defines.hpp>
#include <common/half.hpp>
#include <common/unique_handle.hpp>
#include <cudnnModule.hpp>
#include <af/dim4.hpp>
// clang-format off
// RAII wrappers for cuDNN opaque handle types. DEFINE_HANDLER (declared in
// common/unique_handle.hpp) generates a unique_handle specialization whose
// create/destroy calls go through the dynamically loaded cuDNN plugin
// (getCudnnPlugin, see cudnnModule.hpp) rather than linking cuDNN directly.
DEFINE_HANDLER(cudnnHandle_t, arrayfire::cuda::getCudnnPlugin().cudnnCreate, arrayfire::cuda::getCudnnPlugin().cudnnDestroy);
DEFINE_HANDLER(cudnnTensorDescriptor_t, arrayfire::cuda::getCudnnPlugin().cudnnCreateTensorDescriptor, arrayfire::cuda::getCudnnPlugin().cudnnDestroyTensorDescriptor);
DEFINE_HANDLER(cudnnFilterDescriptor_t, arrayfire::cuda::getCudnnPlugin().cudnnCreateFilterDescriptor, arrayfire::cuda::getCudnnPlugin().cudnnDestroyFilterDescriptor);
DEFINE_HANDLER(cudnnConvolutionDescriptor_t, arrayfire::cuda::getCudnnPlugin().cudnnCreateConvolutionDescriptor, arrayfire::cuda::getCudnnPlugin().cudnnDestroyConvolutionDescriptor);
// clang-format on
namespace arrayfire {
namespace cuda {
const char *errorString(cudnnStatus_t err);
/// Evaluates a cuDNN call and converts a failing status into an ArrayFire
/// error:
///   - CUDNN_STATUS_ALLOC_FAILED   -> AF_ERROR with AF_ERR_NO_MEM
///   - CUDNN_STATUS_NOT_SUPPORTED  -> CUDA_NOT_SUPPORTED (unsupported
///                                    type/size for this cuDNN version)
///   - any other failure           -> AF_ERROR with AF_ERR_INTERNAL and the
///                                    status name from errorString()
/// Wrapped in do/while(0) so it behaves as a single statement.
#define CUDNN_CHECK(fn)                                                      \
    do {                                                                     \
        cudnnStatus_t _error = (fn);                                         \
        if (_error == CUDNN_STATUS_SUCCESS) {                                \
            break;                                                           \
        } else if (_error == CUDNN_STATUS_ALLOC_FAILED) {                    \
            AF_ERROR(                                                        \
                "CUDNN Error(CUDNN_STATUS_ALLOC_FAILED): Error allocating "  \
                "for function call ",                                        \
                AF_ERR_NO_MEM);                                              \
        } else if (_error == CUDNN_STATUS_NOT_SUPPORTED) {                   \
            CUDA_NOT_SUPPORTED(                                              \
                "CUDNN Error(CUDNN_STATUS_NOT_SUPPORTED): This version of "  \
                "CUDNN does not support the data type or the size of this "  \
                "operation");                                                \
        } else {                                                             \
            char _err_msg[1024];                                             \
            snprintf(_err_msg, sizeof(_err_msg), "CUDNN Error(%s): \n",      \
                     errorString(_error));                                   \
            AF_ERROR(_err_msg, AF_ERR_INTERNAL);                             \
        }                                                                    \
    } while (0)
/// Returns a cuDNN type based on the template parameter
/// (specializations are defined in the corresponding .cpp file).
template<typename T>
cudnnDataType_t getCudnnDataType();
/// Configures \p desc from an ArrayFire dim4 with the given cuDNN data type.
/// NOTE(review): presumably maps dim4 axes to NCHW tensor dimensions —
/// confirm against the definition.
void cudnnSet(cudnnTensorDescriptor_t desc, cudnnDataType_t cudnn_dtype,
af::dim4 dims);
/// Filter-descriptor overload of cudnnSet; same dim4-to-descriptor mapping
/// for filter (weight) descriptors.
void cudnnSet(cudnnFilterDescriptor_t desc, cudnnDataType_t cudnn_dtype,
af::dim4 dims);
// cuDNN Wrappers
//
// cuDNN frequently deprecates and replaces function names between releases.
// In order to avoid locking ArrayFire versions to specific cuDNN versions, we
// wrap all cuDNN calls so that the main codebase is not full of ifdefs. The
// following functions are wrappers around cuDNN functions that abstract away
// the differences between older versions of cuDNN.
//
/// Version-agnostic wrapper around cuDNN's cudnnSetConvolution2dDescriptor;
/// configures a 2D convolution descriptor (padding, stride, upscale, mode,
/// compute type).
cudnnStatus_t cudnnSetConvolution2dDescriptor(
    cudnnConvolutionDescriptor_t convDesc,
    int pad_h,     // zero-padding height
    int pad_w,     // zero-padding width
    int u,         // vertical filter stride
    int v,         // horizontal filter stride
    int upscalex,  // upscale the input in x-direction
    int upscaley,  // upscale the input in y-direction
    cudnnConvolutionMode_t mode, cudnnDataType_t computeType);
/// Wrapper: sets a 4D filter descriptor (k output maps, c input maps, h x w
/// kernel) with the given data type and tensor format.
cudnnStatus_t cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                                         cudnnDataType_t dataType,
                                         cudnnTensorFormat_t format, int k,
                                         int c, int h, int w);
/// Wrapper: sets a 4D tensor descriptor in the given format.
cudnnStatus_t cudnnSetTensor4dDescriptor(
    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
    cudnnDataType_t dataType, /* image data type */
    int n,                    /* number of inputs (batch size) */
    int c,                    /* number of input feature maps */
    int h,                    /* height of input section */
    int w);                   /* width of input section */
/// Wrapper: queries the scratch-space size (in bytes) needed by the given
/// backward-data convolution algorithm.
cudnnStatus_t cudnnGetConvolutionBackwardDataWorkspaceSize(
    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
    const cudnnTensorDescriptor_t dyDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
    size_t *sizeInBytes);
/// Wrapper: computes the convolution gradient with respect to the data
/// (dx = conv-backward-data(w, dy)).
cudnnStatus_t cudnnConvolutionBackwardData(
    cudnnHandle_t handle, const void *alpha,
    const cudnnFilterDescriptor_t wDesc, const void *w,
    const cudnnTensorDescriptor_t dyDesc, const void *dy,
    const cudnnConvolutionDescriptor_t convDesc,
    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
    size_t workSpaceSizeInBytes, const void *beta,
    const cudnnTensorDescriptor_t dxDesc, void *dx);
/// Wrapper: returns the output dimensions an Nd forward convolution would
/// produce for the given input/filter descriptors.
/// NOTE(review): parameter name 'tensorOuputDimA' is misspelled ("Ouput");
/// harmless in a declaration, but consider renaming here and in the
/// definition together.
cudnnStatus_t cudnnGetConvolutionNdForwardOutputDim(
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t inputTensorDesc,
    const cudnnFilterDescriptor_t filterDesc, int nbDims,
    int tensorOuputDimA[]);
/// Wrapper: number of forward-convolution algorithms available to query.
cudnnStatus_t cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle,
                                                          int *count);
/// Wrapper: number of backward-filter algorithms available to query.
cudnnStatus_t cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
    cudnnHandle_t handle, int *count);
/// Wrapper: queries the scratch-space size (in bytes) needed by the given
/// forward convolution algorithm.
cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize(
    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
    const cudnnFilterDescriptor_t wDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
    size_t *sizeInBytes);
/// Wrapper: queries the scratch-space size (in bytes) needed by the given
/// backward-filter convolution algorithm.
cudnnStatus_t cudnnGetConvolutionBackwardFilterWorkspaceSize(
    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
    const cudnnTensorDescriptor_t dyDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnFilterDescriptor_t gradDesc,
    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes);
/// Wrapper: benchmarks forward-convolution algorithms and returns up to
/// requestedAlgoCount results ranked in perfResults.
cudnnStatus_t cudnnFindConvolutionForwardAlgorithm(
    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
    const cudnnFilterDescriptor_t wDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults);
/// Wrapper: benchmarks backward-filter algorithms and returns up to
/// requestedAlgoCount results ranked in perfResults.
cudnnStatus_t cudnnFindConvolutionBackwardFilterAlgorithm(
    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
    const cudnnTensorDescriptor_t dyDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);
/// Wrapper: picks a forward algorithm heuristically from a preference and
/// memory limit (cudnnConvolutionFwdPreference_t API, removed in newer
/// cuDNN — hence the wrapper).
cudnnStatus_t cudnnGetConvolutionForwardAlgorithm(
    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
    const cudnnFilterDescriptor_t wDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t yDesc,
    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
    cudnnConvolutionFwdAlgo_t *algo);
/// Wrapper: picks a backward-filter algorithm heuristically from a
/// preference and memory limit.
cudnnStatus_t cudnnGetConvolutionBackwardFilterAlgorithm(
    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
    const cudnnTensorDescriptor_t dyDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnFilterDescriptor_t dwDesc,
    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
    cudnnConvolutionBwdFilterAlgo_t *algo);
/// Wrapper: executes the forward convolution y = alpha * conv(x, w) + beta * y
/// using the given algorithm and workspace.
cudnnStatus_t cudnnConvolutionForward(
    cudnnHandle_t handle, const void *alpha,
    const cudnnTensorDescriptor_t xDesc, const void *x,
    const cudnnFilterDescriptor_t wDesc, const void *w,
    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
    const cudnnTensorDescriptor_t yDesc, void *y);
/// Wrapper: computes the convolution gradient with respect to the filter
/// (dw = conv-backward-filter(x, dy)).
cudnnStatus_t cudnnConvolutionBackwardFilter(
    cudnnHandle_t handle, const void *alpha,
    const cudnnTensorDescriptor_t xDesc, const void *x,
    const cudnnTensorDescriptor_t dyDesc, const void *dy,
    const cudnnConvolutionDescriptor_t convDesc,
    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
    size_t workSpaceSizeInBytes, const void *beta,
    const cudnnFilterDescriptor_t dwDesc, void *dw);
} // namespace cuda
} // namespace arrayfire