Skip to content

Commit 20b5f51

Browse files
committed
Cleaning up cpu blas / lapack in OpenCL backend
1 parent bdbf30e commit 20b5f51

18 files changed

Lines changed: 355 additions & 231 deletions

src/api/c/svd.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,17 +12,11 @@
1212
#include <af/lapack.h>
1313

1414
#include <af/util.h>
15-
1615
#include <af/defines.h>
17-
1816
#include <err_common.hpp>
19-
2017
#include <backend.hpp>
21-
2218
#include <Array.hpp>
23-
2419
#include <handle.hpp>
25-
2620
#include <svd.hpp>
2721

2822
using namespace detail;

src/backend/cblas.cpp

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,34 +23,52 @@ static char transChar(CBLAS_TRANSPOSE Trans)
2323
}
2424
}
2525

26-
#define GEMM_F77(X, TS, TV, TY) \
27-
void cblas_##X##gemm( \
28-
const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, \
29-
const CBLAS_TRANSPOSE TransB, const int M, const int N, \
30-
const int K, const TS alpha, const TV *A, \
31-
const int lda, const TV *B, const int ldb, \
32-
const TS beta, TV *C, const int ldc) \
33-
{ \
34-
char aT = transChar(TransA); \
35-
char bT = transChar(TransB); \
36-
X##gemm_(&aT, &bT, &M, &N, &K, \
37-
(const TY *)ADDR(alpha), (const TY *)A, &lda, \
38-
(const TY *)B, &ldb, \
39-
(const TY *)ADDR(beta), (TY *)C, &ldc); \
40-
} \
41-
void cblas_##X##gemv( \
42-
const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, \
43-
const int M, const int N, \
44-
const TS alpha, const TV *A, const int lda, \
45-
const TV *X, const int incX, const TS beta, \
46-
TV *Y, const int incY) \
47-
{ \
48-
char aT = transChar(TransA); \
49-
X##gemv_(&aT, &M, &N, \
50-
(const TY *)ADDR(alpha), (const TY *)A, &lda, \
51-
(const TY *)X, &incX, \
52-
(const TY *)ADDR(beta), (TY *)Y, &incY); \
53-
} \
26+
// GEMM_F77(X, TS, TV, TY): generates four wrapper functions,
//   cblas_<X>gemm, cblas_<X>gemv, cblas_<X>axpy and cblas_<X>scal,
// each of which forwards to the matching trailing-underscore routine
// (<X>gemm_, <X>gemv_, <X>axpy_, <X>scal_ -- presumably the F77 BLAS symbols,
// declared elsewhere) and passes every argument by address.
//
//   X  - prefix token pasted into both the wrapper and the callee names.
//   TS - type of the scalar parameters (alpha, beta) in the wrapper signature.
//   TV - element type of the array parameters in the wrapper signature.
//   TY - type the array and scalar pointers are cast to for the callee.
//
// Notes:
//  * ADDR(...) is applied to alpha/beta before the cast to `const TY *`.
//    It is not defined inside this macro, so it must be defined at the
//    point where GEMM_F77 is instantiated.
//  * transChar() turns each CBLAS_TRANSPOSE argument into the `char` whose
//    address is handed to the callee.
//  * NOTE(review): the CBLAS_ORDER argument (`Order` in gemm, `order` in gemv)
//    is accepted but never read in either body -- presumably callers always
//    use column-major storage; confirm before passing any other order.
//  * The macro parameter X is also substituted wherever `X` appears as a
//    function parameter name (gemv, axpy, scal), so in the generated code
//    that parameter is named after the prefix. Declaration and use are
//    replaced together, so the expansion stays consistent.
//  * The last line ends with a continuation backslash, so the line that
//    follows the macro must stay blank.
#define GEMM_F77(X, TS, TV, TY) \
27+
void cblas_##X##gemm( \
28+
const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, \
29+
const CBLAS_TRANSPOSE TransB, const int M, const int N, \
30+
const int K, const TS alpha, const TV *A, \
31+
const int lda, const TV *B, const int ldb, \
32+
const TS beta, TV *C, const int ldc) \
33+
{ \
34+
char aT = transChar(TransA); \
35+
char bT = transChar(TransB); \
36+
X##gemm_(&aT, &bT, &M, &N, &K, \
37+
(const TY *)ADDR(alpha), (const TY *)A, &lda, \
38+
(const TY *)B, &ldb, \
39+
(const TY *)ADDR(beta), (TY *)C, &ldc); \
40+
} \
41+
void cblas_##X##gemv( \
42+
const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, \
43+
const int M, const int N, \
44+
const TS alpha, const TV *A, const int lda, \
45+
const TV *X, const int incX, const TS beta, \
46+
TV *Y, const int incY) \
47+
{ \
48+
char aT = transChar(TransA); \
49+
X##gemv_(&aT, &M, &N, \
50+
(const TY *)ADDR(alpha), (const TY *)A, &lda, \
51+
(const TY *)X, &incX, \
52+
(const TY *)ADDR(beta), (TY *)Y, &incY); \
53+
} \
54+
void cblas_##X##axpy( \
55+
const int N, const TS alpha, \
56+
const TV *X, const int incX, \
57+
TV *Y, const int incY) \
58+
{ \
59+
X##axpy_(&N, \
60+
(const TY *)ADDR(alpha), \
61+
(const TY *)X, &incX, \
62+
(TY *)Y, &incY); \
63+
} \
64+
void cblas_##X##scal( \
65+
const int N, const TS alpha, \
66+
TV *X, const int incX) \
67+
{ \
68+
X##scal_(&N, \
69+
(const TY *)ADDR(alpha), \
70+
(TY *)X, &incX); \
71+
} \
5472

5573
#define ADDR(val) &val
5674
GEMM_F77(s, float, float, float)

src/backend/opencl/CMakeLists.txt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,22 @@ ENDIF(APPLE)
2424
IF(NOT LAPACK_FOUND)
2525
MESSAGE(WARNING "LAPACK not found. Functionality will be disabled")
2626
ELSE(NOT LAPACK_FOUND)
27-
ADD_DEFINITIONS(-DWITH_OPENCL_LINEAR_ALGEBRA)
27+
ADD_DEFINITIONS(-DWITH_OPENCL_LINEAR_ALGEBRA)
28+
29+
IF(NOT USE_OPENCL_MKL)
30+
FIND_PACKAGE(CBLAS REQUIRED)
31+
32+
IF(USE_CPU_F77_BLAS)
33+
MESSAGE("Using F77 BLAS")
34+
ADD_DEFINITIONS(-DUSE_F77_BLAS)
35+
ENDIF()
36+
37+
IF (NOT CBLAS_LIBRARIES)
38+
MESSAGE(SEND_ERROR "CBLAS Library not set")
39+
ELSE()
40+
MESSAGE(STATUS "Using CBLAS Library: ${CBLAS_LIBRARIES}")
41+
ENDIF()
42+
ENDIF()
2843
ENDIF()
2944

3045
IF(NOT UNIX)
@@ -75,6 +90,7 @@ INCLUDE_DIRECTORIES(
7590
${CLFFT_INCLUDE_DIRS}
7691
${Boost_INCLUDE_DIR}
7792
${BoostCompute_INCLUDE_DIRS}
93+
${CBLAS_INCLUDE_DIR}
7894
${LAPACK_INCLUDE_DIR}
7995
)
8096

@@ -243,7 +259,9 @@ IF(FORGE_FOUND AND NOT USE_SYSTEM_FORGE)
243259
ENDIF()
244260

245261
IF(LAPACK_FOUND)
246-
TARGET_LINK_LIBRARIES(afopencl PRIVATE ${LAPACK_LIBRARIES})
262+
TARGET_LINK_LIBRARIES(afopencl
263+
PRIVATE ${LAPACK_LIBRARIES}
264+
PRIVATE ${CBLAS_LIBRARIES})
247265
ENDIF()
248266

249267
SET_TARGET_PROPERTIES(afopencl PROPERTIES

src/backend/opencl/magma/geqrf2.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,8 @@ magma_geqrf2_gpu(
245245
0, lwork*sizeof(Ty),
246246
0, NULL, NULL, NULL);
247247

248-
geqrf_work_func<Ty> cpu_geqrf;
249-
larft_func<Ty> cpu_larft;
248+
cpu_geqrf_work_func<Ty> cpu_geqrf;
249+
cpu_larft_func<Ty> cpu_larft;
250250

251251
nbmin = 2;
252252
nx = nb;
@@ -275,11 +275,11 @@ magma_geqrf2_gpu(
275275
}
276276

277277
magma_queue_sync(queue[0]);
278-
*info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work(i), ldwork, tau+i, hwork, lhwork);
278+
*info = cpu_geqrf( rows, ib, work(i), ldwork, tau+i, hwork, lhwork);
279279

280280
/* Form the triangular factor of the block reflector
281281
H = H(i) H(i+1) . . . H(i+ib-1) */
282-
cpu_larft(LAPACK_COL_MAJOR,
282+
cpu_larft(
283283
*MagmaForwardStr, *MagmaColumnwiseStr,
284284
rows, ib,
285285
work(i), ldwork, tau+i, hwork, ib);
@@ -329,7 +329,7 @@ magma_geqrf2_gpu(
329329
magma_queue_sync(queue[1]);
330330

331331
lhwork = lwork - rows*ib;
332-
*info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
332+
*info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
333333

334334
magma_setmatrix_async<Ty>(rows, ib, work, rows, dA(i, i), ldda, queue[1], NULL);
335335
}

src/backend/opencl/magma/geqrf3.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ magma_geqrf3_gpu(
217217
ldwork = m;
218218
lddwork= n;
219219

220-
geqrf_work_func<Ty> cpu_geqrf;
221-
larft_func<Ty> cpu_larft;
220+
cpu_geqrf_work_func<Ty> cpu_geqrf;
221+
cpu_larft_func<Ty> cpu_larft;
222222

223223
if ( (nb > 1) && (nb < k) ) {
224224
/* Use blocked code initially */
@@ -244,11 +244,11 @@ magma_geqrf3_gpu(
244244
}
245245

246246
magma_event_sync(event[1]);
247-
*info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork);
247+
*info = cpu_geqrf( rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork);
248248

249249
/* Form the triangular factor of the block reflector
250250
H = H(i) H(i+1) . . . H(i+ib-1) */
251-
cpu_larft(LAPACK_COL_MAJOR,
251+
cpu_larft(
252252
*MagmaForwardStr, *MagmaColumnwiseStr,
253253
rows, ib,
254254
work_ref(i), ldwork,
@@ -296,7 +296,7 @@ magma_geqrf3_gpu(
296296
magma_getmatrix<Ty>( rows, ib, a_ref(i, i), ldda, work, rows, queue );
297297

298298
lhwork = lwork - rows*ib;
299-
*info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
299+
*info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
300300

301301
magma_setmatrix<Ty>( rows, ib, work, rows, a_ref(i, i), ldda, queue );
302302
}

src/backend/opencl/magma/getrf.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,22 @@
3131
* * Redistributions of source code must retain the above copyright
3232
* notice, this list of conditions and the following disclaimer.
3333
* * Redistributions in binary form must reproduce the above copyright
34-
* notice, this list of conditions and the following disclaimer in the
34+
* notice, this list of conditions and the following disclaimer in the
3535
* documentation and/or other materials provided with the distribution.
36-
* * Neither the name of the University of Tennessee, Knoxville nor the
36+
* * Neither the name of the University of Tennessee, Knoxville nor the
3737
* names of its contributors may be used to endorse or promote products
3838
* derived from this software without specific prior written permission.
3939
*
4040
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41-
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42-
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
41+
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
4343
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
4444
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45-
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
4646
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47-
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
47+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
4848
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49-
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
49+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
5050
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5151
*
5252
**********************************************************************/
@@ -149,9 +149,9 @@ magma_int_t magma_getrf_gpu(
149149
if (m == 0 || n == 0)
150150
return *info;
151151

152-
gemm_func<Ty> gpu_gemm;
153-
trsm_func<Ty> gpu_trsm;
154-
getrf_func<Ty> cpu_getrf;
152+
gpu_gemm_func<Ty> gpu_gemm;
153+
gpu_trsm_func<Ty> gpu_trsm;
154+
cpu_getrf_func<Ty> cpu_getrf;
155155

156156
/* Function Body */
157157
mindim = std::min(m, n);
@@ -165,7 +165,7 @@ magma_int_t magma_getrf_gpu(
165165
return *info;
166166
}
167167
magma_getmatrix<Ty>(m, n, dA(0,0), ldda, work(0), m, queue);
168-
cpu_getrf(LAPACK_COL_MAJOR, m, n, work, m, ipiv);
168+
cpu_getrf( m, n, work, m, ipiv);
169169
magma_setmatrix<Ty>(m, n, work(0), m, dA(0,0), ldda, queue);
170170
magma_free_cpu(work);
171171
}
@@ -219,7 +219,7 @@ magma_int_t magma_getrf_gpu(
219219
magma_getmatrix<Ty>(m-j*nb, nb, dAP(0,0), maxm, work(0), ldwork, queue);
220220

221221
if (j > 0 && n > (j + 1) * nb) {
222-
gpu_trsm(clblasColumnMajor,
222+
gpu_trsm(
223223
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
224224
n - (j+1)*nb, nb,
225225
c_one,
@@ -228,7 +228,7 @@ magma_int_t magma_getrf_gpu(
228228
1, &queue, 0, nullptr, &event);
229229

230230
if (m > j * nb) {
231-
gpu_gemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans,
231+
gpu_gemm( clblasNoTrans, clblasNoTrans,
232232
n-(j+1)*nb, m-j*nb, nb,
233233
c_neg_one,
234234
dAT(j-1,j+1), lddat,
@@ -241,7 +241,7 @@ magma_int_t magma_getrf_gpu(
241241

242242
// do the cpu part
243243
rows = m - j*nb;
244-
cpu_getrf(LAPACK_COL_MAJOR, rows, nb, work, ldwork, ipiv+j*nb);
244+
cpu_getrf( rows, nb, work, ldwork, ipiv+j*nb);
245245
if (*info == 0 && iinfo > 0)
246246
*info = iinfo + j*nb;
247247

@@ -257,7 +257,7 @@ magma_int_t magma_getrf_gpu(
257257

258258
// do the small non-parallel computations (next panel update)
259259
if (s > (j+1)) {
260-
gpu_trsm(clblasColumnMajor,
260+
gpu_trsm(
261261
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
262262
nb, nb,
263263
c_one,
@@ -266,7 +266,7 @@ magma_int_t magma_getrf_gpu(
266266
1, &queue, 0, nullptr, &event);
267267

268268

269-
gpu_gemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans,
269+
gpu_gemm( clblasNoTrans, clblasNoTrans,
270270
nb, m-(j+1)*nb, nb,
271271
c_neg_one,
272272
dAT(j, j+1), lddat,
@@ -277,7 +277,7 @@ magma_int_t magma_getrf_gpu(
277277
}
278278
else {
279279
if (n > s * nb) {
280-
gpu_trsm(clblasColumnMajor,
280+
gpu_trsm(
281281
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
282282
n-s*nb, nb,
283283
c_one,
@@ -287,7 +287,7 @@ magma_int_t magma_getrf_gpu(
287287
}
288288

289289
if ((n > (j+1) * nb) && (m > (j+1) * nb)) {
290-
gpu_gemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans,
290+
gpu_gemm( clblasNoTrans, clblasNoTrans,
291291
n-(j+1)*nb, m-(j+1)*nb, nb,
292292
c_neg_one,
293293
dAT(j, j+1), lddat,
@@ -308,7 +308,7 @@ magma_int_t magma_getrf_gpu(
308308
magma_getmatrix<Ty>(rows, nb0, dAP(0,0), maxm, work(0), ldwork, queue);
309309

310310
// do the cpu part
311-
cpu_getrf(LAPACK_COL_MAJOR, rows, nb0, work, ldwork, ipiv+s*nb);
311+
cpu_getrf( rows, nb0, work, ldwork, ipiv+s*nb);
312312
if (*info == 0 && iinfo > 0)
313313
*info = iinfo + s*nb;
314314

@@ -322,7 +322,7 @@ magma_int_t magma_getrf_gpu(
322322
magmablas_transpose<Ty>(rows, nb0, dAP(0,0), maxm, dAT(s,s), lddat, queue);
323323

324324
if (n > s * nb + nb0) {
325-
gpu_trsm(clblasColumnMajor,
325+
gpu_trsm(
326326
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
327327
n-s*nb-nb0, nb0,
328328
c_one, dAT(s,s), lddat,

0 commit comments

Comments
 (0)