forked from Theano/libgpuarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblas.pyx
More file actions
129 lines (109 loc) · 4.24 KB
/
blas.pyx
File metadata and controls
129 lines (109 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from pygpu.gpuarray import GpuArrayException
from pygpu.gpuarray cimport (_GpuArray, GpuArray, GA_NO_ERROR, GpuArray_error,
pygpu_copy, pygpu_empty, pygpu_zeros,
GA_ANY_ORDER, GA_F_ORDER, GpuArray_ISONESEGMENT)
cdef extern from "gpuarray/buffer_blas.h":
    # Transpose selector taken by the GpuArray_r* BLAS entry points.
    # The enumerators are declared here so Cython code in this module can
    # refer to them by name; their values come from the C header.
    ctypedef enum cb_transpose:
        cb_no_trans
        cb_trans
        cb_conj_trans
cdef extern from "gpuarray/blas.h":
    # C-level BLAS entry points operating on raw _GpuArray structs.
    # Each returns an int status code which the wrappers in this module
    # compare against GA_NO_ERROR.
    # NOTE(review): names follow the BLAS gemv / gemm / ger naming
    # (matrix-vector, matrix-matrix, rank-1 update) -- confirm exact
    # semantics against the C header.
    int GpuArray_rgemv(cb_transpose transA,
                       double alpha, _GpuArray *A, _GpuArray *X,
                       double beta, _GpuArray *Y,
                       int nocopy)
    int GpuArray_rgemm(cb_transpose transA, cb_transpose transB,
                       double alpha, _GpuArray *A, _GpuArray *B,
                       double beta, _GpuArray *C,
                       int nocopy)
    int GpuArray_rger(double alpha,
                      _GpuArray *X, _GpuArray *Y, _GpuArray *A,
                      int nocopy)
cdef api int pygpu_blas_rgemv(cb_transpose transA, double alpha, GpuArray A,
                              GpuArray X, double beta, GpuArray Y,
                              bint nocopy) except -1:
    # Forward to GpuArray_rgemv on the underlying _GpuArray structs.
    # Returns 0 on success; any status other than GA_NO_ERROR is turned
    # into a GpuArrayException whose message is looked up through A.
    cdef int status = GpuArray_rgemv(transA, alpha, &A.ga, &X.ga, beta,
                                     &Y.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&A.ga, status), status)
cdef api int pygpu_blas_rgemm(cb_transpose transA, cb_transpose transB,
                              double alpha, GpuArray A, GpuArray B,
                              double beta, GpuArray C, bint nocopy) except -1:
    # Forward to GpuArray_rgemm on the underlying _GpuArray structs.
    # Returns 0 on success; any status other than GA_NO_ERROR is turned
    # into a GpuArrayException whose message is looked up through A.
    cdef int status = GpuArray_rgemm(transA, transB, alpha, &A.ga, &B.ga,
                                     beta, &C.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&A.ga, status), status)
cdef api int pygpu_blas_rger(double alpha, GpuArray X, GpuArray Y, GpuArray A,
                             bint nocopy) except -1:
    # Forward to GpuArray_rger on the underlying _GpuArray structs.
    # Returns 0 on success; any status other than GA_NO_ERROR is turned
    # into a GpuArrayException whose message is looked up through X.
    cdef int status = GpuArray_rger(alpha, &X.ga, &Y.ga, &A.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&X.ga, status), status)
def gemv(double alpha, GpuArray A not None, GpuArray X not None,
         double beta=0.0, GpuArray Y=None, trans_a=False, overwrite_y=False):
    """Run the GPU gemv routine on matrix `A` and vector `X`.

    NOTE(review): presumably the standard BLAS operation
    ``Y = alpha * op(A) . X + beta * Y`` -- confirm against gpuarray/blas.h.

    :param alpha: scalar forwarded to the routine.
    :param A: 2-d GpuArray (must not be None).
    :param X: GpuArray operand (must not be None).
    :param beta: scalar forwarded to the routine; must be 0 when `Y` is
        not supplied.
    :param Y: optional output/accumulator array.  When omitted, an
        uninitialised 1-d array with A's typecode and context is allocated,
        sized by A's first dimension (second dimension if `trans_a`).
    :param trans_a: if true, the routine is called with ``cb_trans`` for A.
    :param overwrite_y: if false, `Y` is copied first so the caller's array
        is not the one handed to the routine.
    :returns: the array that was passed to the routine as Y.
    :raises TypeError: if `A` or `X` is None, or `A` is not 2-d.
    :raises ValueError: if `Y` is omitted while ``beta != 0``.
    :raises GpuArrayException: if the underlying routine reports an error.
    """
    cdef cb_transpose transA
    cdef size_t Yshp

    # `not None` in the signature matters: without it Cython lets None
    # through and the `.ga` accesses below would dereference it.
    if A.ga.nd != 2:
        raise TypeError("A is not a matrix")

    if trans_a:
        transA = cb_trans
        Yshp = A.ga.dimensions[1]
    else:
        transA = cb_no_trans
        Yshp = A.ga.dimensions[0]

    if Y is None:
        # A freshly allocated Y is uninitialised, so it cannot contribute
        # through a non-zero beta.
        if beta != 0.0:
            raise ValueError("Y not provided and beta != 0")
        Y = pygpu_empty(1, &Yshp, A.ga.typecode, GA_ANY_ORDER, A.context, None)
        overwrite_y = True

    if not overwrite_y:
        Y = pygpu_copy(Y, GA_ANY_ORDER)

    pygpu_blas_rgemv(transA, alpha, A, X, beta, Y, 0)
    return Y
def gemm(double alpha, GpuArray A not None, GpuArray B not None, double beta,
         GpuArray C=None, trans_a=False, trans_b=False, overwrite_c=False):
    """Run the GPU gemm routine on matrices `A` and `B`.

    NOTE(review): presumably the standard BLAS operation
    ``C = alpha * op(A) . op(B) + beta * C`` -- confirm against
    gpuarray/blas.h.

    :param alpha: scalar forwarded to the routine.
    :param A: 2-d GpuArray (must not be None).
    :param B: 2-d GpuArray (must not be None).
    :param beta: scalar forwarded to the routine; must be 0 when `C` is
        not supplied.
    :param C: optional output/accumulator array.  When omitted, an
        uninitialised 2-d array with A's typecode and context is allocated;
        its shape is taken from A's and B's dimensions according to
        `trans_a` / `trans_b`.
    :param trans_a: if true, the routine is called with ``cb_trans`` for A.
    :param trans_b: if true, the routine is called with ``cb_trans`` for B.
    :param overwrite_c: if false, `C` is copied first so the caller's array
        is not the one handed to the routine.
    :returns: the array that was passed to the routine as C.
    :raises TypeError: if `A` or `B` is None or not 2-d.
    :raises ValueError: if `C` is omitted while ``beta != 0``.
    :raises GpuArrayException: if the underlying routine reports an error.
    """
    cdef cb_transpose transA
    cdef cb_transpose transB
    cdef size_t[2] Cshp

    # `not None` in the signature matters: without it Cython lets None
    # through and the `.ga` accesses below would dereference it.
    if A.ga.nd != 2:
        raise TypeError("A is not a matrix")
    if B.ga.nd != 2:
        raise TypeError("B is not a matrix")

    # Rows of the result come from op(A), columns from op(B).
    if trans_a:
        transA = cb_trans
        Cshp[0] = A.ga.dimensions[1]
    else:
        transA = cb_no_trans
        Cshp[0] = A.ga.dimensions[0]

    if trans_b:
        transB = cb_trans
        Cshp[1] = B.ga.dimensions[0]
    else:
        transB = cb_no_trans
        Cshp[1] = B.ga.dimensions[1]

    if C is None:
        # A freshly allocated C is uninitialised, so it cannot contribute
        # through a non-zero beta.
        if beta != 0.0:
            raise ValueError("C not provided and beta != 0")
        C = pygpu_empty(2, Cshp, A.ga.typecode, GA_ANY_ORDER, A.context, None)
        overwrite_c = True

    if not overwrite_c:
        C = pygpu_copy(C, GA_ANY_ORDER)

    pygpu_blas_rgemm(transA, transB, alpha, A, B, beta, C, 0)
    return C
def ger(double alpha, GpuArray X not None, GpuArray Y not None,
        GpuArray A=None, overwrite_a=False):
    """Run the GPU ger routine on vectors `X` and `Y`.

    NOTE(review): presumably the standard BLAS rank-1 update
    ``A = alpha * X . Y^T + A`` -- confirm against gpuarray/blas.h.

    :param alpha: scalar forwarded to the routine.
    :param X: GpuArray operand (must not be None).
    :param Y: GpuArray operand (must not be None).
    :param A: optional output/accumulator array.  When omitted, a 2-d array
        is allocated with ``pygpu_zeros`` using X's typecode and context and
        shape ``(X.shape[0], Y.shape[0])``.
    :param overwrite_a: if false, `A` is copied first so the caller's array
        is not the one handed to the routine.
    :returns: the array that was passed to the routine as A.
    :raises TypeError: if `X` or `Y` is None, or if `A` is omitted and `X`
        or `Y` is not 1-d.
    :raises GpuArrayException: if the underlying routine reports an error.
    """
    cdef size_t[2] Ashp

    if A is None:
        # dimensions[0] is only valid when the array has at least one
        # dimension; check the rank before reading it to size the output.
        if X.ga.nd != 1:
            raise TypeError("X is not a vector")
        if Y.ga.nd != 1:
            raise TypeError("Y is not a vector")
        Ashp[0] = X.ga.dimensions[0]
        Ashp[1] = Y.ga.dimensions[0]
        A = pygpu_zeros(2, Ashp, X.ga.typecode, GA_ANY_ORDER, X.context, None)
        overwrite_a = True

    if not overwrite_a:
        A = pygpu_copy(A, GA_ANY_ORDER)

    pygpu_blas_rger(alpha, X, Y, A, 0)
    return A