forked from Theano/libgpuarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathelemwise.py
More file actions
101 lines (76 loc) · 3.09 KB
/
elemwise.py
File metadata and controls
101 lines (76 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import numpy
from .dtypes import dtype_to_ctype, get_common_dtype
from . import gpuarray
from ._elemwise import GpuElemwise, arg
# Public names of this module (what ``from <module> import *`` exposes).
__all__ = ['GpuElemwise', 'elemwise1', 'elemwise2', 'ielemwise2', 'compare']
def _dtype(o):
    """Return the dtype of ``o``.

    An object that already has a ``dtype`` attribute reports it directly;
    anything else is first converted with ``numpy.asarray`` and the dtype
    of the resulting array is returned.
    """
    carrier = o if hasattr(o, 'dtype') else numpy.asarray(o)
    return carrier.dtype
def as_argument(o, name, read=False, write=False):
    """Build the kernel argument descriptor for ``o``.

    The descriptor is created by calling ``arg`` with ``name``, the dtype
    of ``o`` (as reported by ``_dtype``) and the ``read``/``write`` flags.
    ``o`` is flagged as a scalar whenever it is not a
    ``gpuarray.GpuArray`` instance.

    :param o: the value the argument stands for.
    :param name: name given to the argument.
    :param read: passed through to ``arg`` as its ``read`` flag.
    :param write: passed through to ``arg`` as its ``write`` flag.
    :returns: whatever ``arg`` returns for these settings.
    :raises ValueError: if both ``read`` and ``write`` are false.
    """
    # An argument that is neither read nor written is rejected up front,
    # before any descriptor is built.
    if not (read or write):
        raise ValueError('argument is neither read nor write')
    is_scalar = not isinstance(o, gpuarray.GpuArray)
    return arg(name, _dtype(o), scalar=is_scalar, read=read, write=write)
def elemwise1(a, op, oper=None, op_tmpl="res = %(op)sa", out=None,
              convert_f16=True):
    """Run a one-input elementwise kernel on ``a`` and return the result.

    :param a: input array; its ``context`` is used to build the kernel and
        its dtype is used for both the ``res`` and ``a`` kernel arguments.
    :param op: operator text substituted for ``%(op)s`` in ``op_tmpl``.
    :param oper: complete kernel expression; when given, ``op`` and
        ``op_tmpl`` are not used.
    :param op_tmpl: template used to build the expression when ``oper`` is
        ``None``.
    :param out: array receiving the result; when ``None`` a new one is
        obtained from ``a._empty_like_me()``.
    :param convert_f16: forwarded unchanged to ``GpuElemwise``.
    :returns: the array the kernel wrote to (``out`` if it was supplied).
    """
    res_arg = as_argument(a, 'res', write=True)
    a_arg = as_argument(a, 'a', read=True)

    res = a._empty_like_me() if out is None else out

    if oper is None:
        oper = op_tmpl % {'op': op}

    kernel = GpuElemwise(a.context, oper, (res_arg, a_arg),
                         convert_f16=convert_f16)
    kernel(res, a)
    return res
def elemwise2(a, op, b, ary, odtype=None, oper=None,
              op_tmpl="res = (%(out_t)s)a %(op)s (%(out_t)s)b",
              broadcast=False, convert_f16=True):
    """Run a two-input elementwise kernel and return a newly allocated result.

    :param a: first operand; anything that is not a ``gpuarray.GpuArray``
        is converted with ``numpy.asarray``.
    :param op: operator text substituted for ``%(op)s`` in ``op_tmpl``.
    :param b: second operand, handled like ``a``.
    :param ary: array whose ``context`` is used for the kernel and which
        determines how the result is allocated (see below).
    :param odtype: dtype of the result; defaults to
        ``get_common_dtype(a, b, True)``.
    :param oper: complete kernel expression; when given, ``op`` and
        ``op_tmpl`` are not used.
    :param op_tmpl: template used to build the expression when ``oper`` is
        ``None``; it receives ``op`` and ``out_t`` (the result of
        ``dtype_to_ctype`` for the output dtype).
    :param broadcast: forwarded unchanged to the kernel call.
    :param convert_f16: forwarded unchanged to ``GpuElemwise``; when true
        and the output dtype is float16, ``out_t`` is derived from float32
        instead.
    :returns: the result array.

    When both operands are ``GpuArray`` instances, the one with fewer
    dimensions is reshaped with leading length-1 axes and the result is
    allocated with the per-dimension maximum of the two shapes, using the
    context and class of ``ary``. Otherwise the result comes from
    ``ary._empty_like_me(dtype=odtype)``.
    """
    a_is_gpu = isinstance(a, gpuarray.GpuArray)
    if not a_is_gpu:
        a = numpy.asarray(a)
    b_is_gpu = isinstance(b, gpuarray.GpuArray)
    if not b_is_gpu:
        b = numpy.asarray(b)

    if odtype is None:
        odtype = get_common_dtype(a, b, True)

    # Argument descriptors are built before any reshaping below.
    first_arg = as_argument(a, 'a', read=True)
    second_arg = as_argument(b, 'b', read=True)
    kernel_args = [arg('res', odtype, write=True), first_arg, second_arg]

    if a_is_gpu and b_is_gpu:
        # Pad the lower-rank operand with leading 1s so both have the
        # same number of dimensions (a no-op when the ranks already match).
        rank = max(a.ndim, b.ndim)
        if a.ndim < rank:
            a = a.reshape(((1,) * (rank - a.ndim)) + a.shape)
        if b.ndim < rank:
            b = b.reshape(((1,) * (rank - b.ndim)) + b.shape)
        result_shape = tuple(max(dims) for dims in zip(a.shape, b.shape))
        res = gpuarray.empty(result_shape, dtype=odtype,
                             context=ary.context, cls=ary.__class__)
    else:
        res = ary._empty_like_me(dtype=odtype)

    if oper is None:
        cast_dtype = odtype
        if convert_f16 and odtype == 'float16':
            cast_dtype = numpy.dtype('float32')
        oper = op_tmpl % {'op': op, 'out_t': dtype_to_ctype(cast_dtype)}

    kernel = GpuElemwise(ary.context, oper, kernel_args,
                         convert_f16=convert_f16)
    kernel(res, a, b, broadcast=broadcast)
    return res
def ielemwise2(a, op, b, oper=None, op_tmpl="a = a %(op)s b",
               broadcast=False, convert_f16=True):
    """Run a two-input elementwise kernel that writes its result into ``a``.

    :param a: array that is both read and written by the kernel; its
        ``context`` is used to build the kernel.
    :param op: operator text substituted for ``%(op)s`` in ``op_tmpl``.
    :param b: second operand; anything that is not a ``gpuarray.GpuArray``
        is converted with ``numpy.asarray``.
    :param oper: complete kernel expression; when given, ``op`` and
        ``op_tmpl`` are not used.
    :param op_tmpl: template used to build the expression when ``oper`` is
        ``None``.
    :param broadcast: forwarded unchanged to the kernel call.
    :param convert_f16: forwarded unchanged to ``GpuElemwise``.
    :returns: ``a``.
    """
    if not isinstance(b, gpuarray.GpuArray):
        b = numpy.asarray(b)

    kernel_args = [
        as_argument(a, 'a', read=True, write=True),
        as_argument(b, 'b', read=True),
    ]

    expression = op_tmpl % {'op': op} if oper is None else oper

    kernel = GpuElemwise(a.context, expression, kernel_args,
                         convert_f16=convert_f16)
    kernel(a, b, broadcast=broadcast)
    return a
def compare(a, op, b, broadcast=False, convert_f16=True):
    """Compare ``a`` and ``b`` elementwise with operator ``op``.

    Delegates to ``elemwise2`` with ``a`` as the reference array, a
    boolean output dtype and a template that applies ``op`` directly to
    the two operands.

    :param a: left operand, also passed as the ``ary`` argument.
    :param op: comparison operator text substituted into the template.
    :param b: right operand.
    :param broadcast: forwarded unchanged to ``elemwise2``.
    :param convert_f16: forwarded unchanged to ``elemwise2``.
    :returns: the value returned by ``elemwise2``.
    """
    options = {
        'odtype': numpy.dtype('bool'),
        'op_tmpl': "res = (a %(op)s b)",
        'broadcast': broadcast,
        'convert_f16': convert_f16,
    }
    return elemwise2(a, op, b, a, **options)