-
Notifications
You must be signed in to change notification settings - Fork 198
Expand file tree
/
Copy pathpyFormat.cpp
More file actions
307 lines (234 loc) · 11 KB
/
pyFormat.cpp
File metadata and controls
307 lines (234 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#include "pyFormat.h"
#include "pybind11/stl.h"
#include <functional>
namespace taco{
namespace pythonBindings{
// Returns `currentValue` with bit number `bitToSet` switched on (bit 0 is the least significant).
// Bits that are already set are left untouched.
static inline std::size_t orInBit(std::size_t currentValue, int bitToSet){
  const auto mask = 1ULL << bitToSet;
  return currentValue | mask;
}
// Hash for taco::ModeFormat, used to back Python's __hash__.
// Python expects objects that compare equal to report the same hash, so the value is built only
// from what the mode format reports about itself: one bit per property query plus the hash of
// its name.
static std::size_t hashModeFormat(const taco::ModeFormat& modeFormat){
  std::size_t propertyBits = 0;

  // Turns on bit `position` of propertyBits when the queried property holds.
  const auto record = [&propertyBits](bool holds, int position) {
    if (holds) {
      propertyBits = orInBit(propertyBits, position);
    }
  };

  record(modeFormat.isFull(), 0);
  record(modeFormat.isOrdered(), 1);
  record(modeFormat.isUnique(), 2);
  record(modeFormat.isBranchless(), 3);
  record(modeFormat.isCompact(), 4);
  record(modeFormat.hasCoordPosIter(), 5);
  record(modeFormat.hasCoordValIter(), 6);
  record(modeFormat.hasLocate(), 7);
  record(modeFormat.hasInsert(), 8);
  record(modeFormat.hasAppend(), 9);
  record(modeFormat.defined(), 10);

  // Unsigned addition wraps around on overflow, which is harmless for a hash value.
  return propertyBits + std::hash<std::string>()(modeFormat.getName());
}
// Hash for taco::ModeFormatPack, used to back Python's __hash__.
// Every mode format in the pack contributes its own hash scaled by its 1-based position, so the
// same formats in a different order produce a different sum; the pack length is mixed in at the end.
static std::size_t hashModeFormatPack(const taco::ModeFormatPack& modeFormatPack){
  const auto& formats = modeFormatPack.getModeFormats();

  std::size_t weightedSum = 0;
  std::size_t position = 1;  // 1-based so the first entry is not multiplied by zero
  for (const auto& format : formats) {
    weightedSum += position * hashModeFormat(format);
    ++position;
  }

  return weightedSum + 7 * formats.size();
}
// Hash for taco::Format, used to back Python's __hash__.
//
// The hash combines the hashes of the format's mode format packs with its mode ordering, so two
// formats with equal packs and equal ordering always hash to the same value. Unsigned arithmetic
// wraps on overflow, which is harmless for a hash.
//
// The pack list and the mode ordering are not guaranteed to have the same length: a
// mode_format_pack can be built from a whole vector of mode formats, while the ordering has one
// entry per mode. Indexing the packs with an ordering index can therefore run past the end of the
// pack list, so every access below is bounded by the container it reads from.
std::size_t hashFormat(const taco::Format& format){
  const auto& modeTypePacks = format.getModeFormatPacks();
  const auto& ordering = format.getModeOrdering();
  const std::size_t numPacks = modeTypePacks.size();
  const std::size_t numModes = ordering.size();

  std::size_t hashValue = 0;

  // Positions present in both containers: pair each pack with the ordering entry at the same
  // index. When every pack holds exactly one mode this covers everything.
  std::size_t paired = 0;
  for(; paired < numPacks && paired < numModes; ++paired){
    hashValue += hashModeFormatPack(modeTypePacks[paired]) * (ordering[paired] + 1);
  }

  // Ordering entries without a pack at the same index (packs holding several modes).
  for(std::size_t i = paired; i < numModes; ++i){
    hashValue += (i + 1) * (ordering[i] + 1);
  }

  // Packs without an ordering entry at the same index; not expected, but handled so that no
  // pack is silently ignored.
  for(std::size_t i = paired; i < numPacks; ++i){
    hashValue += (i + 1) * hashModeFormatPack(modeTypePacks[i]);
  }

  return hashValue + 11 * numModes;
}
// Registers taco::ModeFormat with the Python module `m` as the class `mode_format` and publishes
// the Compressed and Dense mode formats as module attributes (capitalised and lower-case spellings).
void defineModeFormats(py::module &m){
  py::class_<taco::ModeFormat> modeFormatClass(m, "mode_format", R"//(
Defines the storage format for a given dimension (mode) of a tensor.
Dimensions (modes) can either be dense (all elements are stored) or compressed as a sparse representation where
only the non-zeros of the dimension are stored.
Attributes
-----------
name
Examples
----------
>>> import pytaco as pt
>>> pt.dense
mode_format(dense)
>>> pt.compressed
mode_format(compressed)
>>> pt.dense.name
'dense'
Notes
----------
PyTaco currently exports the following mode formats:
:attr:`~pytaco.compressed` or :attr:`~pytaco.Compressed` - Only store non-zeros. eg. The second mode (dimension) in CSR
:attr:`~pytaco.dense` or :attr:`~pytaco.Dense` - Store all elements in dimension. eg. The first mode (dimension) in CSR
Explicit 0s resulting from computation are always stored even though a mode is marked as compressed. This is to avoid
checking every result from a computation which would slow down taco.
)//");

  // Read-only `name` attribute.
  modeFormatClass.def_property_readonly("name", &taco::ModeFormat::getName, R"//(
Returns a string identifying the mode format. This will either be 'compressed' or 'dense'
)//");

  // Bindings that are currently disabled:
  //   .def(py::init<>())
  //   .def("is_full", &taco::ModeFormat::isFull)
  //   .def("is_ordered", &taco::ModeFormat::isOrdered)
  //   .def("is_unique", &taco::ModeFormat::isUnique)
  //   .def("is_branchless", &taco::ModeFormat::isBranchless)
  //   .def("is_compact", &taco::ModeFormat::isCompact)
  //   .def("has_coord_val_iter", &taco::ModeFormat::hasCoordValIter)
  //   .def("has_coord_pos_iter", &taco::ModeFormat::hasCoordPosIter)
  //   .def("has_locate", &taco::ModeFormat::hasLocate)
  //   .def("has_insert", &taco::ModeFormat::hasInsert)
  //   .def("has_append", &taco::ModeFormat::hasAppend)
  //   .def("defined", &taco::ModeFormat::defined)

  // repr(): the streamed form of the mode format wrapped in "mode_format(...)".
  modeFormatClass.def("__repr__", [](const taco::ModeFormat& fmt) -> std::string{
    std::ostringstream text;
    text << "mode_format(" << fmt << ")";
    return text.str();
  }, py::is_operator());

  // Comparisons forward to the C++ operators.
  modeFormatClass.def("__eq__", [](const taco::ModeFormat& lhs, const taco::ModeFormat& rhs) -> bool{
    return lhs == rhs;
  }, py::is_operator());
  modeFormatClass.def("__ne__", [](const taco::ModeFormat& lhs, const taco::ModeFormat& rhs) -> bool{
    return lhs != rhs;
  }, py::is_operator());

  // Hashing goes through hashModeFormat so that mode formats can be used as dict keys / set members.
  modeFormatClass.def("__hash__", [](const taco::ModeFormat &fmt) -> std::size_t {
    return hashModeFormat(fmt);
  }, py::is_operator());

  // Module-level constants, each exported under two spellings.
  m.attr("Compressed") = taco::ModeFormat::Compressed;
  m.attr("compressed") = taco::ModeFormat::Compressed;
  m.attr("Dense") = taco::ModeFormat::Dense;
  m.attr("dense") = taco::ModeFormat::Dense;
}
// Registers taco::ModeFormatPack with the Python module `m` as the class `mode_format_pack`.
// From Python a pack can be constructed from a list of mode formats or from a single mode format,
// and it supports ==, !=, hash() and repr().
void defineModeFormatPack(py::module& m){
  py::class_<taco::ModeFormatPack>(m, "mode_format_pack")
    // Constructors: from a list of mode formats, or from a single mode format.
    .def(py::init<const std::vector<taco::ModeFormat>>())
    .def(py::init<const taco::ModeFormat>())

    .def("mode_formats", &taco::ModeFormatPack::getModeFormats)

    // Both comparison operands are taken by const reference so that comparing packs does not
    // copy the right-hand side.
    .def("__eq__", [](const taco::ModeFormatPack& self, const taco::ModeFormatPack& other) -> bool{
      return self == other;
    }, py::is_operator())

    .def("__ne__", [](const taco::ModeFormatPack& self, const taco::ModeFormatPack& other) -> bool{
      return self != other;
    }, py::is_operator())

    .def("__hash__", [](const taco::ModeFormatPack &self) -> std::size_t {
      // Overflow doesn't affect python's required spec
      return hashModeFormatPack(self);
    }, py::is_operator())

    // repr(): the streamed form of the pack wrapped in "mode_format_pack(...)".
    .def("__repr__", [](const taco::ModeFormatPack& self) -> std::string{
      std::ostringstream o;
      o << "mode_format_pack(" << self << ")";
      return o.str();
    }, py::is_operator());
}
// Registers taco::Format with the Python module `m` as the class `format`, adds the module-level
// function `is_dense`, and publishes the CSR and CSC formats as module attributes (upper- and
// lower-case spellings).
void defineFormat(py::module &m){
  // Lets Python callers pass a mode_format wherever a mode_format_pack is expected, e.g. inside
  // the list handed to the format constructor.
  py::implicitly_convertible<taco::ModeFormat, taco::ModeFormatPack>();

  py::class_<taco::Format>(m, "format", R"//(
format(mode_formats=[], mode_ordering=[])
Create a :class:`~pytaco.tensor` format.
The modes have the given mode storage formats and are stored in the given sequence. Mode i has the :class:`mode_format`
specified by mode_formats[mode_ordering[i]].
If no arguments are given a format for a 0-order tensor (a scalar) is created.
Parameters
-----------
mode_formats: pytaco.mode_format, iterable of pytaco.mode_format, optional
A list representing the mode format used to store each mode (dimension) of the tensor specified by mode_ordering[i].
If a single :class:`~pytaco.mode_format` is given, then a format for a 1-order tensor (vector) is created. The
default value is the empty list meaning a scalar is created.
mode_ordering: int, iterable of ints, optional
Can be specified if len(mode_formats) > 1. Specifies the order in which the dimensions (modes) of the tensor
should be stored in memory. That is, the mode stored in the i-th position in memory is specified by mode_ordering[i].
Defaults to mode_ordering[i] = i which corresponds to row-major storage.
Notes
--------
PyTaco exports the following common formats:
:attr:`~pytaco.csr` or :attr:`~pytaco.CSR` - Compressed Sparse Row storage format.
:attr:`~pytaco.csc` or :attr:`~pytaco.CSC` - Compressed Sparse Columns storage format.
Attributes
-----------
order
mode_formats
mode_ordering
Examples
----------
Here, we will create two common storage formats CSR and CSC in order to better understand formats. First, we look at
CSR.
We need a mode formats list to tell taco the first dimension it stores should be dense and the second dimension
should be sparse.
>>> import pytaco as pt
>>> mode_formats = [pt.dense, pt.compressed]
We then need to tell taco the order in which to store the dimensions. Since we want CSR, we want to store the rows first
then the columns. Once we do this, we can make the format.
>>> mode_ordering = [0, 1] # Taco will default this if no ordering is given.
>>> csr = pt.format(mode_formats, mode_ordering)
>>> csr.order
2
Now, it is easy to make a CSC format given what we have already. For CSC, we want to store the columns before the rows
but also have the columns be dense and the rows be sparse. We do so as follows:
>>> mode_ordering_csc = [1,0]
>>> csc = pt.format(mode_formats, mode_ordering_csc)
This tells taco to store the columns before the rows due to the ordering given and to store the columns as dense since
they are now the first storage dimension and the mode_formats[0] is dense.
We can generalize this to make a large number of storage formats.
)//")
    // Constructors: scalar format, single mode format, list of packs, list of packs + ordering.
    .def(py::init<>())
    .def(py::init<const taco::ModeFormat>())
    .def(py::init<const std::vector<taco::ModeFormatPack> &>())
    .def(py::init<const std::vector<taco::ModeFormatPack> &, const std::vector<int> &>())

    // Read-only attributes.
    .def_property_readonly("order", &taco::Format::getOrder, R"//(
Returns the number of modes (dimensions) stored in a format.
)//")
    .def_property_readonly("mode_formats", &taco::Format::getModeFormats,R"//(
Returns the storage types of the modes. The type of mode stored in position i is specified by element i of the returned
vector.
)//")
    // .def("mode_format_packs", &taco::Format::getModeFormatPacks)
    .def_property_readonly("mode_ordering", &taco::Format::getModeOrdering, R"//(
Returns a list representing the ordering in which the modes are stored. The mode stored in position i is specified by
element i of the list returned.
)//")
    // .def("level_array_types", &taco::Format::getLevelArrayTypes)
    // .def("coordinate_type_pos", &taco::Format::getCoordinateTypePos)
    // .def("coordinate_type_idx", &taco::Format::getCoordinateTypeIdx)
    // .def("set_level_array_types", &taco::Format::setLevelArrayTypes)

    // Both comparison operands are taken by const reference so that comparing formats does not
    // copy the right-hand side.
    .def("__eq__", [](const taco::Format& self, const taco::Format& other) -> bool{
      return self == other;
    }, py::is_operator())

    .def("__ne__", [](const taco::Format& self, const taco::Format& other) -> bool{
      return self != other;
    }, py::is_operator())

    .def("__hash__", [](const taco::Format &self) -> std::size_t {
      return hashFormat(self);
    }, py::is_operator())

    // len(fmt) reports the same value as fmt.order.
    .def("__len__", &taco::Format::getOrder)

    // repr(): the streamed form of the format wrapped in "Format(...)".
    .def("__repr__", [](const taco::Format& self) -> std::string{
      std::ostringstream o;
      o << "Format(" << self << ")";
      return o.str();
    }, py::is_operator());

  // The is_dense docstring below starts with its own signature line, so pybind11's automatically
  // generated signature is switched off for the definitions that follow.
  py::options options;
  options.disable_function_signatures();

  m.def("is_dense", &taco::isDense, R"//(
is_dense(fmt)
Checks if a format is all dense.
Parameters
-------------
fmt: pytaco.format
Returns
---------
bool
True if all dimensions (modes) in a tensor are stored in a dense format and False otherwise.
Examples
------------
>>> import pytaco as pt
>>> pt.is_dense(pt.csr)
False
>>> my_fmt = pt.format([pt.dense]*3)
>>> pt.is_dense(my_fmt)
True
)//");

  // Predefined matrix formats, each exported under two spellings.
  m.attr("CSR") = CSR;
  m.attr("csr") = CSR;
  m.attr("CSC") = CSC;
  m.attr("csc") = CSC;
}
}}