#include "pyTensor.h" #include #include "pybind11/operators.h" #include "pybind11/stl.h" #include "pybind11/numpy.h" #include "taco/type.h" #include "taco/tensor.h" #if CUDA_BUILT #include #endif // Add Python dictionary initializer with {tuple(coordinate) : data} pairs namespace taco{ namespace pythonBindings{ static void checkBounds(const std::vector& dims, const std::vector& indices){ // Check for potential scalar access. Don't throw error if syntax valid if(dims.empty() && (indices.empty() || (indices[0] == 0 && indices.size() == 1))){ return; } if(dims.size() != indices.size()){ std::ostringstream o; o << "Incorrect number of dimensions when indexing. Tensor is order " << dims.size() << " but got index of " "size " << indices.size(); o << ". To index multiple dimensions only \"fancy\" notation is supported. For example to access the first " "element of a matrix, use A[0, 0] instead of A[0][0]."; throw py::value_error(o.str()); } for(size_t i = 0; i < dims.size(); ++i){ if(indices[i] >= dims[i]){ std::ostringstream o; o << "Index out of range for dimension " << i << ". Dimension shape is " << dims[i] << " but index value is " << indices[i]; throw py::index_error(o.str()); } } } template static Tensor fromNpArr(py::buffer_info& array_buffer, Format& fmt, bool copy){ std::vector buf_shape = array_buffer.shape; std::vector shape(buf_shape.begin(), buf_shape.end()); const ssize_t size = array_buffer.size; // Creat row-major dense tensor Tensor tensor(shape, fmt); TensorStorage& storage = tensor.getStorage(); void *buf_data = array_buffer.ptr; Array::Policy policy = Array::Policy::UserOwns; if(should_use_CUDA_codegen()){ #if CUDA_BUILT taco_iassert(should_use_CUDA_unified_memory()); buf_data = cuda_unified_alloc(size * array_buffer.itemsize); cudaMemcpy(buf_data, array_buffer.ptr, size * array_buffer.itemsize, cudaMemcpyDefault); policy = Array::Policy::Free; #else taco_iassert(false); #endif } else if(copy){ buf_data = new T[size]; memcpy(buf_data, array_buffer.ptr, size*array_buffer.itemsize); policy = Array::Policy::Delete; } storage.setValues(makeArray(static_cast(buf_data), size, policy)); tensor.setStorage(storage); return tensor; } template static Tensor fromNumpyF(py::array_t &array, bool copy) { py::buffer_info array_buffer = array.request(); const ssize_t dims = array_buffer.ndim; // Creat col-major dense tensor std::vector ordering; for(int i = dims-1; i >= 0; --i){ ordering.push_back(i); } Format fmt(std::vector(dims, dense), ordering); return fromNpArr(array_buffer, fmt, copy); } template static Tensor fromNumpyC(py::array_t &array, bool copy) { py::buffer_info array_buffer = array.request(); const ssize_t dims = array_buffer.ndim; Format fmt(std::vector(dims, dense)); return fromNpArr(array_buffer, fmt, copy); } template static Tensor fromSpMatrix(py::array_t &ind_ptr, py::array_t &inds, py::array_t &data, const std::vector &dims, bool copy, bool CSR){ py::buffer_info ind_ptr_buf = ind_ptr.request(); py::buffer_info inds_buf = inds.request(); py::buffer_info data_buf = data.request(); if(ind_ptr_buf.ndim != 1 || inds_buf.ndim != 1 || data_buf.ndim != 1) { throw py::value_error("Data arrays must be 1D."); } IdxType *mat_ptr = static_cast(ind_ptr_buf.ptr); IdxType *mat_ind = static_cast(inds_buf.ptr); T *mat_data = static_cast(data_buf.ptr); Array::Policy policy = Array::Policy::UserOwns; if(should_use_CUDA_codegen()){ taco_iassert(should_use_CUDA_unified_memory()); // TODO: Should copy arrays to unified memory taco_not_supported_yet; } else if(copy){ mat_ptr = new IdxType[ind_ptr_buf.size]; mat_ind = new IdxType[inds_buf.size]; mat_data = new T[data_buf.size]; memcpy(mat_ptr, ind_ptr_buf.ptr, ind_ptr_buf.size*ind_ptr_buf.itemsize); memcpy(mat_ind, inds_buf.ptr, inds_buf.size * inds_buf.itemsize); memcpy(mat_data, data_buf.ptr, data_buf.size * data_buf.itemsize); policy = Array::Policy::Delete; } // Create CSR Matrix Tensor tensor; if(CSR){ tensor = makeCSR(util::uniqueName("csr"), dims, mat_ptr, mat_ind, mat_data, policy); } else{ tensor = makeCSC(util::uniqueName("csc"), dims, mat_ptr, mat_ind, mat_data, policy); } return tensor; } template static py::tuple toSpMatrix(Tensor &tensor, bool tocsr) { if(tensor.getOrder() != 2) { throw py::value_error("Must be a matrix to convert to scipy"); } // Force computation of the tensor tensor.pack(); if(tensor.needsCompute()){ tensor.evaluate(); } int *ptr, *idx; T* vals; size_t ptr_arr_size, idx_arr_size, val_arr_size; // We may get a matrix in any format so we copy into a new tensor. Also we remove any explicit 0s before // moving to the scipy representation since the scipy contructor from dense arrays seems to do this as well. Tensor t(tensor.getDimensions(), tocsr? CSR: CSC); for (auto& value : tensor) { if (value.second != 0) { t.insert(value.first.toVector(), value.second); } } t.pack(); if(tocsr){ getCSRArrays(t, &ptr, &idx, &vals); }else { getCSCArrays(t, &ptr, &idx, &vals); } // Could return these arrays without the memcpy. Would need to get the data pointers and change the // taco policies to UserOwn but would need to check the old policy to ensure that we free the right // way in general in the py capsules below. This code works so left with the double copy for now. auto index = t.getStorage().getIndex(); ptr_arr_size = index.getModeIndex(1).getIndexArray(0).getSize(); idx_arr_size = index.getModeIndex(1).getIndexArray(1).getSize(); val_arr_size = t.getStorage().getValues().getSize(); int *np_ptr = new int[ptr_arr_size]; int *np_idx = new int[idx_arr_size]; T *np_vals = new T[val_arr_size]; memcpy(np_ptr, ptr, ptr_arr_size*sizeof(int)); memcpy(np_idx, idx, idx_arr_size*sizeof(int)); memcpy(np_vals, vals, val_arr_size*sizeof(T)); py::capsule free_ptr(np_ptr, [](void *f) { int *p = static_cast(f); delete[] p; }); py::capsule free_idx(np_idx, [](void *f) { int *p = static_cast(f); delete[] p; }); py::capsule free_vals(np_vals, [](void *f) { T *p = static_cast(f); delete[] p; }); py::array_t ptr_arr({ptr_arr_size}, {sizeof(int)}, np_ptr, free_ptr); py::array_t idx_arr({idx_arr_size}, {sizeof(int)}, np_idx, free_idx); py::array_t val_arr({val_arr_size}, {sizeof(T)}, np_vals, free_vals); return py::make_tuple(ptr_arr, idx_arr, val_arr); } template static inline Access accessGetter(Tensor& tensor, idxVar& var) { return tensor(var); } template static inline CType elementGetter(Tensor& tensor, std::vector coords) { checkBounds(tensor.getDimensions(), coords); if(tensor.getOrder() == 0) { return tensor.at({}); } return tensor.at(coords); } template static inline void elementSetter(Tensor &tensor, std::vector coords, pyType value) { checkBounds(tensor.getDimensions(), coords); if(tensor.getOrder() == 0) { tensor = static_cast(value); } tensor.insert(coords, static_cast(value)); } template static void insert(Tensor &tensor, std::vector coords, double value) { checkBounds(tensor.getDimensions(), coords); if(tensor.getOrder() == 0) { tensor = static_cast(value); } tensor.insert(coords, static_cast(value)); } template static inline void singleElementSetter(Tensor &tensor, int coord, pyType value) { elementSetter(tensor, {coord}, value); } template static inline void exprSetter(Tensor &tensor, VarType idx, ExprType expr) { tensor(idx) = expr; } template static inline void exprScalarSetter(Tensor &tensor, VarType idx, SType scalar) { tensor(idx) = IndexExpr(scalar); } template static Tensor makeTensor(std::string s, std::vector shape, std::vector fmt) { return Tensor(s, shape, Format(fmt)); } template class PyTensorIter { public: PyTensorIter(Tensor &tensor) : end(tensor.end()), it(tensor.begin()) { } py::tuple advance() { // Ignore explicit zeros while (it != end && it->second == static_cast(0)) { ++it; } if (it == end) { throw py::stop_iteration(); } const auto coords = it->first.toVector(); const auto val = it->second; ++it; return py::make_tuple(coords, val); } private: const typename Tensor::template const_iterator end; typename Tensor::template const_iterator it; }; template static void declareTensor(py::module &m, const std::string typestr) { std::string pyIterName = std::string("py_tensor_iterator") + typestr; py::class_>(m, pyIterName.c_str()) .def("__iter__", [](PyTensorIter &it) -> PyTensorIter& { return it; }) .def("__next__", &PyTensorIter::advance); using typedTensor = Tensor; m.def("to_sp_matrix", &toSpMatrix); m.def("fromNpF", &fromNumpyF); m.def("fromNpC", &fromNumpyC); m.def("fromSpMatrix", &fromSpMatrix); std::string pyClassName = std::string("Tensor") + typestr; py::class_(m, pyClassName.c_str(), py::buffer_protocol()) .def(py::init<>()) .def(py::init(), py::arg("name")) .def(py::init(), py::arg("value")) .def(py::init, ModeFormat>(), py::arg("name"), py::arg("shape"), py::arg("format") = ModeFormat::compressed) .def(py::init, Format>(), py::arg("name"), py::arg("shape"), py::arg("format")) .def(py::init(&makeTensor)) .def(py::init()) .def_buffer([](typedTensor &t) -> py::buffer_info { if(!isDense(t.getFormat())){ throw py::value_error("Cannot export a compressed tensor. Make sure all dimensions are dense " "using to_dense() before attempting this conversion."); } // Force computation of the tensor t.pack(); if(t.needsCompute()){ t.evaluate(); } void *ptr = t.getStorage().getValues().getData(); std::vector shape (t.getDimensions().begin(), t.getDimensions().end()); std::vector row_major_strides; for(size_t i = 0; i < shape.size(); ++i) { ssize_t currentStride = sizeof(CType); for(size_t j = i + 1; j < shape.size(); ++j){ currentStride *= shape[j]; } row_major_strides.push_back(currentStride); } std::vector strides; for(const int &permutation : t.getFormat().getModeOrdering()){ strides.push_back(row_major_strides[permutation]); } return py::buffer_info( ptr, /* Pointer to buffer */ sizeof(CType), /* Size of one scalar */ py::format_descriptor::format(), /* Python struct-style format descriptor */ t.getOrder(), /* Number of dimensions */ shape, /* Buffer dimensions */ strides /* Strides (in bytes) for each index */ ); }) .def("set_name", &TensorBase::setName) .def("get_name", &TensorBase::getName) .def("order", &TensorBase::getOrder) .def("get_shape", &TensorBase::getDimension, py::arg("axis")) .def("dtype", &TensorBase::getComponentType) .def("get_dimensions", &TensorBase::getDimensions) .def("format", &TensorBase::getFormat) .def("pack", &typedTensor::pack) // only bind .compile(), not .compile(IndexStmt, bool) .def("compile", [](typedTensor &self) { self.compile(); } ) .def("assemble", &typedTensor::assemble) .def("evaluate", &typedTensor::evaluate) .def("compute", &typedTensor::compute) .def("insert", &insert) .def("remove_explicit_zeros", &typedTensor::removeExplicitZeros) .def("transpose", [](typedTensor &self, std::vector dims, Format format, std::string name) -> typedTensor { return self.transpose(name, dims, format); }, py::is_operator()) .def("__getitem__", [](typedTensor& self, const int &index) -> CType { return elementGetter(self, {index}); }, py::is_operator()) .def("__getitem__", [](typedTensor& self, const std::vector &indices) -> CType { return elementGetter(self, indices); }, py::is_operator()) .def("__getitem__", [](typedTensor& self, std::nullptr_t ptr) -> Access{ if(self.getOrder() != 0) { throw py::index_error("Can only index scalar tensors with None."); } return self(); }, py::is_operator()) .def("__iter__", [](typedTensor &t) {return PyTensorIter(t); } ) .def("__getitem__", &accessGetter, py::is_operator()) .def("__getitem__", &accessGetter&>, py::is_operator()) // Set scalars to expression using none .def("__setitem__", [](typedTensor& self, std::nullptr_t ptr, const IndexExpr expr) -> void { self = expr; }, py::is_operator()) .def("__setitem__", [](typedTensor& self, std::nullptr_t ptr, const Access access) -> void { self = access; }, py::is_operator()) .def("__setitem__", [](typedTensor& self, std::nullptr_t ptr, const TensorVar tensorVar) -> void { self = tensorVar; }, py::is_operator()) // Set expressions with varying types .def("__setitem__", &exprSetter, py::is_operator()) .def("__setitem__", &exprSetter, py::is_operator()) .def("__setitem__", &exprSetter, py::is_operator()) .def("__setitem__", &exprSetter, IndexExpr>, py::is_operator()) .def("__setitem__", &exprSetter, Access>, py::is_operator()) .def("__setitem__", &exprSetter, TensorVar>, py::is_operator()) .def("__setitem__", &exprScalarSetter, py::is_operator()) .def("__setitem__", &exprScalarSetter, py::is_operator()) .def("__setitem__", &exprScalarSetter, int64_t>, py::is_operator()) .def("__setitem__", &exprScalarSetter, double>, py::is_operator()) // This is a hack that exploits pybind11's resolution order. If we get here all other methods to resolve the // function failed and we throw an error. There may be better was to handle this in pybind. .def("__getitem__", [](typedTensor& self, const py::object &indices) -> void { std::ostringstream o; o << "Indices must be an iterable of integers or IndexVars but got " << indices; throw py::index_error(o.str()); }, py::is_operator()) .def("__setitem__", [](typedTensor& self, const py::object &indices, py::object value) -> void { std::ostringstream o; o << "Indices must be an iterable of IndexVars assigned to an index expression or a " "value that can be transformed to an index expression (float or int) but got " << indices << " and " << value << ". Note that element assignment is disabled in this release" "and replace with .insert which increment the element at a given position (see the docs)."; throw py::index_error(o.str()); }, py::is_operator()) .def("__repr__", [](typedTensor& self) -> std::string{ std::ostringstream o; o << self; return o.str(); }, py::is_operator()); } void defineTensor(py::module &m) { py::implicitly_convertible(); py::implicitly_convertible, Format>(); py::class_(m, "TensorBase") .def("dtype", &TensorBase::getComponentType); declareTensor(m, "Bool"); declareTensor(m, "Int8"); declareTensor(m, "Int16"); declareTensor(m, "Int32"); declareTensor(m, "Int64"); declareTensor(m, "UInt8"); declareTensor(m, "UInt16"); declareTensor(m, "UInt32"); declareTensor(m, "UInt64"); declareTensor(m, "Float"); declareTensor(m, "Double"); } }}