/******************************************************* * Copyright (c) 2014, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. * The complete license agreement can be obtained at: * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // IWYU pragma: keep #include #include #include #include using af::dim4; using common::half; using common::Node; using common::Node_map_t; using common::Node_ptr; using common::NodeIterator; using cpu::jit::BufferNode; using std::adjacent_find; using std::copy; using std::is_standard_layout; using std::move; using std::vector; namespace cpu { template Node_ptr bufferNodePtr() { return Node_ptr(reinterpret_cast(new BufferNode())); } template Array::Array(dim4 dims) : info(getActiveDeviceId(), dims, 0, calcStrides(dims), static_cast(dtype_traits::af_type)) , data(memAlloc(dims.elements()).release(), memFree) , data_dims(dims) , node(bufferNodePtr()) , ready(true) , owner(true) {} template Array::Array(const dim4 &dims, T *const in_data, bool is_device, bool copy_device) : info(getActiveDeviceId(), dims, 0, calcStrides(dims), static_cast(dtype_traits::af_type)) , data((is_device & !copy_device) ? in_data : memAlloc(dims.elements()).release(), memFree) , data_dims(dims) , node(bufferNodePtr()) , ready(true) , owner(true) { static_assert(is_standard_layout>::value, "Array must be a standard layout type"); static_assert(std::is_nothrow_move_assignable>::value, "Array is not move assignable"); static_assert(std::is_nothrow_move_constructible>::value, "Array is not move constructible"); static_assert( offsetof(Array, info) == 0, "Array::info must be the first member variable of Array"); if (!is_device || copy_device) { // Ensure the memory being written to isnt used anywhere else. getQueue().sync(); copy(in_data, in_data + dims.elements(), data.get()); } } template Array::Array(const af::dim4 &dims, Node_ptr n) : info(getActiveDeviceId(), dims, 0, calcStrides(dims), static_cast(dtype_traits::af_type)) , data() , data_dims(dims) , node(move(n)) , ready(false) , owner(true) {} template Array::Array(const Array &parent, const dim4 &dims, const dim_t &offset_, const dim4 &strides) : info(parent.getDevId(), dims, offset_, strides, static_cast(dtype_traits::af_type)) , data(parent.getData()) , data_dims(parent.getDataDims()) , node(bufferNodePtr()) , ready(true) , owner(false) {} template Array::Array(const dim4 &dims, const dim4 &strides, dim_t offset_, T *const in_data, bool is_device) : info(getActiveDeviceId(), dims, offset_, strides, static_cast(dtype_traits::af_type)) , data(is_device ? in_data : memAlloc(info.total()).release(), memFree) , data_dims(dims) , node(bufferNodePtr()) , ready(true) , owner(true) { if (!is_device) { // Ensure the memory being written to isnt used anywhere else. getQueue().sync(); copy(in_data, in_data + info.total(), data.get()); } } template void Array::eval() { if (isReady()) { return; } if (getQueue().is_worker()) { AF_ERROR("Array not evaluated", AF_ERR_INTERNAL); } this->setId(getActiveDeviceId()); data = shared_ptr(memAlloc(elements()).release(), memFree); getQueue().enqueue(kernel::evalArray, *this, this->node); // Reset shared_ptr this->node = bufferNodePtr(); ready = true; } template void Array::eval() const { if (isReady()) { return; } const_cast *>(this)->eval(); } template T *Array::device() { getQueue().sync(); if (!isOwner() || getOffset() || data.use_count() > 1) { *this = copyArray(*this); } return this->get(); } template void evalMultiple(vector *> array_ptrs) { vector *> outputs; vector nodes; vector> params; if (getQueue().is_worker()) { AF_ERROR("Array not evaluated", AF_ERR_INTERNAL); } // Check if all the arrays have the same dimension auto it = adjacent_find(begin(array_ptrs), end(array_ptrs), [](const Array *l, const Array *r) { return l->dims() != r->dims(); }); // If they are not the same. eval individually if (it != end(array_ptrs)) { for (auto ptr : array_ptrs) { ptr->eval(); } return; } for (Array *array : array_ptrs) { if (array->ready) { continue; } array->setId(getActiveDeviceId()); array->data = shared_ptr(memAlloc(array->elements()).release(), memFree); outputs.push_back(array); params.push_back(*array); nodes.push_back(array->node); } if (!outputs.empty()) { getQueue().enqueue(kernel::evalMultiple, params, nodes); for (Array *array : outputs) { array->ready = true; array->node = bufferNodePtr(); } } } template Node_ptr Array::getNode() { if (node->isBuffer()) { auto *bufNode = reinterpret_cast *>(node.get()); unsigned bytes = this->getDataDims().elements() * sizeof(T); bufNode->setData(data, bytes, getOffset(), dims().get(), strides().get(), isLinear()); } return node; } template Node_ptr Array::getNode() const { if (node->isBuffer()) { return const_cast *>(this)->getNode(); } return node; } template Array createHostDataArray(const dim4 &dims, const T *const data) { return Array(dims, const_cast(data), false); } template Array createDeviceDataArray(const dim4 &dims, void *data) { return Array(dims, static_cast(data), true); } template Array createValueArray(const dim4 &dims, const T &value) { auto *node = new jit::ScalarNode(value); return createNodeArray(dims, Node_ptr(node)); } template Array createEmptyArray(const dim4 &dims) { return Array(dims); } template kJITHeuristics passesJitHeuristics(Node *root_node) { if (!evalFlag()) { return kJITHeuristics::Pass; } if (root_node->getHeight() >= static_cast(getMaxJitSize())) { return kJITHeuristics::TreeHeight; } // Check if approaching the memory limit if (getMemoryPressure() >= getMemoryPressureThreshold()) { NodeIterator it(root_node); NodeIterator end_node; size_t bytes = accumulate(it, end_node, size_t(0), [=](const size_t prev, const Node &n) { // getBytes returns the size of the data // Array. Sub arrays will be represented // by their parent size. return prev + n.getBytes(); }); if (jitTreeExceedsMemoryPressure(bytes)) { return kJITHeuristics::MemoryPressure; } } return kJITHeuristics::Pass; } template Array createNodeArray(const dim4 &dims, Node_ptr node) { Array out = Array(dims, node); return out; } template Array createSubArray(const Array &parent, const vector &index, bool copy) { parent.eval(); dim4 dDims = parent.getDataDims(); dim4 dStrides = calcStrides(dDims); dim4 parent_strides = parent.strides(); if (dStrides != parent_strides) { const Array parentCopy = copyArray(parent); return createSubArray(parentCopy, index, copy); } const dim4 &pDims = parent.dims(); dim4 dims = toDims(index, pDims); dim4 strides = toStride(index, dDims); // Find total offsets after indexing dim4 offsets = toOffset(index, pDims); dim_t offset = parent.getOffset(); for (int i = 0; i < 4; i++) { offset += offsets[i] * parent_strides[i]; } Array out = Array(parent, dims, offset, strides); if (!copy) { return out; } if (strides[0] != 1 || strides[1] < 0 || strides[2] < 0 || strides[3] < 0) { out = copyArray(out); } return out; } template void destroyArray(Array *A) { delete A; } template void writeHostDataArray(Array &arr, const T *const data, const size_t bytes) { if (!arr.isOwner()) { arr = copyArray(arr); } arr.eval(); // Ensure the memory being written to isnt used anywhere else. getQueue().sync(); memcpy(arr.get(), data, bytes); } template void writeDeviceDataArray(Array &arr, const void *const data, const size_t bytes) { if (!arr.isOwner()) { arr = copyArray(arr); } memcpy(arr.get(), static_cast(data), bytes); } template void Array::setDataDims(const dim4 &new_dims) { modDims(new_dims); data_dims = new_dims; if (node->isBuffer()) { node = bufferNodePtr(); } } #define INSTANTIATE(T) \ template Array createHostDataArray(const dim4 &dims, \ const T *const data); \ template Array createDeviceDataArray(const dim4 &dims, void *data); \ template Array createValueArray(const dim4 &dims, const T &value); \ template Array createEmptyArray(const dim4 &dims); \ template Array createSubArray( \ const Array &parent, const vector &index, bool copy); \ template void destroyArray(Array * A); \ template Array createNodeArray(const dim4 &dims, Node_ptr node); \ template void Array::eval(); \ template void Array::eval() const; \ template T *Array::device(); \ template Array::Array(const af::dim4 &dims, T *const in_data, \ bool is_device, bool copy_device); \ template Array::Array(const af::dim4 &dims, const af::dim4 &strides, \ dim_t offset, T *const in_data, bool is_device); \ template Node_ptr Array::getNode(); \ template Node_ptr Array::getNode() const; \ template void writeHostDataArray(Array & arr, const T *const data, \ const size_t bytes); \ template void writeDeviceDataArray( \ Array & arr, const void *const data, const size_t bytes); \ template void evalMultiple(vector *> arrays); \ template void Array::setDataDims(const dim4 &new_dims); INSTANTIATE(float) INSTANTIATE(double) INSTANTIATE(cfloat) INSTANTIATE(cdouble) INSTANTIATE(int) INSTANTIATE(uint) INSTANTIATE(uchar) INSTANTIATE(char) INSTANTIATE(intl) INSTANTIATE(uintl) INSTANTIATE(short) INSTANTIATE(ushort) INSTANTIATE(half) } // namespace cpu