Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add extra reference ops
  • Loading branch information
mbartling committed May 15, 2020
commit a1fedd3775cda033e135532ca03e033e99eccad4
32 changes: 32 additions & 0 deletions TESTS/tensors/test_romtensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,35 @@ TEST(Rom_Tensor, read_write_i16) {
cout << "uint16 Sizeof IntegralValue " << sizeof(IntegralValue(5)) << endl;
delete[] buffer;
}

// A ScalarRomTensor backed by a single int8 value should return that value
// from operator() regardless of the indices supplied (here (2,2) on a {1}
// shaped tensor).
TEST(ScalarRom_Tensor, read_write_i8) {
  ///setup_context();
  localCircularArenaAllocator<256> meta_allocator;
  localCircularArenaAllocator<256> ram_allocator;
  Context* ctx = Context::get_default_context();
  ctx->set_metadata_allocator(&meta_allocator);
  ctx->set_ram_data_allocator(&ram_allocator);
  int8_t* backing = new int8_t[1];
  backing[0] = 5;
  ScalarRomTensor scalar({1}, i8, backing);
  int8_t fetched = scalar(2, 2);
  EXPECT_EQ(fetched, 5);
  cout << "i8 Sizeof IntegralValue " << sizeof(IntegralValue(5)) << endl;
  cout << "Sizeof RomTensor " << sizeof(scalar) << endl;
  delete[] backing;
}

// Same check as the i8 case, but for a float-typed scalar ROM tensor; the
// read value is compared with a tolerance since it is floating point.
TEST(ScalarRom_Tensor, read_write_flt) {
  ///setup_context();
  localCircularArenaAllocator<256> meta_allocator;
  localCircularArenaAllocator<256> ram_allocator;
  Context* ctx = Context::get_default_context();
  ctx->set_metadata_allocator(&meta_allocator);
  ctx->set_ram_data_allocator(&ram_allocator);
  float* backing = new float[1];
  backing[0] = 5.0;
  ScalarRomTensor scalar({1}, flt, backing);
  float fetched = scalar(2, 2);
  EXPECT_NEAR(fetched, 5.0, 0.0001);
  cout << "float Sizeof IntegralValue " << sizeof(IntegralValue(5)) << endl;
  cout << "Sizeof RomTensor " << sizeof(scalar) << endl;
  delete[] backing;
}
1 change: 1 addition & 0 deletions src/uTensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ set(src_utensor_tensors
)
set(src_utensor_ops
ops/Matrix.cpp
ops/Convolution_kernels.cpp
)
set(src_utensor_errhndl
errorHandlers/SimpleErrorHandler.cpp
Expand Down
27 changes: 27 additions & 0 deletions src/uTensor/ops/Arithmetic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,33 @@ class AddOperator : public OperatorInterface<2, 1> {
}
};

// Elementwise subtraction operator: c = a - b.
// The output tensor's shape drives the iteration (see sub_kernel); no
// broadcasting is performed.
template <typename T>
class SubOperator : public OperatorInterface<2, 1> {
 public:
  enum names_in : uint8_t { a, b };  // input slot names
  enum names_out : uint8_t { c };    // output slot name
  // SubOperator(FixedTensorMap<2> inputs, FixedTensorMap<1> outputs) :
  // OperatorBase(inputs, outputs) {}

 protected:
  // Dispatch to the typed subtraction kernel with the bound tensors.
  virtual void compute() {
    sub_kernel<T>(outputs[c].tensor(), inputs[a].tensor(), inputs[b].tensor());
  }
};

// Elementwise multiplication operator: c = a * b.
// The output tensor's shape drives the iteration (see mul_kernel); no
// broadcasting is performed.
template <typename T>
class MulOperator : public OperatorInterface<2, 1> {
 public:
  enum names_in : uint8_t { a, b };  // input slot names
  enum names_out : uint8_t { c };    // output slot name
  // MulOperator(FixedTensorMap<2> inputs, FixedTensorMap<1> outputs) :
  // OperatorBase(inputs, outputs) {}

 protected:
  // Dispatch to the typed multiplication kernel with the bound tensors.
  virtual void compute() {
    mul_kernel<T>(outputs[c].tensor(), inputs[a].tensor(), inputs[b].tensor());
  }
};

} // namespace uTensor
#endif
28 changes: 27 additions & 1 deletion src/uTensor/ops/Arithmetic_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace uTensor {
template <typename T>
void add_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
// Decide on c shape
TensorShape c_shape = c->get_shape();
const TensorShape& c_shape = c->get_shape();
uint32_t c_size = c_shape.get_linear_size();
// TensorInterface& C = reinterpret_cast<TensorInterface*>(*c);
// const TensorInterface& A = reinterpret_cast<TensorInterface*>(*a);
Expand All @@ -16,5 +16,31 @@ void add_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
c(i) = static_cast<T>(static_cast<T>(a(i)) + static_cast<T>(b(i)));
}

// Elementwise subtraction kernel: writes a(i) - b(i) into c for every flat
// index i. The element count is taken from c's shape, so c must already be
// shaped; a and b are read at the same flat indices (no broadcasting).
template <typename T>
void sub_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
  // The output tensor dictates how many elements are produced.
  const TensorShape& out_shape = c->get_shape();
  const uint32_t num_elems = out_shape.get_linear_size();

  for (uint32_t idx = 0; idx < num_elems; idx++) {
    const T lhs = static_cast<T>(a(idx));
    const T rhs = static_cast<T>(b(idx));
    c(idx) = static_cast<T>(lhs - rhs);
  }
}

// Elementwise multiplication kernel: writes a(i) * b(i) into c for every
// flat index i. The element count is taken from c's shape, so c must already
// be shaped; a and b are read at the same flat indices (no broadcasting).
template <typename T>
void mul_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
  // The output tensor dictates how many elements are produced.
  const TensorShape& out_shape = c->get_shape();
  const uint32_t num_elems = out_shape.get_linear_size();

  for (uint32_t idx = 0; idx < num_elems; idx++) {
    const T lhs = static_cast<T>(a(idx));
    const T rhs = static_cast<T>(b(idx));
    c(idx) = static_cast<T>(lhs * rhs);
  }
}

} // namespace uTensor
#endif
74 changes: 74 additions & 0 deletions src/uTensor/ops/Convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,5 +199,79 @@ using MaxPoolOperator = GenericPoolOperator<T, MaxFilter<T>>;
template <typename T>
using AvgPoolOperator = GenericPoolOperator<T, AvgFilter<T>>;

// Depthwise separable 2D convolution operator (v2) over NHWC tensors.
// Inputs: in (activation), filter (depthwise weights), bias. One output.
// Note: "dialation" is this API's spelling of "dilation" throughout.
template <typename Tout>
class DepthwiseSeparableConvOperatorV2 : public OperatorInterface<3, 1> {
 public:
  enum names_in : uint8_t { in, filter, bias };  // input slot names
  enum names_out : uint8_t { out };              // output slot name

 public:
  // Default: unit strides, SAME padding, depth multiplier 1, unit dilation.
  DepthwiseSeparableConvOperatorV2();
  // TODO allow 4D bits later
  //DepthwiseSeparableConvOperatorV2(
  //    const uint16_t (&strides)[4], Padding padding,
  //    const int depth_multiplier = 1, const uint16_t (&dialation)[2] = {1, 1});
  // strides is {stride_h, stride_w}; dialation is {dilation_h, dilation_w}.
  DepthwiseSeparableConvOperatorV2(
      const uint16_t (&strides)[2], Padding padding,
      const int depth_multiplier = 1, const uint16_t (&dialation)[2] = {1, 1});

 protected:
  // Validates shapes and forwards to depthwise_separable_convolution_kernel_v2.
  virtual void compute();

 private:
  // TfLiteDepthwiseConvParams
  // Set by constructors
  uint16_t _stride[4];  // packed {1, stride_h, stride_w, 1}
  Padding _padding;
  // NOTE(review): lacks the underscore prefix used by the other members.
  int depth_multiplier;
  uint16_t _dialation[2];  // {dilation_h, dilation_w}
};

// Default-construct with unit strides, SAME padding, depth multiplier 1 and
// unit dilation.
// BUGFIX: _stride has four entries in {1, stride_h, stride_w, 1} layout
// (matching the other constructor). The previous initializer {1, 1}
// zero-filled _stride[2] and _stride[3], so the kernel read a stride width
// of 0 and divided by it in the output-size computation. All four entries
// must be 1.
template <typename Tout>
DepthwiseSeparableConvOperatorV2<Tout>::DepthwiseSeparableConvOperatorV2()
    : _stride{1, 1, 1, 1},
      _padding(SAME),
      depth_multiplier(1),
      _dialation{1, 1} {}

// Construct with explicit {stride_h, stride_w}, padding mode, depth
// multiplier and {dilation_h, dilation_w}. The two user strides are packed
// into the internal 4-entry {1, h, w, 1} layout expected by the kernel.
template <typename Tout>
DepthwiseSeparableConvOperatorV2<Tout>::DepthwiseSeparableConvOperatorV2(
    const uint16_t (&strides)[2], Padding padding,
    const int depth_multiplier, const uint16_t (&dialation)[2])
    : _stride{1, strides[0], strides[1], 1},
      _padding(padding),
      depth_multiplier(depth_multiplier),
      _dialation{dialation[0], dialation[1]} {}

// Validates the input/filter/bias shapes and forwards the bound tensors plus
// the stored convolution parameters to the depthwise kernel.
// Cleanup: removed locals that were fetched but never used (the RAM
// allocator pointer and the output shape reference).
template <typename Tout>
void DepthwiseSeparableConvOperatorV2<Tout>::compute() {
  const TensorShape& in_shape = inputs[in].tensor()->get_shape();
  const TensorShape& df_shape = inputs[filter].tensor()->get_shape();
  const TensorShape& bias_shape = inputs[bias].tensor()->get_shape();

  // NOTE(review): this compares in_shape[3] (input channels) against
  // df_shape[2] — in the kernel's {1, h, w, out} filter layout that index is
  // the filter width; confirm the intended filter dimension.
  if (in_shape[3] != df_shape[2]) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  // Bias is expected with leading unit dimensions.
  if (bias_shape[0] != 1 || bias_shape[1] != 1) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }

  depthwise_separable_convolution_kernel_v2<Tout>(
      outputs[out].tensor(),
      inputs[in].tensor(), inputs[filter].tensor(), inputs[bias].tensor(),
      _padding, _stride, depth_multiplier, _dialation);
}


} // namespace uTensor
#endif
52 changes: 52 additions & 0 deletions src/uTensor/ops/Convolution_kernels.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include "Convolution_kernels.hpp"

namespace uTensor {
// It's not guaranteed that padding is symmetric. Returns the per-side
// padding (total / 2) and stores the leftover (total % 2) in *offset for
// algorithms that need the exact asymmetric split.
int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
                             int filter_size, int out_size,
                             int* offset) {
  const int effective_filter = (filter_size - 1) * dilation_rate + 1;
  int padding_total = (out_size - 1) * stride + effective_filter - in_size;
  if (padding_total < 0) {
    padding_total = 0;  // never report negative padding
  }
  *offset = padding_total % 2;
  return padding_total / 2;
}

// Matching GetWindowedOutputSize in TensorFlow: number of output positions
// for one spatial dimension given the padding mode. Unknown padding modes
// yield 0.
int ComputeOutSize(Padding padding, int image_size,
                   int filter_size, int stride, int dilation_rate = 1) {
  const int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  if (padding == SAME) {
    // SAME: cover the whole image, ceil(image / stride).
    return (image_size + stride - 1) / stride;
  }
  if (padding == VALID) {
    // VALID: only windows fully inside the image.
    return (image_size + stride - effective_filter_size) / stride;
  }
  return 0;
}

// Computes both spatial output sizes and the per-side padding amounts for a
// 2D convolution. The odd-padding remainder from ComputePaddingWithOffset is
// discarded here.
void uComputePaddingHeightWidth(int stride_height, int stride_width,
                                int dilation_rate_height,
                                int dilation_rate_width, int in_height,
                                int in_width, int filter_height,
                                int filter_width, int* padding_height,
                                int* padding_width, Padding padding,
                                int* out_height, int* out_width) {
  // Output extents come first; the padding needed depends on them.
  *out_height = ComputeOutSize(padding, in_height, filter_height,
                               stride_height, dilation_rate_height);
  *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width,
                              dilation_rate_width);

  int unused_offset = 0;
  *padding_height = ComputePaddingWithOffset(
      stride_height, dilation_rate_height, in_height, filter_height,
      *out_height, &unused_offset);
  *padding_width = ComputePaddingWithOffset(
      stride_width, dilation_rate_width, in_width, filter_width, *out_width,
      &unused_offset);
}

}
113 changes: 113 additions & 0 deletions src/uTensor/ops/Convolution_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,5 +425,118 @@ void depthwise_separable_convolution_kernel(Tensor& out, const Tensor& in,
}
}


// Computes the spatial output sizes (*out_height, *out_width) and per-side
// padding amounts (*padding_height, *padding_width) for a 2D convolution,
// TFLite-style. Implemented in Convolution_kernels.cpp.
void uComputePaddingHeightWidth(int stride_height, int stride_width,
                                int dilation_rate_height,
                                int dilation_rate_width, int in_height,
                                int in_width, int filter_height,
                                int filter_width, int* padding_height,
                                int* padding_width, Padding padding,
                                int* out_height, int* out_width);

// Depthwise 2D convolution (TFLite reference-style) over NHWC tensors.
//   output : [batch, out_h, out_w, in_channels * depth_multiplier]
//   input  : [batch, in_h, in_w, in_channels]
//   filter : assumed {1, filter_h, filter_w, out_channels} layout — the code
//            reads filter(filter_y, filter_x, output_channel) with three
//            indices on a 4D tensor; confirm Tensor::operator() supports
//            this (TODO review).
//   bias   : one value per output channel (assumed always provided).
// `strides` is packed {1, stride_h, stride_w, 1}; `dialation` (sic) is
// {dilation_h, dilation_w}.
// Fixes vs. previous revision: rank validation now happens BEFORE any shape
// element is read; padding is computed with the actual dilation factors
// (previously hard-coded to 1,1 while the loop applied dilation); unused
// local `channels_out` removed; pad variables are `int` to match the
// uComputePaddingHeightWidth signature.
template <typename T>
void depthwise_separable_convolution_kernel_v2(Tensor& output, const Tensor& input,
                                               const Tensor& filter,
                                               const Tensor& bias,
                                               const Padding padding,
                                               const uint16_t (&strides)[4],
                                               const int depth_multiplier,
                                               const uint16_t (&dialation)[2]
                                               ) {
  const TensorShape& input_shape = input->get_shape();
  const TensorShape& filter_shape = filter->get_shape();
  const TensorShape& output_shape = output->get_shape();

  // Validate ranks first so we never index a TensorShape past its rank.
  if (!(input_shape.num_dims() == 4)) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  if (!(filter_shape.num_dims() == 4)) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  if (!(output_shape.num_dims() == 4)) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }

  const int batches = input_shape[0];
  const int output_depth = output_shape[3];  // in_channels * depth_multiplier
  const int output_height = output_shape[1];
  const int output_width = output_shape[2];
  const int input_width = input_shape[2];
  const int input_height = input_shape[1];
  const int input_depth = input_shape[3];
  const int filter_width = filter_shape[2];
  const int filter_height = filter_shape[1];
  const int stride_width = strides[2];
  const int stride_height = strides[1];
  const int dialation_width_factor = dialation[1];
  const int dialation_height_factor = dialation[0];

  if (!(output_depth == filter_shape[3])) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  if (!(batches == output_shape[0])) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }

  // Padding must be computed with the same dilation the loops below apply.
  int unused_output_height, unused_output_width;
  int pad_width, pad_height;
  uComputePaddingHeightWidth(stride_height, stride_width,
                             dialation_height_factor, dialation_width_factor,
                             input_height, input_width, filter_height,
                             filter_width, &pad_height, &pad_width, padding,
                             &unused_output_height, &unused_output_width);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          for (int m = 0; m < depth_multiplier; ++m) {
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            // NOTE(review): the accumulator is int32_t even when T is a
            // floating-point type, so float products are truncated as they
            // are accumulated — confirm whether a T accumulator is intended
            // for flt tensors.
            int32_t acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x =
                    in_x_origin + dialation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dialation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  T input_val =
                      static_cast<T>(input(batch, in_y, in_x, in_channel));
                  T filter_val = static_cast<T>(
                      filter(filter_y, filter_x, output_channel));
                  acc += filter_val * input_val;
                }
              }
            }
            // Assuming bias data will always be provided.
            acc += static_cast<T>(bias(output_channel));

            output(batch, out_y, out_x, output_channel) =
                static_cast<T>(acc);
          }
        }
      }
    }
  }
}

} // namespace uTensor
#endif
Loading