Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add extra reference ops
  • Loading branch information
mbartling committed May 15, 2020
commit a1fedd3775cda033e135532ca03e033e99eccad4
32 changes: 32 additions & 0 deletions TESTS/tensors/test_romtensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,35 @@ TEST(Rom_Tensor, read_write_i16) {
cout << "uint16 Sizeof IntegralValue " << sizeof(IntegralValue(5)) << endl;
delete[] buffer;
}

// A ScalarRomTensor backed by a single int8 value should return that value
// from operator() regardless of the indices supplied (here (2,2) on a {1}
// shaped tensor).
TEST(ScalarRom_Tensor, read_write_i8) {
  ///setup_context();
  localCircularArenaAllocator<256> meta_allocator;
  localCircularArenaAllocator<256> ram_allocator;
  Context* ctx = Context::get_default_context();
  ctx->set_metadata_allocator(&meta_allocator);
  ctx->set_ram_data_allocator(&ram_allocator);
  int8_t* backing = new int8_t[1];
  backing[0] = 5;
  ScalarRomTensor scalar({1}, i8, backing);
  int8_t fetched = scalar(2, 2);
  EXPECT_EQ(fetched, 5);
  cout << "i8 Sizeof IntegralValue " << sizeof(IntegralValue(5)) << endl;
  cout << "Sizeof RomTensor " << sizeof(scalar) << endl;
  delete[] backing;
}

// Same check as the i8 case, but for a float-typed scalar ROM tensor; the
// read value is compared with a tolerance since it is floating point.
TEST(ScalarRom_Tensor, read_write_flt) {
  ///setup_context();
  localCircularArenaAllocator<256> meta_allocator;
  localCircularArenaAllocator<256> ram_allocator;
  Context* ctx = Context::get_default_context();
  ctx->set_metadata_allocator(&meta_allocator);
  ctx->set_ram_data_allocator(&ram_allocator);
  float* backing = new float[1];
  backing[0] = 5.0;
  ScalarRomTensor scalar({1}, flt, backing);
  float fetched = scalar(2, 2);
  EXPECT_NEAR(fetched, 5.0, 0.0001);
  cout << "float Sizeof IntegralValue " << sizeof(IntegralValue(5)) << endl;
  cout << "Sizeof RomTensor " << sizeof(scalar) << endl;
  delete[] backing;
}
1 change: 1 addition & 0 deletions src/uTensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ set(src_utensor_tensors
)
set(src_utensor_ops
ops/Matrix.cpp
ops/Convolution_kernels.cpp
)
set(src_utensor_errhndl
errorHandlers/SimpleErrorHandler.cpp
Expand Down
27 changes: 27 additions & 0 deletions src/uTensor/ops/Arithmetic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,33 @@ class AddOperator : public OperatorInterface<2, 1> {
}
};

// Elementwise subtraction operator: c = a - b.
// The output tensor's shape drives the iteration (see sub_kernel); no
// broadcasting is performed.
template <typename T>
class SubOperator : public OperatorInterface<2, 1> {
 public:
  enum names_in : uint8_t { a, b };  // input slot names
  enum names_out : uint8_t { c };    // output slot name
  // SubOperator(FixedTensorMap<2> inputs, FixedTensorMap<1> outputs) :
  // OperatorBase(inputs, outputs) {}

 protected:
  // Dispatch to the typed subtraction kernel with the bound tensors.
  virtual void compute() {
    sub_kernel<T>(outputs[c].tensor(), inputs[a].tensor(), inputs[b].tensor());
  }
};

// Elementwise multiplication operator: c = a * b.
// The output tensor's shape drives the iteration (see mul_kernel); no
// broadcasting is performed.
template <typename T>
class MulOperator : public OperatorInterface<2, 1> {
 public:
  enum names_in : uint8_t { a, b };  // input slot names
  enum names_out : uint8_t { c };    // output slot name
  // MulOperator(FixedTensorMap<2> inputs, FixedTensorMap<1> outputs) :
  // OperatorBase(inputs, outputs) {}

 protected:
  // Dispatch to the typed multiplication kernel with the bound tensors.
  virtual void compute() {
    mul_kernel<T>(outputs[c].tensor(), inputs[a].tensor(), inputs[b].tensor());
  }
};

} // namespace uTensor
#endif
28 changes: 27 additions & 1 deletion src/uTensor/ops/Arithmetic_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace uTensor {
template <typename T>
void add_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
// Decide on c shape
TensorShape c_shape = c->get_shape();
const TensorShape& c_shape = c->get_shape();
uint32_t c_size = c_shape.get_linear_size();
// TensorInterface& C = reinterpret_cast<TensorInterface*>(*c);
// const TensorInterface& A = reinterpret_cast<TensorInterface*>(*a);
Expand All @@ -16,5 +16,31 @@ void add_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
c(i) = static_cast<T>(static_cast<T>(a(i)) + static_cast<T>(b(i)));
}

// Elementwise subtraction kernel: writes a(i) - b(i) into c for every flat
// index i. The element count is taken from c's shape, so c must already be
// shaped; a and b are read at the same flat indices (no broadcasting).
template <typename T>
void sub_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
  // The output tensor dictates how many elements are produced.
  const TensorShape& out_shape = c->get_shape();
  const uint32_t num_elems = out_shape.get_linear_size();

  for (uint32_t idx = 0; idx < num_elems; idx++) {
    const T lhs = static_cast<T>(a(idx));
    const T rhs = static_cast<T>(b(idx));
    c(idx) = static_cast<T>(lhs - rhs);
  }
}

// Elementwise multiplication kernel: writes a(i) * b(i) into c for every
// flat index i. The element count is taken from c's shape, so c must already
// be shaped; a and b are read at the same flat indices (no broadcasting).
template <typename T>
void mul_kernel(Tensor& c, const Tensor& a, const Tensor& b) {
  // The output tensor dictates how many elements are produced.
  const TensorShape& out_shape = c->get_shape();
  const uint32_t num_elems = out_shape.get_linear_size();

  for (uint32_t idx = 0; idx < num_elems; idx++) {
    const T lhs = static_cast<T>(a(idx));
    const T rhs = static_cast<T>(b(idx));
    c(idx) = static_cast<T>(lhs * rhs);
  }
}

} // namespace uTensor
#endif
74 changes: 74 additions & 0 deletions src/uTensor/ops/Convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,5 +199,79 @@ using MaxPoolOperator = GenericPoolOperator<T, MaxFilter<T>>;
template <typename T>
using AvgPoolOperator = GenericPoolOperator<T, AvgFilter<T>>;

// Depthwise separable 2D convolution operator (v2) over NHWC tensors.
// Inputs: in (activation), filter (depthwise weights), bias. One output.
// Note: "dialation" is this API's spelling of "dilation" throughout.
template <typename Tout>
class DepthwiseSeparableConvOperatorV2 : public OperatorInterface<3, 1> {
 public:
  enum names_in : uint8_t { in, filter, bias };  // input slot names
  enum names_out : uint8_t { out };              // output slot name

 public:
  // Default: unit strides, SAME padding, depth multiplier 1, unit dilation.
  DepthwiseSeparableConvOperatorV2();
  // TODO allow 4D bits later
  //DepthwiseSeparableConvOperatorV2(
  //    const uint16_t (&strides)[4], Padding padding,
  //    const int depth_multiplier = 1, const uint16_t (&dialation)[2] = {1, 1});
  // strides is {stride_h, stride_w}; dialation is {dilation_h, dilation_w}.
  DepthwiseSeparableConvOperatorV2(
      const uint16_t (&strides)[2], Padding padding,
      const int depth_multiplier = 1, const uint16_t (&dialation)[2] = {1, 1});

 protected:
  // Validates shapes and forwards to depthwise_separable_convolution_kernel_v2.
  virtual void compute();

 private:
  // TfLiteDepthwiseConvParams
  // Set by constructors
  uint16_t _stride[4];  // packed {1, stride_h, stride_w, 1}
  Padding _padding;
  // NOTE(review): lacks the underscore prefix used by the other members.
  int depth_multiplier;
  uint16_t _dialation[2];  // {dilation_h, dilation_w}
};

// Default-construct with unit strides, SAME padding, depth multiplier 1 and
// unit dilation.
// BUGFIX: _stride has four entries in {1, stride_h, stride_w, 1} layout
// (matching the other constructor). The previous initializer {1, 1}
// zero-filled _stride[2] and _stride[3], so the kernel read a stride width
// of 0 and divided by it in the output-size computation. All four entries
// must be 1.
template <typename Tout>
DepthwiseSeparableConvOperatorV2<Tout>::DepthwiseSeparableConvOperatorV2()
    : _stride{1, 1, 1, 1},
      _padding(SAME),
      depth_multiplier(1),
      _dialation{1, 1} {}

// Construct with explicit {stride_h, stride_w}, padding mode, depth
// multiplier and {dilation_h, dilation_w}. The two user strides are packed
// into the internal 4-entry {1, h, w, 1} layout expected by the kernel.
template <typename Tout>
DepthwiseSeparableConvOperatorV2<Tout>::DepthwiseSeparableConvOperatorV2(
    const uint16_t (&strides)[2], Padding padding,
    const int depth_multiplier, const uint16_t (&dialation)[2])
    : _stride{1, strides[0], strides[1], 1},
      _padding(padding),
      depth_multiplier(depth_multiplier),
      _dialation{dialation[0], dialation[1]} {}

// Validates the input/filter/bias shapes and forwards the bound tensors plus
// the stored convolution parameters to the depthwise kernel.
// Cleanup: removed locals that were fetched but never used (the RAM
// allocator pointer and the output shape reference).
template <typename Tout>
void DepthwiseSeparableConvOperatorV2<Tout>::compute() {
  const TensorShape& in_shape = inputs[in].tensor()->get_shape();
  const TensorShape& df_shape = inputs[filter].tensor()->get_shape();
  const TensorShape& bias_shape = inputs[bias].tensor()->get_shape();

  // NOTE(review): this compares in_shape[3] (input channels) against
  // df_shape[2] — in the kernel's {1, h, w, out} filter layout that index is
  // the filter width; confirm the intended filter dimension.
  if (in_shape[3] != df_shape[2]) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  // Bias is expected with leading unit dimensions.
  if (bias_shape[0] != 1 || bias_shape[1] != 1) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }

  depthwise_separable_convolution_kernel_v2<Tout>(
      outputs[out].tensor(),
      inputs[in].tensor(), inputs[filter].tensor(), inputs[bias].tensor(),
      _padding, _stride, depth_multiplier, _dialation);
}


} // namespace uTensor
#endif
52 changes: 52 additions & 0 deletions src/uTensor/ops/Convolution_kernels.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include "Convolution_kernels.hpp"

namespace uTensor {
// It's not guaranteed that padding is symmetric. Returns the per-side
// padding (total / 2) and stores the leftover (total % 2) in *offset for
// algorithms that need the exact asymmetric split.
int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
                             int filter_size, int out_size,
                             int* offset) {
  const int effective_filter = (filter_size - 1) * dilation_rate + 1;
  int padding_total = (out_size - 1) * stride + effective_filter - in_size;
  if (padding_total < 0) {
    padding_total = 0;  // never report negative padding
  }
  *offset = padding_total % 2;
  return padding_total / 2;
}

// Matching GetWindowedOutputSize in TensorFlow: number of output positions
// for one spatial dimension given the padding mode. Unknown padding modes
// yield 0.
int ComputeOutSize(Padding padding, int image_size,
                   int filter_size, int stride, int dilation_rate = 1) {
  const int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  if (padding == SAME) {
    // SAME: cover the whole image, ceil(image / stride).
    return (image_size + stride - 1) / stride;
  }
  if (padding == VALID) {
    // VALID: only windows fully inside the image.
    return (image_size + stride - effective_filter_size) / stride;
  }
  return 0;
}

// Computes both spatial output sizes and the per-side padding amounts for a
// 2D convolution. The odd-padding remainder from ComputePaddingWithOffset is
// discarded here.
void uComputePaddingHeightWidth(int stride_height, int stride_width,
                                int dilation_rate_height,
                                int dilation_rate_width, int in_height,
                                int in_width, int filter_height,
                                int filter_width, int* padding_height,
                                int* padding_width, Padding padding,
                                int* out_height, int* out_width) {
  // Output extents come first; the padding needed depends on them.
  *out_height = ComputeOutSize(padding, in_height, filter_height,
                               stride_height, dilation_rate_height);
  *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width,
                              dilation_rate_width);

  int unused_offset = 0;
  *padding_height = ComputePaddingWithOffset(
      stride_height, dilation_rate_height, in_height, filter_height,
      *out_height, &unused_offset);
  *padding_width = ComputePaddingWithOffset(
      stride_width, dilation_rate_width, in_width, filter_width, *out_width,
      &unused_offset);
}

}
113 changes: 113 additions & 0 deletions src/uTensor/ops/Convolution_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,5 +425,118 @@ void depthwise_separable_convolution_kernel(Tensor& out, const Tensor& in,
}
}


// Computes the spatial output sizes (*out_height, *out_width) and per-side
// padding amounts (*padding_height, *padding_width) for a 2D convolution,
// TFLite-style. Implemented in Convolution_kernels.cpp.
void uComputePaddingHeightWidth(int stride_height, int stride_width,
                                int dilation_rate_height,
                                int dilation_rate_width, int in_height,
                                int in_width, int filter_height,
                                int filter_width, int* padding_height,
                                int* padding_width, Padding padding,
                                int* out_height, int* out_width);

// Depthwise 2D convolution (TFLite reference-style) over NHWC tensors.
//   output : [batch, out_h, out_w, in_channels * depth_multiplier]
//   input  : [batch, in_h, in_w, in_channels]
//   filter : assumed {1, filter_h, filter_w, out_channels} layout — the code
//            reads filter(filter_y, filter_x, output_channel) with three
//            indices on a 4D tensor; confirm Tensor::operator() supports
//            this (TODO review).
//   bias   : one value per output channel (assumed always provided).
// `strides` is packed {1, stride_h, stride_w, 1}; `dialation` (sic) is
// {dilation_h, dilation_w}.
// Fixes vs. previous revision: rank validation now happens BEFORE any shape
// element is read; padding is computed with the actual dilation factors
// (previously hard-coded to 1,1 while the loop applied dilation); unused
// local `channels_out` removed; pad variables are `int` to match the
// uComputePaddingHeightWidth signature.
template <typename T>
void depthwise_separable_convolution_kernel_v2(Tensor& output, const Tensor& input,
                                               const Tensor& filter,
                                               const Tensor& bias,
                                               const Padding padding,
                                               const uint16_t (&strides)[4],
                                               const int depth_multiplier,
                                               const uint16_t (&dialation)[2]
                                               ) {
  const TensorShape& input_shape = input->get_shape();
  const TensorShape& filter_shape = filter->get_shape();
  const TensorShape& output_shape = output->get_shape();

  // Validate ranks first so we never index a TensorShape past its rank.
  if (!(input_shape.num_dims() == 4)) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  if (!(filter_shape.num_dims() == 4)) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  if (!(output_shape.num_dims() == 4)) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }

  const int batches = input_shape[0];
  const int output_depth = output_shape[3];  // in_channels * depth_multiplier
  const int output_height = output_shape[1];
  const int output_width = output_shape[2];
  const int input_width = input_shape[2];
  const int input_height = input_shape[1];
  const int input_depth = input_shape[3];
  const int filter_width = filter_shape[2];
  const int filter_height = filter_shape[1];
  const int stride_width = strides[2];
  const int stride_height = strides[1];
  const int dialation_width_factor = dialation[1];
  const int dialation_height_factor = dialation[0];

  if (!(output_depth == filter_shape[3])) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }
  if (!(batches == output_shape[0])) {
    Context::get_default_context()->throwError(
        new InvalidTensorDimensionsError);
  }

  // Padding must be computed with the same dilation the loops below apply.
  int unused_output_height, unused_output_width;
  int pad_width, pad_height;
  uComputePaddingHeightWidth(stride_height, stride_width,
                             dialation_height_factor, dialation_width_factor,
                             input_height, input_width, filter_height,
                             filter_width, &pad_height, &pad_width, padding,
                             &unused_output_height, &unused_output_width);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          for (int m = 0; m < depth_multiplier; ++m) {
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            // NOTE(review): the accumulator is int32_t even when T is a
            // floating-point type, so float products are truncated as they
            // are accumulated — confirm whether a T accumulator is intended
            // for flt tensors.
            int32_t acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x =
                    in_x_origin + dialation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dialation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  T input_val =
                      static_cast<T>(input(batch, in_y, in_x, in_channel));
                  T filter_val = static_cast<T>(
                      filter(filter_y, filter_x, output_channel));
                  acc += filter_val * input_val;
                }
              }
            }
            // Assuming bias data will always be provided.
            acc += static_cast<T>(bias(output_channel));

            output(batch, out_y, out_x, output_channel) =
                static_cast<T>(acc);
          }
        }
      }
    }
  }
}

} // namespace uTensor
#endif
Loading