Merge branch 'main' into experimental/batch_matmul_unit_test_fix

tensorflow · Nov 6, 2024 · 16a7688 · 16a7688
2 parents 1420149 + 4bb78c7
commit 16a7688
Show file tree

Hide file tree

Showing 7 changed files with 46 additions and 44 deletions.
diff --git a/WORKSPACE b/WORKSPACE
@@ -101,6 +101,7 @@ py_pkg_cc_deps(
 http_archive(
     name = "nnlib_hifi4",
     build_file = "@tflite_micro//third_party/xtensa/nnlib_hifi4:nnlib_hifi4.BUILD",
+    integrity = "sha256-ulZ+uY4dRsbDUMZbZtD972eghclWQrqYRb0Y4Znfyyc=",
     strip_prefix = "nnlib-hifi4-34f5f995f28d298ae2b6e2ba6e76c32a5cb34989",
     urls = ["https://github.com/foss-xtensa/nnlib-hifi4/archive/34f5f995f28d298ae2b6e2ba6e76c32a5cb34989.zip"],
 )
diff --git a/ci/temp_patches/tf_update_visibility.patch b/ci/temp_patches/tf_update_visibility.patch
diff --git a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc
@@ -14,12 +14,11 @@ limitations under the License.
 ==============================================================================*/
 
 #ifdef ETHOS_U
+#include <ethosu_driver.h>
 #include <inttypes.h>
+#include <pmu_ethosu.h>
 
 #include <algorithm>
-
-#include "ethosu_driver.h"
-#include "pmu_ethosu.h"
 #endif
 
 // This is set in micro/tools/make/targets/cortex_m_corstone_300_makefile.inc.
@@ -133,7 +132,7 @@ void InitializeTarget() {
   ARM_PMU_CNTR_Enable(PMU_CNTENSET_CCNTR_ENABLE_Msk);
 
 #else
-  CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
+  DCB->DEMCR |= DCB_DEMCR_TRCENA_Msk;
 
   // Reset and enable DWT cycle counter.
   DWT->CYCCNT = 0;
@@ -154,7 +153,7 @@ void InitializeTarget() {
     return;
   }
   NVIC_SetVector(static_cast<IRQn_Type>(ethosu_irq),
-                 (uint32_t)&ethosuIrqHandler0);
+                 reinterpret_cast<uint32_t>(&ethosuIrqHandler0));
   NVIC_SetPriority(static_cast<IRQn_Type>(ethosu_irq), ethosu_irq_priority);
   NVIC_EnableIRQ(static_cast<IRQn_Type>(ethosu_irq));
 #endif

diff --git a/tensorflow/lite/micro/cortex_m_generic/micro_time.cc b/tensorflow/lite/micro/cortex_m_generic/micro_time.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ uint32_t GetCurrentTimeTicks() {
 #ifdef ARMCM7
     DWT->LAR = 0xC5ACCE55;
 #endif
-    CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
+    DCB->DEMCR |= DCB_DEMCR_TRCENA_Msk;
 
     // Reset and enable DWT cycle counter.
     DWT->CYCCNT = 0;

diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/fully_connected.cc b/tensorflow/lite/micro/kernels/cmsis_nn/fully_connected.cc
@@ -148,22 +148,27 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     } else if (input->type == kTfLiteInt8) {
       buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
 
-      int8_t* filter_data = GetTensorData<int8_t>(filter);
       data->kernel_sums = nullptr;
 
+#if defined(KERNELS_OPTIMIZED_FOR_SPEED)
+      const int8_t* filter_data = GetTensorData<const int8_t>(filter);
+
       if (buf_size > 0 && filter_data != nullptr) {
+        const int32_t input_offset = -data->reference_op_data.input_zero_point;
+        const int32_t filter_offset =
+            -data->reference_op_data.filter_zero_point;
+
         data->kernel_sums = static_cast<int32_t*>(
             context->AllocatePersistentBuffer(context, buf_size));
 
-        int32_t input_offset = -data->reference_op_data.input_zero_point;
-        int32_t filter_offset = -data->reference_op_data.filter_zero_point;
         arm_vector_sum_s8(data->kernel_sums, filter_dims.n, data->output_depth,
                           filter_data, input_offset, filter_offset,
                           tflite::GetTensorData<int32_t>(bias));
 
         // Do not request a scratch buffer since using persistent memory
         buf_size = 0;
       }
+#endif
     }
   }
 

diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/svdf.cc b/tensorflow/lite/micro/kernels/cmsis_nn/svdf.cc
@@ -39,6 +39,9 @@ struct CmsisNnOpDataSvdf {
   int effective_scale_1_b;
   int effective_scale_2_b;
   int scratch_tensor_index;
+#if defined(KERNELS_OPTIMIZED_FOR_SIZE)
+  int scratch_weight_tensor_index;
+#endif
   int scratch_output_tensor_index;
 
   // Cached tensor zero point values for quantized operations.
@@ -189,13 +192,25 @@ TfLiteStatus CmsisNnPrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
     const int32_t buf_size = arm_svdf_s8_get_buffer_size(&weights_feature_dims);
 
     if (buf_size > 0) {
+#if defined(KERNELS_OPTIMIZED_FOR_SPEED)
       data->kernel_sums = static_cast<int32_t*>(
           context->AllocatePersistentBuffer(context, buf_size));
 
       arm_vector_sum_s8(data->kernel_sums, input_size, num_filters,
                         GetTensorData<int8_t>(weights_feature),
                         -data->input_zero_point,
                         -data->activation_state_zero_point, nullptr);
+#elif defined(KERNELS_OPTIMIZED_FOR_SIZE)
+      const TfLiteStatus scratch_kernel_status =
+          context->RequestScratchBufferInArena(
+              context, buf_size, &(data->scratch_weight_tensor_index));
+      TF_LITE_ENSURE_OK(context, scratch_kernel_status);
+#else
+      MicroPrintf(
+          "Either KERNELS_OPTIMIZED_FOR_SIZE or KERNELS_OPTIMIZED_FOR_SPEED "
+          "must be defined");
+      return kTfLiteError;
+#endif
     }
 
   } else {
@@ -291,7 +306,21 @@ TfLiteStatus EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
   switch (weights_time_tensor->type) {
     case kTfLiteInt8: {
       cmsis_nn_context ctx;
+
+#if defined(KERNELS_OPTIMIZED_FOR_SPEED)
       ctx.buf = data.kernel_sums;
+#elif defined(KERNELS_OPTIMIZED_FOR_SIZE)
+      ctx.buf = static_cast<int32_t*>(
+          context->GetScratchBuffer(context, data.scratch_weight_tensor_index));
+
+      const int input_size = input_tensor->dims->data[1];
+      const int num_filters = weights_feature_tensor->dims->data[0];
+
+      arm_vector_sum_s8(
+          static_cast<int32_t*>(ctx.buf), input_size, num_filters,
+          tflite::micro::GetTensorData<int8_t>(weights_feature_tensor),
+          -data.input_zero_point, -data.activation_state_zero_point, nullptr);
+#endif
 
       arm_svdf_s8(
           &ctx, &scratch_ctx, &scratch_output_ctx, &svdf_params,

diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
@@ -330,6 +330,8 @@ $(TENSORFLOW_ROOT)tensorflow/lite/micro/micro_resource_variable_test.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/micro_time_test.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/micro_utils_test.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/recording_micro_allocator_test.cc \
+$(TENSORFLOW_ROOT)tensorflow/lite/micro/span_test.cc \
+$(TENSORFLOW_ROOT)tensorflow/lite/micro/static_vector_test.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator_test.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator_test.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator_test.cc \