[FOR DRAFT-PR ONLY] Enable -ffp-mode=fast for armclang
-ffp-mode=fast enables extra compiler optimizations for floating-point operations, which improves performance.
It was previously set to -ffp-mode=std because the fast mode is incompatible with std::numeric_limits::quiet_NaN/infinity.
See https://developer.arm.com/documentation/dui0774/latest/Compiler-Command-line-Options/-ffp-mode for more info.
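For illustration only (this snippet is not part of the diff): under fast floating-point modes the compiler is allowed to assume that NaNs and infinities never occur, so checks of this kind can be constant-folded away.

#include <cmath>
#include <limits>

// Illustration: with -ffp-mode=fast (or comparable fast-math options) the
// compiler may assume finite arithmetic, so both checks below can be folded
// to a constant false even when x really is Inf or NaN.
bool IsInf(float x) { return x == std::numeric_limits<float>::infinity(); }
bool IsNaN(float x) { return std::isnan(x); }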

This pull request puts all incompatible code behind the TFLITE_EMULATE_FLOAT flag, which is not defined on Arm targets.
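A minimal sketch of the guard pattern described above, using declarations that appear in the diff below (not a complete listing):

// Helpers that manipulate the binary representation of doubles are only
// compiled when floats are emulated in software; TFLITE_EMULATE_FLOAT is
// not defined on Arm targets, so this code never sees -ffp-mode=fast.
#ifdef TFLITE_EMULATE_FLOAT
int64_t IntegerFrExp(double input, int* shift);
int IntegerDoubleCompare(double a, double b);
#endif  // TFLITE_EMULATE_FLOAT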

Change-Id: Ic8fab0f11497ef4fd834a3a731a8a5625913486e
AdrianLundell committed Nov 7, 2024
1 parent 9245002 commit 79d013c
Showing 4 changed files with 10 additions and 11 deletions.
6 changes: 5 additions & 1 deletion tensorflow/lite/kernels/internal/quantization_util.cc
@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ limitations under the License.

namespace tflite {

#ifdef TFLITE_EMULATE_FLOAT
namespace {
// These constants are used to manipulate the binary representation of doubles.
// Double-precision binary64 floating point format is:
@@ -49,6 +50,7 @@ constexpr uint32_t kFractionShift = 22;
constexpr uint32_t kFractionRoundingMask = 0x003fffff;
constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
} // namespace
#endif

void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
int* shift) {
@@ -122,6 +124,7 @@ void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
*left_shift = shift;
}

#ifdef TFLITE_EMULATE_FLOAT
int64_t IntegerFrExp(double input, int* shift) {
// Make sure our assumptions about the double layout hold.
TFLITE_CHECK_EQ(8, sizeof(double));
@@ -278,6 +281,7 @@ int IntegerDoubleCompare(double a, double b) {
return 0;
}
}
#endif

void PreprocessSoftmaxScaling(double beta, double input_scale,
int input_integer_bits,
10 changes: 2 additions & 8 deletions tensorflow/lite/micro/kernels/activations_common.cc
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -54,7 +54,6 @@ template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);

@@ -69,12 +68,7 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
static_cast<int32_t>(std::numeric_limits<T>::max());
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
4 changes: 3 additions & 1 deletion tensorflow/lite/micro/kernels/quantization_util_test.cc
@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -232,6 +232,7 @@ TF_LITE_MICRO_TEST(
TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 255);
}

#ifdef TFLITE_EMULATE_FLOAT
TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerFrExp) {
int shift;
int64_t result = tflite::IntegerFrExp(0.0, &shift);
@@ -412,6 +413,7 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_CalculateInputRadius) {
TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(3, 28), 7);
TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(4, 2), 503316480);
}
#endif

TF_LITE_MICRO_TEST(QuantizationUtilTest_QuantizeMultiplierArray) {
const double weights[] = {-4, -2, -1, -0.5, -0.25, -0.125, 0,
1 change: 0 additions & 1 deletion tensorflow/lite/micro/tools/make/Makefile
@@ -855,7 +855,6 @@ $(BINDIR)%.test_target: $(BINDIR)%_test
# These are microcontroller-specific rules for converting the ELF output
# of the linker into a binary image that can be loaded directly.
ifeq ($(TOOLCHAIN), armclang)
CXXFLAGS += -ffp-mode=full
FROMELF := ${TARGET_TOOLCHAIN_ROOT}$(TARGET_TOOLCHAIN_PREFIX)fromelf
$(BINDIR)%.bin: $(BINDIR)%
@mkdir -p $(dir $@)
