diff --git a/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp b/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp index d8792a7..40de878 100644 --- a/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp +++ b/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp @@ -207,6 +207,16 @@ ma_err_t EngineHailo::load(const string& model_path) { tensor->shape.dims[2] = shape.features; tensor->shape.dims[3] = shape.width; break; + case HAILO_FORMAT_ORDER_NC: + if (shape.width != 1 || shape.height != 1) { + tensor->shape.dims[1] = shape.features; + tensor->shape.dims[2] = shape.height; + tensor->shape.dims[3] = shape.width; + break; + } + tensor->shape.dims[1] = shape.features; + tensor->shape.size = 2; + break; default: break; } diff --git a/src/components/sscma-micro/sscma/core/ma_definations.h b/src/components/sscma-micro/sscma/core/ma_definations.h index b3ef63a..796286a 100644 --- a/src/components/sscma-micro/sscma/core/ma_definations.h +++ b/src/components/sscma-micro/sscma/core/ma_definations.h @@ -42,6 +42,8 @@ #define MA_STORAGE_KEY_TRIGGER_RULES "trigger#rules" +#define MA_STORAGE_KEY_WS_PORT "ws#port" + #define MA_AT_CMD_PREFIX "AT+" #define MA_AT_CMD_QUERY "?" diff --git a/src/components/sscma-micro/sscma/core/ma_types.h b/src/components/sscma-micro/sscma/core/ma_types.h index 4b5d7f3..8507e1f 100644 --- a/src/components/sscma-micro/sscma/core/ma_types.h +++ b/src/components/sscma-micro/sscma/core/ma_types.h @@ -97,7 +97,7 @@ typedef struct { } data; bool is_physical; // For physical tensor bool is_variable; // For constant tensor - void* external_handler = nullptr; + void* external_handler; } ma_tensor_t; typedef enum { @@ -241,7 +241,8 @@ typedef enum { MA_TRANSPORT_MQTT = 5, MA_TRANSPORT_TCP = 6, MA_TRANSPORT_UDP = 7, - MA_TRANSPORT_RTSP = 8 + MA_TRANSPORT_RTSP = 8, + MA_TRANSPORT_WS = 9 } ma_transport_type_t; typedef enum { @@ -283,7 +284,9 @@ typedef enum { MA_MODEL_TYPE_YOLO_WORLD = 8u, MA_MODEL_TYPE_YOLO11 = 9u, MA_MODEL_TYPE_YOLO11_POSE = 10u, - MA_MODEL_TYPE_YOLO11_SEG = 11u, + MA_MODEL_TYPE_YOLO11_SEG = 11u, + MA_MODEL_TYPE_YOLOV8_SGE = 12u, + MA_MODEL_TYPE_RTMDET = 13u } ma_model_type_t; typedef struct { @@ -444,4 +447,4 @@ typedef struct in6_info_t { #endif -#endif // _MA_TYPES_H_ \ No newline at end of file +#endif // _MA_TYPES_H_ diff --git a/src/components/sscma-micro/sscma/core/math/ma_math_vectors.h b/src/components/sscma-micro/sscma/core/math/ma_math_vectors.h index 667c216..9d66571 100644 --- a/src/components/sscma-micro/sscma/core/math/ma_math_vectors.h +++ b/src/components/sscma-micro/sscma/core/math/ma_math_vectors.h @@ -1,8 +1,14 @@ #ifndef _MA_MATH_VECTORS_H_ #define _MA_MATH_VECTORS_H_ -#include #include +#include + +#if MA_USE_LIB_XTENSOR +#include +#include +#include +#endif namespace ma::math { @@ -10,6 +16,15 @@ void softmax(float* data, size_t size); void fastSoftmax(float* data, size_t size); +#if MA_USE_LIB_XTENSOR +template +static void dequantizeValues1D(xt::xarray& dequantized_outputs, int index, const xt::xarray& quantized_outputs, size_t dim1, float32_t qp_scale, float32_t qp_zp) { + for (size_t i = 0; i < dim1; ++i) { + dequantized_outputs(i) = (float(quantized_outputs(index, i)) - qp_zp) * qp_scale; + } +} +#endif + } // namespace ma::math #endif \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp index d9f07c5..62fee76 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp @@ -40,9 +40,7 @@ bool Classifier::isValid(Engine* engine) { return false; } - const auto& input_shape = engine->getInputShape(0); - const auto& output_shape{engine->getOutputShape(0)}; - + const auto input_shape = engine->getInputShape(0); int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], c = input_shape.dims[3]; bool is_nhwc = c == 3 || c == 1; @@ -52,6 +50,10 @@ bool Classifier::isValid(Engine* engine) { if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) return false; + const auto output_shape = engine->getOutputShape(0); + if (output_shape.size != 2) { + return false; + } if (output_shape.dims[0] != 1 || // N = 1 output_shape.dims[1] < 2 // C >= 2 @@ -59,10 +61,6 @@ bool Classifier::isValid(Engine* engine) { return false; } - if (output_shape.size >= 3) { - return false; - } - return true; } @@ -91,32 +89,65 @@ ma_err_t Classifier::preprocess() { ma_err_t Classifier::postprocess() { results_.clear(); - if (output_.type == MA_TENSOR_TYPE_S8) { - auto scale{output_.quant_param.scale}; - auto zero_point{output_.quant_param.zero_point}; - bool rescale{scale < 0.1f ? true : false}; - auto* data = output_.data.s8; + switch (output_.type) { + case MA_TENSOR_TYPE_S8: { + auto scale{output_.quant_param.scale}; + auto zero_point{output_.quant_param.zero_point}; + bool rescale{scale < 0.1f ? true : false}; + auto* data = output_.data.s8; + auto pred_l{output_.shape.dims[1]}; + + for (decltype(pred_l) i{0}; i < pred_l; ++i) { + auto score{static_cast(data[i] - zero_point) * scale}; + score = rescale ? score : score / 100.f; + if (score > threshold_score_) + results_.emplace_front(ma_class_t{score, i}); + } + } break; + + case MA_TENSOR_TYPE_U8: { + auto scale{output_.quant_param.scale}; + auto zero_point{output_.quant_param.zero_point}; + bool rescale{scale < 0.1f ? true : false}; + auto* data = output_.data.u8; + auto pred_l{output_.shape.dims[1]}; + + for (decltype(pred_l) i{0}; i < pred_l; ++i) { + auto score{static_cast(data[i] - zero_point) * scale}; + score = rescale ? score : score / 100.f; + if (score > threshold_score_) + results_.emplace_front(ma_class_t{score, i}); + } + } break; + + case MA_TENSOR_TYPE_U16: { + auto scale{output_.quant_param.scale}; + auto zero_point{output_.quant_param.zero_point}; + bool rescale{scale < 0.1f ? true : false}; + auto* data = output_.data.u16; + auto pred_l{output_.shape.dims[1]}; + + for (decltype(pred_l) i{0}; i < pred_l; ++i) { + auto score{static_cast(data[i] - zero_point) * scale}; + score = rescale ? score : score / 100.f; + if (score > threshold_score_) + results_.emplace_front(ma_class_t{score, i}); + } + } break; + + + case MA_TENSOR_TYPE_F32: { + auto* data = output_.data.f32; + auto pred_l{output_.shape.dims[1]}; + for (decltype(pred_l) i{0}; i < pred_l; ++i) { + auto score{data[i]}; + if (score > threshold_score_) + results_.emplace_front(ma_class_t{score, i}); + } + } break; - auto pred_l{output_.shape.dims[1]}; - - for (decltype(pred_l) i{0}; i < pred_l; ++i) { - auto score{static_cast(data[i] - zero_point) * scale}; - score = rescale ? score : score / 100.f; - if (score > threshold_score_) - results_.emplace_front(ma_class_t{score, i}); - } - } - if (output_.type == MA_TENSOR_TYPE_F32) { - auto* data = output_.data.f32; - auto pred_l{output_.shape.dims[1]}; - for (decltype(pred_l) i{0}; i < pred_l; ++i) { - auto score{data[i]}; - if (score > threshold_score_) - results_.emplace_front(ma_class_t{score, i}); - } - - } else { - return MA_ENOTSUP; + default: + return MA_ENOTSUP; } results_.sort([](const ma_class_t& a, const ma_class_t& b) { return a.score > b.score; }); diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp index 573899d..9ac7858 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp @@ -37,6 +37,10 @@ Model* ModelFactory::create(Engine* engine, size_t algorithm_id) { if (YoloV8PoseHailo::isValid(engine)) { return new YoloV8PoseHailo(engine); } + + if (YoloV8SegHailo::isValid(engine)) { + return new YoloV8SegHailo(engine); + } #endif if (YoloV8Pose::isValid(engine)) { return new YoloV8Pose(engine); @@ -52,6 +56,11 @@ Model* ModelFactory::create(Engine* engine, size_t algorithm_id) { return new NvidiaDet(engine); } + case MA_MODEL_TYPE_RTMDET: + if (RTMDet::isValid(engine)) { + return new RTMDet(engine); + } + case MA_MODEL_TYPE_YOLO_WORLD: if (YoloWorld::isValid(engine)) { return new YoloWorld(engine); diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_factory.h b/src/components/sscma-micro/sscma/core/model/ma_model_factory.h index f86bfda..ddfdbbf 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_factory.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_factory.h @@ -21,6 +21,8 @@ #include "ma_model_yolov8.h" #include "ma_model_yolov8_pose.h" #include "ma_model_yolov8_pose_hailo.h" +#include "ma_model_yolov8_seg_hailo.h" +#include "ma_model_rtmdet.h" namespace ma { diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_rtmdet.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_rtmdet.cpp new file mode 100644 index 0000000..265d540 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_rtmdet.cpp @@ -0,0 +1,466 @@ +#include "ma_model_rtmdet.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "../math/ma_math.h" +#include "../utils/ma_anchors.h" +#include "../utils/ma_nms.h" + +namespace ma::model { + +static inline decltype(auto) estimateTensorHW(const ma_shape_t& shape) { + if (shape.size != 4) { + int32_t ph = 0; + return std::make_pair(ph, ph); + } + const auto is_nhwc{shape.dims[3] == 3 || shape.dims[3] == 1}; + + return is_nhwc ? std::make_pair(shape.dims[1], shape.dims[2]) : std::make_pair(shape.dims[2], shape.dims[3]); +} + +RTMDet::RTMDet(Engine* p_engine_) : Detector(p_engine_, "rtmdet", MA_MODEL_TYPE_RTMDET) { + MA_ASSERT(p_engine_ != nullptr); + + for (size_t i = 0; i < num_outputs_; ++i) { + outputs_[i] = p_engine_->getOutput(i); + } + + const auto [h, w] = estimateTensorHW(p_engine_->getInputShape(0)); + + anchor_strides_ = ma::utils::generateAnchorStrides(std::min(h, w)); + anchor_matrix_ = ma::utils::generateAnchorMatrix(anchor_strides_); + + + for (size_t i = 0; i < num_outputs_; ++i) { + const auto dim_1 = outputs_[i].shape.dims[1]; + const auto dim_2 = outputs_[i].shape.dims[2]; + + if (dim_2 == 4) { + for (size_t j = 0; j < anchor_variants_; ++j) { + if (dim_1 == static_cast(anchor_strides_[j].size)) { + output_bboxes_ids_[j] = i; + break; + } + } + } else { + for (size_t j = 0; j < anchor_variants_; ++j) { + if (dim_1 == static_cast(anchor_strides_[j].size)) { + output_scores_ids_[j] = i; + break; + } + } + } + } + + using CheckType = uint8_t; + size_t check_bytes = sizeof(CheckType) * 8u; + CheckType check = 0; + for (size_t i = 0; i < anchor_variants_; ++i) { + CheckType f_s = 1 << (output_scores_ids_[i] % check_bytes); + CheckType f_b = 1 << (output_bboxes_ids_[i] % check_bytes); + MA_ASSERT(!(f_s & f_b)); + MA_ASSERT(!(f_s & check)); + MA_ASSERT(!(f_b & check)); + check |= f_s | f_b; + } + MA_ASSERT(!(check ^ 0b00111111)); +} + +RTMDet::~RTMDet() {} + +bool RTMDet::isValid(Engine* engine) { + const auto inputs_count = engine->getInputSize(); + const auto outputs_count = engine->getOutputSize(); + + if (inputs_count != 1 || outputs_count != num_outputs_) { + return false; + } + + const auto input_shape{engine->getInputShape(0)}; + + if (input_shape.size != 4) { + return false; + } + + const auto is_nhwc{input_shape.dims[3] == 3 || input_shape.dims[3] == 1}; + + size_t n = 0, h = 0, w = 0, c = 0; + + if (is_nhwc) { + n = input_shape.dims[0]; + h = input_shape.dims[1]; + w = input_shape.dims[2]; + c = input_shape.dims[3]; + } else { + n = input_shape.dims[0]; + c = input_shape.dims[1]; + h = input_shape.dims[2]; + w = input_shape.dims[3]; + } + + if (n != 1 || h ^ w || h < 32 || h % 32 || (c != 3 && c != 1)) { + return false; + } + + auto anchor_strides_1 = ma::utils::generateAnchorStrides(std::min(h, w)); + auto anchor_strides_2 = anchor_strides_1; + + for (size_t i = 0; i < num_outputs_; ++i) { + const auto output_shape{engine->getOutputShape(i)}; + if (output_shape.size != 3 || output_shape.dims[0] != 1) { + return false; + } + + if (output_shape.dims[2] == 4) { + auto it = std::find_if( + anchor_strides_2.begin(), anchor_strides_2.end(), [&output_shape](const ma_anchor_stride_t& anchor_stride) { + return static_cast(anchor_stride.size) == output_shape.dims[1]; + }); + if (it == anchor_strides_2.end()) + return false; + else + anchor_strides_2.erase(it); + } else { + auto it = std::find_if( + anchor_strides_1.begin(), anchor_strides_1.end(), [&output_shape](const ma_anchor_stride_t& anchor_stride) { + return static_cast(anchor_stride.size) == output_shape.dims[1]; + }); + if (it == anchor_strides_1.end()) + return false; + else + anchor_strides_1.erase(it); + } + } + + if (anchor_strides_1.size() || anchor_strides_2.size()) { + return false; + } + + return true; +} + +const char* RTMDet::getTag() { + return "ma::model::rmdet"; +} + +ma_err_t RTMDet::postprocess() { + uint8_t check = 0; + + for (size_t i = 0; i < num_outputs_; ++i) { + switch (outputs_[i].type) { + case MA_TENSOR_TYPE_S8: + check += 1; + break; + + case MA_TENSOR_TYPE_U8: + check += 2; + break; + + case MA_TENSOR_TYPE_F32: + check += 4; + break; + + default: + return MA_ENOTSUP; + } + } + + switch (check) { + case 6: + return postProcessI8(); + + case 12: + return postProcessU8(); + +#ifdef MA_MODEL_POSTPROCESS_FP32_VARIANT + case 24: + return postProcessF32(); +#endif + + default: + return MA_ENOTSUP; + } + + return MA_OK; +} + +ma_err_t RTMDet::postProcessI8() { + results_.clear(); + + const int8_t* output_data[num_outputs_]; + + for (size_t i = 0; i < num_outputs_; ++i) { + output_data[i] = outputs_[i].data.s8; + } + + const auto score_threshold = threshold_score_; + const auto iou_threshold = threshold_nms_; + + const float score_threshold_non_sigmoid = ma::math::inverseSigmoid(score_threshold); + + const auto anchor_matrix_size = anchor_matrix_.size(); + + for (size_t i = 0; i < anchor_matrix_size; ++i) { + const auto output_scores_id = output_scores_ids_[i]; + const auto* output_scores = output_data[output_scores_id]; + const size_t output_scores_shape_dims_2 = outputs_[output_scores_id].shape.dims[2]; + const auto output_scores_quant_parm = outputs_[output_scores_id].quant_param; + + const auto output_bboxes_id = output_bboxes_ids_[i]; + const auto* output_bboxes = output_data[output_bboxes_id]; + const size_t output_bboxes_shape_dims_2 = outputs_[output_bboxes_id].shape.dims[2]; + const auto output_bboxes_quant_parm = outputs_[output_bboxes_id].quant_param; + + const auto stride = anchor_strides_[i]; + const float scale_w = float(stride.stride) / float(img_.width); + const float scale_h = float(stride.stride) / float(img_.height); + + const auto& anchor_array = anchor_matrix_[i]; + const auto anchor_array_size = anchor_array.size(); + + const int32_t score_threshold_quan_non_sigmoid = ma::math::quantizeValueFloor(score_threshold_non_sigmoid, output_scores_quant_parm.scale, output_scores_quant_parm.zero_point); + + for (size_t j = 0; j < anchor_array_size; ++j) { + const auto j_mul_output_scores_shape_dims_2 = j * output_scores_shape_dims_2; + + auto max_score_raw = score_threshold_quan_non_sigmoid; + int32_t target = -1; + + for (size_t k = 0; k < output_scores_shape_dims_2; ++k) { + int8_t score = output_scores[j_mul_output_scores_shape_dims_2 + k]; + + if (static_cast(score) < max_score_raw) [[likely]] + continue; + + max_score_raw = score; + target = k; + } + + if (target < 0) + continue; + + const float real_score = ma::math::sigmoid(ma::math::dequantizeValue(max_score_raw, output_scores_quant_parm.scale, output_scores_quant_parm.zero_point)); + + + float dist[4]; + const auto pre = j * output_bboxes_shape_dims_2; + for (size_t m = 0; m < 4; ++m) { + const size_t offset = pre + m; + dist[m] = ma::math::dequantizeValue(static_cast(output_bboxes[offset]), output_bboxes_quant_parm.scale, output_bboxes_quant_parm.zero_point); + } + + const auto anchor = anchor_array[j]; + + float cx = anchor.x + ((dist[2] - dist[0]) * 0.5f); + float cy = anchor.y + ((dist[3] - dist[1]) * 0.5f); + float w = dist[0] + dist[2]; + float h = dist[1] + dist[3]; + + ma_bbox_t res; + + res.x = cx * scale_w; + res.y = cy * scale_h; + res.w = w * scale_w; + res.h = h * scale_h; + res.score = real_score; + res.target = target; + + results_.emplace_front( + std::move(res) + ); + } + } + + ma::utils::nms(results_, threshold_nms_, threshold_score_, false, true); + + return MA_OK; +} + +ma_err_t RTMDet::postProcessU8() { + results_.clear(); + + const uint8_t* output_data[num_outputs_]; + + for (size_t i = 0; i < num_outputs_; ++i) { + output_data[i] = outputs_[i].data.u8; + } + + const auto score_threshold = threshold_score_; + const auto iou_threshold = threshold_nms_; + + const float score_threshold_non_sigmoid = ma::math::inverseSigmoid(score_threshold); + + const auto anchor_matrix_size = anchor_matrix_.size(); + + for (size_t i = 0; i < anchor_matrix_size; ++i) { + const auto output_scores_id = output_scores_ids_[i]; + const auto* output_scores = output_data[output_scores_id]; + const size_t output_scores_shape_dims_2 = outputs_[output_scores_id].shape.dims[2]; + const auto output_scores_quant_parm = outputs_[output_scores_id].quant_param; + + const auto output_bboxes_id = output_bboxes_ids_[i]; + const auto* output_bboxes = output_data[output_bboxes_id]; + const size_t output_bboxes_shape_dims_2 = outputs_[output_bboxes_id].shape.dims[2]; + const auto output_bboxes_quant_parm = outputs_[output_bboxes_id].quant_param; + + const auto stride = anchor_strides_[i]; + const float scale_w = float(stride.stride) / float(img_.width); + const float scale_h = float(stride.stride) / float(img_.height); + + const auto& anchor_array = anchor_matrix_[i]; + const auto anchor_array_size = anchor_array.size(); + + const int32_t score_threshold_quan_non_sigmoid = ma::math::quantizeValueFloor(score_threshold_non_sigmoid, output_scores_quant_parm.scale, output_scores_quant_parm.zero_point); + + for (size_t j = 0; j < anchor_array_size; ++j) { + const auto j_mul_output_scores_shape_dims_2 = j * output_scores_shape_dims_2; + + auto max_score_raw = score_threshold_quan_non_sigmoid; + int32_t target = -1; + + for (size_t k = 0; k < output_scores_shape_dims_2; ++k) { + uint8_t score = output_scores[j_mul_output_scores_shape_dims_2 + k]; + + if (static_cast(score) < max_score_raw) [[likely]] + continue; + + max_score_raw = score; + target = k; + } + + if (target < 0) + continue; + + const float real_score = ma::math::sigmoid(ma::math::dequantizeValue(max_score_raw, output_scores_quant_parm.scale, output_scores_quant_parm.zero_point)); + + // DFL + float dist[4]; + const auto pre = j * output_bboxes_shape_dims_2; + for (size_t m = 0; m < 4; ++m) { + const size_t offset = pre + m; + dist[m] = ma::math::dequantizeValue(static_cast(output_bboxes[offset]), output_bboxes_quant_parm.scale, output_bboxes_quant_parm.zero_point); + } + + const auto anchor = anchor_array[j]; + + float cx = anchor.x + ((dist[2] - dist[0]) * 0.5f); + float cy = anchor.y + ((dist[3] - dist[1]) * 0.5f); + float w = dist[0] + dist[2]; + float h = dist[1] + dist[3]; + + ma_bbox_t res; + + res.x = cx * scale_w; + res.y = cy * scale_h; + res.w = w * scale_w; + res.h = h * scale_h; + res.score = real_score; + res.target = target; + + results_.emplace_front( + std::move(res) + ); + } + } + + ma::utils::nms(results_, threshold_nms_, threshold_score_, false, true); + + return MA_OK; +} + +#ifdef MA_MODEL_POSTPROCESS_FP32_VARIANT +ma_err_t RTMDet::postProcessF32() { + results_.clear(); + + const float* output_data[num_outputs_]; + + for (size_t i = 0; i < num_outputs_; ++i) { + output_data[i] = outputs_[i].data.f32; + } + + const auto score_threshold = threshold_score_; + const auto iou_threshold = threshold_nms_; + + const float score_threshold_non_sigmoid = ma::math::inverseSigmoid(score_threshold); + + const auto anchor_matrix_size = anchor_matrix_.size(); + + for (size_t i = 0; i < anchor_matrix_size; ++i) { + const auto output_scores_id = output_scores_ids_[i]; + const auto* output_scores = output_data[output_scores_id]; + const size_t output_scores_shape_dims_2 = outputs_[output_scores_id].shape.dims[2]; + + const auto output_bboxes_id = output_bboxes_ids_[i]; + const auto* output_bboxes = output_data[output_bboxes_id]; + const size_t output_bboxes_shape_dims_2 = outputs_[output_bboxes_id].shape.dims[2]; + + const auto stride = anchor_strides_[i]; + const float scale_w = float(stride.stride) / float(img_.width); + const float scale_h = float(stride.stride) / float(img_.height); + + const auto& anchor_array = anchor_matrix_[i]; + const auto anchor_array_size = anchor_array.size(); + + for (size_t j = 0; j < anchor_array_size; ++j) { + const auto j_mul_output_scores_shape_dims_2 = j * output_scores_shape_dims_2; + + auto max_score_raw = score_threshold_non_sigmoid; + int32_t target = -1; + + for (size_t k = 0; k < output_scores_shape_dims_2; ++k) { + int8_t score = output_scores[j_mul_output_scores_shape_dims_2 + k]; + + if (static_cast(score) < max_score_raw) [[likely]] + continue; + + max_score_raw = score; + target = k; + } + + if (target < 0) + continue; + + const float real_score = ma::math::sigmoid(max_score_raw); + + float dist[4]; + const auto pre = j * output_bboxes_shape_dims_2; + for (size_t m = 0; m < 4; ++m) { + const size_t offset = pre + m; + dist[m] = output_bboxes[offset]; + } + + const auto anchor = anchor_array[j]; + + float cx = anchor.x + ((dist[2] - dist[0]) * 0.5f); + float cy = anchor.y + ((dist[3] - dist[1]) * 0.5f); + float w = dist[0] + dist[2]; + float h = dist[1] + dist[3]; + + ma_bbox_t res; + + res.x = cx * scale_w; + res.y = cy * scale_h; + res.w = w * scale_w; + res.h = h * scale_h; + res.score = real_score; + res.target = target; + + results_.emplace_front( + std::move(res) + ); + } + } + + ma::utils::nms(results_, threshold_nms_, threshold_score_, false, true); + + return MA_OK; +} +#endif + +} // namespace ma::model diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_rtmdet.h b/src/components/sscma-micro/sscma/core/model/ma_model_rtmdet.h new file mode 100644 index 0000000..9193023 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_rtmdet.h @@ -0,0 +1,46 @@ +#ifndef _MA_MODEL_RTMDET_H_ +#define _MA_MODEL_RTMDET_H_ + +#include +#include +#include +#include + +#include "ma_model_detector.h" + +namespace ma::model { + +class RTMDet : public Detector { + private: + static constexpr size_t num_outputs_ = 6; + static constexpr size_t anchor_variants_ = 3; + + ma_tensor_t outputs_[num_outputs_]; + + std::vector anchor_strides_; + std::vector> anchor_matrix_; + + size_t output_scores_ids_[anchor_variants_]; + size_t output_bboxes_ids_[anchor_variants_]; + + protected: + ma_err_t postprocess() override; + + ma_err_t postProcessI8(); + ma_err_t postProcessU8(); +#ifdef MA_MODEL_POSTPROCESS_FP32_VARIANT + ma_err_t postProcessF32(); +#endif + + public: + RTMDet(Engine* engine); + ~RTMDet(); + + static bool isValid(Engine* engine); + + static const char* getTag(); +}; + +} // namespace ma::model + +#endif // _MA_MODEL_YOLO_H diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp index 781b836..c849717 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp @@ -221,7 +221,7 @@ ma_err_t YoloV8Pose::postProcessI8() { const auto& anchor_array = anchor_matrix_[i]; const auto anchor_array_size = anchor_array.size(); - const int32_t score_threshold_quan_non_sigmoid = ma::math::quantizeValueFloor(score_threshold_non_sigmoid, output_scores_quant_parm.zero_point, output_scores_quant_parm.scale); + const int32_t score_threshold_quan_non_sigmoid = ma::math::quantizeValueFloor(score_threshold_non_sigmoid, output_scores_quant_parm.scale, output_scores_quant_parm.zero_point); for (size_t j = 0; j < anchor_array_size; ++j) { const auto j_mul_output_scores_shape_dims_2 = j * output_scores_shape_dims_2; @@ -242,7 +242,7 @@ ma_err_t YoloV8Pose::postProcessI8() { if (target < 0) continue; - const float real_score = ma::math::sigmoid(ma::math::dequantizeValue(max_score_raw, output_scores_quant_parm.zero_point, output_scores_quant_parm.scale)); + const float real_score = ma::math::sigmoid(ma::math::dequantizeValue(max_score_raw, output_scores_quant_parm.scale, output_scores_quant_parm.zero_point)); // DFL float dist[4]; @@ -252,7 +252,7 @@ ma_err_t YoloV8Pose::postProcessI8() { for (size_t m = 0; m < 4; ++m) { const size_t offset = pre + m * 16; for (size_t n = 0; n < 16; ++n) { - matrix[n] = ma::math::dequantizeValue(static_cast(output_bboxes[offset + n]), output_bboxes_quant_parm.zero_point, output_bboxes_quant_parm.scale); + matrix[n] = ma::math::dequantizeValue(static_cast(output_bboxes[offset + n]), output_bboxes_quant_parm.scale, output_bboxes_quant_parm.zero_point); } ma::math::softmax(matrix, 16); @@ -304,11 +304,11 @@ ma_err_t YoloV8Pose::postProcessI8() { for (size_t i = 0; i < keypoint_nums; ++i) { const auto offset = pre + i * 3; - const float x = ma::math::dequantizeValue(static_cast(output_keypoints[offset]), output_keypoints_quant_parm.zero_point, output_keypoints_quant_parm.scale); + const float x = ma::math::dequantizeValue(static_cast(output_keypoints[offset]), output_keypoints_quant_parm.scale, output_keypoints_quant_parm.zero_point); - const float y = ma::math::dequantizeValue(static_cast(output_keypoints[offset + 1]), output_keypoints_quant_parm.zero_point, output_keypoints_quant_parm.scale); + const float y = ma::math::dequantizeValue(static_cast(output_keypoints[offset + 1]), output_keypoints_quant_parm.scale, output_keypoints_quant_parm.zero_point); - const float z = ma::math::sigmoid(ma::math::dequantizeValue(static_cast(output_keypoints[offset + 2]), output_keypoints_quant_parm.zero_point, output_keypoints_quant_parm.scale)); + const float z = ma::math::sigmoid(ma::math::dequantizeValue(static_cast(output_keypoints[offset + 2]), output_keypoints_quant_parm.scale, output_keypoints_quant_parm.zero_point)); n_keypoint[i] = {x, y, z}; } diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_seg_hailo.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_seg_hailo.cpp new file mode 100644 index 0000000..22d0844 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_seg_hailo.cpp @@ -0,0 +1,435 @@ +#include "ma_model_yolov8_seg_hailo.h" + +#if MA_USE_ENGINE_HAILO + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../math/ma_math.h" +#include "../utils/ma_anchors.h" +#include "../utils/ma_nms.h" + +namespace ma::model { + +static inline decltype(auto) estimateTensorHW(const ma_shape_t& shape) { + if (shape.size != 4) { + int32_t ph = 0; + return std::make_pair(ph, ph); + } + const auto is_nhwc{shape.dims[3] == 3 || shape.dims[3] == 1}; + + return is_nhwc ? std::make_pair(shape.dims[1], shape.dims[2]) : std::make_pair(shape.dims[2], shape.dims[3]); +} + +static void nms(std::forward_list>>& decodings, const float iou_thr, bool should_nms_cross_classes) { + for (auto it = decodings.begin(); it != decodings.end(); ++it) { + if (it->first.score != 0.0f) { + for (auto it2 = std::next(it); it2 != decodings.end(); ++it2) { + if ((should_nms_cross_classes || (it->first.target == it2->first.target)) && it2->first.score != 0.0f) { + float iou = ma::utils::compute_iou(it->first, it2->first); + if (iou >= iou_thr) { + it2->first.score = 0.0f; + } + } + } + } + } + decodings.remove_if([](const auto& p) { return p.first.score == 0.0f; }); +} + +std::vector YoloV8SegHailo::strides_ = {8, 16, 32}; + +/** + * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + **/ +static decltype(auto) getBoxesScoresMasks(std::vector tensors, int num_classes) { + auto raw_proto = [&tensors]() { + auto it = std::find_if( + tensors.begin(), tensors.end(), [](const ma_tensor_t& t) { return t.size == 4 && t.shape.dims[0] == 1 && t.shape.dims[1] == 160 && t.shape.dims[2] == 160 && t.shape.dims[3] == 32; }); + if (it == tensors.end()) { + return ma_tensor_t{0}; + } + tensors.erase(it); + return *it; + }(); + + std::vector outputs_boxes(tensors.size() / 3); + std::vector outputs_masks(tensors.size() / 3); + + int total_scores = 0; + for (uint i = 0; i < tensors.size(); i = i + 3) { + auto w = tensors[i + 1].shape.dims[1]; // w + auto h = tensors[i + 1].shape.dims[2]; // h + total_scores += w * h; + } + + std::vector scores_shape = {(long unsigned int)total_scores, (long unsigned int)num_classes}; + xt::xarray scores(scores_shape); + + std::vector proto_shape = {(long unsigned int)raw_proto.shape.dims[1], (long unsigned int)raw_proto.shape.dims[2], (long unsigned int)raw_proto.shape.dims[3]}; + xt::xarray proto(proto_shape); + + int view_index_scores = 0; + + for (uint i = 0; i < tensors.size(); i = i + 3) { + outputs_boxes[i / 3] = tensors[i]; + + auto& tensor = tensors[i + 1]; + std::vector shape = {(size_t)tensor.shape.dims[1], (size_t)tensor.shape.dims[2], (size_t)tensor.shape.dims[3]}; + xt::xarray xtensor = xt::adapt(tensor.data.u8, tensor.size, xt::no_ownership(), shape); + auto dequantized_output_s = (xtensor - tensor.quant_param.zero_point) * tensor.quant_param.scale; + int num_proposals_scores = dequantized_output_s.shape(0) * dequantized_output_s.shape(1); + + auto output_scores = xt::view(dequantized_output_s, xt::all(), xt::all(), xt::all()); + xt::view(scores, xt::range(view_index_scores, view_index_scores + num_proposals_scores), xt::all()) = xt::reshape_view(output_scores, {num_proposals_scores, num_classes}); + view_index_scores += num_proposals_scores; + + outputs_masks[i / 3] = tensors[i + 2]; + } + + auto proto_tensor = xt::adapt(raw_proto.data.u8, raw_proto.size, xt::no_ownership(), proto_shape); + proto = (proto_tensor - raw_proto.quant_param.zero_point) * raw_proto.quant_param.scale; + + + return _internal::Quadruple{outputs_boxes, scores, outputs_masks, proto}; +} + + +YoloV8SegHailo::YoloV8SegHailo(Engine* p_engine_) : Segmentor(p_engine_, "yolov8_seg", MA_MODEL_TYPE_YOLOV8_SGE) { + MA_ASSERT(p_engine_ != nullptr); + + threshold_score_ = 0.6; + threshold_nms_ = 0.7; + + outputs_.resize(10); + for (size_t i = 0; i < outputs_.size(); ++i) { + outputs_[i] = p_engine_->getOutput(i); + } + + std::sort(outputs_.begin(), outputs_.end(), [](const ma_tensor_t& a, const ma_tensor_t& b) { return a.shape.dims[1] > b.shape.dims[1]; }); + + auto update_route_f = [&route = route_, this](ma_tensor_type_t t, int i) { + switch (t) { + case MA_TENSOR_TYPE_U8: + route |= 1 << i; + break; + case MA_TENSOR_TYPE_U16: + route |= 1 << (i + this->outputs_.size()); + break; + default: + break; + } + }; + + std::vector idx(outputs_.size()); + for (size_t i = 1; i < outputs_.size(); i += 3) { + for (size_t j = 0; j < 3; ++j) { + auto at = i + j; + switch (outputs_[at].shape.dims[3]) { + case 32: + idx[i + 2] = at; + break; + case 64: + idx[i] = at; + break; + default: + idx[i + 1] = at; + } + } + } + std::vector reordered_outputs(outputs_.size()); + reordered_outputs[0] = outputs_[0]; + update_route_f(reordered_outputs[0].type, 0); + for (size_t i = 1; i < outputs_.size(); ++i) { + reordered_outputs[i] = outputs_[idx[i]]; + update_route_f(reordered_outputs[i].type, i); + } + outputs_ = std::move(reordered_outputs); + classes_ = outputs_[2].shape.dims[3]; + + const auto [h, w] = estimateTensorHW(p_engine_->getInputShape(0)); + + centers_ = ma::utils::generateAnchorMatrix(strides_, {static_cast(w), static_cast(h)}, 3, 0, 0); + network_dims_ = {w, h}; +} + +YoloV8SegHailo::~YoloV8SegHailo() {} + +bool YoloV8SegHailo::isValid(Engine* engine) { + const auto inputs_count = engine->getInputSize(); + const auto outputs_count = engine->getOutputSize(); + + if (inputs_count != 1 || outputs_count != 10) { + return false; + } + + const auto input_shape{engine->getInputShape(0)}; + + if (input_shape.size != 4) { + return false; + } + + const auto is_nhwc{input_shape.dims[3] == 3 || input_shape.dims[3] == 1}; + + size_t n = 0, h = 0, w = 0, c = 0; + + if (is_nhwc) { + n = input_shape.dims[0]; + h = input_shape.dims[1]; + w = input_shape.dims[2]; + c = input_shape.dims[3]; + } else { + n = input_shape.dims[0]; + c = input_shape.dims[1]; + h = input_shape.dims[2]; + w = input_shape.dims[3]; + } + + if (n != 1 || h ^ w || h < 32 || h % 32 || (c != 3 && c != 1)) { + return false; + } + + const auto output_nums = engine->getOutputSize(); + if (output_nums != 9) { + return false; + } + + std::vector outputs(output_nums); + for (size_t i = 0; i < output_nums; ++i) { + outputs[i] = engine->getOutput(i); + } + + auto it = std::find_if( + outputs.begin(), outputs.end(), [](const ma_tensor_t& t) { return t.size == 4 && t.shape.dims[0] == 1 && t.shape.dims[1] == 160 && t.shape.dims[2] == 160 && t.shape.dims[3] == 32; }); + if (it == outputs.end()) { + return false; + } + outputs.erase(it); + + std::vector> dims{std::vector{int(w / strides_[0]), int(h / strides_[0]), 0}, + std::vector{int(w / strides_[1]), int(h / strides_[1]), 0}, + std::vector{int(w / strides_[2]), int(h / strides_[2]), 0}}; + + for (auto& out : outputs) { + if (out.shape.size != 4 || out.shape.dims[0] != 1) { + return false; + } + auto it = std::find_if(dims.begin(), dims.end(), [&out](const std::vector& dim) { return dim[0] == out.shape.dims[1] && dim[1] == out.shape.dims[2]; }); + if (it == dims.end()) { + return false; + } + switch (out.shape.dims[3]) { + case 32: + if (out.type != MA_TENSOR_TYPE_U8) { + return false; + } + (*it)[2] += 1; + break; + case 64: + if (out.type != MA_TENSOR_TYPE_U8) { + return false; + } + (*it)[2] += 1; + break; + default: + if (out.type != MA_TENSOR_TYPE_U8) { + return false; + } + (*it)[2] += 1; + } + } + + for (const auto& dim : dims) { + if (dim[2] != 3) { + return false; + } + } + + return true; +} + +const char* YoloV8SegHailo::getTag() { + return "ma::model::yolov8_pose"; +} + +template +static decltype(auto) decodeBoxesAndExtractMasks(const std::vector& raw_boxes_outputs, + const std::vector& raw_masks_outputs, + xt::xarray& scores, + const std::vector& network_dims, + const std::vector& strides, + const std::vector>& centers, + int regression_length, + float score_threshold) { + + int class_index = 0; + std::forward_list>> decodings; + + int instance_index = 0; + float confidence = 0.0; + std::string label; + + // Box distribution to distance + auto regression_distance = xt::reshape_view(xt::arange(0, regression_length + 1), {1, 1, regression_length + 1}); + + for (uint i = 0; i < raw_boxes_outputs.size(); ++i) { + // Boxes setup + float32_t qp_scale = raw_boxes_outputs[i].quant_param.scale; + float32_t qp_zp = raw_boxes_outputs[i].quant_param.zero_point; + + std::vector output_b_shape = {(size_t)raw_boxes_outputs[i].shape.dims[1], (size_t)raw_boxes_outputs[i].shape.dims[2], (size_t)raw_boxes_outputs[i].shape.dims[3]}; + auto output_b = xt::adapt(raw_boxes_outputs[i].data.u8, raw_boxes_outputs[i].size, xt::no_ownership(), output_b_shape); + + int num_proposals = output_b.shape(0) * output_b.shape(1); + auto output_boxes = xt::view(output_b, xt::all(), xt::all(), xt::all()); + auto quantized_boxes = xt::reshape_view(output_boxes, {num_proposals, 4, regression_length + 1}); + + auto shape = {quantized_boxes.shape(1), quantized_boxes.shape(2)}; + + // Masks setup + float32_t qp_scale_masks = raw_masks_outputs[i].quant_param.scale; + float32_t qp_zp_masks = raw_masks_outputs[i].quant_param.zero_point; + + std::vector output_m_shape = {(size_t)raw_masks_outputs[i].shape.dims[1], (size_t)raw_masks_outputs[i].shape.dims[2], (size_t)raw_masks_outputs[i].shape.dims[3]}; + auto output_m = xt::adapt(raw_masks_outputs[i].data.u8, raw_masks_outputs[i].size, xt::no_ownership(), output_m_shape); + + int num_proposals_masks = output_m.shape(0) * output_m.shape(1); + auto output_masks = xt::view(output_m, xt::all(), xt::all(), xt::all()); + auto quantized_masks = xt::reshape_view(output_masks, {num_proposals_masks, 1, regression_length + 1}); + + auto mask_shape = {quantized_masks.shape(1)}; + + // Bbox decoding + for (uint j = 0; j < (uint)num_proposals; ++j) { + class_index = xt::argmax(xt::row(scores, instance_index))(0); + confidence = scores(instance_index, class_index); + instance_index++; + if (confidence < score_threshold) + continue; + + xt::xarray box(shape); + xt::xarray mask(mask_shape); + + ma::math::dequantizeValues2D(box, j, quantized_boxes, box.shape(0), box.shape(1), qp_scale, qp_zp); + ma::math::softmax2D(box.data(), box.shape(0), box.shape(1)); + + ma::math::dequantizeValues1D(mask, j, quantized_masks, mask.shape(0), qp_scale_masks, qp_zp_masks); + + auto box_distance = box * regression_distance; + xt::xarray reduced_distances = xt::sum(box_distance, {2}); + auto strided_distances = reduced_distances * strides[i]; + + using namespace xt::placeholders; + auto distance_view1 = xt::view(strided_distances, xt::all(), xt::range(_, 2)) * -1; + auto distance_view2 = xt::view(strided_distances, xt::all(), xt::range(2, _)); + auto distance_view = xt::concatenate(xt::xtuple(distance_view1, distance_view2), 1); + auto decoded_box = centers[i] + distance_view; + + ma_bbox_t bbox; + auto x_min = decoded_box(j, 0) / network_dims[0]; + auto y_min = decoded_box(j, 1) / network_dims[1]; + auto w = (decoded_box(j, 2) - decoded_box(j, 0)) / network_dims[0]; + auto h = (decoded_box(j, 3) - decoded_box(j, 1)) / network_dims[1]; + + bbox.x = x_min + (w / 2); + bbox.y = y_min + (h / 2); + bbox.w = w; + bbox.h = h; + bbox.score = confidence; + bbox.target = class_index; + + decodings.emplace_front(std::make_pair(bbox, mask)); + } + } + + return decodings; +} + + +static xt::xarray dot(xt::xarray mask, xt::xarray reshaped_proto, size_t proto_height, size_t proto_width, size_t mask_num = 32) { + + auto shape = {proto_height, proto_width}; + xt::xarray mask_product(shape); + + for (size_t i = 0; i < mask_product.shape(0); i++) { + for (size_t j = 0; j < mask_product.shape(1); j++) { + for (size_t k = 0; k < mask_num; k++) { + mask_product(i, j) += mask(k) * reshaped_proto(k, i, j); + } + } + } + return mask_product; +} + +ma_err_t YoloV8SegHailo::postprocess() { + // TODO: could be optimized + + boxes_scores_masks_mask_matrix_ = getBoxesScoresMasks(outputs_, classes_); + std::forward_list>> decodings; + + switch (route_) { + case 1023: + decodings = decodeBoxesAndExtractMasks( + boxes_scores_masks_mask_matrix_.boxes, boxes_scores_masks_mask_matrix_.masks, boxes_scores_masks_mask_matrix_.scores, network_dims_, strides_, centers_, 15, threshold_score_); + break; + default: + return MA_ENOTSUP; + } + + nms(decodings, threshold_nms_, true); + + xt::xarray proto = boxes_scores_masks_mask_matrix_.proto_data; + int mask_height = static_cast(proto.shape(0)); + int mask_width = static_cast(proto.shape(1)); + int mask_features = static_cast(proto.shape(2)); + auto reshaped_proto = xt::reshape_view(xt::transpose(xt::reshape_view(proto, {-1, mask_features}), {1, 0}), {-1, mask_height, mask_width}); + + for (const auto& [bbox, curr_mask] : decodings) { + ma_segm2f_t segm; + segm.box = bbox; + + auto mask_product = dot(curr_mask, reshaped_proto, reshaped_proto.shape(1), reshaped_proto.shape(2), curr_mask.shape(0)); + for (auto& v : mask_product) { + v = ma::math::sigmoid(v); + } + + int x1 = (bbox.x - bbox.w / 2) * mask_width; + int y1 = (bbox.y - bbox.h / 2) * mask_height; + int x2 = (bbox.x + bbox.w / 2) * mask_width; + int y2 = (bbox.y + bbox.h / 2) * mask_height; + + segm.mask.width = mask_width; + segm.mask.height = mask_height; + auto sz = mask_width * mask_height; + segm.mask.data.resize(static_cast(std::ceil(static_cast(sz) / 8.f)), 0); // bitwise + + for (int i = y1; i < y2; ++i) { + for (int j = x1; j < x2; ++j) { + if (mask_product(i, j) > 0.5) { + segm.mask.data[i / 8] |= 1 << (i % 8); + } + } + } + + results_.emplace_front(std::move(segm)); + } + + return MA_OK; +} + +} // namespace ma::model + +#endif \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_seg_hailo.h b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_seg_hailo.h new file mode 100644 index 0000000..bc878b9 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_seg_hailo.h @@ -0,0 +1,55 @@ +#ifndef _MA_MODEL_YOLOV8_SEG_HAILO_H_ +#define _MA_MODEL_YOLOV8_SEG_HAILO_H_ + +#include "ma_model_segmentor.h" + +#if MA_USE_ENGINE_HAILO + +#include +#include +#include +#include + +#include +#include + +namespace ma::model { + +namespace _internal { + +struct Quadruple { + std::vector boxes; + xt::xarray scores; + std::vector masks; + xt::xarray proto_data; +}; + +} // namespace _internal + +class YoloV8SegHailo : public Segmentor { +private: + std::vector> centers_; + static std::vector strides_; + std::vector network_dims_; + std::vector outputs_; + _internal::Quadruple boxes_scores_masks_mask_matrix_; + int classes_ = 0; + int32_t route_ = 0; + +protected: + ma_err_t postprocess(); + +public: + YoloV8SegHailo(Engine* engine); + ~YoloV8SegHailo(); + + static bool isValid(Engine* engine); + + static const char* getTag(); +}; + +} // namespace ma::model + +#endif + +#endif // _MA_MODEL_YOLO_H diff --git a/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp b/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp index 8246b97..1cf655b 100644 --- a/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp +++ b/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp @@ -9,22 +9,6 @@ namespace ma::utils { -template , bool> = true> -static inline float compute_iou(const T& box1, const T& box2) { - const float x1 = std::max(box1.x, box2.x); - const float y1 = std::max(box1.y, box2.y); - const float x2 = std::min(box1.x + box1.w, box2.x + box2.w); - const float y2 = std::min(box1.y + box1.h, box2.y + box2.h); - const float w = std::max(0.0f, x2 - x1); - const float h = std::max(0.0f, y2 - y1); - const float inter = w * h; - const float d = box1.w * box1.h + box2.w * box2.h - inter; - if (std::abs(d) < std::numeric_limits::epsilon()) [[unlikely]] { - return 0; - } - return inter / d; -} - template static constexpr void nms_impl(Container& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target) { if constexpr (std::is_same_v>) { @@ -54,7 +38,7 @@ static constexpr void nms_impl(Container& bboxes, float threshold_iou, float thr } } - if constexpr (std::is_same_v>) { + if constexpr (std::is_same>::value) { bboxes.remove_if([](const auto& box) { return box.score == 0; }); } else { bboxes.erase(std::remove_if(bboxes.begin(), bboxes.end(), [](const auto& box) { return box.score == 0; }), bboxes.end()); diff --git a/src/components/sscma-micro/sscma/core/utils/ma_nms.h b/src/components/sscma-micro/sscma/core/utils/ma_nms.h index cc4d52e..8a152c3 100644 --- a/src/components/sscma-micro/sscma/core/utils/ma_nms.h +++ b/src/components/sscma-micro/sscma/core/utils/ma_nms.h @@ -2,8 +2,10 @@ #define _MA_NMS_H_ #include +#include #include #include +#include #include "../ma_types.h" @@ -11,6 +13,22 @@ namespace ma::utils { // skip use of template since it is not allowed +template ::value, bool> = true> +inline float compute_iou(const T& box1, const T& box2) { + const float x1 = std::max(box1.x, box2.x); + const float y1 = std::max(box1.y, box2.y); + const float x2 = std::min(box1.x + box1.w, box2.x + box2.w); + const float y2 = std::min(box1.y + box1.h, box2.y + box2.h); + const float w = std::max(0.0f, x2 - x1); + const float h = std::max(0.0f, y2 - y1); + const float inter = w * h; + const float d = box1.w * box1.h + box2.w * box2.h - inter; + if (std::abs(d) < std::numeric_limits::epsilon()) [[unlikely]] { + return 0; + } + return inter / d; +} + void nms(std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target); void nms(std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target);