diff --git a/src/components/sscma-micro/sscma/core/engine/ma_engine.h b/src/components/sscma-micro/sscma/core/engine/ma_engine.h index ac71936..e7b5fd1 100644 --- a/src/components/sscma-micro/sscma/core/engine/ma_engine.h +++ b/src/components/sscma-micro/sscma/core/engine/ma_engine.h @@ -13,9 +13,9 @@ using EngineDefault = ma::engine::EngineTFLite; using EngineDefault = ma::engine::EngineCVI; #endif -#ifdef MA_USE_ENGINE_HALIO -#include "ma_engine_halio.h" -using EngineDefault = ma::engine::EngineHalio; +#ifdef MA_USE_ENGINE_HAILO +#include "ma_engine_hailo.h" +using EngineDefault = ma::engine::EngineHailo; #endif #endif // _MA_ENGINE_H_ \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp b/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp similarity index 85% rename from src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp rename to src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp index 3a2bbb3..d8792a7 100644 --- a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp +++ b/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.cpp @@ -1,7 +1,8 @@ -#include "ma_engine_halio.h" +#include "ma_engine_hailo.h" -#if MA_USE_ENGINE_HALIO +#if MA_USE_ENGINE_HAILO +#include #include #include #include @@ -11,33 +12,57 @@ namespace ma::engine { using namespace std; -EngineHalio::EngineHalio() : _vdevice(nullptr), _model(nullptr), _configured_model(nullptr), _bindings(nullptr) {} +EngineHailo::EngineHailo() : _vdevice(nullptr), _model(nullptr), _configured_model(nullptr), _bindings(nullptr) {} -EngineHalio::~EngineHalio() {} +EngineHailo::~EngineHailo() {} -ma_err_t EngineHalio::init() { +ma_err_t EngineHailo::init() { if (_vdevice) { return MA_OK; } - auto vdevice = VDevice::create(); - if (!vdevice) { + // TODO: optimize + static auto get_vdevice_f = []() -> shared_ptr { + static unique_ptr vdevice = nullptr; + if (!vdevice) { + auto dev = VDevice::create(); + 
if (!dev) { + return nullptr; + } + vdevice = move(dev.release()); + } + if (!vdevice) { + return nullptr; + } + auto shared = vdevice.get(); + auto mgr = &vdevice; + static atomic ref_count = 0; + ref_count.fetch_add(1); + return shared_ptr(shared, [mgr](VDevice*) { + if (mgr) { + if (ref_count.fetch_sub(1) == 1) { + mgr->reset(); + } + } + }); + }; + _vdevice = get_vdevice_f(); + if (!_vdevice) { return MA_FAILED; } - _vdevice = move(vdevice.value()); return MA_OK; } -ma_err_t EngineHalio::init(size_t size) { +ma_err_t EngineHailo::init(size_t size) { return init(); } -ma_err_t EngineHalio::init(void* pool, size_t size) { +ma_err_t EngineHailo::init(void* pool, size_t size) { return init(); } -ma_err_t EngineHalio::run() { +ma_err_t EngineHailo::run() { if (!_configured_model || !_bindings) { return MA_FAILED; } @@ -65,7 +90,7 @@ ma_err_t EngineHalio::run() { } #if MA_USE_FILESYSTEM -ma_err_t EngineHalio::load(const string& model_path) { +ma_err_t EngineHailo::load(const string& model_path) { { _input_tensors.clear(); @@ -324,12 +349,12 @@ ma_err_t EngineHalio::load(const string& model_path) { return MA_OK; } -ma_err_t EngineHalio::load(const char* model_path) { +ma_err_t EngineHailo::load(const char* model_path) { return load(string(model_path)); } #endif -ma_err_t EngineHalio::load(const void* model_data, size_t model_size) { +ma_err_t EngineHailo::load(const void* model_data, size_t model_size) { #if MA_USE_FILESYSTEM string model_path(reinterpret_cast(model_data), model_size); return load(model_path); @@ -338,15 +363,15 @@ ma_err_t EngineHalio::load(const void* model_data, size_t model_size) { #endif } -int32_t EngineHalio::getInputSize() { +int32_t EngineHailo::getInputSize() { return _input_tensors.size(); } -int32_t EngineHalio::getOutputSize() { +int32_t EngineHailo::getOutputSize() { return _output_tensors.size(); } -ma_tensor_t EngineHalio::getInput(int32_t index) { +ma_tensor_t EngineHailo::getInput(int32_t index) { if (index < 0 || index >= 
static_cast(_input_tensors.size())) { return {0}; } @@ -354,7 +379,7 @@ ma_tensor_t EngineHalio::getInput(int32_t index) { return _input_tensors[index] ? *_input_tensors[index] : ma_tensor_t{0}; } -ma_tensor_t EngineHalio::getOutput(int32_t index) { +ma_tensor_t EngineHailo::getOutput(int32_t index) { if (index < 0 || index >= static_cast(_output_tensors.size())) { return {0}; } @@ -362,7 +387,7 @@ ma_tensor_t EngineHalio::getOutput(int32_t index) { return _output_tensors[index] ? *_output_tensors[index] : ma_tensor_t{0}; } -ma_shape_t EngineHalio::getInputShape(int32_t index) { +ma_shape_t EngineHailo::getInputShape(int32_t index) { if (index < 0 || index >= static_cast(_input_tensors.size())) { return {0}; } @@ -370,7 +395,7 @@ ma_shape_t EngineHalio::getInputShape(int32_t index) { return _input_tensors[index] ? _input_tensors[index]->shape : ma_shape_t{0}; } -ma_shape_t EngineHalio::getOutputShape(int32_t index) { +ma_shape_t EngineHailo::getOutputShape(int32_t index) { if (index < 0 || index >= static_cast(_output_tensors.size())) { return {0}; } @@ -378,7 +403,7 @@ ma_shape_t EngineHalio::getOutputShape(int32_t index) { return _output_tensors[index] ? _output_tensors[index]->shape : ma_shape_t{0}; } -ma_quant_param_t EngineHalio::getInputQuantParam(int32_t index) { +ma_quant_param_t EngineHailo::getInputQuantParam(int32_t index) { if (index < 0 || index >= static_cast(_input_tensors.size())) { return {0}; } @@ -386,7 +411,7 @@ ma_quant_param_t EngineHalio::getInputQuantParam(int32_t index) { return _input_tensors[index] ? 
_input_tensors[index]->quant_param : ma_quant_param_t{0}; } -ma_quant_param_t EngineHalio::getOutputQuantParam(int32_t index) { +ma_quant_param_t EngineHailo::getOutputQuantParam(int32_t index) { if (index < 0 || index >= static_cast(_output_tensors.size())) { return {0}; } @@ -395,7 +420,17 @@ ma_quant_param_t EngineHalio::getOutputQuantParam(int32_t index) { } -ma_err_t EngineHalio::setInput(int32_t index, const ma_tensor_t& tensor) { +ma_err_t EngineHailo::setInput(int32_t index, const ma_tensor_t& tensor) { + if (index < 0 || index >= static_cast(_input_tensors.size())) { /* index must address an existing input slot */ + return MA_EINVAL; + } + + if (!_input_tensors[index] || tensor.size != _input_tensors[index]->size) { /* reject an empty slot or a byte-size mismatch before copying */ + return MA_EINVAL; + } + + std::memcpy(_input_tensors[index]->data.data, tensor.data.data, tensor.size); /* copy the caller's bytes into the stored input tensor's buffer */ + - return MA_ENOTSUP; + return MA_OK; /* the copy succeeded, so report success instead of "not supported" */ } diff --git a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h b/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.h similarity index 87% rename from src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h rename to src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.h index 2175767..76265c7 100644 --- a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h +++ b/src/components/sscma-micro/sscma/core/engine/ma_engine_hailo.h @@ -1,11 +1,11 @@ -#ifndef _MA_ENGINE_HALIO_H_ -#define _MA_ENGINE_HALIO_H_ +#ifndef _MA_ENGINE_HAILO_H_ +#define _MA_ENGINE_HAILO_H_ #include "../ma_common.h" -#if MA_USE_ENGINE_HALIO +#if MA_USE_ENGINE_HAILO -#include "ma_engine.h" +#include "ma_engine_base.h" #include #include @@ -21,12 +21,12 @@ namespace ma::engine { using namespace std; using namespace hailort; -class EngineHalio final : public Engine { +class EngineHailo final : public Engine { public: using ExternalHandler = function; - EngineHalio(); - ~EngineHalio() override; + EngineHailo(); + ~EngineHailo() override; ma_err_t init() override; ma_err_t init(size_t size) override; @@ -52,7 +52,7 @@ class EngineHalio final : public Engine { ma_err_t 
setInput(int32_t index, const ma_tensor_t& tensor) override; private: - unique_ptr _vdevice; + shared_ptr _vdevice; shared_ptr _model; shared_ptr _configured_model; shared_ptr _bindings; diff --git a/src/components/sscma-micro/sscma/core/ma_common.h b/src/components/sscma-micro/sscma/core/ma_common.h index 3d185d5..ebe01c7 100644 --- a/src/components/sscma-micro/sscma/core/ma_common.h +++ b/src/components/sscma-micro/sscma/core/ma_common.h @@ -15,7 +15,7 @@ #include "ma_exception.h" #include "ma_types.h" -#define MA_VERSION "2024.11.13" +#define MA_VERSION "2024.11.25" #define MA_VERSION_LENTH_MAX 32 #endif // MA_COMMON_H diff --git a/src/components/sscma-micro/sscma/core/ma_types.h b/src/components/sscma-micro/sscma/core/ma_types.h index 984c65e..4b5d7f3 100644 --- a/src/components/sscma-micro/sscma/core/ma_types.h +++ b/src/components/sscma-micro/sscma/core/ma_types.h @@ -267,7 +267,7 @@ typedef enum { MA_OUTPUT_TYPE_POINT = 0x0200, MA_OUTPUT_TYPE_BBOX = 0x0300, MA_OUTPUT_TYPE_KEYPOINT = 0x0400, - MA_OUTPUT_TYPE_SEGMENTATION = 0x0500, + MA_OUTPUT_TYPE_SEGMENT = 0x0500, } ma_output_type_t; diff --git a/src/components/sscma-micro/sscma/core/math/ma_math.h b/src/components/sscma-micro/sscma/core/math/ma_math.h index 2822488..1b1e2dd 100644 --- a/src/components/sscma-micro/sscma/core/math/ma_math.h +++ b/src/components/sscma-micro/sscma/core/math/ma_math.h @@ -3,5 +3,6 @@ #include "ma_math_scalars.h" #include "ma_math_vectors.h" +#include "ma_math_matrix.h" #endif // _MA_MATH_H diff --git a/src/components/sscma-micro/sscma/core/math/ma_math_matrix.cpp b/src/components/sscma-micro/sscma/core/math/ma_math_matrix.cpp new file mode 100644 index 0000000..46187be --- /dev/null +++ b/src/components/sscma-micro/sscma/core/math/ma_math_matrix.cpp @@ -0,0 +1,22 @@ +#include "ma_math_matrix.h" +#include "ma_math_vectors.h" + +#include + +namespace ma::math { + +void softmax2D(float* data, size_t rows, size_t cols) { + size_t size = rows * cols; + for (size_t i = 0; i < size; i 
+= cols) { + softmax(&data[i], cols); + } +} + +void fastSoftmax2D(float* data, size_t rows, size_t cols) { + size_t size = rows * cols; + for (size_t i = 0; i < size; i += cols) { + fastSoftmax(&data[i], cols); + } +} + +} \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/math/ma_math_matrix.h b/src/components/sscma-micro/sscma/core/math/ma_math_matrix.h new file mode 100644 index 0000000..c2ffb18 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/math/ma_math_matrix.h @@ -0,0 +1,32 @@ +#ifndef _MA_MATH_MATRIX_H_ +#define _MA_MATH_MATRIX_H_ + +#include +#include + +#if MA_USE_LIB_XTENSOR +#include +#include +#include +#endif + +namespace ma::math { + +void softmax2D(float* data, size_t rows, size_t cols); + +void fastSoftmax2D(float* data, size_t rows, size_t cols); + +#if MA_USE_LIB_XTENSOR +template +static void dequantizeValues2D(xt::xarray& dequantized_outputs, int index, const xt::xarray& quantized_outputs, size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp) { + for (size_t i = 0; i < dim1; i++) { + for (size_t j = 0; j < dim2; j++) { + dequantized_outputs(i, j) = (float(quantized_outputs(index, i, j)) - qp_zp) * qp_scale; + } + } +} +#endif + +} // namespace ma::math + +#endif \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/math/ma_math_scalars.h b/src/components/sscma-micro/sscma/core/math/ma_math_scalars.h index 1f8b494..cad47bf 100644 --- a/src/components/sscma-micro/sscma/core/math/ma_math_scalars.h +++ b/src/components/sscma-micro/sscma/core/math/ma_math_scalars.h @@ -21,8 +21,8 @@ constexpr inline float fastLn(float x) { return -std::numeric_limits::infinity(); } - auto bx{*reinterpret_cast(&x)}; - auto ex{bx >> 23}; + auto bx{*reinterpret_cast(&x)}; + auto ex{bx >> 23}; const auto t{static_cast(ex) - static_cast(127)}; bx = 1065353216 | (bx & 8388607); @@ -41,7 +41,8 @@ constexpr inline float fastExp(float x) { const float c{8388608.f}; const float d{2139095040.f}; - if 
((x < c) | (x > d)) x = (x < c) ? 0.0f : d; + if ((x < c) | (x > d)) + x = (x < c) ? 0.0f : d; uint32_t n = static_cast(x); x = *reinterpret_cast(&n); @@ -49,9 +50,13 @@ constexpr inline float fastExp(float x) { return x; } -constexpr inline float sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); } +constexpr inline float sigmoid(float x) { + return 1.0f / (1.0f + std::exp(-x)); +} -constexpr inline float fastSigmoid(float x) { return 1.0f / (1.0f + fastExp(-x)); } +constexpr inline float fastSigmoid(float x) { + return 1.0f / (1.0f + fastExp(-x)); +} constexpr inline float inverseSigmoid(float x) { float denominator = 1.0f - x; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp index 9b51db1..d9f07c5 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp @@ -134,8 +134,9 @@ const void* Classifier::getInput() { } ma_err_t Classifier::run(const ma_img_t* img) { - // MA_ASSERT(img != nullptr); + input_img_ = img; + return underlyingRun(); } diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp index 52a0b53..90a44bf 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp @@ -63,8 +63,9 @@ const void* Detector::getInput() { } ma_err_t Detector::run(const ma_img_t* img) { - // MA_ASSERT(img != nullptr); + input_img_ = img; + return underlyingRun(); } diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp index 551d9f3..573899d 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp @@ -11,58 +11,63 @@ 
Model* ModelFactory::create(Engine* engine, size_t algorithm_id) { } switch (algorithm_id) { - case 0: - case MA_MODEL_TYPE_FOMO: - if (FOMO::isValid(engine)) { - return new FOMO(engine); - } - - case MA_MODEL_TYPE_IMCLS: - if (Classifier::isValid(engine)) { - return new Classifier(engine); - } + case 0: + case MA_MODEL_TYPE_FOMO: + if (FOMO::isValid(engine)) { + return new FOMO(engine); + } - case MA_MODEL_TYPE_PFLD: - if (PFLD::isValid(engine)) { - return new PFLD(engine); - } + case MA_MODEL_TYPE_IMCLS: + if (Classifier::isValid(engine)) { + return new Classifier(engine); + } - case MA_MODEL_TYPE_YOLOV5: - if (YoloV5::isValid(engine)) { - return new YoloV5(engine); - } + case MA_MODEL_TYPE_PFLD: + if (PFLD::isValid(engine)) { + return new PFLD(engine); + } - case MA_MODEL_TYPE_YOLOV8_POSE: - if (YoloV8Pose::isValid(engine)) { - return new YoloV8Pose(engine); - } + case MA_MODEL_TYPE_YOLOV5: + if (YoloV5::isValid(engine)) { + return new YoloV5(engine); + } - case MA_MODEL_TYPE_YOLOV8: - if (YoloV8::isValid(engine)) { - return new YoloV8(engine); - } + case MA_MODEL_TYPE_YOLOV8_POSE: +#if MA_USE_ENGINE_HAILO + if (YoloV8PoseHailo::isValid(engine)) { + return new YoloV8PoseHailo(engine); + } +#endif + if (YoloV8Pose::isValid(engine)) { + return new YoloV8Pose(engine); + } - case MA_MODEL_TYPE_NVIDIA_DET: - if (NvidiaDet::isValid(engine)) { - return new NvidiaDet(engine); - } + case MA_MODEL_TYPE_YOLOV8: + if (YoloV8::isValid(engine)) { + return new YoloV8(engine); + } - case MA_MODEL_TYPE_YOLO_WORLD: - if (YoloWorld::isValid(engine)) { - return new YoloWorld(engine); - } - case MA_MODEL_TYPE_YOLO11: - if (Yolo11::isValid(engine)) { - return new Yolo11(engine); - } - case MA_MODEL_TYPE_YOLO11_POSE: - if (Yolo11Pose::isValid(engine)) { - return new Yolo11Pose(engine); - } - case MA_MODEL_TYPE_YOLO11_SEG: - if (Yolo11Seg::isValid(engine)) { - return new Yolo11Seg(engine); - } + case MA_MODEL_TYPE_NVIDIA_DET: + if (NvidiaDet::isValid(engine)) { + return new 
NvidiaDet(engine); + } + + case MA_MODEL_TYPE_YOLO_WORLD: + if (YoloWorld::isValid(engine)) { + return new YoloWorld(engine); + } + case MA_MODEL_TYPE_YOLO11: + if (Yolo11::isValid(engine)) { + return new Yolo11(engine); + } + case MA_MODEL_TYPE_YOLO11_POSE: + if (Yolo11Pose::isValid(engine)) { + return new Yolo11Pose(engine); + } + case MA_MODEL_TYPE_YOLO11_SEG: + if (Yolo11Seg::isValid(engine)) { + return new Yolo11Seg(engine); + } } return nullptr; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_factory.h b/src/components/sscma-micro/sscma/core/model/ma_model_factory.h index c462610..f86bfda 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_factory.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_factory.h @@ -20,6 +20,7 @@ #include "ma_model_yolov5.h" #include "ma_model_yolov8.h" #include "ma_model_yolov8_pose.h" +#include "ma_model_yolov8_pose_hailo.h" namespace ma { diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_pfld.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_pfld.cpp index e52c189..190bb80 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_pfld.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_pfld.cpp @@ -1,7 +1,7 @@ #include "ma_model_pfld.h" #include -#include +#include namespace ma::model { @@ -96,11 +96,9 @@ ma_err_t PFLD::postProcessI8() { point.score = 1.0; point.target = i / 2; - results_.push_back(std::move(point)); + results_.push_front(std::move(point)); } - results_.shrink_to_fit(); - return MA_OK; } diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_pfld.h b/src/components/sscma-micro/sscma/core/model/ma_model_pfld.h index c8b173a..a9da5e6 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_pfld.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_pfld.h @@ -1,8 +1,6 @@ #ifndef _MA_MODEL_PFLD_ #define _MA_MODEL_PFLD_ -#include - #include "ma_model_point_detector.h" namespace ma::model { diff --git 
a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp index f4c78c4..b0c326f 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp @@ -30,13 +30,18 @@ PointDetector::PointDetector(Engine* p_engine, const char* name, ma_model_type_t PointDetector::~PointDetector() {} -const std::vector& PointDetector::getResults() const { +const std::forward_list& PointDetector::getResults() const { return results_; } ma_err_t PointDetector::preprocess() { ma_err_t ret = MA_OK; + + if (input_img_ == nullptr) { + return MA_OK; + } + ret = ma::cv::convert(input_img_, &img_); if (ret != MA_OK) { return ret; @@ -51,7 +56,6 @@ ma_err_t PointDetector::preprocess() { } ma_err_t PointDetector::run(const ma_img_t* img) { - MA_ASSERT(img != nullptr); input_img_ = img; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h index 80761b6..e98d65f 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h @@ -1,7 +1,7 @@ #ifndef _MA_MODEL_POINT_DETECTOR_H_ #define _MA_MODEL_POINT_DETECTOR_H_ -#include +#include #include "ma_model_base.h" @@ -17,7 +17,7 @@ class PointDetector : public Model { bool is_nhwc_; - std::vector results_; + std::forward_list results_; protected: ma_err_t preprocess() override; @@ -26,7 +26,7 @@ class PointDetector : public Model { PointDetector(Engine* engine, const char* name, ma_model_type_t type); virtual ~PointDetector(); - const std::vector& getResults() const; + const std::forward_list& getResults() const; ma_err_t run(const ma_img_t* img); diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp 
b/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp index f0efde5..1d84ac1 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp @@ -42,6 +42,10 @@ const void* PoseDetector::getInput() { ma_err_t PoseDetector::preprocess() { ma_err_t ret = MA_OK; + if (input_img_ == nullptr) { + return MA_OK; + } + ret = ma::cv::convert(input_img_, &img_); if (ret != MA_OK) { return ret; @@ -56,7 +60,6 @@ ma_err_t PoseDetector::preprocess() { } ma_err_t PoseDetector::run(const ma_img_t* img) { - MA_ASSERT(img != nullptr); input_img_ = img; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_segmentor.cpp similarity index 81% rename from src/components/sscma-micro/sscma/core/model/ma_model_segmenter.cpp rename to src/components/sscma-micro/sscma/core/model/ma_model_segmentor.cpp index e6c54ef..a3b1ad6 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_segmentor.cpp @@ -1,12 +1,12 @@ -#include "ma_model_segmenter.h" +#include "ma_model_segmentor.h" #include "../cv/ma_cv.h" namespace ma::model { -constexpr char TAG[] = "ma::model::segmenter"; +constexpr char TAG[] = "ma::model::Segmentor"; -Segmenter::Segmenter(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_SEGMENTATION | type) { +Segmentor::Segmentor(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_SEGMENT | type) { input_ = p_engine_->getInput(0); threshold_nms_ = 0.45; threshold_score_ = 0.25; @@ -29,10 +29,14 @@ Segmenter::Segmenter(Engine* p_engine, const char* name, ma_model_type_t type) : img_.data = input_.data.u8; } -Segmenter::~Segmenter() {} -ma_err_t Segmenter::preprocess() { 
+Segmentor::~Segmentor() {} +ma_err_t Segmentor::preprocess() { ma_err_t ret = MA_OK; + if (input_img_ == nullptr) { + return MA_OK; + } + ret = ma::cv::convert(input_img_, &img_); if (ret != MA_OK) { return ret; @@ -46,23 +50,22 @@ ma_err_t Segmenter::preprocess() { return ret; } -const void* Segmenter::getInput() { +const void* Segmentor::getInput() { return static_cast(&img_); } -const std::forward_list& Segmenter::getResults() const { +const std::forward_list& Segmentor::getResults() const { return results_; } -ma_err_t Segmenter::run(const ma_img_t* img) { - MA_ASSERT(img != nullptr); +ma_err_t Segmentor::run(const ma_img_t* img) { input_img_ = img; return underlyingRun(); } -ma_err_t Segmenter::setConfig(ma_model_cfg_opt_t opt, ...) { +ma_err_t Segmentor::setConfig(ma_model_cfg_opt_t opt, ...) { ma_err_t ret = MA_OK; va_list args; va_start(args, opt); @@ -83,7 +86,7 @@ ma_err_t Segmenter::setConfig(ma_model_cfg_opt_t opt, ...) { return ret; } -ma_err_t Segmenter::getConfig(ma_model_cfg_opt_t opt, ...) { +ma_err_t Segmentor::getConfig(ma_model_cfg_opt_t opt, ...) 
{ ma_err_t ret = MA_OK; va_list args; void* p_arg = nullptr; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.h b/src/components/sscma-micro/sscma/core/model/ma_model_segmentor.h similarity index 74% rename from src/components/sscma-micro/sscma/core/model/ma_model_segmenter.h rename to src/components/sscma-micro/sscma/core/model/ma_model_segmentor.h index 258b340..05d2023 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_segmentor.h @@ -1,5 +1,5 @@ -#ifndef _MA_MODEL_SEGMENTER_H_ -#define _MA_MODEL_SEGMENTER_H_ +#ifndef _MA_MODEL_SEGMENTOR_H_ +#define _MA_MODEL_SEGMENTOR_H_ #include @@ -7,7 +7,7 @@ namespace ma::model { -class Segmenter : public Model { +class Segmentor : public Model { protected: ma_tensor_t input_; ma_img_t img_; @@ -24,8 +24,8 @@ class Segmenter : public Model { ma_err_t preprocess() override; public: - Segmenter(Engine* engine, const char* name, ma_model_type_t type); - virtual ~Segmenter(); + Segmentor(Engine* engine, const char* name, ma_model_type_t type); + virtual ~Segmentor(); const std::forward_list& getResults() const; @@ -40,4 +40,4 @@ class Segmenter : public Model { } // namespace ma::model -#endif // _MA_MODEL_SEGMENTER_H_ +#endif // _MA_MODEL_SEGMENTOR_H_ diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp index a9ccfe8..5df2ea4 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp @@ -15,7 +15,7 @@ constexpr char TAG[] = "ma::model::yolo11_seg"; namespace ma::model { -Yolo11Seg::Yolo11Seg(Engine* p_engine_) : Segmenter(p_engine_, "yolo11_seg", MA_MODEL_TYPE_YOLO11_SEG) { +Yolo11Seg::Yolo11Seg(Engine* p_engine_) : Segmentor(p_engine_, "yolo11_seg", MA_MODEL_TYPE_YOLO11_SEG) { MA_ASSERT(p_engine_ != nullptr); bboxes_ = 
p_engine_->getOutput(0); diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h index f7025e3..b0e7c6a 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h @@ -6,11 +6,11 @@ #include #include -#include "ma_model_segmenter.h" +#include "ma_model_segmentor.h" namespace ma::model { -class Yolo11Seg : public Segmenter { +class Yolo11Seg : public Segmentor { private: ma_tensor_t bboxes_; ma_tensor_t protos_; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp index 07619f2..9aee8ab 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include "../utils/ma_nms.h" @@ -24,7 +24,7 @@ YoloV5::YoloV5(Engine* p_engine_) : Detector(p_engine_, "yolov5", MA_MODEL_TYPE_ YoloV5::~YoloV5() {} static bool generalValid(Engine* engine) { - const auto inputs_count = engine->getInputSize(); + const auto inputs_count = engine->getInputSize(); const auto outputs_count = engine->getOutputSize(); if (inputs_count != 1 || outputs_count != 1) { @@ -37,8 +37,7 @@ static bool generalValid(Engine* engine) { if (input_shape.size != 4) return false; - int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], - c = input_shape.dims[3]; + int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], c = input_shape.dims[3]; bool is_nhwc = c == 3 || c == 1; if (!is_nhwc) @@ -55,19 +54,18 @@ static bool generalValid(Engine* engine) { if (output_shape.size != 3 && output_shape.size != 4) return false; - if (output_shape.dims[0] != 1 || output_shape.dims[1] != ibox_len || output_shape.dims[2] < 6 || - output_shape.dims[2] > 85) + if 
(output_shape.dims[0] != 1 || output_shape.dims[1] != ibox_len || output_shape.dims[2] < 6 || output_shape.dims[2] > 85) return false; return true; } static bool nmsValid(Engine* engine) { -#if MA_USE_ENGINE_HALIO +#if MA_USE_ENGINE_HAILO if (engine->getInputSize() != 1 || engine->getOutputSize() != 1) return false; - auto input = engine->getInput(0); + auto input = engine->getInput(0); auto output = engine->getOutput(0); if (input.shape.size != 4 || output.shape.size != 4) @@ -86,7 +84,7 @@ static bool nmsValid(Engine* engine) { auto mb = output.shape.dims[2]; auto f = output.shape.dims[3]; - if (b != 1 || cs <= 0 || mb <= 1 || f != 0) + if (b != 1 || cs <= 0 || mb <= 1 || f != 0) return false; return true; @@ -146,12 +144,7 @@ ma_err_t YoloV5::generalPostProcess() { h /= img_.height; } - ma_bbox_t box{.x = MA_CLIP(x, 0, 1.0f), - .y = MA_CLIP(y, 0, 1.0f), - .w = MA_CLIP(w, 0, 1.0f), - .h = MA_CLIP(h, 0, 1.0f), - .score = score, - .target = target}; + ma_bbox_t box{.x = MA_CLIP(x, 0, 1.0f), .y = MA_CLIP(y, 0, 1.0f), .w = MA_CLIP(w, 0, 1.0f), .h = MA_CLIP(h, 0, 1.0f), .score = score, .target = target}; results_.emplace_front(box); } @@ -187,12 +180,7 @@ ma_err_t YoloV5::generalPostProcess() { h /= img_.height; } - ma_bbox_t box{.x = MA_CLIP(x, 0, 1.0f), - .y = MA_CLIP(y, 0, 1.0f), - .w = MA_CLIP(w, 0, 1.0f), - .h = MA_CLIP(h, 0, 1.0f), - .score = score, - .target = target}; + ma_bbox_t box{.x = MA_CLIP(x, 0, 1.0f), .y = MA_CLIP(y, 0, 1.0f), .w = MA_CLIP(w, 0, 1.0f), .h = MA_CLIP(h, 0, 1.0f), .score = score, .target = target}; results_.emplace_front(box); } @@ -208,7 +196,7 @@ ma_err_t YoloV5::generalPostProcess() { } ma_err_t YoloV5::nmsPostProcess() { -#if MA_USE_ENGINE_HALIO +#if MA_USE_ENGINE_HAILO auto& output = output_; @@ -222,7 +210,7 @@ ma_err_t YoloV5::nmsPostProcess() { hailo_nms_shape_t nms_shape; if (output.external_handler) { - auto rc = (*reinterpret_cast(output.external_handler))(4, &nms_shape, sizeof(hailo_nms_shape_t)); + auto rc = 
(*reinterpret_cast(output.external_handler))(4, &nms_shape, sizeof(hailo_nms_shape_t)); if (rc == MA_OK) { w = nms_shape.number_of_classes; h = nms_shape.max_bboxes_per_class; @@ -254,7 +242,7 @@ ma_err_t YoloV5::nmsPostProcess() { ptr += sizeof(P); ma_bbox_t res; - + auto x_min = static_cast(bbox.x_min - zp) * scale; auto y_min = static_cast(bbox.y_min - zp) * scale; auto x_max = static_cast(bbox.x_max - zp) * scale; @@ -264,7 +252,7 @@ ma_err_t YoloV5::nmsPostProcess() { res.x = x_min + res.w * 0.5; res.y = y_min + res.h * 0.5; res.score = static_cast(bbox.score - zp) * scale; - + res.target = static_cast(i); res.x = MA_CLIP(res.x, 0, 1.0f); @@ -276,7 +264,7 @@ ma_err_t YoloV5::nmsPostProcess() { } } } break; - + case MA_TENSOR_TYPE_NMS_BBOX_F32: { using T = float32_t; using P = hailo_bbox_float32_t; @@ -297,13 +285,13 @@ ma_err_t YoloV5::nmsPostProcess() { ptr += sizeof(P); ma_bbox_t res; - + res.w = bbox.x_max - bbox.x_min; res.h = bbox.y_max - bbox.y_min; res.x = bbox.x_min + res.w * 0.5; res.y = bbox.y_min + res.h * 0.5; res.score = bbox.score; - + res.target = static_cast(i); res.x = MA_CLIP(res.x, 0, 1.0f); @@ -315,11 +303,15 @@ ma_err_t YoloV5::nmsPostProcess() { } } } break; - + default: return MA_ENOTSUP; } + ma::utils::nms(results_, threshold_nms_, threshold_score_, false, false); + + results_.sort([](const ma_bbox_t& a, const ma_bbox_t& b) { return a.x < b.x; }); + return MA_OK; #else return MA_FAILED; @@ -332,17 +324,17 @@ ma_err_t YoloV5::postprocess() { switch (output_.type) { case MA_TENSOR_TYPE_NMS_BBOX_U16: case MA_TENSOR_TYPE_NMS_BBOX_F32: { -#if MA_USE_ENGINE_HALIO +#if MA_USE_ENGINE_HAILO // TODO: can be optimized by whihout calling this handler for each frame - if (output.external_handler) { - auto ph = reinterpret_cast(output.external_handler); + if (output_.external_handler) { + auto ph = reinterpret_cast(output_.external_handler); float thr = threshold_score_; auto rc = (*ph)(1, &thr, sizeof(float)); if (rc == MA_OK) { threshold_score_ = 
thr; } thr = threshold_nms_; - rc = (*ph)(3, &thr, sizeof(float)); + rc = (*ph)(3, &thr, sizeof(float)); if (rc == MA_OK) { threshold_nms_ = thr; } @@ -355,6 +347,7 @@ ma_err_t YoloV5::postprocess() { return generalPostProcess(); } + return MA_ENOTSUP; } } // namespace ma::model diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp index 5fbbc38..781b836 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp @@ -24,7 +24,7 @@ static inline decltype(auto) estimateTensorHW(const ma_shape_t& shape) { return is_nhwc ? std::make_pair(shape.dims[1], shape.dims[2]) : std::make_pair(shape.dims[2], shape.dims[3]); } -YoloV8Pose::YoloV8Pose(Engine* p_engine_) : PoseDetector(p_engine_, "yolo_world", MA_MODEL_TYPE_YOLO_WORLD) { +YoloV8Pose::YoloV8Pose(Engine* p_engine_) : PoseDetector(p_engine_, "yolov8_pose", MA_MODEL_TYPE_YOLOV8_POSE) { MA_ASSERT(p_engine_ != nullptr); for (size_t i = 0; i < num_outputs_; ++i) { @@ -153,7 +153,7 @@ bool YoloV8Pose::isValid(Engine* engine) { } const char* YoloV8Pose::getTag() { - return "ma::model::yolo_world"; + return "ma::model::yolov8_pose"; } ma_err_t YoloV8Pose::postprocess() { diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose_hailo.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose_hailo.cpp new file mode 100644 index 0000000..b637dc8 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose_hailo.cpp @@ -0,0 +1,373 @@ +#include "ma_model_yolov8_pose_hailo.h" + +#if MA_USE_ENGINE_HAILO + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../math/ma_math.h" +#include "../utils/ma_anchors.h" +#include "../utils/ma_nms.h" + +namespace ma::model { + +static inline 
decltype(auto) estimateTensorHW(const ma_shape_t& shape) { + if (shape.size != 4) { + int32_t ph = 0; + return std::make_pair(ph, ph); + } + const auto is_nhwc{shape.dims[3] == 3 || shape.dims[3] == 1}; + + return is_nhwc ? std::make_pair(shape.dims[1], shape.dims[2]) : std::make_pair(shape.dims[2], shape.dims[3]); +} + +std::vector YoloV8PoseHailo::strides_ = {8, 16, 32}; + +/** + * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + **/ +static decltype(auto) getBoxesScoresKeypoints(std::vector& tensors, int num_classes) { + std::vector outputs_boxes(tensors.size() / 3); + std::vector outputs_keypoints(tensors.size() / 3); + + int total_scores = 0; + for (uint i = 0; i < tensors.size(); i = i + 3) { + auto w = tensors[i + 1].shape.dims[1]; // w + auto h = tensors[i + 1].shape.dims[2]; // h + total_scores += w * h; + } + + std::vector scores_shape = {(long unsigned int)total_scores, (long unsigned int)num_classes}; + + xt::xarray scores(scores_shape); + + int view_index_scores = 0; + + for (uint i = 0; i < tensors.size(); i = i + 3) { + outputs_boxes[i / 3] = tensors[i]; + + auto& tensor = tensors[i + 1]; + std::vector shape = {(size_t)tensor.shape.dims[1], (size_t)tensor.shape.dims[2], (size_t)tensor.shape.dims[3]}; + xt::xarray xtensor = xt::adapt(tensor.data.u8, tensor.size, xt::no_ownership(), shape); + auto dequantized_output_s = (xtensor - tensor.quant_param.zero_point) * tensor.quant_param.scale; + + int num_proposals_scores = dequantized_output_s.shape(0) * dequantized_output_s.shape(1); + + auto output_scores = xt::view(dequantized_output_s, xt::all(), xt::all(), xt::all()); + xt::view(scores, xt::range(view_index_scores, view_index_scores + num_proposals_scores), xt::all()) = xt::reshape_view(output_scores, {num_proposals_scores, num_classes}); + view_index_scores += num_proposals_scores; + + outputs_keypoints[i / 3] = tensors[i + 2]; + } + 
+ return _internal::Triple{outputs_boxes, scores, outputs_keypoints}; +} + + +YoloV8PoseHailo::YoloV8PoseHailo(Engine* p_engine_) : PoseDetector(p_engine_, "yolov8_pose", MA_MODEL_TYPE_YOLOV8_POSE) { + MA_ASSERT(p_engine_ != nullptr); + + threshold_score_ = 0.6; + threshold_nms_ = 0.7; + + outputs_.resize(9); + for (size_t i = 0; i < outputs_.size(); ++i) { + outputs_[i] = p_engine_->getOutput(i); + } + + std::sort(outputs_.begin(), outputs_.end(), [](const ma_tensor_t& a, const ma_tensor_t& b) { return a.shape.dims[1] > b.shape.dims[1]; }); + + auto update_route_f = [&route = route_](ma_tensor_type_t t, int i) { + switch (t) { + case MA_TENSOR_TYPE_U8: + route |= 1 << i; + break; + case MA_TENSOR_TYPE_U16: + route |= 1 << (i + 9); + break; + default: + break; + } + }; + + std::vector idx(outputs_.size()); + for (size_t i = 0; i < outputs_.size(); i += 3) { + for (size_t j = 0; j < 3; ++j) { + auto at = i + j; + switch (outputs_[at].shape.dims[3]) { + case 1: + idx[i + 1] = at; + break; + case 64: + idx[i] = at; + break; + default: + idx[i + 2] = at; + } + } + } + std::vector reordered_outputs(outputs_.size()); + for (size_t i = 0; i < outputs_.size(); ++i) { + reordered_outputs[i] = outputs_[idx[i]]; + update_route_f(reordered_outputs[i].type, i); + } + outputs_ = std::move(reordered_outputs); + + const auto [h, w] = estimateTensorHW(p_engine_->getInputShape(0)); + + centers_ = ma::utils::generateAnchorMatrix(strides_, {static_cast(w), static_cast(h)}, 3, 0, 0); + network_dims_ = {w, h}; +} + +YoloV8PoseHailo::~YoloV8PoseHailo() {} + +bool YoloV8PoseHailo::isValid(Engine* engine) { + const auto inputs_count = engine->getInputSize(); + const auto outputs_count = engine->getOutputSize(); + + if (inputs_count != 1 || outputs_count != 9) { + return false; + } + + const auto input_shape{engine->getInputShape(0)}; + + if (input_shape.size != 4) { + return false; + } + + const auto is_nhwc{input_shape.dims[3] == 3 || input_shape.dims[3] == 1}; + + size_t n = 0, h = 0, w 
= 0, c = 0; + + if (is_nhwc) { + n = input_shape.dims[0]; + h = input_shape.dims[1]; + w = input_shape.dims[2]; + c = input_shape.dims[3]; + } else { + n = input_shape.dims[0]; + c = input_shape.dims[1]; + h = input_shape.dims[2]; + w = input_shape.dims[3]; + } + + if (n != 1 || h ^ w || h < 32 || h % 32 || (c != 3 && c != 1)) { + return false; + } + + const auto output_nums = engine->getOutputSize(); + if (output_nums != 9) { + return false; + } + + std::vector outputs(output_nums); + for (size_t i = 0; i < output_nums; ++i) { + outputs[i] = engine->getOutput(i); + } + + std::vector> dims{std::vector{int(w / strides_[0]), int(h / strides_[0]), 0}, + std::vector{int(w / strides_[1]), int(h / strides_[1]), 0}, + std::vector{int(w / strides_[2]), int(h / strides_[2]), 0}}; + + for (auto& out : outputs) { + if (out.shape.size != 4 || out.shape.dims[0] != 1) { + return false; + } + auto it = std::find_if(dims.begin(), dims.end(), [&out](const std::vector& dim) { return dim[0] == out.shape.dims[1] && dim[1] == out.shape.dims[2]; }); + if (it == dims.end()) { + return false; + } + switch (out.shape.dims[3]) { + case 1: + if (out.type != MA_TENSOR_TYPE_U8) { + return false; + } + (*it)[2] += 1; + break; + case 64: + if (out.type != MA_TENSOR_TYPE_U8) { + return false; + } + (*it)[2] += 1; + break; + default: + if (out.shape.dims[3] % 3 != 0) { + return false; + } + if (out.type != MA_TENSOR_TYPE_U8 && out.type != MA_TENSOR_TYPE_U16) { + return false; + } + (*it)[2] += 1; + } + } + + for (const auto& dim : dims) { + if (dim[2] != 3) { + return false; + } + } + + return true; +} + +const char* YoloV8PoseHailo::getTag() { + return "ma::model::yolov8_pose"; +} + +template +static decltype(auto) decodeBoxesAndKeypoints(const std::vector& raw_boxes_outputs, + xt::xarray& scores, + const std::vector& raw_keypoints, + const std::vector& network_dims, + const std::vector& strides, + const std::vector>& centers, + int regression_length, + float score_threshold) { + + int 
class_index = 0; + std::forward_list decodings; + + int instance_index = 0; + float confidence = 0.0; + std::string label; + + // Box distribution to distance + auto regression_distance = xt::reshape_view(xt::arange(0, regression_length + 1), {1, 1, regression_length + 1}); + + for (uint i = 0; i < raw_boxes_outputs.size(); ++i) { + // Boxes setup + float32_t qp_scale = raw_boxes_outputs[i].quant_param.scale; + float32_t qp_zp = raw_boxes_outputs[i].quant_param.zero_point; + + std::vector output_b_shape = {(size_t)raw_boxes_outputs[i].shape.dims[1], (size_t)raw_boxes_outputs[i].shape.dims[2], (size_t)raw_boxes_outputs[i].shape.dims[3]}; + auto output_b = xt::adapt(raw_boxes_outputs[i].data.u8, raw_boxes_outputs[i].size, xt::no_ownership(), output_b_shape); + + int num_proposals = output_b.shape(0) * output_b.shape(1); + auto output_boxes = xt::view(output_b, xt::all(), xt::all(), xt::all()); + auto quantized_boxes = xt::reshape_view(output_boxes, {num_proposals, 4, regression_length + 1}); + + auto shape = {quantized_boxes.shape(1), quantized_boxes.shape(2)}; + + // Keypoints setup + float32_t qp_scale_kpts = raw_keypoints[i].quant_param.scale; + float32_t qp_zp_kpts = raw_keypoints[i].quant_param.zero_point; + + std::vector output_keypoints_shape = {(size_t)raw_keypoints[i].shape.dims[1], (size_t)raw_keypoints[i].shape.dims[2], (size_t)raw_keypoints[i].shape.dims[3]}; + + size_t output_keypoints_size = output_keypoints_shape[0] * output_keypoints_shape[1] * output_keypoints_shape[2]; + auto output_keypoints = xt::adapt(static_cast(raw_keypoints[i].data.data), output_keypoints_size, xt::no_ownership(), output_keypoints_shape); + + int num_proposals_keypoints = output_keypoints.shape(0) * output_keypoints.shape(1); + auto output_keypoints_quantized = xt::view(output_keypoints, xt::all(), xt::all(), xt::all()); + auto quantized_keypoints = xt::reshape_view(output_keypoints_quantized, {num_proposals_keypoints, int(output_keypoints_shape[2] / 3), 3}); + + auto 
keypoints_shape = {quantized_keypoints.shape(1), quantized_keypoints.shape(2)}; + + // Bbox decoding + for (uint j = 0; j < (uint)num_proposals; ++j) { + confidence = xt::row(scores, instance_index)(0); + instance_index++; + if (confidence < score_threshold) + continue; + + xt::xarray box(shape); + xt::xarray kpts_corrdinates_and_scores(keypoints_shape); + + ma::math::dequantizeValues2D(box, j, quantized_boxes, box.shape(0), box.shape(1), qp_scale, qp_zp); + ma::math::softmax2D(box.data(), box.shape(0), box.shape(1)); + + auto box_distance = box * regression_distance; + xt::xarray reduced_distances = xt::sum(box_distance, {2}); + auto strided_distances = reduced_distances * strides[i]; + + using namespace xt::placeholders; + auto distance_view1 = xt::view(strided_distances, xt::all(), xt::range(_, 2)) * -1; + auto distance_view2 = xt::view(strided_distances, xt::all(), xt::range(2, _)); + auto distance_view = xt::concatenate(xt::xtuple(distance_view1, distance_view2), 1); + auto decoded_box = centers[i] + distance_view; + + ma_keypoint3f_t kp; + auto x_min = decoded_box(j, 0) / network_dims[0]; + auto y_min = decoded_box(j, 1) / network_dims[1]; + auto w = (decoded_box(j, 2) - decoded_box(j, 0)) / network_dims[0]; + auto h = (decoded_box(j, 3) - decoded_box(j, 1)) / network_dims[1]; + + kp.box.x = x_min + (w / 2); + kp.box.y = y_min + (h / 2); + kp.box.w = w; + kp.box.h = h; + kp.box.score = confidence; + kp.box.target = class_index; + + ma::math::dequantizeValues2D( + kpts_corrdinates_and_scores, j, quantized_keypoints, kpts_corrdinates_and_scores.shape(0), kpts_corrdinates_and_scores.shape(1), qp_scale_kpts, qp_zp_kpts); + + auto kpts_corrdinates = xt::view(kpts_corrdinates_and_scores, xt::all(), xt::range(0, 2)); + auto keypoints_scores = xt::view(kpts_corrdinates_and_scores, xt::all(), xt::range(2, _)); + + kpts_corrdinates *= 2; + + auto center = xt::view(centers[i], xt::all(), xt::range(0, 2)); + auto center_values = xt::xarray{(float)center(j, 0), 
(float)center(j, 1)}; + + kpts_corrdinates = strides[i] * (kpts_corrdinates - 0.5) + center_values; + + auto sigmoided_scores = 1 / (1 + xt::exp(-keypoints_scores)); + + auto keypoint = std::make_pair(kpts_corrdinates, sigmoided_scores); + + int pt_size = kpts_corrdinates.shape(0); + for (int i = 0; i < pt_size; ++i) { + ma_pt3f_t pt; + pt.x = kpts_corrdinates(i, 0) / network_dims[0]; + pt.y = kpts_corrdinates(i, 1) / network_dims[1]; + pt.z = sigmoided_scores(i, 0); + kp.pts.push_back(pt); + } + + decodings.push_front(std::move(kp)); + } + } + + return decodings; +} + + +ma_err_t YoloV8PoseHailo::postprocess() { + // TODO: could be optimized + boxes_scores_keypoints_ = getBoxesScoresKeypoints(outputs_, 1); + + switch (route_) { + case 511: + results_ = decodeBoxesAndKeypoints( + boxes_scores_keypoints_.boxes, boxes_scores_keypoints_.scores, boxes_scores_keypoints_.keypoints, network_dims_, strides_, centers_, 15, threshold_score_); + break; + case 149723: + results_ = decodeBoxesAndKeypoints( + boxes_scores_keypoints_.boxes, boxes_scores_keypoints_.scores, boxes_scores_keypoints_.keypoints, network_dims_, strides_, centers_, 15, threshold_score_); + break; + default: + return MA_ENOTSUP; + } + + ma::utils::nms(results_, threshold_nms_, true); + + return MA_OK; +} + +} // namespace ma::model + +#endif \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose_hailo.h b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose_hailo.h new file mode 100644 index 0000000..5743d1b --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose_hailo.h @@ -0,0 +1,53 @@ +#ifndef _MA_MODEL_YOLOV8_POSE_HAILO_H_ +#define _MA_MODEL_YOLOV8_POSE_HAILO_H_ + +#include "ma_model_pose_detector.h" + +#if MA_USE_ENGINE_HAILO + +#include +#include +#include +#include + +#include +#include + +namespace ma::model { + +namespace _internal { + +struct Triple { + std::vector boxes; + xt::xarray scores; + 
std::vector keypoints; +}; + +} // namespace _internal + +class YoloV8PoseHailo : public PoseDetector { +private: + std::vector> centers_; + static std::vector strides_; + std::vector network_dims_; + std::vector outputs_; + _internal::Triple boxes_scores_keypoints_; + int32_t route_ = 0; + +protected: + ma_err_t postprocess() override; + +public: + YoloV8PoseHailo(Engine* engine); + ~YoloV8PoseHailo(); + + static bool isValid(Engine* engine); + + static const char* getTag(); +}; + +} // namespace ma::model + +#endif + +#endif // _MA_MODEL_YOLOV8_POSE_HAILO_H_ diff --git a/src/components/sscma-micro/sscma/core/utils/ma_anchors.cpp b/src/components/sscma-micro/sscma/core/utils/ma_anchors.cpp index b4d6670..1bfa640 100644 --- a/src/components/sscma-micro/sscma/core/utils/ma_anchors.cpp +++ b/src/components/sscma-micro/sscma/core/utils/ma_anchors.cpp @@ -4,7 +4,7 @@ namespace ma::utils { std::vector generateAnchorStrides(size_t input_size, std::vector strides) { std::vector anchor_strides(strides.size()); - size_t nth_anchor = 0; + size_t nth_anchor = 0; for (size_t i = 0; i < strides.size(); ++i) { const size_t stride = strides[i]; @@ -17,19 +17,17 @@ std::vector generateAnchorStrides(size_t input_size, std::ve return anchor_strides; } -std::vector> generateAnchorMatrix(const std::vector& anchor_strides, - float shift_right, - float shift_down) { - const auto anchor_matrix_size = anchor_strides.size(); +std::vector> generateAnchorMatrix(const std::vector& anchor_strides, float shift_right, float shift_down) { + const auto anchor_matrix_size = anchor_strides.size(); std::vector> anchor_matrix(anchor_matrix_size); - const float shift_right_init = shift_right * 0.5f; - const float shift_down_init = shift_down * 0.5f; + const float shift_right_init = shift_right * 0.5f; + const float shift_down_init = shift_down * 0.5f; for (size_t i = 0; i < anchor_matrix_size; ++i) { - const auto& anchor_stride = anchor_strides[i]; - const auto split = anchor_stride.split; - const auto size = 
anchor_stride.size; - auto& anchor_matrix_i = anchor_matrix[i]; + const auto& anchor_stride = anchor_strides[i]; + const auto split = anchor_stride.split; + const auto size = anchor_stride.size; + auto& anchor_matrix_i = anchor_matrix[i]; anchor_matrix[i].resize(size); @@ -43,4 +41,38 @@ std::vector> generateAnchorMatrix(const std::vector> generateAnchorMatrix(std::vector& strides, std::vector network_dims, std::size_t boxes_num, int strided_width, int strided_height) { + std::vector> centers(boxes_num); + + for (uint i = 0; i < boxes_num; ++i) { + strided_width = network_dims[0] / strides[i]; + strided_height = network_dims[1] / strides[i]; + + // Create a meshgrid of the proper strides + xt::xarray grid_x = xt::arange(0, strided_width); + xt::xarray grid_y = xt::arange(0, strided_height); + + auto mesh = xt::meshgrid(grid_x, grid_y); + grid_x = std::get<1>(mesh); + grid_y = std::get<0>(mesh); + + // Use the meshgrid to build up box center prototypes + auto ct_row = (xt::flatten(grid_y) + 0.5) * strides[i]; + auto ct_col = (xt::flatten(grid_x) + 0.5) * strides[i]; + + centers[i] = xt::stack(xt::xtuple(ct_col, ct_row, ct_col, ct_row), 1); + } + + return centers; +} + +#endif + } // namespace ma::utils diff --git a/src/components/sscma-micro/sscma/core/utils/ma_anchors.h b/src/components/sscma-micro/sscma/core/utils/ma_anchors.h index 8142c8a..a6d6108 100644 --- a/src/components/sscma-micro/sscma/core/utils/ma_anchors.h +++ b/src/components/sscma-micro/sscma/core/utils/ma_anchors.h @@ -7,13 +7,21 @@ #include "../ma_types.h" +#if MA_USE_LIB_XTENSOR +#include +#include +#include +#endif + namespace ma::utils { std::vector generateAnchorStrides(size_t input_size, std::vector strides = {8, 16, 32}); -std::vector> generateAnchorMatrix(const std::vector& anchor_strides, - float shift_right = 1.f, - float shift_down = 1.f); +std::vector> generateAnchorMatrix(const std::vector& anchor_strides, float shift_right = 1.f, float shift_down = 1.f); + +#if MA_USE_LIB_XTENSOR 
+std::vector> generateAnchorMatrix(std::vector& strides, std::vector network_dims, std::size_t boxes_num, int strided_width, int strided_height); +#endif } // namespace ma::utils diff --git a/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp b/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp index 50b9a81..8246b97 100644 --- a/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp +++ b/src/components/sscma-micro/sscma/core/utils/ma_nms.cpp @@ -30,20 +30,23 @@ static constexpr void nms_impl(Container& bboxes, float threshold_iou, float thr if constexpr (std::is_same_v>) { bboxes.sort([](const auto& box1, const auto& box2) { return box1.score > box2.score; }); } else { - std::sort( - bboxes.begin(), bboxes.end(), [](const auto& box1, const auto& box2) { return box1.score > box2.score; }); + std::sort(bboxes.begin(), bboxes.end(), [](const auto& box1, const auto& box2) { return box1.score > box2.score; }); } for (auto it = bboxes.begin(); it != bboxes.end(); ++it) { - if (it->score == 0) continue; + if (it->score == 0) + continue; for (auto it2 = std::next(it); it2 != bboxes.end(); ++it2) { - if (it2->score == 0) continue; - if (multi_target && it->target != it2->target) continue; + if (it2->score == 0) + continue; + if (multi_target && it->target != it2->target) + continue; const auto iou = compute_iou(*it, *it2); if (iou > threshold_iou) { if (soft_nms) { it2->score = it2->score * (1 - iou); - if (it2->score < threshold_score) it2->score = 0; + if (it2->score < threshold_score) + it2->score = 0; } else { it2->score = 0; } @@ -54,22 +57,34 @@ static constexpr void nms_impl(Container& bboxes, float threshold_iou, float thr if constexpr (std::is_same_v>) { bboxes.remove_if([](const auto& box) { return box.score == 0; }); } else { - bboxes.erase(std::remove_if(bboxes.begin(), bboxes.end(), [](const auto& box) { return box.score == 0; }), - bboxes.end()); + bboxes.erase(std::remove_if(bboxes.begin(), bboxes.end(), [](const auto& box) { return box.score == 0; 
}), bboxes.end()); } } -void nms( - std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target) { +void nms(std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target) { nms_impl(bboxes, threshold_iou, threshold_score, soft_nms, multi_target); } -void nms(std::forward_list& bboxes, - float threshold_iou, - float threshold_score, - bool soft_nms, - bool multi_target) { +void nms(std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target) { nms_impl(bboxes, threshold_iou, threshold_score, soft_nms, multi_target); } +void nms(std::forward_list& decodings, const float iou_thr, bool should_nms_cross_classes) { + for ( + auto it = decodings.begin(); it != decodings.end(); ++it) { + if (it->box.score != 0.0f) { + for ( + auto it2 = std::next(it); it2 != decodings.end(); ++it2) { + if ((should_nms_cross_classes || (it->box.target == it2->box.target)) && it2->box.score != 0.0f) { + float iou = compute_iou(it->box, it2->box); + if (iou >= iou_thr) { + it2->box.score = 0.0f; + } + } + } + } + } + decodings.remove_if([](const auto& box) { return box.box.score == 0.0f; }); +} + } // namespace ma::utils \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/utils/ma_nms.h b/src/components/sscma-micro/sscma/core/utils/ma_nms.h index 14e2ef1..cc4d52e 100644 --- a/src/components/sscma-micro/sscma/core/utils/ma_nms.h +++ b/src/components/sscma-micro/sscma/core/utils/ma_nms.h @@ -4,7 +4,6 @@ #include #include #include -#include #include "../ma_types.h" @@ -14,11 +13,9 @@ namespace ma::utils { void nms(std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool multi_target); -void nms(std::forward_list& bboxes, - float threshold_iou, - float threshold_score, - bool soft_nms, - bool multi_target); +void nms(std::forward_list& bboxes, float threshold_iou, float threshold_score, bool soft_nms, bool 
multi_target); + +void nms(std::forward_list& decodings, const float iou_thr, bool should_nms_cross_classes); } // namespace ma::utils