From 5df9aae4e0e7b9768fe70c1e8898f2715bf4b4e6 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Thu, 13 Jun 2024 17:56:01 -0400 Subject: [PATCH 01/25] Initial cmpto implementation --- src/video_compress/cmpto_j2k.cpp | 853 +++++++++++++++++++++-------- src/video_decompress/cmpto_j2k.cpp | 520 ++++++++++++------ 2 files changed, 971 insertions(+), 402 deletions(-) mode change 100644 => 100755 src/video_compress/cmpto_j2k.cpp mode change 100644 => 100755 src/video_decompress/cmpto_j2k.cpp diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp old mode 100644 new mode 100755 index 58fbe11dd0..ffc212ccb1 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -48,22 +48,27 @@ #include "config.h" #endif // HAVE_CONFIG_H +#include + +#include +#include #include #include #include #include #include +#include +#include #include -#include - #ifdef HAVE_CUDA #include "cuda_wrapper.h" -#endif +#endif // HAVE_CUDA #include "debug.h" #include "host.h" #include "lib_common.h" #include "module.h" +#include "utils/string.h" // replace_all #include "tv.h" #include "utils/color_out.h" #include "utils/misc.h" @@ -71,84 +76,599 @@ #include "video.h" #include "video_compress.h" -#define MOD_NAME "[Cmpto J2K enc.] " +constexpr const char *MOD_NAME = "[Cmpto J2K enc.]"; + +#define ASSIGN_CHECK_VAL(var, str, minval) \ + do { \ + long long val = unit_evaluate(str, nullptr); \ + if (val < (minval) || val > UINT_MAX) { \ + LOG(LOG_LEVEL_ERROR) \ + << MOD_NAME << " Wrong value " << (str) \ + << " for " #var "! Value must be >= " << (minval) \ + << ".\n"; \ + throw InvalidArgument(); \ + } \ + (var) = val; \ + } while (0) #define CHECK_OK(cmd, err_msg, action_fail) do { \ int j2k_error = cmd; \ if (j2k_error != CMPTO_OK) {\ - log_msg(LOG_LEVEL_ERROR, "[J2K enc.] 
%s: %s\n", \ - err_msg, cmpto_j2k_enc_get_last_error()); \ + log_msg(LOG_LEVEL_ERROR, "%s %s: %s\n", \ + MOD_NAME, err_msg, cmpto_j2k_enc_get_last_error()); \ action_fail;\ } \ -} while(0) +} while (0) #define NOOP ((void) 0) -#define DEFAULT_QUALITY 0.7 -/// default max size of state_video_compress_j2k::pool and also value -/// for state_video_compress_j2k::max_in_frames -#define DEFAULT_POOL_SIZE 4 -/// number of frames that encoder encodes at moment -#define DEFAULT_TILE_LIMIT 1 -#define DEFAULT_MEM_LIMIT 1000000000LLU - -using std::condition_variable; + +// Default CPU Settings +#define DEFAULT_CPU_THREAD_COUNT CMPTO_J2K_ENC_CPU_DEFAULT +#define MIN_CPU_THREAD_COUNT CMPTO_J2K_ENC_CPU_NONE +#define DEFAULT_CPU_MEM_LIMIT 0 // DEFAULT_CPU_MEM_LIMIT should always be 0 +#define DEFAULT_CPU_POOL_SIZE 8 +#define DEFAULT_IMG_LIMIT 0 // Default number of images to be decoded by CPU (0 = CMPTO Default) +#define MIN_CPU_IMG_LIMIT 0 // Min number of images decoded by the CPU at once + +// Default CUDA Settings +#define DEFAULT_CUDA_POOL_SIZE 4 +#define DEFAULT_CUDA_TILE_LIMIT 1 +#define DEFAULT_CUDA_MEM_LIMIT 1000000000ULL + +// Default General Settings +#define DEFAULT_QUALITY 0.7 +#ifdef HAVE_CUDA +#define DEFAULT_POOL_SIZE DEFAULT_CUDA_POOL_SIZE +#else +#define DEFAULT_POOL_SIZE DEFAULT_CPU_POOL_SIZE +#endif + using std::mutex; -using std::stod; using std::shared_ptr; -using std::unique_lock; #ifdef HAVE_CUDA struct cmpto_j2k_enc_cuda_host_buffer_data_allocator : public video_frame_pool_allocator { - void *allocate(size_t size) override - { + void *allocate(size_t size) override { void *ptr = nullptr; if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc_host(&ptr, size)) { - MSG(ERROR, "Cannot allocate host buffer: %s\n", + log_msg(LOG_LEVEL_ERROR, "Cannot allocate host buffer: %s\n", cuda_wrapper_last_error_string()); return nullptr; } return ptr; } + void deallocate(void *ptr) override { cuda_wrapper_free(ptr); } - [[nodiscard]] video_frame_pool_allocator *clone() const 
override - { + + [[nodiscard]] video_frame_pool_allocator *clone() const override { return new cmpto_j2k_enc_cuda_host_buffer_data_allocator(*this); } }; -using allocator = cmpto_j2k_enc_cuda_host_buffer_data_allocator; +using allocator = cmpto_j2k_enc_cuda_host_buffer_data_allocator; +using cuda_allocator = cmpto_j2k_enc_cuda_host_buffer_data_allocator; +#else +using allocator = default_data_allocator; +#endif +using cpu_allocator = default_data_allocator; + +// Pre Declarations +static void j2k_compressed_frame_dispose(struct video_frame *frame); +static void j2k_compress_done(struct module *mod); +static void R12L_to_RG48(video_frame *dst, video_frame *src); + +/** + * @brief Platforms available for J2K Compression + */ +enum j2k_compress_platform { + NONE = 0, + CPU = 1, +#ifdef HAVE_CUDA + CUDA = 2, +#endif // HAVE_CUDA +}; + +/** + * @brief Struct to hold Platform Name and j2k_compress_platform Type + */ +struct j2k_compress_platform_info_t { + const char* name; + j2k_compress_platform platform; +}; + +// Supported Platforms for Compressing J2K +constexpr auto compress_platforms = std::array { + j2k_compress_platform_info_t{"none", j2k_compress_platform::NONE}, + j2k_compress_platform_info_t{"cpu", j2k_compress_platform::CPU}, +#ifdef HAVE_CUDA + j2k_compress_platform_info_t{"cuda", j2k_compress_platform::CUDA} +#endif +}; + +/** + * @fn get_platform_from_name + * @brief Search for j2k_compress_platform from friendly name + * @param name Friendly name of platform to search for + * @return j2k_compress_platform that corresponds to name. 
If no match, return j2k_compress_platform::NONE + */ +[[nodiscard]][[maybe_unused]] +static j2k_compress_platform get_platform_from_name(std::string name) { + std::transform(name.cbegin(), name.cend(), name.begin(), [](unsigned char c) { return std::tolower(c); }); + + auto matches = [&name](const auto& p) { return name.compare(p.name) == 0; }; + + if (const auto& it = std::find_if(compress_platforms.begin(), compress_platforms.end(), matches) ; it != compress_platforms.end()) { + return it->platform; + } + + return j2k_compress_platform::NONE; +} + +/** + * @brief Struct to hold UG and CMPTO Codec information + */ +struct Codec { + codec_t ug_codec; + enum cmpto_sample_format_type cmpto_sf; + codec_t convert_codec; + void (*convertFunc)(video_frame *dst, video_frame *src); +}; + +// Supported UG/CMPTO Compress Codecs +constexpr auto codecs = std::array{ + Codec{UYVY, CMPTO_422_U8_P1020, VIDEO_CODEC_NONE, nullptr}, + Codec{v210, CMPTO_422_U10_V210, VIDEO_CODEC_NONE, nullptr}, + Codec{RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, + Codec{RGBA, CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, + Codec{R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, + Codec{R12L, CMPTO_444_U12_MSB16LE_P012, RG48, R12L_to_RG48}, +}; + +/** + * Exceptions for state_video_compress_j2k construction + */ + +/// @brief HelpRequested Exception +struct HelpRequested : public std::exception { + HelpRequested() = default; +}; + +/// @brief InvalidArgument Exception +struct InvalidArgument : public std::exception { + InvalidArgument() = default; +}; + +/// @brief UnableToCreateJ2KEncoderCTX Exception +struct UnableToCreateJ2KEncoderCTX : public std::exception { + UnableToCreateJ2KEncoderCTX() = default; +}; + +/// @brief Struct for options for J2K Compression Usage +struct opts { + const char *label; + const char *key; + const char *description; + const char *opt_str; + const bool is_boolean; +}; + +#ifdef HAVE_CUDA +constexpr opts cuda_opts[2] = { + {"Mem limit", 
"mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_CUDA_MEM_LIMIT), ":mem_limit=", false}, + {"Tile limit", "tile_limit", "Number of tiles encoded at one moment by GPU (less to reduce latency, more to increase performance, 0 means infinity). default: " TOSTRING(DEFAULT_CUDA_TILE_LIMIT), ":tile_limit=", false}, +}; +constexpr opts platform_opts[1] = { + {"Plaform", "platform", "Platform device for the encoder to use, default: cuda", ":platform=", false}, +}; +#endif // HAVE_CUDA + +constexpr opts cpu_opts[2] = { + {"Thread count", "thread_count", "Number of threads to use on the CPU. 0 is all available. default: " TOSTRING(DEFAULT_CPU_THREAD_COUNT), ":thread_count=", false}, + {"Image limit", "img_limit", "Number of images which can be encoded at one moment by CPU. Maximum allowed limit is thread_count. 0 is default limit. default: " TOSTRING(DEFAULT_IMG_LIMIT), ":img_limit=", false}, +}; + +constexpr opts general_opts[5] = { + {"Bitrate", "quality", "Target bitrate", ":rate=", false}, + {"Quality", "quant_coeff", "Quality in range [0-1]. default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false}, +#ifdef HAVE_CUDA + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than tile_limit or img_limit. default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false}, #else -using allocator = default_data_allocator; + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than img_limit. default: " TOSTRING(DEFAULT_POOL_SIZE) , ":pool_size=", false}, +#endif + {"Use MCT", "mct", "Use MCT", ":mct", true}, + {"Lossless compression", "lossless", "Enable lossless compression. default: disabled", ":lossless", true} +}; + +/** + * @fn usage + * @brief Display J2K Compression Usage Information + */ +static void usage() { + col() << "J2K compress platform support:\n"; + col() << "\tCPU .... yes\n"; +#ifdef HAVE_CUDA + col() << "\tCUDA ... 
yes\n"; +#else + col() << "\tCUDA ... no\n"; +#endif + + col() << "J2K compress usage:\n"; + + auto show_syntax = [](const auto& options) { + for (const auto& opt : options) { + assert(strlen(opt.opt_str) >= 2); + col() << "[" << opt.opt_str; + if (!opt.is_boolean) { + col() << "<" << opt.opt_str[1] << ">"; // :quality -> (first letter used as ":quality=") + } + col() << "]"; + } + }; + + auto show_arguments = [](const auto& options) { + for (const auto& opt : options) { + assert(strlen(opt.opt_str) >= 2); + if (opt.is_boolean) { + col() << TBOLD("\t" << opt.opt_str + 1 <<); + } else { + col() << TBOLD("\t<" << opt.opt_str[1] << ">"); + } + col() << " - " << opt.description << "\n"; + } + }; + +#ifdef HAVE_CUDA + // CPU and CUDA Platforms Supported. Show platform= options + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cuda"); + show_syntax(cuda_opts); + show_syntax(general_opts); + col() << " [--cuda-device ]\n" << TERM_RESET; + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cpu"); + show_syntax(cpu_opts); + show_syntax(general_opts); +#else // HAVE_CUDA + // Only CPU Platform Supported. No option to switch platform from default. 
+ col() << TERM_BOLD << TRED("\t-c cmpto_j2k"); + show_syntax(cpu_opts); + show_syntax(general_opts); #endif + col() << "\n" << TERM_RESET; + col() << "where:\n"; +#ifdef HAVE_CUDA + show_arguments(platform_opts); + show_arguments(cuda_opts); + col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; +#endif // HAVE_CUDA + show_arguments(cpu_opts); + show_arguments(general_opts); +} + +/** + * @brief state_video_compress_j2k Class + */ struct state_video_compress_j2k { - state_video_compress_j2k(long long int bitrate, unsigned int pool_size, int mct) - : rate(bitrate), mct(mct), pool(pool_size, allocator()), - max_in_frames(pool_size) - { - } + explicit state_video_compress_j2k(struct module *parent); + state_video_compress_j2k(struct module *parent, const char* opts); + + module module_data{}; + struct cmpto_j2k_enc_ctx *context{}; + struct cmpto_j2k_enc_cfg *enc_settings{}; + std::unique_ptr pool; + unsigned int in_frames{}; ///< number of currently encoding frames + mutex lock; + std::condition_variable frame_popped; + video_desc saved_desc{}; ///< for pool reconfiguration + video_desc precompress_desc{}; + video_desc compressed_desc{}; + + void (*convertFunc)(video_frame *dst, video_frame *src) { nullptr }; + + // Generic Parameters + double quality = DEFAULT_QUALITY; // default image quality + long long int rate = 0; // bitrate in bits per second + int mct = -1; // force use of mct - -1 means default + bool lossless = false; // lossless encoding + + // CPU Parameters + int cpu_thread_count = DEFAULT_CPU_THREAD_COUNT; + unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; + + // CUDA Parameters + unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // Platform to use by default +#ifdef HAVE_CUDA + j2k_compress_platform platform = j2k_compress_platform::CUDA; + unsigned int max_in_frames = DEFAULT_CUDA_POOL_SIZE; ///< max number of frames between push and pop +#else + 
j2k_compress_platform platform = j2k_compress_platform::CPU; + unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of frames between push and pop +#endif + + private: + void parse_fmt(const char* opts); + bool initialize_j2k_enc_ctx(); - struct module module_data{}; - struct cmpto_j2k_enc_ctx *context{}; - struct cmpto_j2k_enc_cfg *enc_settings{}; - long long int rate; ///< bitrate in bits per second - int mct; // force use of mct - -1 means default - video_frame_pool pool; ///< pool for frames allocated by us but not yet consumed by encoder - unsigned int max_in_frames; ///< max number of frames between push and pop - unsigned int in_frames{}; ///< number of currently encoding frames - mutex lock; - condition_variable frame_popped; - video_desc saved_desc{}; ///< for pool reconfiguration - video_desc precompress_desc{}; - video_desc compressed_desc{}; - void (*convertFunc)(video_frame *dst, video_frame *src){nullptr}; + // CPU Parameter + const size_t cpu_mem_limit = 0; // Not yet implemented as of v2.8.1. Must be 0. 
}; -static void j2k_compressed_frame_dispose(struct video_frame *frame); -static void j2k_compress_done(struct module *mod); -static void R12L_to_RG48(video_frame *dst, video_frame *src){ +/** + * @brief state_video_compress_j2k default constructor to create from module + * @param parent Base Module Struct +*/ +state_video_compress_j2k::state_video_compress_j2k(struct module *parent) + : pool(std::make_unique(DEFAULT_POOL_SIZE, allocator())) { + module_init_default(&module_data); + module_data.cls = MODULE_CLASS_DATA; + module_data.priv_data = this; + module_data.deleter = j2k_compress_done; + module_register(&module_data, parent); +} + +/** + * @brief state_video_compress_j2k constructor to create from opts + * @param parent Base Module Struct + * @param opts Configuration options to construct class + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + * @throw UnableToCreateJ2KEncoderCTX if failure to create J2K CTX +*/ +state_video_compress_j2k::state_video_compress_j2k(struct module *parent, const char* opts) { + try { + parse_fmt(opts); + } catch (...) { + throw; + } + + if (!initialize_j2k_enc_ctx()) { + throw UnableToCreateJ2KEncoderCTX(); + } + + module_init_default(&module_data); + module_data.cls = MODULE_CLASS_DATA; + module_data.priv_data = this; + module_data.deleter = j2k_compress_done; + module_register(&module_data, parent); +} + +/// CUDA opt Syntax +// -c cmpto_j2k:platform=cuda[:mem_limit=][:tile_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] [--cuda-device ] +/// CPU opt Syntax +// -c cmpto_j2k:platform=cpu[:thread_count=][:img_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] +/** + * @fn parse_fmt + * @brief Parse options and configure class members accordingly + * @param opts Configuration options + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + */ +void state_video_compress_j2k::parse_fmt(const char* opts) { + auto split_arguments = [](std::string args, std::string delimiter) { + auto token = std::string{}; + auto pos = size_t{0}; + auto vec = std::vector{}; + + if (args == "\0") { + return vec; + } + + while ((pos = args.find(delimiter)) != std::string::npos) { + token = args.substr(0, pos); + vec.emplace_back(std::move(token)); + args.erase(0, pos + delimiter.length()); + } + + vec.emplace_back(std::move(args)); + return vec; + }; + + auto args = split_arguments(opts, ":"); + + // No Arguments provided, return and use defaults + if (args.empty()) { + return; + } + + const auto *version = cmpto_j2k_enc_get_version(); + log_msg(LOG_LEVEL_INFO, "%s Using Codec version: %s\n", + MOD_NAME, + (version == nullptr ? "(unknown)" : version->name)); + + const char* item = ""; + + /** + * Check if :pool_size= set manually during argument parsing. + * Since max_in_frames is default initialized to match compile time platform default (CUDA or CPU) + * Changing from :platform=cuda default to :platform=cpu default will not automatically + * set :pool_size= during argument parsing because opts can passed be out of order. + * + * To prevent potential for overwriting user's defined default, set is_pool_size_manually_configured=true + * during argument parsing and check before final function return + * + * If pool size is manually configured, do not set to default. 
+ * Otherwise, set max_in_frames = platform default + */ + auto is_pool_size_manually_configured = false; + + for (const auto& arg : args) { + item = arg.c_str(); + if (strcasecmp("help", item) == 0) { // :help + usage(); + throw HelpRequested(); + + } else if (IS_KEY_PREFIX(item, "platform")) { // :platform= + const char *const platform_name = strchr(item, '=') + 1; + platform = get_platform_from_name(platform_name); + if (j2k_compress_platform::NONE == platform) { + log_msg(LOG_LEVEL_ERROR, + "%s Unable to find requested encoding platform: \"%s\"\n", + MOD_NAME, + platform_name); + throw InvalidArgument(); + } + + } else if (strcasecmp("lossless", item) == 0) { // :lossless + lossless = true; + + } else if (IS_KEY_PREFIX(item, "mem_limit")) { // :mem_limit= + ASSIGN_CHECK_VAL(cuda_mem_limit, strchr(item, '=') + 1, 1); + + } else if (IS_KEY_PREFIX(item, "thread_count")) { // :thread_count= + cpu_thread_count = atoi(strchr(item, '=') + 1); + ASSIGN_CHECK_VAL(cpu_thread_count, strchr(item, '=') + 1, MIN_CPU_THREAD_COUNT); + + } else if (IS_KEY_PREFIX(item, "tile_limit")) { // :tile_limit= + ASSIGN_CHECK_VAL(cuda_tile_limit, strchr(item, '=') + 1, 0); + + } else if (IS_KEY_PREFIX(item, "img_limit")) { // :img_limit= + ASSIGN_CHECK_VAL(cpu_img_limit, strchr(item, '=') + 1, MIN_CPU_IMG_LIMIT); + + } else if (IS_KEY_PREFIX(item, "rate")) { // :rate= + ASSIGN_CHECK_VAL(rate, strchr(item, '=') + 1, 1); + + } else if (IS_KEY_PREFIX(item, "quality")) { // :quality= + quality = std::stod(strchr(item, '=') + 1); + if (quality < 0.0 || quality > 1.0) { + log_msg(LOG_LEVEL_ERROR, + "%s Quality should be in interval [0-1]\n", + MOD_NAME); + throw InvalidArgument(); + } + + } else if (IS_KEY_PREFIX(item, "pool_size")) { // :pool_size= + ASSIGN_CHECK_VAL(max_in_frames, strchr(item, '=') + 1, 1); + is_pool_size_manually_configured = true; + + } else if (strcasecmp("mct", item) == 0) { // :mct + mct = strcasecmp("mct", item) ? 
1 : 0; + + } else { + log_msg(LOG_LEVEL_ERROR, + "%s Unable to find option: \"%s\"\n", + MOD_NAME, item); + throw InvalidArgument(); + } + } + + // If CPU selected + if (j2k_compress_platform::CPU == platform) { + /** + * Confirm thread_count != CMPTO_J2K_ENC_CPU_DEFAULT (0) + * If it does, img_limit can be > thread_count since all threads used + * + * If thread_count is not 0, confirm img_limit doesn't exceed thread_count + * Set img_limit = thread_count if exeeded + */ + if (cpu_thread_count != CMPTO_J2K_ENC_CPU_DEFAULT && cpu_thread_count < static_cast(cpu_img_limit)) { + log_msg(LOG_LEVEL_INFO, + "%s img_limit (%i) exceeds thread_count. Lowering to img_limit to %i to match thread_count.\n", + MOD_NAME, + cpu_img_limit, + cpu_thread_count); + cpu_img_limit = cpu_thread_count; + } + + // If pool_size was manually set, ignore this check. + // Otherwise, if it was not set, confirm that max_in_frames matches DEFAULT_CPU_POOL_SIZE + if (!is_pool_size_manually_configured && max_in_frames != DEFAULT_CPU_POOL_SIZE) { + log_msg(LOG_LEVEL_DEBUG, + "%s max_in_frames set to CPU default: %i", + MOD_NAME, + DEFAULT_CPU_POOL_SIZE); + max_in_frames = DEFAULT_CPU_POOL_SIZE; + } + } +} + +/** + * @fn initialize_j2k_enc_ctx + * @brief Initialize internal cmpto_j2k_enc_ctx_cfg for requested platform and settings + * @return true if successsfully configured + * @return false if unable to configure + */ +[[nodiscard]] +bool state_video_compress_j2k::initialize_j2k_enc_ctx() { + struct cmpto_j2k_enc_ctx_cfg *ctx_cfg; + + CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), + "Context configuration create", + return false); + + if (j2k_compress_platform::CPU == platform) { + log_msg(LOG_LEVEL_INFO, "%s Configuring for CPU\n", MOD_NAME); + pool = std::make_unique(max_in_frames, cpu_allocator()); + // for (unsigned int i = 0; i < cpu_count ; ) + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cpu( + ctx_cfg, + cpu_thread_count, + cpu_mem_limit, + cpu_img_limit), + "Setting CPU device", + return false); + 
+ log_msg(LOG_LEVEL_INFO, "%s Using %s threads on CPU. Thread Count = %i, Image Limit = %i\n", + MOD_NAME, + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_thread_count, + cpu_img_limit); + } + +#ifdef HAVE_CUDA + if (j2k_compress_platform::CUDA == platform) { + log_msg(LOG_LEVEL_INFO, "%s Configuring for CUDA\n", MOD_NAME); + pool = std::make_unique(max_in_frames, cuda_allocator()); + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( + ctx_cfg, + cuda_devices[i], + cuda_mem_limit, + cuda_tile_limit), + "Setting CUDA device", + return false); + } + } +#endif // HAVE_CUDA + + CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &context), + "Context create", + return false); + + CHECK_OK(cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg), + "Context configuration destroy", + NOOP); + + CHECK_OK(cmpto_j2k_enc_cfg_create( + context, + &enc_settings), + "Creating context configuration:", + return false); + if (lossless) { + CHECK_OK(cmpto_j2k_enc_cfg_set_lossless( + enc_settings, + lossless ? 
1 : 0), + "Enabling lossless", + return false); + } else { + CHECK_OK(cmpto_j2k_enc_cfg_set_quantization( + enc_settings, + quality /* 0.0 = poor quality, 1.0 = full quality */), + "Setting quantization", + NOOP); + } + + CHECK_OK(cmpto_j2k_enc_cfg_set_resolutions(enc_settings, 6), + "Setting DWT levels", + NOOP); + + return true; +} + +static void R12L_to_RG48(video_frame *dst, video_frame *src) { int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); @@ -156,46 +676,30 @@ static void R12L_to_RG48(video_frame *dst, video_frame *src){ unsigned char *d = (unsigned char *) dst->tiles[0].data; decoder_t vc_copylineR12LtoRG48 = get_decoder_from_to(R12L, RG48); - for(unsigned i = 0; i < src->tiles[0].height; i++){ + for (unsigned i = 0; i < src->tiles[0].height; i++) { vc_copylineR12LtoRG48(d, s, dst_pitch, 0, 0, 0); s += src_pitch; d += dst_pitch; } } -static struct { - codec_t ug_codec; - enum cmpto_sample_format_type cmpto_sf; - codec_t convert_codec; - void (*convertFunc)(video_frame *dst, video_frame *src); -} codecs[] = { - {UYVY, CMPTO_422_U8_P1020, VIDEO_CODEC_NONE, nullptr}, - {v210, CMPTO_422_U10_V210, VIDEO_CODEC_NONE, nullptr}, - {RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, - {RGBA, CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, - {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, - {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, R12L_to_RG48}, -}; - -static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc){ +static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc) { enum cmpto_sample_format_type sample_format; bool found = false; - - for(const auto &codec : codecs){ - if(codec.ug_codec == desc.color_spec){ - sample_format = codec.cmpto_sf; - s->convertFunc = codec.convertFunc; - s->precompress_desc = desc; - if(codec.convert_codec != VIDEO_CODEC_NONE){ - s->precompress_desc.color_spec = 
codec.convert_codec; - } - found = true; - break; + auto matches = [&](const Codec& codec) { return codec.ug_codec == desc.color_spec; }; + + if (const auto& codec = std::find_if(codecs.begin(), codecs.end(), matches) ; codec != codecs.end()) { + sample_format = codec->cmpto_sf; + s->convertFunc = codec->convertFunc; + s->precompress_desc = desc; + if (codec->convert_codec != VIDEO_CODEC_NONE) { + s->precompress_desc.color_spec = codec->convert_codec; } + found = true; } - if(!found){ - log_msg(LOG_LEVEL_ERROR, "[J2K] Failed to find suitable pixel format\n"); + if (!found) { + log_msg(LOG_LEVEL_ERROR, "%s Failed to find suitable pixel format\n", MOD_NAME); return false; } @@ -228,8 +732,8 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc return true; } -static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ - std::shared_ptr ret = s->pool.get_frame(); +static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame) { + std::shared_ptr ret = s->pool->get_frame(); if (s->convertFunc) { s->convertFunc(ret.get(), frame); @@ -261,8 +765,7 @@ struct custom_data { * pipeline. Because of that goto + start label is used. 
*/ #define HANDLE_ERROR_COMPRESS_POP do { cmpto_j2k_enc_img_destroy(img); goto start; } while (0) -static std::shared_ptr j2k_compress_pop(struct module *state) -{ +static std::shared_ptr j2k_compress_pop(struct module *state) { start: struct state_video_compress_j2k *s = (struct state_video_compress_j2k *) state; @@ -275,7 +778,7 @@ static std::shared_ptr j2k_compress_pop(struct module *state) &img /* Set to NULL if encoder stopped */, &status), "Encode image", HANDLE_ERROR_COMPRESS_POP); { - unique_lock lk(s->lock); + std::unique_lock lk(s->lock); s->in_frames--; s->frame_popped.notify_one(); } @@ -311,162 +814,34 @@ static std::shared_ptr j2k_compress_pop(struct module *state) return shared_ptr(out, out->callbacks.dispose); } -struct { - const char *label; - const char *key; - const char *description; - const char *opt_str; - const bool is_boolean; -} usage_opts[] = { - {"Bitrate", "quality", "Target bitrate", ":rate=", false}, - {"Quality", "quant_coeff", "Quality in range [0-1], default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false}, - {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_MEM_LIMIT), ":mem_limit=", false}, - {"Tile limit", "tile_limit", "Number of tiles encoded at moment (less to reduce latency, more to increase performance, 0 means infinity), default: " TOSTRING(DEFAULT_TILE_LIMIT), ":tile_limit=", false}, - {"Pool size", "pool_size", "Total number of tiles encoder can hold at moment (same meaning as above), default: " TOSTRING(DEFAULT_POOL_SIZE) ", should be greater than ", ":pool_size=", false}, - {"Use MCT", "mct", "use MCT", ":mct", true}, -}; - -static void usage() { - col() << "J2K compress usage:\n"; - col() << TERM_BOLD << TRED("\t-c cmpto_j2k"); - for(const auto& opt : usage_opts){ - assert(strlen(opt.opt_str) >= 2); - col() << "[" << opt.opt_str; - if (!opt.is_boolean) { - col() << "<" << opt.opt_str[1] << ">"; // :quality -> (first letter used as ":quality=") - } - col() << "]"; +static 
struct module * j2k_compress_init(struct module *parent, const char *opts) { + try { + auto *s = new state_video_compress_j2k(parent, opts); + return &s->module_data; + } catch (HelpRequested const& e) { + return static_cast(INIT_NOERR); + } catch (InvalidArgument const& e) { + return NULL; + } catch (UnableToCreateJ2KEncoderCTX const& e) { + return NULL; + } catch (...) { + return NULL; } - col() << " [--cuda-device ]\n" << TERM_RESET; - - col() << "where:\n"; - for(const auto& opt : usage_opts){ - if (opt.is_boolean) { - col() << TBOLD("\t" << opt.opt_str + 1 <<); - } else { - col() << TBOLD("\t<" << opt.opt_str[1] << ">"); - } - col() << " - " << opt.description << "\n"; - } - col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; } -#define ASSIGN_CHECK_VAL(var, str, minval) \ - do { \ - long long val = unit_evaluate(str, nullptr); \ - if (val < (minval) || val > UINT_MAX) { \ - LOG(LOG_LEVEL_ERROR) \ - << "[J2K] Wrong value " << (str) \ - << " for " #var "! Value must be >= " << (minval) \ - << ".\n"; \ - return NULL; \ - } \ - (var) = val; \ - } while (0) - -static struct module * j2k_compress_init(struct module *parent, const char *c_cfg) -{ - double quality = DEFAULT_QUALITY; - int mct = -1; - long long int bitrate = 0; - long long int mem_limit = DEFAULT_MEM_LIMIT; - unsigned int tile_limit = DEFAULT_TILE_LIMIT; - unsigned int pool_size = DEFAULT_POOL_SIZE; - - const auto *version = cmpto_j2k_enc_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? 
"(unknown)" : version->name) << "\n"; - - char *tmp = (char *) alloca(strlen(c_cfg) + 1); - strcpy(tmp, c_cfg); - char *save_ptr, *item; - while ((item = strtok_r(tmp, ":", &save_ptr))) { - tmp = NULL; - if (strncasecmp("rate=", item, strlen("rate=")) == 0) { - ASSIGN_CHECK_VAL(bitrate, strchr(item, '=') + 1, 1); - } else if (strncasecmp("quality=", item, strlen("quality=")) == 0) { - quality = stod(strchr(item, '=') + 1); - } else if (strcasecmp("mct", item) == 0 || strcasecmp("nomct", item) == 0) { - mct = strcasecmp("mct", item) ? 1 : 0; - } else if (strncasecmp("mem_limit=", item, strlen("mem_limit=")) == 0) { - ASSIGN_CHECK_VAL(mem_limit, strchr(item, '=') + 1, 1); - } else if (strncasecmp("tile_limit=", item, strlen("tile_limit=")) == 0) { - ASSIGN_CHECK_VAL(tile_limit, strchr(item, '=') + 1, 0); - } else if (strncasecmp("pool_size=", item, strlen("pool_size=")) == 0) { - ASSIGN_CHECK_VAL(pool_size, strchr(item, '=') + 1, 1); - } else if (strcasecmp("help", item) == 0) { - usage(); - return static_cast(INIT_NOERR); - } else { - log_msg(LOG_LEVEL_ERROR, "[J2K] Wrong option: %s\n", item); - return NULL; - } - } - - if (quality < 0.0 || quality > 1.0) { - LOG(LOG_LEVEL_ERROR) << "[J2K] Quality should be in interval [0-1]!\n"; - return nullptr; - } - - auto *s = new state_video_compress_j2k(bitrate, pool_size, mct); - - struct cmpto_j2k_enc_ctx_cfg *ctx_cfg; - CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), "Context configuration create", - goto error); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], mem_limit, tile_limit), - "Setting CUDA device", goto error); - } - - CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &s->context), "Context create", - goto error); - CHECK_OK(cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg), "Context configuration destroy", - NOOP); - - CHECK_OK(cmpto_j2k_enc_cfg_create( - s->context, - &s->enc_settings), - "Creating context configuration:", - goto error); - 
CHECK_OK(cmpto_j2k_enc_cfg_set_quantization( - s->enc_settings, - quality /* 0.0 = poor quality, 1.0 = full quality */ - ), - "Setting quantization", - NOOP); - - CHECK_OK(cmpto_j2k_enc_cfg_set_resolutions( s->enc_settings, 6), - "Setting DWT levels", - NOOP); - - module_init_default(&s->module_data); - s->module_data.cls = MODULE_CLASS_DATA; - s->module_data.priv_data = s; - s->module_data.deleter = j2k_compress_done; - module_register(&s->module_data, parent); - - return &s->module_data; - -error: - delete s; - return NULL; -} - -static void j2k_compressed_frame_dispose(struct video_frame *frame) -{ +static void j2k_compressed_frame_dispose(struct video_frame *frame) { free(frame->tiles[0].data); vf_free(frame); } -static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) -{ +static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) { (void) codestream; (void) custom_data_size; (void) codestream_size; auto *udata = static_cast(custom_data); udata->frame.~shared_ptr(); } #define HANDLE_ERROR_COMPRESS_PUSH if (img) cmpto_j2k_enc_img_destroy(img); return -static void j2k_compress_push(struct module *state, std::shared_ptr tx) -{ +static void j2k_compress_push(struct module *state, std::shared_ptr tx) { struct state_video_compress_j2k *s = (struct state_video_compress_j2k *) state; struct cmpto_j2k_enc_img *img = NULL; @@ -483,7 +858,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr if (!ret) { return; } - s->pool.reconfigure(s->precompress_desc, vc_get_linesize(s->precompress_desc.width, s->precompress_desc.color_spec) + s->pool->reconfigure(s->precompress_desc, vc_get_linesize(s->precompress_desc.width, s->precompress_desc.color_spec) * s->precompress_desc.height); } @@ -492,7 +867,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr CHECK_OK(cmpto_j2k_enc_img_create(s->context, &img), "Image 
create", return); - /* + /** * Copy video desc to udata (to be able to reconstruct in j2k_compress_pop(). * Further make a place for a shared pointer of allocated data, deleter * returns frame to pool in call of release_cstream() callback (called when @@ -513,7 +888,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr release_cstream), "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); - unique_lock lk(s->lock); + std::unique_lock lk(s->lock); s->frame_popped.wait(lk, [s]{return s->in_frames < s->max_in_frames;}); lk.unlock(); CHECK_OK(cmpto_j2k_enc_img_encode(img, s->enc_settings), @@ -524,8 +899,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr } -static void j2k_compress_done(struct module *mod) -{ +static void j2k_compress_done(struct module *mod) { struct state_video_compress_j2k *s = (struct state_video_compress_j2k *) mod->priv_data; @@ -535,17 +909,25 @@ static void j2k_compress_done(struct module *mod) delete s; } -static compress_module_info get_cmpto_j2k_module_info(){ +static compress_module_info get_cmpto_j2k_module_info() { compress_module_info module_info; module_info.name = "cmpto_j2k"; - for(const auto& opt : usage_opts){ - module_info.opts.emplace_back(module_option{opt.label, - opt.description, opt.key, opt.opt_str, opt.is_boolean}); - } + auto add_module_options = [&](const auto& options) { + for (const auto& opt : options) { + module_info.opts.emplace_back(module_option{opt.label, + opt.description, opt.key, opt.opt_str, opt.is_boolean}); + } + }; + +#ifdef HAVE_CUDA + add_module_options(cuda_opts); +#endif // HAVE_CUDA + add_module_options(cpu_opts); + add_module_options(general_opts); codec codec_info; - codec_info.name = "Comprimato jpeg2000"; + codec_info.name = "Comprimato jpeg2000"; codec_info.priority = 400; codec_info.encoders.emplace_back(encoder{"default", ""}); @@ -567,4 +949,3 @@ static struct video_compress_info j2k_compress_info = { }; REGISTER_MODULE(cmpto_j2k, &j2k_compress_info, 
LIBRARY_CLASS_VIDEO_COMPRESS, VIDEO_COMPRESS_ABI_VERSION); - diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp old mode 100644 new mode 100755 index b74df29416..9bd2c51f0c --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -61,6 +61,14 @@ #include "config_unix.h" #include "config_win32.h" #endif // HAVE_CONFIG_H + +#include + +#include +#include +#include +#include + #include "debug.h" #include "host.h" #include "lib_common.h" @@ -69,25 +77,118 @@ #include "video.h" #include "video_decompress.h" -#include +constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; -#include -#include -#include +#define CHECK_OK(cmd, err_msg, action_fail) do { \ + int j2k_error = cmd; \ + if (j2k_error != CMPTO_OK) {\ + log_msg(LOG_LEVEL_ERROR, "%s %s: %s\n", \ + MOD_NAME, err_msg, cmpto_j2k_dec_get_last_error()); \ + action_fail;\ + } \ +} while (0) + +#define NOOP ((void) 0) + +// General Parameter Defaults +constexpr int DEFAULT_MAX_QUEUE_SIZE = 2; // maximal size of queue for decompressed frames +constexpr int DEFAULT_MAX_IN_FRAMES = 4; // maximal number of concurrently decompressed frames + +// CPU-specific Defaults +constexpr int DEFAULT_THREAD_COUNT = CMPTO_J2K_DEC_CPU_DEFAULT; // Number of threads equal to all cores +constexpr int MIN_CPU_THREAD_COUNT = CMPTO_J2K_DEC_CPU_NONE; // No threads will be created +constexpr size_t DEFAULT_CPU_MEM_LIMIT = 0; // Should always be 0. 
Not implemented as of v2.8.1 +constexpr unsigned int DEFAULT_CPU_IMG_LIMIT = 0; // 0 for default, thread_count for max +constexpr unsigned int MIN_CPU_IMG_LIMIT = 0; // Min number of images encoded by the CPU at once + +#ifdef HAVE_CUDA +// CUDA-specific Defaults +constexpr int64_t DEFAULT_CUDA_MEM_LIMIT = 1000000000; +constexpr int DEFAULT_CUDA_TILE_LIMIT = 2; +#endif // HAVE_CUDA + +using std::lock_guard; +using std::mutex; + +/* + * Function Predeclarations + */ +static void *decompress_j2k_worker(void *args); +static void rg48_to_r12l(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); + +/* + * Platform to use for J2K Decompression + */ +enum j2k_decompress_platform { + NONE = 0, + CPU = 1, +#ifdef HAVE_CUDA + CUDA = 2, +#endif // HAVE_CUDA +}; + +/** + * @brief Struct to hold UG and CMPTO Codec information + */ +struct Codec { + codec_t ug_codec; + enum cmpto_sample_format_type cmpto_sf; + void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); +}; + +// Supported UG/CMPTO Decompress Codecs +constexpr auto codecs = std::array{ + Codec{UYVY, CMPTO_422_U8_P1020, nullptr}, + Codec{v210, CMPTO_422_U10_V210, nullptr}, + Codec{RGB, CMPTO_444_U8_P012, nullptr}, + Codec{BGR, CMPTO_444_U8_P210, nullptr}, + Codec{RGBA, CMPTO_444_U8_P012Z, nullptr}, + Codec{R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr}, + Codec{R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l}, +}; + +/* + * Exceptions for state_video_decompress_j2k construction + */ +/// @brief UnableToCreateJ2KDecoderCTX Exception +struct UnableToCreateJ2KDecoderCTX : public std::exception { + UnableToCreateJ2KDecoderCTX() = default; +}; + +/* + * Command Line Parameters for state_video_decompress_j2k + */ +#ifdef HAVE_CUDA +// CUDA-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" + " use CUDA to decode images\n"); +ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" + " 
J2K max memory usage in bytes.\n"); +ADD_TO_PARAM("j2k-dec-tile-limit", "* j2k-dec-tile-limit=\n" + " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); +// Option to use CPU for image decompression only required if CUDA is also compiled. +// Otherwise, CPU will be the default, with no need to explicity specify. +ADD_TO_PARAM("j2k-dec-use-cpu", "* j2k-dec-use-cpu\n" + " use the CPU to decode images\n"); +#endif // HAVE_CUDA +// CPU-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n" + " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); +ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" + " number of images which can be decoded at one moment (0 means default, thread-count is maximum limit)\n"); +// General Command Line Parameters +ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-dec-queue-len=\n" + " max queue len\n"); +ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-dec-encoder-queue=\n" + " max number of frames held by encoder\n"); -constexpr const int DEFAULT_TILE_LIMIT = 2; -/// maximal size of queue for decompressed frames -constexpr const int DEFAULT_MAX_QUEUE_SIZE = 2; -/// maximal number of concurrently decompressed frames -constexpr const int DEFAULT_MAX_IN_FRAMES = 4; -constexpr const int64_t DEFAULT_MEM_LIMIT = 1000000000LL; -constexpr const char *MOD_NAME = "[J2K dec.] 
"; -using namespace std; +/** + * @brief state_video_decompress_j2k Class + */ +struct state_video_decompress_j2k { + state_video_decompress_j2k(); -struct state_decompress_j2k { - state_decompress_j2k(unsigned int mqs, unsigned int mif) - : max_queue_size(mqs), max_in_frames(mif) {} cmpto_j2k_dec_ctx *decoder{}; cmpto_j2k_dec_cfg *settings{}; @@ -95,50 +196,216 @@ struct state_decompress_j2k { codec_t out_codec{}; mutex lock; - queue> decompressed_frames; ///< buffer, length + std::queue> decompressed_frames; ///< buffer, length int pitch; pthread_t thread_id{}; - unsigned int max_queue_size; ///< maximal length of @ref decompressed_frames - unsigned int max_in_frames; ///< maximal frames that can be "in progress" unsigned int in_frames{}; ///< actual number of decompressed frames - unsigned long long int dropped{}; ///< number of dropped frames because queue was full +#ifdef HAVE_CUDA + // CUDA Defaults + unsigned int cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // Default Decompression Platform to Use + j2k_decompress_platform platform = j2k_decompress_platform::CUDA; +#else + // Default Decompression Platform to Use + j2k_decompress_platform platform = j2k_decompress_platform::CPU; +#endif + + // CPU Defaults + unsigned int cpu_img_limit = DEFAULT_CPU_IMG_LIMIT; + const size_t cpu_mem_limit = DEFAULT_CPU_MEM_LIMIT; // Should always be 0. 
Not yet implemented as of Cmpto v2.8.1 + signed int cpu_thread_count = DEFAULT_THREAD_COUNT; + + // General Defaults + unsigned int max_in_frames = DEFAULT_MAX_IN_FRAMES; // maximal frames that can be "in progress" + unsigned int max_queue_size = DEFAULT_MAX_QUEUE_SIZE; // maximal length of @ref decompressed_frames + void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, - unsigned int width, unsigned int height){nullptr}; + unsigned int width, unsigned int height) { nullptr }; + + private: + void parse_params(); + bool initialize_j2k_dec_ctx(); }; -#define CHECK_OK(cmd, err_msg, action_fail) do { \ - int j2k_error = cmd; \ - if (j2k_error != CMPTO_OK) {\ - LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ - action_fail;\ - } \ -} while(0) +/** + * @brief Default state_video_decompress_j2k Constructor + * @throw UnableToCreateJ2KDecoderCTX if unable to create J2K CTX + */ +state_video_decompress_j2k::state_video_decompress_j2k() { + parse_params(); + + if (!initialize_j2k_dec_ctx()) { + throw UnableToCreateJ2KDecoderCTX(); + } +} + +/** + * @fn parse_params + * @brief Parse Command Line Parameters and Initialize Struct Members + */ +void state_video_decompress_j2k::parse_params() { +#ifdef HAVE_CUDA + if (get_commandline_param("j2k-dec-use-cuda")) { + platform = j2k_decompress_platform::CUDA; + } + + if (get_commandline_param("j2k-dec-mem-limit")) { + cuda_mem_limit = unit_evaluate(get_commandline_param("j2k-dec-mem-limit"), nullptr); + } + + if (get_commandline_param("j2k-dec-tile-limit")) { + cuda_tile_limit = atoi(get_commandline_param("j2k-dec-tile-limit")); + } + + if (get_commandline_param("j2k-dec-use-cpu")) { + platform = j2k_decompress_platform::CPU; + } +#endif // HAVE_CUDA + + if (get_commandline_param("j2k-dec-cpu-thread-count")) { + cpu_thread_count = atoi(get_commandline_param("j2k-dec-cpu-thread-count")); + + // Confirm cpu_thread_count between MIN_CPU_THREAD_COUNT + 1 (0) + if 
(cpu_thread_count <= MIN_CPU_THREAD_COUNT) { + // Implementing this requires the creation of executor threads. + log_msg(LOG_LEVEL_INFO, "%s j2k-dec-cpu-thread-count must be 0 or higher. Setting to min allowed 0\n", + MOD_NAME); + cpu_thread_count = 0; + } + } + + if (get_commandline_param("j2k-dec-queue-len")) { + max_queue_size = atoi(get_commandline_param("j2k-dec-queue-len")); + } + + if (get_commandline_param("j2k-dec-img-limit")) { + cpu_img_limit = atoi(get_commandline_param("j2k-dec-img-limit")); + + // Confirm cpu_img_limit between MIN_CPU_IMG_LIMIT + if (cpu_img_limit < MIN_CPU_IMG_LIMIT) { + log_msg(LOG_LEVEL_INFO, "%s j2k-dec-img-limit below min allowed of %i. Setting to min allowed %i\n", + MOD_NAME, + MIN_CPU_IMG_LIMIT, + MIN_CPU_IMG_LIMIT); + cpu_img_limit = MIN_CPU_IMG_LIMIT; + } + } + + if (get_commandline_param("j2k-dec-encoder-queue")) { + max_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue")); + } + + const auto *version = cmpto_j2k_dec_get_version(); + log_msg(LOG_LEVEL_INFO, "%s Using codec version: %s\n", + MOD_NAME, + (version == nullptr ? 
"(unknown)" : version->name)); +} + + +/** + * @fn initialize_j2k_dec_ctx + * @brief Create cmpto_j2k_dec_ctx_cfg based on requested platform and command line arguments + * @return true if cmpto_j2k_dec_ctx_cfg successfully created + * @return false if unable to create cmpto_j2k_dec_ctx_cfg + */ +[[nodiscard]] +bool state_video_decompress_j2k::initialize_j2k_dec_ctx() { + struct cmpto_j2k_dec_ctx_cfg *dec_ctx_cfg; + CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&dec_ctx_cfg), "Error creating dec cfg", return false); + +#ifdef HAVE_CUDA + if (j2k_decompress_platform::CUDA == platform) { + log_msg(LOG_LEVEL_INFO, "%s Configuring for CUDA Decoding\n", MOD_NAME); + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(dec_ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), + "Error setting CUDA device", return false); + log_msg(LOG_LEVEL_INFO, "%s Using CUDA Device %s\n", MOD_NAME, std::to_string(cuda_devices[i]).c_str()); + } + } +#endif // HAVE_CUDA + if (j2k_decompress_platform::CPU == platform) { + log_msg(LOG_LEVEL_INFO, "%s Configuring for CPU Decoding\n", MOD_NAME); + + // Confirm that cpu_thread_count != 0 (unlimited). If it does, cpu_img_limit can exceed thread_count + if (cpu_thread_count != DEFAULT_THREAD_COUNT && cpu_img_limit > static_cast(cpu_thread_count)) { + log_msg(LOG_LEVEL_INFO, "%s j2k-dec-img-limit set to %i. Lowering to match to match j2k-dec-cpu-thread-count (%i)\n", + MOD_NAME, + cpu_img_limit, + cpu_thread_count); + cpu_img_limit = cpu_thread_count; + } + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( + dec_ctx_cfg, + cpu_thread_count, + cpu_mem_limit, + cpu_img_limit), + "Error configuring the CPU", + return false); + + log_msg(LOG_LEVEL_INFO, "%s Using %s threads on the CPU. Image Limit set to %i.\n", + MOD_NAME, + (cpu_thread_count == 0 ? 
"all available" : std::to_string(cpu_thread_count).c_str()), + cpu_img_limit); + } + + CHECK_OK(cmpto_j2k_dec_ctx_create(dec_ctx_cfg, &this->decoder), + "Error initializing context", + return false); + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(dec_ctx_cfg), + "Destroy cfg", + NOOP); + + CHECK_OK(cmpto_j2k_dec_cfg_create(this->decoder, &this->settings), + "Error creating configuration", { + cmpto_j2k_dec_cfg_destroy(this->settings); + cmpto_j2k_dec_ctx_destroy(this->decoder); + return false; + }); + + auto ret = pthread_create(&this->thread_id, NULL, decompress_j2k_worker, static_cast(this)); + assert(ret == 0 && "Unable to create thread"); + + return true; +} -#define NOOP ((void) 0) static void rg48_to_r12l(unsigned char *dst_buffer, unsigned char *src_buffer, - unsigned int width, unsigned int height) -{ + unsigned int width, unsigned int height) { int src_pitch = vc_get_linesize(width, RG48); - int dst_len = vc_get_linesize(width, R12L); + int dst_len = vc_get_linesize(width, R12L); decoder_t vc_copylineRG48toR12L = get_decoder_from_to(RG48, R12L); - for(unsigned i = 0; i < height; i++){ + for (unsigned i = 0; i < height; i++) { vc_copylineRG48toR12L(dst_buffer, src_buffer, dst_len, 0, 0, 0); src_buffer += src_pitch; dst_buffer += dst_len; } } -static void print_dropped(unsigned long long int dropped) { +static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { if (dropped % 10 == 1) { - log_msg(LOG_LEVEL_WARNING, "[J2K dec] Some frames (%llu) dropped.\n", dropped); - log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "[J2K dec] You may try to increase " - "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n"); + log_msg(LOG_LEVEL_WARNING, "%s Some frames (%llu) dropped.\n", MOD_NAME, dropped); + + if (j2k_decompress_platform::CPU == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "image limit to increase the number 
of images decoded at one moment by adding parameter: --param j2k-dec-img-limit=#\n", + MOD_NAME); + } +#ifdef HAVE_CUDA + if (j2k_decompress_platform::CUDA == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n", + MOD_NAME); + } +#endif // HAVE_CUDA } } @@ -146,17 +413,15 @@ static void print_dropped(unsigned long long int dropped) { * This function just runs in thread and gets decompressed images from decoder * putting them to queue (or dropping if full). */ -static void *decompress_j2k_worker(void *args) -{ - struct state_decompress_j2k *s = - (struct state_decompress_j2k *) args; +static void *decompress_j2k_worker(void *args) { + auto *s = static_cast(args); while (true) { next_image: struct cmpto_j2k_dec_img *img; int decoded_img_status; CHECK_OK(cmpto_j2k_dec_ctx_get_decoded_img(s->decoder, 1, &img, &decoded_img_status), - "Decode image", goto next_image); + "Decode image", goto next_image); { lock_guard lk(s->lock); @@ -168,10 +433,10 @@ static void *decompress_j2k_worker(void *args) } if (decoded_img_status != CMPTO_J2K_DEC_IMG_OK) { - const char * decoding_error = ""; - CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", - decoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); + const char * decoding_error = ""; + CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", + decoding_error = "(failed)"); + log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); continue; } @@ -192,7 +457,7 @@ static void *decompress_j2k_worker(void *args) "Unable to to return processed image", NOOP); lock_guard lk(s->lock); while (s->decompressed_frames.size() >= s->max_queue_size) { - print_dropped(s->dropped++); + print_dropped(s->dropped++, s->platform); auto decoded = s->decompressed_frames.front(); 
s->decompressed_frames.pop(); free(decoded.first); @@ -203,97 +468,23 @@ static void *decompress_j2k_worker(void *args) return NULL; } -ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" - " J2K max memory usage in bytes.\n"); -ADD_TO_PARAM("j2k-dec-tile-limit", "* j2k-dec-tile-limit=\n" - " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); -ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-queue-len=\n" - " max queue len\n"); -ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-encoder-queue=\n" - " max number of frames held by encoder\n"); -static void * j2k_decompress_init(void) -{ - struct state_decompress_j2k *s = NULL; - long long int mem_limit = DEFAULT_MEM_LIMIT; - unsigned int tile_limit = DEFAULT_TILE_LIMIT; - unsigned int queue_len = DEFAULT_MAX_QUEUE_SIZE; - unsigned int encoder_in_frames = DEFAULT_MAX_IN_FRAMES; - int ret; - - if (get_commandline_param("j2k-dec-mem-limit")) { - mem_limit = unit_evaluate( - get_commandline_param("j2k-dec-mem-limit"), nullptr); - } - - if (get_commandline_param("j2k-dec-tile-limit")) { - tile_limit = atoi(get_commandline_param("j2k-dec-tile-limit")); - } - - if (get_commandline_param("j2k-dec-queue-len")) { - queue_len = atoi(get_commandline_param("j2k-dec-queue-len")); - } - - if (get_commandline_param("j2k-dec-encoder-queue")) { - encoder_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue")); - } - - const auto *version = cmpto_j2k_dec_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? 
"(unknown)" : version->name) << "\n"; - - s = new state_decompress_j2k(queue_len, encoder_in_frames); - - struct cmpto_j2k_dec_ctx_cfg *ctx_cfg; - CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", goto error); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], mem_limit, tile_limit), - "Error setting CUDA device", goto error); - } - - CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &s->decoder), "Error initializing context", - goto error); - - CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP); - CHECK_OK(cmpto_j2k_dec_cfg_create(s->decoder, &s->settings), "Error creating configuration", - goto error); - - ret = pthread_create(&s->thread_id, NULL, decompress_j2k_worker, (void *) s); - assert(ret == 0 && "Unable to create thread"); - - return s; - -error: - if (!s) { +/** + * @brief Initialize a new instance of state_video_decompress_j2k + * @return Null or Pointer to state_video_decompress_j2k + */ +static void * j2k_decompress_init(void) { + try { + auto *s = new state_video_decompress_j2k(); + return s; + } catch (...) 
{ return NULL; } - if (s->settings) { - cmpto_j2k_dec_cfg_destroy(s->settings); - } - if (s->decoder) { - cmpto_j2k_dec_ctx_destroy(s->decoder); - } - delete s; - return NULL; } -static struct { - codec_t ug_codec; - enum cmpto_sample_format_type cmpto_sf; - void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); -} codecs[] = { - {UYVY, CMPTO_422_U8_P1020, nullptr}, - {v210, CMPTO_422_U10_V210, nullptr}, - {RGB, CMPTO_444_U8_P012, nullptr}, - {BGR, CMPTO_444_U8_P210, nullptr}, - {RGBA, CMPTO_444_U8_P012Z, nullptr}, - {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr}, - {R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l}, -}; - static int j2k_decompress_reconfigure(void *state, struct video_desc desc, - int rshift, int gshift, int bshift, int pitch, codec_t out_codec) -{ - struct state_decompress_j2k *s = (struct state_decompress_j2k *) state; + int rshift, int gshift, int bshift, int pitch, codec_t out_codec) { + auto *s = static_cast(state); if (out_codec == VIDEO_CODEC_NONE) { // probe format s->out_codec = VIDEO_CODEC_NONE; @@ -302,22 +493,22 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, } if (out_codec == R12L) { - LOG(LOG_LEVEL_NOTICE) << MOD_NAME << "Decoding to 12-bit RGB.\n"; + log_msg(LOG_LEVEL_NOTICE, "%s Decoding to 12-bit RGB.\n", MOD_NAME); } enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0; - for(const auto &codec : codecs){ - if(codec.ug_codec == out_codec){ - cmpto_sf = codec.cmpto_sf; - s->convert = codec.convert; - break; - } + auto matches = [&](const Codec& codec) { return codec.ug_codec == out_codec; }; + + if (const auto& codec = std::find_if(codecs.begin(), codecs.end(), matches) ; codec != codecs.end()) { + cmpto_sf = codec->cmpto_sf; + s->convert = codec->convert; } if (!cmpto_sf) { - LOG(LOG_LEVEL_ERROR) << MOD_NAME << "Unsupported output codec: " << - get_codec_name(out_codec) << "\n"; + log_msg(LOG_LEVEL_ERROR, "%s Unsupported 
output codec: %s", + MOD_NAME, + get_codec_name(out_codec)); abort(); } @@ -326,8 +517,8 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, "Error setting sample format type", return false); } else { // RGBA with non-standard shift if (rshift % 8 != 0 || gshift % 8 != 0 || bshift % 8 != 0) { - LOG(LOG_LEVEL_ERROR) << MOD_NAME << "Component shifts not aligned to a " - "byte boundary is not supported.\n"; + log_msg(LOG_LEVEL_ERROR, "%s Component shifts not aligned to a byte boundary is not supported.\n", + MOD_NAME); return false; } cmpto_j2k_dec_comp_format fmt[3] = {}; @@ -360,8 +551,7 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, /** * Callback called by the codec when codestream is no longer required. */ -static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) -{ +static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) { (void) custom_data; (void) custom_data_size; (void) codestream_size; free(const_cast(codestream)); } @@ -371,7 +561,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha struct cmpto_j2k_dec_comp_info comp_info[3]; if (cmpto_j2k_dec_cstream_get_img_info(buffer, len, &info) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 0, &comp_info[0]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get image or first component info.\n"); + log_msg(LOG_LEVEL_ERROR, "%s Failed to get image or first component info.\n", MOD_NAME); return DECODER_NO_FRAME; } @@ -380,7 +570,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha if (info.comp_count == 3) { if (cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 1, &comp_info[1]) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 2, &comp_info[2]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get componentt 1 or 2 
info.\n"); + log_msg(LOG_LEVEL_ERROR, "%s Failed to get component 1 or 2 info.\n", MOD_NAME); return DECODER_NO_FRAME; } if (comp_info[0].sampling_factor_x == 1 && comp_info[0].sampling_factor_y == 1 && @@ -395,7 +585,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha } int msg_level = internal_prop->subsampling == 0 ? LOG_LEVEL_WARNING /* bogus? */ : LOG_LEVEL_VERBOSE; - log_msg(msg_level, "J2K stream properties: %s\n", get_pixdesc_desc(*internal_prop)); + log_msg(msg_level, "%s stream properties: %s\n", MOD_NAME, get_pixdesc_desc(*internal_prop)); return DECODER_GOT_CODEC; } @@ -406,12 +596,10 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha * it just returns false. */ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigned char *buffer, - unsigned int src_len, int /* frame_seq */, struct video_frame_callbacks * /* callbacks */, struct pixfmt_desc *internal_prop) -{ - struct state_decompress_j2k *s = - (struct state_decompress_j2k *) state; + unsigned int src_len, int /* frame_seq */, struct video_frame_callbacks * /* callbacks */, struct pixfmt_desc *internal_prop) { + auto *s = static_cast(state); struct cmpto_j2k_dec_img *img; - pair decoded; + std::pair decoded; void *tmp; if (s->out_codec == VIDEO_CODEC_NONE) { @@ -419,7 +607,7 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne } if (s->in_frames >= s->max_in_frames + 1) { - print_dropped(s->dropped++); + print_dropped(s->dropped++, s->platform); goto return_previous; } @@ -439,7 +627,7 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne } return_previous: - unique_lock lk(s->lock); + std::unique_lock lk(s->lock); if (s->decompressed_frames.size() == 0) { return DECODER_NO_FRAME; } @@ -450,11 +638,14 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne size_t linesize = vc_get_linesize(s->desc.width, s->out_codec); 
size_t frame_size = linesize * s->desc.height; if ((decoded.second + 3) / 4 * 4 != frame_size) { // for "RGBA with non-standard shift" (search) it would be (frame_size - 1) - LOG(LOG_LEVEL_WARNING) << MOD_NAME << "Incorrect decoded size (" << frame_size << " vs. " << decoded.second << ")\n"; + log_msg(LOG_LEVEL_WARNING, "%s Incorrect decoded size (%lu vs. %lu)\n", + MOD_NAME, + frame_size, + decoded.second); } for (size_t i = 0; i < s->desc.height; ++i) { - memcpy(dst + i * s->pitch, decoded.first + i * linesize, min(linesize, decoded.second - min(decoded.second, i * linesize))); + memcpy(dst + i * s->pitch, decoded.first + i * linesize, std::min(linesize, decoded.second - std::min(decoded.second, i * linesize))); } free(decoded.first); @@ -462,14 +653,13 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne return DECODER_GOT_FRAME; } -static int j2k_decompress_get_property(void *state, int property, void *val, size_t *len) -{ +static int j2k_decompress_get_property(void *state, int property, void *val, size_t *len) { UNUSED(state); int ret = false; - switch(property) { + switch (property) { case DECOMPRESS_PROPERTY_ACCEPTS_CORRUPTED_FRAME: - if(*len >= sizeof(int)) { + if (*len >= sizeof(int)) { *(int *) val = false; *len = sizeof(int); ret = true; @@ -482,13 +672,12 @@ static int j2k_decompress_get_property(void *state, int property, void *val, siz return ret; } -static void j2k_decompress_done(void *state) -{ - struct state_decompress_j2k *s = (struct state_decompress_j2k *) state; +static void j2k_decompress_done(void *state) { + auto *s = static_cast(state); cmpto_j2k_dec_ctx_stop(s->decoder); pthread_join(s->thread_id, NULL); - log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] 
Decoder stopped.\n"); + log_msg(LOG_LEVEL_VERBOSE, "%s Decoder stopped.\n", MOD_NAME); cmpto_j2k_dec_cfg_destroy(s->settings); cmpto_j2k_dec_ctx_destroy(s->decoder); @@ -519,7 +708,7 @@ static int j2k_decompress_get_priority(codec_t compression, struct pixfmt_desc i break; default: return -1; - }; + } if (ugc == VIDEO_CODEC_NONE) { return 50; // probe } @@ -539,4 +728,3 @@ static const struct video_decompress_info j2k_decompress_info = { }; REGISTER_MODULE(j2k, &j2k_decompress_info, LIBRARY_CLASS_VIDEO_DECOMPRESS, VIDEO_DECOMPRESS_ABI_VERSION); - From 4fb6d5f13a52ad58bc423893562355de48f33405 Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Mon, 27 May 2024 12:35:07 +0200 Subject: [PATCH 02/25] reduce merge request diff --- src/video_compress/cmpto_j2k.cpp | 616 ++++++++++++++--------------- src/video_decompress/cmpto_j2k.cpp | 420 ++++++++++---------- 2 files changed, 508 insertions(+), 528 deletions(-) mode change 100755 => 100644 src/video_compress/cmpto_j2k.cpp mode change 100755 => 100644 src/video_decompress/cmpto_j2k.cpp diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp old mode 100755 new mode 100644 index ffc212ccb1..d2df657603 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -48,8 +48,6 @@ #include "config.h" #endif // HAVE_CONFIG_H -#include - #include #include #include @@ -61,6 +59,8 @@ #include #include +#include + #ifdef HAVE_CUDA #include "cuda_wrapper.h" #endif // HAVE_CUDA @@ -68,7 +68,6 @@ #include "host.h" #include "lib_common.h" #include "module.h" -#include "utils/string.h" // replace_all #include "tv.h" #include "utils/color_out.h" #include "utils/misc.h" @@ -76,29 +75,16 @@ #include "video.h" #include "video_compress.h" -constexpr const char *MOD_NAME = "[Cmpto J2K enc.]"; - -#define ASSIGN_CHECK_VAL(var, str, minval) \ - do { \ - long long val = unit_evaluate(str, nullptr); \ - if (val < (minval) || val > UINT_MAX) { \ - LOG(LOG_LEVEL_ERROR) \ - << MOD_NAME << " Wrong value " << 
(str) \ - << " for " #var "! Value must be >= " << (minval) \ - << ".\n"; \ - throw InvalidArgument(); \ - } \ - (var) = val; \ - } while (0) +#define MOD_NAME "[Cmpto J2K enc.] " #define CHECK_OK(cmd, err_msg, action_fail) do { \ int j2k_error = cmd; \ if (j2k_error != CMPTO_OK) {\ - log_msg(LOG_LEVEL_ERROR, "%s %s: %s\n", \ - MOD_NAME, err_msg, cmpto_j2k_enc_get_last_error()); \ + log_msg(LOG_LEVEL_ERROR, "[J2K enc.] %s: %s\n", \ + err_msg, cmpto_j2k_enc_get_last_error()); \ action_fail;\ } \ -} while (0) +} while(0) #define NOOP ((void) 0) @@ -112,28 +98,34 @@ constexpr const char *MOD_NAME = "[Cmpto J2K enc.]"; // Default CUDA Settings #define DEFAULT_CUDA_POOL_SIZE 4 +/// number of frames that encoder encodes at moment #define DEFAULT_CUDA_TILE_LIMIT 1 -#define DEFAULT_CUDA_MEM_LIMIT 1000000000ULL +#define DEFAULT_CUDA_MEM_LIMIT 1000000000ULLU // Default General Settings #define DEFAULT_QUALITY 0.7 +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames #ifdef HAVE_CUDA #define DEFAULT_POOL_SIZE DEFAULT_CUDA_POOL_SIZE #else #define DEFAULT_POOL_SIZE DEFAULT_CPU_POOL_SIZE #endif +using std::condition_variable; using std::mutex; using std::shared_ptr; +using std::unique_lock; #ifdef HAVE_CUDA struct cmpto_j2k_enc_cuda_host_buffer_data_allocator : public video_frame_pool_allocator { - void *allocate(size_t size) override { + void *allocate(size_t size) override + { void *ptr = nullptr; if (CUDA_WRAPPER_SUCCESS != cuda_wrapper_malloc_host(&ptr, size)) { - log_msg(LOG_LEVEL_ERROR, "Cannot allocate host buffer: %s\n", + MSG(ERROR, "Cannot allocate host buffer: %s\n", cuda_wrapper_last_error_string()); return nullptr; } @@ -142,7 +134,8 @@ struct cmpto_j2k_enc_cuda_host_buffer_data_allocator void deallocate(void *ptr) override { cuda_wrapper_free(ptr); } - [[nodiscard]] video_frame_pool_allocator *clone() const override { + [[nodiscard]] video_frame_pool_allocator *clone() const override + { return new 
cmpto_j2k_enc_cuda_host_buffer_data_allocator(*this); } }; @@ -153,11 +146,6 @@ using allocator = default_data_allocator; #endif using cpu_allocator = default_data_allocator; -// Pre Declarations -static void j2k_compressed_frame_dispose(struct video_frame *frame); -static void j2k_compress_done(struct module *mod); -static void R12L_to_RG48(video_frame *dst, video_frame *src); - /** * @brief Platforms available for J2K Compression */ @@ -205,26 +193,6 @@ static j2k_compress_platform get_platform_from_name(std::string name) { return j2k_compress_platform::NONE; } -/** - * @brief Struct to hold UG and CMPTO Codec information - */ -struct Codec { - codec_t ug_codec; - enum cmpto_sample_format_type cmpto_sf; - codec_t convert_codec; - void (*convertFunc)(video_frame *dst, video_frame *src); -}; - -// Supported UG/CMPTO Compress Codecs -constexpr auto codecs = std::array{ - Codec{UYVY, CMPTO_422_U8_P1020, VIDEO_CODEC_NONE, nullptr}, - Codec{v210, CMPTO_422_U10_V210, VIDEO_CODEC_NONE, nullptr}, - Codec{RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, - Codec{RGBA, CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, - Codec{R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, - Codec{R12L, CMPTO_444_U12_MSB16LE_P012, RG48, R12L_to_RG48}, -}; - /** * Exceptions for state_video_compress_j2k construction */ @@ -244,6 +212,249 @@ struct UnableToCreateJ2KEncoderCTX : public std::exception { UnableToCreateJ2KEncoderCTX() = default; }; + +/** + * @brief state_video_compress_j2k Class + */ +struct state_video_compress_j2k { + state_video_compress_j2k(struct module *parent, const char* opts); + + module module_data{}; + struct cmpto_j2k_enc_ctx *context{}; + struct cmpto_j2k_enc_cfg *enc_settings{}; + std::unique_ptr pool; + unsigned int in_frames{}; ///< number of currently encoding frames + mutex lock; + condition_variable frame_popped; + video_desc saved_desc{}; ///< for pool reconfiguration + video_desc precompress_desc{}; + video_desc compressed_desc{}; + + 
void (*convertFunc)(video_frame *dst, video_frame *src) { nullptr }; + + // Generic Parameters + double quality = DEFAULT_QUALITY; // default image quality + long long int rate = 0; // bitrate in bits per second + int mct = -1; // force use of mct - -1 means default + bool lossless = false; // lossless encoding + + // CPU Parameters + int cpu_thread_count = DEFAULT_CPU_THREAD_COUNT; + unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; + + // CUDA Parameters + unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // Platform to use by default +#ifdef HAVE_CUDA + j2k_compress_platform platform = j2k_compress_platform::CUDA; + unsigned int max_in_frames = DEFAULT_CUDA_POOL_SIZE; ///< max number of frames between push and pop +#else + j2k_compress_platform platform = j2k_compress_platform::CPU; + unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of frames between push and pop +#endif + + private: + void parse_fmt(const char* opts); + bool initialize_j2k_enc_ctx(); + + // CPU Parameter + const size_t cpu_mem_limit = 0; // Not yet implemented as of v2.8.1. Must be 0. +}; + +static void j2k_compressed_frame_dispose(struct video_frame *frame); +static void j2k_compress_done(struct module *mod); + +/** + * @brief state_video_compress_j2k constructor to create from opts + * @param parent Base Module Struct + * @param opts Configuration options to construct class + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + * @throw UnableToCreateJ2KEncoderCTX if failure to create J2K CTX +*/ +state_video_compress_j2k::state_video_compress_j2k(struct module *parent, const char* opts) { + try { + parse_fmt(opts); + } catch (...) 
{ + throw; + } + + if (!initialize_j2k_enc_ctx()) { + throw UnableToCreateJ2KEncoderCTX(); + } + + module_init_default(&module_data); + module_data.cls = MODULE_CLASS_DATA; + module_data.priv_data = this; + module_data.deleter = j2k_compress_done; + module_register(&module_data, parent); +} + +static void R12L_to_RG48(video_frame *dst, video_frame *src){ + int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); + int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); + + unsigned char *s = (unsigned char *) src->tiles[0].data; + unsigned char *d = (unsigned char *) dst->tiles[0].data; + decoder_t vc_copylineR12LtoRG48 = get_decoder_from_to(R12L, RG48); + + for(unsigned i = 0; i < src->tiles[0].height; i++){ + vc_copylineR12LtoRG48(d, s, dst_pitch, 0, 0, 0); + s += src_pitch; + d += dst_pitch; + } +} + +static struct { + codec_t ug_codec; + enum cmpto_sample_format_type cmpto_sf; + codec_t convert_codec; + void (*convertFunc)(video_frame *dst, video_frame *src); +} codecs[] = { + {UYVY, CMPTO_422_U8_P1020, VIDEO_CODEC_NONE, nullptr}, + {v210, CMPTO_422_U10_V210, VIDEO_CODEC_NONE, nullptr}, + {RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, + {RGBA, CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, + {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, + {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, R12L_to_RG48}, +}; + +static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc){ + enum cmpto_sample_format_type sample_format; + bool found = false; + + for(const auto &codec : codecs){ + if(codec.ug_codec == desc.color_spec){ + sample_format = codec.cmpto_sf; + s->convertFunc = codec.convertFunc; + s->precompress_desc = desc; + if(codec.convert_codec != VIDEO_CODEC_NONE){ + s->precompress_desc.color_spec = codec.convert_codec; + } + found = true; + break; + } + } + + if(!found){ + log_msg(LOG_LEVEL_ERROR, "[J2K] Failed to find suitable pixel format\n"); + return false; + } + + 
CHECK_OK(cmpto_j2k_enc_cfg_set_samples_format_type(s->enc_settings, sample_format), + "Setting sample format", return false); + CHECK_OK(cmpto_j2k_enc_cfg_set_size(s->enc_settings, desc.width, desc.height), + "Setting image size", return false); + if (s->rate) { + CHECK_OK(cmpto_j2k_enc_cfg_set_rate_limit(s->enc_settings, + CMPTO_J2K_ENC_COMP_MASK_ALL, + CMPTO_J2K_ENC_RES_MASK_ALL, s->rate / 8 / desc.fps), + "Setting rate limit", + NOOP); + } + + int mct = s->mct; + if (mct == -1) { + mct = codec_is_a_rgb(desc.color_spec) ? 1 : 0; + } + CHECK_OK(cmpto_j2k_enc_cfg_set_mct(s->enc_settings, mct), + "Setting MCT", + NOOP); + + s->compressed_desc = desc; + s->compressed_desc.color_spec = codec_is_a_rgb(desc.color_spec) ? J2KR : J2K; + s->compressed_desc.tile_count = 1; + + s->saved_desc = desc; + + return true; +} + +static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ + std::shared_ptr ret = s->pool->get_frame(); + + if (s->convertFunc) { + s->convertFunc(ret.get(), frame); + } else { + memcpy(ret->tiles[0].data, frame->tiles[0].data, frame->tiles[0].data_len); + } + + return ret; +} + +/// auxilliary data structure passed with encoded frame +struct custom_data { + custom_data() = delete; + custom_data(custom_data &b) = delete; + custom_data &operator=(const custom_data &) = delete; + ~custom_data() = delete; + shared_ptr frame; + video_desc desc; + // metadata stored separately, frame may have already been deallocated + // by our release_cstream callback + char metadata[VF_METADATA_SIZE]; +}; + +/** + * @fn j2k_compress_pop + * @note + * Do not return empty frame in case of error - that would be interpreted + * as a poison pill (see below) and would stop the further processing + * pipeline. Because of that goto + start label is used. 
+ */ +#define HANDLE_ERROR_COMPRESS_POP do { cmpto_j2k_enc_img_destroy(img); goto start; } while (0) +static std::shared_ptr j2k_compress_pop(struct module *state) +{ +start: + struct state_video_compress_j2k *s = + (struct state_video_compress_j2k *) state; + + struct cmpto_j2k_enc_img *img; + int status; + CHECK_OK(cmpto_j2k_enc_ctx_get_encoded_img( + s->context, + 1, + &img /* Set to NULL if encoder stopped */, + &status), "Encode image", HANDLE_ERROR_COMPRESS_POP); + { + unique_lock lk(s->lock); + s->in_frames--; + s->frame_popped.notify_one(); + } + if (!img) { + // this happens cmpto_j2k_enc_ctx_stop() is called + // pass poison pill further + return {}; + } + if (status != CMPTO_J2K_ENC_IMG_OK) { + const char * encoding_error = ""; + CHECK_OK(cmpto_j2k_enc_img_get_error(img, &encoding_error), "get error status", + encoding_error = "(failed)"); + log_msg(LOG_LEVEL_ERROR, "Image encoding failed: %s\n", encoding_error); + goto start; + } + struct custom_data *udata = nullptr; + size_t len; + CHECK_OK(cmpto_j2k_enc_img_get_custom_data(img, (void **) &udata, &len), + "get custom data", HANDLE_ERROR_COMPRESS_POP); + size_t size; + void * ptr; + CHECK_OK(cmpto_j2k_enc_img_get_cstream(img, &ptr, &size), + "get cstream", HANDLE_ERROR_COMPRESS_POP); + + struct video_frame *out = vf_alloc_desc(udata->desc); + vf_restore_metadata(out, udata->metadata); + out->tiles[0].data_len = size; + out->tiles[0].data = (char *) malloc(size); + memcpy(out->tiles[0].data, ptr, size); + CHECK_OK(cmpto_j2k_enc_img_destroy(img), "Destroy image", NOOP); + out->callbacks.dispose = j2k_compressed_frame_dispose; + out->compress_end = get_time_in_ns(); + return shared_ptr(out, out->callbacks.dispose); +} + /// @brief Struct for options for J2K Compression Usage struct opts { const char *label; @@ -253,6 +464,18 @@ struct opts { const bool is_boolean; }; +constexpr opts general_opts[5] = { + {"Bitrate", "quality", "Target bitrate", ":rate=", false}, + {"Quality", "quant_coeff", "Quality in 
range [0-1], default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false}, +#ifdef HAVE_CUDA + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than tile_limit. default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false}, +#else + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than img_limit. default: " TOSTRING(DEFAULT_POOL_SIZE) , ":pool_size=", false}, +#endif + {"Use MCT", "mct", "Use MCT", ":mct", true}, + {"Lossless compression", "lossless", "Enable lossless compression. default: disabled", ":lossless", true} +}; + #ifdef HAVE_CUDA constexpr opts cuda_opts[2] = { {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_CUDA_MEM_LIMIT), ":mem_limit=", false}, @@ -268,18 +491,6 @@ constexpr opts cpu_opts[2] = { {"Image limit", "img_limit", "Number of images which can be encoded at one moment by CPU. Maximum allowed limit is thread_count. 0 is default limit. default: " TOSTRING(DEFAULT_IMG_LIMIT), ":img_limit=", false}, }; -constexpr opts general_opts[5] = { - {"Bitrate", "quality", "Target bitrate", ":rate=", false}, - {"Quality", "quant_coeff", "Quality in range [0-1]. default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false}, -#ifdef HAVE_CUDA - {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than tile_limit or img_limit. default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false}, -#else - {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than img_limit. default: " TOSTRING(DEFAULT_POOL_SIZE) , ":pool_size=", false}, -#endif - {"Use MCT", "mct", "Use MCT", ":mct", true}, - {"Lossless compression", "lossless", "Enable lossless compression. 
default: disabled", ":lossless", true} -}; - /** * @fn usage * @brief Display J2K Compression Usage Information @@ -344,97 +555,18 @@ static void usage() { show_arguments(general_opts); } - -/** - * @brief state_video_compress_j2k Class - */ -struct state_video_compress_j2k { - explicit state_video_compress_j2k(struct module *parent); - state_video_compress_j2k(struct module *parent, const char* opts); - - module module_data{}; - struct cmpto_j2k_enc_ctx *context{}; - struct cmpto_j2k_enc_cfg *enc_settings{}; - std::unique_ptr pool; - unsigned int in_frames{}; ///< number of currently encoding frames - mutex lock; - std::condition_variable frame_popped; - video_desc saved_desc{}; ///< for pool reconfiguration - video_desc precompress_desc{}; - video_desc compressed_desc{}; - - void (*convertFunc)(video_frame *dst, video_frame *src) { nullptr }; - - // Generic Parameters - double quality = DEFAULT_QUALITY; // default image quality - long long int rate = 0; // bitrate in bits per second - int mct = -1; // force use of mct - -1 means default - bool lossless = false; // lossless encoding - - // CPU Parameters - int cpu_thread_count = DEFAULT_CPU_THREAD_COUNT; - unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; - - // CUDA Parameters - unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; - unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; - - // Platform to use by default -#ifdef HAVE_CUDA - j2k_compress_platform platform = j2k_compress_platform::CUDA; - unsigned int max_in_frames = DEFAULT_CUDA_POOL_SIZE; ///< max number of frames between push and pop -#else - j2k_compress_platform platform = j2k_compress_platform::CPU; - unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of frames between push and pop -#endif - - private: - void parse_fmt(const char* opts); - bool initialize_j2k_enc_ctx(); - - // CPU Parameter - const size_t cpu_mem_limit = 0; // Not yet implemented as of v2.8.1. Must be 0. 
-}; - - -/** - * @brief state_video_compress_j2k default constructor to create from module - * @param parent Base Module Struct -*/ -state_video_compress_j2k::state_video_compress_j2k(struct module *parent) - : pool(std::make_unique(DEFAULT_POOL_SIZE, allocator())) { - module_init_default(&module_data); - module_data.cls = MODULE_CLASS_DATA; - module_data.priv_data = this; - module_data.deleter = j2k_compress_done; - module_register(&module_data, parent); -} - -/** - * @brief state_video_compress_j2k constructor to create from opts - * @param parent Base Module Struct - * @param opts Configuration options to construct class - * @throw HelpRequested if help requested - * @throw InvalidArgument if argument provided isn't known - * @throw UnableToCreateJ2KEncoderCTX if failure to create J2K CTX -*/ -state_video_compress_j2k::state_video_compress_j2k(struct module *parent, const char* opts) { - try { - parse_fmt(opts); - } catch (...) { - throw; - } - - if (!initialize_j2k_enc_ctx()) { - throw UnableToCreateJ2KEncoderCTX(); - } - - module_init_default(&module_data); - module_data.cls = MODULE_CLASS_DATA; - module_data.priv_data = this; - module_data.deleter = j2k_compress_done; - module_register(&module_data, parent); -} +#define ASSIGN_CHECK_VAL(var, str, minval) \ + do { \ + long long val = unit_evaluate(str, nullptr); \ + if (val < (minval) || val > UINT_MAX) { \ + LOG(LOG_LEVEL_ERROR) \ + << "[J2K] Wrong value " << (str) \ + << " for " #var "! Value must be >= " << (minval) \ + << ".\n"; \ + throw InvalidArgument(); \ + } \ + (var) = val; \ + } while (0) /// CUDA opt Syntax // -c cmpto_j2k:platform=cuda[:mem_limit=][:tile_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] [--cuda-device ] @@ -475,9 +607,7 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { } const auto *version = cmpto_j2k_enc_get_version(); - log_msg(LOG_LEVEL_INFO, "%s Using Codec version: %s\n", - MOD_NAME, - (version == nullptr ? "(unknown)" : version->name)); + LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; const char* item = ""; @@ -595,8 +725,7 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { bool state_video_compress_j2k::initialize_j2k_enc_ctx() { struct cmpto_j2k_enc_ctx_cfg *ctx_cfg; - CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), - "Context configuration create", + CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), "Context configuration create", return false); if (j2k_compress_platform::CPU == platform) { @@ -634,12 +763,10 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { } #endif // HAVE_CUDA - CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &context), - "Context create", + CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &context), "Context create", return false); - CHECK_OK(cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg), - "Context configuration destroy", + CHECK_OK(cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg), "Context configuration destroy", NOOP); CHECK_OK(cmpto_j2k_enc_cfg_create( @@ -668,152 +795,6 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { return true; } -static void R12L_to_RG48(video_frame *dst, video_frame *src) { - int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); - int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); - - unsigned char *s = (unsigned char *) src->tiles[0].data; - unsigned char *d = (unsigned char *) dst->tiles[0].data; - decoder_t vc_copylineR12LtoRG48 = get_decoder_from_to(R12L, RG48); - - for (unsigned i = 0; i < src->tiles[0].height; i++) { - vc_copylineR12LtoRG48(d, s, dst_pitch, 0, 0, 0); - s += src_pitch; - d += dst_pitch; - } -} - -static bool configure_with(struct 
state_video_compress_j2k *s, struct video_desc desc) { - enum cmpto_sample_format_type sample_format; - bool found = false; - auto matches = [&](const Codec& codec) { return codec.ug_codec == desc.color_spec; }; - - if (const auto& codec = std::find_if(codecs.begin(), codecs.end(), matches) ; codec != codecs.end()) { - sample_format = codec->cmpto_sf; - s->convertFunc = codec->convertFunc; - s->precompress_desc = desc; - if (codec->convert_codec != VIDEO_CODEC_NONE) { - s->precompress_desc.color_spec = codec->convert_codec; - } - found = true; - } - - if (!found) { - log_msg(LOG_LEVEL_ERROR, "%s Failed to find suitable pixel format\n", MOD_NAME); - return false; - } - - CHECK_OK(cmpto_j2k_enc_cfg_set_samples_format_type(s->enc_settings, sample_format), - "Setting sample format", return false); - CHECK_OK(cmpto_j2k_enc_cfg_set_size(s->enc_settings, desc.width, desc.height), - "Setting image size", return false); - if (s->rate) { - CHECK_OK(cmpto_j2k_enc_cfg_set_rate_limit(s->enc_settings, - CMPTO_J2K_ENC_COMP_MASK_ALL, - CMPTO_J2K_ENC_RES_MASK_ALL, s->rate / 8 / desc.fps), - "Setting rate limit", - NOOP); - } - - int mct = s->mct; - if (mct == -1) { - mct = codec_is_a_rgb(desc.color_spec) ? 1 : 0; - } - CHECK_OK(cmpto_j2k_enc_cfg_set_mct(s->enc_settings, mct), - "Setting MCT", - NOOP); - - s->compressed_desc = desc; - s->compressed_desc.color_spec = codec_is_a_rgb(desc.color_spec) ? 
J2KR : J2K; - s->compressed_desc.tile_count = 1; - - s->saved_desc = desc; - - return true; -} - -static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame) { - std::shared_ptr ret = s->pool->get_frame(); - - if (s->convertFunc) { - s->convertFunc(ret.get(), frame); - } else { - memcpy(ret->tiles[0].data, frame->tiles[0].data, frame->tiles[0].data_len); - } - - return ret; -} - -/// auxilliary data structure passed with encoded frame -struct custom_data { - custom_data() = delete; - custom_data(custom_data &b) = delete; - custom_data &operator=(const custom_data &) = delete; - ~custom_data() = delete; - shared_ptr frame; - video_desc desc; - // metadata stored separately, frame may have already been deallocated - // by our release_cstream callback - char metadata[VF_METADATA_SIZE]; -}; - -/** - * @fn j2k_compress_pop - * @note - * Do not return empty frame in case of error - that would be interpreted - * as a poison pill (see below) and would stop the further processing - * pipeline. Because of that goto + start label is used. 
- */ -#define HANDLE_ERROR_COMPRESS_POP do { cmpto_j2k_enc_img_destroy(img); goto start; } while (0) -static std::shared_ptr j2k_compress_pop(struct module *state) { -start: - struct state_video_compress_j2k *s = - (struct state_video_compress_j2k *) state; - - struct cmpto_j2k_enc_img *img; - int status; - CHECK_OK(cmpto_j2k_enc_ctx_get_encoded_img( - s->context, - 1, - &img /* Set to NULL if encoder stopped */, - &status), "Encode image", HANDLE_ERROR_COMPRESS_POP); - { - std::unique_lock lk(s->lock); - s->in_frames--; - s->frame_popped.notify_one(); - } - if (!img) { - // this happens cmpto_j2k_enc_ctx_stop() is called - // pass poison pill further - return {}; - } - if (status != CMPTO_J2K_ENC_IMG_OK) { - const char * encoding_error = ""; - CHECK_OK(cmpto_j2k_enc_img_get_error(img, &encoding_error), "get error status", - encoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image encoding failed: %s\n", encoding_error); - goto start; - } - struct custom_data *udata = nullptr; - size_t len; - CHECK_OK(cmpto_j2k_enc_img_get_custom_data(img, (void **) &udata, &len), - "get custom data", HANDLE_ERROR_COMPRESS_POP); - size_t size; - void * ptr; - CHECK_OK(cmpto_j2k_enc_img_get_cstream(img, &ptr, &size), - "get cstream", HANDLE_ERROR_COMPRESS_POP); - - struct video_frame *out = vf_alloc_desc(udata->desc); - vf_restore_metadata(out, udata->metadata); - out->tiles[0].data_len = size; - out->tiles[0].data = (char *) malloc(size); - memcpy(out->tiles[0].data, ptr, size); - CHECK_OK(cmpto_j2k_enc_img_destroy(img), "Destroy image", NOOP); - out->callbacks.dispose = j2k_compressed_frame_dispose; - out->compress_end = get_time_in_ns(); - return shared_ptr(out, out->callbacks.dispose); -} - static struct module * j2k_compress_init(struct module *parent, const char *opts) { try { auto *s = new state_video_compress_j2k(parent, opts); @@ -829,19 +810,22 @@ static struct module * j2k_compress_init(struct module *parent, const char *opts } } -static void 
j2k_compressed_frame_dispose(struct video_frame *frame) { +static void j2k_compressed_frame_dispose(struct video_frame *frame) +{ free(frame->tiles[0].data); vf_free(frame); } -static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) { +static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) +{ (void) codestream; (void) custom_data_size; (void) codestream_size; auto *udata = static_cast(custom_data); udata->frame.~shared_ptr(); } #define HANDLE_ERROR_COMPRESS_PUSH if (img) cmpto_j2k_enc_img_destroy(img); return -static void j2k_compress_push(struct module *state, std::shared_ptr tx) { +static void j2k_compress_push(struct module *state, std::shared_ptr tx) +{ struct state_video_compress_j2k *s = (struct state_video_compress_j2k *) state; struct cmpto_j2k_enc_img *img = NULL; @@ -867,7 +851,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr CHECK_OK(cmpto_j2k_enc_img_create(s->context, &img), "Image create", return); - /** + /* * Copy video desc to udata (to be able to reconstruct in j2k_compress_pop(). 
* Further make a place for a shared pointer of allocated data, deleter * returns frame to pool in call of release_cstream() callback (called when @@ -888,7 +872,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr release_cstream), "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); - std::unique_lock lk(s->lock); + unique_lock lk(s->lock); s->frame_popped.wait(lk, [s]{return s->in_frames < s->max_in_frames;}); lk.unlock(); CHECK_OK(cmpto_j2k_enc_img_encode(img, s->enc_settings), @@ -899,7 +883,8 @@ static void j2k_compress_push(struct module *state, std::shared_ptr } -static void j2k_compress_done(struct module *mod) { +static void j2k_compress_done(struct module *mod) +{ struct state_video_compress_j2k *s = (struct state_video_compress_j2k *) mod->priv_data; @@ -909,7 +894,7 @@ static void j2k_compress_done(struct module *mod) { delete s; } -static compress_module_info get_cmpto_j2k_module_info() { +static compress_module_info get_cmpto_j2k_module_info(){ compress_module_info module_info; module_info.name = "cmpto_j2k"; @@ -927,7 +912,7 @@ static compress_module_info get_cmpto_j2k_module_info() { add_module_options(general_opts); codec codec_info; - codec_info.name = "Comprimato jpeg2000"; + codec_info.name = "Comprimato jpeg2000"; codec_info.priority = 400; codec_info.encoders.emplace_back(encoder{"default", ""}); @@ -949,3 +934,4 @@ static struct video_compress_info j2k_compress_info = { }; REGISTER_MODULE(cmpto_j2k, &j2k_compress_info, LIBRARY_CLASS_VIDEO_COMPRESS, VIDEO_COMPRESS_ABI_VERSION); + diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp old mode 100755 new mode 100644 index 9bd2c51f0c..2183906988 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -79,15 +79,6 @@ constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; -#define CHECK_OK(cmd, err_msg, action_fail) do { \ - int j2k_error = cmd; \ - if (j2k_error != CMPTO_OK) {\ - log_msg(LOG_LEVEL_ERROR, "%s %s: %s\n", 
\ - MOD_NAME, err_msg, cmpto_j2k_dec_get_last_error()); \ - action_fail;\ - } \ -} while (0) - #define NOOP ((void) 0) // General Parameter Defaults @@ -108,13 +99,16 @@ constexpr int DEFAULT_CUDA_TILE_LIMIT = 2; #endif // HAVE_CUDA using std::lock_guard; +using std::min; using std::mutex; +using std::pair; +using std::queue; +using std::unique_lock; /* * Function Predeclarations - */ + */ static void *decompress_j2k_worker(void *args); -static void rg48_to_r12l(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); /* * Platform to use for J2K Decompression @@ -127,67 +121,16 @@ enum j2k_decompress_platform { #endif // HAVE_CUDA }; -/** - * @brief Struct to hold UG and CMPTO Codec information - */ -struct Codec { - codec_t ug_codec; - enum cmpto_sample_format_type cmpto_sf; - void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); -}; - -// Supported UG/CMPTO Decompress Codecs -constexpr auto codecs = std::array{ - Codec{UYVY, CMPTO_422_U8_P1020, nullptr}, - Codec{v210, CMPTO_422_U10_V210, nullptr}, - Codec{RGB, CMPTO_444_U8_P012, nullptr}, - Codec{BGR, CMPTO_444_U8_P210, nullptr}, - Codec{RGBA, CMPTO_444_U8_P012Z, nullptr}, - Codec{R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr}, - Codec{R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l}, -}; - /* - * Exceptions for state_video_decompress_j2k construction + * Exceptions for state_decompress_j2k construction */ /// @brief UnableToCreateJ2KDecoderCTX Exception struct UnableToCreateJ2KDecoderCTX : public std::exception { UnableToCreateJ2KDecoderCTX() = default; }; -/* - * Command Line Parameters for state_video_decompress_j2k - */ -#ifdef HAVE_CUDA -// CUDA-specific Command Line Parameters -ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" - " use CUDA to decode images\n"); -ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" - " J2K max memory usage in bytes.\n"); -ADD_TO_PARAM("j2k-dec-tile-limit", "* 
j2k-dec-tile-limit=\n" - " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); -// Option to use CPU for image decompression only required if CUDA is also compiled. -// Otherwise, CPU will be the default, with no need to explicity specify. -ADD_TO_PARAM("j2k-dec-use-cpu", "* j2k-dec-use-cpu\n" - " use the CPU to decode images\n"); -#endif // HAVE_CUDA -// CPU-specific Command Line Parameters -ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n" - " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); -ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" - " number of images which can be decoded at one moment (0 means default, thread-count is maximum limit)\n"); -// General Command Line Parameters -ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-dec-queue-len=\n" - " max queue len\n"); -ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-dec-encoder-queue=\n" - " max number of frames held by encoder\n"); - - -/** - * @brief state_video_decompress_j2k Class - */ -struct state_video_decompress_j2k { - state_video_decompress_j2k(); +struct state_decompress_j2k { + state_decompress_j2k(); cmpto_j2k_dec_ctx *decoder{}; cmpto_j2k_dec_cfg *settings{}; @@ -196,10 +139,11 @@ struct state_video_decompress_j2k { codec_t out_codec{}; mutex lock; - std::queue> decompressed_frames; ///< buffer, length + queue> decompressed_frames; ///< buffer, length int pitch; pthread_t thread_id{}; unsigned int in_frames{}; ///< actual number of decompressed frames + unsigned long long int dropped{}; ///< number of dropped frames because queue was full #ifdef HAVE_CUDA @@ -225,18 +169,26 @@ struct state_video_decompress_j2k { void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, - unsigned int width, unsigned int height) { nullptr }; + unsigned int width, unsigned int height){nullptr}; private: void parse_params(); bool initialize_j2k_dec_ctx(); }; +#define CHECK_OK(cmd, 
err_msg, action_fail) do { \ + int j2k_error = cmd; \ + if (j2k_error != CMPTO_OK) {\ + LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ + action_fail;\ + } \ +} while(0) + /** - * @brief Default state_video_decompress_j2k Constructor + * @brief Default state_decompress_j2k Constructor * @throw UnableToCreateJ2KDecoderCTX if unable to create J2K CTX */ -state_video_decompress_j2k::state_video_decompress_j2k() { +state_decompress_j2k::state_decompress_j2k() { parse_params(); if (!initialize_j2k_dec_ctx()) { @@ -244,11 +196,134 @@ state_video_decompress_j2k::state_video_decompress_j2k() { } } + +static void rg48_to_r12l(unsigned char *dst_buffer, + unsigned char *src_buffer, + unsigned int width, unsigned int height) +{ + int src_pitch = vc_get_linesize(width, RG48); + int dst_len = vc_get_linesize(width, R12L); + decoder_t vc_copylineRG48toR12L = get_decoder_from_to(RG48, R12L); + + for(unsigned i = 0; i < height; i++){ + vc_copylineRG48toR12L(dst_buffer, src_buffer, dst_len, 0, 0, 0); + src_buffer += src_pitch; + dst_buffer += dst_len; + } +} + +static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { + if (dropped % 10 == 1) { + log_msg(LOG_LEVEL_WARNING, "%s Some frames (%llu) dropped.\n", MOD_NAME, dropped); + + if (j2k_decompress_platform::CPU == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "image limit to increase the number of images decoded at one moment by adding parameter: --param j2k-dec-img-limit=#\n", + MOD_NAME); + } +#ifdef HAVE_CUDA + if (j2k_decompress_platform::CUDA == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n", + MOD_NAME); + } +#endif // HAVE_CUDA + } +} + +/** + * This function just runs in thread and gets decompressed images from decoder + 
* putting them to queue (or dropping if full). + */ +static void *decompress_j2k_worker(void *args) +{ + struct state_decompress_j2k *s = + (struct state_decompress_j2k *) args; + + while (true) { +next_image: + struct cmpto_j2k_dec_img *img; + int decoded_img_status; + CHECK_OK(cmpto_j2k_dec_ctx_get_decoded_img(s->decoder, 1, &img, &decoded_img_status), + "Decode image", goto next_image); + + { + lock_guard lk(s->lock); + if (s->in_frames) s->in_frames--; + } + + if (img == NULL) { // decoder stopped (poison pill) + break; + } + + if (decoded_img_status != CMPTO_J2K_DEC_IMG_OK) { + const char * decoding_error = ""; + CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", + decoding_error = "(failed)"); + log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); + continue; + } + + void *dec_data; + size_t len; + CHECK_OK(cmpto_j2k_dec_img_get_samples(img, &dec_data, &len), + "Error getting samples", cmpto_j2k_dec_img_destroy(img); goto next_image); + + char *buffer = (char *) malloc(len); + if (s->convert) { + s->convert((unsigned char*) buffer, (unsigned char*) dec_data, s->desc.width, s->desc.height); + len = vc_get_linesize(s->desc.width, s->out_codec) * s->desc.height; + } else { + memcpy(buffer, dec_data, len); + } + + CHECK_OK(cmpto_j2k_dec_img_destroy(img), + "Unable to to return processed image", NOOP); + lock_guard lk(s->lock); + while (s->decompressed_frames.size() >= s->max_queue_size) { + print_dropped(s->dropped++, s->platform); + auto decoded = s->decompressed_frames.front(); + s->decompressed_frames.pop(); + free(decoded.first); + } + s->decompressed_frames.push({buffer,len}); + } + + return NULL; +} + +/* + * Command Line Parameters for state_decompress_j2k + */ +#ifdef HAVE_CUDA +// CUDA-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" + " use CUDA to decode images\n"); +ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" + " J2K max memory usage in bytes.\n"); 
+ADD_TO_PARAM("j2k-dec-tile-limit", "* j2k-dec-tile-limit=\n" + " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); +// Option to use CPU for image decompression only required if CUDA is also compiled. +// Otherwise, CPU will be the default, with no need to explicity specify. +ADD_TO_PARAM("j2k-dec-use-cpu", "* j2k-dec-use-cpu\n" + " use the CPU to decode images\n"); +#endif // HAVE_CUDA +// CPU-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n" + " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); +ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" + " number of images which can be decoded at one moment (0 means default, thread-count is maximum limit)\n"); +// General Command Line Parameters +ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-dec-queue-len=\n" + " max queue len\n"); +ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-dec-encoder-queue=\n" + " max number of frames held by encoder\n"); + /** * @fn parse_params * @brief Parse Command Line Parameters and Initialize Struct Members */ -void state_video_decompress_j2k::parse_params() { +void state_decompress_j2k::parse_params() { #ifdef HAVE_CUDA if (get_commandline_param("j2k-dec-use-cuda")) { platform = j2k_decompress_platform::CUDA; @@ -301,12 +376,9 @@ void state_video_decompress_j2k::parse_params() { } const auto *version = cmpto_j2k_dec_get_version(); - log_msg(LOG_LEVEL_INFO, "%s Using codec version: %s\n", - MOD_NAME, - (version == nullptr ? "(unknown)" : version->name)); + LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? 
"(unknown)" : version->name) << "\n"; } - /** * @fn initialize_j2k_dec_ctx * @brief Create cmpto_j2k_dec_ctx_cfg based on requested platform and command line arguments @@ -314,15 +386,15 @@ void state_video_decompress_j2k::parse_params() { * @return false if unable to create cmpto_j2k_dec_ctx_cfg */ [[nodiscard]] -bool state_video_decompress_j2k::initialize_j2k_dec_ctx() { - struct cmpto_j2k_dec_ctx_cfg *dec_ctx_cfg; - CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&dec_ctx_cfg), "Error creating dec cfg", return false); +bool state_decompress_j2k::initialize_j2k_dec_ctx() { + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg; + CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); #ifdef HAVE_CUDA if (j2k_decompress_platform::CUDA == platform) { log_msg(LOG_LEVEL_INFO, "%s Configuring for CUDA Decoding\n", MOD_NAME); for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(dec_ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), "Error setting CUDA device", return false); log_msg(LOG_LEVEL_INFO, "%s Using CUDA Device %s\n", MOD_NAME, std::to_string(cuda_devices[i]).c_str()); } @@ -341,7 +413,7 @@ bool state_video_decompress_j2k::initialize_j2k_dec_ctx() { } CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( - dec_ctx_cfg, + ctx_cfg, cpu_thread_count, cpu_mem_limit, cpu_img_limit), @@ -354,13 +426,11 @@ bool state_video_decompress_j2k::initialize_j2k_dec_ctx() { cpu_img_limit); } - CHECK_OK(cmpto_j2k_dec_ctx_create(dec_ctx_cfg, &this->decoder), + CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &this->decoder), "Error initializing context", return false); - CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(dec_ctx_cfg), - "Destroy cfg", - NOOP); + CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP); CHECK_OK(cmpto_j2k_dec_cfg_create(this->decoder, &this->settings), "Error creating configuration", { 
@@ -375,116 +445,37 @@ bool state_video_decompress_j2k::initialize_j2k_dec_ctx() { return true; } - -static void rg48_to_r12l(unsigned char *dst_buffer, - unsigned char *src_buffer, - unsigned int width, unsigned int height) { - int src_pitch = vc_get_linesize(width, RG48); - int dst_len = vc_get_linesize(width, R12L); - decoder_t vc_copylineRG48toR12L = get_decoder_from_to(RG48, R12L); - - for (unsigned i = 0; i < height; i++) { - vc_copylineRG48toR12L(dst_buffer, src_buffer, dst_len, 0, 0, 0); - src_buffer += src_pitch; - dst_buffer += dst_len; - } -} - -static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { - if (dropped % 10 == 1) { - log_msg(LOG_LEVEL_WARNING, "%s Some frames (%llu) dropped.\n", MOD_NAME, dropped); - - if (j2k_decompress_platform::CPU == platform) { - log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " - "image limit to increase the number of images decoded at one moment by adding parameter: --param j2k-dec-img-limit=#\n", - MOD_NAME); - } -#ifdef HAVE_CUDA - if (j2k_decompress_platform::CUDA == platform) { - log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " - "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n", - MOD_NAME); - } -#endif // HAVE_CUDA - } -} - -/** - * This function just runs in thread and gets decompressed images from decoder - * putting them to queue (or dropping if full). 
- */ -static void *decompress_j2k_worker(void *args) { - auto *s = static_cast(args); - - while (true) { -next_image: - struct cmpto_j2k_dec_img *img; - int decoded_img_status; - CHECK_OK(cmpto_j2k_dec_ctx_get_decoded_img(s->decoder, 1, &img, &decoded_img_status), - "Decode image", goto next_image); - - { - lock_guard lk(s->lock); - if (s->in_frames) s->in_frames--; - } - - if (img == NULL) { // decoder stopped (poison pill) - break; - } - - if (decoded_img_status != CMPTO_J2K_DEC_IMG_OK) { - const char * decoding_error = ""; - CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", - decoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); - continue; - } - - void *dec_data; - size_t len; - CHECK_OK(cmpto_j2k_dec_img_get_samples(img, &dec_data, &len), - "Error getting samples", cmpto_j2k_dec_img_destroy(img); goto next_image); - - char *buffer = (char *) malloc(len); - if (s->convert) { - s->convert((unsigned char*) buffer, (unsigned char*) dec_data, s->desc.width, s->desc.height); - len = vc_get_linesize(s->desc.width, s->out_codec) * s->desc.height; - } else { - memcpy(buffer, dec_data, len); - } - - CHECK_OK(cmpto_j2k_dec_img_destroy(img), - "Unable to to return processed image", NOOP); - lock_guard lk(s->lock); - while (s->decompressed_frames.size() >= s->max_queue_size) { - print_dropped(s->dropped++, s->platform); - auto decoded = s->decompressed_frames.front(); - s->decompressed_frames.pop(); - free(decoded.first); - } - s->decompressed_frames.push({buffer,len}); - } - - return NULL; -} - - /** - * @brief Initialize a new instance of state_video_decompress_j2k - * @return Null or Pointer to state_video_decompress_j2k + * @brief Initialize a new instance of state_decompress_j2k + * @return Null or Pointer to state_decompress_j2k */ static void * j2k_decompress_init(void) { try { - auto *s = new state_video_decompress_j2k(); + auto *s = new state_decompress_j2k(); return s; } catch (...) 
{ return NULL; } } +static struct { + codec_t ug_codec; + enum cmpto_sample_format_type cmpto_sf; + void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); +} codecs[] = { + {UYVY, CMPTO_422_U8_P1020, nullptr}, + {v210, CMPTO_422_U10_V210, nullptr}, + {RGB, CMPTO_444_U8_P012, nullptr}, + {BGR, CMPTO_444_U8_P210, nullptr}, + {RGBA, CMPTO_444_U8_P012Z, nullptr}, + {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr}, + {R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l}, +}; + static int j2k_decompress_reconfigure(void *state, struct video_desc desc, - int rshift, int gshift, int bshift, int pitch, codec_t out_codec) { - auto *s = static_cast(state); + int rshift, int gshift, int bshift, int pitch, codec_t out_codec) +{ + struct state_decompress_j2k *s = (struct state_decompress_j2k *) state; if (out_codec == VIDEO_CODEC_NONE) { // probe format s->out_codec = VIDEO_CODEC_NONE; @@ -493,22 +484,22 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, } if (out_codec == R12L) { - log_msg(LOG_LEVEL_NOTICE, "%s Decoding to 12-bit RGB.\n", MOD_NAME); + LOG(LOG_LEVEL_NOTICE) << MOD_NAME << "Decoding to 12-bit RGB.\n"; } enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0; - auto matches = [&](const Codec& codec) { return codec.ug_codec == out_codec; }; - - if (const auto& codec = std::find_if(codecs.begin(), codecs.end(), matches) ; codec != codecs.end()) { - cmpto_sf = codec->cmpto_sf; - s->convert = codec->convert; + for(const auto &codec : codecs){ + if(codec.ug_codec == out_codec){ + cmpto_sf = codec.cmpto_sf; + s->convert = codec.convert; + break; + } } if (!cmpto_sf) { - log_msg(LOG_LEVEL_ERROR, "%s Unsupported output codec: %s", - MOD_NAME, - get_codec_name(out_codec)); + LOG(LOG_LEVEL_ERROR) << MOD_NAME << "Unsupported output codec: " << + get_codec_name(out_codec) << "\n"; abort(); } @@ -517,8 +508,8 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc 
desc, "Error setting sample format type", return false); } else { // RGBA with non-standard shift if (rshift % 8 != 0 || gshift % 8 != 0 || bshift % 8 != 0) { - log_msg(LOG_LEVEL_ERROR, "%s Component shifts not aligned to a byte boundary is not supported.\n", - MOD_NAME); + LOG(LOG_LEVEL_ERROR) << MOD_NAME << "Component shifts not aligned to a " + "byte boundary is not supported.\n"; return false; } cmpto_j2k_dec_comp_format fmt[3] = {}; @@ -551,7 +542,8 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, /** * Callback called by the codec when codestream is no longer required. */ -static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) { +static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size) +{ (void) custom_data; (void) custom_data_size; (void) codestream_size; free(const_cast(codestream)); } @@ -561,7 +553,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha struct cmpto_j2k_dec_comp_info comp_info[3]; if (cmpto_j2k_dec_cstream_get_img_info(buffer, len, &info) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 0, &comp_info[0]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "%s Failed to get image or first component info.\n", MOD_NAME); + log_msg(LOG_LEVEL_ERROR, "J2K Failed to get image or first component info.\n"); return DECODER_NO_FRAME; } @@ -570,7 +562,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha if (info.comp_count == 3) { if (cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 1, &comp_info[1]) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 2, &comp_info[2]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "%s Failed to get component 1 or 2 info.\n", MOD_NAME); + log_msg(LOG_LEVEL_ERROR, "J2K Failed to get componentt 1 or 2 info.\n"); return DECODER_NO_FRAME; } if (comp_info[0].sampling_factor_x == 1 && 
comp_info[0].sampling_factor_y == 1 && @@ -585,7 +577,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha } int msg_level = internal_prop->subsampling == 0 ? LOG_LEVEL_WARNING /* bogus? */ : LOG_LEVEL_VERBOSE; - log_msg(msg_level, "%s stream properties: %s\n", MOD_NAME, get_pixdesc_desc(*internal_prop)); + log_msg(msg_level, "J2K stream properties: %s\n", get_pixdesc_desc(*internal_prop)); return DECODER_GOT_CODEC; } @@ -596,10 +588,12 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha * it just returns false. */ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigned char *buffer, - unsigned int src_len, int /* frame_seq */, struct video_frame_callbacks * /* callbacks */, struct pixfmt_desc *internal_prop) { - auto *s = static_cast(state); + unsigned int src_len, int /* frame_seq */, struct video_frame_callbacks * /* callbacks */, struct pixfmt_desc *internal_prop) +{ + struct state_decompress_j2k *s = + (struct state_decompress_j2k *) state; struct cmpto_j2k_dec_img *img; - std::pair decoded; + pair decoded; void *tmp; if (s->out_codec == VIDEO_CODEC_NONE) { @@ -627,7 +621,7 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne } return_previous: - std::unique_lock lk(s->lock); + unique_lock lk(s->lock); if (s->decompressed_frames.size() == 0) { return DECODER_NO_FRAME; } @@ -638,14 +632,11 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne size_t linesize = vc_get_linesize(s->desc.width, s->out_codec); size_t frame_size = linesize * s->desc.height; if ((decoded.second + 3) / 4 * 4 != frame_size) { // for "RGBA with non-standard shift" (search) it would be (frame_size - 1) - log_msg(LOG_LEVEL_WARNING, "%s Incorrect decoded size (%lu vs. %lu)\n", - MOD_NAME, - frame_size, - decoded.second); + LOG(LOG_LEVEL_WARNING) << MOD_NAME << "Incorrect decoded size (" << frame_size << " vs. 
" << decoded.second << ")\n"; } for (size_t i = 0; i < s->desc.height; ++i) { - memcpy(dst + i * s->pitch, decoded.first + i * linesize, std::min(linesize, decoded.second - std::min(decoded.second, i * linesize))); + memcpy(dst + i * s->pitch, decoded.first + i * linesize, min(linesize, decoded.second - min(decoded.second, i * linesize))); } free(decoded.first); @@ -653,13 +644,14 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne return DECODER_GOT_FRAME; } -static int j2k_decompress_get_property(void *state, int property, void *val, size_t *len) { +static int j2k_decompress_get_property(void *state, int property, void *val, size_t *len) +{ UNUSED(state); int ret = false; - switch (property) { + switch(property) { case DECOMPRESS_PROPERTY_ACCEPTS_CORRUPTED_FRAME: - if (*len >= sizeof(int)) { + if(*len >= sizeof(int)) { *(int *) val = false; *len = sizeof(int); ret = true; @@ -672,12 +664,13 @@ static int j2k_decompress_get_property(void *state, int property, void *val, siz return ret; } -static void j2k_decompress_done(void *state) { - auto *s = static_cast(state); +static void j2k_decompress_done(void *state) +{ + struct state_decompress_j2k *s = (struct state_decompress_j2k *) state; cmpto_j2k_dec_ctx_stop(s->decoder); pthread_join(s->thread_id, NULL); - log_msg(LOG_LEVEL_VERBOSE, "%s Decoder stopped.\n", MOD_NAME); + log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] 
Decoder stopped.\n"); cmpto_j2k_dec_cfg_destroy(s->settings); cmpto_j2k_dec_ctx_destroy(s->decoder); @@ -708,7 +701,7 @@ static int j2k_decompress_get_priority(codec_t compression, struct pixfmt_desc i break; default: return -1; - } + }; if (ugc == VIDEO_CODEC_NONE) { return 50; // probe } @@ -728,3 +721,4 @@ static const struct video_decompress_info j2k_decompress_info = { }; REGISTER_MODULE(j2k, &j2k_decompress_info, LIBRARY_CLASS_VIDEO_DECOMPRESS, VIDEO_DECOMPRESS_ABI_VERSION); + From 3387b7f5b9e26268e9c373dd93aef0ebd098a607 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Mon, 17 Jun 2024 12:16:01 -0400 Subject: [PATCH 03/25] Refactor cmpto compress to prefer cmpto_version::technology checks over have_cuda conditionals j2k_compress_platform now uses CMPTO_TECHNOLOGY_{CPU,CUDA} instead of 1, 2 bool supports_cmpto_technology(int) function created for checking if supported technology version is supported on system Added NoCmptoTechnologyFound exception for error reporting --- src/video_compress/cmpto_j2k.cpp | 158 +++++++++++++++++++------------ 1 file changed, 98 insertions(+), 60 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index d2df657603..137c7a42f2 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -100,7 +100,7 @@ #define DEFAULT_CUDA_POOL_SIZE 4 /// number of frames that encoder encodes at moment #define DEFAULT_CUDA_TILE_LIMIT 1 -#define DEFAULT_CUDA_MEM_LIMIT 1000000000ULLU +#define DEFAULT_CUDA_MEM_LIMIT 1000000000LLU // Default General Settings #define DEFAULT_QUALITY 0.7 @@ -151,10 +151,8 @@ using cpu_allocator = default_data_allocator; */ enum j2k_compress_platform { NONE = 0, - CPU = 1, -#ifdef HAVE_CUDA - CUDA = 2, -#endif // HAVE_CUDA + CPU = CMPTO_TECHNOLOGY_CPU, + CUDA = CMPTO_TECHNOLOGY_CUDA, }; /** @@ -169,9 +167,7 @@ struct j2k_compress_platform_info_t { constexpr auto compress_platforms = std::array { j2k_compress_platform_info_t{"none", 
j2k_compress_platform::NONE}, j2k_compress_platform_info_t{"cpu", j2k_compress_platform::CPU}, -#ifdef HAVE_CUDA j2k_compress_platform_info_t{"cuda", j2k_compress_platform::CUDA} -#endif }; /** @@ -193,6 +189,18 @@ static j2k_compress_platform get_platform_from_name(std::string name) { return j2k_compress_platform::NONE; } +/** + * @fn supports_cmpto_technology + * @brief Check if Comprimato supports requested technology type + * @param cmpto_technology_type Technology type to check against + * @return True if supported, False if unsupported + */ +static bool supports_cmpto_technology(int cmpto_technology_type) { + const auto *version = cmpto_j2k_enc_get_version(); + + return (version == nullptr) ? false : (version->technology & cmpto_technology_type); +} + /** * Exceptions for state_video_compress_j2k construction */ @@ -207,6 +215,11 @@ struct InvalidArgument : public std::exception { InvalidArgument() = default; }; +/// @brief NoCmptoTechnologyFound Exception +struct NoCmptoTechnologyFound : public std::exception { + NoCmptoTechnologyFound() = default; +}; + /// @brief UnableToCreateJ2KEncoderCTX Exception struct UnableToCreateJ2KEncoderCTX : public std::exception { UnableToCreateJ2KEncoderCTX() = default; @@ -246,14 +259,9 @@ struct state_video_compress_j2k { unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; - // Platform to use by default -#ifdef HAVE_CUDA - j2k_compress_platform platform = j2k_compress_platform::CUDA; - unsigned int max_in_frames = DEFAULT_CUDA_POOL_SIZE; ///< max number of frames between push and pop -#else - j2k_compress_platform platform = j2k_compress_platform::CPU; - unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of frames between push and pop -#endif + // j2k_compress_platform::NONE by default at initialization + j2k_compress_platform platform = j2k_compress_platform::NONE; + unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of 
frames between push and pop private: void parse_fmt(const char* opts); @@ -467,28 +475,25 @@ struct opts { constexpr opts general_opts[5] = { {"Bitrate", "quality", "Target bitrate", ":rate=", false}, {"Quality", "quant_coeff", "Quality in range [0-1], default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false}, -#ifdef HAVE_CUDA - {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than tile_limit. default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false}, -#else - {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Should be greater than img_limit. default: " TOSTRING(DEFAULT_POOL_SIZE) , ":pool_size=", false}, -#endif + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Must be greater than tile_limit when platform=cuda and img_limit when platform=cpu. "\ + "default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false}, {"Use MCT", "mct", "Use MCT", ":mct", true}, {"Lossless compression", "lossless", "Enable lossless compression. default: disabled", ":lossless", true} }; -#ifdef HAVE_CUDA constexpr opts cuda_opts[2] = { {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_CUDA_MEM_LIMIT), ":mem_limit=", false}, - {"Tile limit", "tile_limit", "Number of tiles encoded at one moment by GPU (less to reduce latency, more to increase performance, 0 means infinity). default: " TOSTRING(DEFAULT_CUDA_TILE_LIMIT), ":tile_limit=", false}, + {"Tile limit", "tile_limit", "Number of tiles encoded at one moment by GPU (less to reduce latency, more to increase performance, 0 is infinity). 
"\ + "default: " TOSTRING(DEFAULT_CUDA_TILE_LIMIT), ":tile_limit=", false}, }; + constexpr opts platform_opts[1] = { - {"Plaform", "platform", "Platform device for the encoder to use, default: cuda", ":platform=", false}, + {"Plaform", "platform", "Platform device for the encoder to use", ":platform=", false}, }; -#endif // HAVE_CUDA constexpr opts cpu_opts[2] = { {"Thread count", "thread_count", "Number of threads to use on the CPU. 0 is all available. default: " TOSTRING(DEFAULT_CPU_THREAD_COUNT), ":thread_count=", false}, - {"Image limit", "img_limit", "Number of images which can be encoded at one moment by CPU. Maximum allowed limit is thread_count. 0 is default limit. default: " TOSTRING(DEFAULT_IMG_LIMIT), ":img_limit=", false}, + {"Image limit", "img_limit", "Number of images that can be encoded at one moment by CPU. Max limit is thread_count. 0 is default limit. default: " TOSTRING(DEFAULT_IMG_LIMIT), ":img_limit=", false}, }; /** @@ -497,14 +502,12 @@ constexpr opts cpu_opts[2] = { */ static void usage() { col() << "J2K compress platform support:\n"; - col() << "\tCPU .... yes\n"; -#ifdef HAVE_CUDA - col() << "\tCUDA ... yes\n"; -#else - col() << "\tCUDA ... no\n"; -#endif + const auto supports_cpu = supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU); + const auto supports_cuda = supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA); - col() << "J2K compress usage:\n"; + col() << "\tCPU .... " << (supports_cpu ? "yes" : "no") + << (supports_cuda ? "\n" : "\t[default]\n"); + col() << "\tCUDA ... " << (supports_cuda ? "yes\t[default]\n" : "no\n"); auto show_syntax = [](const auto& options) { for (const auto& opt : options) { @@ -529,30 +532,37 @@ static void usage() { } }; -#ifdef HAVE_CUDA - // CPU and CUDA Platforms Supported. 
Show platform= options - col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cuda"); - show_syntax(cuda_opts); - show_syntax(general_opts); - col() << " [--cuda-device ]\n" << TERM_RESET; - col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cpu"); - show_syntax(cpu_opts); - show_syntax(general_opts); -#else // HAVE_CUDA - // Only CPU Platform Supported. No option to switch platform from default. - col() << TERM_BOLD << TRED("\t-c cmpto_j2k"); - show_syntax(cpu_opts); - show_syntax(general_opts); -#endif + col() << "J2K compress usage:\n"; + if (supports_cuda) { + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cuda"); + show_syntax(cuda_opts); + show_syntax(general_opts); + col() << " [--cuda-device ]\n" << TERM_RESET; + } + if (supports_cpu) { + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cpu"); + show_syntax(cpu_opts); + show_syntax(general_opts); + } + col() << "\n" << TERM_RESET; col() << "where:\n"; -#ifdef HAVE_CUDA + show_arguments(platform_opts); - show_arguments(cuda_opts); - col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; -#endif // HAVE_CUDA - show_arguments(cpu_opts); + + if (supports_cuda) { + col() << "CUDA compress arguments:\n"; + show_arguments(cuda_opts); + col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; + } + if (supports_cpu) { + col() << "CPU compress arguments:\n"; + show_arguments(cpu_opts); + } + + col() << "General arguments:\n"; show_arguments(general_opts); + } #define ASSIGN_CHECK_VAL(var, str, minval) \ @@ -580,6 +590,30 @@ static void usage() { * @throw InvalidArgument if argument provided isn't known */ void state_video_compress_j2k::parse_fmt(const char* opts) { + const auto *version = cmpto_j2k_enc_get_version(); + LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; + + /** + * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. 
+ * If it does, configure the preferred default platform and max_in_frames using priority below + * 1 - CUDA + * 2 - CPU + * + * If platform is not found, throw NoCmptoTechnologyFound exception + */ + if (supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA)) { // prefer CUDA compress by default + platform = j2k_compress_platform::CUDA; + max_in_frames = DEFAULT_CUDA_POOL_SIZE; + } else if (supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU)) { // prefer CPU compress by default + platform = j2k_compress_platform::CPU; + max_in_frames = DEFAULT_CPU_POOL_SIZE; + } else { + log_msg(LOG_LEVEL_ERROR, + "%s Unable to find supported CMPTO_TECHNOLOGY\n", + MOD_NAME); + throw NoCmptoTechnologyFound(); + } + auto split_arguments = [](std::string args, std::string delimiter) { auto token = std::string{}; auto pos = size_t{0}; @@ -606,9 +640,6 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { return; } - const auto *version = cmpto_j2k_enc_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? 
"(unknown)" : version->name) << "\n"; - const char* item = ""; /** @@ -641,6 +672,13 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { platform_name); throw InvalidArgument(); } + if (!supports_cmpto_technology(platform)) { + log_msg(LOG_LEVEL_ERROR, + "%s Does not support requested encoding platform: \"%s\"\n", + MOD_NAME, + platform_name); + throw InvalidArgument(); + } } else if (strcasecmp("lossless", item) == 0) { // :lossless lossless = true; @@ -747,10 +785,11 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { cpu_img_limit); } -#ifdef HAVE_CUDA if (j2k_compress_platform::CUDA == platform) { log_msg(LOG_LEVEL_INFO, "%s Configuring for CUDA\n", MOD_NAME); - pool = std::make_unique(max_in_frames, cuda_allocator()); + + pool = std::make_unique(max_in_frames, allocator()); + for (unsigned int i = 0; i < cuda_devices_count; ++i) { CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( ctx_cfg, @@ -761,7 +800,6 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { return false); } } -#endif // HAVE_CUDA CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &context), "Context create", return false); @@ -805,6 +843,8 @@ static struct module * j2k_compress_init(struct module *parent, const char *opts return NULL; } catch (UnableToCreateJ2KEncoderCTX const& e) { return NULL; + } catch (NoCmptoTechnologyFound const& e) { + return NULL; } catch (...) 
{ return NULL; } @@ -905,9 +945,7 @@ static compress_module_info get_cmpto_j2k_module_info(){ } }; -#ifdef HAVE_CUDA add_module_options(cuda_opts); -#endif // HAVE_CUDA add_module_options(cpu_opts); add_module_options(general_opts); From fb19aaa82ea1823a2ef6911e4816a7f9e7dd51dd Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Mon, 17 Jun 2024 17:47:15 -0400 Subject: [PATCH 04/25] Refactor cmpto decompress to prefer cmpto_version::technology checks over cuda conditionals Remove #ifdef HAVE_CUDA j2k_decompress_platform now uses CMPTO_TECHNOLOGY_{CPU,CUDA} instead of 1, 2 bool supports_cmpto_technology(int) function created for checking if supported technology version is supported on system --- src/video_decompress/cmpto_j2k.cpp | 105 ++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 30 deletions(-) diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index 2183906988..8bf8c4ac85 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -92,11 +92,9 @@ constexpr size_t DEFAULT_CPU_MEM_LIMIT = 0; // Sho constexpr unsigned int DEFAULT_CPU_IMG_LIMIT = 0; // 0 for default, thread_count for max constexpr unsigned int MIN_CPU_IMG_LIMIT = 0; // Min number of images encoded by the CPU at once -#ifdef HAVE_CUDA // CUDA-specific Defaults constexpr int64_t DEFAULT_CUDA_MEM_LIMIT = 1000000000; constexpr int DEFAULT_CUDA_TILE_LIMIT = 2; -#endif // HAVE_CUDA using std::lock_guard; using std::min; @@ -115,12 +113,22 @@ static void *decompress_j2k_worker(void *args); */ enum j2k_decompress_platform { NONE = 0, - CPU = 1, -#ifdef HAVE_CUDA - CUDA = 2, -#endif // HAVE_CUDA + CPU = CMPTO_TECHNOLOGY_CPU, + CUDA = CMPTO_TECHNOLOGY_CUDA }; +/** + * @fn supports_cmpto_technology + * @brief Check if Comprimato supports requested technology type + * @param cmpto_technology_type Technology type to check against + * @return True if supported, False if unsupported + */ +static bool supports_cmpto_technology(int 
cmpto_technology_type) { + const auto *version = cmpto_j2k_dec_get_version(); + + return (version == nullptr) ? false : (version->technology & cmpto_technology_type); +} + /* * Exceptions for state_decompress_j2k construction */ @@ -146,21 +154,16 @@ struct state_decompress_j2k { unsigned long long int dropped{}; ///< number of dropped frames because queue was full -#ifdef HAVE_CUDA // CUDA Defaults unsigned int cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; // Default Decompression Platform to Use - j2k_decompress_platform platform = j2k_decompress_platform::CUDA; -#else - // Default Decompression Platform to Use - j2k_decompress_platform platform = j2k_decompress_platform::CPU; -#endif + j2k_decompress_platform platform = j2k_decompress_platform::NONE; // CPU Defaults unsigned int cpu_img_limit = DEFAULT_CPU_IMG_LIMIT; - const size_t cpu_mem_limit = DEFAULT_CPU_MEM_LIMIT; // Should always be 0. Not yet implemented as of Cmpto v2.8.1 + const size_t cpu_mem_limit = DEFAULT_CPU_MEM_LIMIT; // Should always be 0. 
Not yet implemented as of Cmpto v2.8.4 signed int cpu_thread_count = DEFAULT_THREAD_COUNT; // General Defaults @@ -221,13 +224,12 @@ static void print_dropped(unsigned long long int dropped, const j2k_decompress_p "image limit to increase the number of images decoded at one moment by adding parameter: --param j2k-dec-img-limit=#\n", MOD_NAME); } -#ifdef HAVE_CUDA + if (j2k_decompress_platform::CUDA == platform) { log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " - "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n", + "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=#\n", MOD_NAME); } -#endif // HAVE_CUDA } } @@ -295,7 +297,6 @@ static void *decompress_j2k_worker(void *args) /* * Command Line Parameters for state_decompress_j2k */ -#ifdef HAVE_CUDA // CUDA-specific Command Line Parameters ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" " use CUDA to decode images\n"); @@ -303,12 +304,9 @@ ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" " J2K max memory usage in bytes.\n"); ADD_TO_PARAM("j2k-dec-tile-limit", "* j2k-dec-tile-limit=\n" " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); -// Option to use CPU for image decompression only required if CUDA is also compiled. -// Otherwise, CPU will be the default, with no need to explicity specify. 
+// CPU-specific Command Line Parameters ADD_TO_PARAM("j2k-dec-use-cpu", "* j2k-dec-use-cpu\n" " use the CPU to decode images\n"); -#endif // HAVE_CUDA -// CPU-specific Command Line Parameters ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n" " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" @@ -324,9 +322,46 @@ ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-dec-encoder-queue=\n" * @brief Parse Command Line Parameters and Initialize Struct Members */ void state_decompress_j2k::parse_params() { -#ifdef HAVE_CUDA - if (get_commandline_param("j2k-dec-use-cuda")) { + /** + * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. + * If it does, configure the preferred default platform and max_in_frames using priority below + * 1 - CUDA + * 2 - CPU + * + * If platform is not found set platform = j2k_decompress_platform::NONE + */ + + const auto supports_cpu = supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU); + const auto supports_cuda = supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA); + + if (supports_cuda) { // prefer CUDA decompress by default platform = j2k_decompress_platform::CUDA; + } else if (supports_cpu) { // prefer CPU decompress by default + platform = j2k_decompress_platform::CPU; + } else { + log_msg(LOG_LEVEL_ERROR, + "%s Unable to find supported CMPTO_TECHNOLOGY\n", + MOD_NAME); + platform = j2k_decompress_platform::NONE; // default to NONE + } + + // CUDA-specific commandline_params + if (get_commandline_param("j2k-dec-use-cuda")) { + if (supports_cuda) { + platform = j2k_decompress_platform::CUDA; + } else { + log_msg(LOG_LEVEL_ERROR, + "%s j2k-dec-use-cuda argument provided. 
CUDA decompress not supported.\n", + MOD_NAME); + + // Check if CPU is default decompress + // If it is, create a log message to notify this will be used automatically + if (j2k_decompress_platform::CPU == platform) { + log_msg(LOG_LEVEL_INFO, + "%s Defaulting to CPU decompress\n", + MOD_NAME); + } + } } if (get_commandline_param("j2k-dec-mem-limit")) { @@ -337,10 +372,16 @@ void state_decompress_j2k::parse_params() { cuda_tile_limit = atoi(get_commandline_param("j2k-dec-tile-limit")); } + // CPU-specific commandline_params if (get_commandline_param("j2k-dec-use-cpu")) { - platform = j2k_decompress_platform::CPU; + if (supports_cpu) { + platform = j2k_decompress_platform::CPU; + } else { + log_msg(LOG_LEVEL_ERROR, + "%s j2k-dec-use-cpu argument provided. CPU decompress not supported.\n", + MOD_NAME); + } } -#endif // HAVE_CUDA if (get_commandline_param("j2k-dec-cpu-thread-count")) { cpu_thread_count = atoi(get_commandline_param("j2k-dec-cpu-thread-count")); @@ -376,7 +417,7 @@ void state_decompress_j2k::parse_params() { } const auto *version = cmpto_j2k_dec_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; + LOG(LOG_LEVEL_INFO) << MOD_NAME << " Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; } /** @@ -390,18 +431,22 @@ bool state_decompress_j2k::initialize_j2k_dec_ctx() { struct cmpto_j2k_dec_ctx_cfg *ctx_cfg; CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); -#ifdef HAVE_CUDA + if (j2k_decompress_platform::NONE == platform) { + log_msg(LOG_LEVEL_ERROR, "%s No supported CMPTO_TECHNOLOGY found. 
Unable to create decompress context.\n", MOD_NAME); + return false; + } + if (j2k_decompress_platform::CUDA == platform) { - log_msg(LOG_LEVEL_INFO, "%s Configuring for CUDA Decoding\n", MOD_NAME); + log_msg(LOG_LEVEL_INFO, "%s Using platform CUDA for decompress\n", MOD_NAME); for (unsigned int i = 0; i < cuda_devices_count; ++i) { CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), "Error setting CUDA device", return false); log_msg(LOG_LEVEL_INFO, "%s Using CUDA Device %s\n", MOD_NAME, std::to_string(cuda_devices[i]).c_str()); } } -#endif // HAVE_CUDA + if (j2k_decompress_platform::CPU == platform) { - log_msg(LOG_LEVEL_INFO, "%s Configuring for CPU Decoding\n", MOD_NAME); + log_msg(LOG_LEVEL_INFO, "%s Using platform CPU for decompress\n", MOD_NAME); // Confirm that cpu_thread_count != 0 (unlimited). If it does, cpu_img_limit can exceed thread_count if (cpu_thread_count != DEFAULT_THREAD_COUNT && cpu_img_limit > static_cast(cpu_thread_count)) { From 95b9a7e6d310c69fec1c75f1c0a675f9a249495e Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Tue, 18 Jun 2024 13:42:31 -0400 Subject: [PATCH 05/25] Standardize on MSG() over log_msg() for most cases of message reporting --- src/video_compress/cmpto_j2k.cpp | 61 ++++++++++-------------------- src/video_decompress/cmpto_j2k.cpp | 61 ++++++++++++------------------ 2 files changed, 45 insertions(+), 77 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 137c7a42f2..654d2eb22a 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -80,8 +80,8 @@ #define CHECK_OK(cmd, err_msg, action_fail) do { \ int j2k_error = cmd; \ if (j2k_error != CMPTO_OK) {\ - log_msg(LOG_LEVEL_ERROR, "[J2K enc.] 
%s: %s\n", \ - err_msg, cmpto_j2k_enc_get_last_error()); \ + MSG(ERROR, "%s: %s\n", \ + err_msg, cmpto_j2k_enc_get_last_error()); \ action_fail;\ } \ } while(0) @@ -347,7 +347,7 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc } if(!found){ - log_msg(LOG_LEVEL_ERROR, "[J2K] Failed to find suitable pixel format\n"); + MSG(ERROR, "Failed to find suitable pixel format\n"); return false; } @@ -440,7 +440,7 @@ static std::shared_ptr j2k_compress_pop(struct module *state) const char * encoding_error = ""; CHECK_OK(cmpto_j2k_enc_img_get_error(img, &encoding_error), "get error status", encoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image encoding failed: %s\n", encoding_error); + MSG(ERROR, "Image encoding failed: %s\n", encoding_error); goto start; } struct custom_data *udata = nullptr; @@ -591,7 +591,7 @@ static void usage() { */ void state_video_compress_j2k::parse_fmt(const char* opts) { const auto *version = cmpto_j2k_enc_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; + MSG(INFO, "Using codec version: %s\n", (version == nullptr ? "(unknown)" : version->name)); /** * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. 
@@ -608,9 +608,7 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { platform = j2k_compress_platform::CPU; max_in_frames = DEFAULT_CPU_POOL_SIZE; } else { - log_msg(LOG_LEVEL_ERROR, - "%s Unable to find supported CMPTO_TECHNOLOGY\n", - MOD_NAME); + MSG(ERROR, "Unable to find supported CMPTO_TECHNOLOGY\n"); throw NoCmptoTechnologyFound(); } @@ -666,17 +664,11 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { const char *const platform_name = strchr(item, '=') + 1; platform = get_platform_from_name(platform_name); if (j2k_compress_platform::NONE == platform) { - log_msg(LOG_LEVEL_ERROR, - "%s Unable to find requested encoding platform: \"%s\"\n", - MOD_NAME, - platform_name); + MSG(ERROR, "Unable to find requested encoding platform: \"%s\"\n", platform_name); throw InvalidArgument(); } if (!supports_cmpto_technology(platform)) { - log_msg(LOG_LEVEL_ERROR, - "%s Does not support requested encoding platform: \"%s\"\n", - MOD_NAME, - platform_name); + MSG(ERROR, "Does not support requested encoding platform: \"%s\"\n", platform_name); throw InvalidArgument(); } @@ -702,9 +694,7 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { } else if (IS_KEY_PREFIX(item, "quality")) { // :quality= quality = std::stod(strchr(item, '=') + 1); if (quality < 0.0 || quality > 1.0) { - log_msg(LOG_LEVEL_ERROR, - "%s Quality should be in interval [0-1]\n", - MOD_NAME); + MSG(ERROR, "Quality should be in interval [0-1]\n"); throw InvalidArgument(); } @@ -716,9 +706,7 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { mct = strcasecmp("mct", item) ? 
1 : 0; } else { - log_msg(LOG_LEVEL_ERROR, - "%s Unable to find option: \"%s\"\n", - MOD_NAME, item); + MSG(ERROR, "Unable to find option: \"%s\"\n", item); throw InvalidArgument(); } } @@ -733,21 +721,16 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { * Set img_limit = thread_count if exeeded */ if (cpu_thread_count != CMPTO_J2K_ENC_CPU_DEFAULT && cpu_thread_count < static_cast(cpu_img_limit)) { - log_msg(LOG_LEVEL_INFO, - "%s img_limit (%i) exceeds thread_count. Lowering to img_limit to %i to match thread_count.\n", - MOD_NAME, - cpu_img_limit, - cpu_thread_count); + MSG(INFO, "img_limit (%i) exceeds thread_count. Lowering img_limit to %i to match thread_count.\n", + cpu_img_limit, + cpu_thread_count); cpu_img_limit = cpu_thread_count; } // If pool_size was manually set, ignore this check. // Otherwise, if it was not set, confirm that max_in_frames matches DEFAULT_CPU_POOL_SIZE if (!is_pool_size_manually_configured && max_in_frames != DEFAULT_CPU_POOL_SIZE) { - log_msg(LOG_LEVEL_DEBUG, - "%s max_in_frames set to CPU default: %i", - MOD_NAME, - DEFAULT_CPU_POOL_SIZE); + MSG(DEBUG, "max_in_frames set to CPU default: %i", DEFAULT_CPU_POOL_SIZE); max_in_frames = DEFAULT_CPU_POOL_SIZE; } } @@ -767,9 +750,9 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { return false); if (j2k_compress_platform::CPU == platform) { - log_msg(LOG_LEVEL_INFO, "%s Configuring for CPU\n", MOD_NAME); + MSG(INFO, "Configuring for CPU\n"); pool = std::make_unique(max_in_frames, cpu_allocator()); - // for (unsigned int i = 0; i < cpu_count ; ) + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cpu( ctx_cfg, cpu_thread_count, @@ -778,16 +761,14 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { "Setting CPU device", return false); - log_msg(LOG_LEVEL_INFO, "%s Using %s threads on CPU. Thread Count = %i, Image Limit = %i\n", - MOD_NAME, - (cpu_thread_count == 0 ? 
"all available" : std::to_string(cpu_thread_count).c_str()), - cpu_thread_count, - cpu_img_limit); + MSG(INFO, "Using %s threads on CPU. Thread Count = %i, Image Limit = %i\n", + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_thread_count, + cpu_img_limit); } if (j2k_compress_platform::CUDA == platform) { - log_msg(LOG_LEVEL_INFO, "%s Configuring for CUDA\n", MOD_NAME); - + MSG(INFO, "Configuring for CUDA\n"); pool = std::make_unique(max_in_frames, allocator()); for (unsigned int i = 0; i < cuda_devices_count; ++i) { diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index 8bf8c4ac85..5a84d0285b 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -217,7 +217,7 @@ static void rg48_to_r12l(unsigned char *dst_buffer, static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { if (dropped % 10 == 1) { - log_msg(LOG_LEVEL_WARNING, "%s Some frames (%llu) dropped.\n", MOD_NAME, dropped); + MSG(WARNING, "Some frames (%llu) dropped.\n", dropped); if (j2k_decompress_platform::CPU == platform) { log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " @@ -262,7 +262,7 @@ static void *decompress_j2k_worker(void *args) const char * decoding_error = ""; CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", decoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); + MSG(ERROR, "Image decoding failed: %s\n", decoding_error); continue; } @@ -339,9 +339,7 @@ void state_decompress_j2k::parse_params() { } else if (supports_cpu) { // prefer CPU decompress by default platform = j2k_decompress_platform::CPU; } else { - log_msg(LOG_LEVEL_ERROR, - "%s Unable to find supported CMPTO_TECHNOLOGY\n", - MOD_NAME); + MSG(ERROR, "Unable to find supported CMPTO_TECHNOLOGY\n"); platform = j2k_decompress_platform::NONE; // default to NONE 
} @@ -350,16 +348,12 @@ void state_decompress_j2k::parse_params() { if (supports_cuda) { platform = j2k_decompress_platform::CUDA; } else { - log_msg(LOG_LEVEL_ERROR, - "%s j2k-dec-use-cuda argument provided. CUDA decompress not supported.\n", - MOD_NAME); + MSG(ERROR, "j2k-dec-use-cuda argument provided. CUDA decompress not supported.\n"); // Check if CPU is default decompress // If it is, create a log message to notify this will be used automatically if (j2k_decompress_platform::CPU == platform) { - log_msg(LOG_LEVEL_INFO, - "%s Defaulting to CPU decompress\n", - MOD_NAME); + MSG(INFO, "Defaulting to CPU decompress\n"); } } } @@ -377,9 +371,7 @@ void state_decompress_j2k::parse_params() { if (supports_cpu) { platform = j2k_decompress_platform::CPU; } else { - log_msg(LOG_LEVEL_ERROR, - "%s j2k-dec-use-cpu argument provided. CPU decompress not supported.\n", - MOD_NAME); + MSG(ERROR, "j2k-dec-use-cpu argument provided. CPU decompress not supported.\n"); } } @@ -389,8 +381,7 @@ void state_decompress_j2k::parse_params() { // Confirm cpu_thread_count between MIN_CPU_THREAD_COUNT + 1 (0) if (cpu_thread_count <= MIN_CPU_THREAD_COUNT) { // Implementing this requires the creation of executor threads. - log_msg(LOG_LEVEL_INFO, "%s j2k-dec-cpu-thread-count must be 0 or higher. Setting to min allowed 0\n", - MOD_NAME); + MSG(ERROR, "j2k-dec-cpu-thread-count must be 0 or higher. Setting to min allowed 0\n"); cpu_thread_count = 0; } } @@ -404,10 +395,9 @@ void state_decompress_j2k::parse_params() { // Confirm cpu_img_limit between MIN_CPU_IMG_LIMIT if (cpu_img_limit < MIN_CPU_IMG_LIMIT) { - log_msg(LOG_LEVEL_INFO, "%s j2k-dec-img-limit below min allowed of %i. Setting to min allowed %i\n", - MOD_NAME, - MIN_CPU_IMG_LIMIT, - MIN_CPU_IMG_LIMIT); + MSG(INFO, "j2k-dec-img-limit below min allowed of %i. 
Setting to min allowed %i\n", + MIN_CPU_IMG_LIMIT, + MIN_CPU_IMG_LIMIT); cpu_img_limit = MIN_CPU_IMG_LIMIT; } } @@ -417,7 +407,7 @@ void state_decompress_j2k::parse_params() { } const auto *version = cmpto_j2k_dec_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << " Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; + MSG(INFO, "Using code version: %s\n", (version == nullptr ? "(unknown)" : version->name)); } /** @@ -432,28 +422,26 @@ bool state_decompress_j2k::initialize_j2k_dec_ctx() { CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); if (j2k_decompress_platform::NONE == platform) { - log_msg(LOG_LEVEL_ERROR, "%s No supported CMPTO_TECHNOLOGY found. Unable to create decompress context.\n", MOD_NAME); + MSG(ERROR, "No supported CMPTO_TECHNOLOGY found. Unable to create decompress context.\n"); return false; } if (j2k_decompress_platform::CUDA == platform) { - log_msg(LOG_LEVEL_INFO, "%s Using platform CUDA for decompress\n", MOD_NAME); + MSG(INFO, "Using platform CUDA for decompress\n"); for (unsigned int i = 0; i < cuda_devices_count; ++i) { CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), "Error setting CUDA device", return false); - log_msg(LOG_LEVEL_INFO, "%s Using CUDA Device %s\n", MOD_NAME, std::to_string(cuda_devices[i]).c_str()); + MSG(INFO, "Using CUDA Device %s\n", std::to_string(cuda_devices[i]).c_str()); } } if (j2k_decompress_platform::CPU == platform) { - log_msg(LOG_LEVEL_INFO, "%s Using platform CPU for decompress\n", MOD_NAME); - + MSG(INFO, "Using platform CPU for decompress\n"); // Confirm that cpu_thread_count != 0 (unlimited). If it does, cpu_img_limit can exceed thread_count if (cpu_thread_count != DEFAULT_THREAD_COUNT && cpu_img_limit > static_cast(cpu_thread_count)) { - log_msg(LOG_LEVEL_INFO, "%s j2k-dec-img-limit set to %i. 
Lowering to match to match j2k-dec-cpu-thread-count (%i)\n", - MOD_NAME, - cpu_img_limit, - cpu_thread_count); + MSG(INFO, "j2k-dec-img-limit set to %i. Lowering to match to match j2k-dec-cpu-thread-count (%i)\n", + cpu_img_limit, + cpu_thread_count); cpu_img_limit = cpu_thread_count; } @@ -465,10 +453,9 @@ bool state_decompress_j2k::initialize_j2k_dec_ctx() { "Error configuring the CPU", return false); - log_msg(LOG_LEVEL_INFO, "%s Using %s threads on the CPU. Image Limit set to %i.\n", - MOD_NAME, - (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), - cpu_img_limit); + MSG(INFO, "Using %s threads on the CPU. Image Limit set to %i.\n", + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_img_limit); } CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &this->decoder), @@ -598,7 +585,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha struct cmpto_j2k_dec_comp_info comp_info[3]; if (cmpto_j2k_dec_cstream_get_img_info(buffer, len, &info) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 0, &comp_info[0]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get image or first component info.\n"); + MSG(ERROR, "J2K Failed to get image or first component info.\n"); return DECODER_NO_FRAME; } @@ -607,7 +594,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha if (info.comp_count == 3) { if (cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 1, &comp_info[1]) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 2, &comp_info[2]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get componentt 1 or 2 info.\n"); + MSG(ERROR, "J2K Failed to get component 1 or 2 info.\n"); return DECODER_NO_FRAME; } if (comp_info[0].sampling_factor_x == 1 && comp_info[0].sampling_factor_y == 1 && @@ -715,7 +702,7 @@ static void j2k_decompress_done(void *state) cmpto_j2k_dec_ctx_stop(s->decoder); pthread_join(s->thread_id, 
NULL); - log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] Decoder stopped.\n"); + MSG(VERBOSE, "Decoder stopped.\n"); cmpto_j2k_dec_cfg_destroy(s->settings); cmpto_j2k_dec_ctx_destroy(s->decoder); From fbdeef3b5ba33af972b8f6e6dd3359af23a0fc7c Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Tue, 18 Jun 2024 13:53:38 -0400 Subject: [PATCH 06/25] Use cuda_allocator naming over allocator for using statement This matches cpu_allocator naming and helps to be explicit about what allocator is being used during video_frame_pool creation during `bool state_video_compress_j2k::initialize_j2k_enc_ctx()` --- src/video_compress/cmpto_j2k.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 654d2eb22a..80561c6102 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -139,10 +139,9 @@ struct cmpto_j2k_enc_cuda_host_buffer_data_allocator return new cmpto_j2k_enc_cuda_host_buffer_data_allocator(*this); } }; -using allocator = cmpto_j2k_enc_cuda_host_buffer_data_allocator; using cuda_allocator = cmpto_j2k_enc_cuda_host_buffer_data_allocator; #else -using allocator = default_data_allocator; +using cuda_allocator = default_data_allocator; #endif using cpu_allocator = default_data_allocator; @@ -769,7 +768,7 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { if (j2k_compress_platform::CUDA == platform) { MSG(INFO, "Configuring for CUDA\n"); - pool = std::make_unique(max_in_frames, allocator()); + pool = std::make_unique(max_in_frames, cuda_allocator()); for (unsigned int i = 0; i < cuda_devices_count; ++i) { CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( From 08969b51fb0e8503741635510c469fc04b13756c Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 15:26:13 -0400 Subject: [PATCH 07/25] Free data when enc fails Implementing Commit [9577372](https://github.com/CESNET/UltraGrid/commit/957737276037ff071b2a945e768d8c54c6a67b1d) from master 
--- src/video_compress/cmpto_j2k.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 80561c6102..a7da7205f2 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -421,10 +421,9 @@ static std::shared_ptr j2k_compress_pop(struct module *state) struct cmpto_j2k_enc_img *img; int status; CHECK_OK(cmpto_j2k_enc_ctx_get_encoded_img( - s->context, - 1, - &img /* Set to NULL if encoder stopped */, - &status), "Encode image", HANDLE_ERROR_COMPRESS_POP); + s->context, 1, &img /* Set to NULL if encoder stopped */, + &status), + "Encode image pop", HANDLE_ERROR_COMPRESS_POP); { unique_lock lk(s->lock); s->in_frames--; @@ -895,8 +894,14 @@ static void j2k_compress_push(struct module *state, std::shared_ptr unique_lock lk(s->lock); s->frame_popped.wait(lk, [s]{return s->in_frames < s->max_in_frames;}); lk.unlock(); + bool failed = false; CHECK_OK(cmpto_j2k_enc_img_encode(img, s->enc_settings), - "Encode image", return); + "Encode image push", failed = true); + if (failed) { + udata->frame.~shared_ptr(); + cmpto_j2k_enc_img_destroy(img); + return; + } lk.lock(); s->in_frames++; lk.unlock(); From 1ea3b5a707764678a5e664a1596176eb5c161707 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 15:28:03 -0400 Subject: [PATCH 08/25] Fixed mct comparison Implement commit [9e05752](https://github.com/CESNET/UltraGrid/commit/9e0575239b2a6cc512e27bdfc3d6b89a5293d546) --- src/video_compress/cmpto_j2k.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index a7da7205f2..9e168a007a 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -701,7 +701,7 @@ void state_video_compress_j2k::parse_fmt(const char* opts) { is_pool_size_manually_configured = true; } else if (strcasecmp("mct", item) == 0) { // :mct - mct = 
strcasecmp("mct", item) ? 1 : 0; + mct = strcasecmp("mct", item) == 0 ? 1 : 0; } else { MSG(ERROR, "Unable to find option: \"%s\"\n", item); From 9ae3bb2d82251ebf2dc41c872553738779733e4a Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 15:37:03 -0400 Subject: [PATCH 09/25] Mirror Commit 779021b Mirror [779021b](https://github.com/CESNET/UltraGrid/commit/779021b8041e52daaa36fcd25cbf698371464cb8) from master --- src/video_compress/cmpto_j2k.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 9e168a007a..a070e885e8 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -842,7 +842,16 @@ static void release_cstream(void * custom_data, size_t custom_data_size, const v udata->frame.~shared_ptr(); } -#define HANDLE_ERROR_COMPRESS_PUSH if (img) cmpto_j2k_enc_img_destroy(img); return +#define HANDLE_ERROR_COMPRESS_PUSH \ + if (udata != nullptr) { \ + udata->frame.~shared_ptr(); \ + } \ + if (img != nullptr) { \ + cmpto_j2k_enc_img_destroy(img); \ + } \ + return + + static void j2k_compress_push(struct module *state, std::shared_ptr tx) { struct state_video_compress_j2k *s = From aea5ffa0768088587895e779bb2a69b923091cf5 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 16:17:45 -0400 Subject: [PATCH 10/25] Implement Commit c2e7811 from master Implementation of [c2e7811](https://github.com/CESNET/UltraGrid/commit/c2e78111528f64ad22539f555cf13988fcfbbe0a) from master --- .vscode/settings.json | 8 ++++ src/video_compress/cmpto_j2k.cpp | 76 ++++++++++++++++++++++++++++---- 2 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..a42bc8f0d5 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "files.associations": { + "array": "cpp", + "string": "cpp", + "string_view": 
"cpp", + "vector": "cpp" + } +} \ No newline at end of file diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index a070e885e8..93e0e48e69 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -3,7 +3,7 @@ * @author Martin Pulec */ /* - * Copyright (c) 2013-2023 CESNET, z. s. p. o. + * Copyright (c) 2013-2024 CESNET * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,6 +42,10 @@ * the GPU is powerful enough due to the fact that CUDA registers the new * buffers which is very slow and because of that the frames cumulate before * the GPU encoder. + * + * * @todo + * - check multiple CUDA devices - now the data are always copied to + * the first CUDA device */ #ifdef HAVE_CONFIG_H @@ -118,13 +122,15 @@ using std::shared_ptr; using std::unique_lock; #ifdef HAVE_CUDA -struct cmpto_j2k_enc_cuda_host_buffer_data_allocator +template +struct cmpto_j2k_enc_cuda_buffer_data_allocator : public video_frame_pool_allocator { void *allocate(size_t size) override { void *ptr = nullptr; if (CUDA_WRAPPER_SUCCESS != - cuda_wrapper_malloc_host(&ptr, size)) { + alloc(&ptr, size)) { MSG(ERROR, "Cannot allocate host buffer: %s\n", cuda_wrapper_last_error_string()); return nullptr; @@ -132,14 +138,14 @@ struct cmpto_j2k_enc_cuda_host_buffer_data_allocator return ptr; } - void deallocate(void *ptr) override { cuda_wrapper_free(ptr); } + void deallocate(void *ptr) override { free(ptr); } [[nodiscard]] video_frame_pool_allocator *clone() const override { - return new cmpto_j2k_enc_cuda_host_buffer_data_allocator(*this); + return new cmpto_j2k_enc_cuda_buffer_data_allocator(*this); } }; -using cuda_allocator = cmpto_j2k_enc_cuda_host_buffer_data_allocator; +using cuda_allocator = cmpto_j2k_enc_cuda_buffer_data_allocator; #else using cuda_allocator = default_data_allocator; #endif @@ -255,6 +261,7 @@ struct state_video_compress_j2k { unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; // 
CUDA Parameters + bool pool_in_device_memory = false; unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; @@ -370,6 +377,24 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc "Setting MCT", NOOP); + s->pool_in_device_memory = false; +#ifdef HAVE_CUDA + if (s->convertFunc == nullptr) { + s->pool_in_device_memory = true; + s->pool = std::make_unique( + s->max_in_frames, + cmpto_j2k_enc_cuda_buffer_data_allocator< + cuda_wrapper_malloc, cuda_wrapper_free>()); + } else { + s->pool = std::make_unique( + s->max_in_frames, + cmpto_j2k_enc_cuda_buffer_data_allocator< + cuda_wrapper_malloc_host, cuda_wrapper_free_host>()); + } +#else + s->pool = std::make_unique(s->max_in_frames, default_data_allocator()); +#endif + s->compressed_desc = desc; s->compressed_desc.color_spec = codec_is_a_rgb(desc.color_spec) ? J2KR : J2K; s->compressed_desc.tile_count = 1; @@ -384,8 +409,17 @@ static shared_ptr get_copy(struct state_video_compress_j2k *s, vide if (s->convertFunc) { s->convertFunc(ret.get(), frame); - } else { - memcpy(ret->tiles[0].data, frame->tiles[0].data, frame->tiles[0].data_len); + } else if (s->pool_in_device_memory) { +#ifdef HAVE_CUDA + cuda_wrapper_memcpy(ret->tiles[0].data, frame->tiles[0].data, + frame->tiles[0].data_len, + CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE); +#else + abort(); // must not reach here +#endif + else { + memcpy(ret->tiles[0].data, frame->tiles[0].data, + frame->tiles[0].data_len); } return ret; @@ -769,6 +803,11 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { MSG(INFO, "Configuring for CUDA\n"); pool = std::make_unique(max_in_frames, cuda_allocator()); + if (cuda_devices_count > 1) { + MSG(WARNING, "More than one CUDA device is not tested and may " + "not work. 
Please report...\n"); + } + for (unsigned int i = 0; i < cuda_devices_count; ++i) { CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( ctx_cfg, @@ -842,6 +881,13 @@ static void release_cstream(void * custom_data, size_t custom_data_size, const v udata->frame.~shared_ptr(); } +static void release_cstream_cuda(void *img_custom_data, size_t img_custom_data_size, + int /* device_id */, const void *samples, size_t samples_size) +{ + release_cstream(img_custom_data, img_custom_data_size, samples, + samples_size); +} + #define HANDLE_ERROR_COMPRESS_PUSH \ if (udata != nullptr) { \ udata->frame.~shared_ptr(); \ @@ -851,7 +897,6 @@ static void release_cstream(void * custom_data, size_t custom_data_size, const v } \ return - static void j2k_compress_push(struct module *state, std::shared_ptr tx) { struct state_video_compress_j2k *s = @@ -895,6 +940,19 @@ static void j2k_compress_push(struct module *state, std::shared_ptr new (&udata->frame) shared_ptr(get_copy(s, tx.get())); vf_store_metadata(tx.get(), udata->metadata); + if (s->pool_in_device_memory) { + CHECK_OK(cmpto_j2k_enc_img_set_samples_cuda( + img, cuda_devices[0], udata->frame->tiles[0].data, + udata->frame->tiles[0].data_len, release_cstream_cuda), + "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); + } else { + CHECK_OK(cmpto_j2k_enc_img_set_samples( + img, udata->frame->tiles[0].data, + udata->frame->tiles[0].data_len, release_cstream), + "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); + } + + CHECK_OK(cmpto_j2k_enc_img_set_samples(img, udata->frame->tiles[0].data, udata->frame->tiles[0].data_len, release_cstream), From 10b4c693f928f79ad1219dae3b49d4d8ad541520 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 16:21:05 -0400 Subject: [PATCH 11/25] Implementing ca71f59 from master Implementing [ca71f59](https://github.com/CESNET/UltraGrid/commit/ca71f59b9cd502f9d1651b418b53d1b2b537b2ab) from master --- src/video_compress/cmpto_j2k.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 
deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 93e0e48e69..b187919ad1 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -379,13 +379,17 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc s->pool_in_device_memory = false; #ifdef HAVE_CUDA - if (s->convertFunc == nullptr) { + if (s->convertFunc == nullptr && cuda_devices_count == 1) { s->pool_in_device_memory = true; s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator< cuda_wrapper_malloc, cuda_wrapper_free>()); } else { + if (cuda_devices_count > 1) { + MSG(WARNING, "More than 1 CUDA device will use CPU " + "buffers. Please report...\n"); + } s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator< @@ -803,11 +807,6 @@ bool state_video_compress_j2k::initialize_j2k_enc_ctx() { MSG(INFO, "Configuring for CUDA\n"); pool = std::make_unique(max_in_frames, cuda_allocator()); - if (cuda_devices_count > 1) { - MSG(WARNING, "More than one CUDA device is not tested and may " - "not work. 
Please report...\n"); - } - for (unsigned int i = 0; i < cuda_devices_count; ++i) { CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( ctx_cfg, From c133739bff52e871e1b1a9b0cce00409b0da9511 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 16:23:10 -0400 Subject: [PATCH 12/25] Implementing 4061f8d from master Implementing [4061f8d](https://github.com/CESNET/UltraGrid/commit/4061f8d0f298eab8c0b55798433f0ee51ff755cb) from master --- src/video_compress/cmpto_j2k.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index b187919ad1..c2fc2d9c51 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -128,6 +128,9 @@ struct cmpto_j2k_enc_cuda_buffer_data_allocator : public video_frame_pool_allocator { void *allocate(size_t size) override { + if (alloc == cuda_wrapper_malloc) { + cuda_wrapper_set_device((int) cuda_devices[0]); + } void *ptr = nullptr; if (CUDA_WRAPPER_SUCCESS != alloc(&ptr, size)) { @@ -415,6 +418,7 @@ static shared_ptr get_copy(struct state_video_compress_j2k *s, vide s->convertFunc(ret.get(), frame); } else if (s->pool_in_device_memory) { #ifdef HAVE_CUDA + cuda_wrapper_set_device((int) cuda_devices[0]); cuda_wrapper_memcpy(ret->tiles[0].data, frame->tiles[0].data, frame->tiles[0].data_len, CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE); From 0b5714727bff49deef5466fa53ddeaee8f07b80b Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 16:24:47 -0400 Subject: [PATCH 13/25] Implementing c2cebd3 from master Implementing [c2cebd3](https://github.com/CESNET/UltraGrid/commit/c2cebd319881048b44fcaf4dc643c4502448990e) from master --- src/video_compress/cmpto_j2k.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index c2fc2d9c51..b8bc64cd8d 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -75,6 +75,7 
@@ #include "tv.h" #include "utils/color_out.h" #include "utils/misc.h" +#include "utils/parallel_conv.h" #include "utils/video_frame_pool.h" #include "video.h" #include "video_compress.h" @@ -313,15 +314,11 @@ static void R12L_to_RG48(video_frame *dst, video_frame *src){ int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); - unsigned char *s = (unsigned char *) src->tiles[0].data; - unsigned char *d = (unsigned char *) dst->tiles[0].data; decoder_t vc_copylineR12LtoRG48 = get_decoder_from_to(R12L, RG48); - - for(unsigned i = 0; i < src->tiles[0].height; i++){ - vc_copylineR12LtoRG48(d, s, dst_pitch, 0, 0, 0); - s += src_pitch; - d += dst_pitch; - } + + parallel_pix_conv((int) src->tiles[0].height, dst->tiles[0].data, + dst_pitch, src->tiles[0].data, src_pitch, + vc_copylineR12LtoRG48, 0); } static struct { From bf7e84dde70acde3170968583f681ec6bf85fbc9 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 17:25:38 -0400 Subject: [PATCH 14/25] Implement af5d584 and 3adb9a4 from master Implement [af5d584](https://github.com/CESNET/UltraGrid/commit/af5d5841d00bf981365377c838cc37a7a1f76c89) and [3adb9a4](https://github.com/CESNET/UltraGrid/commit/3adb9a46cc7fcb2b4d02ffc6a0d67aaccc4a694c) --- src/video_compress/cmpto_j2k.cpp | 40 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index b8bc64cd8d..298cd2219f 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -248,12 +248,10 @@ struct state_video_compress_j2k { unsigned int in_frames{}; ///< number of currently encoding frames mutex lock; condition_variable frame_popped; - video_desc saved_desc{}; ///< for pool reconfiguration - video_desc precompress_desc{}; + video_desc saved_desc{}; ///< pool properties + codec_t precompress_codec = VC_NONE; video_desc 
compressed_desc{}; - void (*convertFunc)(video_frame *dst, video_frame *src) { nullptr }; - // Generic Parameters double quality = DEFAULT_QUALITY; // default image quality long long int rate = 0; // bitrate in bits per second @@ -310,15 +308,18 @@ state_video_compress_j2k::state_video_compress_j2k(struct module *parent, const module_register(&module_data, parent); } -static void R12L_to_RG48(video_frame *dst, video_frame *src){ +static void parallel_conv(video_frame *dst, video_frame *src){ int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); - decoder_t vc_copylineR12LtoRG48 = get_decoder_from_to(R12L, RG48); + decoder_t decoder = + get_decoder_from_to(src->color_spec, dst->color_spec); + assert(decoder != nullptr); + parallel_pix_conv((int) src->tiles[0].height, dst->tiles[0].data, dst_pitch, src->tiles[0].data, src_pitch, - vc_copylineR12LtoRG48, 0); + decoder, 0); } static struct { @@ -332,7 +333,7 @@ static struct { {RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, {RGBA, CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, - {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, R12L_to_RG48}, + {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, nullptr}, }; static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc){ @@ -342,11 +343,7 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc for(const auto &codec : codecs){ if(codec.ug_codec == desc.color_spec){ sample_format = codec.cmpto_sf; - s->convertFunc = codec.convertFunc; - s->precompress_desc = desc; - if(codec.convert_codec != VIDEO_CODEC_NONE){ - s->precompress_desc.color_spec = codec.convert_codec; - } + s->precompress_codec = codec.convert_codec; found = true; break; } @@ -379,7 +376,7 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc s->pool_in_device_memory = false; #ifdef 
HAVE_CUDA - if (s->convertFunc == nullptr && cuda_devices_count == 1) { + if (s->precompress_codec == VC_NONE && cuda_devices_count == 1) { s->pool_in_device_memory = true; s->pool = std::make_unique( s->max_in_frames, @@ -411,8 +408,8 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ std::shared_ptr ret = s->pool->get_frame(); - if (s->convertFunc) { - s->convertFunc(ret.get(), frame); + if (s->precompress_codec != VC_NONE) { + parallel_conv(ret.get(), frame); } else if (s->pool_in_device_memory) { #ifdef HAVE_CUDA cuda_wrapper_set_device((int) cuda_devices[0]); @@ -915,8 +912,15 @@ static void j2k_compress_push(struct module *state, std::shared_ptr if (!ret) { return; } - s->pool->reconfigure(s->precompress_desc, vc_get_linesize(s->precompress_desc.width, s->precompress_desc.color_spec) - * s->precompress_desc.height); + struct video_desc pool_desc = desc; + + if (s->precompress_codec != VC_NONE) { + pool_desc.color_spec = s->precompress_codec; + } + s->pool.reconfigure( + pool_desc, (size_t) vc_get_linesize(pool_desc.width, + pool_desc.color_spec) * + pool_desc.height); } assert(tx->tile_count == 1); // TODO From 2bd4c094cfeb45da5c91c16f4e26cc59f262d4b6 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 17:45:10 -0400 Subject: [PATCH 15/25] Implement 930abe5 from master Implement [930abe5](https://github.com/CESNET/UltraGrid/commit/930abe53258b0f1f3d35e439566f1feac844105c) from master --- .vscode/settings.json | 3 +- src/video_compress/cmpto_j2k.cpp | 87 ++++++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index a42bc8f0d5..7f162230c5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,7 @@ "array": "cpp", "string": "cpp", "string_view": "cpp", - "vector": "cpp" + "vector": "cpp", + "__config": "cpp" } } \ No newline at 
end of file diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 298cd2219f..1bd42d4b8f 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -67,6 +67,7 @@ #ifdef HAVE_CUDA #include "cuda_wrapper.h" +#include "cuda_wrapper/kernels.hpp" #endif // HAVE_CUDA #include "debug.h" #include "host.h" @@ -150,11 +151,15 @@ struct cmpto_j2k_enc_cuda_buffer_data_allocator } }; using cuda_allocator = cmpto_j2k_enc_cuda_buffer_data_allocator; +const cuda_convert_func_t r12l_to_rg48_cuda = preprocess_r12l_to_rg48; #else using cuda_allocator = default_data_allocator; +const cuda_convert_func_t r12l_to_rg48_cuda = nullptr; #endif using cpu_allocator = default_data_allocator; +typedef void (*cuda_convert_func_t)(int width, int height, void *src, void *dst); + /** * @brief Platforms available for J2K Compression */ @@ -263,9 +268,11 @@ struct state_video_compress_j2k { unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; // CUDA Parameters - bool pool_in_device_memory = false; - unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; - unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + bool pool_in_device_memory = false; + cuda_convert_func_t cuda_convert_func = nullptr; + uint8_t *cuda_conv_tmp_buf = nullptr; + unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; // j2k_compress_platform::NONE by default at initialization j2k_compress_platform platform = j2k_compress_platform::NONE; @@ -326,14 +333,15 @@ static struct { codec_t ug_codec; enum cmpto_sample_format_type cmpto_sf; codec_t convert_codec; - void (*convertFunc)(video_frame *dst, video_frame *src); + /// must be not-NULL if convert_codec != VC_NONE and HAVE_CUDA + cuda_convert_func_t cuda_convert_func; } codecs[] = { {UYVY, CMPTO_422_U8_P1020, VIDEO_CODEC_NONE, nullptr}, {v210, CMPTO_422_U10_V210, VIDEO_CODEC_NONE, nullptr}, {RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, {RGBA, 
CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, - {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, nullptr}, + {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, r12l_to_rg48_cuda}, }; static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc){ @@ -344,11 +352,22 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc if(codec.ug_codec == desc.color_spec){ sample_format = codec.cmpto_sf; s->precompress_codec = codec.convert_codec; + s->cuda_convert_func = codec.cuda_convert_func; found = true; break; } } +#ifdef HAVE_CUDA + cuda_wrapper_set_device((int) cuda_devices[0]); + if (s->cuda_convert_func != nullptr) { + cuda_wrapper_free(s->cuda_conv_tmp_buf); + cuda_wrapper_malloc( + (void **) &s->cuda_conv_tmp_buf, + vc_get_datalen(desc.width, desc.height, desc.color_spec)); + } +#endif + if(!found){ MSG(ERROR, "Failed to find suitable pixel format\n"); return false; @@ -376,17 +395,15 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc s->pool_in_device_memory = false; #ifdef HAVE_CUDA - if (s->precompress_codec == VC_NONE && cuda_devices_count == 1) { + if (cuda_devices_count == 1) { s->pool_in_device_memory = true; s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator< cuda_wrapper_malloc, cuda_wrapper_free>()); } else { - if (cuda_devices_count > 1) { - MSG(WARNING, "More than 1 CUDA device will use CPU " - "buffers. Please report...\n"); - } + MSG(WARNING, "More than 1 CUDA device will use CPU " + "buffers. 
Please report...\n"); s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator< @@ -405,20 +422,44 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc return true; } -static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ - std::shared_ptr ret = s->pool->get_frame(); - - if (s->precompress_codec != VC_NONE) { - parallel_conv(ret.get(), frame); - } else if (s->pool_in_device_memory) { +/** + * @brief copies frame from RAM to GPU + * + * Does the pixel format conversion as well if specified. + */ +static void +do_gpu_copy(struct state_video_compress_j2k *s, + std::shared_ptr &ret, video_frame *in_frame) +{ #ifdef HAVE_CUDA - cuda_wrapper_set_device((int) cuda_devices[0]); - cuda_wrapper_memcpy(ret->tiles[0].data, frame->tiles[0].data, - frame->tiles[0].data_len, + cuda_wrapper_set_device((int) cuda_devices[0]); + if (s->cuda_convert_func == nullptr) { + assert(s->precompress_codec == VC_NONE); + cuda_wrapper_memcpy(ret->tiles[0].data, in_frame->tiles[0].data, + in_frame->tiles[0].data_len, CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE); + return; + } + cuda_wrapper_memcpy(s->cuda_conv_tmp_buf, in_frame->tiles[0].data, + in_frame->tiles[0].data_len, + CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE); + s->cuda_convert_func((int) in_frame->tiles[0].width, + (int) in_frame->tiles[0].height, + s->cuda_conv_tmp_buf, ret->tiles[0].data); #else - abort(); // must not reach here + (void) s, (void) ret, (void) in_frame; + abort(); // must not reach here #endif +} + + +static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ + std::shared_ptr ret = s->pool->get_frame(); + + if (s->pool_in_device_memory) { + do_gpu_copy(s, ret, frame); + } else if (s->precompress_codec != VC_NONE) { + parallel_conv(ret.get(), frame); else { memcpy(ret->tiles[0].data, frame->tiles[0].data, frame->tiles[0].data_len); @@ -913,7 +954,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr 
return; } struct video_desc pool_desc = desc; - + if (s->precompress_codec != VC_NONE) { pool_desc.color_spec = s->precompress_codec; } @@ -987,6 +1028,10 @@ static void j2k_compress_done(struct module *mod) cmpto_j2k_enc_cfg_destroy(s->enc_settings); cmpto_j2k_enc_ctx_destroy(s->context); +#ifdef HAVE_CUDA + cuda_wrapper_free(s->cuda_conv_tmp_buf); +#endif + delete s; } From cc0d31c30c09e4e74e9ce2d3c482b5d4bdbd350a Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 17:48:07 -0400 Subject: [PATCH 16/25] Implement 39c9c40 from master Implement [39c9c40](https://github.com/CESNET/UltraGrid/commit/39c9c40d67f69f5829df8d5fcb76d0eb2b25e0bd) from master --- src/video_compress/cmpto_j2k.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 1bd42d4b8f..4f18d1a957 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -341,6 +340,7 @@ static struct { {RGB, CMPTO_444_U8_P012, VIDEO_CODEC_NONE, nullptr}, {RGBA, CMPTO_444_U8_P012Z, VIDEO_CODEC_NONE, nullptr}, {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, VIDEO_CODEC_NONE, nullptr}, + {RG48, CMPTO_444_U12_MSB16LE_P012, VC_NONE, nullptr}, {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, r12l_to_rg48_cuda}, }; From 1c4c1df6dbfc72fbf84044b6152941d0cbc1b9aa Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 17:53:10 -0400 Subject: [PATCH 17/25] Implement cc6b820 from master Implement [cc6b820](https://github.com/CESNET/UltraGrid/commit/cc6b820db2584c60f82618db78397db432964f29) from master --- src/video_compress/cmpto_j2k.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 4f18d1a957..f9ab80a4bf 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -364,7 +364,8 @@ static bool 
configure_with(struct state_video_compress_j2k *s, struct video_desc cuda_wrapper_free(s->cuda_conv_tmp_buf); cuda_wrapper_malloc( (void **) &s->cuda_conv_tmp_buf, - vc_get_datalen(desc.width, desc.height, desc.color_spec)); + vc_get_datalen(desc.width, desc.height, desc.color_spec) + + MAX_PADDING); } #endif From c9e111fa9aac39ebbdc3d3a12cab2440f683d05f Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 17:58:43 -0400 Subject: [PATCH 18/25] Implement 1cffc72 from master Implement [1cffc72](https://github.com/CESNET/UltraGrid/commit/1cffc72fa8954a367da3482e0db06c7f50db7d2b) from master --- src/video_compress/cmpto_j2k.cpp | 44 ++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index f9ab80a4bf..6220aa19c4 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -344,6 +344,30 @@ static struct { {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, r12l_to_rg48_cuda}, }; +static void +set_pool(struct state_video_compress_j2k *s) +{ + s->pool_in_device_memory = false; +#ifdef HAVE_CUDA + if (cuda_devices_count == 1) { + s->pool_in_device_memory = true; + s->pool = std::make_unique( + s->max_in_frames, + cmpto_j2k_enc_cuda_buffer_data_allocator< + cuda_wrapper_malloc, cuda_wrapper_free>()); + return; + } + MSG(WARNING, "More than 1 CUDA device will use CPU buffers and " + "conversion...\n"); + s->pool = std::make_unique( + s->max_in_frames, + cmpto_j2k_enc_cuda_buffer_data_allocator()); +#else + s->pool = std::make_unique(s->max_in_frames, default_data_allocator()); +#endif +} + static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc){ enum cmpto_sample_format_type sample_format; bool found = false; @@ -394,25 +418,7 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc "Setting MCT", NOOP); - s->pool_in_device_memory = false; -#ifdef HAVE_CUDA - if 
(cuda_devices_count == 1) { - s->pool_in_device_memory = true; - s->pool = std::make_unique( - s->max_in_frames, - cmpto_j2k_enc_cuda_buffer_data_allocator< - cuda_wrapper_malloc, cuda_wrapper_free>()); - } else { - MSG(WARNING, "More than 1 CUDA device will use CPU " - "buffers. Please report...\n"); - s->pool = std::make_unique( - s->max_in_frames, - cmpto_j2k_enc_cuda_buffer_data_allocator< - cuda_wrapper_malloc_host, cuda_wrapper_free_host>()); - } -#else - s->pool = std::make_unique(s->max_in_frames, default_data_allocator()); -#endif + set_pool(s); s->compressed_desc = desc; s->compressed_desc.color_spec = codec_is_a_rgb(desc.color_spec) ? J2KR : J2K; From a612bbedf7b5e47bcd74fdc8991785f29c3c17f3 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 18:01:00 -0400 Subject: [PATCH 19/25] Implement 7b91ebb from master Implement [7b91ebb](https://github.com/CESNET/UltraGrid/commit/7b91ebb5d450d48d8d3d2915e09ee579c02e97cf) from master --- src/video_compress/cmpto_j2k.cpp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 6220aa19c4..ccc9a4406b 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -345,11 +345,21 @@ static struct { }; static void -set_pool(struct state_video_compress_j2k *s) +set_pool(struct state_video_compress_j2k *s, struct video_desc desc) { s->pool_in_device_memory = false; #ifdef HAVE_CUDA if (cuda_devices_count == 1) { + cuda_wrapper_set_device((int) cuda_devices[0]); + + if (s->cuda_convert_func != nullptr) { + cuda_wrapper_free(s->cuda_conv_tmp_buf); + cuda_wrapper_malloc( + (void **) &s->cuda_conv_tmp_buf, + vc_get_datalen(desc.width, desc.height, desc.color_spec) + + MAX_PADDING); + } + s->pool_in_device_memory = true; s->pool = std::make_unique( s->max_in_frames, @@ -382,17 +392,6 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc } } 
-#ifdef HAVE_CUDA - cuda_wrapper_set_device((int) cuda_devices[0]); - if (s->cuda_convert_func != nullptr) { - cuda_wrapper_free(s->cuda_conv_tmp_buf); - cuda_wrapper_malloc( - (void **) &s->cuda_conv_tmp_buf, - vc_get_datalen(desc.width, desc.height, desc.color_spec) + - MAX_PADDING); - } -#endif - if(!found){ MSG(ERROR, "Failed to find suitable pixel format\n"); return false; @@ -418,7 +417,7 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc "Setting MCT", NOOP); - set_pool(s); + set_pool(s, desc); s->compressed_desc = desc; s->compressed_desc.color_spec = codec_is_a_rgb(desc.color_spec) ? J2KR : J2K; From 0fdabb432f46454940b3692d23760a796f43da8a Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 18:04:40 -0400 Subject: [PATCH 20/25] Implement 94afd6c from master Implement [94afd6c](https://github.com/CESNET/UltraGrid/commit/94afd6c5b439a1fafe0ae04ad16d0135ffa29c67) from master --- src/video_compress/cmpto_j2k.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index ccc9a4406b..0acbd97008 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -344,12 +344,24 @@ static struct { {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, r12l_to_rg48_cuda}, }; + +#define CPU_CONV_PARAM "j2k-enc-cpu-conv" +ADD_TO_PARAM( + CPU_CONV_PARAM, + "* " CPU_CONV_PARAM "\n" + " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); + static void set_pool(struct state_video_compress_j2k *s, struct video_desc desc) { + const bool force_cpu_conv = + get_commandline_param(CPU_CONV_PARAM) != nullptr; s->pool_in_device_memory = false; #ifdef HAVE_CUDA - if (cuda_devices_count == 1) { + if (cuda_devices_count > 1) { + MSG(WARNING, "More than 1 CUDA device will use CPU buffers and " + "conversion...\n"); + } else if (!force_cpu_conv || s->cuda_convert_func == nullptr) { cuda_wrapper_set_device((int) 
cuda_devices[0]); if (s->cuda_convert_func != nullptr) { @@ -367,8 +379,8 @@ set_pool(struct state_video_compress_j2k *s, struct video_desc desc) cuda_wrapper_malloc, cuda_wrapper_free>()); return; } - MSG(WARNING, "More than 1 CUDA device will use CPU buffers and " - "conversion...\n"); + s->cuda_convert_func = nullptr; // either was 0 or force_cpu_conv + s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator Date: Fri, 6 Sep 2024 18:25:44 -0400 Subject: [PATCH 21/25] Implement 9304717 from master Implement [9304717](https://github.com/CESNET/UltraGrid/commit/9304717149e67cff1bcc7ba5c38f66bbdd9fdeac) from master --- src/video_decompress/cmpto_j2k.cpp | 32 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index 5a84d0285b..2a05d66ea1 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -3,7 +3,7 @@ * @author Martin Pulec */ /* - * Copyright (c) 2013-2023 CESNET, z. s. p. o. + * Copyright (c) 2013-2024 CESNET * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,14 +56,17 @@ * another stream, which, however, creates a new decoder). */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#include "config_unix.h" -#include "config_win32.h" -#endif // HAVE_CONFIG_H - -#include - +#include // for min +#include // for assert +#include // for cmpto_sample_format_type, cmpto_j2k_de... 
+#include // for int64_t +#include // for free, atoi, malloc, abort +#include // for size_t, NULL, memcpy +#include // for mutex, lock_guard, unique_lock +#include // for operator<<, basic_ostream, char_traits +#include // for pthread_create, pthread_join, pthread_t +#include // for queue +#include // for pair #include #include #include @@ -72,13 +75,22 @@ #include "debug.h" #include "host.h" #include "lib_common.h" +#include "pixfmt_conv.h" // for get_decoder_from_to, decoder_t +#include "types.h" // for video_desc, pixfmt_desc, R12L, RGBA #include "utils/macros.h" #include "utils/misc.h" -#include "video.h" +#include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... #include "video_decompress.h" constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; +using std::lock_guard; +using std::min; +using std::mutex; +using std::pair; +using std::queue; +using std::unique_lock; + #define NOOP ((void) 0) // General Parameter Defaults From f893b43cf00d0794c77a792cedd0090f3484c3f9 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 18:47:31 -0400 Subject: [PATCH 22/25] Implement fa93411, 4f3add7, 876870f, ad7929b, 95dea89 Implement the following from master [fa93411](https://github.com/CESNET/UltraGrid/commit/fa93411f157698b05055c7ff8a2c17fe7f55c7c3) [4f3add7](https://github.com/CESNET/UltraGrid/commit/4f3add780dd7fffbb010958ac65752b93b34deae) [876870f](https://github.com/CESNET/UltraGrid/commit/876870f8f37f591d80cd5ef7c78fb0d118994ee7) [ad7929b](https://github.com/CESNET/UltraGrid/commit/ad7929b49fd5ed2f0fcfe1246d1ba711508f36db) [95dea89](https://github.com/CESNET/UltraGrid/commit/95dea895d6f824f05ec11f4bc5e1fb7611e80e38) --- src/video_decompress/cmpto_j2k.cpp | 141 ++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 33 deletions(-) diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index 2a05d66ea1..6c6d902bfc 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp 
@@ -72,6 +72,10 @@ #include #include +ifdef HAVE_CONFIG_H +#include "config.h" // for HAVE_CUDA +#endif +#include "cuda_wrapper/kernels.hpp" #include "debug.h" #include "host.h" #include "lib_common.h" @@ -79,6 +83,7 @@ #include "types.h" // for video_desc, pixfmt_desc, R12L, RGBA #include "utils/macros.h" #include "utils/misc.h" +#include "utils/parallel_conv.h" #include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... #include "video_decompress.h" @@ -91,6 +96,9 @@ using std::pair; using std::queue; using std::unique_lock; +static void +j2k_decompress_cleanup_common(struct state_decompress_j2k *s); + #define NOOP ((void) 0) // General Parameter Defaults @@ -220,11 +228,9 @@ static void rg48_to_r12l(unsigned char *dst_buffer, int dst_len = vc_get_linesize(width, R12L); decoder_t vc_copylineRG48toR12L = get_decoder_from_to(RG48, R12L); - for(unsigned i = 0; i < height; i++){ - vc_copylineRG48toR12L(dst_buffer, src_buffer, dst_len, 0, 0, 0); - src_buffer += src_pitch; - dst_buffer += dst_len; - } + parallel_pix_conv((int) height, (char *) dst_buffer, dst_len, + (const char *) src_buffer, src_pitch, + vc_copylineRG48toR12L, 0); } static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { @@ -375,7 +381,7 @@ void state_decompress_j2k::parse_params() { } if (get_commandline_param("j2k-dec-tile-limit")) { - cuda_tile_limit = atoi(get_commandline_param("j2k-dec-tile-limit")); + cuda_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit")); } // CPU-specific commandline_params @@ -502,18 +508,43 @@ static void * j2k_decompress_init(void) { } } +static void +r12l_postprocessor_get_sz( + void */*postprocessor*/, void */*img_custom_data*/, size_t /*img_custom_data_size*/, + int size_x, int size_y, struct cmpto_j2k_dec_comp_format */*comp_formats*/, + int comp_count, size_t *temp_buffer_size, size_t *output_buffer_size) +{ + assert(comp_count == 3); + *temp_buffer_size = 0; // no temp buffer required + 
*output_buffer_size = vc_get_datalen(size_x, size_y, R12L); +} + +#ifdef HAVE_CUDA +const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda = + postprocess_rg48_to_r12l; +#else +const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda = + nullptr; +#endif + static struct { codec_t ug_codec; enum cmpto_sample_format_type cmpto_sf; + // CPU postprocess void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height); + + // GPU postprocess + cmpto_j2k_dec_postprocessor_size_callback_cuda size_callback; + cmpto_j2k_dec_postprocessor_run_callback_cuda run_callback; } codecs[] = { - {UYVY, CMPTO_422_U8_P1020, nullptr}, - {v210, CMPTO_422_U10_V210, nullptr}, - {RGB, CMPTO_444_U8_P012, nullptr}, - {BGR, CMPTO_444_U8_P210, nullptr}, - {RGBA, CMPTO_444_U8_P012Z, nullptr}, - {R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr}, - {R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l}, + { UYVY, CMPTO_422_U8_P1020, nullptr, nullptr, nullptr }, + { v210, CMPTO_422_U10_V210, nullptr, nullptr, nullptr }, + { RGB, CMPTO_444_U8_P012, nullptr, nullptr, nullptr }, + { BGR, CMPTO_444_U8_P210, nullptr, nullptr, nullptr }, + { RGBA, CMPTO_444_U8_P012Z, nullptr, nullptr, nullptr }, + { R10k, CMPTO_444_U10U10U10_MSB32BE_P210, nullptr, nullptr, nullptr }, + { R12L, CMPTO_444_U12_MSB16LE_P012, rg48_to_r12l, + r12l_postprocessor_get_sz, r12l_postprocess_cuda }, }; static int j2k_decompress_reconfigure(void *state, struct video_desc desc, @@ -527,17 +558,40 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, return true; } + j2k_decompress_cleanup_common(s); + if (out_codec == R12L) { LOG(LOG_LEVEL_NOTICE) << MOD_NAME << "Decoding to 12-bit RGB.\n"; } enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0; + + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr; + CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); + for (unsigned int i = 0; i < 
cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( + ctx_cfg, cuda_devices[i], s->req_mem_limit, + s->req_tile_limit), + "Error setting CUDA device", return false); + } for(const auto &codec : codecs){ - if(codec.ug_codec == out_codec){ - cmpto_sf = codec.cmpto_sf; + if(codec.ug_codec != out_codec){ + continue; + } + cmpto_sf = codec.cmpto_sf; + if (codec.run_callback != nullptr) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_set_postprocessor_cuda( + ctx_cfg, nullptr, nullptr, + codec.size_callback, codec.run_callback), + "add postprocessor", return false); + } else { s->convert = codec.convert; - break; + if (s->convert != nullptr) { + MSG(WARNING, + "Compiled without CUDA, pixfmt conv will " + "be processed on CPU...\n"); + } } } @@ -547,6 +601,14 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, abort(); } + CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &s->decoder), + "Error initializing context", return false); + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP); + + CHECK_OK(cmpto_j2k_dec_cfg_create(s->decoder, &s->settings), + "Error creating configuration", return false); + if (out_codec != RGBA || (rshift == 0 && gshift == 8 && bshift == 16)) { CHECK_OK(cmpto_j2k_dec_cfg_set_samples_format_type(s->settings, cmpto_sf), "Error setting sample format type", return false); @@ -580,6 +642,9 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, s->out_codec = out_codec; s->pitch = pitch; + int ret = pthread_create(&s->thread_id, NULL, decompress_j2k_worker, (void *) s); + assert(ret == 0 && "Unable to create thread"); + return true; } @@ -708,7 +773,8 @@ static int j2k_decompress_get_property(void *state, int property, void *val, siz return ret; } -static void j2k_decompress_done(void *state) +static void +j2k_decompress_cleanup_common(struct state_decompress_j2k *s) { struct state_decompress_j2k *s = (struct state_decompress_j2k *) state; @@ -716,8 +782,14 @@ static void 
j2k_decompress_done(void *state) pthread_join(s->thread_id, NULL); MSG(VERBOSE, "Decoder stopped.\n"); - cmpto_j2k_dec_cfg_destroy(s->settings); - cmpto_j2k_dec_ctx_destroy(s->decoder); + if (s->settings != nullptr) { + cmpto_j2k_dec_cfg_destroy(s->settings); + s->settings = nullptr; + } + if (s->decoder != nullptr) { + cmpto_j2k_dec_ctx_destroy(s->decoder); + s->decoder = nullptr; + } while (s->decompressed_frames.size() > 0) { auto decoded = s->decompressed_frames.front(); @@ -725,6 +797,13 @@ static void j2k_decompress_done(void *state) free(decoded.first); } + s->convert = nullptr; +} + +static void j2k_decompress_done(void *state) +{ + auto *s = (struct state_decompress_j2k *) state; + j2k_decompress_cleanup_common(s); delete s; } @@ -732,22 +811,18 @@ static int j2k_decompress_get_priority(codec_t compression, struct pixfmt_desc i if (compression != J2K && compression != J2KR) { return -1; } - switch (ugc) { - case VIDEO_CODEC_NONE: - return 50; // probe - case UYVY: - case v210: - case RGB: - case BGR: - case RGBA: - case R10k: - case R12L: + if (ugc == VC_NONE) { // probe + return VDEC_PRIO_PROBE_HI; + } + bool codec_found = false; + for (const auto &codec : codecs) { + if (codec.ug_codec == ugc) { + codec_found = true; break; - default: - return -1; + } }; - if (ugc == VIDEO_CODEC_NONE) { - return 50; // probe + if (!codec_found) { + return VDEC_PRIO_NA; } if (internal.depth == 0) { // fallback - internal undefined return 800; From a7eba41df48a831333519f5c691fecde78322524 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Fri, 6 Sep 2024 19:00:55 -0400 Subject: [PATCH 23/25] Implement b1ff4c6, e37e58c, 94afd6c from master [b1ff4c6](https://github.com/CESNET/UltraGrid/commit/b1ff4c6d29100cdb276f032d910a40e664e61b67) [e37e58c](https://github.com/CESNET/UltraGrid/commit/e37e58cf52eaa20e40afbec0d3e1ef1938d6eedf) [94afd6c](https://github.com/CESNET/UltraGrid/commit/94afd6c5b439a1fafe0ae04ad16d0135ffa29c67) --- src/video_decompress/cmpto_j2k.cpp | 47 
+++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index 6c6d902bfc..389071638b 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -315,6 +315,8 @@ static void *decompress_j2k_worker(void *args) /* * Command Line Parameters for state_decompress_j2k */ +#define CPU_CONV_PARAM "j2k-dec-cpu-conv" + // CUDA-specific Command Line Parameters ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" " use CUDA to decode images\n"); @@ -329,6 +331,8 @@ ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" " number of images which can be decoded at one moment (0 means default, thread-count is maximum limit)\n"); +ADD_TO_PARAM(CPU_CONV_PARAM, "* " CPU_CONV_PARAM "\n" + " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); // General Command Line Parameters ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-dec-queue-len=\n" " max queue len\n"); @@ -527,7 +531,7 @@ const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda = nullptr; #endif -static struct { +static const struct conv_props { codec_t ug_codec; enum cmpto_sample_format_type cmpto_sf; // CPU postprocess @@ -547,6 +551,33 @@ static struct { r12l_postprocessor_get_sz, r12l_postprocess_cuda }, }; +static bool +set_postprocess_convert(struct state_decompress_j2k *s, + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg, + const struct conv_props *codec) +{ + const bool force_cpu_conv = + get_commandline_param(CPU_CONV_PARAM) != nullptr; + if (codec->run_callback != nullptr && !force_cpu_conv) { + if (cuda_devices_count == 1) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_set_postprocessor_cuda( + ctx_cfg, nullptr, nullptr, + codec->size_callback, codec->run_callback), + "add postprocessor", return false); 
+ return true; + } + MSG(WARNING, + "More than 1 CUDA device set, will use CPU conversion...\n"); + } + s->convert = codec->convert; + if (s->convert != nullptr && codec->run_callback == nullptr && + !force_cpu_conv) { + MSG(WARNING, "Compiled without CUDA, pixfmt conv will " + "be processed on CPU...\n"); + } + return true; +} + static int j2k_decompress_reconfigure(void *state, struct video_desc desc, int rshift, int gshift, int bshift, int pitch, codec_t out_codec) { @@ -580,18 +611,8 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, continue; } cmpto_sf = codec.cmpto_sf; - if (codec.run_callback != nullptr) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_set_postprocessor_cuda( - ctx_cfg, nullptr, nullptr, - codec.size_callback, codec.run_callback), - "add postprocessor", return false); - } else { - s->convert = codec.convert; - if (s->convert != nullptr) { - MSG(WARNING, - "Compiled without CUDA, pixfmt conv will " - "be processed on CPU...\n"); - } + if (!set_postprocess_convert(s, ctx_cfg, &codec)) { + return false; } } From c307088688abe7f4a0a75aa814a7f7ee8ea6e644 Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Tue, 10 Sep 2024 18:12:40 -0400 Subject: [PATCH 24/25] Resolve remaining merge issues Remove duplicate #include, variables, functions, etc Class member 'pool' is a unique_ptr. Changed calls to pool from . 
to -> Renamed req_tile_limit and req_mem_limit to cuda_tile_limit and cuda_mem_limit to match class member name --- src/video_compress/cmpto_j2k.cpp | 81 ++++++++++++++---------------- src/video_decompress/cmpto_j2k.cpp | 45 ++++++----------- 2 files changed, 53 insertions(+), 73 deletions(-) diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index e63bf1f280..b3c717371f 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -52,14 +52,14 @@ #endif // HAVE_CONFIG_H #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include @@ -79,6 +79,11 @@ #include "video.h" #include "video_compress.h" +using std::condition_variable; +using std::mutex; +using std::shared_ptr; +using std::unique_lock; + #define MOD_NAME "[Cmpto J2K enc.] " #define CHECK_OK(cmd, err_msg, action_fail) do { \ @@ -90,6 +95,15 @@ } \ } while(0) +#define HANDLE_ERROR_COMPRESS_PUSH \ + if (udata != nullptr) { \ + udata->frame.~shared_ptr(); \ + } \ + if (img != nullptr) { \ + cmpto_j2k_enc_img_destroy(img); \ + } \ + return + #define NOOP ((void) 0) // Default CPU Settings @@ -108,20 +122,13 @@ // Default General Settings #define DEFAULT_QUALITY 0.7 -/// default max size of state_video_compress_j2k::pool and also value -/// for state_video_compress_j2k::max_in_frames -#ifdef HAVE_CUDA -#define DEFAULT_POOL_SIZE DEFAULT_CUDA_POOL_SIZE -#else -#define DEFAULT_POOL_SIZE DEFAULT_CPU_POOL_SIZE -#endif -using std::condition_variable; -using std::mutex; -using std::shared_ptr; -using std::unique_lock; +typedef void (*cuda_convert_func_t)(int width, int height, void *src, void *dst); #ifdef HAVE_CUDA +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames +#define DEFAULT_POOL_SIZE DEFAULT_CUDA_POOL_SIZE template struct cmpto_j2k_enc_cuda_buffer_data_allocator @@ -147,15 
+154,23 @@ struct cmpto_j2k_enc_cuda_buffer_data_allocator } }; -using cuda_allocator = cmpto_j2k_enc_cuda_buffer_data_allocator; +using cuda_allocator = cmpto_j2k_enc_cuda_buffer_data_allocator; const cuda_convert_func_t r12l_to_rg48_cuda = preprocess_r12l_to_rg48; #else -using cuda_allocator = default_data_allocator; +using cuda_allocator = default_data_allocator; const cuda_convert_func_t r12l_to_rg48_cuda = nullptr; + +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames +#define DEFAULT_POOL_SIZE DEFAULT_CPU_POOL_SIZE #endif using cpu_allocator = default_data_allocator; -typedef void (*cuda_convert_func_t)(int width, int height, void *src, void *dst); +/* + * Function Predeclarations + */ +static void j2k_compressed_frame_dispose(struct video_frame *frame); +static void j2k_compress_done(struct module *mod); /** * @brief Platforms available for J2K Compression @@ -283,9 +298,6 @@ struct state_video_compress_j2k { const size_t cpu_mem_limit = 0; // Not yet implemented as of v2.8.1. Must be 0. 
}; -static void j2k_compressed_frame_dispose(struct video_frame *frame); -static void j2k_compress_done(struct module *mod); - /** * @brief state_video_compress_j2k constructor to create from opts * @param parent Base Module Struct @@ -324,12 +336,6 @@ static void parallel_conv(video_frame *dst, video_frame *src){ decoder, 0); } -#ifdef HAVE_CUDA -const cuda_convert_func_t r12l_to_rg48_cuda = preprocess_r12l_to_rg48; -#else -const cuda_convert_func_t r12l_to_rg48_cuda = nullptr; -#endif - static struct { codec_t ug_codec; enum cmpto_sample_format_type cmpto_sf; @@ -471,7 +477,7 @@ do_gpu_copy(struct state_video_compress_j2k *s, } static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ - std::shared_ptr ret = s->pool.get_frame(); + std::shared_ptr ret = s->pool->get_frame(); if (s->pool_in_device_memory) { do_gpu_copy(s, ret, frame); } else if (s->precompress_codec != VC_NONE) { @@ -943,15 +949,6 @@ release_cstream_cuda(void *img_custom_data, size_t img_custom_data_size, samples_size); } -#define HANDLE_ERROR_COMPRESS_PUSH \ - if (udata != nullptr) { \ - udata->frame.~shared_ptr(); \ - } \ - if (img != nullptr) { \ - cmpto_j2k_enc_img_destroy(img); \ - } \ - return - static void j2k_compress_push(struct module *state, std::shared_ptr tx) { struct state_video_compress_j2k *s = @@ -974,7 +971,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr if (s->precompress_codec != VC_NONE) { pool_desc.color_spec = s->precompress_codec; } - s->pool.reconfigure( + s->pool->reconfigure( pool_desc, (size_t) vc_get_linesize(pool_desc.width, pool_desc.color_spec) * pool_desc.height); diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index 06fac79a0a..f88db5ceb0 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -79,16 +79,15 @@ #include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... 
#include "video_decompress.h" -constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; - using std::lock_guard; using std::min; using std::mutex; using std::pair; using std::queue; +using std::stoi; using std::unique_lock; -#define NOOP ((void) 0) +constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; // General Parameter Defaults constexpr int DEFAULT_MAX_QUEUE_SIZE = 2; // maximal size of queue for decompressed frames @@ -105,21 +104,21 @@ constexpr unsigned int MIN_CPU_IMG_LIMIT = 0; // Min constexpr int64_t DEFAULT_CUDA_MEM_LIMIT = 1000000000LL; constexpr int DEFAULT_CUDA_TILE_LIMIT = 2; -using std::lock_guard; -using std::min; -using std::mutex; -using std::pair; -using std::queue; -using std::stoi; -using std::unique_lock; - -static void -j2k_decompress_cleanup_common(struct state_decompress_j2k *s); +#define NOOP ((void) 0) +#define CHECK_OK(cmd, err_msg, action_fail) do { \ + int j2k_error = cmd; \ + if (j2k_error != CMPTO_OK) {\ + LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ + action_fail;\ + } \ +} while(0) /* * Function Predeclarations */ static void *decompress_j2k_worker(void *args); +static void j2k_decompress_cleanup_common(struct state_decompress_j2k *s); + /* * Platform to use for J2K Decompression @@ -192,14 +191,6 @@ struct state_decompress_j2k { bool initialize_j2k_dec_ctx(); }; -#define CHECK_OK(cmd, err_msg, action_fail) do { \ - int j2k_error = cmd; \ - if (j2k_error != CMPTO_OK) {\ - LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ - action_fail;\ - } \ -} while(0) - /** * @brief Default state_decompress_j2k Constructor * @throw UnableToCreateJ2KDecoderCTX if unable to create J2K CTX @@ -594,19 +585,11 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); for (unsigned int i = 0; i < cuda_devices_count; ++i) { 
CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( - ctx_cfg, cuda_devices[i], s->req_mem_limit, - s->req_tile_limit), + ctx_cfg, cuda_devices[i], s->cuda_mem_limit, + s->cuda_tile_limit), "Error setting CUDA device", return false); } - struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr; - CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( - ctx_cfg, cuda_devices[i], s->req_mem_limit, - s->req_tile_limit), - "Error setting CUDA device", return false); - } for(const auto &codec : codecs){ if(codec.ug_codec != out_codec){ From 28b98a96154040e413097573b9378df71d60115d Mon Sep 17 00:00:00 2001 From: atrivialatomic Date: Tue, 10 Sep 2024 18:47:07 -0400 Subject: [PATCH 25/25] j2k_decompress_reconfigure will check for platform type j2k_decompress_reconfigure now checks state_decompress_j2k class member platform to see if it is set to j2k_decompress_platform::CPU or j2k_decompress_platform::CUDA prior to calling cmpto_j2k_dec_ctx_cfg_add_cuda_device / cmpto_j2k_dec_ctx_cfg_add_cpu --- src/video_decompress/cmpto_j2k.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index c582156f99..a1cbbfb03c 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -588,11 +588,27 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr; CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( - ctx_cfg, cuda_devices[i], s->cuda_mem_limit, - s->cuda_tile_limit), - "Error setting CUDA device", return false); + if (j2k_decompress_platform::CUDA == s->platform) { + for (unsigned int i = 0; i < 
cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( + ctx_cfg, cuda_devices[i], s->cuda_mem_limit, + s->cuda_tile_limit), + "Error setting CUDA device", return false); + } + } + + if (j2k_decompress_platform::CPU == s->platform) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( + ctx_cfg, + s->cpu_thread_count, + s->cpu_mem_limit, + s->cpu_img_limit), + "Error configuring the CPU", + return false); + + MSG(INFO, "Using %s threads on the CPU. Image Limit set to %i.\n", + (s->cpu_thread_count == 0 ? "all available" : std::to_string(s->cpu_thread_count).c_str()), + s->cpu_img_limit); }