diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..7f162230c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "files.associations": { + "array": "cpp", + "string": "cpp", + "string_view": "cpp", + "vector": "cpp", + "__config": "cpp" + } +} \ No newline at end of file diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 8eb2a946c..f91d1accb 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -51,18 +51,22 @@ #include "config.h" #endif // HAVE_CONFIG_H -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #ifdef HAVE_CUDA #include "cuda_wrapper.h" #include "cuda_wrapper/kernels.hpp" -#endif +#endif // HAVE_CUDA #include "debug.h" #include "host.h" #include "lib_common.h" @@ -75,34 +79,56 @@ #include "video.h" #include "video_compress.h" +using std::condition_variable; +using std::mutex; +using std::shared_ptr; +using std::unique_lock; + #define MOD_NAME "[Cmpto J2K enc.] " #define CHECK_OK(cmd, err_msg, action_fail) do { \ int j2k_error = cmd; \ if (j2k_error != CMPTO_OK) {\ - log_msg(LOG_LEVEL_ERROR, "[J2K enc.] %s: %s\n", \ - err_msg, cmpto_j2k_enc_get_last_error()); \ + MSG(ERROR, "%s: %s\n", \ + err_msg, cmpto_j2k_enc_get_last_error()); \ action_fail;\ } \ } while(0) +#define HANDLE_ERROR_COMPRESS_PUSH \ + if (udata != nullptr) { \ + udata->frame.~shared_ptr(); \ + } \ + if (img != nullptr) { \ + cmpto_j2k_enc_img_destroy(img); \ + } \ + return + #define NOOP ((void) 0) -#define DEFAULT_QUALITY 0.7 -/// default max size of state_video_compress_j2k::pool and also value -/// for state_video_compress_j2k::max_in_frames -#define DEFAULT_POOL_SIZE 4 +// Default CPU Settings +#define DEFAULT_CPU_THREAD_COUNT CMPTO_J2K_ENC_CPU_DEFAULT +#define MIN_CPU_THREAD_COUNT CMPTO_J2K_ENC_CPU_NONE +#define DEFAULT_CPU_MEM_LIMIT 0 // DEFAULT_CPU_MEM_LIMIT should always be 0 +#define DEFAULT_CPU_POOL_SIZE 8 +#define DEFAULT_IMG_LIMIT 0 // Default number of images to be decoded by CPU (0 = CMPTO Default) +#define MIN_CPU_IMG_LIMIT 0 // Min number of images decoded by the CPU at once + +// Default CUDA Settings +#define DEFAULT_CUDA_POOL_SIZE 4 /// number of frames that encoder encodes at moment -#define DEFAULT_TILE_LIMIT 1 -#define DEFAULT_MEM_LIMIT 1000000000LLU +#define DEFAULT_CUDA_TILE_LIMIT 1 +#define DEFAULT_CUDA_MEM_LIMIT 1000000000LLU -using std::condition_variable; -using std::mutex; -using std::stod; -using std::shared_ptr; -using std::unique_lock; +// Default General Settings +#define DEFAULT_QUALITY 0.7 + +typedef void (*cuda_convert_func_t)(int width, int height, void *src, void *dst); #ifdef HAVE_CUDA +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames +#define DEFAULT_POOL_SIZE DEFAULT_CUDA_POOL_SIZE template struct cmpto_j2k_enc_cuda_buffer_data_allocator @@ -127,32 +153,176 @@ struct cmpto_j2k_enc_cuda_buffer_data_allocator return new cmpto_j2k_enc_cuda_buffer_data_allocator(*this); } }; + +using cuda_allocator = cmpto_j2k_enc_cuda_buffer_data_allocator; +const cuda_convert_func_t r12l_to_rg48_cuda = preprocess_r12l_to_rg48; +#else +using cuda_allocator = default_data_allocator; +const cuda_convert_func_t r12l_to_rg48_cuda = nullptr; + +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames +#define DEFAULT_POOL_SIZE DEFAULT_CPU_POOL_SIZE #endif +using cpu_allocator = default_data_allocator; -typedef void (*cuda_convert_func_t)(int width, int height, void *src, void *dst); +/* + * Function Predeclarations + */ +static void j2k_compressed_frame_dispose(struct video_frame *frame); +static void j2k_compress_done(struct module *mod); -struct state_video_compress_j2k { - struct module module_data{}; - struct cmpto_j2k_enc_ctx *context{}; - struct cmpto_j2k_enc_cfg *enc_settings{}; - long long int rate = 0; ///< bitrate in bits per second - int mct = -1; // force use of mct - -1 means default - bool pool_in_device_memory = false; ///< frames in pool are on GPU - video_frame_pool pool; ///< pool for frames allocated by us but not yet consumed by encoder - unsigned int max_in_frames = DEFAULT_POOL_SIZE; ///< max number of frames between push and pop - unsigned int in_frames{}; ///< number of currently encoding frames - mutex lock; - condition_variable frame_popped; - video_desc saved_desc{}; - codec_t precompress_codec = VC_NONE; - video_desc compressed_desc{}; +/** + * @brief Platforms available for J2K Compression + */ +enum j2k_compress_platform { + NONE = 0, + CPU = CMPTO_TECHNOLOGY_CPU, + CUDA = CMPTO_TECHNOLOGY_CUDA, +}; +/** + * @brief Struct to hold Platform Name and j2k_compress_platform Type + */ +struct j2k_compress_platform_info_t { + const char* name; + j2k_compress_platform platform; +}; + +// Supported Platforms for Compressing J2K +constexpr auto compress_platforms = std::array { + j2k_compress_platform_info_t{"none", j2k_compress_platform::NONE}, + j2k_compress_platform_info_t{"cpu", j2k_compress_platform::CPU}, + j2k_compress_platform_info_t{"cuda", j2k_compress_platform::CUDA} +}; + +/** + * @fn get_platform_from_name + * @brief Search for j2k_compress_platform from friendly name + * @param name Friendly name of platform to search for + * @return j2k_compress_platform that corresponds to name. If no match, return j2k_compress_platform::NONE + */ +[[nodiscard]][[maybe_unused]] +static j2k_compress_platform get_platform_from_name(std::string name) { + std::transform(name.cbegin(), name.cend(), name.begin(), [](unsigned char c) { return std::tolower(c); }); + + auto matches = [&name](const auto& p) { return name.compare(p.name) == 0; }; + + if (const auto& it = std::find_if(compress_platforms.begin(), compress_platforms.end(), matches) ; it != compress_platforms.end()) { + return it->platform; + } + + return j2k_compress_platform::NONE; +} + +/** + * @fn supports_cmpto_technology + * @brief Check if Comprimato supports requested technology type + * @param cmpto_technology_type Technology type to check against + * @return True if supported, False if unsupported + */ +static bool supports_cmpto_technology(int cmpto_technology_type) { + const auto *version = cmpto_j2k_enc_get_version(); + + return (version == nullptr) ? false : (version->technology & cmpto_technology_type); +} + +/** + * Exceptions for state_video_compress_j2k construction + */ + +/// @brief HelpRequested Exception +struct HelpRequested : public std::exception { + HelpRequested() = default; +}; + +/// @brief InvalidArgument Exception +struct InvalidArgument : public std::exception { + InvalidArgument() = default; +}; + +/// @brief NoCmptoTechnologyFound Exception +struct NoCmptoTechnologyFound : public std::exception { + NoCmptoTechnologyFound() = default; +}; + +/// @brief UnableToCreateJ2KEncoderCTX Exception +struct UnableToCreateJ2KEncoderCTX : public std::exception { + UnableToCreateJ2KEncoderCTX() = default; +}; + + +/** + * @brief state_video_compress_j2k Class + */ +struct state_video_compress_j2k { + state_video_compress_j2k(struct module *parent, const char* opts); + + module module_data{}; + struct cmpto_j2k_enc_ctx *context{}; + struct cmpto_j2k_enc_cfg *enc_settings{}; + std::unique_ptr pool; + unsigned int in_frames{}; ///< number of currently encoding frames + mutex lock; + condition_variable frame_popped; + video_desc saved_desc{}; ///< pool properties + codec_t precompress_codec = VC_NONE; + video_desc compressed_desc{}; + + // Generic Parameters + double quality = DEFAULT_QUALITY; // default image quality + long long int rate = 0; // bitrate in bits per second + int mct = -1; // force use of mct - -1 means default + bool lossless = false; // lossless encoding + + // CPU Parameters + int cpu_thread_count = DEFAULT_CPU_THREAD_COUNT; + unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; + + // CUDA Parameters + bool pool_in_device_memory = false; cuda_convert_func_t cuda_convert_func = nullptr; uint8_t *cuda_conv_tmp_buf = nullptr; + unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // j2k_compress_platform::NONE by default at initialization + j2k_compress_platform platform = j2k_compress_platform::NONE; + unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of frames between push and pop + + private: + void parse_fmt(const char* opts); + bool initialize_j2k_enc_ctx(); + + // CPU Parameter + const size_t cpu_mem_limit = 0; // Not yet implemented as of v2.8.1. Must be 0. }; -static void j2k_compressed_frame_dispose(struct video_frame *frame); -static void j2k_compress_done(struct module *mod); +/** + * @brief state_video_compress_j2k constructor to create from opts + * @param parent Base Module Struct + * @param opts Configuration options to construct class + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + * @throw UnableToCreateJ2KEncoderCTX if failure to create J2K CTX +*/ +state_video_compress_j2k::state_video_compress_j2k(struct module *parent, const char* opts) { + try { + parse_fmt(opts); + } catch (...) { + throw; + } + + if (!initialize_j2k_enc_ctx()) { + throw UnableToCreateJ2KEncoderCTX(); + } + + module_init_default(&module_data); + module_data.cls = MODULE_CLASS_DATA; + module_data.priv_data = this; + module_data.deleter = j2k_compress_done; + module_register(&module_data, parent); +} static void parallel_conv(video_frame *dst, video_frame *src){ int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); @@ -171,12 +341,6 @@ static void parallel_conv(video_frame *dst, video_frame *src){ } } -#ifdef HAVE_CUDA -const cuda_convert_func_t r12l_to_rg48_cuda = preprocess_r12l_to_rg48; -#else -const cuda_convert_func_t r12l_to_rg48_cuda = nullptr; -#endif - static struct { codec_t ug_codec; enum cmpto_sample_format_type cmpto_sf; @@ -198,6 +362,7 @@ ADD_TO_PARAM( CPU_CONV_PARAM, "* " CPU_CONV_PARAM "\n" " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); + static void set_pool(struct state_video_compress_j2k *s, struct video_desc desc) { @@ -220,19 +385,19 @@ set_pool(struct state_video_compress_j2k *s, struct video_desc desc) } s->pool_in_device_memory = true; - s->pool = video_frame_pool( + s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator< cuda_wrapper_malloc, cuda_wrapper_free>()); return; } s->cuda_convert_func = nullptr; // either was 0 or force_cpu_conv - s->pool = video_frame_pool( + s->pool = std::make_unique( s->max_in_frames, cmpto_j2k_enc_cuda_buffer_data_allocator()); #else - s->pool = video_frame_pool(s->max_in_frames, default_data_allocator()); + s->pool = std::make_unique(s->max_in_frames, default_data_allocator()); #endif } @@ -251,7 +416,7 @@ static bool configure_with(struct state_video_compress_j2k *s, struct video_desc } if(!found){ - log_msg(LOG_LEVEL_ERROR, "[J2K] Failed to find suitable pixel format\n"); + MSG(ERROR, "Failed to find suitable pixel format\n"); return false; } @@ -317,8 +482,7 @@ do_gpu_copy(struct state_video_compress_j2k *s, } static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ - std::shared_ptr ret = s->pool.get_frame(); - + std::shared_ptr ret = s->pool->get_frame(); if (s->pool_in_device_memory) { do_gpu_copy(s, ret, frame); } else if (s->precompress_codec != VC_NONE) { @@ -378,7 +542,7 @@ static std::shared_ptr j2k_compress_pop(struct module *state) const char * encoding_error = ""; CHECK_OK(cmpto_j2k_enc_img_get_error(img, &encoding_error), "get error status", encoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image encoding failed: %s\n", encoding_error); + MSG(ERROR, "Image encoding failed: %s\n", encoding_error); goto start; } struct custom_data *udata = nullptr; @@ -401,44 +565,106 @@ static std::shared_ptr j2k_compress_pop(struct module *state) return shared_ptr(out, out->callbacks.dispose); } -struct { +/// @brief Struct for options for J2K Compression Usage +struct opts { const char *label; const char *key; const char *description; const char *opt_str; const bool is_boolean; -} usage_opts[] = { +}; + +constexpr opts general_opts[5] = { {"Bitrate", "quality", "Target bitrate", ":rate=", false}, {"Quality", "quant_coeff", "Quality in range [0-1], default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false}, - {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_MEM_LIMIT), ":mem_limit=", false}, - {"Tile limit", "tile_limit", "Number of tiles encoded at moment (less to reduce latency, more to increase performance, 0 means infinity), default: " TOSTRING(DEFAULT_TILE_LIMIT), ":tile_limit=", false}, - {"Pool size", "pool_size", "Total number of tiles encoder can hold at moment (same meaning as above), default: " TOSTRING(DEFAULT_POOL_SIZE) ", should be greater than ", ":pool_size=", false}, - {"Use MCT", "mct", "use MCT", ":mct", true}, + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Must be greater than tile_limit when platform=cuda and img_limit when platform=cpu. "\ + "default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false}, + {"Use MCT", "mct", "Use MCT", ":mct", true}, + {"Lossless compression", "lossless", "Enable lossless compression. default: disabled", ":lossless", true} +}; + +constexpr opts cuda_opts[2] = { + {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_CUDA_MEM_LIMIT), ":mem_limit=", false}, + {"Tile limit", "tile_limit", "Number of tiles encoded at one moment by GPU (less to reduce latency, more to increase performance, 0 is infinity). "\ + "default: " TOSTRING(DEFAULT_CUDA_TILE_LIMIT), ":tile_limit=", false}, +}; + +constexpr opts platform_opts[1] = { + {"Plaform", "platform", "Platform device for the encoder to use", ":platform=", false}, +}; + +constexpr opts cpu_opts[2] = { + {"Thread count", "thread_count", "Number of threads to use on the CPU. 0 is all available. default: " TOSTRING(DEFAULT_CPU_THREAD_COUNT), ":thread_count=", false}, + {"Image limit", "img_limit", "Number of images that can be encoded at one moment by CPU. Max limit is thread_count. 0 is default limit. default: " TOSTRING(DEFAULT_IMG_LIMIT), ":img_limit=", false}, }; +/** + * @fn usage + * @brief Display J2K Compression Usage Information + */ static void usage() { - col() << "J2K compress usage:\n"; - col() << TERM_BOLD << TRED("\t-c cmpto_j2k"); - for(const auto& opt : usage_opts){ - assert(strlen(opt.opt_str) >= 2); - col() << "[" << opt.opt_str; - if (!opt.is_boolean) { - col() << "<" << opt.opt_str[1] << ">"; // :quality -> (first letter used as ":quality=") + col() << "J2K compress platform support:\n"; + const auto supports_cpu = supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU); + const auto supports_cuda = supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA); + + col() << "\tCPU .... " << (supports_cpu ? "yes" : "no") + << (supports_cuda ? "\n" : "\t[default]\n"); + col() << "\tCUDA ... " << (supports_cuda ? "yes\t[default]\n" : "no\n"); + + auto show_syntax = [](const auto& options) { + for (const auto& opt : options) { + assert(strlen(opt.opt_str) >= 2); + col() << "[" << opt.opt_str; + if (!opt.is_boolean) { + col() << "<" << opt.opt_str[1] << ">"; // :quality -> (first letter used as ":quality=") + } + col() << "]"; + } + }; + + auto show_arguments = [](const auto& options) { + for (const auto& opt : options) { + assert(strlen(opt.opt_str) >= 2); + if (opt.is_boolean) { + col() << TBOLD("\t" << opt.opt_str + 1 <<); + } else { + col() << TBOLD("\t<" << opt.opt_str[1] << ">"); + } + col() << " - " << opt.description << "\n"; } - col() << "]"; + }; + + col() << "J2K compress usage:\n"; + if (supports_cuda) { + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cuda"); + show_syntax(cuda_opts); + show_syntax(general_opts); + col() << " [--cuda-device ]\n" << TERM_RESET; + } + if (supports_cpu) { + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cpu"); + show_syntax(cpu_opts); + show_syntax(general_opts); } - col() << " [--cuda-device ]\n" << TERM_RESET; + col() << "\n" << TERM_RESET; col() << "where:\n"; - for(const auto& opt : usage_opts){ - if (opt.is_boolean) { - col() << TBOLD("\t" << opt.opt_str + 1 <<); - } else { - col() << TBOLD("\t<" << opt.opt_str[1] << ">"); - } - col() << " - " << opt.description << "\n"; + + show_arguments(platform_opts); + + if (supports_cuda) { + col() << "CUDA compress arguments:\n"; + show_arguments(cuda_opts); + col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; } - col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; + if (supports_cpu) { + col() << "CPU compress arguments:\n"; + show_arguments(cpu_opts); + } + + col() << "General arguments:\n"; + show_arguments(general_opts); + } #define ASSIGN_CHECK_VAL(var, str, minval) \ @@ -449,93 +675,262 @@ static void usage() { << "[J2K] Wrong value " << (str) \ << " for " #var "! Value must be >= " << (minval) \ << ".\n"; \ - return NULL; \ + throw InvalidArgument(); \ } \ (var) = val; \ } while (0) -static struct module * j2k_compress_init(struct module *parent, const char *c_cfg) -{ - double quality = DEFAULT_QUALITY; - long long int mem_limit = DEFAULT_MEM_LIMIT; - unsigned int tile_limit = DEFAULT_TILE_LIMIT; - +/// CUDA opt Syntax +// -c cmpto_j2k:platform=cuda[:mem_limit=][:tile_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] [--cuda-device ] +/// CPU opt Syntax +// -c cmpto_j2k:platform=cpu[:thread_count=][:img_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] +/** + * @fn parse_fmt + * @brief Parse options and configure class members accordingly + * @param opts Configuration options + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + */ +void state_video_compress_j2k::parse_fmt(const char* opts) { const auto *version = cmpto_j2k_enc_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; - - auto *s = new state_video_compress_j2k(); - - char *tmp = (char *) alloca(strlen(c_cfg) + 1); - strcpy(tmp, c_cfg); - char *save_ptr, *item; - while ((item = strtok_r(tmp, ":", &save_ptr))) { - tmp = NULL; - if (strncasecmp("rate=", item, strlen("rate=")) == 0) { - ASSIGN_CHECK_VAL(s->rate, strchr(item, '=') + 1, 1); - } else if (strncasecmp("quality=", item, strlen("quality=")) == 0) { - quality = stod(strchr(item, '=') + 1); - } else if (strcasecmp("mct", item) == 0 || strcasecmp("nomct", item) == 0) { - s->mct = strcasecmp("mct", item) == 0 ? 1 : 0; - } else if (strncasecmp("mem_limit=", item, strlen("mem_limit=")) == 0) { - ASSIGN_CHECK_VAL(mem_limit, strchr(item, '=') + 1, 1); - } else if (strncasecmp("tile_limit=", item, strlen("tile_limit=")) == 0) { - ASSIGN_CHECK_VAL(tile_limit, strchr(item, '=') + 1, 0); - } else if (strncasecmp("pool_size=", item, strlen("pool_size=")) == 0) { - ASSIGN_CHECK_VAL(s->max_in_frames, strchr(item, '=') + 1, 1); - } else if (strcasecmp("help", item) == 0) { + MSG(INFO, "Using codec version: %s\n", (version == nullptr ? "(unknown)" : version->name)); + + /** + * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. + * If it does, configure the preferred default platform and max_in_frames using priority below + * 1 - CUDA + * 2 - CPU + * + * If platform is not found, throw NoCmptoTechnologyFound exception + */ + if (supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA)) { // prefer CUDA compress by default + platform = j2k_compress_platform::CUDA; + max_in_frames = DEFAULT_CUDA_POOL_SIZE; + } else if (supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU)) { // prefer CPU compress by default + platform = j2k_compress_platform::CPU; + max_in_frames = DEFAULT_CPU_POOL_SIZE; + } else { + MSG(ERROR, "Unable to find supported CMPTO_TECHNOLOGY\n"); + throw NoCmptoTechnologyFound(); + } + + auto split_arguments = [](std::string args, std::string delimiter) { + auto token = std::string{}; + auto pos = size_t{0}; + auto vec = std::vector{}; + + if (args == "\0") { + return vec; + } + + while ((pos = args.find(delimiter)) != std::string::npos) { + token = args.substr(0, pos); + vec.emplace_back(std::move(token)); + args.erase(0, pos + delimiter.length()); + } + + vec.emplace_back(std::move(args)); + return vec; + }; + + auto args = split_arguments(opts, ":"); + + // No Arguments provided, return and use defaults + if (args.empty()) { + return; + } + + const char* item = ""; + + /** + * Check if :pool_size= set manually during argument parsing. + * Since max_in_frames is default initialized to match compile time platform default (CUDA or CPU) + * Changing from :platform=cuda default to :platform=cpu default will not automatically + * set :pool_size= during argument parsing because opts can passed be out of order. + * + * To prevent potential for overwriting user's defined default, set is_pool_size_manually_configured=true + * during argument parsing and check before final function return + * + * If pool size is manually configured, do not set to default. + * Otherwise, set max_in_frames = platform default + */ + auto is_pool_size_manually_configured = false; + + for (const auto& arg : args) { + item = arg.c_str(); + if (strcasecmp("help", item) == 0) { // :help usage(); - return static_cast(INIT_NOERR); + throw HelpRequested(); + + } else if (IS_KEY_PREFIX(item, "platform")) { // :platform= + const char *const platform_name = strchr(item, '=') + 1; + platform = get_platform_from_name(platform_name); + if (j2k_compress_platform::NONE == platform) { + MSG(ERROR, "Unable to find requested encoding platform: \"%s\"\n", platform_name); + throw InvalidArgument(); + } + if (!supports_cmpto_technology(platform)) { + MSG(ERROR, "Does not support requested encoding platform: \"%s\"\n", platform_name); + throw InvalidArgument(); + } + + } else if (strcasecmp("lossless", item) == 0) { // :lossless + lossless = true; + + } else if (IS_KEY_PREFIX(item, "mem_limit")) { // :mem_limit= + ASSIGN_CHECK_VAL(cuda_mem_limit, strchr(item, '=') + 1, 1); + + } else if (IS_KEY_PREFIX(item, "thread_count")) { // :thread_count= + cpu_thread_count = atoi(strchr(item, '=') + 1); + ASSIGN_CHECK_VAL(cpu_thread_count, strchr(item, '=') + 1, MIN_CPU_THREAD_COUNT); + + } else if (IS_KEY_PREFIX(item, "tile_limit")) { // :tile_limit= + ASSIGN_CHECK_VAL(cuda_tile_limit, strchr(item, '=') + 1, 0); + + } else if (IS_KEY_PREFIX(item, "img_limit")) { // :img_limit= + ASSIGN_CHECK_VAL(cpu_img_limit, strchr(item, '=') + 1, MIN_CPU_IMG_LIMIT); + + } else if (IS_KEY_PREFIX(item, "rate")) { // :rate= + ASSIGN_CHECK_VAL(rate, strchr(item, '=') + 1, 1); + + } else if (IS_KEY_PREFIX(item, "quality")) { // :quality= + quality = std::stod(strchr(item, '=') + 1); + if (quality < 0.0 || quality > 1.0) { + MSG(ERROR, "Quality should be in interval [0-1]\n"); + throw InvalidArgument(); + } + + } else if (IS_KEY_PREFIX(item, "pool_size")) { // :pool_size= + ASSIGN_CHECK_VAL(max_in_frames, strchr(item, '=') + 1, 1); + is_pool_size_manually_configured = true; + + } else if (strcasecmp("mct", item) == 0) { // :mct + mct = strcasecmp("mct", item) == 0 ? 1 : 0; + } else { - log_msg(LOG_LEVEL_ERROR, "[J2K] Wrong option: %s\n", item); - goto error; + MSG(ERROR, "Unable to find option: \"%s\"\n", item); + throw InvalidArgument(); } } - if (quality < 0.0 || quality > 1.0) { - LOG(LOG_LEVEL_ERROR) << "[J2K] Quality should be in interval [0-1]!\n"; - goto error; + // If CPU selected + if (j2k_compress_platform::CPU == platform) { + /** + * Confirm thread_count != CMPTO_J2K_ENC_CPU_DEFAULT (0) + * If it does, img_limit can be > thread_count since all threads used + * + * If thread_count is not 0, confirm img_limit doesn't exceed thread_count + * Set img_limit = thread_count if exeeded + */ + if (cpu_thread_count != CMPTO_J2K_ENC_CPU_DEFAULT && cpu_thread_count < static_cast(cpu_img_limit)) { + MSG(INFO, "img_limit (%i) exceeds thread_count. Lowering img_limit to %i to match thread_count.\n", + cpu_img_limit, + cpu_thread_count); + cpu_img_limit = cpu_thread_count; + } + + // If pool_size was manually set, ignore this check. + // Otherwise, if it was not set, confirm that max_in_frames matches DEFAULT_CPU_POOL_SIZE + if (!is_pool_size_manually_configured && max_in_frames != DEFAULT_CPU_POOL_SIZE) { + MSG(DEBUG, "max_in_frames set to CPU default: %i", DEFAULT_CPU_POOL_SIZE); + max_in_frames = DEFAULT_CPU_POOL_SIZE; + } } +} +/** + * @fn initialize_j2k_enc_ctx + * @brief Initialize internal cmpto_j2k_enc_ctx_cfg for requested platform and settings + * @return true if successsfully configured + * @return false if unable to configure + */ +[[nodiscard]] +bool state_video_compress_j2k::initialize_j2k_enc_ctx() { struct cmpto_j2k_enc_ctx_cfg *ctx_cfg; + CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), "Context configuration create", - goto error); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], mem_limit, tile_limit), - "Setting CUDA device", goto error); + return false); + + if (j2k_compress_platform::CPU == platform) { + MSG(INFO, "Configuring for CPU\n"); + pool = std::make_unique(max_in_frames, cpu_allocator()); + + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cpu( + ctx_cfg, + cpu_thread_count, + cpu_mem_limit, + cpu_img_limit), + "Setting CPU device", + return false); + + MSG(INFO, "Using %s threads on CPU. Thread Count = %i, Image Limit = %i\n", + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_thread_count, + cpu_img_limit); + } + + if (j2k_compress_platform::CUDA == platform) { + MSG(INFO, "Configuring for CUDA\n"); + pool = std::make_unique(max_in_frames, cuda_allocator()); + + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( + ctx_cfg, + cuda_devices[i], + cuda_mem_limit, + cuda_tile_limit), + "Setting CUDA device", + return false); + } } - CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &s->context), "Context create", - goto error); + CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &context), "Context create", + return false); + CHECK_OK(cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg), "Context configuration destroy", NOOP); CHECK_OK(cmpto_j2k_enc_cfg_create( - s->context, - &s->enc_settings), + context, + &enc_settings), "Creating context configuration:", - goto error); - CHECK_OK(cmpto_j2k_enc_cfg_set_quantization( - s->enc_settings, - quality /* 0.0 = poor quality, 1.0 = full quality */ - ), - "Setting quantization", - NOOP); + return false); + if (lossless) { + CHECK_OK(cmpto_j2k_enc_cfg_set_lossless( + enc_settings, + lossless ? 1 : 0), + "Enabling lossless", + return false); + } else { + CHECK_OK(cmpto_j2k_enc_cfg_set_quantization( + enc_settings, + quality /* 0.0 = poor quality, 1.0 = full quality */), + "Setting quantization", + NOOP); + } - CHECK_OK(cmpto_j2k_enc_cfg_set_resolutions( s->enc_settings, 6), + CHECK_OK(cmpto_j2k_enc_cfg_set_resolutions(enc_settings, 6), "Setting DWT levels", NOOP); - module_init_default(&s->module_data); - s->module_data.cls = MODULE_CLASS_DATA; - s->module_data.priv_data = s; - s->module_data.deleter = j2k_compress_done; - module_register(&s->module_data, parent); - - return &s->module_data; + return true; +} -error: - delete s; - return NULL; +static struct module * j2k_compress_init(struct module *parent, const char *opts) { + try { + auto *s = new state_video_compress_j2k(parent, opts); + return &s->module_data; + } catch (HelpRequested const& e) { + return static_cast(INIT_NOERR); + } catch (InvalidArgument const& e) { + return NULL; + } catch (UnableToCreateJ2KEncoderCTX const& e) { + return NULL; + } catch (NoCmptoTechnologyFound const& e) { + return NULL; + } catch (...) { + return NULL; + } } static void j2k_compressed_frame_dispose(struct video_frame *frame) @@ -559,15 +954,6 @@ release_cstream_cuda(void *img_custom_data, size_t img_custom_data_size, samples_size); } -#define HANDLE_ERROR_COMPRESS_PUSH \ - if (udata != nullptr) { \ - udata->frame.~shared_ptr(); \ - } \ - if (img != nullptr) { \ - cmpto_j2k_enc_img_destroy(img); \ - } \ - return - static void j2k_compress_push(struct module *state, std::shared_ptr tx) { struct state_video_compress_j2k *s = @@ -590,7 +976,7 @@ static void j2k_compress_push(struct module *state, std::shared_ptr if (s->precompress_codec != VC_NONE) { pool_desc.color_spec = s->precompress_codec; } - s->pool.reconfigure( + s->pool->reconfigure( pool_desc, (size_t) vc_get_linesize(pool_desc.width, pool_desc.color_spec) * pool_desc.height); @@ -665,10 +1051,16 @@ static compress_module_info get_cmpto_j2k_module_info(){ compress_module_info module_info; module_info.name = "cmpto_j2k"; - for(const auto& opt : usage_opts){ - module_info.opts.emplace_back(module_option{opt.label, - opt.description, opt.key, opt.opt_str, opt.is_boolean}); - } + auto add_module_options = [&](const auto& options) { + for (const auto& opt : options) { + module_info.opts.emplace_back(module_option{opt.label, + opt.description, opt.key, opt.opt_str, opt.is_boolean}); + } + }; + + add_module_options(cuda_opts); + add_module_options(cpu_opts); + add_module_options(general_opts); codec codec_info; codec_info.name = "Comprimato jpeg2000"; diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index f0c77c7ce..a1cbbfb03 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -76,14 +76,8 @@ #include "utils/parallel_conv.h" #include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... #include "video_decompress.h" - -constexpr const int DEFAULT_TILE_LIMIT = 2; -/// maximal size of queue for decompressed frames -constexpr const int DEFAULT_MAX_QUEUE_SIZE = 2; -/// maximal number of concurrently decompressed frames -constexpr const int DEFAULT_MAX_IN_FRAMES = 4; -constexpr const int64_t DEFAULT_MEM_LIMIT = 1000000000LL; -constexpr const char *MOD_NAME = "[J2K dec.] "; +#include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... +#include "video_decompress.h" using std::lock_guard; using std::min; @@ -93,14 +87,71 @@ using std::queue; using std::stoi; using std::unique_lock; -static void -j2k_decompress_cleanup_common(struct state_decompress_j2k *s); +constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; + +// General Parameter Defaults +constexpr int DEFAULT_MAX_QUEUE_SIZE = 2; // maximal size of queue for decompressed frames +constexpr int DEFAULT_MAX_IN_FRAMES = 4; // maximal number of concurrently decompressed frames + +// CPU-specific Defaults +constexpr int DEFAULT_THREAD_COUNT = CMPTO_J2K_DEC_CPU_DEFAULT; // Number of threads equal to all cores +constexpr int MIN_CPU_THREAD_COUNT = CMPTO_J2K_DEC_CPU_NONE; // No threads will be created +constexpr size_t DEFAULT_CPU_MEM_LIMIT = 0; // Should always be 0. Not implemented as of v2.8.1 +constexpr unsigned int DEFAULT_CPU_IMG_LIMIT = 0; // 0 for default, thread_count for max +constexpr unsigned int MIN_CPU_IMG_LIMIT = 0; // Min number of images encoded by the CPU at once + +// CUDA-specific Defaults +constexpr int64_t DEFAULT_CUDA_MEM_LIMIT = 1000000000LL; +constexpr int DEFAULT_CUDA_TILE_LIMIT = 2; + +#define NOOP ((void) 0) +#define CHECK_OK(cmd, err_msg, action_fail) do { \ + int j2k_error = cmd; \ + if (j2k_error != CMPTO_OK) {\ + LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ + action_fail;\ + } \ +} while(0) + +/* + * Function Predeclarations + */ +static void *decompress_j2k_worker(void *args); +static void j2k_decompress_cleanup_common(struct state_decompress_j2k *s); + + +/* + * Platform to use for J2K Decompression + */ +enum j2k_decompress_platform { + NONE = 0, + CPU = CMPTO_TECHNOLOGY_CPU, + CUDA = CMPTO_TECHNOLOGY_CUDA +}; + +/** + * @fn supports_cmpto_technology + * @brief Check if Comprimato supports requested technology type + * @param cmpto_technology_type Technology type to check against + * @return True if supported, False if unsupported + */ +static bool supports_cmpto_technology(int cmpto_technology_type) { + const auto *version = cmpto_j2k_dec_get_version(); + + return (version == nullptr) ? false : (version->technology & cmpto_technology_type); +} + +/* + * Exceptions for state_decompress_j2k construction + */ +/// @brief UnableToCreateJ2KDecoderCTX Exception +struct UnableToCreateJ2KDecoderCTX : public std::exception { + UnableToCreateJ2KDecoderCTX() = default; +}; struct state_decompress_j2k { - state_decompress_j2k(unsigned int mqs, unsigned int mif) - : max_queue_size(mqs), max_in_frames(mif) {} - long long int req_mem_limit = DEFAULT_MEM_LIMIT; - unsigned int req_tile_limit = DEFAULT_TILE_LIMIT; + state_decompress_j2k(); + cmpto_j2k_dec_ctx *decoder{}; cmpto_j2k_dec_cfg *settings{}; @@ -111,26 +162,47 @@ struct state_decompress_j2k { queue> decompressed_frames; ///< buffer, length int pitch; pthread_t thread_id{}; - unsigned int max_queue_size; ///< maximal length of @ref decompressed_frames - unsigned int max_in_frames; ///< maximal frames that can be "in progress" unsigned int in_frames{}; ///< actual number of decompressed frames unsigned long long int dropped{}; ///< number of dropped frames because queue was full + // CUDA Defaults + unsigned int cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // Default Decompression Platform to Use + j2k_decompress_platform platform = j2k_decompress_platform::NONE; + + // CPU Defaults + unsigned int cpu_img_limit = DEFAULT_CPU_IMG_LIMIT; + const size_t cpu_mem_limit = DEFAULT_CPU_MEM_LIMIT; // Should always be 0. Not yet implemented as of Cmpto v2.8.4 + signed int cpu_thread_count = DEFAULT_THREAD_COUNT; + + // General Defaults + unsigned int max_in_frames = DEFAULT_MAX_IN_FRAMES; // maximal frames that can be "in progress" + unsigned int max_queue_size = DEFAULT_MAX_QUEUE_SIZE; // maximal length of @ref decompressed_frames + void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height){nullptr}; + + private: + void parse_params(); + bool initialize_j2k_dec_ctx(); }; -#define CHECK_OK(cmd, err_msg, action_fail) do { \ - int j2k_error = cmd; \ - if (j2k_error != CMPTO_OK) {\ - LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ - action_fail;\ - } \ -} while(0) +/** + * @brief Default state_decompress_j2k Constructor + * @throw UnableToCreateJ2KDecoderCTX if unable to create J2K CTX + */ +state_decompress_j2k::state_decompress_j2k() { + parse_params(); + + if (!initialize_j2k_dec_ctx()) { + throw UnableToCreateJ2KDecoderCTX(); + } +} -#define NOOP ((void) 0) static void rg48_to_r12l(unsigned char *dst_buffer, unsigned char *src_buffer, @@ -150,11 +222,21 @@ static void rg48_to_r12l(unsigned char *dst_buffer, } } -static void print_dropped(unsigned long long int dropped) { +static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { if (dropped % 10 == 1) { - log_msg(LOG_LEVEL_WARNING, "[J2K dec] Some frames (%llu) dropped.\n", dropped); - log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "[J2K dec] You may try to increase " - "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n"); + MSG(WARNING, "Some frames (%llu) dropped.\n", dropped); + + if (j2k_decompress_platform::CPU == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "image limit to increase the number of images decoded at one moment by adding parameter: --param j2k-dec-img-limit=#\n", + MOD_NAME); + } + + if (j2k_decompress_platform::CUDA == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=#\n", + MOD_NAME); + } } } @@ -187,7 +269,7 @@ static void *decompress_j2k_worker(void *args) const char * decoding_error = ""; CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", decoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); + MSG(ERROR, "Image decoding failed: %s\n", decoding_error); continue; } @@ -208,7 +290,7 @@ static void *decompress_j2k_worker(void *args) "Unable to to return processed image", NOOP); lock_guard lk(s->lock); while (s->decompressed_frames.size() >= s->max_queue_size) { - print_dropped(s->dropped++); + print_dropped(s->dropped++, s->platform); auto decoded = s->decompressed_frames.front(); s->decompressed_frames.pop(); free(decoded.first); @@ -219,41 +301,204 @@ static void *decompress_j2k_worker(void *args) return NULL; } +/* + * Command Line Parameters for state_decompress_j2k + */ +#define CPU_CONV_PARAM "j2k-dec-cpu-conv" + +// CUDA-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" + " use CUDA to decode images\n"); ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" " J2K max memory usage in bytes.\n"); ADD_TO_PARAM("j2k-dec-tile-limit", "* j2k-dec-tile-limit=\n" " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); -ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-queue-len=\n" +// CPU-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-use-cpu", "* j2k-dec-use-cpu\n" + " use the CPU to decode images\n"); +ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n" + " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); +ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" + " number of images which can be decoded at one moment (0 means default, thread-count is maximum limit)\n"); +ADD_TO_PARAM(CPU_CONV_PARAM, "* " CPU_CONV_PARAM "\n" + " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); +// General Command Line Parameters +ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-dec-queue-len=\n" " max queue len\n"); -ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-encoder-queue=\n" +ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-dec-encoder-queue=\n" " max number of frames held by encoder\n"); -static void * j2k_decompress_init(void) -{ - unsigned int queue_len = DEFAULT_MAX_QUEUE_SIZE; - unsigned int encoder_in_frames = DEFAULT_MAX_IN_FRAMES; + +/** + * @fn parse_params + * @brief Parse Command Line Parameters and Initialize Struct Members + */ +void state_decompress_j2k::parse_params() { + /** + * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. + * If it does, configure the preferred default platform and max_in_frames using priority below + * 1 - CUDA + * 2 - CPU + * + * If platform is not found set platform = j2k_decompress_platform::NONE + */ + + const auto supports_cpu = supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU); + const auto supports_cuda = supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA); + + if (supports_cuda) { // prefer CUDA decompress by default + platform = j2k_decompress_platform::CUDA; + } else if (supports_cpu) { // prefer CPU decompress by default + platform = j2k_decompress_platform::CPU; + } else { + MSG(ERROR, "Unable to find supported CMPTO_TECHNOLOGY\n"); + platform = j2k_decompress_platform::NONE; // default to NONE + } + + // CUDA-specific commandline_params + if (get_commandline_param("j2k-dec-use-cuda")) { + if (supports_cuda) { + platform = j2k_decompress_platform::CUDA; + } else { + MSG(ERROR, "j2k-dec-use-cuda argument provided. CUDA decompress not supported.\n"); + + // Check if CPU is default decompress + // If it is, create a log message to notify this will be used automatically + if (j2k_decompress_platform::CPU == platform) { + MSG(INFO, "Defaulting to CPU decompress\n"); + } + } + } + + if (get_commandline_param("j2k-dec-mem-limit")) { + cuda_mem_limit = unit_evaluate(get_commandline_param("j2k-dec-mem-limit"), nullptr); + } + + if (get_commandline_param("j2k-dec-tile-limit")) { + cuda_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit")); + } + + // CPU-specific commandline_params + if (get_commandline_param("j2k-dec-use-cpu")) { + if (supports_cpu) { + platform = j2k_decompress_platform::CPU; + } else { + MSG(ERROR, "j2k-dec-use-cpu argument provided. CPU decompress not supported.\n"); + } + } + + if (get_commandline_param("j2k-dec-cpu-thread-count")) { + cpu_thread_count = atoi(get_commandline_param("j2k-dec-cpu-thread-count")); + + // Confirm cpu_thread_count between MIN_CPU_THREAD_COUNT + 1 (0) + if (cpu_thread_count <= MIN_CPU_THREAD_COUNT) { + // Implementing this requires the creation of executor threads. + MSG(ERROR, "j2k-dec-cpu-thread-count must be 0 or higher. Setting to min allowed 0\n"); + cpu_thread_count = 0; + } + } if (get_commandline_param("j2k-dec-queue-len")) { - queue_len = atoi(get_commandline_param("j2k-dec-queue-len")); + max_queue_size = atoi(get_commandline_param("j2k-dec-queue-len")); + } + + if (get_commandline_param("j2k-dec-img-limit")) { + cpu_img_limit = atoi(get_commandline_param("j2k-dec-img-limit")); + + // Confirm cpu_img_limit between MIN_CPU_IMG_LIMIT + if (cpu_img_limit < MIN_CPU_IMG_LIMIT) { + MSG(INFO, "j2k-dec-img-limit below min allowed of %i. Setting to min allowed %i\n", + MIN_CPU_IMG_LIMIT, + MIN_CPU_IMG_LIMIT); + cpu_img_limit = MIN_CPU_IMG_LIMIT; + } } if (get_commandline_param("j2k-dec-encoder-queue")) { - encoder_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue")); + max_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue")); + } + + const auto *version = cmpto_j2k_dec_get_version(); + MSG(INFO, "Using code version: %s\n", (version == nullptr ? "(unknown)" : version->name)); +} + +/** + * @fn initialize_j2k_dec_ctx + * @brief Create cmpto_j2k_dec_ctx_cfg based on requested platform and command line arguments + * @return true if cmpto_j2k_dec_ctx_cfg successfully created + * @return false if unable to create cmpto_j2k_dec_ctx_cfg + */ +[[nodiscard]] +bool state_decompress_j2k::initialize_j2k_dec_ctx() { + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg; + CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); + + if (j2k_decompress_platform::NONE == platform) { + MSG(ERROR, "No supported CMPTO_TECHNOLOGY found. Unable to create decompress context.\n"); + return false; } - auto *s = new state_decompress_j2k(queue_len, encoder_in_frames); - if (get_commandline_param("j2k-dec-mem-limit") != nullptr) { - s->req_mem_limit = unit_evaluate( - get_commandline_param("j2k-dec-mem-limit"), nullptr); + if (j2k_decompress_platform::CUDA == platform) { + MSG(INFO, "Using platform CUDA for decompress\n"); + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), + "Error setting CUDA device", return false); + MSG(INFO, "Using CUDA Device %s\n", std::to_string(cuda_devices[i]).c_str()); + } } - if (get_commandline_param("j2k-dec-tile-limit") != nullptr) { - s->req_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit")); + if (j2k_decompress_platform::CPU == platform) { + MSG(INFO, "Using platform CPU for decompress\n"); + // Confirm that cpu_thread_count != 0 (unlimited). If it does, cpu_img_limit can exceed thread_count + if (cpu_thread_count != DEFAULT_THREAD_COUNT && cpu_img_limit > static_cast(cpu_thread_count)) { + MSG(INFO, "j2k-dec-img-limit set to %i. Lowering to match to match j2k-dec-cpu-thread-count (%i)\n", + cpu_img_limit, + cpu_thread_count); + cpu_img_limit = cpu_thread_count; + } + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( + ctx_cfg, + cpu_thread_count, + cpu_mem_limit, + cpu_img_limit), + "Error configuring the CPU", + return false); + + MSG(INFO, "Using %s threads on the CPU. Image Limit set to %i.\n", + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_img_limit); } - const auto *version = cmpto_j2k_dec_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; + CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &this->decoder), + "Error initializing context", + return false); + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP); + + CHECK_OK(cmpto_j2k_dec_cfg_create(this->decoder, &this->settings), + "Error creating configuration", { + cmpto_j2k_dec_cfg_destroy(this->settings); + cmpto_j2k_dec_ctx_destroy(this->decoder); + return false; + }); + + auto ret = pthread_create(&this->thread_id, NULL, decompress_j2k_worker, static_cast(this)); + assert(ret == 0 && "Unable to create thread"); - return s; + return true; +} + +/** + * @brief Initialize a new instance of state_decompress_j2k + * @return Null or Pointer to state_decompress_j2k + */ +static void * j2k_decompress_init(void) { + try { + auto *s = new state_decompress_j2k(); + return s; + } catch (...) { + return NULL; + } } static void @@ -266,6 +511,7 @@ r12l_postprocessor_get_sz( *temp_buffer_size = 0; // no temp buffer required *output_buffer_size = vc_get_datalen(size_x, size_y, R12L); } + #ifdef HAVE_CUDA const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda = postprocess_rg48_to_r12l; @@ -294,11 +540,6 @@ static const struct conv_props { r12l_postprocessor_get_sz, r12l_postprocess_cuda }, }; -#define CPU_CONV_PARAM "j2k-dec-cpu-conv" -ADD_TO_PARAM( - CPU_CONV_PARAM, - "* " CPU_CONV_PARAM "\n" - " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); static bool set_postprocess_convert(struct state_decompress_j2k *s, struct cmpto_j2k_dec_ctx_cfg *ctx_cfg, @@ -344,16 +585,33 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, } enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0; - + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr; CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( - ctx_cfg, cuda_devices[i], s->req_mem_limit, - s->req_tile_limit), - "Error setting CUDA device", return false); + if (j2k_decompress_platform::CUDA == s->platform) { + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( + ctx_cfg, cuda_devices[i], s->cuda_mem_limit, + s->cuda_tile_limit), + "Error setting CUDA device", return false); + } } + if (j2k_decompress_platform::CPU == s->platform) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( + ctx_cfg, + s->cpu_thread_count, + s->cpu_mem_limit, + s->cpu_img_limit), + "Error configuring the CPU", + return false); + + MSG(INFO, "Using %s threads on the CPU. Image Limit set to %i.\n", + (s->cpu_thread_count == 0 ? "all available" : std::to_string(s->cpu_thread_count).c_str()), + s->cpu_img_limit); + } + + for(const auto &codec : codecs){ if(codec.ug_codec != out_codec){ continue; @@ -431,7 +689,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha struct cmpto_j2k_dec_comp_info comp_info[3]; if (cmpto_j2k_dec_cstream_get_img_info(buffer, len, &info) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 0, &comp_info[0]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get image or first component info.\n"); + MSG(ERROR, "J2K Failed to get image or first component info.\n"); return DECODER_NO_FRAME; } @@ -440,7 +698,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha if (info.comp_count == 3) { if (cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 1, &comp_info[1]) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 2, &comp_info[2]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get componentt 1 or 2 info.\n"); + MSG(ERROR, "J2K Failed to get component 1 or 2 info.\n"); return DECODER_NO_FRAME; } if (comp_info[0].sampling_factor_x == 1 && comp_info[0].sampling_factor_y == 1 && @@ -479,7 +737,7 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne } if (s->in_frames >= s->max_in_frames + 1) { - print_dropped(s->dropped++); + print_dropped(s->dropped++, s->platform); goto return_previous; } @@ -547,7 +805,7 @@ j2k_decompress_cleanup_common(struct state_decompress_j2k *s) { cmpto_j2k_dec_ctx_stop(s->decoder); pthread_join(s->thread_id, NULL); - log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] Decoder stopped.\n"); + MSG(VERBOSE, "Decoder stopped.\n"); if (s->settings != nullptr) { cmpto_j2k_dec_cfg_destroy(s->settings);