diff --git a/CHANGES.md b/CHANGES.md index bcb7eae6..f6dd357e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,18 +1,28 @@ # 変更履歴 - CHANGE - - 下位互換のない変更 + - 後方互換性のない変更 - UPDATE - - 下位互換がある変更 + - 後方互換性がある変更 - ADD - - 下位互換がある追加 + - 後方互換性がある追加 - FIX - バグ修正 ## develop +## 2023.2.0 + +**2023-07-03** + +- [ADD] OpenH264 に対応 + - Ubuntu 22.04 x86_64 でのみ対応 + - @melpon + ## 2023.1.2 +**2023-06-28** + - [FIX] Windows の Python 用ライブラリが dll ではなく pyd だったのを修正する - @melpon diff --git a/CMakeLists.txt b/CMakeLists.txt index 09a24023..d4836252 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ +cmake_minimum_required(VERSION 3.18) project(sora_sdk) -cmake_minimum_required(VERSION 3.18...3.23) # Only interpret if() arguments as variables or keywords when unquoted. cmake_policy(SET CMP0054 NEW) @@ -105,8 +105,10 @@ elseif(TARGET_OS STREQUAL "windows") # 文字コードを utf-8 として扱うのと、シンボルテーブル数を増やす target_compile_options(sora_sdk_ext PRIVATE /utf-8 /bigobj) # CRTライブラリを静的リンクさせる - # MSVC_RUNTIME_LIBRARY で設定ても反映されないため CMAKE_CXX_FLAGS を用いた - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + set_property(TARGET sora_sdk_ext PROPERTY + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") + set_property(TARGET nanobind-static PROPERTY + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") target_compile_definitions(sora_sdk_ext PRIVATE _CONSOLE @@ -116,6 +118,16 @@ elseif(TARGET_OS STREQUAL "windows") HAVE_SNPRINTF ) endif() + +# Windows 以外は OpenH264 の動的呼び出しに対応する +if (NOT TARGET_OS STREQUAL "windows") + target_include_directories(sora_sdk_ext PRIVATE ${OPENH264_DIR}/include) + target_sources(sora_sdk_ext + PRIVATE + src/dynamic_h264_decoder.cpp + src/dynamic_h264_encoder.cpp) +endif() + target_link_libraries(sora_sdk_ext PRIVATE Sora::sora) install(TARGETS sora_sdk_ext LIBRARY DESTINATION .) 
diff --git a/NOTICE.md b/NOTICE.md index 232ce41a..4d5226e7 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -1,8 +1,208 @@ -Sora C++ SDK +Sora Python SDK -Copyright 2021-2022, Wandbox LLC (Original Author) +Copyright 2023-2023, tnoho (Original Author) +Copyright 2023-2023, Wandbox LLC (Original Author) +Copyright 2023-2023, Shiguredo Inc. -Copyright 2021-2022, Shiguredo Inc. +# Sora C++ SDK + +https://github.com/shiguredo/sora-cpp-sdk + +``` +Copyright 2021-2023, Wandbox LLC (Original Author) +Copyright 2021-2023, Shiguredo Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` + +``` + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS +``` # WebRTC diff --git a/README.md b/README.md index 02f1684d..0613a7d5 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,8 @@ $ rye sync - Sora の機能へ追従 - VP8 / VP9 / AV1 / H.264 のハードウェアアクセラレーター (HWA) 対応 +- OpenH264 を利用した H.264 のソフトウェアエンコーダー/デコーダーへの対応 + - Ubuntu 22.04 x86_64 でのみ対応 ## 優先実装 @@ -125,3 +127,11 @@ limitations under the License. 
``` このリポジトリに含まれる `shiguremaru.png` ファイルのライセンスは [CC BY-NC-ND 4.0](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.ja) です。 + +## OpenH264 + +https://www.openh264.org/BINARY_LICENSE.txt + +``` +"OpenH264 Video Codec provided by Cisco Systems, Inc." +``` diff --git a/VERSION b/VERSION index 904f88bd..25f64425 100644 --- a/VERSION +++ b/VERSION @@ -3,3 +3,4 @@ WEBRTC_BUILD_VERSION=m114.5735.2.0 BOOST_VERSION=1.82.0 LYRA_VERSION=1.3.0 CMAKE_VERSION=3.26.4 +OPENH264_VERSION=v2.3.1 diff --git a/pyproject.toml b/pyproject.toml index b140b35e..77da86dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "sora_sdk" authors = [{ name = "Shiguredo Inc.", email = "contact+pypi@shiguredo.jp" }] -version = "2023.1.2" +version = "2023.2.0" description = "WebRTC SFU Sora Python SDK" readme = "README.md" license = { file = "LICENSE" } diff --git a/requirements-dev.lock b/requirements-dev.lock index c487c899..c711ac77 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -9,7 +9,6 @@ -e file:. 
auditwheel==5.4.0 build==0.10.0 -colorama==0.4.6 exceptiongroup==1.1.1 iniconfig==2.0.0 nanobind==1.4.0 diff --git a/run.py b/run.py index 55db491e..ff7d685a 100644 --- a/run.py +++ b/run.py @@ -9,7 +9,8 @@ import zipfile from typing import Callable, Dict, List, NamedTuple, Optional, Union -from pypath import get_python_version, get_python_include_dir, get_python_library +from pypath import (get_python_include_dir, get_python_library, + get_python_version) def mkdir_p(path: str): @@ -431,6 +432,17 @@ def install_cmake(version, source_dir, install_dir, platform: str, ext): extract(path, install_dir, 'cmake') +@versioned +def install_openh264(version, source_dir, install_dir): + rm_rf(os.path.join(source_dir, 'openh264')) + rm_rf(os.path.join(install_dir, 'openh264')) + git_clone_shallow('https://github.com/cisco/openh264.git', + version, os.path.join(source_dir, 'openh264')) + with cd(os.path.join(source_dir, 'openh264')): + cmd([ + 'make', f'PREFIX={os.path.join(install_dir, "openh264")}', 'install-headers']) + + class PlatformTarget(object): def __init__(self, os, osver, arch): self.os = os @@ -615,6 +627,16 @@ def install_deps(build_platform: PlatformTarget, target_platform: PlatformTarget else: add_path(os.path.join(install_dir, 'cmake', 'bin')) + if build_platform.os != 'windows': + # OpenH264 + install_openh264_args = { + 'version': version['OPENH264_VERSION'], + 'version_file': os.path.join(install_dir, 'openh264.version'), + 'source_dir': source_dir, + 'install_dir': install_dir, + } + install_openh264(**install_openh264_args) + def cmake_path(path: str) -> str: return path.replace('\\', '/') @@ -672,6 +694,8 @@ def main(): f"-DWEBRTC_LIBRARY_DIR={cmake_path(webrtc_info.webrtc_library_dir)}") cmake_args.append( f"-DSORA_DIR={cmake_path(os.path.join(install_dir, 'sora'))}") + cmake_args.append( + f"-DOPENH264_DIR={cmake_path(os.path.join(install_dir, 'openh264'))}") python_version = get_python_version() 
cmake_args.append(f"-DPYTHON_VERSION_STRING={python_version}") cmake_args.append(f"-DPYTHON_INCLUDE_DIR={get_python_include_dir(python_version)}") diff --git a/src/dynamic_h264_decoder.cpp b/src/dynamic_h264_decoder.cpp new file mode 100644 index 00000000..e528950c --- /dev/null +++ b/src/dynamic_h264_decoder.cpp @@ -0,0 +1,139 @@ +#include "dynamic_h264_decoder.h" + +#include + +// WebRTC +#include +#include +#include + +// OpenH264 +#include + +namespace webrtc { + +DynamicH264Decoder::DynamicH264Decoder(std::string openh264) + : openh264_(std::move(openh264)) {} +DynamicH264Decoder::~DynamicH264Decoder() { + Release(); +} + +bool DynamicH264Decoder::Configure(const Settings& settings) { + Release(); + + void* handle = ::dlopen(openh264_.c_str(), RTLD_LAZY); + if (handle == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to dlopen"; + return false; + } + openh264_handle_ = handle; + create_decoder_ = (CreateDecoderFunc)::dlsym(handle, "WelsCreateDecoder"); + if (create_decoder_ == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to dlsym(WelsCreateDecoder)"; + Release(); + return false; + } + destroy_decoder_ = + (DestroyDecoderFunc)::dlsym(handle, "WelsDestroyDecoder"); + if (destroy_decoder_ == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to dlsym(WelsDestroyDecoder)"; + Release(); + return false; + } + + ISVCDecoder* decoder = nullptr; + int r = create_decoder_(&decoder); + if (r != 0) { + RTC_LOG(LS_ERROR) << "Failed to WelsCreateDecoder: r=" << r; + Release(); + return false; + } + + SDecodingParam param = {}; + r = decoder->Initialize(&param); + if (r != 0) { + RTC_LOG(LS_ERROR) << "Failed to ISVCDecoder::Initialize: r=" << r; + Release(); + return false; + } + decoder_ = decoder; + + return true; +} +int32_t DynamicH264Decoder::Release() { + if (decoder_ != nullptr) { + decoder_->Uninitialize(); + destroy_decoder_(decoder_); + decoder_ = nullptr; + } + + if (openh264_handle_ != nullptr) { + ::dlclose(openh264_handle_); + openh264_handle_ = nullptr; + } + + return 
WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Decoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t render_time_ms) { + if (decoder_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + h264_bitstream_parser_.ParseBitstream(input_image); + absl::optional qp = h264_bitstream_parser_.GetLastSliceQp(); + + std::array yuv; + SBufferInfo info = {}; + int r = decoder_->DecodeFrameNoDelay(input_image.data(), input_image.size(), + yuv.data(), &info); + if (r != 0) { + RTC_LOG(LS_ERROR) << "Failed to ISVCDecoder::DecodeFrameNoDelay: r=" << r; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (info.iBufferStatus == 0) { + return WEBRTC_VIDEO_CODEC_OK; + } + + int width_y = info.UsrData.sSystemBuffer.iWidth; + int height_y = info.UsrData.sSystemBuffer.iHeight; + int width_uv = (width_y + 1) / 2; + int height_uv = (height_y + 1) / 2; + int stride_y = info.UsrData.sSystemBuffer.iStride[0]; + int stride_uv = info.UsrData.sSystemBuffer.iStride[1]; + rtc::scoped_refptr i420_buffer( + webrtc::I420Buffer::Create(width_y, height_y)); + libyuv::I420Copy(yuv[0], stride_y, yuv[1], stride_uv, yuv[2], stride_uv, + i420_buffer->MutableDataY(), i420_buffer->StrideY(), + i420_buffer->MutableDataU(), i420_buffer->StrideU(), + i420_buffer->MutableDataV(), i420_buffer->StrideV(), width_y, + height_y); + + webrtc::VideoFrame video_frame = + webrtc::VideoFrame::Builder() + .set_video_frame_buffer(i420_buffer) + .set_timestamp_rtp(input_image.Timestamp()) + .build(); + if (input_image.ColorSpace() != nullptr) { + video_frame.set_color_space(*input_image.ColorSpace()); + } + + callback_->Decoded(video_frame, absl::nullopt, qp); + + return WEBRTC_VIDEO_CODEC_OK; +} + +const char* DynamicH264Decoder::ImplementationName() const { + return "OpenH264"; +} + +} // namespace webrtc diff --git 
a/src/dynamic_h264_decoder.h b/src/dynamic_h264_decoder.h new file mode 100644 index 00000000..affa6453 --- /dev/null +++ b/src/dynamic_h264_decoder.h @@ -0,0 +1,51 @@ +#ifndef DYNAMIC_H264_DECODER_H_ +#define DYNAMIC_H264_DECODER_H_ + +#include + +// WebRTC +#include +#include + +class ISVCDecoder; + +namespace webrtc { + +class DynamicH264Decoder : public H264Decoder { + public: + static std::unique_ptr Create(std::string openh264) { + return std::unique_ptr( + new DynamicH264Decoder(std::move(openh264))); + } + + DynamicH264Decoder(std::string openh264); + ~DynamicH264Decoder() override; + + bool Configure(const Settings& settings) override; + int32_t Release() override; + + int32_t RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) override; + + int32_t Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t render_time_ms = -1) override; + + const char* ImplementationName() const override; + + private: + DecodedImageCallback* callback_ = nullptr; + ISVCDecoder* decoder_ = nullptr; + webrtc::H264BitstreamParser h264_bitstream_parser_; + + std::string openh264_; + void* openh264_handle_ = nullptr; + using CreateDecoderFunc = int (*)(ISVCDecoder**); + using DestroyDecoderFunc = void (*)(ISVCDecoder*); + CreateDecoderFunc create_decoder_ = nullptr; + DestroyDecoderFunc destroy_decoder_ = nullptr; +}; + +} // namespace webrtc + +#endif diff --git a/src/dynamic_h264_encoder.cpp b/src/dynamic_h264_encoder.cpp new file mode 100644 index 00000000..be197433 --- /dev/null +++ b/src/dynamic_h264_encoder.cpp @@ -0,0 +1,766 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +// modules/video_coding/codecs/h264/h264_encoder_impl.{h,cc} の +// OpenH264 の関数を動的に読むようにしただけ + +#include "dynamic_h264_encoder.h" + +#include + +#include +#include +#include + +// WebRTC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// OpenH264 +#include +#include +#include +#include + +namespace webrtc { + +namespace { + +const bool kOpenH264EncoderDetailedLogging = false; + +// QP scaling thresholds. +static const int kLowH264QpThreshold = 24; +static const int kHighH264QpThreshold = 37; + +// Used by histograms. Values of entries should not be changed. +enum DynamicH264EncoderEvent { + kH264EncoderEventInit = 0, + kH264EncoderEventError = 1, + kH264EncoderEventMax = 16, +}; + +int NumberOfThreads(absl::optional encoder_thread_limit, + int width, + int height, + int number_of_cores) { + // TODO(hbos): In Chromium, multiple threads do not work with sandbox on Mac, + // see crbug.com/583348. Until further investigated, only use one thread. + // While this limitation is gone, this changes the bitstream format (see + // bugs.webrtc.org/14368) so still guarded by field trial to allow for + // experimentation using th experimental + // WebRTC-VideoEncoderSettings/encoder_thread_limit trial. + if (encoder_thread_limit.has_value()) { + int limit = encoder_thread_limit.value(); + RTC_DCHECK_GE(limit, 1); + if (width * height >= 1920 * 1080 && number_of_cores > 8) { + return std::min(limit, 8); // 8 threads for 1080p on high perf machines. + } else if (width * height > 1280 * 960 && number_of_cores >= 6) { + return std::min(limit, 3); // 3 threads for 1080p. + } else if (width * height > 640 * 480 && number_of_cores >= 3) { + return std::min(limit, 2); // 2 threads for qHD/HD. + } else { + return 1; // 1 thread for VGA or less. + } + } + // TODO(sprang): Also check sSliceArgument.uiSliceNum on GetEncoderParams(), + // before enabling multithreading here. 
+ return 1; +} + +VideoFrameType ConvertToVideoFrameType(EVideoFrameType type) { + switch (type) { + case videoFrameTypeIDR: + return VideoFrameType::kVideoFrameKey; + case videoFrameTypeSkip: + case videoFrameTypeI: + case videoFrameTypeP: + case videoFrameTypeIPMixed: + return VideoFrameType::kVideoFrameDelta; + case videoFrameTypeInvalid: + break; + } + RTC_DCHECK_NOTREACHED() << "Unexpected/invalid frame type: " << type; + return VideoFrameType::kEmptyFrame; +} + +absl::optional ScalabilityModeFromTemporalLayers( + int num_temporal_layers) { + switch (num_temporal_layers) { + case 0: + break; + case 1: + return ScalabilityMode::kL1T1; + case 2: + return ScalabilityMode::kL1T2; + case 3: + return ScalabilityMode::kL1T3; + default: + RTC_DCHECK_NOTREACHED(); + } + return absl::nullopt; +} + +} // namespace + +// Helper method used by DynamicH264Encoder::Encode. +// Copies the encoded bytes from `info` to `encoded_image`. The +// `encoded_image->_buffer` may be deleted and reallocated if a bigger buffer is +// required. +// +// After OpenH264 encoding, the encoded bytes are stored in `info` spread out +// over a number of layers and "NAL units". Each NAL unit is a fragment starting +// with the four-byte start code {0,0,0,1}. All of this data (including the +// start codes) is copied to the `encoded_image->_buffer`. +static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) { + // Calculate minimum buffer size required to hold encoded data. + size_t required_capacity = 0; + size_t fragments_count = 0; + for (int layer = 0; layer < info->iLayerNum; ++layer) { + const SLayerBSInfo& layerInfo = info->sLayerInfo[layer]; + for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++fragments_count) { + RTC_CHECK_GE(layerInfo.pNalLengthInByte[nal], 0); + // Ensure `required_capacity` will not overflow. 
+ RTC_CHECK_LE(layerInfo.pNalLengthInByte[nal], + std::numeric_limits::max() - required_capacity); + required_capacity += layerInfo.pNalLengthInByte[nal]; + } + } + auto buffer = EncodedImageBuffer::Create(required_capacity); + encoded_image->SetEncodedData(buffer); + + // Iterate layers and NAL units, note each NAL unit as a fragment and copy + // the data to `encoded_image->_buffer`. + const uint8_t start_code[4] = {0, 0, 0, 1}; + size_t frag = 0; + encoded_image->set_size(0); + for (int layer = 0; layer < info->iLayerNum; ++layer) { + const SLayerBSInfo& layerInfo = info->sLayerInfo[layer]; + // Iterate NAL units making up this layer, noting fragments. + size_t layer_len = 0; + for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++frag) { + // Because the sum of all layer lengths, `required_capacity`, fits in a + // `size_t`, we know that any indices in-between will not overflow. + RTC_DCHECK_GE(layerInfo.pNalLengthInByte[nal], 4); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 0], start_code[0]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 1], start_code[1]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 2], start_code[2]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 3], start_code[3]); + layer_len += layerInfo.pNalLengthInByte[nal]; + } + // Copy the entire layer's data (including start codes). 
+ memcpy(buffer->data() + encoded_image->size(), layerInfo.pBsBuf, layer_len); + encoded_image->set_size(encoded_image->size() + layer_len); + } +} + +DynamicH264Encoder::DynamicH264Encoder(const cricket::VideoCodec& codec, + std::string openh264) + : packetization_mode_(H264PacketizationMode::SingleNalUnit), + max_payload_size_(0), + number_of_cores_(0), + encoded_image_callback_(nullptr), + has_reported_init_(false), + has_reported_error_(false), + openh264_(std::move(openh264)) { + RTC_CHECK(absl::EqualsIgnoreCase(codec.name, cricket::kH264CodecName)); + std::string packetization_mode_string; + if (codec.GetParam(cricket::kH264FmtpPacketizationMode, + &packetization_mode_string) && + packetization_mode_string == "1") { + packetization_mode_ = H264PacketizationMode::NonInterleaved; + } + downscaled_buffers_.reserve(kMaxSimulcastStreams - 1); + encoded_images_.reserve(kMaxSimulcastStreams); + encoders_.reserve(kMaxSimulcastStreams); + configurations_.reserve(kMaxSimulcastStreams); + tl0sync_limit_.reserve(kMaxSimulcastStreams); + svc_controllers_.reserve(kMaxSimulcastStreams); +} + +DynamicH264Encoder::~DynamicH264Encoder() { + Release(); +} + +int32_t DynamicH264Encoder::InitEncode(const VideoCodec* inst, + const VideoEncoder::Settings& settings) { + ReportInit(); + if (!inst || inst->codecType != kVideoCodecH264) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate == 0) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + int32_t release_ret = Release(); + if (release_ret != WEBRTC_VIDEO_CODEC_OK) { + ReportError(); + return release_ret; + } + + if (!InitOpenH264()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + int number_of_streams = SimulcastUtility::NumberOfSimulcastStreams(*inst); + bool doing_simulcast = (number_of_streams > 1); + + if (doing_simulcast && + 
!SimulcastUtility::ValidSimulcastParameters(*inst, number_of_streams)) { + return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED; + } + downscaled_buffers_.resize(number_of_streams - 1); + encoded_images_.resize(number_of_streams); + encoders_.resize(number_of_streams); + pictures_.resize(number_of_streams); + svc_controllers_.resize(number_of_streams); + scalability_modes_.resize(number_of_streams); + configurations_.resize(number_of_streams); + tl0sync_limit_.resize(number_of_streams); + + max_payload_size_ = settings.max_payload_size; + number_of_cores_ = settings.number_of_cores; + encoder_thread_limit_ = settings.encoder_thread_limit; + codec_ = *inst; + + // Code expects simulcastStream resolutions to be correct, make sure they are + // filled even when there are no simulcast layers. + if (codec_.numberOfSimulcastStreams == 0) { + codec_.simulcastStream[0].width = codec_.width; + codec_.simulcastStream[0].height = codec_.height; + } + + for (int i = 0, idx = number_of_streams - 1; i < number_of_streams; + ++i, --idx) { + ISVCEncoder* openh264_encoder; + // Create encoder. + if (create_encoder_(&openh264_encoder) != 0) { + // Failed to create encoder. + RTC_LOG(LS_ERROR) << "Failed to create OpenH264 encoder"; + RTC_DCHECK(!openh264_encoder); + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(openh264_encoder); + if (kOpenH264EncoderDetailedLogging) { + int trace_level = WELS_LOG_DETAIL; + openh264_encoder->SetOption(ENCODER_OPTION_TRACE_LEVEL, &trace_level); + } + // else WELS_LOG_DEFAULT is used by default. + + // Store h264 encoder. 
+ encoders_[i] = openh264_encoder; + + // Set internal settings from codec_settings + configurations_[i].simulcast_idx = idx; + configurations_[i].sending = false; + configurations_[i].width = codec_.simulcastStream[idx].width; + configurations_[i].height = codec_.simulcastStream[idx].height; + configurations_[i].max_frame_rate = static_cast(codec_.maxFramerate); + configurations_[i].frame_dropping_on = codec_.GetFrameDropEnabled(); + configurations_[i].key_frame_interval = codec_.H264()->keyFrameInterval; + configurations_[i].num_temporal_layers = + std::max(codec_.H264()->numberOfTemporalLayers, + codec_.simulcastStream[idx].numberOfTemporalLayers); + + // Create downscaled image buffers. + if (i > 0) { + downscaled_buffers_[i - 1] = I420Buffer::Create( + configurations_[i].width, configurations_[i].height, + configurations_[i].width, configurations_[i].width / 2, + configurations_[i].width / 2); + } + + // Codec_settings uses kbits/second; encoder uses bits/second. + configurations_[i].max_bps = codec_.maxBitrate * 1000; + configurations_[i].target_bps = codec_.startBitrate * 1000; + + // Create encoder parameters based on the layer configuration. + SEncParamExt encoder_params = CreateEncoderParams(i); + + // Initialize. + if (openh264_encoder->InitializeExt(&encoder_params) != 0) { + RTC_LOG(LS_ERROR) << "Failed to initialize OpenH264 encoder"; + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + // TODO(pbos): Base init params on these values before submitting. + int video_format = EVideoFormatType::videoFormatI420; + openh264_encoder->SetOption(ENCODER_OPTION_DATAFORMAT, &video_format); + + // Initialize encoded image. Default buffer size: size of unencoded data. 
+ + const size_t new_capacity = + CalcBufferSize(VideoType::kI420, codec_.simulcastStream[idx].width, + codec_.simulcastStream[idx].height); + encoded_images_[i].SetEncodedData(EncodedImageBuffer::Create(new_capacity)); + encoded_images_[i]._encodedWidth = codec_.simulcastStream[idx].width; + encoded_images_[i]._encodedHeight = codec_.simulcastStream[idx].height; + encoded_images_[i].set_size(0); + + tl0sync_limit_[i] = configurations_[i].num_temporal_layers; + scalability_modes_[i] = ScalabilityModeFromTemporalLayers( + configurations_[i].num_temporal_layers); + if (scalability_modes_[i].has_value()) { + svc_controllers_[i] = CreateScalabilityStructure(*scalability_modes_[i]); + if (svc_controllers_[i] == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to create scalability structure"; + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + } + + SimulcastRateAllocator init_allocator(codec_); + VideoBitrateAllocation allocation = + init_allocator.Allocate(VideoBitrateAllocationParameters( + DataRate::KilobitsPerSec(codec_.startBitrate), codec_.maxFramerate)); + SetRates(RateControlParameters(allocation, codec_.maxFramerate)); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Encoder::Release() { + while (!encoders_.empty()) { + ISVCEncoder* openh264_encoder = encoders_.back(); + if (openh264_encoder) { + RTC_CHECK_EQ(0, openh264_encoder->Uninitialize()); + destroy_encoder_(openh264_encoder); + } + encoders_.pop_back(); + } + downscaled_buffers_.clear(); + configurations_.clear(); + encoded_images_.clear(); + pictures_.clear(); + tl0sync_limit_.clear(); + svc_controllers_.clear(); + scalability_modes_.clear(); + ReleaseOpenH264(); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Encoder::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_image_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +void DynamicH264Encoder::SetRates(const RateControlParameters& parameters) { + if (encoders_.empty()) { + 
RTC_LOG(LS_WARNING) << "SetRates() while uninitialized."; + return; + } + + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Invalid frame rate: " << parameters.framerate_fps; + return; + } + + if (parameters.bitrate.get_sum_bps() == 0) { + // Encoder paused, turn off all encoding. + for (size_t i = 0; i < configurations_.size(); ++i) { + configurations_[i].SetStreamState(false); + } + return; + } + + codec_.maxFramerate = static_cast(parameters.framerate_fps); + + size_t stream_idx = encoders_.size() - 1; + for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) { + // Update layer config. + configurations_[i].target_bps = + parameters.bitrate.GetSpatialLayerSum(stream_idx); + configurations_[i].max_frame_rate = parameters.framerate_fps; + + if (configurations_[i].target_bps) { + configurations_[i].SetStreamState(true); + + // Update h264 encoder. + SBitrateInfo target_bitrate; + memset(&target_bitrate, 0, sizeof(SBitrateInfo)); + target_bitrate.iLayer = SPATIAL_LAYER_ALL, + target_bitrate.iBitrate = configurations_[i].target_bps; + encoders_[i]->SetOption(ENCODER_OPTION_BITRATE, &target_bitrate); + encoders_[i]->SetOption(ENCODER_OPTION_FRAME_RATE, + &configurations_[i].max_frame_rate); + } else { + configurations_[i].SetStreamState(false); + } + } +} + +int32_t DynamicH264Encoder::Encode( + const VideoFrame& input_frame, + const std::vector* frame_types) { + if (encoders_.empty()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (!encoded_image_callback_) { + RTC_LOG(LS_WARNING) + << "InitEncode() has been called, but a callback function " + "has not been set with RegisterEncodeCompleteCallback()"; + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + rtc::scoped_refptr frame_buffer = + input_frame.video_frame_buffer()->ToI420(); + if (!frame_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + input_frame.video_frame_buffer()->type()) + << " image to I420. 
Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE; + } + RTC_CHECK(frame_buffer->type() == VideoFrameBuffer::Type::kI420 || + frame_buffer->type() == VideoFrameBuffer::Type::kI420A); + + bool is_keyframe_needed = false; + for (size_t i = 0; i < configurations_.size(); ++i) { + if (configurations_[i].key_frame_request && configurations_[i].sending) { + // This is legacy behavior, generating a keyframe on all layers + // when generating one for a layer that became active for the first time + // or after being disabled. + is_keyframe_needed = true; + break; + } + } + + RTC_DCHECK_EQ(configurations_[0].width, frame_buffer->width()); + RTC_DCHECK_EQ(configurations_[0].height, frame_buffer->height()); + + // Encode image for each layer. + for (size_t i = 0; i < encoders_.size(); ++i) { + // EncodeFrame input. + pictures_[i] = {0}; + pictures_[i].iPicWidth = configurations_[i].width; + pictures_[i].iPicHeight = configurations_[i].height; + pictures_[i].iColorFormat = EVideoFormatType::videoFormatI420; + pictures_[i].uiTimeStamp = input_frame.ntp_time_ms(); + // Downscale images on second and ongoing layers. 
+ if (i == 0) { + pictures_[i].iStride[0] = frame_buffer->StrideY(); + pictures_[i].iStride[1] = frame_buffer->StrideU(); + pictures_[i].iStride[2] = frame_buffer->StrideV(); + pictures_[i].pData[0] = const_cast(frame_buffer->DataY()); + pictures_[i].pData[1] = const_cast(frame_buffer->DataU()); + pictures_[i].pData[2] = const_cast(frame_buffer->DataV()); + } else { + pictures_[i].iStride[0] = downscaled_buffers_[i - 1]->StrideY(); + pictures_[i].iStride[1] = downscaled_buffers_[i - 1]->StrideU(); + pictures_[i].iStride[2] = downscaled_buffers_[i - 1]->StrideV(); + pictures_[i].pData[0] = + const_cast(downscaled_buffers_[i - 1]->DataY()); + pictures_[i].pData[1] = + const_cast(downscaled_buffers_[i - 1]->DataU()); + pictures_[i].pData[2] = + const_cast(downscaled_buffers_[i - 1]->DataV()); + // Scale the image down a number of times by downsampling factor. + libyuv::I420Scale(pictures_[i - 1].pData[0], pictures_[i - 1].iStride[0], + pictures_[i - 1].pData[1], pictures_[i - 1].iStride[1], + pictures_[i - 1].pData[2], pictures_[i - 1].iStride[2], + configurations_[i - 1].width, + configurations_[i - 1].height, pictures_[i].pData[0], + pictures_[i].iStride[0], pictures_[i].pData[1], + pictures_[i].iStride[1], pictures_[i].pData[2], + pictures_[i].iStride[2], configurations_[i].width, + configurations_[i].height, libyuv::kFilterBox); + } + + if (!configurations_[i].sending) { + continue; + } + if (frame_types != nullptr && i < frame_types->size()) { + // Skip frame? + if ((*frame_types)[i] == VideoFrameType::kEmptyFrame) { + continue; + } + } + // Send a key frame either when this layer is configured to require one + // or we have explicitly been asked to. 
+ const size_t simulcast_idx = + static_cast(configurations_[i].simulcast_idx); + bool send_key_frame = + is_keyframe_needed || + (frame_types && simulcast_idx < frame_types->size() && + (*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey); + if (send_key_frame) { + // API doc says ForceIntraFrame(false) does nothing, but calling this + // function forces a key frame regardless of the `bIDR` argument's value. + // (If every frame is a key frame we get lag/delays.) + encoders_[i]->ForceIntraFrame(true); + configurations_[i].key_frame_request = false; + } + // EncodeFrame output. + SFrameBSInfo info; + memset(&info, 0, sizeof(SFrameBSInfo)); + + std::vector layer_frames; + if (svc_controllers_[i]) { + layer_frames = svc_controllers_[i]->NextFrameConfig(send_key_frame); + RTC_CHECK_EQ(layer_frames.size(), 1); + } + + // Encode! + int enc_ret = encoders_[i]->EncodeFrame(&pictures_[i], &info); + if (enc_ret != 0) { + RTC_LOG(LS_ERROR) + << "OpenH264 frame encoding failed, EncodeFrame returned " << enc_ret + << "."; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + encoded_images_[i]._encodedWidth = configurations_[i].width; + encoded_images_[i]._encodedHeight = configurations_[i].height; + encoded_images_[i].SetTimestamp(input_frame.timestamp()); + encoded_images_[i].SetColorSpace(input_frame.color_space()); + encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType); + encoded_images_[i].SetSimulcastIndex(configurations_[i].simulcast_idx); + + // Split encoded image up into fragments. This also updates + // `encoded_image_`. + RtpFragmentize(&encoded_images_[i], &info); + + // Encoder can skip frames to save bandwidth in which case + // `encoded_images_[i]._length` == 0. + if (encoded_images_[i].size() > 0) { + // Parse QP. + h264_bitstream_parser_.ParseBitstream(encoded_images_[i]); + encoded_images_[i].qp_ = + h264_bitstream_parser_.GetLastSliceQp().value_or(-1); + + // Deliver encoded image. 
+ CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecH264; + codec_specific.codecSpecific.H264.packetization_mode = + packetization_mode_; + codec_specific.codecSpecific.H264.temporal_idx = kNoTemporalIdx; + codec_specific.codecSpecific.H264.idr_frame = + info.eFrameType == videoFrameTypeIDR; + codec_specific.codecSpecific.H264.base_layer_sync = false; + if (configurations_[i].num_temporal_layers > 1) { + const uint8_t tid = info.sLayerInfo[0].uiTemporalId; + codec_specific.codecSpecific.H264.temporal_idx = tid; + codec_specific.codecSpecific.H264.base_layer_sync = + tid > 0 && tid < tl0sync_limit_[i]; + if (svc_controllers_[i]) { + if (layer_frames[0].TemporalId() != tid) { + RTC_LOG(LS_WARNING) + << "Encoder produced a frame for layer S" << (i + 1) << "T" + << tid + 1 << " that wasn't requested."; + continue; + } + encoded_images_[i].SetTemporalIndex(tid); + } + if (codec_specific.codecSpecific.H264.base_layer_sync) { + tl0sync_limit_[i] = tid; + } + if (tid == 0) { + tl0sync_limit_[i] = configurations_[i].num_temporal_layers; + } + } + if (svc_controllers_[i]) { + codec_specific.generic_frame_info = + svc_controllers_[i]->OnEncodeDone(layer_frames[0]); + if (send_key_frame && codec_specific.generic_frame_info.has_value()) { + codec_specific.template_structure = + svc_controllers_[i]->DependencyStructure(); + } + codec_specific.scalability_mode = scalability_modes_[i]; + } + encoded_image_callback_->OnEncodedImage(encoded_images_[i], + &codec_specific); + } + } + return WEBRTC_VIDEO_CODEC_OK; +} + +// Initialization parameters. +// There are two ways to initialize. There is SEncParamBase (cleared with +// memset(&p, 0, sizeof(SEncParamBase)) used in Initialize, and SEncParamExt +// which is a superset of SEncParamBase (cleared with GetDefaultParams) used +// in InitializeExt. 
+SEncParamExt DynamicH264Encoder::CreateEncoderParams(size_t i) const { + SEncParamExt encoder_params; + encoders_[i]->GetDefaultParams(&encoder_params); + if (codec_.mode == VideoCodecMode::kRealtimeVideo) { + encoder_params.iUsageType = CAMERA_VIDEO_REAL_TIME; + } else if (codec_.mode == VideoCodecMode::kScreensharing) { + encoder_params.iUsageType = SCREEN_CONTENT_REAL_TIME; + } else { + RTC_DCHECK_NOTREACHED(); + } + encoder_params.iPicWidth = configurations_[i].width; + encoder_params.iPicHeight = configurations_[i].height; + encoder_params.iTargetBitrate = configurations_[i].target_bps; + // Keep unspecified. WebRTC's max codec bitrate is not the same setting + // as OpenH264's iMaxBitrate. More details in https://crbug.com/webrtc/11543 + encoder_params.iMaxBitrate = UNSPECIFIED_BIT_RATE; + // Rate Control mode + encoder_params.iRCMode = RC_BITRATE_MODE; + encoder_params.fMaxFrameRate = configurations_[i].max_frame_rate; + + // The following parameters are extension parameters (they're in SEncParamExt, + // not in SEncParamBase). + encoder_params.bEnableFrameSkip = configurations_[i].frame_dropping_on; + // `uiIntraPeriod` - multiple of GOP size + // `keyFrameInterval` - number of frames + encoder_params.uiIntraPeriod = configurations_[i].key_frame_interval; + // Reuse SPS id if possible. This helps to avoid reset of chromium HW decoder + // on each key-frame. + // Note that WebRTC resets encoder on resolution change which makes all + // EParameterSetStrategy modes except INCREASING_ID (default) essentially + // equivalent to CONSTANT_ID. + encoder_params.eSpsPpsIdStrategy = SPS_LISTING; + encoder_params.uiMaxNalSize = 0; + // Threading model: use auto. + // 0: auto (dynamic imp. 
internal encoder) + // 1: single thread (default value) + // >1: number of threads + encoder_params.iMultipleThreadIdc = + NumberOfThreads(encoder_thread_limit_, encoder_params.iPicWidth, + encoder_params.iPicHeight, number_of_cores_); + // The base spatial layer 0 is the only one we use. + encoder_params.sSpatialLayers[0].iVideoWidth = encoder_params.iPicWidth; + encoder_params.sSpatialLayers[0].iVideoHeight = encoder_params.iPicHeight; + encoder_params.sSpatialLayers[0].fFrameRate = encoder_params.fMaxFrameRate; + encoder_params.sSpatialLayers[0].iSpatialBitrate = + encoder_params.iTargetBitrate; + encoder_params.sSpatialLayers[0].iMaxSpatialBitrate = + encoder_params.iMaxBitrate; + encoder_params.iTemporalLayerNum = configurations_[i].num_temporal_layers; + if (encoder_params.iTemporalLayerNum > 1) { + // iNumRefFrame specifies total number of reference buffers to allocate. + // For N temporal layers we need at least (N - 1) buffers to store last + // encoded frames of all reference temporal layers. + // Note that there is no API in OpenH264 encoder to specify exact set of + // references to be used to prediction of a given frame. Encoder can + // theoretically use all available reference buffers. + encoder_params.iNumRefFrame = encoder_params.iTemporalLayerNum - 1; + } + RTC_LOG(LS_INFO) << "OpenH264 version is " << OPENH264_MAJOR << "." + << OPENH264_MINOR; + switch (packetization_mode_) { + case H264PacketizationMode::SingleNalUnit: + // Limit the size of the packets produced. 
+ encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceNum = 1; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceMode = + SM_SIZELIMITED_SLICE; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = + static_cast(max_payload_size_); + RTC_LOG(LS_INFO) << "Encoder is configured with NALU constraint: " + << max_payload_size_ << " bytes"; + break; + case H264PacketizationMode::NonInterleaved: + // When uiSliceMode = SM_FIXEDSLCNUM_SLICE, uiSliceNum = 0 means auto + // design it with cpu core number. + // TODO(sprang): Set to 0 when we understand why the rate controller borks + // when uiSliceNum > 1. + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceNum = 1; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceMode = + SM_FIXEDSLCNUM_SLICE; + break; + } + return encoder_params; +} + +void DynamicH264Encoder::ReportInit() { + if (has_reported_init_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.DynamicH264Encoder.Event", + kH264EncoderEventInit, kH264EncoderEventMax); + has_reported_init_ = true; +} + +void DynamicH264Encoder::ReportError() { + if (has_reported_error_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.DynamicH264Encoder.Event", + kH264EncoderEventError, kH264EncoderEventMax); + has_reported_error_ = true; +} + +VideoEncoder::EncoderInfo DynamicH264Encoder::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "OpenH264"; + info.scaling_settings = + VideoEncoder::ScalingSettings(kLowH264QpThreshold, kHighH264QpThreshold); + info.is_hardware_accelerated = false; + info.supports_simulcast = true; + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420}; + return info; +} + +void DynamicH264Encoder::LayerConfig::SetStreamState(bool send_stream) { + if (send_stream && !sending) { + // Need a key frame if we have not sent this stream before. 
+ key_frame_request = true; + } + sending = send_stream; +} + +bool DynamicH264Encoder::InitOpenH264() { + if (openh264_handle_ != nullptr) { + return true; + } + + void* handle = ::dlopen(openh264_.c_str(), RTLD_LAZY); + if (handle == nullptr) { + return false; + } + create_encoder_ = (CreateEncoderFunc)::dlsym(handle, "WelsCreateSVCEncoder"); + if (create_encoder_ == nullptr) { + ::dlclose(handle); + return false; + } + destroy_encoder_ = + (DestroyEncoderFunc)::dlsym(handle, "WelsDestroySVCEncoder"); + if (destroy_encoder_ == nullptr) { + ::dlclose(handle); + return false; + } + openh264_handle_ = handle; + return true; +} + +void DynamicH264Encoder::ReleaseOpenH264() { + if (openh264_handle_ != nullptr) { + ::dlclose(openh264_handle_); + openh264_handle_ = nullptr; + } +} + +} // namespace webrtc diff --git a/src/dynamic_h264_encoder.h b/src/dynamic_h264_encoder.h new file mode 100644 index 00000000..28f6c8b1 --- /dev/null +++ b/src/dynamic_h264_encoder.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +// modules/video_coding/codecs/h264/h264_encoder_impl.{h,cc} の +// OpenH264 の関数を動的に読むようにしただけ + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ + +#if defined(WEBRTC_WIN) && !defined(__clang__) +#error "See: bugs.webrtc.org/9213#c13." 
+#endif + +#include +#include + +// WebRTC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// OpenH264 +#include + +class ISVCEncoder; + +namespace webrtc { + +class DynamicH264Encoder : public H264Encoder { + public: + static std::unique_ptr Create(const cricket::VideoCodec& codec, + std::string openh264) { + return std::unique_ptr( + new DynamicH264Encoder(codec, std::move(openh264))); + } + + public: + struct LayerConfig { + int simulcast_idx = 0; + int width = -1; + int height = -1; + bool sending = true; + bool key_frame_request = false; + float max_frame_rate = 0; + uint32_t target_bps = 0; + uint32_t max_bps = 0; + bool frame_dropping_on = false; + int key_frame_interval = 0; + int num_temporal_layers = 1; + + void SetStreamState(bool send_stream); + }; + + public: + explicit DynamicH264Encoder(const cricket::VideoCodec& codec, + const std::string openh264); + ~DynamicH264Encoder() override; + + // `settings.max_payload_size` is ignored. + // The following members of `codec_settings` are used. The rest are ignored. + // - codecType (must be kVideoCodecH264) + // - targetBitrate + // - maxFramerate + // - width + // - height + int32_t InitEncode(const VideoCodec* codec_settings, + const VideoEncoder::Settings& settings) override; + int32_t Release() override; + + int32_t RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) override; + void SetRates(const RateControlParameters& parameters) override; + + // The result of encoding - an EncodedImage and CodecSpecificInfo - are + // passed to the encode complete callback. + int32_t Encode(const VideoFrame& frame, + const std::vector* frame_types) override; + + EncoderInfo GetEncoderInfo() const override; + + // Exposed for testing. 
+ H264PacketizationMode PacketizationModeForTesting() const { + return packetization_mode_; + } + + private: + SEncParamExt CreateEncoderParams(size_t i) const; + + webrtc::H264BitstreamParser h264_bitstream_parser_; + // Reports statistics with histograms. + void ReportInit(); + void ReportError(); + + std::vector encoders_; + std::vector pictures_; + std::vector> downscaled_buffers_; + std::vector configurations_; + std::vector encoded_images_; + std::vector> svc_controllers_; + absl::InlinedVector, kMaxSimulcastStreams> + scalability_modes_; + + VideoCodec codec_; + H264PacketizationMode packetization_mode_; + size_t max_payload_size_; + int32_t number_of_cores_; + absl::optional encoder_thread_limit_; + EncodedImageCallback* encoded_image_callback_; + + bool has_reported_init_; + bool has_reported_error_; + + std::vector tl0sync_limit_; + + private: + bool InitOpenH264(); + void ReleaseOpenH264(); + + std::string openh264_; + void* openh264_handle_ = nullptr; + using CreateEncoderFunc = int (*)(ISVCEncoder**); + using DestroyEncoderFunc = void (*)(ISVCEncoder*); + CreateEncoderFunc create_encoder_ = nullptr; + DestroyEncoderFunc destroy_encoder_ = nullptr; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ diff --git a/src/sora.cpp b/src/sora.cpp index e7ec0e05..8e2f8aed 100644 --- a/src/sora.cpp +++ b/src/sora.cpp @@ -2,8 +2,9 @@ #include "sora.h" -Sora::Sora(bool use_hardware_encoder) { - factory_.reset(new SoraFactory(use_hardware_encoder)); +Sora::Sora(std::optional use_hardware_encoder, + std::optional openh264) { + factory_.reset(new SoraFactory(use_hardware_encoder, openh264)); } Sora::~Sora() { @@ -219,8 +220,8 @@ SoraVideoSource* Sora::CreateVideoSource() { auto source = rtc::make_ref_counted(config); std::string track_id = rtc::CreateRandomString(16); - auto track = factory_->GetPeerConnectionFactory()->CreateVideoTrack( - track_id, source.get()); + auto track = + 
factory_->GetPeerConnectionFactory()->CreateVideoTrack(source, track_id); SoraVideoSource* video_source = new SoraVideoSource(this, source, track); return video_source; diff --git a/src/sora.h b/src/sora.h index f9d231bb..cecbf127 100644 --- a/src/sora.h +++ b/src/sora.h @@ -14,7 +14,8 @@ class Sora : public DisposePublisher { public: - Sora(bool use_hardware_encoder); + Sora(std::optional use_hardware_encoder, + std::optional openh264); ~Sora(); std::shared_ptr CreateConnection( diff --git a/src/sora_factory.cpp b/src/sora_factory.cpp index 0a540bb4..4d4d0ab4 100644 --- a/src/sora_factory.cpp +++ b/src/sora_factory.cpp @@ -19,8 +19,13 @@ #include #include "dummy_audio_mixer.h" +#ifndef _WIN32 +#include "dynamic_h264_decoder.h" +#include "dynamic_h264_encoder.h" +#endif -SoraFactory::SoraFactory(bool use_hardware_encoder) { +SoraFactory::SoraFactory(std::optional use_hardware_encoder, + std::optional openh264) { // Lyra のモデルファイルを読み込むため SORA_LYRA_MODEL_COEFFS_PATH が設定されていない場合は // この共有ライブラリ直下に配置されているモデルファイルを利用する auto path = boost::dll::this_line_location().parent_path() / "model_coeffs"; @@ -32,13 +37,57 @@ SoraFactory::SoraFactory(bool use_hardware_encoder) { sora::SoraClientContextConfig context_config; context_config.use_audio_device = false; - context_config.use_hardware_encoder = use_hardware_encoder; + if (use_hardware_encoder) { + context_config.use_hardware_encoder = *use_hardware_encoder; + } context_config.configure_media_dependencies = - [](const webrtc::PeerConnectionFactoryDependencies& dependencies, - cricket::MediaEngineDependencies& media_dependencies) { + [use_hardware_encoder = context_config.use_hardware_encoder, openh264]( + const webrtc::PeerConnectionFactoryDependencies& dependencies, + cricket::MediaEngineDependencies& media_dependencies) { media_dependencies.audio_mixer = DummyAudioMixer::Create(media_dependencies.task_queue_factory); media_dependencies.audio_processing = nullptr; + +#ifndef _WIN32 + if (openh264) { + { + auto config = + 
use_hardware_encoder + ? sora::GetDefaultVideoEncoderFactoryConfig() + : sora::GetSoftwareOnlyVideoEncoderFactoryConfig(); + config.use_simulcast_adapter = true; + config.encoders.insert( + config.encoders.begin(), + sora::VideoEncoderConfig( + webrtc::kVideoCodecH264, + [openh264 = openh264]( + auto format) -> std::unique_ptr { + return webrtc::DynamicH264Encoder::Create( + cricket::VideoCodec(format), *openh264); + })); + media_dependencies.video_encoder_factory = + absl::make_unique( + std::move(config)); + } + { + auto config = + use_hardware_encoder + ? sora::GetDefaultVideoDecoderFactoryConfig() + : sora::GetSoftwareOnlyVideoDecoderFactoryConfig(); + config.decoders.insert( + config.decoders.begin(), + sora::VideoDecoderConfig( + webrtc::kVideoCodecH264, + [openh264 = openh264]( + auto format) -> std::unique_ptr { + return webrtc::DynamicH264Decoder::Create(*openh264); + })); + media_dependencies.video_decoder_factory = + absl::make_unique( + std::move(config)); + } + } +#endif }; context_ = sora::SoraClientContext::Create(context_config); } diff --git a/src/sora_factory.h b/src/sora_factory.h index a10eb63e..b637157b 100644 --- a/src/sora_factory.h +++ b/src/sora_factory.h @@ -1,6 +1,8 @@ #ifndef SORA_FACTORY_H_ #define SORA_FACTORY_H_ +#include + // WebRTC #include #include @@ -11,7 +13,8 @@ class SoraFactory { public: - SoraFactory(bool use_hardware_encoder); + SoraFactory(std::optional use_hardware_encoder, + std::optional openh264); rtc::scoped_refptr GetPeerConnectionFactory() const; diff --git a/src/sora_sdk_ext.cpp b/src/sora_sdk_ext.cpp index a8672f32..d0205689 100644 --- a/src/sora_sdk_ext.cpp +++ b/src/sora_sdk_ext.cpp @@ -211,7 +211,8 @@ NB_MODULE(sora_sdk_ext, m) { .def_rw("on_data_channel", &SoraConnection::on_data_channel_); nb::class_(m, "Sora") - .def(nb::init(), "use_hardware_encoder"_a = true) + .def(nb::init, std::optional>(), + "use_hardware_encoder"_a = nb::none(), "openh264"_a = nb::none()) .def("create_connection", 
&Sora::CreateConnection, "signaling_url"_a, "role"_a, "channel_id"_a, "client_id"_a = nb::none(), "bundle_id"_a = nb::none(), "metadata"_a = nb::none(),