From 916a756f83bb1954a45746344aefbd162b0392cc Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 25 Feb 2024 22:50:49 +0900 Subject: [PATCH 01/34] =?UTF-8?q?=E3=83=9E=E3=83=AB=E3=83=81=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=87=E3=83=83=E3=82=AF=E3=82=B5=E3=82=A4=E3=83=9E?= =?UTF-8?q?=E3=83=AB=E3=82=AD=E3=83=A3=E3=82=B9=E3=83=88=E3=81=AE=E9=80=81?= =?UTF-8?q?=E4=BF=A1=E3=81=AB=E5=AF=BE=E5=BF=9C=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 3 + examples/sumomo/sumomo.c | 31 +-- examples/sumomo/util.cpp | 19 +- examples/sumomo/util.h | 3 +- include/sorac/signaling.hpp | 2 +- include/sorac/simulcast_encoder_adapter.hpp | 4 +- include/sorac/sorac.h | 5 +- proto/soracp.proto | 39 ++-- src/signaling.cpp | 205 ++++++++++++-------- src/simulcast_encoder_adapter.cpp | 64 ++++-- src/sorac.cpp | 9 +- 11 files changed, 238 insertions(+), 146 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 22c5e1e..6a0ba61 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,9 @@ ## develop +- [UPDATE] protoc-gen-jsonif をアップデートして optional 対応する + - @melpon + ## 2024.1.0 **祝いリリース** diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index df2e6e5..e8c11f9 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -27,7 +27,7 @@ typedef struct State { SumomoRecorder* recorder; SumomoCapturer* capturer; SoracDataChannel* data_channel; - soracp_RtpEncodingParameters rtp_encoding_parameters; + soracp_RtpParameters rtp_parameters; } State; void on_capture_frame_scaled(SoracVideoFrameRef* frame, void* userdata) { @@ -37,22 +37,29 @@ void on_capture_frame_scaled(SoracVideoFrameRef* frame, void* userdata) { void on_capture_frame(SoracVideoFrameRef* frame, void* userdata) { State* state = (State*)userdata; - sorac_signaling_get_rtp_encoding_parameters(state->signaling, - &state->rtp_encoding_parameters); - if (!state->rtp_encoding_parameters.enable_parameters) { + sorac_signaling_get_rtp_parameters(state->signaling, &state->rtp_parameters); + if (state->rtp_parameters.encodings_len == 0) { sorac_signaling_send_video_frame(state->signaling, frame); } else { // 動的な確保が面倒なので適当に固定で持っておく const char* rids[10]; - int rids_len = state->rtp_encoding_parameters.parameters_len; - if (rids_len > sizeof(rids) / sizeof(rids[0])) { - rids_len = sizeof(rids) / sizeof(rids[0]); + float scales[10]; + int len = state->rtp_parameters.encodings_len; + if (len > sizeof(rids) / sizeof(rids[0])) { + len = sizeof(rids) / sizeof(rids[0]); } - for (int i = 0; i < rids_len; i++) { - rids[i] = state->rtp_encoding_parameters.parameters[i].rid; + for (int i = 0; i < len; i++) { + soracp_RtpEncodingParameters* encoding = + &state->rtp_parameters.encodings[i]; + rids[i] = encoding->rid; + if (soracp_RtpEncodingParameters_has_scale_resolution_down_by(encoding)) { + scales[i] = encoding->scale_resolution_down_by; + } else { + scales[i] = 1.0f; + } } - sumomo_util_scale_simulcast(rids, rids_len, frame, on_capture_frame_scaled, - state); + sumomo_util_scale_simulcast(rids, scales, len, frame, + on_capture_frame_scaled, state); } } @@ -164,7 +171,7 @@ int main(int argc, char* argv[]) { sorac_plog_init(); State state = {0}; - soracp_RtpEncodingParameters_init(&state.rtp_encoding_parameters); + soracp_RtpParameters_init(&state.rtp_parameters); soracp_SignalingConfig config; soracp_SoraConnectConfig sora_config; soracp_DataChannel dc; diff --git a/examples/sumomo/util.cpp b/examples/sumomo/util.cpp index 1c00167..c5346ec 100644 --- a/examples/sumomo/util.cpp +++ 
b/examples/sumomo/util.cpp @@ -8,26 +8,17 @@ extern "C" { void sumomo_util_scale_simulcast(const char* rids[], - int num_rids, + const float scales[], + int len, SoracVideoFrameRef* frame, void (*scaled)(SoracVideoFrameRef* frame, void* userdata), void* userdata) { - for (int i = 0; i < num_rids; i++) { + for (int i = 0; i < len; i++) { sorac::VideoFrame f = *(sorac::VideoFrame*)frame; f.rid = rids[i]; - int width; - int height; - if (*f.rid == "r0") { - width = f.width() / 4; - height = f.height() / 4; - } else if (*f.rid == "r1") { - width = f.width() / 2; - height = f.height() / 2; - } else { - width = f.width(); - height = f.height(); - } + int width = (int)(f.width() / scales[i]); + int height = (int)(f.height() / scales[i]); if (f.width() != width || f.height() != height) { if (f.i420_buffer) { auto fb = sorac::VideoFrameBufferI420::Create(width, height); diff --git a/examples/sumomo/util.h b/examples/sumomo/util.h index 9ecd227..95b4c7b 100644 --- a/examples/sumomo/util.h +++ b/examples/sumomo/util.h @@ -9,7 +9,8 @@ extern "C" { extern void sumomo_util_scale_simulcast( const char* rids[], - int num_rids, + const float scales[], + int len, SoracVideoFrameRef* frame, void (*scaled)(SoracVideoFrameRef* frame, void* userdata), void* userdata); diff --git a/include/sorac/signaling.hpp b/include/sorac/signaling.hpp index 8e4014e..4d99d9f 100644 --- a/include/sorac/signaling.hpp +++ b/include/sorac/signaling.hpp @@ -32,7 +32,7 @@ class Signaling { std::function on_notify) = 0; virtual void SetOnPush(std::function on_push) = 0; - virtual soracp::RtpEncodingParameters GetRtpEncodingParameters() const = 0; + virtual soracp::RtpParameters GetRtpParameters() const = 0; }; std::shared_ptr CreateSignaling( diff --git a/include/sorac/simulcast_encoder_adapter.hpp b/include/sorac/simulcast_encoder_adapter.hpp index 6c3369d..03f2453 100644 --- a/include/sorac/simulcast_encoder_adapter.hpp +++ b/include/sorac/simulcast_encoder_adapter.hpp @@ -13,8 +13,8 @@ namespace sorac { std::shared_ptr CreateSimulcastEncoderAdapter( - const soracp::RtpEncodingParameters& params, - std::function()> create_encoder); + const soracp::RtpParameters& params, + std::function(std::string)> create_encoder); } diff --git a/include/sorac/sorac.h b/include/sorac/sorac.h index 86985b3..8e73f68 100644 --- a/include/sorac/sorac.h +++ b/include/sorac/sorac.h @@ -225,9 +225,8 @@ extern void sorac_signaling_set_on_notify( extern void sorac_signaling_set_on_push(SoracSignaling* p, sorac_signaling_on_push_func on_push, void* userdata); -extern void sorac_signaling_get_rtp_encoding_parameters( - SoracSignaling* p, - soracp_RtpEncodingParameters* params); +extern void sorac_signaling_get_rtp_parameters(SoracSignaling* p, + soracp_RtpParameters* params); #ifdef __cplusplus } diff --git a/proto/soracp.proto b/proto/soracp.proto index 5c2d16f..1981d94 100644 --- a/proto/soracp.proto +++ b/proto/soracp.proto @@ -88,17 +88,34 @@ message SoraConnectConfig { optional ForwardingFilter forwarding_filter = 51; } -message RtpEncodingParameter { - string rid = 10; - bool active = 20; - optional double scale_resolution_down_by = 31; - optional int32 max_bitrate_bps = 41; - optional double max_framerate = 51; - bool adaptive_ptime = 60; - optional string scalability_mode = 71; +message RtpEncodingParameters { + string rid = 10; + bool active = 20; + optional double scale_resolution_down_by = 31; + optional int32 max_bitrate_bps = 41; + optional double max_framerate = 51; + bool adaptive_ptime = 60; + optional string scalability_mode = 71; } 
-message RtpEncodingParameters { - bool enable_parameters = 10; - repeated RtpEncodingParameter parameters = 11; +message RtpCodecParameters { + string kind = 10; + string name = 20; + int32 payload_type = 40; +} + +message RidDescription { + string rid = 10; + string direction = 20; + optional int32 payload_type = 30; +} + +message RtpParameters { + string mid = 10; + repeated RtpEncodingParameters encodings = 20; + repeated RtpCodecParameters codecs = 30; + + // rids は本来 RtpParameters には含まれないんだけど、 + // ここにあった方が便利なのでここに含める + repeated RidDescription rids = 40; } diff --git a/src/signaling.cpp b/src/signaling.cpp index 9770579..84ce325 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -194,8 +194,8 @@ class SignalingImpl : public Signaling { on_push_ = on_push; } - soracp::RtpEncodingParameters GetRtpEncodingParameters() const override { - return rtp_encoding_params_; + soracp::RtpParameters GetRtpParameters() const override { + return rtp_params_; } private: @@ -242,9 +242,8 @@ class SignalingImpl : public Signaling { } if (js["simulcast"].get()) { - rtp_encoding_params_.enable_parameters = true; for (auto& enc : js["encodings"]) { - soracp::RtpEncodingParameter p; + soracp::RtpEncodingParameters p; p.rid = enc["rid"].get(); p.active = true; if (enc.contains("active")) { @@ -266,7 +265,7 @@ class SignalingImpl : public Signaling { if (enc.contains("scalabilityMode")) { p.set_scalability_mode(enc["scalabilityMode"].get()); } - rtp_encoding_params_.parameters.push_back(p); + rtp_params_.encodings.push_back(p); } } @@ -274,10 +273,16 @@ class SignalingImpl : public Signaling { client_.pc = std::make_shared(config); client_.pc->onLocalDescription([this](rtc::Description desc) { auto sdp = desc.generateSdp(); - sdp += "a=rid:r0 send\r\n"; - sdp += "a=rid:r1 send\r\n"; - sdp += "a=rid:r2 send\r\n"; - sdp += "a=simulcast:send r0;r1;r2\r\n"; + if (IsSimulcast()) { + for (const auto& rd : rtp_params_.rids) { + sdp += "a=rid:" + rd.rid + " send"; + if (rd.has_payload_type()) { + sdp += " pt=" + std::to_string(rd.payload_type); + } + sdp += "\r\n"; + } + sdp += "a=simulcast:send r0;r1;r2\r\n"; + } PLOG_DEBUG << "answer sdp:" << sdp; nlohmann::json js = { {"type", desc.typeString()}, @@ -406,29 +411,26 @@ class SignalingImpl : public Signaling { } } // mid, payload_type, codec - std::string mid; - int payload_type; - std::string codec; - { - auto get_value = - [&video_lines](const std::string& search) -> std::string { - auto it = std::find_if(video_lines.begin(), video_lines.end(), - [&search](const std::string& s) { - return starts_with(s, search); - }); - if (it == video_lines.end()) { - return ""; + for (const auto& line : video_lines) { + if (auto s = std::string("a=mid:"); starts_with(line, s)) { + auto mid = line.substr(s.size()); + PLOG_DEBUG << "mid=" << mid; + rtp_params_.mid = mid; + } else if (auto s = std::string("a=rtpmap:"); starts_with(line, s)) { + auto rtpmap = line.substr(s.size()); + auto ys = split_with(rtpmap, " "); + auto payload_type = std::stoi(ys[0]); + auto codec = split_with(ys[1], "/")[0]; + if (codec == "H264" || codec == "H265") { + PLOG_DEBUG << "payload_type=" << payload_type + << ", codec=" << codec; + soracp::RtpCodecParameters cp; + cp.payload_type = payload_type; + cp.kind = "video"; + cp.name = codec; + rtp_params_.codecs.push_back(cp); } - return it->substr(search.size()); - }; - mid = get_value("a=mid:"); - PLOG_DEBUG << "mid=" << mid; - auto xs = split_with(get_value("a=msid:"), " "); - auto rtpmap = get_value("a=rtpmap:"); - auto ys = split_with(rtpmap, 
" "); - payload_type = std::stoi(ys[0]); - codec = split_with(ys[1], "/")[0]; - PLOG_DEBUG << "payload_type=" << payload_type << ", codec=" << codec; + } } // サイマルキャストの場合、拡張ヘッダーのどの ID を使えば良いか調べる if (IsSimulcast()) { @@ -444,17 +446,49 @@ class SignalingImpl : public Signaling { rtp_stream_id_ = std::stoi(ys[1]); PLOG_DEBUG << "rtp_stream_id=" << rtp_stream_id_; } + // rid が参照するべき payload_type の対応を作る + if (IsSimulcast()) { + for (const auto& line : video_lines) { + // 以下のような感じの行を探して値を設定する + // a=rid:r0 send + // a=rid:r0 recv pt=37 + + auto s = std::string("a=rid:"); + if (!starts_with(line, s)) { + continue; + } + auto xs = split_with(line, " "); + if (xs.size() < 2) { + continue; + } + soracp::RidDescription rd; + rd.rid = xs[0].substr(s.size()); + rd.direction = xs[1]; + s = "pt="; + if (xs.size() >= 3 && starts_with(xs[2], s)) { + rd.set_payload_type(std::stoi(xs[2].substr(s.size()))); + } + rtp_params_.rids.push_back(rd); + PLOG_DEBUG << "rid=" << rd.rid << ", direction=" << rd.direction + << ", payload_type=" + << (rd.has_payload_type() + ? std::to_string(rd.payload_type) + : "(none)"); + } + } std::shared_ptr track; std::map, std::shared_ptr> sr_reporters; - auto video = rtc::Description::Video(mid); - if (codec == "H264") { - video.addH264Codec(payload_type); - } else { - video.addH265Codec(payload_type); + auto video = rtc::Description::Video(rtp_params_.mid); + for (const auto& codec : rtp_params_.codecs) { + if (codec.name == "H264") { + video.addH264Codec(codec.payload_type); + } else if (codec.name == "H265") { + video.addH265Codec(codec.payload_type); + } } std::map, uint32_t> ssrcs; if (!IsSimulcast()) { @@ -462,7 +496,7 @@ class SignalingImpl : public Signaling { video.addSSRC(ssrc, cname, msid, track_id); ssrcs.insert(std::make_pair(std::nullopt, ssrc)); } else { - for (const auto& p : rtp_encoding_params_.parameters) { + for (const auto& p : rtp_params_.encodings) { uint32_t ssrc = generate_random_number(); video.addSSRC(ssrc, cname, msid, track_id); ssrcs.insert(std::make_pair(p.rid, ssrc)); @@ -473,15 +507,41 @@ class SignalingImpl : public Signaling { auto simulcast_config = std::make_shared(); auto simulcast_handler = std::make_shared(simulcast_config); - for (int i = 0; - i < (!IsSimulcast() ? 1 : rtp_encoding_params_.parameters.size()); + for (int i = 0; i < (!IsSimulcast() ? 1 : rtp_params_.encodings.size()); i++) { std::optional rid; if (IsSimulcast()) { - rid = rtp_encoding_params_.parameters[i].rid; + rid = rtp_params_.encodings[i].rid; } uint32_t ssrc = ssrcs[rid]; + int payload_type; + std::string codec; + if (IsSimulcast()) { + // この rid が参照するべき payload_type と codec を探す + auto it = + std::find_if(rtp_params_.rids.begin(), rtp_params_.rids.end(), + [rid](const soracp::RidDescription& rd) { + return rd.rid == *rid; + }); + if (it == rtp_params_.rids.end() || !it->has_payload_type()) { + payload_type = rtp_params_.codecs[0].payload_type; + codec = rtp_params_.codecs[0].name; + } else { + payload_type = it->payload_type; + codec = + std::find_if( + rtp_params_.codecs.begin(), rtp_params_.codecs.end(), + [payload_type](const soracp::RtpCodecParameters& codec) { + return codec.payload_type == payload_type; + }) + ->name; + } + } else { + payload_type = rtp_params_.codecs[0].payload_type; + codec = rtp_params_.codecs[0].name; + } + auto rtp_config = std::make_shared( ssrc, cname, payload_type, codec == "H264" ? 
rtc::H264RtpPacketizer::defaultClockRate @@ -519,56 +579,47 @@ class SignalingImpl : public Signaling { } track->setMediaHandler(simulcast_handler); - track->onOpen([this, wtrack = std::weak_ptr(track), - codec]() { + track->onOpen([this, wtrack = std::weak_ptr(track)]() { PLOG_DEBUG << "Video Track Opened"; auto track = wtrack.lock(); if (track == nullptr) { return; } - std::function()> create_encoder; - - if (codec == "H264") { - if (config_.h264_encoder_type == - soracp::H264_ENCODER_TYPE_OPEN_H264) { - create_encoder = [openh264 = config_.openh264]() { - return CreateOpenH264VideoEncoder(openh264); - }; - } else if (config_.h264_encoder_type == - soracp::H264_ENCODER_TYPE_VIDEO_TOOLBOX) { + std::function(std::string)> + create_encoder = + [this](std::string codec) -> std::shared_ptr { + if (codec == "H264") { + if (config_.h264_encoder_type == + soracp::H264_ENCODER_TYPE_OPEN_H264) { + return CreateOpenH264VideoEncoder(config_.openh264); + } else if (config_.h264_encoder_type == + soracp::H264_ENCODER_TYPE_VIDEO_TOOLBOX) { #if defined(__APPLE__) - create_encoder = []() { return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH264); - }; #else - PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; - return; + PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; #endif - } else { - PLOG_ERROR << "Unknown H264EncoderType"; - return; - } - } else if (codec == "H265") { - if (config_.h265_encoder_type == - soracp::H265_ENCODER_TYPE_VIDEO_TOOLBOX) { + } else { + PLOG_ERROR << "Unknown H264EncoderType"; + } + } else if (codec == "H265") { + if (config_.h265_encoder_type == + soracp::H265_ENCODER_TYPE_VIDEO_TOOLBOX) { #if defined(__APPLE__) - create_encoder = []() { return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH265); - }; #else - PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; - return; + PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; #endif - } else { - PLOG_ERROR << "Unknown H265EncoderType"; - return; + } else { + PLOG_ERROR << "Unknown H265EncoderType"; + } } - } - if (create_encoder) { - client_.video_encoder = CreateSimulcastEncoderAdapter( - rtp_encoding_params_, create_encoder); - } + return nullptr; + }; + + client_.video_encoder = + CreateSimulcastEncoderAdapter(rtp_params_, create_encoder); on_track_(track); }); @@ -834,7 +885,7 @@ class SignalingImpl : public Signaling { // ws_ = nullptr; } - bool IsSimulcast() const { return rtp_encoding_params_.enable_parameters; } + bool IsSimulcast() const { return !rtp_params_.encodings.empty(); } std::shared_ptr GetWebSocket() const { std::lock_guard lock(ws_mutex_); @@ -848,7 +899,7 @@ class SignalingImpl : public Signaling { Client client_; soracp::SignalingConfig config_; soracp::SoraConnectConfig sora_config_; - soracp::RtpEncodingParameters rtp_encoding_params_; + soracp::RtpParameters rtp_params_; int rtp_stream_id_ = 0; int video_ssrc_ = 0; std::function)> on_track_; diff --git a/src/simulcast_encoder_adapter.cpp b/src/simulcast_encoder_adapter.cpp index d2f89e0..c13477c 100644 --- a/src/simulcast_encoder_adapter.cpp +++ b/src/simulcast_encoder_adapter.cpp @@ -53,16 +53,38 @@ static Bps GetMaxBitrate(int width, int height) { class SimulcastEncoderAdapter : public VideoEncoder { public: SimulcastEncoderAdapter( - const soracp::RtpEncodingParameters& params, - std::function()> create_encoder) + const soracp::RtpParameters& params, + std::function(std::string)> create_encoder) : create_encoder_(create_encoder) { - if (!params.enable_parameters || params.parameters.empty()) { + if 
(params.encodings.empty()) { encoders_.resize(1); - encoders_[0].param.active = true; + encoders_[0].encoding.active = true; + encoders_[0].codec = params.codecs[0]; simulcast_ = false; } else { - for (auto& param : params.parameters) { - encoders_.push_back({nullptr, param}); + for (const auto& encoding : params.encodings) { + auto it = std::find_if(params.rids.begin(), params.rids.end(), + [&encoding](const soracp::RidDescription& rd) { + return rd.rid == encoding.rid; + }); + if (it == params.rids.end()) { + PLOG_ERROR << "Rid not found: rid=" << encoding.rid; + continue; + } + if (!it->has_payload_type()) { + encoders_.push_back({nullptr, encoding, params.codecs[0]}); + } else { + auto it2 = + std::find_if(params.codecs.begin(), params.codecs.end(), + [it](const soracp::RtpCodecParameters& codec) { + return codec.payload_type == it->payload_type; + }); + if (it2 == params.codecs.end()) { + PLOG_ERROR << "Codec not found: payload_type=" << it->payload_type; + continue; + } + encoders_.push_back({nullptr, encoding, *it2}); + } } simulcast_ = true; } @@ -86,7 +108,7 @@ class SimulcastEncoderAdapter : public VideoEncoder { // 各サイズの最大ビットレートを計算して、その割合でビットレートを分配する Bps sum_bitrate; for (const auto& e : encoders_) { - const auto& p = e.param; + const auto& p = e.encoding; if (!p.active) { continue; } @@ -100,20 +122,21 @@ class SimulcastEncoderAdapter : public VideoEncoder { } for (auto& e : encoders_) { - if (!e.param.active) { + if (!e.encoding.active) { continue; } Settings s = settings; - if (e.param.has_scale_resolution_down_by()) { - s.width = (int)(settings.width / e.param.scale_resolution_down_by); - s.height = (int)(settings.height / e.param.scale_resolution_down_by); + if (e.encoding.has_scale_resolution_down_by()) { + s.width = (int)(settings.width / e.encoding.scale_resolution_down_by); + s.height = (int)(settings.height / e.encoding.scale_resolution_down_by); } double rate = (double)GetMaxBitrate(s.width, s.height).count() / sum_bitrate.count(); s.bitrate = Bps((int64_t)(settings.bitrate.count() * rate)); - e.encoder = create_encoder_(); - PLOG_INFO << "InitEncode(Layerd): width=" << s.width - << " height=" << s.height << " bitrate=" << s.bitrate.count(); + e.encoder = create_encoder_(e.codec.name); + PLOG_INFO << "InitEncode(Layerd): rid=" << e.encoding.rid + << ", width=" << s.width << ", height=" << s.height + << ", bitrate=" << s.bitrate.count(); if (!e.encoder->InitEncode(s)) { return false; } @@ -129,7 +152,7 @@ class SimulcastEncoderAdapter : public VideoEncoder { if (e.encoder != nullptr) { std::optional rid; if (simulcast_) { - rid = e.param.rid; + rid = e.encoding.rid; } e.encoder->SetEncodeCallback( [rid, callback](const sorac::EncodedImage& image) { @@ -156,7 +179,7 @@ class SimulcastEncoderAdapter : public VideoEncoder { } for (auto& e : encoders_) { if (e.encoder != nullptr) { - if (e.param.rid == *frame.rid) { + if (e.encoding.rid == *frame.rid) { e.encoder->Encode(frame); break; } @@ -177,17 +200,18 @@ class SimulcastEncoderAdapter : public VideoEncoder { private: struct Encoder { std::shared_ptr encoder; - soracp::RtpEncodingParameter param; + soracp::RtpEncodingParameters encoding; + soracp::RtpCodecParameters codec; Settings settings; }; std::vector encoders_; bool simulcast_; - std::function()> create_encoder_; + std::function(std::string)> create_encoder_; }; std::shared_ptr CreateSimulcastEncoderAdapter( - const soracp::RtpEncodingParameters& params, - std::function()> create_encoder) { + const soracp::RtpParameters& params, + std::function(std::string)> 
create_encoder) { return std::make_shared(params, create_encoder); } diff --git a/src/sorac.cpp b/src/sorac.cpp index 7f563b0..a26061a 100644 --- a/src/sorac.cpp +++ b/src/sorac.cpp @@ -509,11 +509,10 @@ void sorac_signaling_set_on_push(SoracSignaling* p, on_push(message.c_str(), (int)message.size(), userdata); }); } -void sorac_signaling_get_rtp_encoding_parameters( - SoracSignaling* p, - soracp_RtpEncodingParameters* params) { +void sorac_signaling_get_rtp_parameters(SoracSignaling* p, + soracp_RtpParameters* params) { auto signaling = g_cptr.Get(p, g_signaling_map); - auto u = signaling->GetRtpEncodingParameters(); - soracp_RtpEncodingParameters_from_cpp(u, params); + auto u = signaling->GetRtpParameters(); + soracp_RtpParameters_from_cpp(u, params); } } From d4143cca1a084ff32899f9b26d13ece7415302af Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 25 Feb 2024 22:52:06 +0900 Subject: [PATCH 02/34] =?UTF-8?q?CHANGES=20=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 6a0ba61..bde4af2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,8 @@ ## develop +- [ADD] マルチコーデックサイマルキャストの送信に対応する + - @melpon - [UPDATE] protoc-gen-jsonif をアップデートして optional 対応する - @melpon From bddd6326494741a6482bd68b4e35fc567edda74b Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 26 Feb 2024 06:29:06 +0900 Subject: [PATCH 03/34] =?UTF-8?q?=E3=83=AD=E3=82=B0=E3=81=AB=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=87=E3=83=83=E3=82=AF=E3=82=92=E8=A1=A8=E7=A4=BA?= =?UTF-8?q?=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/simulcast_encoder_adapter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulcast_encoder_adapter.cpp b/src/simulcast_encoder_adapter.cpp index c13477c..aaad528 100644 --- a/src/simulcast_encoder_adapter.cpp +++ b/src/simulcast_encoder_adapter.cpp @@ -135,8 +135,8 @@ class SimulcastEncoderAdapter : public VideoEncoder { s.bitrate = Bps((int64_t)(settings.bitrate.count() * rate)); e.encoder = create_encoder_(e.codec.name); PLOG_INFO << "InitEncode(Layerd): rid=" << e.encoding.rid - << ", width=" << s.width << ", height=" << s.height - << ", bitrate=" << s.bitrate.count(); + << ", codec=" << e.codec.name << ", width=" << s.width + << ", height=" << s.height << ", bitrate=" << s.bitrate.count(); if (!e.encoder->InitEncode(s)) { return false; } From 738e6bfa2ce6d29de9b799a7c8d2245b3222bc7b Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 26 Feb 2024 14:34:52 +0900 Subject: [PATCH 04/34] =?UTF-8?q?--audio,=20--video=20=E3=82=AA=E3=83=97?= =?UTF-8?q?=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/sumomo/option.c | 34 ++++++++++++++++++++++++---------- examples/sumomo/option.h | 2 ++ examples/sumomo/sumomo.c | 9 +++++++-- src/signaling.cpp | 26 ++++++++++++++++++++++---- 4 files changed, 55 insertions(+), 16 deletions(-) diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index 38a4746..3334365 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -14,6 +14,9 @@ static struct option long_opts[] = { {"video-codec-type", required_argument, 0, 0}, {"video-bit-rate", required_argument, 0, 0}, {"metadata", required_argument, 0, 0}, + {"video", required_argument, 0, 0}, + {"audio", required_argument, 0, 0}, + {"capture-type", 
required_argument, 0, 0}, {"capture-device-name", required_argument, 0, 0}, {"capture-device-width", required_argument, 0, 0}, @@ -57,6 +60,20 @@ int sumomo_option_parse(SumomoOption* option, switch (c) { case 0: #define OPT_IS(optname) strcmp(long_opts[index].name, optname) == 0 +#define SET_OPTBOOL(name) \ + do { \ + if (strcmp(optarg, "true") == 0) { \ + name = SUMOMO_OPTIONAL_BOOL_TRUE; \ + } else if (strcmp(optarg, "false") == 0) { \ + name = SUMOMO_OPTIONAL_BOOL_FALSE; \ + } else if (strcmp(optarg, "none") == 0) { \ + name = SUMOMO_OPTIONAL_BOOL_NONE; \ + } else { \ + fprintf(stderr, "Failed to set to " #name ": %s\n", optarg); \ + *error = 1; \ + } \ + } while (false) + if (OPT_IS("signaling-url")) { if (option->signaling_url_len >= sizeof(option->signaling_url) / @@ -70,16 +87,7 @@ int sumomo_option_parse(SumomoOption* option, } else if (OPT_IS("channel-id")) { option->channel_id = optarg; } else if (OPT_IS("simulcast")) { - if (strcmp(optarg, "true") == 0) { - option->simulcast = SUMOMO_OPTIONAL_BOOL_TRUE; - } else if (strcmp(optarg, "false") == 0) { - option->simulcast = SUMOMO_OPTIONAL_BOOL_FALSE; - } else if (strcmp(optarg, "none") == 0) { - option->simulcast = SUMOMO_OPTIONAL_BOOL_NONE; - } else { - fprintf(stderr, "Invalid simulcast: %s\n", optarg); - *error = 1; - } + SET_OPTBOOL(option->simulcast); } else if (OPT_IS("video-codec-type")) { if (strcmp(optarg, "H264") == 0) { option->video_codec_type = optarg; @@ -98,6 +106,10 @@ int sumomo_option_parse(SumomoOption* option, } } else if (OPT_IS("metadata")) { option->metadata = optarg; + } else if (OPT_IS("video")) { + SET_OPTBOOL(option->video); + } else if (OPT_IS("audio")) { + SET_OPTBOOL(option->audio); } else if (OPT_IS("capture-type")) { if (strcmp(optarg, "fake") == 0) { option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE; @@ -165,6 +177,8 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --video-codec-type=H264,H265\n"); fprintf(stdout, " --video-bit-rate=0-5000 [kbps]\n"); fprintf(stdout, " --metadata=JSON\n"); + fprintf(stdout, " --video=true,false,none\n"); + fprintf(stdout, " --audio=true,false,none\n"); fprintf(stdout, " --capture-type=fake,v4l2,mac\n"); fprintf(stdout, " --capture-device-name=NAME\n"); fprintf(stdout, " --capture-device-width=WIDTH\n"); diff --git a/examples/sumomo/option.h b/examples/sumomo/option.h index 658c950..86e1ff6 100644 --- a/examples/sumomo/option.h +++ b/examples/sumomo/option.h @@ -28,6 +28,8 @@ typedef struct SumomoOption { const char* video_codec_type; int video_bit_rate; const char* metadata; + SumomoOptionalBool video; + SumomoOptionalBool audio; SumomoOptionCaptureType capture_type; const char* capture_device_name; diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index e8c11f9..2c7f8d7 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -207,7 +207,6 @@ int main(int argc, char* argv[]) { soracp_SoraConnectConfig_set_role(&sora_config, "sendonly"); soracp_SoraConnectConfig_set_channel_id(&sora_config, opt.channel_id); - sora_config.video = true; if (opt.video_codec_type != NULL) { soracp_SoraConnectConfig_set_video_codec_type(&sora_config, opt.video_codec_type); @@ -219,7 +218,13 @@ int main(int argc, char* argv[]) { if (opt.metadata != NULL) { soracp_SoraConnectConfig_set_metadata(&sora_config, opt.metadata); } - soracp_SoraConnectConfig_set_audio(&sora_config, true); + + // none, true の場合は true, false の場合は false + soracp_SoraConnectConfig_set_video(&sora_config, + opt.video != SUMOMO_OPTIONAL_BOOL_FALSE); + 
soracp_SoraConnectConfig_set_audio(&sora_config, + opt.audio != SUMOMO_OPTIONAL_BOOL_FALSE); + soracp_SoraConnectConfig_set_multistream(&sora_config, soracp_OPTIONAL_BOOL_TRUE); soracp_SoraConnectConfig_set_data_channel_signaling( diff --git a/src/signaling.cpp b/src/signaling.cpp index 84ce325..0f16f1f 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -133,6 +133,10 @@ class SignalingImpl : public Signaling { } void SendVideoFrame(const VideoFrame& frame) override { + if (rtp_params_.mid.empty()) { + return; + } + if (!client_.video_encoder_settings || frame.base_width != client_.video_encoder_settings->width || frame.base_height != client_.video_encoder_settings->height) { @@ -171,6 +175,9 @@ class SignalingImpl : public Signaling { } void SendAudioFrame(const AudioFrame& frame) override { + if (client_.opus_encoder == nullptr) { + return; + } client_.opus_encoder->Encode(frame); } @@ -396,7 +403,7 @@ class SignalingImpl : public Signaling { auto msid = "msid-" + generate_random_string(24); auto track_id = "trackid-" + generate_random_string(24); // video - { + std::invoke([&]() { // m=video から他の m= が出てくるまでの間のデータを取得する std::vector video_lines; { @@ -432,6 +439,11 @@ class SignalingImpl : public Signaling { } } } + // mid が空ということは vido=false なので何もしない + if (rtp_params_.mid.empty()) { + return; + } + // サイマルキャストの場合、拡張ヘッダーのどの ID を使えば良いか調べる if (IsSimulcast()) { auto it = std::find_if( @@ -627,9 +639,10 @@ class SignalingImpl : public Signaling { client_.video->track = track; client_.video->senders = sr_reporters; client_.video->simulcast_handler = simulcast_handler; - } + }); + // audio - { + std::invoke([&]() { uint32_t ssrc = generate_random_number(); // m=audio から他の m= が出てくるまでの間のデータを取得する std::vector audio_lines; @@ -661,6 +674,11 @@ class SignalingImpl : public Signaling { }; mid = get_value("a=mid:"); PLOG_DEBUG << "mid=" << mid; + // mid が空ということは audio=false なので何もしない + if (mid.empty()) { + return; + } + auto xs = split_with(get_value("a=msid:"), " "); auto rtpmap = get_value("a=rtpmap:"); payload_type = std::stoi(split_with(rtpmap, " ")[0]); @@ -725,7 +743,7 @@ class SignalingImpl : public Signaling { sr_reporters; sr_reporters[std::nullopt] = sr_reporter; client_.audio->senders = sr_reporters; - } + }); client_.pc->setRemoteDescription(rtc::Description(sdp, "offer")); } else if (js["type"] == "switched") { From 120bc243628aa5b2a57453693b5e311e4fc3591e Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 26 Feb 2024 14:35:37 +0900 Subject: [PATCH 05/34] =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/sumomo/option.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index 3334365..8ce04a8 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -175,7 +175,7 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --channel-id=ID [required]\n"); fprintf(stdout, " --simulcast=true,false,none\n"); fprintf(stdout, " --video-codec-type=H264,H265\n"); - fprintf(stdout, " --video-bit-rate=0-5000 [kbps]\n"); + fprintf(stdout, " --video-bit-rate=0-15000 [kbps]\n"); fprintf(stdout, " --metadata=JSON\n"); fprintf(stdout, " --video=true,false,none\n"); fprintf(stdout, " --audio=true,false,none\n"); From 961e4856c8e7c93f446aa3283eee170c16c814ab Mon Sep 17 00:00:00 2001 From: melpon Date: Fri, 1 Mar 2024 07:10:18 +0900 Subject: [PATCH 06/34] =?UTF-8?q?libaom=20=E3=82=92=E8=BF=BD=E5=8A=A0?= 
=?UTF-8?q?=E3=81=97=E3=81=A6=20AV1=20=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC?= =?UTF-8?q?=E3=83=89=E3=81=A7=E3=81=8D=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/c_cpp_properties.json | 2 + CMakeLists.txt | 5 + VERSION | 1 + examples/sumomo/option.c | 15 ++ examples/sumomo/option.h | 2 + examples/sumomo/sumomo.c | 4 + include/sorac/aom_av1_video_encoder.hpp | 15 ++ proto/soracp.proto | 6 + run.py | 101 ++++++-- src/aom_av1_video_encoder.cpp | 331 ++++++++++++++++++++++++ src/signaling.cpp | 21 +- 11 files changed, 476 insertions(+), 27 deletions(-) create mode 100644 include/sorac/aom_av1_video_encoder.hpp create mode 100644 src/aom_av1_video_encoder.cpp diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 3af780e..4eeb3b2 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -13,6 +13,7 @@ "${workspaceFolder}/_build/ubuntu-20.04_x86_64/release/sorac/proto/sorac", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/mbedtls/include", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/openh264/include", + "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/aom/include", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/libjpeg-turbo/include", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/libyuv/include", @@ -20,6 +21,7 @@ "${workspaceFolder}/_build/macos_arm64/release/sorac/proto/sorac", "${workspaceFolder}/_install/macos_arm64/release/mbedtls/include", "${workspaceFolder}/_install/macos_arm64/release/openh264/include", + "${workspaceFolder}/_install/macos_arm64/release/aom/include", "${workspaceFolder}/_install/macos_arm64/release/libjpeg-turbo/include", "${workspaceFolder}/_install/macos_arm64/release/libyuv/include" ], diff --git a/CMakeLists.txt b/CMakeLists.txt index 06abae2..c889e1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,7 @@ add_custom_command( target_sources(sorac PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.c.cpp" + src/aom_av1_video_encoder.cpp src/current_time.cpp src/data_channel.cpp src/open_h264_video_encoder.cpp @@ -112,6 +113,7 @@ target_sources(sorac "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.h" "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.c.h" "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.c.hpp" + include/sorac/aom_av1_video_encoder.hpp include/sorac/bitrate.hpp include/sorac/current_time.hpp include/sorac/data_channel.hpp @@ -132,6 +134,9 @@ target_compile_definitions(sorac PRIVATE JSONIF_USE_NLOHMANN_JSON) # OpenH264 target_include_directories(sorac PRIVATE ${OPENH264_ROOT_DIR}/include) +# AOM +target_include_directories(sorac PRIVATE ${AOM_ROOT_DIR}/include) + set_target_properties(sorac PROPERTIES CXX_STANDARD 20 C_STANDARD 20) set(_LIBS diff --git a/VERSION b/VERSION index c453195..1de2266 100644 --- a/VERSION +++ b/VERSION @@ -3,6 +3,7 @@ LIBDATACHANNEL_VERSION=v0.20.1 OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 +AOM_VERSION=v3.8.1 MBEDTLS_VERSION=v3.5.1 PROTOBUF_VERSION=21.1 PROTOC_GEN_JSONIF_VERSION=0.12.1 diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index 8ce04a8..a86ee9c 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -24,7 +24,9 @@ static struct option long_opts[] = { {"audio-type", required_argument, 0, 0}, {"h264-encoder-type", required_argument, 0, 0}, {"h265-encoder-type", required_argument, 0, 0}, + {"av1-encoder-type", 
required_argument, 0, 0}, {"openh264", required_argument, 0, 0}, + {"aom", required_argument, 0, 0}, {"cacert", required_argument, 0, 0}, {"help", no_argument, 0, 0}, {0, 0, 0, 0}, @@ -93,6 +95,8 @@ int sumomo_option_parse(SumomoOption* option, option->video_codec_type = optarg; } else if (strcmp(optarg, "H265") == 0) { option->video_codec_type = optarg; + } else if (strcmp(optarg, "AV1") == 0) { + option->video_codec_type = optarg; } else { fprintf(stderr, "Invalid video encoder type: %s\n", optarg); *error = 1; @@ -154,8 +158,17 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stderr, "Invalid h265 encoder type: %s\n", optarg); *error = 1; } + } else if (OPT_IS("av1-encoder-type")) { + if (strcmp(optarg, "aom") == 0) { + option->av1_encoder_type = soracp_AV1_ENCODER_TYPE_AOM; + } else { + fprintf(stderr, "Invalid AV1 encoder type: %s\n", optarg); + *error = 1; + } } else if (OPT_IS("openh264")) { option->openh264 = optarg; + } else if (OPT_IS("aom")) { + option->aom = optarg; } else if (OPT_IS("cacert")) { option->cacert = optarg; } else if (OPT_IS("help")) { @@ -186,7 +199,9 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --audio-type=fake,pulse,macos\n"); fprintf(stdout, " --h264-encoder-type=openh264,videotoolbox\n"); fprintf(stdout, " --h265-encoder-type=videotoolbox\n"); + fprintf(stdout, " --av1-encoder-type=aom\n"); fprintf(stdout, " --openh264=PATH\n"); + fprintf(stdout, " --aom=PATH\n"); fprintf(stdout, " --cacert=PATH\n"); fprintf(stdout, " --help\n"); return -1; diff --git a/examples/sumomo/option.h b/examples/sumomo/option.h index 86e1ff6..8080ddc 100644 --- a/examples/sumomo/option.h +++ b/examples/sumomo/option.h @@ -38,7 +38,9 @@ typedef struct SumomoOption { SumomoOptionAudioType audio_type; soracp_H264EncoderType h264_encoder_type; soracp_H265EncoderType h265_encoder_type; + soracp_Av1EncoderType av1_encoder_type; const char* openh264; + const char* aom; const char* cacert; } SumomoOption; diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index 2c7f8d7..d0137c2 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -190,11 +190,15 @@ int main(int argc, char* argv[]) { if (opt.openh264 != NULL) { soracp_SignalingConfig_set_openh264(&config, opt.openh264); } + if (opt.aom != NULL) { + soracp_SignalingConfig_set_aom(&config, opt.aom); + } if (opt.cacert != NULL) { soracp_SignalingConfig_set_ca_certificate(&config, opt.cacert); } soracp_SignalingConfig_set_h264_encoder_type(&config, opt.h264_encoder_type); soracp_SignalingConfig_set_h265_encoder_type(&config, opt.h265_encoder_type); + soracp_SignalingConfig_set_av1_encoder_type(&config, opt.av1_encoder_type); soracp_SignalingConfig_set_video_encoder_initial_bitrate_kbps( &config, opt.video_bit_rate == 0 ? 
500 : opt.video_bit_rate); SoracSignaling* signaling = sorac_signaling_create(&config); diff --git a/include/sorac/aom_av1_video_encoder.hpp b/include/sorac/aom_av1_video_encoder.hpp new file mode 100644 index 0000000..5594a33 --- /dev/null +++ b/include/sorac/aom_av1_video_encoder.hpp @@ -0,0 +1,15 @@ +#ifndef SORAC_AOM_AV1_VIDEO_ENCODER_HPP_ +#define SORAC_AOM_AV1_VIDEO_ENCODER_HPP_ + +#include +#include + +#include "video_encoder.hpp" + +namespace sorac { + +std::shared_ptr CreateAomAv1VideoEncoder(const std::string& aom); + +} + +#endif diff --git a/proto/soracp.proto b/proto/soracp.proto index 1981d94..57b81ed 100644 --- a/proto/soracp.proto +++ b/proto/soracp.proto @@ -18,6 +18,10 @@ enum H265EncoderType { H265_ENCODER_TYPE_VIDEO_TOOLBOX = 1; } +enum Av1EncoderType { + AV1_ENCODER_TYPE_AOM = 0; +} + message DataChannel { // required string label = 1; @@ -49,7 +53,9 @@ message SignalingConfig { repeated string signaling_url_candidates = 1; H264EncoderType h264_encoder_type = 11; H265EncoderType h265_encoder_type = 12; + Av1EncoderType av1_encoder_type = 120; string openh264 = 2; + string aom = 21; string ca_certificate = 3; string proxy_url = 44; string proxy_username = 45; diff --git a/run.py b/run.py index 0fd64f8..d8033db 100644 --- a/run.py +++ b/run.py @@ -400,6 +400,40 @@ def install_openh264(version, source_dir, install_dir): ) +@versioned +def install_aom(version, source_dir, build_dir, install_dir, cmake_args): + rm_rf(os.path.join(source_dir, "aom")) + rm_rf(os.path.join(build_dir, "aom")) + rm_rf(os.path.join(install_dir, "aom")) + git_clone_shallow( + "https://aomedia.googlesource.com/aom", + version, + os.path.join(source_dir, "aom"), + ) + with cd(os.path.join(source_dir, "aom")): + cmd( + [ + "cmake", + "-B", + os.path.join(build_dir, "aom"), + f'-DCMAKE_INSTALL_PREFIX={os.path.join(install_dir, "aom")}', + "-DBUILD_SHARED_LIBS=ON", + *cmake_args, + ] + ) + cmd( + [ + "cmake", + "--build", + os.path.join(build_dir, "aom"), + f"-j{multiprocessing.cpu_count()}", + "--config", + "Release", + ] + ) + cmd(["cmake", "--install", os.path.join(build_dir, "aom")]) + + @versioned def install_mbedtls(version, source_dir, build_dir, install_dir, cmake_args): rm_rf(os.path.join(source_dir, "mbedtls")) @@ -564,6 +598,33 @@ def install_deps( with cd(BASE_DIR): version = read_version_file("VERSION") + # CMake + install_cmake_args = { + "version": version["CMAKE_VERSION"], + "version_file": os.path.join(install_dir, "cmake.version"), + "source_dir": source_dir, + "install_dir": install_dir, + "platform": "", + "ext": "tar.gz", + } + if build_platform in ("windows_x86_64",): + install_cmake_args["platform"] = "windows-x86_64" + install_cmake_args["ext"] = "zip" + elif build_platform in ("macos_x86_64", "macos_arm64"): + install_cmake_args["platform"] = "macos-universal" + elif build_platform in ("ubuntu-20.04_x86_64", "ubuntu-22.04_x86_64"): + install_cmake_args["platform"] = "linux-x86_64" + elif build_platform in ("ubuntu-20.04_arm64", "ubuntu-22.04_arm64"): + install_cmake_args["platform"] = "linux-aarch64" + else: + raise Exception("Failed to install CMake") + install_cmake(**install_cmake_args) + + if build_platform == "macos_arm64": + add_path(os.path.join(install_dir, "cmake", "CMake.app", "Contents", "bin")) + else: + add_path(os.path.join(install_dir, "cmake", "bin")) + # libdatachannel dir = os.path.join(shared_source_dir, "libdatachannel") url = "https://github.com/paullouisageneau/libdatachannel.git" @@ -596,32 +657,21 @@ def install_deps( } 
install_openh264(**install_openh264_args) - # CMake - install_cmake_args = { - "version": version["CMAKE_VERSION"], - "version_file": os.path.join(install_dir, "cmake.version"), + # AOM + install_aom_args = { + "version": version["AOM_VERSION"], + "version_file": os.path.join(install_dir, "aom.version"), "source_dir": source_dir, + "build_dir": build_dir, "install_dir": install_dir, - "platform": "", - "ext": "tar.gz", + "cmake_args": [], } - if build_platform in ("windows_x86_64",): - install_cmake_args["platform"] = "windows-x86_64" - install_cmake_args["ext"] = "zip" - elif build_platform in ("macos_x86_64", "macos_arm64"): - install_cmake_args["platform"] = "macos-universal" - elif build_platform in ("ubuntu-20.04_x86_64", "ubuntu-22.04_x86_64"): - install_cmake_args["platform"] = "linux-x86_64" - elif build_platform in ("ubuntu-20.04_arm64", "ubuntu-22.04_arm64"): - install_cmake_args["platform"] = "linux-aarch64" - else: - raise Exception("Failed to install CMake") - install_cmake(**install_cmake_args) - - if build_platform == "macos_arm64": - add_path(os.path.join(install_dir, "cmake", "CMake.app", "Contents", "bin")) - else: - add_path(os.path.join(install_dir, "cmake", "bin")) + if build_platform in ("ubuntu-20.04_x86_64", "ubuntu-22.04_x86_64"): + install_aom_args["cmake_args"] = [ + "-DCMAKE_C_COMPILER=clang-12", + "-DCMAKE_CXX_COMPILER=clang++-12", + ] + install_aom(**install_aom_args) macos_cmake_args = [] if build_platform in ("macos_x86_64", "macos_arm64"): @@ -922,6 +972,11 @@ def main(): f"-DOPENH264_ROOT_DIR={cmake_path(os.path.join(install_dir, 'openh264'))}" ) + # AOM + cmake_args.append( + f"-DAOM_ROOT_DIR={cmake_path(os.path.join(install_dir, 'aom'))}" + ) + # libdatachannel cmake_args.append("-DUSE_MBEDTLS=ON") cmake_args.append( diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp new file mode 100644 index 0000000..f91e1d4 --- /dev/null +++ b/src/aom_av1_video_encoder.cpp @@ -0,0 +1,331 @@ +#include "sorac/aom_av1_video_encoder.hpp" + +#include +#include +#include + +// Linux +#include + +// plog +#include + +// AOM +#include +#include +#include + +// text の定義を全て展開した上で文字列化する。 +// 単純に #text とした場合、全て展開する前に文字列化されてしまう +#if defined(_WIN32) +#define SORAC_STRINGIZE(text) SORAC_STRINGIZE_((text)) +#define SORAC_STRINGIZE_(x) SORAC_STRINGIZE_I x +#else +#define SORAC_STRINGIZE(x) SORAC_STRINGIZE_I(x) +#endif + +#define SORAC_STRINGIZE_I(text) #text + +// a と b の定義を全て展開した上で結合する +// 単純に a ## b とした場合、全て展開する前に結合されてしまう +// clang-format off +#define SORAC_CAT(a, b) SORAC_CAT_I(a, b) + +#if defined(_WIN32) +#define SORAC_CAT_I(a, b) a ## b +#else +#define SORAC_CAT_I(a, b) SORAC_CAT_II(a ## b) +#define SORAC_CAT_II(res) res +#endif +// clang-format on + +namespace sorac { + +class AomAv1VideoEncoder : public VideoEncoder { + public: + AomAv1VideoEncoder(const std::string& aom) { + bool result = InitAom(aom); + if (!result) { + throw std::runtime_error("Failed to load AOM"); + } + } + ~AomAv1VideoEncoder() override { + Release(); + ReleaseAom(); + } + + void ForceIntraNextFrame() override { next_iframe_ = true; } + + bool InitEncode(const Settings& settings) override { + Release(); + + PLOG_INFO << "AOM InitEncode"; + + // https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc + // を参考に初期化やエンコードを行う + + aom_codec_err_t ret = aom_codec_enc_config_default_( + aom_codec_av1_cx_(), &cfg_, AOM_USAGE_REALTIME); + if (ret != AOM_CODEC_OK) { + PLOG_ERROR << "Failed to 
aom_codec_enc_config_default: ret=" << ret; + return false; + } + + // Overwrite default config with input encoder settings & RTC-relevant values. + cfg_.g_w = settings.width; + cfg_.g_h = settings.height; + cfg_.g_threads = 8; + cfg_.g_timebase.num = 1; + cfg_.g_timebase.den = 90000; + cfg_.rc_target_bitrate = settings.bitrate.count(); + cfg_.rc_dropframe_thresh = 0; + cfg_.g_input_bit_depth = 8; + cfg_.kf_mode = AOM_KF_DISABLED; + cfg_.rc_min_quantizer = 10; + cfg_.rc_max_quantizer = 63; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_initial_sz = 600; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.g_usage = AOM_USAGE_REALTIME; + cfg_.g_error_resilient = 0; + // Low-latency settings. + cfg_.rc_end_usage = AOM_CBR; // Constant Bit Rate (CBR) mode + cfg_.g_pass = AOM_RC_ONE_PASS; // One-pass rate control + cfg_.g_lag_in_frames = 0; // No look ahead when lag equals 0. + + if (frame_for_encode_ != nullptr) { + aom_img_free_(frame_for_encode_); + frame_for_encode_ = nullptr; + } + + // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH + aom_codec_flags_t flags = 0; + + // Initialize an encoder instance. + ret = aom_codec_enc_init_ver_(&ctx_, aom_codec_av1_cx_(), &cfg_, flags, + AOM_ENCODER_ABI_VERSION); + if (ret != AOM_CODEC_OK) { + PLOG_ERROR << "Failed to aom_codec_enc_init_ver: ret=" << ret; + return false; + } + init_ctx_ = true; + +#define SET_PARAM(param_id, param_value) \ + do { \ + ret = aom_codec_control_(&ctx_, param_id, param_value); \ + if (ret != AOM_CODEC_OK) { \ + PLOG_ERROR << "Failed to aom_codec_control: ret=" << ret \ + << ", param_id=" << SORAC_STRINGIZE(param_id) \ + << ", param_value=" << param_value; \ + return false; \ + } \ + } while (0) + + // Set control parameters + SET_PARAM(AOME_SET_CPUUSED, 10); + SET_PARAM(AV1E_SET_ENABLE_CDEF, 1); + SET_PARAM(AV1E_SET_ENABLE_TPL_MODEL, 0); + SET_PARAM(AV1E_SET_DELTAQ_MODE, 0); + SET_PARAM(AV1E_SET_ENABLE_ORDER_HINT, 0); + SET_PARAM(AV1E_SET_AQ_MODE, 3); + SET_PARAM(AOME_SET_MAX_INTRA_BITRATE_PCT, 300); + SET_PARAM(AV1E_SET_COEFF_COST_UPD_FREQ, 3); + SET_PARAM(AV1E_SET_MODE_COST_UPD_FREQ, 3); + SET_PARAM(AV1E_SET_MV_COST_UPD_FREQ, 3); + + SET_PARAM(AV1E_SET_ENABLE_PALETTE, 0); + + SET_PARAM(AV1E_SET_TILE_ROWS, 1); + SET_PARAM(AV1E_SET_TILE_COLUMNS, 2); + + SET_PARAM(AV1E_SET_ROW_MT, 1); + SET_PARAM(AV1E_SET_ENABLE_OBMC, 0); + SET_PARAM(AV1E_SET_NOISE_SENSITIVITY, 0); + SET_PARAM(AV1E_SET_ENABLE_WARPED_MOTION, 0); + SET_PARAM(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + SET_PARAM(AV1E_SET_ENABLE_REF_FRAME_MVS, 0); + SET_PARAM(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_DYNAMIC); + SET_PARAM(AV1E_SET_ENABLE_CFL_INTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_SMOOTH_INTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_ANGLE_DELTA, 0); + SET_PARAM(AV1E_SET_ENABLE_FILTER_INTRA, 0); + SET_PARAM(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1); + SET_PARAM(AV1E_SET_DISABLE_TRELLIS_QUANT, 1); + SET_PARAM(AV1E_SET_ENABLE_DIST_WTD_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_DUAL_FILTER, 0); + SET_PARAM(AV1E_SET_ENABLE_INTERINTRA_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0); + SET_PARAM(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0); + SET_PARAM(AV1E_SET_ENABLE_INTRABC, 0); + SET_PARAM(AV1E_SET_ENABLE_MASKED_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_PAETH_INTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_QM, 0); + SET_PARAM(AV1E_SET_ENABLE_RECT_PARTITIONS, 0); + SET_PARAM(AV1E_SET_ENABLE_RESTORATION, 0); + SET_PARAM(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_TX64, 0); 
+ SET_PARAM(AV1E_SET_MAX_REFERENCE_FRAMES, 3); + + return true; + } + + void SetEncodeCallback( + std::function callback) override { + callback_ = callback; + } + + void Encode(const VideoFrame& frame) override { + if (frame.i420_buffer == nullptr && frame.nv12_buffer == nullptr) { + PLOG_ERROR << "Unknown video frame format"; + return; + } + aom_img_fmt_t fmt = + frame.i420_buffer != nullptr ? AOM_IMG_FMT_I420 : AOM_IMG_FMT_NV12; + + if (frame_for_encode_ == nullptr || frame_for_encode_->fmt != fmt) { + if (frame_for_encode_ != nullptr) { + aom_img_free_(frame_for_encode_); + } + frame_for_encode_ = + aom_img_wrap_(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr); + } + + if (frame.i420_buffer != nullptr) { + // I420 + frame_for_encode_->planes[AOM_PLANE_Y] = frame.i420_buffer->y.get(); + frame_for_encode_->planes[AOM_PLANE_U] = frame.i420_buffer->u.get(); + frame_for_encode_->planes[AOM_PLANE_V] = frame.i420_buffer->v.get(); + frame_for_encode_->stride[AOM_PLANE_Y] = frame.i420_buffer->stride_y; + frame_for_encode_->stride[AOM_PLANE_U] = frame.i420_buffer->stride_u; + frame_for_encode_->stride[AOM_PLANE_V] = frame.i420_buffer->stride_v; + } else { + // NV12 + frame_for_encode_->planes[AOM_PLANE_Y] = frame.nv12_buffer->y.get(); + frame_for_encode_->planes[AOM_PLANE_U] = frame.nv12_buffer->uv.get(); + frame_for_encode_->planes[AOM_PLANE_V] = nullptr; + frame_for_encode_->stride[AOM_PLANE_Y] = frame.nv12_buffer->stride_y; + frame_for_encode_->stride[AOM_PLANE_U] = frame.nv12_buffer->stride_uv; + frame_for_encode_->stride[AOM_PLANE_V] = 0; + } + + const uint32_t duration = 90000 / 30; + timestamp_ += duration; + + aom_enc_frame_flags_t flags = 0; + + bool send_key_frame = next_iframe_.exchange(false); + if (send_key_frame) { + PLOG_DEBUG << "KeyFrame generated"; + flags = AOM_EFLAG_FORCE_KF; + } + + aom_codec_err_t ret = aom_codec_encode_(&ctx_, frame_for_encode_, + timestamp_, duration, flags); + + EncodedImage encoded; + const aom_codec_cx_pkt_t* pkt = nullptr; + aom_codec_iter_t iter = nullptr; + while (true) { + const aom_codec_cx_pkt_t* p = aom_codec_get_cx_data_(&ctx_, &iter); + if (p == nullptr) { + break; + } + if (p->kind == AOM_CODEC_CX_FRAME_PKT && p->data.frame.sz > 0) { + pkt = p; + } + } + + encoded.buf.reset(new uint8_t[pkt->data.frame.sz]); + encoded.size = pkt->data.frame.sz; + memcpy(encoded.buf.get(), pkt->data.frame.buf, encoded.size); + encoded.timestamp = frame.timestamp; + + callback_(encoded); + } + + void Release() override { + if (frame_for_encode_ != nullptr) { + aom_img_free_(frame_for_encode_); + frame_for_encode_ = nullptr; + } + if (init_ctx_) { + aom_codec_destroy_(&ctx_); + init_ctx_ = false; + } + } + + private: + bool InitAom(const std::string& aom) { + void* handle = ::dlopen(aom.c_str(), RTLD_LAZY); + if (handle == nullptr) { + PLOG_ERROR << "Failed to dlopen: error=" << dlerror(); + return false; + } + +#define LOAD_AOM(name) \ + SORAC_CAT(name, _) = \ + (SORAC_CAT(name, _func))::dlsym(handle, SORAC_STRINGIZE(name)); \ + if (SORAC_CAT(name, _) == nullptr) { \ + PLOG_ERROR << "Failed to dlsym: name=" << SORAC_STRINGIZE(name); \ + ::dlclose(handle); \ + return false; \ + } + + LOAD_AOM(aom_codec_av1_cx); + LOAD_AOM(aom_codec_enc_config_default); + LOAD_AOM(aom_codec_enc_init_ver); + LOAD_AOM(aom_codec_destroy); + LOAD_AOM(aom_codec_encode); + LOAD_AOM(aom_codec_get_cx_data); + LOAD_AOM(aom_codec_control); + LOAD_AOM(aom_codec_enc_config_set); + LOAD_AOM(aom_img_wrap); + LOAD_AOM(aom_img_free); + aom_handle_ = handle; + return true; + } + void ReleaseAom() { + 
if (aom_handle_ != nullptr) { + ::dlclose(aom_handle_); + aom_handle_ = nullptr; + } + } + + private: + bool init_ctx_ = false; + aom_codec_ctx_t ctx_; + aom_codec_enc_cfg_t cfg_; + aom_image_t* frame_for_encode_; + int64_t timestamp_ = 0; + + std::function callback_; + + std::atomic next_iframe_; + + void* aom_handle_ = nullptr; + +#define DECLARE_AOM(name, result, ...) \ + using SORAC_CAT(name, _func) = result (*)(__VA_ARGS__); \ + SORAC_CAT(name, _func) SORAC_CAT(name, _); + // clang-format off + DECLARE_AOM(aom_codec_av1_cx, aom_codec_iface_t*, void); + DECLARE_AOM(aom_codec_enc_config_default, aom_codec_err_t, aom_codec_iface_t* iface, aom_codec_enc_cfg_t* cfg, unsigned int usage); + DECLARE_AOM(aom_codec_enc_init_ver, aom_codec_err_t, aom_codec_ctx_t* ctx, aom_codec_iface_t* iface, const aom_codec_enc_cfg_t* cfg, aom_codec_flags_t flags, int ver); + DECLARE_AOM(aom_codec_destroy, aom_codec_err_t, aom_codec_ctx_t* ctx); + DECLARE_AOM(aom_codec_encode, aom_codec_err_t, aom_codec_ctx_t* ctx, const aom_image_t* img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags); + DECLARE_AOM(aom_codec_get_cx_data, const aom_codec_cx_pkt_t*, aom_codec_ctx_t* ctx, aom_codec_iter_t* iter); + DECLARE_AOM(aom_codec_control, aom_codec_err_t, aom_codec_ctx_t* ctx, int ctrl_id, ...); + DECLARE_AOM(aom_codec_enc_config_set, aom_codec_err_t, aom_codec_ctx_t* ctx, const aom_codec_enc_cfg_t* cfg); + DECLARE_AOM(aom_img_wrap, aom_image_t*, aom_image_t* img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int stride_align, unsigned char* img_data); + DECLARE_AOM(aom_img_free, void, aom_image_t* img); + // clang-format on +}; + +std::shared_ptr CreateAomAv1VideoEncoder(const std::string& aom) { + return std::make_shared(aom); +} + +} // namespace sorac diff --git a/src/signaling.cpp b/src/signaling.cpp index 0f16f1f..d9a74c2 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -13,6 +13,7 @@ // plog #include +#include "sorac/aom_av1_video_encoder.hpp" #include "sorac/current_time.hpp" #include "sorac/open_h264_video_encoder.hpp" #include "sorac/opus_audio_encoder.hpp" @@ -428,7 +429,7 @@ class SignalingImpl : public Signaling { auto ys = split_with(rtpmap, " "); auto payload_type = std::stoi(ys[0]); auto codec = split_with(ys[1], "/")[0]; - if (codec == "H264" || codec == "H265") { + if (codec == "H264" || codec == "H265" || codec == "AV1") { PLOG_DEBUG << "payload_type=" << payload_type << ", codec=" << codec; soracp::RtpCodecParameters cp; @@ -500,6 +501,8 @@ class SignalingImpl : public Signaling { video.addH264Codec(codec.payload_type); } else if (codec.name == "H265") { video.addH265Codec(codec.payload_type); + } else if (codec.name == "AV1") { + video.addAV1Codec(codec.payload_type); } } std::map, uint32_t> ssrcs; @@ -556,15 +559,19 @@ class SignalingImpl : public Signaling { auto rtp_config = std::make_shared( ssrc, cname, payload_type, - codec == "H264" ? rtc::H264RtpPacketizer::defaultClockRate - : rtc::H265RtpPacketizer::defaultClockRate); + codec == "H264" ? rtc::H264RtpPacketizer::defaultClockRate + : codec == "H265" ? 
rtc::H265RtpPacketizer::defaultClockRate + : rtc::AV1RtpPacketizer::defaultClockRate); std::shared_ptr packetizer; if (codec == "H264") { packetizer = std::make_shared( rtc::NalUnit::Separator::LongStartSequence, rtp_config); - } else { + } else if (codec == "H265") { packetizer = std::make_shared( rtc::NalUnit::Separator::LongStartSequence, rtp_config); + } else { + packetizer = std::make_shared( + rtc::AV1RtpPacketizer::Packetization::TemporalUnit, rtp_config); } auto sr_reporter = std::make_shared(rtp_config); packetizer->addToChain(sr_reporter); @@ -626,6 +633,12 @@ class SignalingImpl : public Signaling { } else { PLOG_ERROR << "Unknown H265EncoderType"; } + } else if (codec == "AV1") { + if (config_.av1_encoder_type == soracp::AV1_ENCODER_TYPE_AOM) { + return CreateAomAv1VideoEncoder(config_.aom); + } else { + PLOG_ERROR << "Unknown Av1EncoderType"; + } } return nullptr; }; From 25a3169bda67307677ac219577e6dd359ce05d85 Mon Sep 17 00:00:00 2001 From: melpon Date: Fri, 1 Mar 2024 07:12:17 +0900 Subject: [PATCH 07/34] =?UTF-8?q?CHANGES,=20NOTICE=20=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 2 ++ NOTICE.md | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index bde4af2..57b202f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,8 @@ - [ADD] マルチコーデックサイマルキャストの送信に対応する - @melpon +- [ADD] libaom を追加して AV1 エンコードできるようにする + - @melpon - [UPDATE] protoc-gen-jsonif をアップデートして optional 対応する - @melpon diff --git a/NOTICE.md b/NOTICE.md index 13e48f1..bca7505 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -333,3 +333,32 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ``` + +## AOM + +https://aomedia.googlesource.com/aom/ + +``` +Copyright (c) 2016, Alliance for Open Media. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+``` From 61b5cb35f40820346ebc19a12c04d51f48c4017b Mon Sep 17 00:00:00 2001 From: melpon Date: Fri, 1 Mar 2024 07:15:10 +0900 Subject: [PATCH 08/34] =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- run.py | 2 +- src/aom_av1_video_encoder.cpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/run.py b/run.py index d8033db..40ded5c 100644 --- a/run.py +++ b/run.py @@ -513,7 +513,7 @@ def install_protoc_gen_jsonif(version, source_dir, install_dir, platform: str): ) # なぜか実行属性が消えてるので入れてやる for file in os.scandir(os.path.join(jsonif_install_dir, "bin")): - if file.is_file: + if file.is_file(): os.chmod(file.path, file.stat().st_mode | stat.S_IXUSR) diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index f91e1d4..645e8f2 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -28,16 +28,14 @@ // a と b の定義を全て展開した上で結合する // 単純に a ## b とした場合、全て展開する前に結合されてしまう -// clang-format off #define SORAC_CAT(a, b) SORAC_CAT_I(a, b) #if defined(_WIN32) -#define SORAC_CAT_I(a, b) a ## b +#define SORAC_CAT_I(a, b) a##b #else -#define SORAC_CAT_I(a, b) SORAC_CAT_II(a ## b) +#define SORAC_CAT_I(a, b) SORAC_CAT_II(a##b) #define SORAC_CAT_II(res) res #endif -// clang-format on namespace sorac { From 857118766878ea9e787ecf685467d6bcd7a97370 Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 3 Mar 2024 21:53:29 +0900 Subject: [PATCH 09/34] =?UTF-8?q?AV1=20=E3=81=A7=20DD=20=E3=82=92=E5=88=A9?= =?UTF-8?q?=E7=94=A8=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/c_cpp_properties.json | 5 +++++ VERSION | 2 +- include/sorac/types.hpp | 3 +++ run.py | 3 ++- src/aom_av1_video_encoder.cpp | 41 +++++++++++++++++++++++++++++++++++ src/signaling.cpp | 22 +++++++++++++++++++ 6 files changed, 74 insertions(+), 2 deletions(-) diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 4eeb3b2..71deb3e 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -5,8 +5,13 @@ "includePath": [ "${workspaceFolder}/include", "${workspaceFolder}/_source/libdatachannel/include", + "${workspaceFolder}/_source/libdatachannel/include/rtc", + "${workspaceFolder}/_source/libdatachannel/src", "${workspaceFolder}/_source/libdatachannel/deps/json/include", "${workspaceFolder}/_source/libdatachannel/deps/plog/include", + "${workspaceFolder}/_source/libdatachannel/deps/libjuice/include", + "${workspaceFolder}/_source/libdatachannel/deps/usrsctp/usrsctplib", + "${workspaceFolder}/_source/libdatachannel/deps/libsrtp/include", "${workspaceFolder}/_source/opus/include", "${workspaceFolder}/_build/ubuntu-20.04_x86_64/release/sorac", diff --git a/VERSION b/VERSION index 1de2266..b27bdc1 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,5 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=v0.20.1 +LIBDATACHANNEL_VERSION=3963e79621dce30afb59148c72dc4b2de48f13cd OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 diff --git a/include/sorac/types.hpp b/include/sorac/types.hpp index b1a02c4..163794e 100644 --- a/include/sorac/types.hpp +++ b/include/sorac/types.hpp @@ -52,6 +52,9 @@ struct EncodedImage { int size; std::chrono::microseconds timestamp; std::optional rid; + // rtc::RtpPacketizationConfig::DependencyDescriptorContext 型なんだけど、ここで + // libdatachannel のヘッダーを include してはいけないので shared_ptr を利用する + std::shared_ptr dependency_descriptor_context; }; struct AudioFrame { diff --git a/run.py 
b/run.py index 40ded5c..4070f95 100644 --- a/run.py +++ b/run.py @@ -627,7 +627,8 @@ def install_deps( # libdatachannel dir = os.path.join(shared_source_dir, "libdatachannel") - url = "https://github.com/paullouisageneau/libdatachannel.git" + # url = "https://github.com/paullouisageneau/libdatachannel.git" + url = "https://github.com/melpon/libdatachannel.git" if not os.path.exists(os.path.join(dir, ".git")): cmd(["git", "clone", url, dir]) with cd(dir): diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index 645e8f2..52a5164 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -10,6 +10,9 @@ // plog #include +// libdatachannel +#include + // AOM #include #include @@ -167,6 +170,8 @@ class AomAv1VideoEncoder : public VideoEncoder { SET_PARAM(AV1E_SET_ENABLE_TX64, 0); SET_PARAM(AV1E_SET_MAX_REFERENCE_FRAMES, 3); + frame_number_ = 0; + return true; } @@ -241,6 +246,41 @@ class AomAv1VideoEncoder : public VideoEncoder { memcpy(encoded.buf.get(), pkt->data.frame.buf, encoded.size); encoded.timestamp = frame.timestamp; + bool is_key_frame = (pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0; + + // DD の設定を行う + rtc::RtpPacketizationConfig::DependencyDescriptorContext ctx; + ctx.structure.templateIdOffset = 0; + ctx.structure.decodeTargetCount = 1; + ctx.structure.chainCount = 1; + ctx.structure.decodeTargetProtectedBy = {0}; + ctx.structure.resolutions.push_back({frame.width(), frame.height()}); + rtc::FrameDependencyTemplate key_frame_template; + key_frame_template.spatialId = 0; + key_frame_template.temporalId = 0; + key_frame_template.decodeTargetIndications = { + rtc::DecodeTargetIndication::Switch}; + key_frame_template.chainDiffs = {0}; + rtc::FrameDependencyTemplate delta_frame_template; + delta_frame_template.spatialId = 0; + delta_frame_template.temporalId = 0; + delta_frame_template.decodeTargetIndications = { + rtc::DecodeTargetIndication::Switch}; + delta_frame_template.chainDiffs = {1}; + delta_frame_template.frameDiffs = {1}; + ctx.structure.templates = {key_frame_template, delta_frame_template}; + ctx.active_chains[0] = true; + ctx.descriptor.frameNumber = ++frame_number_; + if (is_key_frame) { + ctx.descriptor.dependencyTemplate = key_frame_template; + } else { + ctx.descriptor.dependencyTemplate = delta_frame_template; + } + ctx.descriptor.structureAttached = is_key_frame; + + encoded.dependency_descriptor_context = std::make_shared< + rtc::RtpPacketizationConfig::DependencyDescriptorContext>(ctx); + callback_(encoded); } @@ -298,6 +338,7 @@ class AomAv1VideoEncoder : public VideoEncoder { aom_codec_enc_cfg_t cfg_; aom_image_t* frame_for_encode_; int64_t timestamp_ = 0; + int frame_number_ = 0; std::function callback_; diff --git a/src/signaling.cpp b/src/signaling.cpp index d9a74c2..573e094 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -166,6 +166,12 @@ class SignalingImpl : public Signaling { if (rtp_config->timestampToSeconds(report_elapsed_timestamp) > 0.2) { sender->setNeedsToReport(); } + if (image.dependency_descriptor_context != nullptr) { + rtp_config->dependencyDescriptorId = dependency_descriptor_id_; + rtp_config->dependencyDescriptorContext = *std::static_pointer_cast< + rtc::RtpPacketizationConfig::DependencyDescriptorContext>( + image.dependency_descriptor_context); + } std::vector buf((std::byte*)image.buf.get(), (std::byte*)image.buf.get() + image.size); client_.video->simulcast_handler->config()->rid = image.rid; @@ -489,6 +495,21 @@ class SignalingImpl : public Signaling { : "(none)"); } } + { + 
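      // The received SDP announces the AV1 Dependency Descriptor RTP header
      // extension with a line of the form (the id value below is only an
      // illustration; it is whatever the offer assigns):
      //   a=extmap:12 https://aomediacodec.github.io/av1-rtp-spec/#dependency-descriptor-rtp-header-extension
      // The lookup below finds that line, splits off the "a=extmap:<id>"
      // token and parses <id>, which is later set as dependencyDescriptorId
      // on the RTP packetization config whenever an encoded image carries a
      // dependency descriptor context.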
auto it = std::find_if( + video_lines.begin(), video_lines.end(), [](const std::string& s) { + return starts_with(s, "a=extmap:") && + s.find( + "https://aomediacodec.github.io/av1-rtp-spec/" + "#dependency-descriptor-rtp-header-extension") != + std::string::npos; + }); + auto xs = split_with(*it, " "); + auto ys = split_with(xs[0], ":"); + dependency_descriptor_id_ = std::stoi(ys[1]); + PLOG_DEBUG << "dependency_descriptor_id=" + << dependency_descriptor_id_; + } std::shared_ptr track; std::map, @@ -932,6 +953,7 @@ class SignalingImpl : public Signaling { soracp::SoraConnectConfig sora_config_; soracp::RtpParameters rtp_params_; int rtp_stream_id_ = 0; + int dependency_descriptor_id_ = 0; int video_ssrc_ = 0; std::function)> on_track_; std::function)> on_data_channel_; From 950b2b998524a144055731abbd9d6eb182729e53 Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 3 Mar 2024 21:58:12 +0900 Subject: [PATCH 10/34] =?UTF-8?q?aom=20=E3=83=93=E3=83=AB=E3=83=89?= =?UTF-8?q?=E3=81=AE=E3=81=9F=E3=82=81=E3=81=AB=20yasm=20=E3=82=92?= =?UTF-8?q?=E5=85=A5=E3=82=8C=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 09c7414..c5eb40a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/checkout@v4 - run: | sudo apt-get update - sudo apt-get install -y libpulse-dev + sudo apt-get install -y libpulse-dev yasm if: matrix.m.linux == true - run: python3 run.py --sumomo --package ${{ matrix.m.name }} - name: Get package name From b05cad3c0e692be9d7bb94a705cbf0eb8d27f8a0 Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 3 Mar 2024 22:06:27 +0900 Subject: [PATCH 11/34] =?UTF-8?q?macOS=20=E3=81=A8=20Ubuntu=2022.04=20?= =?UTF-8?q?=E3=81=AE=E3=82=A8=E3=83=A9=E3=83=BC=E3=82=92=E7=9B=B4=E3=81=97?= =?UTF-8?q?=E3=81=9F=E3=81=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c5eb40a..20cd3b7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,8 +29,11 @@ jobs: - uses: actions/checkout@v4 - run: | sudo apt-get update - sudo apt-get install -y libpulse-dev yasm + sudo apt-get install -y libpulse-dev yasm clang-12 if: matrix.m.linux == true + - run: | + brew install yasm + if: matrix.m.linux == false - run: python3 run.py --sumomo --package ${{ matrix.m.name }} - name: Get package name run: | From 545a417474c46136c933e57e2d259c8f78b587cf Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 3 Mar 2024 22:18:01 +0900 Subject: [PATCH 12/34] =?UTF-8?q?libdatachannel=20=E3=81=AE=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index b27bdc1..d44bb53 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,5 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=3963e79621dce30afb59148c72dc4b2de48f13cd +LIBDATACHANNEL_VERSION=ad12f68b6926c679a7cd97f41c4adc0a647db77a OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 From 74d66ca74a3b77d0cb298a0ef463ba0f6b3c7cfd Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 3 Mar 2024 22:28:02 +0900 Subject: [PATCH 13/34] 
=?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/aom_av1_video_encoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index 52a5164..2dcaecb 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -269,7 +269,7 @@ class AomAv1VideoEncoder : public VideoEncoder { delta_frame_template.chainDiffs = {1}; delta_frame_template.frameDiffs = {1}; ctx.structure.templates = {key_frame_template, delta_frame_template}; - ctx.active_chains[0] = true; + ctx.activeChains[0] = true; ctx.descriptor.frameNumber = ++frame_number_; if (is_key_frame) { ctx.descriptor.dependencyTemplate = key_frame_template; From 5874017eadf8ab98759dd80604eb820843c4c505 Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 3 Mar 2024 23:24:37 +0900 Subject: [PATCH 14/34] =?UTF-8?q?=E5=88=9D=E6=9C=9F=E5=8C=96=E3=81=A7?= =?UTF-8?q?=E3=81=8D=E3=81=A6=E3=81=AA=E3=81=8B=E3=81=A3=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/aom_av1_video_encoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index 2dcaecb..061bec3 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -336,7 +336,7 @@ class AomAv1VideoEncoder : public VideoEncoder { bool init_ctx_ = false; aom_codec_ctx_t ctx_; aom_codec_enc_cfg_t cfg_; - aom_image_t* frame_for_encode_; + aom_image_t* frame_for_encode_ = nullptr; int64_t timestamp_ = 0; int frame_number_ = 0; From 918af017bfab8d8c3f94780537b76adb5ed36456 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 4 Mar 2024 19:10:33 +0900 Subject: [PATCH 15/34] =?UTF-8?q?DD=20=E6=8B=A1=E5=BC=B5=E3=81=8C=E3=81=AA?= =?UTF-8?q?=E3=81=84=E5=A0=B4=E5=90=88=E3=81=AB=E3=82=A8=E3=83=A9=E3=83=BC?= =?UTF-8?q?=E3=81=AB=E3=81=AA=E3=82=89=E3=81=AA=E3=81=84=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/signaling.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/signaling.cpp b/src/signaling.cpp index 573e094..1d742ad 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -166,7 +166,7 @@ class SignalingImpl : public Signaling { if (rtp_config->timestampToSeconds(report_elapsed_timestamp) > 0.2) { sender->setNeedsToReport(); } - if (image.dependency_descriptor_context != nullptr) { + if (image.dependency_descriptor_context != nullptr && dependency_descriptor_id_ != 0) { rtp_config->dependencyDescriptorId = dependency_descriptor_id_; rtp_config->dependencyDescriptorContext = *std::static_pointer_cast< rtc::RtpPacketizationConfig::DependencyDescriptorContext>( @@ -504,11 +504,13 @@ class SignalingImpl : public Signaling { "#dependency-descriptor-rtp-header-extension") != std::string::npos; }); - auto xs = split_with(*it, " "); - auto ys = split_with(xs[0], ":"); - dependency_descriptor_id_ = std::stoi(ys[1]); - PLOG_DEBUG << "dependency_descriptor_id=" - << dependency_descriptor_id_; + if (it != video_lines.end()) { + auto xs = split_with(*it, " "); + auto ys = split_with(xs[0], ":"); + dependency_descriptor_id_ = std::stoi(ys[1]); + PLOG_DEBUG << "dependency_descriptor_id=" + << dependency_descriptor_id_; + } } std::shared_ptr track; From bd601409e9c501da1802a38769a5c453b286bf94 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 4 Mar 2024 
17:56:19 +0900 Subject: [PATCH 16/34] =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 4070f95..c911101 100644 --- a/run.py +++ b/run.py @@ -489,7 +489,7 @@ def install_protobuf(version, source_dir, install_dir, platform: str): extract(path, install_dir, "protobuf") # なぜか実行属性が消えてるので入れてやる for file in os.scandir(os.path.join(install_dir, "protobuf", "bin")): - if file.is_file: + if file.is_file(): os.chmod(file.path, file.stat().st_mode | stat.S_IXUSR) From 72bbfaa51c7075e39bdf97a6c1a80689aa8aad3b Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 4 Mar 2024 20:48:34 +0900 Subject: [PATCH 17/34] bps -> kbps --- src/aom_av1_video_encoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index 061bec3..ce7f706 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -78,7 +78,7 @@ class AomAv1VideoEncoder : public VideoEncoder { cfg_.g_threads = 8; cfg_.g_timebase.num = 1; cfg_.g_timebase.den = 90000; - cfg_.rc_target_bitrate = settings.bitrate.count(); + cfg_.rc_target_bitrate = bitrate_cast(settings.bitrate).count(); cfg_.rc_dropframe_thresh = 0; cfg_.g_input_bit_depth = 8; cfg_.kf_mode = AOM_KF_DISABLED; From 8092f275c4aaa249d83506b66c44e2c9a66cef6d Mon Sep 17 00:00:00 2001 From: melpon Date: Tue, 5 Mar 2024 00:11:05 +0900 Subject: [PATCH 18/34] =?UTF-8?q?libdatachannel=20=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index d44bb53..53769aa 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,5 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=ad12f68b6926c679a7cd97f41c4adc0a647db77a +LIBDATACHANNEL_VERSION=4e7e306be62c643fdc0cd9fd864975adb58e26ec OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 From c5b33c5f2510d717c006398053e94594d81aa932 Mon Sep 17 00:00:00 2001 From: melpon Date: Tue, 5 Mar 2024 00:59:26 +0900 Subject: [PATCH 19/34] =?UTF-8?q?libdatachannel=20=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 53769aa..1069962 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,5 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=4e7e306be62c643fdc0cd9fd864975adb58e26ec +LIBDATACHANNEL_VERSION=2247a7245d3a5646f826bcc6da055df6337ec2c9 OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 From ab1ab2806008e1b1244e7d11f3758c86a3ddccc8 Mon Sep 17 00:00:00 2001 From: melpon Date: Tue, 5 Mar 2024 13:36:11 +0900 Subject: [PATCH 20/34] =?UTF-8?q?libdatachannel=20=E3=81=AB=20-Werror=20?= =?UTF-8?q?=E3=81=8C=E4=BB=98=E3=81=8F=E3=82=88=E3=81=86=E3=81=AB=E3=81=99?= =?UTF-8?q?=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 ++ VERSION | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c889e1b..dd0cfe8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ cmake_policy(SET CMP0074 NEW) project(sorac C CXX) +set(WARNINGS_AS_ERRORS ON) + add_subdirectory(_source/libdatachannel libdatachannel EXCLUDE_FROM_ALL) add_subdirectory(_source/libdatachannel/deps/json 
json EXCLUDE_FROM_ALL) add_subdirectory(_source/opus opus EXCLUDE_FROM_ALL) diff --git a/VERSION b/VERSION index 1069962..cd02606 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,5 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=2247a7245d3a5646f826bcc6da055df6337ec2c9 +LIBDATACHANNEL_VERSION=cadffd51da2bb2bb2573f0526bfd7773bdcedf0b OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 From fc890578be18b1dfcc4aea5d9d0de90c501072d6 Mon Sep 17 00:00:00 2001 From: melpon Date: Wed, 10 Apr 2024 19:20:27 +0900 Subject: [PATCH 21/34] =?UTF-8?q?--simulcast-multicodec=20=E3=82=AA?= =?UTF-8?q?=E3=83=97=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/sumomo/option.c | 4 ++++ examples/sumomo/option.h | 1 + examples/sumomo/sumomo.c | 6 ++++++ proto/soracp.proto | 1 + src/signaling.cpp | 4 +++- 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index a86ee9c..3417f7a 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -11,6 +11,7 @@ static struct option long_opts[] = { {"signaling-url", required_argument, 0, 0}, {"channel-id", required_argument, 0, 0}, {"simulcast", required_argument, 0, 0}, + {"simulcast-multicodec", required_argument, 0, 0}, {"video-codec-type", required_argument, 0, 0}, {"video-bit-rate", required_argument, 0, 0}, {"metadata", required_argument, 0, 0}, @@ -90,6 +91,8 @@ int sumomo_option_parse(SumomoOption* option, option->channel_id = optarg; } else if (OPT_IS("simulcast")) { SET_OPTBOOL(option->simulcast); + } else if (OPT_IS("simulcast-multicodec")) { + SET_OPTBOOL(option->simulcast_multicodec); } else if (OPT_IS("video-codec-type")) { if (strcmp(optarg, "H264") == 0) { option->video_codec_type = optarg; @@ -187,6 +190,7 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --signaling-url=URL [required]\n"); fprintf(stdout, " --channel-id=ID [required]\n"); fprintf(stdout, " --simulcast=true,false,none\n"); + fprintf(stdout, " --simulcast-multicodec=true,false,none\n"); fprintf(stdout, " --video-codec-type=H264,H265\n"); fprintf(stdout, " --video-bit-rate=0-15000 [kbps]\n"); fprintf(stdout, " --metadata=JSON\n"); diff --git a/examples/sumomo/option.h b/examples/sumomo/option.h index 8080ddc..9593569 100644 --- a/examples/sumomo/option.h +++ b/examples/sumomo/option.h @@ -25,6 +25,7 @@ typedef struct SumomoOption { int signaling_url_len; const char* channel_id; SumomoOptionalBool simulcast; + SumomoOptionalBool simulcast_multicodec; const char* video_codec_type; int video_bit_rate; const char* metadata; diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index d0137c2..1504d03 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -239,6 +239,12 @@ int main(int argc, char* argv[]) { : opt.simulcast == SUMOMO_OPTIONAL_BOOL_FALSE ? soracp_OPTIONAL_BOOL_FALSE : soracp_OPTIONAL_BOOL_TRUE); + soracp_SoraConnectConfig_set_simulcast_multicodec( + &sora_config, opt.simulcast_multicodec == SUMOMO_OPTIONAL_BOOL_NONE + ? soracp_OPTIONAL_BOOL_NONE + : opt.simulcast_multicodec == SUMOMO_OPTIONAL_BOOL_FALSE + ? 
soracp_OPTIONAL_BOOL_FALSE + : soracp_OPTIONAL_BOOL_TRUE); soracp_SoraConnectConfig_alloc_data_channels(&sora_config, 1); soracp_DataChannel_set_label(&dc, "#test"); diff --git a/proto/soracp.proto b/proto/soracp.proto index 57b81ed..f870161 100644 --- a/proto/soracp.proto +++ b/proto/soracp.proto @@ -75,6 +75,7 @@ message SoraConnectConfig { string spotlight_focus_rid = 12; string spotlight_unfocus_rid = 13; OptionalBool simulcast = 15; + OptionalBool simulcast_multicodec = 150; string simulcast_rid = 16; bool video = 20; bool audio = 21; diff --git a/src/signaling.cpp b/src/signaling.cpp index 1d742ad..ec8c75b 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -166,7 +166,8 @@ class SignalingImpl : public Signaling { if (rtp_config->timestampToSeconds(report_elapsed_timestamp) > 0.2) { sender->setNeedsToReport(); } - if (image.dependency_descriptor_context != nullptr && dependency_descriptor_id_ != 0) { + if (image.dependency_descriptor_context != nullptr && + dependency_descriptor_id_ != 0) { rtp_config->dependencyDescriptorId = dependency_descriptor_id_; rtp_config->dependencyDescriptorContext = *std::static_pointer_cast< rtc::RtpPacketizationConfig::DependencyDescriptorContext>( @@ -848,6 +849,7 @@ class SignalingImpl : public Signaling { set_string(js, "bundle_id", sc.bundle_id); set_optional_bool(js, "multistream", sc.multistream); set_optional_bool(js, "simulcast", sc.simulcast); + set_optional_bool(js, "simulcast_multicodec", sc.simulcast_multicodec); set_string(js, "simulcast_rid", sc.simulcast_rid); set_optional_bool(js, "spotlight", sc.spotlight); set_if(js, "spotlight_number", sc.spotlight_number, From 9bd628859d9c4688280ec8e39db4c95a69cc42e0 Mon Sep 17 00:00:00 2001 From: melpon Date: Wed, 10 Apr 2024 19:43:57 +0900 Subject: [PATCH 22/34] =?UTF-8?q?proto=20=E3=81=AE=20OptionalBool=20?= =?UTF-8?q?=E3=82=92=20optional=20bool=20=E3=81=AB=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/sumomo/sumomo.c | 26 ++++++++++---------------- proto/soracp.proto | 23 ++++++++--------------- src/signaling.cpp | 22 +++++++++++++--------- 3 files changed, 31 insertions(+), 40 deletions(-) diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index 1504d03..1ab9b0a 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -229,22 +229,16 @@ int main(int argc, char* argv[]) { soracp_SoraConnectConfig_set_audio(&sora_config, opt.audio != SUMOMO_OPTIONAL_BOOL_FALSE); - soracp_SoraConnectConfig_set_multistream(&sora_config, - soracp_OPTIONAL_BOOL_TRUE); - soracp_SoraConnectConfig_set_data_channel_signaling( - &sora_config, soracp_OPTIONAL_BOOL_TRUE); - soracp_SoraConnectConfig_set_simulcast( - &sora_config, opt.simulcast == SUMOMO_OPTIONAL_BOOL_NONE - ? soracp_OPTIONAL_BOOL_NONE - : opt.simulcast == SUMOMO_OPTIONAL_BOOL_FALSE - ? soracp_OPTIONAL_BOOL_FALSE - : soracp_OPTIONAL_BOOL_TRUE); - soracp_SoraConnectConfig_set_simulcast_multicodec( - &sora_config, opt.simulcast_multicodec == SUMOMO_OPTIONAL_BOOL_NONE - ? soracp_OPTIONAL_BOOL_NONE - : opt.simulcast_multicodec == SUMOMO_OPTIONAL_BOOL_FALSE - ? 
soracp_OPTIONAL_BOOL_FALSE - : soracp_OPTIONAL_BOOL_TRUE); + soracp_SoraConnectConfig_set_multistream(&sora_config, true); + soracp_SoraConnectConfig_set_data_channel_signaling(&sora_config, true); + if (opt.simulcast != SUMOMO_OPTIONAL_BOOL_NONE) { + soracp_SoraConnectConfig_set_simulcast( + &sora_config, opt.simulcast == SUMOMO_OPTIONAL_BOOL_TRUE); + } + if (opt.simulcast_multicodec != SUMOMO_OPTIONAL_BOOL_NONE) { + soracp_SoraConnectConfig_set_simulcast_multicodec( + &sora_config, opt.simulcast_multicodec == SUMOMO_OPTIONAL_BOOL_TRUE); + } soracp_SoraConnectConfig_alloc_data_channels(&sora_config, 1); soracp_DataChannel_set_label(&dc, "#test"); diff --git a/proto/soracp.proto b/proto/soracp.proto index f870161..fdce53b 100644 --- a/proto/soracp.proto +++ b/proto/soracp.proto @@ -2,12 +2,6 @@ syntax = "proto3"; package soracp; -enum OptionalBool { - OPTIONAL_BOOL_NONE = 0; - OPTIONAL_BOOL_TRUE = 1; - OPTIONAL_BOOL_FALSE = 2; -} - enum H264EncoderType { H264_ENCODER_TYPE_OPEN_H264 = 0; H264_ENCODER_TYPE_VIDEO_TOOLBOX = 1; @@ -28,12 +22,11 @@ message DataChannel { string direction = 2; // optional - OptionalBool ordered = 4; - + optional bool ordered = 4; optional int32 max_packet_life_time = 6; optional int32 max_retransmits = 8; optional string protocol = 10; - OptionalBool compress = 12; + optional bool compress = 12; } message ForwardingFilter { @@ -69,13 +62,13 @@ message SoraConnectConfig { string client_id = 4; string metadata = 5; string role = 6; - OptionalBool multistream = 8; - OptionalBool spotlight = 10; + optional bool multistream = 8; + optional bool spotlight = 10; int32 spotlight_number = 11; string spotlight_focus_rid = 12; string spotlight_unfocus_rid = 13; - OptionalBool simulcast = 15; - OptionalBool simulcast_multicodec = 150; + optional bool simulcast = 15; + optional bool simulcast_multicodec = 150; string simulcast_rid = 16; bool video = 20; bool audio = 21; @@ -86,8 +79,8 @@ message SoraConnectConfig { int32 video_bit_rate = 26; string audio_codec_type = 31; int32 audio_bit_rate = 34; - OptionalBool data_channel_signaling = 36; - OptionalBool ignore_disconnect_websocket = 39; + optional bool data_channel_signaling = 36; + optional bool ignore_disconnect_websocket = 39; repeated DataChannel data_channels = 41; string bundle_id = 43; string audio_streaming_language_code = 48; diff --git a/src/signaling.cpp b/src/signaling.cpp index ec8c75b..794014b 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -832,9 +832,9 @@ class SignalingImpl : public Signaling { } }; auto set_optional_bool = [](nlohmann::json& js, const std::string& key, - soracp::OptionalBool value) { - if (value != soracp::OPTIONAL_BOOL_NONE) { - js[key] = value == soracp::OPTIONAL_BOOL_TRUE ? 
true : false; + bool has_value, bool value) { + if (has_value) { + js[key] = value; } }; auto set_json = [](nlohmann::json& js, const std::string& key, @@ -847,11 +847,12 @@ class SignalingImpl : public Signaling { set_if(js, "redirect", true, redirect); set_string(js, "client_id", sc.client_id); set_string(js, "bundle_id", sc.bundle_id); - set_optional_bool(js, "multistream", sc.multistream); - set_optional_bool(js, "simulcast", sc.simulcast); - set_optional_bool(js, "simulcast_multicodec", sc.simulcast_multicodec); + set_optional_bool(js, "multistream", sc.has_multistream(), sc.multistream); + set_optional_bool(js, "simulcast", sc.has_simulcast(), sc.simulcast); + set_optional_bool(js, "simulcast_multicodec", sc.has_simulcast_multicodec(), + sc.simulcast_multicodec); set_string(js, "simulcast_rid", sc.simulcast_rid); - set_optional_bool(js, "spotlight", sc.spotlight); + set_optional_bool(js, "spotlight", sc.has_spotlight(), sc.spotlight); set_if(js, "spotlight_number", sc.spotlight_number, sc.spotlight_number > 0); set_string(js, "spotlight_focus_rid", sc.spotlight_focus_rid); @@ -892,8 +893,11 @@ class SignalingImpl : public Signaling { set_string(js, "audio_streaming_language_code", sc.audio_streaming_language_code); - set_optional_bool(js, "data_channel_signaling", sc.data_channel_signaling); + set_optional_bool(js, "data_channel_signaling", + sc.has_data_channel_signaling(), + sc.data_channel_signaling); set_optional_bool(js, "ignore_disconnect_websocket", + sc.has_ignore_disconnect_websocket(), sc.ignore_disconnect_websocket); for (const auto& d : sc.data_channels) { @@ -904,7 +908,7 @@ class SignalingImpl : public Signaling { d.has_max_packet_life_time()); set_if(dc, "max_retransmits", d.max_retransmits, d.has_max_retransmits()); set_if(dc, "protocol", d.protocol, d.has_protocol()); - set_optional_bool(dc, "compress", d.compress); + set_optional_bool(dc, "compress", d.has_compress(), d.compress); js["data_channels"].push_back(dc); } From 288d4864a041f4b66b515388cad106323f204d93 Mon Sep 17 00:00:00 2001 From: melpon Date: Wed, 10 Apr 2024 20:57:35 +0900 Subject: [PATCH 23/34] =?UTF-8?q?libdatachannel=20=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index cd02606..09ee23e 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,5 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=cadffd51da2bb2bb2573f0526bfd7773bdcedf0b +LIBDATACHANNEL_VERSION=518c0ec9dbe5f77dfd700c219b560a8bfd279441 OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 From 02bfd0f013c4d70faf90b560180055bcdb80a5d6 Mon Sep 17 00:00:00 2001 From: melpon Date: Wed, 10 Apr 2024 21:17:42 +0900 Subject: [PATCH 24/34] =?UTF-8?q?WARNINGS=5FAS=5FERRORS=20=E3=82=92?= =?UTF-8?q?=E3=82=84=E3=82=81=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dd0cfe8..c889e1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,6 @@ cmake_policy(SET CMP0074 NEW) project(sorac C CXX) -set(WARNINGS_AS_ERRORS ON) - add_subdirectory(_source/libdatachannel libdatachannel EXCLUDE_FROM_ALL) add_subdirectory(_source/libdatachannel/deps/json json EXCLUDE_FROM_ALL) add_subdirectory(_source/opus opus EXCLUDE_FROM_ALL) From a22b68c5e76de370e02dbd1904a1d5fff7fe3b76 Mon Sep 17 00:00:00 2001 From: melpon Date: Thu, 11 Apr 2024 
02:05:10 +0900 Subject: [PATCH 25/34] =?UTF-8?q?=E3=81=A1=E3=82=83=E3=82=93=E3=81=A8?= =?UTF-8?q?=E3=83=93=E3=83=83=E3=83=88=E3=83=AC=E3=83=BC=E3=83=88=E3=81=A8?= =?UTF-8?q?=E3=83=95=E3=83=AC=E3=83=BC=E3=83=A0=E3=83=AC=E3=83=BC=E3=83=88?= =?UTF-8?q?=E3=82=92=E8=80=83=E6=85=AE=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 1 + examples/sumomo/fake_capturer.cpp | 22 ++--- examples/sumomo/fake_capturer.h | 4 +- examples/sumomo/mac_capturer.h | 3 +- examples/sumomo/mac_capturer.mm | 15 ++-- examples/sumomo/option.c | 5 ++ examples/sumomo/option.h | 1 + examples/sumomo/sumomo.c | 10 +-- examples/sumomo/v4l2_capturer.cpp | 31 +++++-- examples/sumomo/v4l2_capturer.h | 3 +- include/sorac/default_encoder_adapter.hpp | 20 +++++ include/sorac/video_encoder.hpp | 1 + proto/soracp.proto | 1 - src/aom_av1_video_encoder.cpp | 5 +- src/default_encoder_adapter.cpp | 98 +++++++++++++++++++++++ src/open_h264_video_encoder.cpp | 2 +- src/signaling.cpp | 26 +++++- src/simulcast_encoder_adapter.cpp | 15 +++- src/vt_h26x_video_encoder.cpp | 17 +++- 19 files changed, 243 insertions(+), 37 deletions(-) create mode 100644 include/sorac/default_encoder_adapter.hpp create mode 100644 src/default_encoder_adapter.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c889e1b..048d638 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,6 +95,7 @@ target_sources(sorac src/aom_av1_video_encoder.cpp src/current_time.cpp src/data_channel.cpp + src/default_encoder_adapter.cpp src/open_h264_video_encoder.cpp src/opus_audio_encoder.cpp src/signaling.cpp diff --git a/examples/sumomo/fake_capturer.cpp b/examples/sumomo/fake_capturer.cpp index c63c956..1acefef 100644 --- a/examples/sumomo/fake_capturer.cpp +++ b/examples/sumomo/fake_capturer.cpp @@ -13,7 +13,8 @@ namespace sumomo { class FakeCapturer : public SumomoCapturer { public: - FakeCapturer() { + FakeCapturer(int width, int height, int fps) + : width_(width), height_(height), fps_(fps) { this->destroy = [](SumomoCapturer* p) { delete (sumomo::FakeCapturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -42,19 +43,19 @@ class FakeCapturer : public SumomoCapturer { engine_ = std::make_unique(seed_gen()); return nullptr; }); - th_.Start(30, [this](std::chrono::microseconds timestamp, - std::chrono::microseconds prev) { - std::uniform_int_distribution dist(0, 640 * 480 - 1); + th_.Start(fps_, [this](std::chrono::microseconds timestamp, + std::chrono::microseconds prev) { + std::uniform_int_distribution dist(0, width_ * height_ - 1); sorac::VideoFrame frame; frame.timestamp = timestamp; - frame.i420_buffer = sorac::VideoFrameBufferI420::Create(640, 480); + frame.i420_buffer = sorac::VideoFrameBufferI420::Create(width_, height_); frame.i420_buffer->y[dist(*engine_)] = 0xff; frame.i420_buffer->y[dist(*engine_)] = 0xff; frame.i420_buffer->y[dist(*engine_)] = 0xff; frame.i420_buffer->y[dist(*engine_)] = 0xff; frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.base_width = 640; - frame.base_height = 480; + frame.base_width = width_; + frame.base_height = height_; callback_(frame); }); return 0; @@ -62,6 +63,9 @@ class FakeCapturer : public SumomoCapturer { void Stop() { th_.Stop(); } private: + int width_; + int height_; + int fps_; std::function callback_; SteadyFrameThread th_; std::unique_ptr engine_; @@ -71,7 +75,7 @@ class FakeCapturer : public SumomoCapturer { extern "C" { -SumomoCapturer* sumomo_fake_capturer_create() { - return new 
sumomo::FakeCapturer(); +SumomoCapturer* sumomo_fake_capturer_create(int width, int height, int fps) { + return new sumomo::FakeCapturer(width, height, fps); } } diff --git a/examples/sumomo/fake_capturer.h b/examples/sumomo/fake_capturer.h index 31e6421..8dc4f5c 100644 --- a/examples/sumomo/fake_capturer.h +++ b/examples/sumomo/fake_capturer.h @@ -9,7 +9,9 @@ extern "C" { #endif -extern SumomoCapturer* sumomo_fake_capturer_create(); +extern SumomoCapturer* sumomo_fake_capturer_create(int width, + int height, + int fps); #ifdef __cplusplus } diff --git a/examples/sumomo/mac_capturer.h b/examples/sumomo/mac_capturer.h index f9e6da6..335e828 100644 --- a/examples/sumomo/mac_capturer.h +++ b/examples/sumomo/mac_capturer.h @@ -11,7 +11,8 @@ extern "C" { extern SumomoCapturer* sumomo_mac_capturer_create(const char* device, int width, - int height); + int height, + int fps); #ifdef __cplusplus } diff --git a/examples/sumomo/mac_capturer.mm b/examples/sumomo/mac_capturer.mm index f820d4f..a17a264 100644 --- a/examples/sumomo/mac_capturer.mm +++ b/examples/sumomo/mac_capturer.mm @@ -368,10 +368,11 @@ - (dispatch_queue_t)frameQueue { class MacCapturer : public SumomoCapturer { public: - MacCapturer(const char* device, int width, int height) { + MacCapturer(const char* device, int width, int height, int fps) { this->device_ = device; this->width_ = width; this->height_ = height; + this->fps_ = fps; this->destroy = [](SumomoCapturer* p) { delete (sumomo::MacCapturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -385,7 +386,7 @@ - (dispatch_queue_t)frameQueue { }; this->start = [](SumomoCapturer* p) { auto q = (sumomo::MacCapturer*)p; - return q->Start(q->device_.c_str(), q->width_, q->height_); + return q->Start(q->device_.c_str(), q->width_, q->height_, q->fps_); }; this->stop = [](SumomoCapturer* p) { ((sumomo::MacCapturer*)p)->Stop(); }; } @@ -396,14 +397,14 @@ void SetFrameCallback( callback_ = callback; } - int Start(const char* device, int width, int height) { + int Start(const char* device, int width, int height, int fps) { Stop(); capturer_ = [[SumomoMacCapturer alloc] initWithCallback:callback_]; [capturer_ startCaptureWithDeviceName:device width:width height:height - fps:30 + fps:fps completionHandler:[](NSError* _Nullable error) { if (error) { fprintf(stderr, "Failed to start capture: %s\n", @@ -422,6 +423,7 @@ void Stop() { std::function callback_; int width_; int height_; + int fps_; SumomoMacCapturer* capturer_; }; @@ -432,7 +434,8 @@ void Stop() { SumomoCapturer* sumomo_mac_capturer_create(const char* device, int width, - int height) { - return new sumomo::MacCapturer(device, width, height); + int height, + int fps) { + return new sumomo::MacCapturer(device, width, height, fps); } } diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index 3417f7a..75a6487 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -22,6 +22,7 @@ static struct option long_opts[] = { {"capture-device-name", required_argument, 0, 0}, {"capture-device-width", required_argument, 0, 0}, {"capture-device-height", required_argument, 0, 0}, + {"capture-device-fps", required_argument, 0, 0}, {"audio-type", required_argument, 0, 0}, {"h264-encoder-type", required_argument, 0, 0}, {"h265-encoder-type", required_argument, 0, 0}, @@ -51,6 +52,7 @@ int sumomo_option_parse(SumomoOption* option, #endif option->capture_device_width = 640; option->capture_device_height = 480; + option->capture_device_fps = 30; option->audio_type = 
SUMOMO_OPTION_AUDIO_TYPE_FAKE; option->video_codec_type = "H264"; option->cacert = "/etc/ssl/certs/ca-certificates.crt"; @@ -134,6 +136,8 @@ int sumomo_option_parse(SumomoOption* option, option->capture_device_width = atoi(optarg); } else if (OPT_IS("capture-device-height")) { option->capture_device_height = atoi(optarg); + } else if (OPT_IS("capture-device-fps")) { + option->capture_device_fps = atoi(optarg); } else if (OPT_IS("audio-type")) { if (strcmp(optarg, "fake") == 0) { option->audio_type = SUMOMO_OPTION_AUDIO_TYPE_FAKE; @@ -200,6 +204,7 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --capture-device-name=NAME\n"); fprintf(stdout, " --capture-device-width=WIDTH\n"); fprintf(stdout, " --capture-device-height=HEIGHT\n"); + fprintf(stdout, " --capture-device-fps=FPS\n"); fprintf(stdout, " --audio-type=fake,pulse,macos\n"); fprintf(stdout, " --h264-encoder-type=openh264,videotoolbox\n"); fprintf(stdout, " --h265-encoder-type=videotoolbox\n"); diff --git a/examples/sumomo/option.h b/examples/sumomo/option.h index 9593569..666fb78 100644 --- a/examples/sumomo/option.h +++ b/examples/sumomo/option.h @@ -36,6 +36,7 @@ typedef struct SumomoOption { const char* capture_device_name; int capture_device_width; int capture_device_height; + int capture_device_fps; SumomoOptionAudioType audio_type; soracp_H264EncoderType h264_encoder_type; soracp_H265EncoderType h265_encoder_type; diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index 1ab9b0a..418d911 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -81,7 +81,7 @@ void on_track(SoracTrack* track, void* userdata) { #if defined(__linux__) state->capturer = sumomo_v4l2_capturer_create( state->opt->capture_device_name, state->opt->capture_device_width, - state->opt->capture_device_height); + state->opt->capture_device_height, state->opt->capture_device_fps); #else fprintf(stderr, "V4L2 capturer cannot be used on environments other than Linux"); @@ -91,14 +91,16 @@ void on_track(SoracTrack* track, void* userdata) { #if defined(__APPLE__) state->capturer = sumomo_mac_capturer_create( state->opt->capture_device_name, state->opt->capture_device_width, - state->opt->capture_device_height); + state->opt->capture_device_height, state->opt->capture_device_fps); #else fprintf(stderr, "V4L2 capturer cannot be used on environments other than Linux"); exit(1); #endif } else { - state->capturer = sumomo_fake_capturer_create(); + state->capturer = sumomo_fake_capturer_create( + state->opt->capture_device_width, state->opt->capture_device_height, + state->opt->capture_device_fps); } sumomo_capturer_set_frame_callback(state->capturer, on_capture_frame, state); @@ -199,8 +201,6 @@ int main(int argc, char* argv[]) { soracp_SignalingConfig_set_h264_encoder_type(&config, opt.h264_encoder_type); soracp_SignalingConfig_set_h265_encoder_type(&config, opt.h265_encoder_type); soracp_SignalingConfig_set_av1_encoder_type(&config, opt.av1_encoder_type); - soracp_SignalingConfig_set_video_encoder_initial_bitrate_kbps( - &config, opt.video_bit_rate == 0 ? 
500 : opt.video_bit_rate); SoracSignaling* signaling = sorac_signaling_create(&config); state.signaling = signaling; diff --git a/examples/sumomo/v4l2_capturer.cpp b/examples/sumomo/v4l2_capturer.cpp index 06a12dc..6fc1af9 100644 --- a/examples/sumomo/v4l2_capturer.cpp +++ b/examples/sumomo/v4l2_capturer.cpp @@ -32,10 +32,11 @@ namespace sumomo { class V4L2Capturer : public SumomoCapturer { public: - V4L2Capturer(const char* device, int width, int height) { + V4L2Capturer(const char* device, int width, int height, int fps) { this->device_ = device; this->width_ = width; this->height_ = height; + this->fps_ = fps; this->destroy = [](SumomoCapturer* p) { delete (sumomo::V4L2Capturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -49,7 +50,7 @@ class V4L2Capturer : public SumomoCapturer { }; this->start = [](SumomoCapturer* p) { auto q = (sumomo::V4L2Capturer*)p; - return q->Start(q->device_.c_str(), q->width_, q->height_); + return q->Start(q->device_.c_str(), q->width_, q->height_, q->fps_); }; this->stop = [](SumomoCapturer* p) { ((sumomo::V4L2Capturer*)p)->Stop(); }; } @@ -59,7 +60,7 @@ class V4L2Capturer : public SumomoCapturer { callback_ = callback; } - int Start(const char* device, int width, int height) { + int Start(const char* device, int width, int height, int fps) { Stop(); device_fd_ = open(device, O_RDWR | O_NONBLOCK, 0); @@ -102,6 +103,24 @@ class V4L2Capturer : public SumomoCapturer { width_ = fmt.fmt.pix.width; height_ = fmt.fmt.pix.height; + // フレームレートの設定 + struct v4l2_streamparm sp; + memset(&sp, 0, sizeof(sp)); + sp.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + if (ioctl(device_fd_, VIDIOC_G_PARM, &sp) < 0) { + fprintf(stderr, "Failed to VIDIOC_G_PARM: %s\n", strerror(errno)); + } else { + if ((sp.parm.capture.capability & V4L2_CAP_TIMEPERFRAME) != 0) { + memset(&sp, 0, sizeof(sp)); + sp.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + sp.parm.capture.timeperframe.numerator = 1; + sp.parm.capture.timeperframe.denominator = fps; + if (ioctl(device_fd_, VIDIOC_S_PARM, &sp) < 0) { + fprintf(stderr, "Failed to set the framerate: %s\n", strerror(errno)); + } + } + } + // ビデオバッファの設定 const int V4L2_BUFFER_COUNT = 4; { @@ -247,6 +266,7 @@ class V4L2Capturer : public SumomoCapturer { std::function callback_; int width_; int height_; + int fps_; int device_fd_ = -1; std::atomic quit_; @@ -265,7 +285,8 @@ extern "C" { SumomoCapturer* sumomo_v4l2_capturer_create(const char* device, int width, - int height) { - return new sumomo::V4L2Capturer(device, width, height); + int height, + int fps) { + return new sumomo::V4L2Capturer(device, width, height, fps); } } diff --git a/examples/sumomo/v4l2_capturer.h b/examples/sumomo/v4l2_capturer.h index f991721..85a4ae1 100644 --- a/examples/sumomo/v4l2_capturer.h +++ b/examples/sumomo/v4l2_capturer.h @@ -11,7 +11,8 @@ extern "C" { extern SumomoCapturer* sumomo_v4l2_capturer_create(const char* device, int width, - int height); + int height, + int fps); #ifdef __cplusplus } diff --git a/include/sorac/default_encoder_adapter.hpp b/include/sorac/default_encoder_adapter.hpp new file mode 100644 index 0000000..3635b68 --- /dev/null +++ b/include/sorac/default_encoder_adapter.hpp @@ -0,0 +1,20 @@ +#ifndef SORAC_DEFAULT_ENCODER_ADAPTER_HPP_ +#define SORAC_DEFAULT_ENCODER_ADAPTER_HPP_ + +#include +#include +#include +#include + +#include "soracp.json.h" +#include "types.hpp" +#include "video_encoder.hpp" + +namespace sorac { + +std::shared_ptr CreateDefaultEncoderAdapter( + std::shared_ptr encoder); + +} + +#endif diff 
--git a/include/sorac/video_encoder.hpp b/include/sorac/video_encoder.hpp index a55feab..f987d38 100644 --- a/include/sorac/video_encoder.hpp +++ b/include/sorac/video_encoder.hpp @@ -15,6 +15,7 @@ class VideoEncoder { int width; int height; Bps bitrate; + int fps; }; virtual ~VideoEncoder() {} diff --git a/proto/soracp.proto b/proto/soracp.proto index fdce53b..f9db82b 100644 --- a/proto/soracp.proto +++ b/proto/soracp.proto @@ -54,7 +54,6 @@ message SignalingConfig { string proxy_username = 45; string proxy_password = 46; string proxy_agent = 47; - int32 video_encoder_initial_bitrate_kbps = 4; } message SoraConnectConfig { diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index ce7f706..2a0517b 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -62,6 +62,8 @@ class AomAv1VideoEncoder : public VideoEncoder { PLOG_INFO << "AOM InitEncode"; + settings_ = settings; + // https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc // を参考に初期化やエンコードを行う @@ -214,7 +216,7 @@ class AomAv1VideoEncoder : public VideoEncoder { frame_for_encode_->stride[AOM_PLANE_V] = 0; } - const uint32_t duration = 90000 / 30; + const uint32_t duration = 90000 / settings_.fps; timestamp_ += duration; aom_enc_frame_flags_t flags = 0; @@ -333,6 +335,7 @@ class AomAv1VideoEncoder : public VideoEncoder { } private: + Settings settings_; bool init_ctx_ = false; aom_codec_ctx_t ctx_; aom_codec_enc_cfg_t cfg_; diff --git a/src/default_encoder_adapter.cpp b/src/default_encoder_adapter.cpp new file mode 100644 index 0000000..3bdc31a --- /dev/null +++ b/src/default_encoder_adapter.cpp @@ -0,0 +1,98 @@ +#include "sorac/default_encoder_adapter.hpp" + +#include +#include +#include + +// plog +#include + +#include "sorac/bitrate.hpp" + +namespace sorac { + +// FPS の計測区間 +static const std::chrono::seconds kFpsCalcInterval(2); + +// 全てのエンコーダに適用するアダプタ。 +// +// 機能ごとにアダプタを分けるのが面倒なので一緒にしてしまう。 +// 今のところ、以下の機能がある。 +// - エンコードする映像を 16 の倍数にアライメントする +// - FPS を計測して、指定した FPS を超えた場合はエンコードをスキップする +class DefaultEncoderAdapter : public VideoEncoder { + public: + DefaultEncoderAdapter(std::shared_ptr encoder) + : encoder_(encoder) {} + ~DefaultEncoderAdapter() override { Release(); } + + void ForceIntraNextFrame() override { encoder_->ForceIntraNextFrame(); } + + bool InitEncode(const Settings& settings) override { + Release(); + + // 16の倍数にアライメントする + settings_ = settings; + settings_.width = settings.width / 16 * 16; + settings_.height = settings.height / 16 * 16; + if (settings.width != settings_.width || + settings.height != settings_.height) { + PLOG_INFO << "InitEncode adjusted: width=" << settings_.width + << " height=" << settings_.height; + } + return encoder_->InitEncode(settings_); + } + + void SetEncodeCallback( + std::function callback) override { + encoder_->SetEncodeCallback(callback); + } + + void Encode(const VideoFrame& frame) override { + // フレームレートによってはエンコードをスキップする + auto now = std::chrono::steady_clock::now(); + if (!start_timestamp_) { + start_timestamp_ = now; + } else { + auto from = std::max(now - kFpsCalcInterval, *start_timestamp_); + // from 未満のフレームを削除する + encode_timestamps_.erase( + std::remove_if(encode_timestamps_.begin(), encode_timestamps_.end(), + [from](const auto& t) { return t < from; }), + encode_timestamps_.end()); + auto fps = + ((double)encode_timestamps_.size() * 1000000 / + std::chrono::duration_cast(now - from) + .count()); + if (fps > settings_.fps) { + return; + } + 
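      // Worked example of the throttle above: with kFpsCalcInterval = 2s and
      // settings_.fps = 30, 61 timestamps left in the window give
      // fps = 61 * 1000000 / 2000000 = 30.5 > 30, so the frame is dropped
      // before it reaches the wrapped encoder. At 30.0 or below the
      // timestamp is recorded and the frame is passed on, with its buffer
      // dimensions clamped to the 16-aligned size chosen in InitEncode().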
encode_timestamps_.push_back(now); + } + VideoFrame frame2 = frame; + if (frame2.i420_buffer != nullptr) { + frame2.i420_buffer->width = settings_.width; + frame2.i420_buffer->height = settings_.height; + } + if (frame2.nv12_buffer != nullptr) { + frame2.nv12_buffer->width = settings_.width; + frame2.nv12_buffer->height = settings_.height; + } + encoder_->Encode(frame2); + } + + void Release() override { encoder_->Release(); } + + private: + std::shared_ptr encoder_; + Settings settings_; + std::vector encode_timestamps_; + std::optional start_timestamp_; +}; + +std::shared_ptr CreateDefaultEncoderAdapter( + std::shared_ptr encoder) { + return std::make_shared(encoder); +} + +} // namespace sorac diff --git a/src/open_h264_video_encoder.cpp b/src/open_h264_video_encoder.cpp index 748cd7b..e112d79 100644 --- a/src/open_h264_video_encoder.cpp +++ b/src/open_h264_video_encoder.cpp @@ -53,7 +53,7 @@ class OpenH264VideoEncoder : public VideoEncoder { encoder_params.iMaxBitrate = UNSPECIFIED_BIT_RATE; // Rate Control mode encoder_params.iRCMode = RC_BITRATE_MODE; - encoder_params.fMaxFrameRate = 30; + encoder_params.fMaxFrameRate = settings.fps; // The following parameters are extension parameters (they're in SEncParamExt, // not in SEncParamBase). diff --git a/src/signaling.cpp b/src/signaling.cpp index 794014b..f95bef8 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -15,6 +15,7 @@ #include "sorac/aom_av1_video_encoder.hpp" #include "sorac/current_time.hpp" +#include "sorac/default_encoder_adapter.hpp" #include "sorac/open_h264_video_encoder.hpp" #include "sorac/opus_audio_encoder.hpp" #include "sorac/simulcast_encoder_adapter.hpp" @@ -145,7 +146,8 @@ class SignalingImpl : public Signaling { VideoEncoder::Settings settings; settings.width = frame.base_width; settings.height = frame.base_height; - settings.bitrate = Kbps(config_.video_encoder_initial_bitrate_kbps); + settings.bitrate = default_bitrate_; + settings.fps = 30; if (!client_.video_encoder->InitEncode(settings)) { PLOG_ERROR << "Failed to InitEncode()"; return; @@ -410,6 +412,19 @@ class SignalingImpl : public Signaling { auto cname = "cname-" + generate_random_string(24); auto msid = "msid-" + generate_random_string(24); auto track_id = "trackid-" + generate_random_string(24); + // ビットレート + default_bitrate_ = std::invoke([&]() { + auto it = std::find_if( + lines.begin(), lines.end(), + [](const std::string& s) { return starts_with(s, "b=TIAS:"); }); + if (it == lines.end()) { + throw std::runtime_error("b=TIAS: not found"); + } + auto ys = split_with(*it, ":"); + auto bitrate = Bps(std::stoi(ys[1])); + return bitrate; + }); + // video std::invoke([&]() { // m=video から他の m= が出てくるまでの間のデータを取得する @@ -666,9 +681,15 @@ class SignalingImpl : public Signaling { } return nullptr; }; + std::function(std::string)> + create_encoder2 = + [create_encoder]( + std::string codec) -> std::shared_ptr { + return CreateDefaultEncoderAdapter(create_encoder(codec)); + }; client_.video_encoder = - CreateSimulcastEncoderAdapter(rtp_params_, create_encoder); + CreateSimulcastEncoderAdapter(rtp_params_, create_encoder2); on_track_(track); }); @@ -962,6 +983,7 @@ class SignalingImpl : public Signaling { soracp::RtpParameters rtp_params_; int rtp_stream_id_ = 0; int dependency_descriptor_id_ = 0; + Bps default_bitrate_; int video_ssrc_ = 0; std::function)> on_track_; std::function)> on_data_channel_; diff --git a/src/simulcast_encoder_adapter.cpp b/src/simulcast_encoder_adapter.cpp index aaad528..f4ed721 100644 --- a/src/simulcast_encoder_adapter.cpp 
+++ b/src/simulcast_encoder_adapter.cpp @@ -104,7 +104,8 @@ class SimulcastEncoderAdapter : public VideoEncoder { PLOG_INFO << "InitEncode: width=" << settings.width << " height=" << settings.height - << " bitrate=" << settings.bitrate.count(); + << " bitrate=" << settings.bitrate.count() + << " fps=" << settings.fps; // 各サイズの最大ビットレートを計算して、その割合でビットレートを分配する Bps sum_bitrate; for (const auto& e : encoders_) { @@ -133,10 +134,20 @@ class SimulcastEncoderAdapter : public VideoEncoder { double rate = (double)GetMaxBitrate(s.width, s.height).count() / sum_bitrate.count(); s.bitrate = Bps((int64_t)(settings.bitrate.count() * rate)); + s.fps = settings.fps; + // 個別にビットレートやフレームレートが指定されていたら、その通りにする + if (e.encoding.has_max_bitrate_bps()) { + s.bitrate = Bps(e.encoding.max_bitrate_bps); + } + if (e.encoding.has_max_framerate()) { + s.fps = e.encoding.max_framerate; + } + e.encoder = create_encoder_(e.codec.name); PLOG_INFO << "InitEncode(Layerd): rid=" << e.encoding.rid << ", codec=" << e.codec.name << ", width=" << s.width - << ", height=" << s.height << ", bitrate=" << s.bitrate.count(); + << ", height=" << s.height << ", bitrate=" << s.bitrate.count() + << ", fps=" << s.fps; if (!e.encoder->InitEncode(s)) { return false; } diff --git a/src/vt_h26x_video_encoder.cpp b/src/vt_h26x_video_encoder.cpp index 79e5b9d..613c25b 100644 --- a/src/vt_h26x_video_encoder.cpp +++ b/src/vt_h26x_video_encoder.cpp @@ -51,8 +51,7 @@ class VTH26xVideoEncoder : public VideoEncoder { CFDictionaryRef encoder_specs = CFDictionaryCreate( nullptr, - (const void**) - &kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, + (const void**)&kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, (const void**)&kCFBooleanTrue, 1, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); Resource encoder_specs_resource( @@ -110,6 +109,20 @@ class VTH26xVideoEncoder : public VideoEncoder { } } + // フレームレート + { + int value = settings.fps; + CFNumberRef cfnum = + CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value); + Resource cfnum_resource([cfnum]() { CFRelease(cfnum); }); + OSStatus err = VTSessionSetProperty( + vtref_, kVTCompressionPropertyKey_ExpectedFrameRate, cfnum); + if (err != noErr) { + PLOG_ERROR << "Failed to set expected-frame-rate property: err=" << err; + return false; + } + } + // キーフレーム間隔 (7200 フレームまたは 4 分間) { int value = 7200; From 76fee359897066db51d58bc5d992ad450863b9a2 Mon Sep 17 00:00:00 2001 From: melpon Date: Thu, 11 Apr 2024 13:13:22 +0900 Subject: [PATCH 26/34] =?UTF-8?q?NV12=20=E3=81=AE=20fake=20=E3=82=92?= =?UTF-8?q?=E5=88=A9=E7=94=A8=E5=8F=AF=E8=83=BD=E3=81=AB=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/sumomo/fake_capturer.cpp | 31 +++++++++++++++++++++---------- examples/sumomo/fake_capturer.h | 13 ++++++++++--- examples/sumomo/option.c | 10 ++++++---- examples/sumomo/option.h | 3 ++- examples/sumomo/sumomo.c | 5 ++++- 5 files changed, 43 insertions(+), 19 deletions(-) diff --git a/examples/sumomo/fake_capturer.cpp b/examples/sumomo/fake_capturer.cpp index 1acefef..fa659f9 100644 --- a/examples/sumomo/fake_capturer.cpp +++ b/examples/sumomo/fake_capturer.cpp @@ -13,8 +13,8 @@ namespace sumomo { class FakeCapturer : public SumomoCapturer { public: - FakeCapturer(int width, int height, int fps) - : width_(width), height_(height), fps_(fps) { + FakeCapturer(int width, int height, int fps, SumomoFakeCapturerFormat format) + : width_(width), height_(height), fps_(fps), 
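    // SumomoCapturer is the C API's struct of function pointers (a
    // hand-rolled vtable). Each assignment below installs a capture-less
    // lambda (convertible to a plain C function pointer) that casts the
    // SumomoCapturer* back to this class and forwards to the member function.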
format_(format) { this->destroy = [](SumomoCapturer* p) { delete (sumomo::FakeCapturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -48,12 +48,19 @@ class FakeCapturer : public SumomoCapturer { std::uniform_int_distribution dist(0, width_ * height_ - 1); sorac::VideoFrame frame; frame.timestamp = timestamp; - frame.i420_buffer = sorac::VideoFrameBufferI420::Create(width_, height_); - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; + if (format_ == SUMOMO_FAKE_CAPTURER_FORMAT_I420) { + frame.i420_buffer = + sorac::VideoFrameBufferI420::Create(width_, height_); + for (int i = 0; i < width_ / 100; i++) { + frame.i420_buffer->y[dist(*engine_)] = 0xff; + } + } else if (format_ == SUMOMO_FAKE_CAPTURER_FORMAT_NV12) { + frame.nv12_buffer = + sorac::VideoFrameBufferNV12::Create(width_, height_); + for (int i = 0; i < width_ / 100; i++) { + frame.nv12_buffer->y[dist(*engine_)] = 0xff; + } + } frame.base_width = width_; frame.base_height = height_; callback_(frame); @@ -66,6 +73,7 @@ class FakeCapturer : public SumomoCapturer { int width_; int height_; int fps_; + SumomoFakeCapturerFormat format_; std::function callback_; SteadyFrameThread th_; std::unique_ptr engine_; @@ -75,7 +83,10 @@ class FakeCapturer : public SumomoCapturer { extern "C" { -SumomoCapturer* sumomo_fake_capturer_create(int width, int height, int fps) { - return new sumomo::FakeCapturer(width, height, fps); +SumomoCapturer* sumomo_fake_capturer_create(int width, + int height, + int fps, + SumomoFakeCapturerFormat format) { + return new sumomo::FakeCapturer(width, height, fps, format); } } diff --git a/examples/sumomo/fake_capturer.h b/examples/sumomo/fake_capturer.h index 8dc4f5c..e95dcce 100644 --- a/examples/sumomo/fake_capturer.h +++ b/examples/sumomo/fake_capturer.h @@ -9,9 +9,16 @@ extern "C" { #endif -extern SumomoCapturer* sumomo_fake_capturer_create(int width, - int height, - int fps); +typedef enum { + SUMOMO_FAKE_CAPTURER_FORMAT_I420 = 0, + SUMOMO_FAKE_CAPTURER_FORMAT_NV12 = 1, +} SumomoFakeCapturerFormat; + +extern SumomoCapturer* sumomo_fake_capturer_create( + int width, + int height, + int fps, + SumomoFakeCapturerFormat format); #ifdef __cplusplus } diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index 75a6487..fe17320 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -44,7 +44,7 @@ int sumomo_option_parse(SumomoOption* option, } *error = 0; memset(option, 0, sizeof(SumomoOption)); - option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE; + option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420; #if defined(__linux__) option->capture_device_name = "/dev/video0"; #elif defined(__APPLE__) @@ -120,8 +120,10 @@ int sumomo_option_parse(SumomoOption* option, } else if (OPT_IS("audio")) { SET_OPTBOOL(option->audio); } else if (OPT_IS("capture-type")) { - if (strcmp(optarg, "fake") == 0) { - option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE; + if (strcmp(optarg, "fake-i420") == 0) { + option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420; + } else if (strcmp(optarg, "fake-nv12") == 0) { + option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE_NV12; } else if (strcmp(optarg, "v4l2") == 0) { option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_V4L2; } else if (strcmp(optarg, "mac") == 0) { @@ -200,7 +202,7 @@ int 
sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --metadata=JSON\n"); fprintf(stdout, " --video=true,false,none\n"); fprintf(stdout, " --audio=true,false,none\n"); - fprintf(stdout, " --capture-type=fake,v4l2,mac\n"); + fprintf(stdout, " --capture-type=fake-i420,fake-nv12,v4l2,mac\n"); fprintf(stdout, " --capture-device-name=NAME\n"); fprintf(stdout, " --capture-device-width=WIDTH\n"); fprintf(stdout, " --capture-device-height=HEIGHT\n"); diff --git a/examples/sumomo/option.h b/examples/sumomo/option.h index 666fb78..ab628fc 100644 --- a/examples/sumomo/option.h +++ b/examples/sumomo/option.h @@ -10,7 +10,8 @@ typedef enum SumomoOptionalBool { } SumomoOptionalBool; typedef enum SumomoOptionCaptureType { - SUMOMO_OPTION_CAPTURE_TYPE_FAKE, + SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420, + SUMOMO_OPTION_CAPTURE_TYPE_FAKE_NV12, SUMOMO_OPTION_CAPTURE_TYPE_V4L2, SUMOMO_OPTION_CAPTURE_TYPE_MAC, } SumomoOptionCaptureType; diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index 418d911..f4921a3 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -100,7 +100,10 @@ void on_track(SoracTrack* track, void* userdata) { } else { state->capturer = sumomo_fake_capturer_create( state->opt->capture_device_width, state->opt->capture_device_height, - state->opt->capture_device_fps); + state->opt->capture_device_fps, + state->opt->capture_type == SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420 + ? SUMOMO_FAKE_CAPTURER_FORMAT_I420 + : SUMOMO_FAKE_CAPTURER_FORMAT_NV12); } sumomo_capturer_set_frame_callback(state->capturer, on_capture_frame, state); From c4d00e25c6a3608d8de92a15f0158c69bff8885c Mon Sep 17 00:00:00 2001 From: melpon Date: Thu, 11 Apr 2024 14:29:42 +0900 Subject: [PATCH 27/34] =?UTF-8?q?SDP=20=E3=81=AE=20profile-level-id=20?= =?UTF-8?q?=E3=82=92=E8=A6=8B=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=81=99?= =?UTF-8?q?=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 3 + NOTICE.md | 36 +++++++ include/sorac/h264_profile_level_id.hpp | 67 ++++++++++++ include/sorac/vt_h26x_video_encoder.hpp | 5 +- src/h264_profile_level_id.cpp | 133 ++++++++++++++++++++++++ src/signaling.cpp | 34 +++++- src/vt_h26x_video_encoder.cpp | 119 ++++++++++++++++++++- 7 files changed, 388 insertions(+), 9 deletions(-) create mode 100644 include/sorac/h264_profile_level_id.hpp create mode 100644 src/h264_profile_level_id.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 048d638..6acc468 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,7 @@ target_sources(sorac src/current_time.cpp src/data_channel.cpp src/default_encoder_adapter.cpp + src/h264_profile_level_id.cpp src/open_h264_video_encoder.cpp src/opus_audio_encoder.cpp src/signaling.cpp @@ -118,6 +119,8 @@ target_sources(sorac include/sorac/bitrate.hpp include/sorac/current_time.hpp include/sorac/data_channel.hpp + include/sorac/default_encoder_adapter.hpp + include/sorac/h264_profile_level_id.hpp include/sorac/open_h264_video_encoder.hpp include/sorac/opus_audio_encoder.hpp include/sorac/signaling.hpp diff --git a/NOTICE.md b/NOTICE.md index bca7505..7fe0396 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -362,3 +362,39 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ``` + +## WebRTC + +https://webrtc.googlesource.com/src/ + +``` +Copyright (c) 2011, The WebRTC project authors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` diff --git a/include/sorac/h264_profile_level_id.hpp b/include/sorac/h264_profile_level_id.hpp new file mode 100644 index 0000000..3182dc2 --- /dev/null +++ b/include/sorac/h264_profile_level_id.hpp @@ -0,0 +1,67 @@ +// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/video_codecs/h264_profile_level_id.h +// から必要な部分だけ抜き出して修正したもの。 + +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef API_VIDEO_CODECS_H264_PROFILE_LEVEL_ID_H_ +#define API_VIDEO_CODECS_H264_PROFILE_LEVEL_ID_H_ + +#include +#include + +namespace sorac { + +enum class H264Profile { + kProfileConstrainedBaseline, + kProfileBaseline, + kProfileMain, + kProfileConstrainedHigh, + kProfileHigh, + kProfilePredictiveHigh444, +}; + +// All values are equal to ten times the level number, except level 1b which is +// special. +enum class H264Level { + kLevel1_b = 0, + kLevel1 = 10, + kLevel1_1 = 11, + kLevel1_2 = 12, + kLevel1_3 = 13, + kLevel2 = 20, + kLevel2_1 = 21, + kLevel2_2 = 22, + kLevel3 = 30, + kLevel3_1 = 31, + kLevel3_2 = 32, + kLevel4 = 40, + kLevel4_1 = 41, + kLevel4_2 = 42, + kLevel5 = 50, + kLevel5_1 = 51, + kLevel5_2 = 52 +}; + +struct H264ProfileLevelId { + H264ProfileLevelId(H264Profile profile, H264Level level) + : profile(profile), level(level) {} + H264Profile profile; + H264Level level; +}; + +// Parse profile level id that is represented as a string of 3 hex bytes. +// Nothing will be returned if the string is not a recognized H264 +// profile level id. 
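+// For example, ParseH264ProfileLevelId("42e01f") (a commonly seen WebRTC
+// default) returns {kProfileConstrainedBaseline, kLevel3_1}.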
+std::optional ParseH264ProfileLevelId(const char* str); + +} // namespace sorac + +#endif // API_VIDEO_CODECS_H264_PROFILE_LEVEL_ID_H_ diff --git a/include/sorac/vt_h26x_video_encoder.hpp b/include/sorac/vt_h26x_video_encoder.hpp index 09d211a..72df488 100644 --- a/include/sorac/vt_h26x_video_encoder.hpp +++ b/include/sorac/vt_h26x_video_encoder.hpp @@ -2,8 +2,10 @@ #define SORAC_VT_H26X_VIDEO_ENCODER_HPP_ #include +#include #include +#include "h264_profile_level_id.hpp" #include "video_encoder.hpp" namespace sorac { @@ -14,7 +16,8 @@ enum class VTH26xVideoEncoderType { }; std::shared_ptr CreateVTH26xVideoEncoder( - VTH26xVideoEncoderType type); + VTH26xVideoEncoderType type, + std::optional profile); } // namespace sorac diff --git a/src/h264_profile_level_id.cpp b/src/h264_profile_level_id.cpp new file mode 100644 index 0000000..ac287a4 --- /dev/null +++ b/src/h264_profile_level_id.cpp @@ -0,0 +1,133 @@ +// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/video_codecs/h264_profile_level_id.cpp +// から必要な部分だけ抜き出して修正したもの。 + +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "sorac/h264_profile_level_id.hpp" + +#include +#include +#include + +namespace sorac { + +namespace { + +// For level_idc=11 and profile_idc=0x42, 0x4D, or 0x58, the constraint set3 +// flag specifies if level 1b or level 1.1 is used. +const uint8_t kConstraintSet3Flag = 0x10; + +// Convert a string of 8 characters into a byte where the positions containing +// character c will have their bit set. For example, c = 'x', str = "x1xx0000" +// will return 0b10110000. constexpr is used so that the pattern table in +// kProfilePatterns is statically initialized. +constexpr uint8_t ByteMaskString(char c, const char (&str)[9]) { + return (str[0] == c) << 7 | (str[1] == c) << 6 | (str[2] == c) << 5 | + (str[3] == c) << 4 | (str[4] == c) << 3 | (str[5] == c) << 2 | + (str[6] == c) << 1 | (str[7] == c) << 0; +} + +// Class for matching bit patterns such as "x1xx0000" where 'x' is allowed to be +// either 0 or 1. +class BitPattern { + public: + explicit constexpr BitPattern(const char (&str)[9]) + : mask_(~ByteMaskString('x', str)), + masked_value_(ByteMaskString('1', str)) {} + + bool IsMatch(uint8_t value) const { return masked_value_ == (value & mask_); } + + private: + const uint8_t mask_; + const uint8_t masked_value_; +}; + +// Table for converting between profile_idc/profile_iop to H264Profile. +struct ProfilePattern { + const uint8_t profile_idc; + const BitPattern profile_iop; + const H264Profile profile; +}; + +// This is from https://tools.ietf.org/html/rfc6184#section-8.1. 
+constexpr ProfilePattern kProfilePatterns[] = { + {0x42, BitPattern("x1xx0000"), H264Profile::kProfileConstrainedBaseline}, + {0x4D, BitPattern("1xxx0000"), H264Profile::kProfileConstrainedBaseline}, + {0x58, BitPattern("11xx0000"), H264Profile::kProfileConstrainedBaseline}, + {0x42, BitPattern("x0xx0000"), H264Profile::kProfileBaseline}, + {0x58, BitPattern("10xx0000"), H264Profile::kProfileBaseline}, + {0x4D, BitPattern("0x0x0000"), H264Profile::kProfileMain}, + {0x64, BitPattern("00000000"), H264Profile::kProfileHigh}, + {0x64, BitPattern("00001100"), H264Profile::kProfileConstrainedHigh}, + {0xF4, BitPattern("00000000"), H264Profile::kProfilePredictiveHigh444}}; + +} // anonymous namespace + +std::optional ParseH264ProfileLevelId(const char* str) { + // The string should consist of 3 bytes in hexadecimal format. + if (strlen(str) != 6u) + return std::nullopt; + const uint32_t profile_level_id_numeric = strtol(str, nullptr, 16); + if (profile_level_id_numeric == 0) + return std::nullopt; + + // Separate into three bytes. + const uint8_t level_idc = + static_cast(profile_level_id_numeric & 0xFF); + const uint8_t profile_iop = + static_cast((profile_level_id_numeric >> 8) & 0xFF); + const uint8_t profile_idc = + static_cast((profile_level_id_numeric >> 16) & 0xFF); + + // Parse level based on level_idc and constraint set 3 flag. + H264Level level_casted = static_cast(level_idc); + H264Level level; + + switch (level_casted) { + case H264Level::kLevel1_1: + level = (profile_iop & kConstraintSet3Flag) != 0 ? H264Level::kLevel1_b + : H264Level::kLevel1_1; + break; + case H264Level::kLevel1: + case H264Level::kLevel1_2: + case H264Level::kLevel1_3: + case H264Level::kLevel2: + case H264Level::kLevel2_1: + case H264Level::kLevel2_2: + case H264Level::kLevel3: + case H264Level::kLevel3_1: + case H264Level::kLevel3_2: + case H264Level::kLevel4: + case H264Level::kLevel4_1: + case H264Level::kLevel4_2: + case H264Level::kLevel5: + case H264Level::kLevel5_1: + case H264Level::kLevel5_2: + level = level_casted; + break; + default: + // Unrecognized level_idc. + return std::nullopt; + } + + // Parse profile_idc/profile_iop into a Profile enum. + for (const ProfilePattern& pattern : kProfilePatterns) { + if (profile_idc == pattern.profile_idc && + pattern.profile_iop.IsMatch(profile_iop)) { + return H264ProfileLevelId(pattern.profile, level); + } + } + + // Unrecognized profile_idc/profile_iop combination. 
+ return std::nullopt; +} + +} // namespace sorac \ No newline at end of file diff --git a/src/signaling.cpp b/src/signaling.cpp index f95bef8..126b650 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -440,6 +440,9 @@ class SignalingImpl : public Signaling { video_lines.assign(it, it2); } } + + std::optional h264_profile; + // mid, payload_type, codec for (const auto& line : video_lines) { if (auto s = std::string("a=mid:"); starts_with(line, s)) { @@ -460,6 +463,25 @@ class SignalingImpl : public Signaling { cp.name = codec; rtp_params_.codecs.push_back(cp); } + } else if (auto s = std::string("a=fmtp:"); starts_with(line, s)) { + // 直前の a=rtpmap が H264 だった場合、a=fmtp 行の profile-level-id を取得する + if (rtp_params_.codecs.empty() || + rtp_params_.codecs.back().name != "H264") { + continue; + } + auto fmtp = line.substr(s.size()); + auto ys = split_with(fmtp, " "); + auto params = split_with(ys[1], ";"); + for (const auto& param : params) { + auto zs = split_with(param, "="); + if (zs.size() != 2) { + continue; + } + if (zs[0] == "profile-level-id") { + h264_profile = ParseH264ProfileLevelId(zs[1].c_str()); + PLOG_DEBUG << "profile-level-id=" << zs[1]; + } + } } } // mid が空ということは vido=false なので何もしない @@ -637,7 +659,8 @@ class SignalingImpl : public Signaling { } track->setMediaHandler(simulcast_handler); - track->onOpen([this, wtrack = std::weak_ptr(track)]() { + track->onOpen([this, wtrack = std::weak_ptr(track), + h264_profile]() { PLOG_DEBUG << "Video Track Opened"; auto track = wtrack.lock(); if (track == nullptr) { @@ -646,7 +669,8 @@ class SignalingImpl : public Signaling { std::function(std::string)> create_encoder = - [this](std::string codec) -> std::shared_ptr { + [this, h264_profile]( + std::string codec) -> std::shared_ptr { if (codec == "H264") { if (config_.h264_encoder_type == soracp::H264_ENCODER_TYPE_OPEN_H264) { @@ -654,7 +678,8 @@ class SignalingImpl : public Signaling { } else if (config_.h264_encoder_type == soracp::H264_ENCODER_TYPE_VIDEO_TOOLBOX) { #if defined(__APPLE__) - return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH264); + return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH264, + h264_profile); #else PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; #endif @@ -665,7 +690,8 @@ class SignalingImpl : public Signaling { if (config_.h265_encoder_type == soracp::H265_ENCODER_TYPE_VIDEO_TOOLBOX) { #if defined(__APPLE__) - return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH265); + return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH265, + std::nullopt); #else PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; #endif diff --git a/src/vt_h26x_video_encoder.cpp b/src/vt_h26x_video_encoder.cpp index 613c25b..900756f 100644 --- a/src/vt_h26x_video_encoder.cpp +++ b/src/vt_h26x_video_encoder.cpp @@ -12,6 +12,112 @@ namespace sorac { +// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/sdk/objc/components/video_codec/RTCVideoEncoderH264.mm +// より +// Extract VideoToolbox profile out of the webrtc::SdpVideoFormat. If there is +// no specific VideoToolbox profile for the specified level, AutoLevel will be +// returned. The user must initialize the encoder with a resolution and +// framerate conforming to the selected H264 level regardless. 
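+// e.g. {kProfileConstrainedBaseline, kLevel3_1} maps to
+// kVTProfileLevel_H264_Baseline_3_1 below.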
+CFStringRef ExtractProfile(const sorac::H264ProfileLevelId& profile_level_id) { + switch (profile_level_id.profile) { + case sorac::H264Profile::kProfileConstrainedBaseline: + case sorac::H264Profile::kProfileBaseline: + switch (profile_level_id.level) { + case sorac::H264Level::kLevel3: + return kVTProfileLevel_H264_Baseline_3_0; + case sorac::H264Level::kLevel3_1: + return kVTProfileLevel_H264_Baseline_3_1; + case sorac::H264Level::kLevel3_2: + return kVTProfileLevel_H264_Baseline_3_2; + case sorac::H264Level::kLevel4: + return kVTProfileLevel_H264_Baseline_4_0; + case sorac::H264Level::kLevel4_1: + return kVTProfileLevel_H264_Baseline_4_1; + case sorac::H264Level::kLevel4_2: + return kVTProfileLevel_H264_Baseline_4_2; + case sorac::H264Level::kLevel5: + return kVTProfileLevel_H264_Baseline_5_0; + case sorac::H264Level::kLevel5_1: + return kVTProfileLevel_H264_Baseline_5_1; + case sorac::H264Level::kLevel5_2: + return kVTProfileLevel_H264_Baseline_5_2; + case sorac::H264Level::kLevel1: + case sorac::H264Level::kLevel1_b: + case sorac::H264Level::kLevel1_1: + case sorac::H264Level::kLevel1_2: + case sorac::H264Level::kLevel1_3: + case sorac::H264Level::kLevel2: + case sorac::H264Level::kLevel2_1: + case sorac::H264Level::kLevel2_2: + return kVTProfileLevel_H264_Baseline_AutoLevel; + } + + case sorac::H264Profile::kProfileMain: + switch (profile_level_id.level) { + case sorac::H264Level::kLevel3: + return kVTProfileLevel_H264_Main_3_0; + case sorac::H264Level::kLevel3_1: + return kVTProfileLevel_H264_Main_3_1; + case sorac::H264Level::kLevel3_2: + return kVTProfileLevel_H264_Main_3_2; + case sorac::H264Level::kLevel4: + return kVTProfileLevel_H264_Main_4_0; + case sorac::H264Level::kLevel4_1: + return kVTProfileLevel_H264_Main_4_1; + case sorac::H264Level::kLevel4_2: + return kVTProfileLevel_H264_Main_4_2; + case sorac::H264Level::kLevel5: + return kVTProfileLevel_H264_Main_5_0; + case sorac::H264Level::kLevel5_1: + return kVTProfileLevel_H264_Main_5_1; + case sorac::H264Level::kLevel5_2: + return kVTProfileLevel_H264_Main_5_2; + case sorac::H264Level::kLevel1: + case sorac::H264Level::kLevel1_b: + case sorac::H264Level::kLevel1_1: + case sorac::H264Level::kLevel1_2: + case sorac::H264Level::kLevel1_3: + case sorac::H264Level::kLevel2: + case sorac::H264Level::kLevel2_1: + case sorac::H264Level::kLevel2_2: + return kVTProfileLevel_H264_Main_AutoLevel; + } + + case sorac::H264Profile::kProfileConstrainedHigh: + case sorac::H264Profile::kProfileHigh: + case sorac::H264Profile::kProfilePredictiveHigh444: + switch (profile_level_id.level) { + case sorac::H264Level::kLevel3: + return kVTProfileLevel_H264_High_3_0; + case sorac::H264Level::kLevel3_1: + return kVTProfileLevel_H264_High_3_1; + case sorac::H264Level::kLevel3_2: + return kVTProfileLevel_H264_High_3_2; + case sorac::H264Level::kLevel4: + return kVTProfileLevel_H264_High_4_0; + case sorac::H264Level::kLevel4_1: + return kVTProfileLevel_H264_High_4_1; + case sorac::H264Level::kLevel4_2: + return kVTProfileLevel_H264_High_4_2; + case sorac::H264Level::kLevel5: + return kVTProfileLevel_H264_High_5_0; + case sorac::H264Level::kLevel5_1: + return kVTProfileLevel_H264_High_5_1; + case sorac::H264Level::kLevel5_2: + return kVTProfileLevel_H264_High_5_2; + case sorac::H264Level::kLevel1: + case sorac::H264Level::kLevel1_b: + case sorac::H264Level::kLevel1_1: + case sorac::H264Level::kLevel1_2: + case sorac::H264Level::kLevel1_3: + case sorac::H264Level::kLevel2: + case sorac::H264Level::kLevel2_1: + case sorac::H264Level::kLevel2_2: + return 
kVTProfileLevel_H264_High_AutoLevel; + } + } +} + // デストラクタで指定した関数を呼ぶだけのクラス class Resource { public: @@ -24,7 +130,9 @@ class Resource { class VTH26xVideoEncoder : public VideoEncoder { public: - VTH26xVideoEncoder(VTH26xVideoEncoderType type) : type_(type) {} + VTH26xVideoEncoder(VTH26xVideoEncoderType type, + std::optional profile) + : type_(type), profile_(profile) {} ~VTH26xVideoEncoder() override { Release(); } void ForceIntraNextFrame() override { next_iframe_ = true; } @@ -80,7 +188,8 @@ class VTH26xVideoEncoder : public VideoEncoder { if (type_ == VTH26xVideoEncoderType::kH264) { if (OSStatus err = VTSessionSetProperty( vtref_, kVTCompressionPropertyKey_ProfileLevel, - kVTProfileLevel_H264_Baseline_3_1); + ExtractProfile(profile_.value_or(H264ProfileLevelId( + H264Profile::kProfileBaseline, H264Level::kLevel3_1)))); err != noErr) { PLOG_ERROR << "Failed to set profile-level property: err=" << err; return false; @@ -425,6 +534,7 @@ class VTH26xVideoEncoder : public VideoEncoder { }; VTH26xVideoEncoderType type_; + std::optional profile_; VTCompressionSessionRef vtref_ = nullptr; std::function callback_; @@ -433,8 +543,9 @@ class VTH26xVideoEncoder : public VideoEncoder { }; std::shared_ptr CreateVTH26xVideoEncoder( - VTH26xVideoEncoderType type) { - return std::make_shared(type); + VTH26xVideoEncoderType type, + std::optional profile) { + return std::make_shared(type, profile); } } // namespace sorac From 97eddd82151d0d0586f0a9e6fe5fa06d79102f4a Mon Sep 17 00:00:00 2001 From: voluntas Date: Thu, 11 Apr 2024 15:33:52 +0900 Subject: [PATCH 28/34] =?UTF-8?q?=E3=82=B5=E3=83=B3=E3=83=97=E3=83=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/FAQ.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/FAQ.md b/doc/FAQ.md index 801420f..24b44b2 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -80,6 +80,26 @@ libwebrtc ベースの [Sora C++ SDK](https://github.com/shiguredo/sora-cpp-sdk) --cacert cacert.pem ``` +### マルチコーデックサイマルキャスト + +マルチコーデックサイマルキャストを利用した配信を macOS arm64 で利用する例です。 + +```bash +./sumomo --signaling-url wss://sora.example.com/signaling \ + --channel-id sora \ + --simulcast true \ + --simulcast-multicodec true \ + --audio=true \ + --audio-type=macos \ + --capture-type mac \ + --capture-device-name=OBS \ + --video-codec-type=AV1 \ + --video-bit-rate 5000 \ + --h264-encoder-type videotoolbox \ + --aom `pwd`/_install/macos_arm64/release/aom/lib/libaom.dylib +~/shiguredo/sora-c-sdk +``` + ## Sumomo のヘルプ ```bash From 836b99b8d9e718b158419c1b88e2b91a04e9eccc Mon Sep 17 00:00:00 2001 From: melpon Date: Fri, 12 Apr 2024 00:29:17 +0900 Subject: [PATCH 29/34] =?UTF-8?q?=E3=82=B3=E3=83=B3=E3=83=91=E3=82=A4?= =?UTF-8?q?=E3=83=AB=E3=82=A8=E3=83=A9=E3=83=BC=E3=82=92=E7=9B=B4=E3=81=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/h264_profile_level_id.cpp | 3 ++- src/signaling.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/h264_profile_level_id.cpp b/src/h264_profile_level_id.cpp index ac287a4..a3c860f 100644 --- a/src/h264_profile_level_id.cpp +++ b/src/h264_profile_level_id.cpp @@ -15,6 +15,7 @@ #include #include +#include #include namespace sorac { @@ -73,7 +74,7 @@ constexpr ProfilePattern kProfilePatterns[] = { std::optional ParseH264ProfileLevelId(const char* str) { // The string should consist of 3 bytes in hexadecimal format. 
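   // e.g. "42e01f" decodes as profile_idc=0x42, profile_iop=0xE0, level_idc=0x1F.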
- if (strlen(str) != 6u) + if (std::strlen(str) != 6u) return std::nullopt; const uint32_t profile_level_id_numeric = strtol(str, nullptr, 16); if (profile_level_id_numeric == 0) diff --git a/src/signaling.cpp b/src/signaling.cpp index 126b650..4490295 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -16,6 +16,7 @@ #include "sorac/aom_av1_video_encoder.hpp" #include "sorac/current_time.hpp" #include "sorac/default_encoder_adapter.hpp" +#include "sorac/h264_profile_level_id.hpp" #include "sorac/open_h264_video_encoder.hpp" #include "sorac/opus_audio_encoder.hpp" #include "sorac/simulcast_encoder_adapter.hpp" @@ -441,7 +442,7 @@ class SignalingImpl : public Signaling { } } - std::optional h264_profile; + std::optional h264_profile; // mid, payload_type, codec for (const auto& line : video_lines) { From fc7741ec13d10766b252b9a1417d9903f08040bc Mon Sep 17 00:00:00 2001 From: melpon Date: Fri, 12 Apr 2024 08:10:07 +0900 Subject: [PATCH 30/34] =?UTF-8?q?=E5=88=9D=E5=9B=9E=E3=82=82=E3=81=A1?= =?UTF-8?q?=E3=82=83=E3=82=93=E3=81=A8=20push=5Fback=20=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/default_encoder_adapter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/default_encoder_adapter.cpp b/src/default_encoder_adapter.cpp index 3bdc31a..41ba9f2 100644 --- a/src/default_encoder_adapter.cpp +++ b/src/default_encoder_adapter.cpp @@ -67,8 +67,9 @@ class DefaultEncoderAdapter : public VideoEncoder { if (fps > settings_.fps) { return; } - encode_timestamps_.push_back(now); } + encode_timestamps_.push_back(now); + VideoFrame frame2 = frame; if (frame2.i420_buffer != nullptr) { frame2.i420_buffer->width = settings_.width; From b681e575356f56d89b0e5d8545163e4e3ee12e44 Mon Sep 17 00:00:00 2001 From: melpon Date: Sat, 20 Apr 2024 16:23:18 +0900 Subject: [PATCH 31/34] =?UTF-8?q?AV1=20=E3=81=AE=E3=82=B5=E3=82=A4?= =?UTF-8?q?=E3=83=9E=E3=83=AB=E3=82=AD=E3=83=A3=E3=82=B9=E3=83=88=E3=81=8C?= =?UTF-8?q?=E3=81=A1=E3=82=83=E3=82=93=E3=81=A8=E5=8B=95=E3=81=8F=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E3=81=AA=E3=81=A3=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/settings.json | 4 +++- examples/sumomo/fake_capturer.cpp | 6 ++++-- examples/sumomo/mac_capturer.mm | 3 +++ examples/sumomo/v4l2_capturer.cpp | 2 ++ include/sorac/types.hpp | 3 +++ src/aom_av1_video_encoder.cpp | 10 ++++++---- 6 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 627d8a5..f1161fd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -103,6 +103,8 @@ "recorder.h": "c", "__mutex_base": "cpp", "sorac.h": "c", - "soracp.json.c.h": "c" + "soracp.json.c.h": "c", + "complex": "cpp", + "cfenv": "cpp" } } \ No newline at end of file diff --git a/examples/sumomo/fake_capturer.cpp b/examples/sumomo/fake_capturer.cpp index fa659f9..99cd8b0 100644 --- a/examples/sumomo/fake_capturer.cpp +++ b/examples/sumomo/fake_capturer.cpp @@ -48,16 +48,17 @@ class FakeCapturer : public SumomoCapturer { std::uniform_int_distribution dist(0, width_ * height_ - 1); sorac::VideoFrame frame; frame.timestamp = timestamp; + frame.frame_number = ++frame_number_; if (format_ == SUMOMO_FAKE_CAPTURER_FORMAT_I420) { frame.i420_buffer = sorac::VideoFrameBufferI420::Create(width_, height_); - for (int i = 0; i < width_ / 100; i++) { + for (int i = 0; i < width_ / 10; i++) { frame.i420_buffer->y[dist(*engine_)] = 0xff; } } else if 
(format_ == SUMOMO_FAKE_CAPTURER_FORMAT_NV12) { frame.nv12_buffer = sorac::VideoFrameBufferNV12::Create(width_, height_); - for (int i = 0; i < width_ / 100; i++) { + for (int i = 0; i < width_ / 10; i++) { frame.nv12_buffer->y[dist(*engine_)] = 0xff; } } @@ -73,6 +74,7 @@ class FakeCapturer : public SumomoCapturer { int width_; int height_; int fps_; + int frame_number_ = 0; SumomoFakeCapturerFormat format_; std::function callback_; SteadyFrameThread th_; diff --git a/examples/sumomo/mac_capturer.mm b/examples/sumomo/mac_capturer.mm index a17a264..d931845 100644 --- a/examples/sumomo/mac_capturer.mm +++ b/examples/sumomo/mac_capturer.mm @@ -48,6 +48,7 @@ @implementation SumomoMacCapturer { std::function _callback; BOOL _willBeRunning; dispatch_queue_t _frameQueue; + int _frameNumber; } - (instancetype)initWithCallback: @@ -64,6 +65,7 @@ - (instancetype)initWithCallback: _videoDataOutput = [[AVCaptureVideoDataOutput alloc] init]; _willBeRunning = NO; _frameQueue = nil; + _frameNumber = 0; NSSet* supportedPixelFormats = [NSSet setWithObjects:@(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange), @@ -283,6 +285,7 @@ - (void)captureOutput:(AVCaptureOutput*)captureOutput (int64_t)(CMTimeGetSeconds( CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) * kMicrosecondsPerSecond)); + frame.frame_number = ++_frameNumber; frame.nv12_buffer = sorac::VideoFrameBufferNV12::Create(width, height); frame.base_width = width; frame.base_height = height; diff --git a/examples/sumomo/v4l2_capturer.cpp b/examples/sumomo/v4l2_capturer.cpp index 6fc1af9..2308e4c 100644 --- a/examples/sumomo/v4l2_capturer.cpp +++ b/examples/sumomo/v4l2_capturer.cpp @@ -229,6 +229,7 @@ class V4L2Capturer : public SumomoCapturer { frame.timestamp = sorac::get_current_time(); frame.base_width = width_; frame.base_height = height_; + frame.frame_number = ++frame_number_; callback_(frame); if (ioctl(device_fd_, VIDIOC_QBUF, &buf) < 0) { @@ -267,6 +268,7 @@ class V4L2Capturer : public SumomoCapturer { int width_; int height_; int fps_; + int frame_number_ = 0; int device_fd_ = -1; std::atomic quit_; diff --git a/include/sorac/types.hpp b/include/sorac/types.hpp index 163794e..87074e7 100644 --- a/include/sorac/types.hpp +++ b/include/sorac/types.hpp @@ -45,6 +45,9 @@ struct VideoFrame { int height() const { return i420_buffer != nullptr ? 
i420_buffer->height : nv12_buffer->height; } + // サイマルキャストで DD を利用する時にフレーム番号を全体で同じにする必要があるため + // ここにフレーム番号を持たせる + int frame_number; }; struct EncodedImage { diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp index 2a0517b..60f9bc9 100644 --- a/src/aom_av1_video_encoder.cpp +++ b/src/aom_av1_video_encoder.cpp @@ -172,8 +172,6 @@ class AomAv1VideoEncoder : public VideoEncoder { SET_PARAM(AV1E_SET_ENABLE_TX64, 0); SET_PARAM(AV1E_SET_MAX_REFERENCE_FRAMES, 3); - frame_number_ = 0; - return true; } @@ -272,17 +270,21 @@ class AomAv1VideoEncoder : public VideoEncoder { delta_frame_template.frameDiffs = {1}; ctx.structure.templates = {key_frame_template, delta_frame_template}; ctx.activeChains[0] = true; - ctx.descriptor.frameNumber = ++frame_number_; + ctx.descriptor.frameNumber = frame.frame_number; if (is_key_frame) { ctx.descriptor.dependencyTemplate = key_frame_template; } else { ctx.descriptor.dependencyTemplate = delta_frame_template; + ctx.descriptor.dependencyTemplate.frameDiffs = {frame.frame_number - + prev_frame_number_}; } ctx.descriptor.structureAttached = is_key_frame; encoded.dependency_descriptor_context = std::make_shared< rtc::RtpPacketizationConfig::DependencyDescriptorContext>(ctx); + prev_frame_number_ = frame.frame_number; + callback_(encoded); } @@ -341,7 +343,7 @@ class AomAv1VideoEncoder : public VideoEncoder { aom_codec_enc_cfg_t cfg_; aom_image_t* frame_for_encode_ = nullptr; int64_t timestamp_ = 0; - int frame_number_ = 0; + int prev_frame_number_ = 0; std::function callback_; From c977bac120882abf7008664ded4a1712fd674e79 Mon Sep 17 00:00:00 2001 From: melpon Date: Sat, 20 Apr 2024 16:42:02 +0900 Subject: [PATCH 32/34] =?UTF-8?q?video=5Fcodec=5Ftype=20=E3=81=AE=E3=83=87?= =?UTF-8?q?=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88=E3=82=92=E7=84=A1=E6=8C=87?= =?UTF-8?q?=E5=AE=9A=E3=81=AB=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/sumomo/option.c | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index fe17320..10eca3b 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -54,7 +54,6 @@ int sumomo_option_parse(SumomoOption* option, option->capture_device_height = 480; option->capture_device_fps = 30; option->audio_type = SUMOMO_OPTION_AUDIO_TYPE_FAKE; - option->video_codec_type = "H264"; option->cacert = "/etc/ssl/certs/ca-certificates.crt"; int index; From ad9bdf791133f4b95edfc81af767198b99f1a998 Mon Sep 17 00:00:00 2001 From: melpon Date: Sun, 21 Apr 2024 12:41:35 +0900 Subject: [PATCH 33/34] =?UTF-8?q?answer=20=E3=81=AB=20profile-level-id=20?= =?UTF-8?q?=E3=81=8C=E7=84=A1=E3=81=8B=E3=81=A3=E3=81=9F=E3=81=AE=E3=81=A7?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/signaling.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/signaling.cpp b/src/signaling.cpp index 4490295..257f5e0 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -443,6 +443,7 @@ class SignalingImpl : public Signaling { } std::optional h264_profile; + std::optional h264_profile_string; // mid, payload_type, codec for (const auto& line : video_lines) { @@ -479,8 +480,13 @@ class SignalingImpl : public Signaling { continue; } if (zs[0] == "profile-level-id") { + h264_profile_string = zs[1]; h264_profile = ParseH264ProfileLevelId(zs[1].c_str()); PLOG_DEBUG << "profile-level-id=" << zs[1]; + if (h264_profile != std::nullopt) 
{
+            PLOG_DEBUG << "profile=" << (int)h264_profile->profile
+                       << ", level=" << (int)h264_profile->level;
+          }
         }
       }
     }
@@ -560,7 +566,11 @@ class SignalingImpl : public Signaling {
     auto video = rtc::Description::Video(rtp_params_.mid);
     for (const auto& codec : rtp_params_.codecs) {
       if (codec.name == "H264") {
-        video.addH264Codec(codec.payload_type);
+        std::optional<std::string> profile;
+        if (h264_profile_string != std::nullopt) {
+          profile = "level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=" + *h264_profile_string;
+        }
+        video.addH264Codec(codec.payload_type, profile);
       } else if (codec.name == "H265") {
         video.addH265Codec(codec.payload_type);
       } else if (codec.name == "AV1") {

From ab83162f53247d0da9a45256fb84d17fd425ec97 Mon Sep 17 00:00:00 2001
From: melpon
Date: Sat, 27 Apr 2024 19:32:11 +0900
Subject: =?UTF-8?q?libdatachannel=20=E3=81=AE=E6=9B=B4?=
 =?UTF-8?q?=E6=96=B0=E3=81=A8=E3=80=81URL=20=E3=82=92=E3=83=91=E3=83=A9?=
 =?UTF-8?q?=E3=83=A1=E3=83=BC=E3=82=BF=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 VERSION | 3 ++-
 run.py  | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/VERSION b/VERSION
index 09ee23e..d07d839 100644
--- a/VERSION
+++ b/VERSION
@@ -1,5 +1,6 @@
 SORA_C_SDK_VERSION=2024.1.0
-LIBDATACHANNEL_VERSION=518c0ec9dbe5f77dfd700c219b560a8bfd279441
+LIBDATACHANNEL_URL=https://github.com/melpon/libdatachannel.git
+LIBDATACHANNEL_VERSION=005820eb54585b57fe5567a7ad51b801598c159f
 OPUS_VERSION=v1.4
 CMAKE_VERSION=3.28.1
 OPENH264_VERSION=v2.4.0

diff --git a/run.py b/run.py
index c911101..e39a994 100644
--- a/run.py
+++ b/run.py
@@ -627,8 +627,7 @@ def install_deps(
 
     # libdatachannel
     dir = os.path.join(shared_source_dir, "libdatachannel")
-    # url = "https://github.com/paullouisageneau/libdatachannel.git"
-    url = "https://github.com/melpon/libdatachannel.git"
+    url = version["LIBDATACHANNEL_URL"]
    if not os.path.exists(os.path.join(dir, ".git")):
         cmd(["git", "clone", url, dir])
     with cd(dir):
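The new `LIBDATACHANNEL_URL` entry follows the same `KEY=VALUE` layout as the rest of VERSION. The loader that fills the `version` dict read by `install_deps` is not shown in this patch; the sketch below is a hypothetical stand-in (`load_version` is an assumed name, not the actual run.py helper) illustrating how such a file is typically parsed.

```python
# Hypothetical helper (not from this patch): parse VERSION's KEY=VALUE lines
# into a dict like the `version` mapping used by install_deps().
def load_version(path="VERSION"):
    version = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines and comments.
            if not line or line.startswith("#"):
                continue
            key, sep, value = line.partition("=")
            if sep:
                version[key.strip()] = value.strip()
    return version


if __name__ == "__main__":
    version = load_version()
    print(version.get("LIBDATACHANNEL_URL"))
    print(version.get("LIBDATACHANNEL_VERSION"))
```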