From 99946629fc53df81fc1c7e1fd473a4d70811b861 Mon Sep 17 00:00:00 2001
From: Josh Allmann <joshua.allmann@gmail.com>
Date: Mon, 1 Jul 2024 17:49:27 +0000
Subject: [PATCH] Fix FPS passthrough

* Set the encoder timebase using AVCodecContext.framerate instead of
  the decoder's AVCodecContext.time_base.

  The use of AVCodecContext.time_base is deprecated for decoding.
  See https://ffmpeg.org/doxygen/3.3/structAVCodecContext.html#ab7bfeb9fa5840aac090e2b0bd0ef7589

* Adjust the packet timebase as necessary for FPS passthrough
  to match the encoder's expected timebase. For filtergraphs using
  FPS adjustment, the filtergraph output timebase will match the
  framerate (1 / framerate) and the encoder is configured for the same.

  However, for FPS passthrough, the filtergraph's output timebase
  will match the input timebase (since there is no FPS adjustment)
  while the encoder uses the timebase detected from the decoder's
  framerate. Since the input timebase does not typically match the FPS
  (e.g. 90 kHz for mpegts vs 30 fps), we need to adjust the packet
  timestamps (in container timebase) to the encoder's expected timebase.

* For the specific case of FPS passthrough, preserve the original PTS
  as much as possible since we are trying to re-encode existing frames
  one-to-one. Use the opaque field for this, since it is already being
  populated with the original PTS to detect sentinel packets
  during flushing.

  Without this, timestamps can be slightly "squashed" down when
  rescaling output packets to the muxer's timebase, due to the loss of
  precision (e.g. demuxer 90 kHz -> encoder 30 Hz -> muxer 90 kHz).
---
 ffmpeg/encoder.c      | 22 ++++++++++++++++++++--
 ffmpeg/ffmpeg_test.go | 18 ++++++------------
 ffmpeg/filter.c       |  3 +++
 ffmpeg/filter.h       |  3 +++
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/ffmpeg/encoder.c b/ffmpeg/encoder.c
index d23e744941..1e1fd3f7f0 100755
--- a/ffmpeg/encoder.c
+++ b/ffmpeg/encoder.c
@@ -241,8 +241,9 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
     else if (ictx->vc->framerate.num && ictx->vc->framerate.den) vc->framerate = ictx->vc->framerate;
     else vc->framerate = ictx->ic->streams[ictx->vi]->r_frame_rate;
     if (octx->fps.den) vc->time_base = av_buffersink_get_time_base(octx->vf.sink_ctx);
-    else if (ictx->vc->time_base.num && ictx->vc->time_base.den) vc->time_base = ictx->vc->time_base;
+    else if (ictx->vc->framerate.num && ictx->vc->framerate.den) vc->time_base = av_inv_q(ictx->vc->framerate);
     else vc->time_base = ictx->ic->streams[ictx->vi]->time_base;
+    vc->flags |= AV_CODEC_FLAG_COPY_OPAQUE;
     if (octx->bitrate) vc->rc_min_rate = vc->bit_rate = vc->rc_max_rate = vc->rc_buffer_size = octx->bitrate;
     if (av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx)) {
       vc->hw_frames_ctx =
@@ -368,7 +369,14 @@ static int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* oc
     ret = avcodec_receive_packet(encoder, pkt);
     if (AVERROR(EAGAIN) == ret || AVERROR_EOF == ret) goto encode_cleanup;
     if (ret < 0) LPMS_ERR(encode_cleanup, "Error receiving packet from encoder");
-    ret = mux(pkt, encoder->time_base, octx, ost);
+    AVRational time_base = encoder->time_base;
+    if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type && !octx->fps.den && octx->vf.active) {
+      // try to preserve source timestamps for fps passthrough.
+      time_base = octx->vf.time_base;
+      pkt->pts = (int64_t)pkt->opaque; // already in filter timebase
+      pkt->dts = av_rescale_q(pkt->dts, encoder->time_base, time_base);
+    }
+    ret = mux(pkt, time_base, octx, ost);
     if (ret < 0) goto encode_cleanup;
   }
 
@@ -527,6 +535,16 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
          ret = calc_signature(frame, octx);
          if(ret < 0) LPMS_WARN("Could not calculate signature value for frame");
       }
+
+      if (frame) {
+        // rescale pts to match encoder timebase if necessary (eg, fps passthrough)
+        AVRational filter_tb = av_buffersink_get_time_base(filter->sink_ctx);
+        if (av_cmp_q(filter_tb, encoder->time_base)) {
+          frame->pts = av_rescale_q(frame->pts, filter_tb, encoder->time_base);
+          // TODO: does frame->duration need to be rescaled too?
+        }
+      }
+
       ret = encode(encoder, frame, octx, ost);
 skip:
     av_frame_unref(frame);
diff --git a/ffmpeg/ffmpeg_test.go b/ffmpeg/ffmpeg_test.go
index a60ef9a653..5c59128dfe 100644
--- a/ffmpeg/ffmpeg_test.go
+++ b/ffmpeg/ffmpeg_test.go
@@ -1469,10 +1469,8 @@ func TestTranscoder_PassthroughFPS(t *testing.T) {
         ffprobe -v warning -show_streams test-short.ts | grep r_frame_rate=60/1
         ffprobe -v warning -show_streams test-123fps.mp4 | grep r_frame_rate=123/1
         # Extract frame properties for later comparison
-        ffprobe -v warning -select_streams v -show_frames test-123fps.mp4 | grep duration= > test-123fps.duration
-        ffprobe -v warning -select_streams v -show_frames test-short.ts | grep duration= > test-short.duration
-        ffprobe -v warning -select_streams v -show_frames test-123fps.mp4 | grep pkt_pts= > test-123fps.pts
-        ffprobe -v warning -select_streams v -show_frames test-short.ts | grep pkt_pts= > test-short.pts
+        ffprobe -v warning -select_streams v -show_frames -show_entries frame=pts,pkt_dts,duration -of csv test-123fps.mp4 > test-123fps.data
+        ffprobe -v warning -select_streams v -show_frames -show_entries frame=pts,pkt_dts,duration -of csv test-short.ts > test-short.data
     `
 	run(cmd)
 	out := []TranscodeOptions{{Profile: P144p30fps16x9}}
@@ -1510,14 +1508,10 @@ func TestTranscoder_PassthroughFPS(t *testing.T) {
         ffprobe -v warning -show_streams out-123fps.mp4 | grep r_frame_rate=123/1
 
         # Check some per-frame properties
-        ffprobe -v warning -select_streams v -show_frames out-123fps.mp4 | grep duration= > out-123fps.duration
-        ffprobe -v warning -select_streams v -show_frames out-short.ts | grep duration= > out-short.duration
-        diff -u test-123fps.duration out-123fps.duration
-        # diff -u test-short.duration out-short.duration # Why does this fail???
-        ffprobe -v warning -select_streams v -show_frames out-123fps.mp4 | grep pkt_pts= > out-123fps.pts
-        ffprobe -v warning -select_streams v -show_frames out-short.ts | grep pkt_pts= > out-short.pts
-        diff -u test-123fps.pts out-123fps.pts
-        diff -u test-short.pts out-short.pts
+        ffprobe -v warning -select_streams v -show_frames -show_entries frame=pts,pkt_dts,duration -of csv out-123fps.mp4 > out-123fps.data
+        ffprobe -v warning -select_streams v -show_frames -show_entries frame=pts,pkt_dts,duration -of csv out-short.ts > out-short.data
+        diff -u test-123fps.data out-123fps.data
+        diff -u test-short.data test-short.data
     `
 	run(cmd)
 }
diff --git a/ffmpeg/filter.c b/ffmpeg/filter.c
index 9a3be60e21..9bacc1ec46 100644
--- a/ffmpeg/filter.c
+++ b/ffmpeg/filter.c
@@ -74,6 +74,7 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
       ret = AVERROR(ENOMEM);
       LPMS_ERR(vf_init_cleanup, "Unable to allocate filters");
     }
+    vf->time_base = time_base;
     if (ictx->vc->hw_device_ctx) in_pix_fmt = hw2pixfmt(ictx->vc);
 
     /* buffer video source: the decoded frames from the decoder will be inserted here. */
@@ -303,6 +304,7 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
       filter->custom_pts += ts_step;
       filter->prev_frame_pts = inf->pts;
     } else {
+      // FPS Passthrough or Audio case
       filter->custom_pts = inf->pts;
     }
   } else if (!filter->flushed) { // Flush Frame
@@ -349,6 +351,7 @@ int filtergraph_read(struct input_ctx *ictx, struct output_ctx *octx, struct fil
       if (filter->flushing) filter->flushed = 1;
       ret = lpms_ERR_FILTER_FLUSHED;
     } else if (frame && is_video && octx->fps.den) {
+      // TODO why limit to fps filter? what about non-fps filtergraphs, eg scale?
       // We set custom PTS as an input of the filtergraph so we need to
       // re-calculate our output PTS before passing it on to the encoder
       if (filter->pts_diff == INT64_MIN) {
diff --git a/ffmpeg/filter.h b/ffmpeg/filter.h
index 0204c2a67a..59d9e8c945 100755
--- a/ffmpeg/filter.h
+++ b/ffmpeg/filter.h
@@ -13,6 +13,9 @@ struct filter_ctx {
 
   uint8_t *hwframes; // GPU frame pool data
 
+  // Input timebase for this filter
+  AVRational time_base;
+
   // The fps filter expects monotonically increasing PTS, which might not hold
   // for our input segments (they may be out of order, or have dropped frames).
   // So we set a custom PTS before sending the frame to the filtergraph that is