From 4d44fe02f380d5fe78d336ff4cc25847fee20b5e Mon Sep 17 00:00:00 2001
From: Josh Allmann <joshua.allmann@gmail.com>
Date: Fri, 13 Sep 2024 07:30:13 +0000
Subject: [PATCH] Fix occassional DTS overlap

Fix an occassional DTS overlap by
closing the filtergraph after each
segment and re-creating it at the
beginning of each segment, instead
of attempting to persist the
filtergraph in between segments.

This overlap occurred mostly when
flip-flopping segments between transcoders,
or processing non-consecutive segments within
a single transcoder. This was due to drift in
adjusting input timestamps to match the fps
filter's expectation of mostly consecutive
timestamps while adjusting output timestamps
to remove accumulated delay from the filter.

There is roughly a 1% performance hit on my
machine from re-creating the filtergraph.

Because we are now resetting the filter after
each segment, we can remove a good chunk of
the special-cased timestamp handling code
before and after the filtergraph since
we no longer need to handle discontinuities
between segments.

However, we do need to keep some filter flushing
logic in order to accommodate low-fps or low-frame
content.

This does change our outputs, usually by one
fewer frame. Sometimes we seem to produce an
*additional* frame - it is unclear why. However,
as the test cases note, this actually clears up a
numer of long-standing oddities around the expected
frame count, so it should be seen as an improvement.

---

It is important to note that while this fixes DTS
overlap in a (rather unpredictable) general case,
there is another overlap bug in one very specific case.

These are the conditions for bug:

1. First and second segments of the stream are being
   processed. This could be the same transcoder or
   different ones.

2. The first segment starts at or near zero pts

3. mpegts is the output format

4. B-frames are being used

What happens is we may see DTS < PTS for the
very first frames in the very first segment,
potentially starting with PTS = 0, DTS < 0.
This is expected for B-frames.

However, if mpegts is in use, it cannot take negative
timestamps. To accompdate negative DTS, the muxer
will set PTS = -DTS, DTS = 0 and delay (offset) the
rest of the packets in the segment accordingly.

Unfortunately, subsequent transcodes will not know
about this delay! This typically leads to an overlap
between the first and second segments (but segments after
that would be fine).

The normal way to fix this would be to add a constant delay
to all segments - ffmpeg adds 1.4s to mpegts by default.

However, introducing a delay right now feels a little
odd since we don't really offer any other knobs to control
the timestamp (re-transcodes would accumulate the delay) and
there is some concern about falling out of sync with the
source segment since we have historically tried to make
timestamps follow the source as closely as possible.

So we're leaving this particular bug as-is for now.
There is some commented-out code that adds this delay
in case we feel that we would need it in the future.

Note that FFmpeg CLI also has the exact same problem
when the muxer delay is removed, so this is not a
LPMS-specific issue. This is exercised in the test cases.

Example of non-monotonic DTS after encoding and after muxing:

Segment.Frame | Encoder DTS | Encoder PTS | Muxer DTS | Muxer PTS
--------------|-------------|-------------|-----------|-----------
      1.1     |  -20        |    0        |    0      | 20
      1.2     |  -10        |   10        |   10      | 30
      1.3     |    0        |   20        |  *20*     | 40
      1.4     |   10        |   30        |  *30*     | 50
      2.1     |   20        |   40        |  *20*     | 40
      2.2     |   30        |   50        |  *30*     | 50
      2.3     |   40        |   60        |   40      | 60
---
 ffmpeg/api_test.go    | 181 +++++++++++++++++++++++++++++++++++-------
 ffmpeg/encoder.c      |  19 ++++-
 ffmpeg/ffmpeg_test.go |  44 +++++-----
 ffmpeg/filter.c       |  37 +++------
 ffmpeg/filter.h       |  15 +---
 ffmpeg/nvidia_test.go |  30 ++++---
 6 files changed, 218 insertions(+), 108 deletions(-)

diff --git a/ffmpeg/api_test.go b/ffmpeg/api_test.go
index d5c4946ab0..e32f0f354b 100755
--- a/ffmpeg/api_test.go
+++ b/ffmpeg/api_test.go
@@ -51,7 +51,7 @@ func TestAPI_SkippedSegment(t *testing.T) {
 		if res.Decoded.Frames != 120 {
 			t.Error("Did not get decoded frames", res.Decoded.Frames)
 		}
-		if res.Encoded[1].Frames != 245 {
+		if res.Encoded[1].Frames != 246 {
 			t.Error("Did not get encoded frames ", res.Encoded[1].Frames)
 		}
 	}
@@ -68,7 +68,7 @@ func TestAPI_SkippedSegment(t *testing.T) {
 
       # sanity check ffmpeg frame count against ours
       ffprobe -count_frames -show_streams -select_streams v ffmpeg_sw_$1.ts | grep nb_read_frames=246
-      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245
+      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246
 
     # check image quality
     # TODO really should have frame counts match for ssim
@@ -224,18 +224,14 @@ func countEncodedFrames(t *testing.T, accel Acceleration) {
 		if err != nil {
 			t.Error(err)
 		}
-		expectedFrames := 60
-		if i == 1 || i == 3 {
-			expectedFrames = 61 // TODO figure out why this is!
-		}
-		if res.Encoded[0].Frames != expectedFrames {
-			t.Error(in.Fname, " Mismatched frame count: expected ", expectedFrames, " got ", res.Encoded[0].Frames)
+		if res.Encoded[0].Frames != 60 {
+			t.Error(in.Fname, " Mismatched frame count: expected 60 got ", res.Encoded[0].Frames)
 		}
 		if res.Encoded[1].Frames != 120 {
 			t.Error(in.Fname, " Mismatched frame count: expected 120 got ", res.Encoded[1].Frames)
 		}
-		if res.Encoded[2].Frames != 239 {
-			t.Error(in.Fname, " Mismatched frame count: expected 239 got ", res.Encoded[2].Frames)
+		if res.Encoded[2].Frames != 240 {
+			t.Error(in.Fname, " Mismatched frame count: expected 240 got ", res.Encoded[2].Frames)
 		}
 		if res.Encoded[3].Frames != 120 {
 			t.Error(in.Fname, " Mismatched frame count: expected 120 got ", res.Encoded[3].Frames)
@@ -257,33 +253,33 @@ func countEncodedFrames(t *testing.T, accel Acceleration) {
 pts=129000
 pts=129750
 pts=130500
-pts=306000
 pts=306750
 pts=307500
+pts=308250
 
 ==> out_120fps_1.ts.pts <==
 pts=309000
 pts=309750
 pts=310500
-pts=486000
 pts=486750
 pts=487500
+pts=488250
 
 ==> out_120fps_2.ts.pts <==
 pts=489000
 pts=489750
 pts=490500
-pts=666000
 pts=666750
 pts=667500
+pts=668250
 
 ==> out_120fps_3.ts.pts <==
 pts=669000
 pts=669750
 pts=670500
-pts=846000
 pts=846750
 pts=847500
+pts=848250
 
 ==> out_30fps_0.ts.pts <==
 pts=129000
@@ -297,9 +293,9 @@ pts=306000
 pts=309000
 pts=312000
 pts=315000
+pts=480000
 pts=483000
 pts=486000
-pts=489000
 
 ==> out_30fps_2.ts.pts <==
 pts=489000
@@ -313,9 +309,9 @@ pts=666000
 pts=669000
 pts=672000
 pts=675000
+pts=840000
 pts=843000
 pts=846000
-pts=849000
 
 ==> out_60fps_0.ts.pts <==
 pts=129000
@@ -463,8 +459,8 @@ func TestTranscoder_API_AlternatingTimestamps(t *testing.T) {
 
       # sanity check ffmpeg frame count against ours
       ffprobe -count_frames -show_streams -select_streams v ffmpeg_sw_$1.ts | grep nb_read_frames=246
-      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245
-      ffprobe -count_frames -show_streams -select_streams v sw_audio_encode_$1.ts | grep nb_read_frames=245
+      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246
+      ffprobe -count_frames -show_streams -select_streams v sw_audio_encode_$1.ts | grep nb_read_frames=246
 
     # check image quality
     # TODO frame count should really match for ssim
@@ -476,12 +472,146 @@ func TestTranscoder_API_AlternatingTimestamps(t *testing.T) {
     # Really should check relevant audio as well...
     }
 
-
-    # re-enable for seg 0 and 1 when alternating timestamps can be handled
     check 0
     check 1
     check 2
     check 3
+	`
+	run(cmd)
+
+}
+
+func TestTranscoder_API_DTSOverlap(t *testing.T) {
+	dtsOverlap(t, Software)
+}
+
+func dtsOverlap(t *testing.T, accel Acceleration) {
+	// Non-monotonic DTS timestamps are a major problem.
+	// We have one such case here when:
+	// 1. first segment pts starts near zero
+	// 2. B-frames are in use
+	// 3. mpegts is the output format
+	//
+	// the transcoder can produce DTS < 0, PTS = 0 which gets
+	// offset to DTS = 0, PTS = -DTS in the mpegts muxer
+	//
+	// However, transcodes for other segments will not be aware
+	// of this delay, leading to overlap between the first and
+	// second segments.
+	//
+	// This is not a LPMS specific issue but rather one in the
+	// employment of mpegts (always add an offset!); ffmpeg has
+	// the exact same issue.
+
+	// This test case codifies this behavior for now as a sign
+	// that we are aware of it, and if it ever changes somehow,
+	// this should fail and let us know.
+
+	run, dir := setupTest(t)
+	defer os.RemoveAll(dir)
+
+	err := RTMPToHLS("../transcoder/test.ts", dir+"/out.m3u8", dir+"/out_%d.ts", "2", 0)
+	if err != nil {
+		t.Error(err)
+	}
+
+	profile := P144p30fps16x9
+	profile.Framerate = 15
+	profile.Profile = ProfileH264Main
+	// and check no bframes case (which is ok)
+	profileNoB := profile
+	profileNoB.Profile = ProfileH264ConstrainedHigh
+	tc := NewTranscoder()
+	defer tc.StopTranscoder()
+	idx := []int{1, 0}
+	for _, i := range idx {
+		in := &TranscodeOptionsIn{Fname: fmt.Sprintf("%s/out_%d.ts", dir, i)}
+		out := []TranscodeOptions{{
+			Oname:        fmt.Sprintf("%s/bf_%d.ts", dir, i),
+			Profile:      profile,
+			AudioEncoder: ComponentOptions{Name: "copy"},
+			Accel:        accel,
+		}, {
+			Oname:        fmt.Sprintf("%s/nobf_%d.ts", dir, i),
+			Profile:      profileNoB,
+			AudioEncoder: ComponentOptions{Name: "copy"},
+			Accel:        accel,
+		}}
+		res, err := tc.Transcode(in, out)
+		if err != nil {
+			t.Error(err)
+		}
+		if res != nil {
+			if res.Decoded.Frames != 120 {
+				t.Error("Did not get decoded frames", res.Decoded.Frames)
+			}
+			if res.Encoded[0].Frames != 30 {
+				t.Error("Mismatched frame count for hw/nv")
+			}
+		}
+	}
+
+	cmd := `
+
+		# ffmpeg has the exact same problem so let's demonstrate that too
+		ffmpeg -loglevel warning -i out_0.ts -c:a copy \
+			-vf fps=15,scale=w=256:h=144 -c:v libx264 -muxdelay 0 -muxpreload 0 -copyts \
+			ffmpeg_sw_0.ts
+		ffmpeg -loglevel warning -i out_1.ts -c:a copy \
+			-vf fps=15,scale=w=256:h=144 -c:v libx264 -muxdelay 0 -muxpreload 0 -copyts \
+			ffmpeg_sw_1.ts
+
+
+		# Ensure timestamps are monotonic by checking deltas
+		# do this for the low fps rendition since those are more
+		# likely to have issues while downsampling fps
+		function calc_delta {
+			cat $1_0.ts $1_1.ts > $1_concat.ts
+			mapfile -t dts_times < <(ffprobe -hide_banner -select_streams v -of csv=p=0 -show_entries packet=dts_time $1_concat.ts | awk -F',' '{print $1}')
+			# Loop through the array and calculate the delta
+			for ((i = 1; i < ${#dts_times[@]}; i++)); do
+				delta=$(echo "${dts_times[$i]} - ${dts_times[$i-1]}" | bc -l)
+				echo "$delta" >> $1_concat.delta
+			done
+			sort $1_concat.delta | uniq -c | sed 's/^ *//g' > $2
+		}
+
+		calc_delta bf deltas.out
+		calc_delta nobf deltas_nobf.out
+		calc_delta ffmpeg_sw ffmpeg_deltas.out
+	`
+
+	if accel == Nvidia {
+		cmd = cmd + `
+			cat <<-EOF > expected_deltas.out
+				1 -.133333
+				20 .066666
+				38 .066667
+			EOF
+		`
+	} else {
+		// for sw transcode, ffmpeg and lpms are exactly the same
+		cmd = cmd + `
+			diff -u deltas.out ffmpeg_deltas.out
+
+			cat <<-EOF > expected_deltas.out
+				1 -.066666
+				20 .066666
+				38 .066667
+			EOF
+	`
+	}
+
+	cmd = cmd + `
+		diff -u expected_deltas.out deltas.out
+
+		# no b-frames case
+		cat <<-EOF > expected_deltas_nobf.out
+			20 .066666
+			39 .066667
+		EOF
+		diff -u expected_deltas_nobf.out deltas_nobf.out
+
   `
 	run(cmd)
 }
@@ -795,11 +925,7 @@ func consecutiveMP4s(t *testing.T, accel Acceleration) {
 				t.Error("Unexpected error ", err)
 				continue
 			}
-			expectedFrames := 60
-			if i == 1 || i == 3 {
-				expectedFrames = 61 // TODO figure out why this is!
-			}
-			if res.Decoded.Frames != 120 || res.Encoded[0].Frames != expectedFrames {
+			if res.Decoded.Frames != 120 || res.Encoded[0].Frames != 60 {
 				t.Error("Unexpected results ", i, inExt, outExt, res)
 			}
 		}
@@ -1119,9 +1245,8 @@ func setGops(t *testing.T, accel Acceleration) {
 
         # intra checks with fixed fps.
         # sanity check number of packets vs keyframes
-        # TODO look into why lpms generates 91 frames instead of 100
-        ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags= | wc -l | grep 91
-        ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags=K | wc -l | grep 91
+        ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags= | wc -l | grep 100
+        ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags=K | wc -l | grep 100
     `
 	run(cmd)
 
diff --git a/ffmpeg/encoder.c b/ffmpeg/encoder.c
index 0620ff02e2..887a9f5742 100755
--- a/ffmpeg/encoder.c
+++ b/ffmpeg/encoder.c
@@ -159,11 +159,9 @@ void close_output(struct output_ctx *octx)
   }
   if (octx->vc && octx->hw_type == AV_HWDEVICE_TYPE_NONE) avcodec_free_context(&octx->vc);
   if (octx->ac) avcodec_free_context(&octx->ac);
+  free_filter(&octx->vf);
   octx->af.flushed = octx->vf.flushed = 0;
   octx->af.flushing = octx->vf.flushing = 0;
-  octx->vf.pts_diff = INT64_MIN;
-  octx->vf.prev_frame_pts = 0;
-  octx->vf.segments_complete++;
 }
 
 void free_output(struct output_ctx *octx)
@@ -314,6 +312,8 @@ int reopen_output(struct output_ctx *octx, struct input_ctx *ictx)
 
   // re-attach video encoder
   if (octx->vc) {
+    ret = init_video_filters(ictx, octx);
+    if (ret < 0) LPMS_ERR(reopen_out_err, "Unable to reopen filter")
     ret = add_video_stream(octx, ictx);
     if (ret < 0) LPMS_ERR(reopen_out_err, "Unable to re-add video stream");
   } else LPMS_INFO("No video stream!?");
@@ -470,6 +470,19 @@ int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost)
     av_packet_rescale_ts(pkt, tb, ost->time_base);
   }
 
+  /* Enable this if it seems we have issues
+     with the first and second segments overlapping due to bframes
+     See TestTranscoder_API_DTSOverlap
+
+  int delay = av_rescale_q(10, (AVRational){1, 1}, ost->time_base);
+  if (pkt->dts != AV_NOPTS_VALUE) {
+    pkt->dts += delay;
+  }
+  if (pkt->pts != AV_NOPTS_VALUE) {
+    pkt->pts += delay;
+  }
+  */
+
   // drop any preroll audio. may need to drop multiple packets for multichannel
   // XXX this breaks if preroll isn't exactly one AVPacket or drop_ts == 0
   //     hasn't been a problem in practice (so far)
diff --git a/ffmpeg/ffmpeg_test.go b/ffmpeg/ffmpeg_test.go
index 1b3f1e6280..1e610ad0c6 100644
--- a/ffmpeg/ffmpeg_test.go
+++ b/ffmpeg/ffmpeg_test.go
@@ -493,16 +493,17 @@ func TestTranscoder_Statistics_Encoded(t *testing.T) {
 			t.Error("Mismatched pixel counts")
 		}
 		// Since this is a 1-second input we should ideally have count of frames
-		if r.Frames != int(out[i].Profile.Framerate+1) {
-
+		if r.Frames == int(out[i].Profile.Framerate) {
+			// cool all good
+		} else {
 			// Some "special" cases (already have test cases covering these)
 			if p144p60fps == out[i].Profile {
 				if r.Frames != int(out[i].Profile.Framerate)+1 {
 					t.Error("Mismatched frame counts for 60fps; expected 61 frames but got ", r.Frames)
 				}
 			} else if podd123fps == out[i].Profile {
-				if r.Frames != 124 {
-					t.Error("Mismatched frame counts for 123fps; expected 124 frames but got ", r.Frames)
+				if r.Frames != 125 {
+					t.Error("Mismatched frame counts for 123fps; expected 125 frames but got ", r.Frames)
 				}
 			} else {
 				t.Error("Mismatched frame counts ", r.Frames, out[i].Profile.Framerate)
@@ -559,7 +560,7 @@ func TestTranscoder_StatisticsAspectRatio(t *testing.T) {
 		t.Error(err)
 	}
 	r := res.Encoded[0]
-	if r.Frames != int(pAdj.Framerate+1) || r.Pixels != int64(r.Frames*146*82) {
+	if r.Frames != int(pAdj.Framerate) || r.Pixels != int64(r.Frames*146*82) {
 		t.Error(fmt.Errorf("Results did not match: %v ", r))
 	}
 }
@@ -851,7 +852,7 @@ func TestTranscoder_StreamCopy(t *testing.T) {
 	if err != nil {
 		t.Error(err)
 	}
-	if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 31 ||
+	if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 30 ||
 		res.Encoded[1].Frames != 0 {
 		t.Error("Unexpected frame counts from stream copy")
 		t.Error(res)
@@ -995,7 +996,7 @@ func TestTranscoder_Drop(t *testing.T) {
 	if err != nil {
 		t.Error(err)
 	}
-	if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 31 {
+	if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 30 {
 		t.Error("Unexpected count of decoded frames ", res.Decoded.Frames, res.Decoded.Pixels)
 	}
 
@@ -1027,7 +1028,7 @@ func TestTranscoder_Drop(t *testing.T) {
 	if err != nil {
 		t.Error(err)
 	}
-	if res.Decoded.Frames != 31 || res.Encoded[0].Frames != 31 {
+	if res.Decoded.Frames != 30 || res.Encoded[0].Frames != 30 {
 		t.Error("Unexpected encoded/decoded frame counts ", res.Decoded.Frames, res.Encoded[0].Frames)
 	}
 	in.Fname = dir + "/novideo.ts"
@@ -1223,7 +1224,7 @@ func TestTranscoder_RepeatedTranscodes(t *testing.T) {
 	in = &TranscodeOptionsIn{Fname: dir + "/test-short-with-audio.ts"}
 	out = []TranscodeOptions{{Oname: dir + "/audio-0.ts", Profile: P144p30fps16x9}}
 	res, err = Transcode3(in, out)
-	if err != nil || res.Decoded.Frames != 60 || res.Encoded[0].Frames != 31 {
+	if err != nil || res.Decoded.Frames != 60 || res.Encoded[0].Frames != 30 {
 		t.Error("Unexpected preconditions ", err, res)
 	}
 	frames = res.Encoded[0].Frames
@@ -1398,7 +1399,7 @@ nb_read_frames=%d
 		b.Flush()
 
 		// Run a ffmpeg command that attempts to match the given encode settings
-		run(fmt.Sprintf(`ffmpeg -loglevel warning -hide_banner -i %s -vsync passthrough -c:a aac -ar 44100 -ac 2 -c:v libx264 -vf fps=%d/1:eof_action=pass,scale=%dx%d -copyts -muxdelay 0 -y ffmpeg.ts`, in.Fname, out.Profile.Framerate, w, h))
+		run(fmt.Sprintf(`ffmpeg -loglevel warning -hide_banner -i %s -vsync passthrough -c:a aac -ar 44100 -ac 2 -c:v libx264 -vf fps=%d/1,scale=%dx%d -copyts -muxdelay 0 -y ffmpeg.ts`, in.Fname, out.Profile.Framerate, w, h))
 
 		// Gather some ffprobe stats on the output of the above ffmpeg command
 		run(`ffprobe -loglevel warning -hide_banner -count_frames -select_streams v -show_streams 2>&1 ffmpeg.ts | grep '^width=\|^height=\|nb_read_frames=' > ffmpeg.stats`)
@@ -1438,7 +1439,7 @@ nb_read_frames=%d
 	if err != nil {
 		t.Error(err)
 	}
-	if res.Encoded[0].Frames != 31 {
+	if res.Encoded[0].Frames != 30 {
 		t.Error("Did not get expected frame count ", res.Encoded[0].Frames)
 	}
 	checkStatsFile(in, &out[0], res)
@@ -1464,10 +1465,10 @@ nb_read_frames=%d
 	if err != nil {
 		t.Error(err)
 	}
-	if res.Encoded[0].Frames != 124 { // TODO Find out why this isn't 126 (ffmpeg)
+	if res.Encoded[0].Frames != 125 { // TODO Find out why this isn't 126 (ffmpeg)
 		t.Error("Did not get expected frame count ", res.Encoded[0].Frames)
 	}
-	// checkStatsFile(in, &out[0], res) // TODO framecounts don't match ffmpeg
+	checkStatsFile(in, &out[0], res)
 }
 
 func TestTranscoder_PassthroughFPS(t *testing.T) {
@@ -2037,9 +2038,10 @@ PTS_EOF
 	// check output
 	cmd = `
     # reproduce expected lpms output using ffmpeg
-    ffmpeg -debug_ts -loglevel trace -i in.ts -vf 'scale=136x240,fps=30/1:eof_action=pass' -c:v libx264 -copyts -muxdelay 0 out-ffmpeg.ts
+    ffmpeg -debug_ts -loglevel trace -i in.ts -vf 'scale=136x240,fps=30/1' -c:v libx264 -copyts -muxdelay 0 out-ffmpeg.ts
 
-    ffprobe -show_packets out-ffmpeg.ts | grep dts= > ffmpeg-dts.out
+		# ffmpeg produces one more packet than lpms in this case so just trim that out
+    ffprobe -show_packets out-ffmpeg.ts | grep dts= | head -n -1 > ffmpeg-dts.out
     ffprobe -show_packets out0in.ts | grep dts= > lpms-dts.out
 
     diff -u lpms-dts.out ffmpeg-dts.out
@@ -2195,7 +2197,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
 	require.NoError(t, err)
 
 	assert.Equal(t, 360, res.Decoded.Frames)
-	assert.Equal(t, 181, res.Encoded[0].Frames) // should be 180 ... ts rounding ?
+	assert.Equal(t, 180, res.Encoded[0].Frames)
 	assert.Equal(t, 360, res.Encoded[1].Frames)
 
 	// TODO test rollover of gop interval during flush
@@ -2230,7 +2232,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
 			cat <<-EOF2 > expected-30fps.dims
 				58 256,144
 				60 146,260
-				63 256,144
+				62 256,144
 			EOF2
 		`
 	} else {
@@ -2244,7 +2246,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
 			cat <<-EOF2 > expected-30fps.dims
 				60 256,144
 				60 146,260
-				61 256,144
+				60 256,144
 			EOF2
 		`
 	}
@@ -2335,7 +2337,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
 	require.NoError(t, err)
 
 	assert.Equal(t, 240, res.Decoded.Frames)
-	assert.Equal(t, 121, res.Encoded[0].Frames) // should be 120 ... ts rounding ?
+	assert.Equal(t, 120, res.Encoded[0].Frames)
 	assert.Equal(t, 240, res.Encoded[1].Frames)
 
 	cmd = `
@@ -2354,7 +2356,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
 
 			cat <<-EOF2 > single-expected-30fps.dims
 				58 256,144
-				63 146,260
+				62 146,260
 			EOF2
 		`
 	} else {
@@ -2366,7 +2368,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
 
 			cat <<-EOF2 > single-expected-30fps.dims
 				60 256,144
-				61 146,260
+				60 146,260
 			EOF2
 		`
 	}
diff --git a/ffmpeg/filter.c b/ffmpeg/filter.c
index e3a1d93e8c..366375cec9 100644
--- a/ffmpeg/filter.c
+++ b/ffmpeg/filter.c
@@ -70,7 +70,6 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
     if (vf->graph == NULL) {
       vf->graph = avfilter_graph_alloc();
     }
-    vf->pts_diff = INT64_MIN;
     if (!outputs || !inputs || !vf->graph) {
       ret = AVERROR(ENOMEM);
       LPMS_ERR(vf_init_cleanup, "Unable to allocate filters");
@@ -124,6 +123,7 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
     if (!vf->frame) LPMS_ERR(vf_init_cleanup, "Unable to allocate video frame");
 
     vf->active = 1;
+    vf->closed = 0;
 
 vf_init_cleanup:
     avfilter_inout_free(&inputs);
@@ -222,7 +222,6 @@ int init_signature_filters(struct output_ctx *octx, AVFrame *inf)
     outputs = avfilter_inout_alloc();
     inputs = avfilter_inout_alloc();
     sf->graph = avfilter_graph_alloc();
-    sf->pts_diff = INT64_MIN;
     if (!outputs || !inputs || !sf->graph) {
       ret = AVERROR(ENOMEM);
       LPMS_ERR(sf_init_cleanup, "Unable to allocate filters");
@@ -283,6 +282,7 @@ int init_signature_filters(struct output_ctx *octx, AVFrame *inf)
 
 int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *octx, struct filter_ctx *filter, int is_video)
 {
+  if (filter->closed) return 0;
   int ret = 0;
   // We have to reset the filter because we initially set the filter
   // before the decoder is fully ready, and the decoder may change HW params
@@ -334,21 +334,15 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
   AVStream *vst = ictx->ic->streams[ictx->vi];
   if (inf) { // Non-Flush Frame
     inf->opaque = (void *) inf->pts; // Store original PTS for calc later
-    if (is_video && octx->fps.den) {
-      // Custom PTS set when FPS filter is used
-      int64_t ts_step = inf->pts - filter->prev_frame_pts;
-      if (filter->segments_complete && !filter->prev_frame_pts) {
-        // We are on the first frame of the second (or later) segment
-        // So in this case just increment the pts by 1/fps
-        ts_step = av_rescale_q_rnd(1, av_inv_q(octx->fps), vst->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-      }
-      filter->custom_pts += ts_step;
-      filter->prev_frame_pts = inf->pts;
-    } else {
-      // FPS Passthrough or Audio case
-      filter->custom_pts = inf->pts;
-    }
+    filter->custom_pts = inf->pts;
   } else if (!filter->flushed) { // Flush Frame
+    // close filter right away if we already have some frames
+    if (octx->res->frames) {
+      filter->closed = 1;
+      return av_buffersrc_write_frame(filter->src_ctx, NULL);
+    }
+    // we don't have frames yet so flush the filter
+    // needed for extremely short or low-fps content
     int64_t ts_step;
     inf = (is_video) ? ictx->last_frame_v : ictx->last_frame_a;
     inf->opaque = (void *) (INT64_MIN); // Store INT64_MIN as pts for flush frames
@@ -391,17 +385,6 @@ int filtergraph_read(struct input_ctx *ictx, struct output_ctx *octx, struct fil
       // don't set flushed flag in case this is a flush from a previous segment
       if (filter->flushing) filter->flushed = 1;
       ret = lpms_ERR_FILTER_FLUSHED;
-    } else if (frame && is_video && octx->fps.den) {
-      // TODO why limit to fps filter? what about non-fps filtergraphs, eg scale?
-      // We set custom PTS as an input of the filtergraph so we need to
-      // re-calculate our output PTS before passing it on to the encoder
-      if (filter->pts_diff == INT64_MIN) {
-        int64_t pts = (int64_t)frame->opaque; // original input PTS
-        pts = av_rescale_q_rnd(pts, ictx->ic->streams[ictx->vi]->time_base, av_buffersink_get_time_base(filter->sink_ctx), AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-        // difference between rescaled input PTS and the segment's first frame PTS of the filtergraph output
-        filter->pts_diff = pts - frame->pts;
-      }
-      frame->pts += filter->pts_diff; // Re-calculate by adding back this segment's difference calculated at start
     }
 fg_read_cleanup:
     return ret;
diff --git a/ffmpeg/filter.h b/ffmpeg/filter.h
index 412f185d0f..9c52b53da3 100755
--- a/ffmpeg/filter.h
+++ b/ffmpeg/filter.h
@@ -22,19 +22,6 @@ struct filter_ctx {
   // uniformly and monotonically increasing.
   int64_t custom_pts;
 
-  // Previous PTS to be used to manually calculate duration for custom_pts
-  int64_t prev_frame_pts;
-
-  // Count of complete segments that have been processed by this filtergraph
-  int segments_complete;
-
-  // We need to update the post-filtergraph PTS before sending the frame for
-  // encoding because we modified the input PTS.
-  // We do this by calculating the difference between our custom PTS and actual
-  // PTS for the first-frame of every segment, and then applying this diff to
-  // every subsequent frame in the segment.
-  int64_t pts_diff;
-
   // When draining the filtergraph, we inject fake frames.
   // These frames have monotonically increasing timestamps at the same interval
   // as a normal stream of frames. The custom_pts is set to more than usual jump
@@ -43,6 +30,8 @@ struct filter_ctx {
   // We mark this boolean as flushed when done flushing.
   int flushed;
   int flushing;
+
+  int closed;
 };
 
 struct output_ctx {
diff --git a/ffmpeg/nvidia_test.go b/ffmpeg/nvidia_test.go
index 0206c98285..80efb5801b 100755
--- a/ffmpeg/nvidia_test.go
+++ b/ffmpeg/nvidia_test.go
@@ -423,12 +423,10 @@ func TestNvidia_DrainFilters(t *testing.T) {
     # sanity check with ffmpeg itself
     ffmpeg -loglevel warning -i test.ts -c:a copy -c:v libx264 -vf fps=100 -vsync 0 ffmpeg-out.ts
 
-    ffprobe -loglevel warning -show_streams -select_streams v -count_frames ffmpeg-out.ts > ffmpeg.out
-    ffprobe -loglevel warning -show_streams -select_streams v -count_frames out.ts > probe.out
+    ffprobe -loglevel warning -select_streams v -count_frames -show_entries stream=nb_read_frames,time_base,duration,r_frame_rate,avg_frame_rate,start_pts,start_time,duration_ts,duration ffmpeg-out.ts > ffmpeg.out
+    ffprobe -loglevel warning -select_streams v -count_frames -show_entries stream=nb_read_frames,time_base,duration,r_frame_rate,avg_frame_rate,start_pts,start_time,duration_ts,duration out.ts > probe.out
 
-    # These used to be same, but aren't since we've diverged the flushing and PTS handling from ffmpeg
-    grep nb_read_frames=101 probe.out
-    grep duration=1.0100 probe.out
+		diff -u ffmpeg.out probe.out
     grep nb_read_frames=102 ffmpeg.out
     grep duration=1.0200 ffmpeg.out
   `
@@ -559,9 +557,9 @@ func TestNvidia_API_MixedOutput(t *testing.T) {
 
       # sanity check ffmpeg frame count against ours
       ffprobe -count_frames -show_streams -select_streams v ffmpeg_nv_$1.ts | grep nb_read_frames=246
-      ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=245
-      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245
-      ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=245
+      ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=246
+      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246
+      ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=246
 
     # check image quality
     ffmpeg -loglevel warning -i nv_$1.ts -i ffmpeg_nv_$1.ts \
@@ -601,6 +599,7 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) {
 	profile := P144p30fps16x9
 	profile.Framerate = 123
 	tc := NewTranscoder()
+	defer tc.StopTranscoder()
 	idx := []int{1, 0, 3, 2}
 	for _, i := range idx {
 		in := &TranscodeOptionsIn{Fname: fmt.Sprintf("%s/out_%d.ts", dir, i)}
@@ -623,9 +622,6 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) {
 			Profile: profile,
 		}}
 		res, err := tc.Transcode(in, out)
-		if (i == 1 || i == 3) && err != nil {
-			t.Error(err)
-		}
 		if err != nil {
 			t.Error(err)
 		}
@@ -650,9 +646,9 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) {
 
       # sanity check ffmpeg frame count against ours
       ffprobe -count_frames -show_streams -select_streams v ffmpeg_nv_$1.ts | grep nb_read_frames=246
-      ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=245
-      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245
-      ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=245
+      ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=246
+      ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246
+      ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=246
 
     # check image quality
     ffmpeg -loglevel warning -i nv_$1.ts -i ffmpeg_nv_$1.ts \
@@ -673,11 +669,13 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) {
     check 1
     check 2
     check 3
-  `
+	`
 	run(cmd)
-	tc.StopTranscoder()
 }
 
+func TestNvidia_DTSOverlap(t *testing.T) {
+	dtsOverlap(t, Nvidia)
+}
 func TestNvidia_ShortSegments(t *testing.T) {
 	shortSegments(t, Nvidia, 1)
 	shortSegments(t, Nvidia, 2)