From 540368fef925fd21e125e7dca76b060fdec5588e Mon Sep 17 00:00:00 2001 From: Yondon Fu Date: Sat, 20 Jan 2024 15:02:59 +0000 Subject: [PATCH 1/4] ffmpeg: Use helper to check for video metadata --- ffmpeg/ffmpeg.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ffmpeg/ffmpeg.go b/ffmpeg/ffmpeg.go index 4625b5a21a..0e0b119786 100755 --- a/ffmpeg/ffmpeg.go +++ b/ffmpeg/ffmpeg.go @@ -850,6 +850,19 @@ func destroyCOutputParams(params []C.output_params) { } } +func hasVideoMetadata(fname string) bool { + if strings.HasPrefix(strings.ToLower(fname), "pipe:") { + return false + } + + fileInfo, err := os.Stat(fname) + if err != nil { + return false + } + + return !fileInfo.IsDir() +} + func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) (*TranscodeResults, error) { t.mu.Lock() defer t.mu.Unlock() @@ -861,8 +874,8 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) } var reopendemux bool reopendemux = false - // don't read metadata for pipe input, because it can't seek back and av_find_input_format in the decoder will fail - if !strings.HasPrefix(strings.ToLower(input.Fname), "pipe:") { + // don't read metadata for inputs without video metadata, because it can't seek back and av_find_input_format in the decoder will fail + if hasVideoMetadata(input.Fname) { status, format, err := GetCodecInfo(input.Fname) if err != nil { return nil, err From 47c34ef1775f1a947a484468ff806169c3c88b08 Mon Sep 17 00:00:00 2001 From: Yondon Fu Date: Sat, 20 Jan 2024 15:04:05 +0000 Subject: [PATCH 2/4] ffmpeg: Support image2 demuxer --- ffmpeg/decoder.c | 6 +++++- ffmpeg/ffmpeg.go | 34 +++++++++++++++++++++++++++++++++- ffmpeg/transcoder.c | 10 ++++++++-- ffmpeg/transcoder.h | 2 ++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/ffmpeg/decoder.c b/ffmpeg/decoder.c index 8fd0fa3dd3..907751c1bb 100755 --- a/ffmpeg/decoder.c +++ b/ffmpeg/decoder.c @@ -338,8 +338,12 @@ int 
open_input(input_params *params, struct input_ctx *ctx) ctx->transmuxing = params->transmuxing; // open demuxer - ret = avformat_open_input(&ic, inp, NULL, NULL); + AVDictionary **demuxer_opts = NULL; + if (params->demuxer.opts) demuxer_opts = &params->demuxer.opts; + ret = avformat_open_input(&ic, inp, NULL, demuxer_opts); if (ret < 0) LPMS_ERR(open_input_err, "demuxer: Unable to open input"); + // If avformat_open_input replaced the options AVDictionary with options that were not found free it + if (demuxer_opts) av_dict_free(demuxer_opts); ctx->ic = ic; ret = avformat_find_stream_info(ic, NULL); if (ret < 0) LPMS_ERR(open_input_err, "Unable to find input info"); diff --git a/ffmpeg/ffmpeg.go b/ffmpeg/ffmpeg.go index 0e0b119786..072bd4a9c6 100755 --- a/ffmpeg/ffmpeg.go +++ b/ffmpeg/ffmpeg.go @@ -97,6 +97,7 @@ type TranscodeOptionsIn struct { Accel Acceleration Device string Transmuxing bool + Profile VideoProfile } type TranscodeOptions struct { @@ -649,6 +650,11 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C. 
// needed for hw dec -> hw rescale -> sw enc filters = filters + ",hwdownload,format=nv12" } + if p.Accel == Nvidia && filepath.Ext(input.Fname) == ".png" { + // If the input is PNG image(s) and we are scaling on a Nvidia device + // we need to first convert to a pixel format that the scale_npp filter supports + filters = "format=nv12," + filters + } // set FPS denominator to 1 if unset by user if param.FramerateDen == 0 { param.FramerateDen = 1 @@ -955,8 +961,34 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) defer C.free(unsafe.Pointer(fname)) xcoderParams := C.CString("") defer C.free(unsafe.Pointer(xcoderParams)) + + var demuxerOpts C.component_opts + + ext := filepath.Ext(input.Fname) + // If the input has an image file extension setup the image2 demuxer + if ext == ".png" { + image2 := C.CString("image2") + defer C.free(unsafe.Pointer(image2)) + + demuxerOpts = C.component_opts{ + name: image2, + } + + if input.Profile.Framerate > 0 { + if input.Profile.FramerateDen == 0 { + input.Profile.FramerateDen = 1 + } + + // Do not try tofree in this function because in the C code avformat_open_input() + // will destroy this + demuxerOpts.opts = newAVOpts(map[string]string{ + "framerate": fmt.Sprintf("%d/%d", input.Profile.Framerate, input.Profile.FramerateDen), + }) + } + } + inp := &C.input_params{fname: fname, hw_type: hw_type, device: device, xcoderParams: xcoderParams, - handle: t.handle} + handle: t.handle, demuxer: demuxerOpts} if input.Transmuxing { inp.transmuxing = 1 } diff --git a/ffmpeg/transcoder.c b/ffmpeg/transcoder.c index da7caf3492..ab08b9c6c2 100755 --- a/ffmpeg/transcoder.c +++ b/ffmpeg/transcoder.c @@ -160,17 +160,23 @@ int transcode_init(struct transcode_thread *h, input_params *inp, if (!inp) LPMS_ERR(transcode_cleanup, "Missing input params") + AVDictionary **demuxer_opts; + if (inp->demuxer.opts) demuxer_opts = &inp->demuxer.opts; + // by default we re-use decoder between segments of same stream // unless we 
are using SW deocder and had to re-open IO or demuxer if (!ictx->ic) { // reopen demuxer for the input segment if needed // XXX could open_input() be re-used here? - ret = avformat_open_input(&ictx->ic, inp->fname, NULL, NULL); + ret = avformat_open_input(&ictx->ic, inp->fname, NULL, demuxer_opts); if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to reopen demuxer"); + // If avformat_open_input replaced the options AVDictionary with options that were not found free it + if (demuxer_opts) av_dict_free(demuxer_opts); ret = avformat_find_stream_info(ictx->ic, NULL); if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to find info for reopened stream") - } else if (!ictx->ic->pb) { + } else if (is_mpegts(ictx->ic) && !ictx->ic->pb) { // reopen input segment file IO context if needed + // only necessary for mpegts ret = avio_open(&ictx->ic->pb, inp->fname, AVIO_FLAG_READ); if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to reopen file"); } else reopen_decoders = 0; diff --git a/ffmpeg/transcoder.h b/ffmpeg/transcoder.h index 846dce1b6e..8c1c4888b4 100755 --- a/ffmpeg/transcoder.h +++ b/ffmpeg/transcoder.h @@ -51,6 +51,8 @@ typedef struct { char *device; char *xcoderParams; + // Optional demuxer opts + component_opts demuxer; // Optional video decoder + opts component_opts video; From 4996dc234221075a875fd224b32948a694d0d07c Mon Sep 17 00:00:00 2001 From: Elite Encoder Date: Thu, 11 Jul 2024 13:52:20 -0400 Subject: [PATCH 3/4] Add media duration to lpms_get_codec_info for GetCodecInfo (#407) * add fps and duration to GetCodecInfo --- ffmpeg/decoder.c | 2 +- ffmpeg/extras.c | 4 ++++ ffmpeg/extras.h | 2 ++ ffmpeg/ffmpeg.go | 6 +++++- ffmpeg/ffmpeg_test.go | 47 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 58 insertions(+), 3 deletions(-) diff --git a/ffmpeg/decoder.c b/ffmpeg/decoder.c index 907751c1bb..6b2019efcb 100755 --- a/ffmpeg/decoder.c +++ b/ffmpeg/decoder.c @@ -337,7 +337,7 @@ int open_input(input_params *params, struct input_ctx *ctx) 
ctx->transmuxing = params->transmuxing; - // open demuxer/ open demuxer AVDictionary **demuxer_opts = NULL; if (params->demuxer.opts) demuxer_opts = &params->demuxer.opts; ret = avformat_open_input(&ic, inp, NULL, demuxer_opts); diff --git a/ffmpeg/extras.c b/ffmpeg/extras.c index 471dd48d31..82a7609e1e 100644 --- a/ffmpeg/extras.c +++ b/ffmpeg/extras.c @@ -164,6 +164,9 @@ int lpms_get_codec_info(char *fname, pcodec_info out) // instead of returning -1 ret = GET_CODEC_STREAMS_MISSING; } + if (ic->duration != AV_NOPTS_VALUE) { + out->dur = ic->duration / AV_TIME_BASE; + } // Return if (video_present && vc->name) { strncpy(out->video_codec, vc->name, MIN(strlen(out->video_codec), strlen(vc->name))+1); @@ -176,6 +179,7 @@ int lpms_get_codec_info(char *fname, pcodec_info out) } out->width = ic->streams[vstream]->codecpar->width; out->height = ic->streams[vstream]->codecpar->height; + out->fps = av_q2d(ic->streams[vstream]->r_frame_rate); } else { // Indicate failure to extract video codec from given container out->video_codec[0] = 0; diff --git a/ffmpeg/extras.h b/ffmpeg/extras.h index 96f172a1e6..b06b0903ac 100644 --- a/ffmpeg/extras.h +++ b/ffmpeg/extras.h @@ -7,6 +7,8 @@ typedef struct s_codec_info { int pixel_format; int width; int height; + double fps; + double dur; } codec_info, *pcodec_info; int lpms_rtmp2hls(char *listen, char *outf, char *ts_tmpl, char *seg_time, char *seg_start); diff --git a/ffmpeg/ffmpeg.go b/ffmpeg/ffmpeg.go index 072bd4a9c6..94d9ff33d6 100755 --- a/ffmpeg/ffmpeg.go +++ b/ffmpeg/ffmpeg.go @@ -245,6 +245,8 @@ type MediaFormatInfo struct { Acodec, Vcodec string PixFormat PixelFormat Width, Height int + FPS float32 + DurSecs int64 } func (f *MediaFormatInfo) ScaledHeight(width int) int { @@ -277,6 +279,8 @@ func GetCodecInfo(fname string) (CodecStatus, MediaFormatInfo, error) { format.PixFormat = PixelFormat{int(params_c.pixel_format)} format.Width = int(params_c.width) format.Height = int(params_c.height) + format.FPS = 
float32(params_c.fps) + format.DurSecs = int64(params_c.dur) return status, format, nil } @@ -979,7 +983,7 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) input.Profile.FramerateDen = 1 } - // Do not try tofree in this function because in the C code avformat_open_input() + // Do not try to free in this function because in the C code avformat_open_input() // will destroy this demuxerOpts.opts = newAVOpts(map[string]string{ "framerate": fmt.Sprintf("%d/%d", input.Profile.Framerate, input.Profile.FramerateDen), diff --git a/ffmpeg/ffmpeg_test.go b/ffmpeg/ffmpeg_test.go index 5c59128dfe..fa2f822523 100644 --- a/ffmpeg/ffmpeg_test.go +++ b/ffmpeg/ffmpeg_test.go @@ -1873,7 +1873,6 @@ func TestTranscoder_VFR(t *testing.T) { run, dir := setupTest(t) defer os.RemoveAll(dir) - // prepare the input by generating a vfr video and verify its properties cmd := ` ffmpeg -hide_banner -i "$1/../transcoder/test.ts" -an -vf "setpts='\ @@ -1967,3 +1966,49 @@ PTS_EOF ` run(cmd) } + +func TestDurationFPS_GetCodecInfo(t *testing.T) { + run, dir := setupTest(t) + defer os.RemoveAll(dir) + + //Generate test files + cmd := ` + cp "$1/../data/duplicate-audio-dts.ts" test.ts + ffprobe -loglevel warning -show_format test.ts | grep duration=2.008555 + ffprobe -loglevel warning -show_streams -select_streams v test.ts | grep r_frame_rate=30/1 + cp "$1/../data/bunny.mp4" test.mp4 + ffmpeg -loglevel warning -i test.mp4 -c:v copy -c:a copy -t 2 test-short.mp4 + ffprobe -loglevel warning -show_format test-short.mp4 | grep duration=2.043356 + ffprobe -loglevel warning -show_streams -select_streams v test-short.mp4 | grep r_frame_rate=24/1 + ffmpeg -loglevel warning -i test-short.mp4 -c:v libvpx -c:a vorbis -strict -2 -t 2 test.webm + ffprobe -loglevel warning -show_format test.webm | grep duration=2.049000 + ffprobe -loglevel warning -show_streams -select_streams v test.webm | grep r_frame_rate=24/1 + ffmpeg -loglevel warning -i test-short.mp4 -vn -c:a aac -b:a 128k 
test.m4a + ffprobe -loglevel warning -show_format test.m4a | grep duration=2.042993 + ffmpeg -loglevel warning -i test-short.mp4 -vn -c:a flac test.flac + ffprobe -loglevel warning -show_format test.flac | grep duration=2.043356 + ` + run(cmd) + + files := []struct { + Filename string + Duration int64 + FPS float32 + }{ + {Filename: "test-short.mp4", Duration: 2, FPS: 24}, + {Filename: "test.ts", Duration: 2, FPS: 30.0}, + {Filename: "test.flac", Duration: 2, FPS: 0.0}, + {Filename: "test.webm", Duration: 2, FPS: 24}, + {Filename: "test.m4a", Duration: 2, FPS: 0.0}, + } + for _, file := range files { + t.Run(file.Filename, func(t *testing.T) { + assert := assert.New(t) + status, format, err := GetCodecInfo(path.Join(dir, file.Filename)) + assert.Nil(err, "getcodecinfo error") + assert.Equal(CodecStatusOk, status, "status not ok") + assert.Equal(file.Duration, format.DurSecs, "duration mismatch") + assert.Equal(file.FPS, format.FPS, "fps mismatch") + }) + } +} From 28406cf8bc7833e86ed2c3b26973284e4f5f10bc Mon Sep 17 00:00:00 2001 From: Rick Staa Date: Fri, 26 Jul 2024 15:29:31 +0200 Subject: [PATCH 4/4] feat(ai): enable extra audio media formats This commit enables several extra audio formats needed for the new audio-to-text pipeline. --- install_ffmpeg.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/install_ffmpeg.sh b/install_ffmpeg.sh index 36900ba13c..2b37c87cc6 100755 --- a/install_ffmpeg.sh +++ b/install_ffmpeg.sh @@ -209,13 +209,13 @@ if [[ ! 
-e "$ROOT/ffmpeg/libavcodec/libavcodec.a" ]]; then ./configure ${TARGET_OS:-} $DISABLE_FFMPEG_COMPONENTS --fatal-warnings \ --enable-libx264 --enable-gpl \ --enable-protocol=rtmp,file,pipe \ - --enable-muxer=mpegts,hls,segment,mp4,hevc,matroska,webm,null --enable-demuxer=flv,mpegts,mp4,mov,webm,matroska \ + --enable-muxer=mp3,wav,flac,mpegts,hls,segment,mp4,hevc,matroska,webm,null --enable-demuxer=mp3,wav,flac,flv,mpegts,mp4,mov,webm,matroska,image2 \ --enable-bsf=h264_mp4toannexb,aac_adtstoasc,h264_metadata,h264_redundant_pps,hevc_mp4toannexb,extract_extradata \ - --enable-parser=aac,aac_latm,h264,hevc,vp8,vp9 \ + --enable-parser=mpegaudio,vorbis,opus,flac,aac,aac_latm,h264,hevc,vp8,vp9,png \ --enable-filter=abuffer,buffer,abuffersink,buffersink,afifo,fifo,aformat,format \ --enable-filter=aresample,asetnsamples,fps,scale,hwdownload,select,livepeer_dnn,signature \ - --enable-encoder=aac,opus,libx264 \ - --enable-decoder=aac,opus,h264 \ + --enable-encoder=mp3,vorbis,flac,aac,opus,libx264 \ + --enable-decoder=mp3,vorbis,flac,aac,opus,h264,png \ --extra-cflags="${EXTRA_CFLAGS} -I${ROOT}/compiled/include -I/usr/local/cuda/include" \ --extra-ldflags="${EXTRA_FFMPEG_LDFLAGS} -L${ROOT}/compiled/lib -L/usr/local/cuda/lib64" \ --prefix="$ROOT/compiled" \