Test

Signed-off-by: Janusz Lisiecki <[email protected]>
NVIDIA · Sep 9, 2024 · 4aa945e · 4aa945e
1 parent d886ad6
commit 4aa945e
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 27 deletions.
diff --git a/dali/operators/reader/loader/video/video_loader_decoder_base.h b/dali/operators/reader/loader/video/video_loader_decoder_base.h
@@ -36,6 +36,7 @@ class VideoSample {
  public:
   Tensor<Backend> data_;
   int label_;
+  int first_frame_ = -1;  // first frame of the sequence; stays -1 unless a loader sets it
 };
 
 class VideoLoaderDecoderBase {
@@ -46,6 +47,7 @@ class VideoLoaderDecoderBase {
     stride_(spec.GetArgument<int>("stride")),
     step_(spec.GetArgument<int>("step")) {
     has_labels_ = spec.TryGetRepeatedArgument(labels_, "labels");
+    has_frame_no_ = spec.GetArgument<bool>("enable_frame_num");
     DALI_ENFORCE(
         !has_labels_ || labels_.size() == filenames_.size(),
         make_string(
@@ -61,6 +63,7 @@ class VideoLoaderDecoderBase {
   std::vector<std::string> filenames_;
   std::vector<int> labels_;
   bool has_labels_ = false;
+  bool has_frame_no_ = false;
 
   Index current_index_ = 0;
 

diff --git a/dali/operators/reader/loader/video/video_loader_decoder_cpu.cc b/dali/operators/reader/loader/video/video_loader_decoder_cpu.cc
@@ -45,6 +45,9 @@ void VideoLoaderDecoderCpu::ReadSample(VideoSample<CPUBackend> &sample) {
   if (has_labels_) {
     sample.label_ = labels_[sample_span.video_idx_];
   }
+  if (has_frame_no_) {
+    sample.first_frame_ = sample_span.start_;
+  }
 }
 
 Index VideoLoaderDecoderCpu::SizeImpl() {

diff --git a/dali/operators/reader/video_reader_decoder_cpu_op.cc b/dali/operators/reader/video_reader_decoder_cpu_op.cc
@@ -20,7 +20,8 @@ namespace dali {
 
 VideoReaderDecoderCpu::VideoReaderDecoderCpu(const OpSpec &spec)
     : DataReader<CPUBackend, VideoSampleCpu, VideoSampleCpu, true>(spec),
-      has_labels_(spec.HasArgument("labels")) {
+      has_labels_(spec.HasArgument("labels")),
+      has_frame_no_(spec.GetArgument<bool>("enable_frame_num")) {
       loader_ = InitLoader<VideoLoaderDecoderCpu>(spec);
       this->SetInitialSnapshot();
 }
@@ -32,16 +33,28 @@ void VideoReaderDecoderCpu::RunImpl(SampleWorkspace &ws) {
   video_output.Copy(sample.data_);
   video_output.SetSourceInfo(sample.data_.GetSourceInfo());
 
+  int out_index = 1;
   if (has_labels_) {
-    auto &label_output = ws.Output<CPUBackend>(1);
+    auto &label_output = ws.Output<CPUBackend>(out_index);
     label_output.Resize({}, DALIDataType::DALI_INT32);
     label_output.mutable_data<int>()[0] = sample.label_;
+    out_index++;
+  }
+  if (has_frame_no_) {
+    auto &frame_no_output = ws.Output<CPUBackend>(out_index);
+    frame_no_output.Resize({}, DALIDataType::DALI_INT32);
+    frame_no_output.mutable_data<int>()[0] = sample.first_frame_;
+    out_index++;
   }
 }
 
 namespace detail {
 inline int VideoReaderDecoderOutputFn(const OpSpec &spec) {
-  return spec.HasArgument("labels") ? 2 : 1;
+  // Frames are always produced; labels and frame numbers each add one optional output.
+  const bool with_labels = spec.HasArgument("labels");
+  const bool with_frame_num = spec.GetArgument<bool>("enable_frame_num");
+  const int optional_outputs = (with_labels ? 1 : 0) + (with_frame_num ? 1 : 0);
+  return 1 + optional_outputs;
 }
 }  // namespace detail
 
@@ -68,6 +81,10 @@ even in the variable frame rate scenario.)code")
   .AddArg("sequence_length",
       R"code(Frames to load per sequence.)code",
       DALI_INT32)
+  .AddOptionalArg("enable_frame_num",
+      R"code(If set, returns the first frame number in the decoded sequence
+as a separate output.)code",
+      false)
   .AddOptionalArg("step",
       R"code(Frame interval between each sequence.
 

diff --git a/dali/operators/reader/video_reader_decoder_cpu_op.h b/dali/operators/reader/video_reader_decoder_cpu_op.h
@@ -29,6 +29,7 @@ class VideoReaderDecoderCpu
 
  private:
   bool has_labels_ = false;
+  bool has_frame_no_ = false;
 };
 
 }  // namespace dali

diff --git a/dali/operators/reader/video_reader_decoder_gpu_op.cc b/dali/operators/reader/video_reader_decoder_gpu_op.cc
@@ -20,7 +20,8 @@ namespace dali {
 
 VideoReaderDecoderGpu::VideoReaderDecoderGpu(const OpSpec &spec)
     : DataReader<GPUBackend, VideoSampleGpu, VideoSampleGpu, true>(spec),
-      has_labels_(spec.HasArgument("labels")) {
+      has_labels_(spec.HasArgument("labels")),
+      has_frame_no_(spec.GetArgument<bool>("enable_frame_num")) {
       loader_ = InitLoader<VideoLoaderDecoderGpu>(spec);
       this->SetInitialSnapshot();
 }
@@ -50,14 +51,21 @@ bool VideoReaderDecoderGpu::SetupImpl(
 
   output_desc[0] = { video_shape, DALI_UINT8 };
 
-  if (!has_labels_) {
-    return true;
+  int out_index = 1;
+  if (has_labels_) {
+    output_desc[out_index] = {
+      uniform_list_shape<1>(batch_size, {1}),
+      DALI_INT32
+    };
+    out_index++;
+  }
+  if (has_frame_no_) {
+    output_desc[out_index] = {
+      uniform_list_shape<1>(batch_size, {1}),
+      DALI_INT32
+    };
+    out_index++;
   }
-
-  output_desc[1] = {
-    uniform_list_shape<1>(batch_size, {1}),
-    DALI_INT32
-  };
 
   return true;
 }
@@ -80,23 +88,43 @@ void VideoReaderDecoderGpu::RunImpl(Workspace &ws) {
     video_output.SetSourceInfo(sample_id, sample.data_.GetSourceInfo());
   }
 
-  if (!has_labels_) {
-    return;
-  }
+  int out_index = 1;
+  if (has_labels_) {
+    auto &labels_output = ws.Output<GPUBackend>(out_index);
+    SmallVector<int, 32> labels_cpu;
+    // Allocate batch_size slots up front; the staging vector starts out empty.
+    labels_cpu.resize(batch_size);
 
-  auto &labels_output = ws.Output<GPUBackend>(1);
-  SmallVector<int, 32> labels_cpu;
+    for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
+      auto &sample = GetSample(sample_id);
+      labels_cpu[sample_id] = sample.label_;
+    }
 
-  for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
-    auto &sample = GetSample(sample_id);
-    labels_cpu[sample_id] = sample.label_;
+    MemCopy(
+      labels_output.AsTensor().raw_mutable_data(),
+      labels_cpu.data(),
+      batch_size * sizeof(int),
+      ws.stream());
+    out_index++;
   }
+  if (has_frame_no_) {
+    auto &frame_no_output = ws.Output<GPUBackend>(out_index);
+    SmallVector<int, 32> frame_no_output_cpu;
+    // Allocate batch_size slots up front; the staging vector starts out empty.
+    frame_no_output_cpu.resize(batch_size);
+
+    for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
+      auto &sample = GetSample(sample_id);
+      frame_no_output_cpu[sample_id] = sample.span_ ? sample.span_->start_ : -1;
+    }
 
-  MemCopy(
-    labels_output.AsTensor().raw_mutable_data(),
-    labels_cpu.data(),
-    batch_size * sizeof(DALI_INT32),
-    ws.stream());
+    MemCopy(
+      frame_no_output.AsTensor().raw_mutable_data(),
+      frame_no_output_cpu.data(),
+      batch_size * sizeof(int),
+      ws.stream());
+    out_index++;
+  }
 }
 
 DALI_REGISTER_OPERATOR(experimental__readers__Video, VideoReaderDecoderGpu, GPU);

diff --git a/dali/operators/reader/video_reader_decoder_gpu_op.h b/dali/operators/reader/video_reader_decoder_gpu_op.h
@@ -35,6 +35,7 @@ class VideoReaderDecoderGpu : public DataReader<GPUBackend, VideoSampleGpu, Vide
 
  private:
   bool has_labels_ = false;
+  bool has_frame_no_ = false;
 };
 
 }  // namespace dali

diff --git a/dali/operators/reader/video_reader_decoder_op_test.cc b/dali/operators/reader/video_reader_decoder_op_test.cc
@@ -129,13 +129,15 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
       .AddArg("device", backend)
       .AddArg("sequence_length", sequence_length)
       .AddArg("random_shuffle", true)
+      .AddArg("enable_frame_num", true)
       .AddArg("initial_fill", cfr_videos_[0].NumFrames())
       .AddArg(
         "filenames",
         std::vector<std::string>{cfr_videos_paths_[0]})
-      .AddOutput("frames", backend));
+      .AddOutput("frames", backend)
+      .AddOutput("frame_no", backend));
 
-    pipe.Build({{"frames", backend}});
+    pipe.Build({{"frames", backend}, {"frame_no", backend}});
 
     // ToFix
     std::vector<int> expected_order = {29, 46, 33, 6, 37};
@@ -148,10 +150,14 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
       pipe.Outputs(&ws);
 
       auto &frame_video_output = ws.Output<Backend>(0);
+      auto &cpu_frame_no_output = ws.Output<Backend>(1);
       const auto sample = frame_video_output.template tensor<uint8_t>(0);
+      const auto cpu_frame_no = cpu_frame_no_output.template tensor<int>(0);
+      int frame_no_buffer = -1;
+      MemCopy(&frame_no_buffer, cpu_frame_no, sizeof(int));
 
       // We want to access correct order, so we compare only the first frame of the sequence
-      AssertFrame(expected_order[sequence_id], sample, ground_truth_video);
+      AssertFrame(frame_no_buffer, sample, ground_truth_video);
     }
   }
 };