Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds enable_frame_num to the experimental video reader #5628

Merged
merged 5 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class VideoSample {
public:
Tensor<Backend> data_;
int label_;
int first_frame_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since VideoSample already has a non-trivial constructor, some initialization (e.g. to invalid values, like -1) would be nice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

};

class VideoLoaderDecoderBase {
Expand All @@ -46,6 +47,7 @@ class VideoLoaderDecoderBase {
stride_(spec.GetArgument<int>("stride")),
step_(spec.GetArgument<int>("step")) {
has_labels_ = spec.TryGetRepeatedArgument(labels_, "labels");
has_frame_no_ = spec.GetArgument<bool>("enable_frame_num");
DALI_ENFORCE(
!has_labels_ || labels_.size() == filenames_.size(),
make_string(
Expand All @@ -61,6 +63,7 @@ class VideoLoaderDecoderBase {
std::vector<std::string> filenames_;
std::vector<int> labels_;
bool has_labels_ = false;
bool has_frame_no_ = false;

Index current_index_ = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ void VideoLoaderDecoderCpu::ReadSample(VideoSample<CPUBackend> &sample) {
if (has_labels_) {
sample.label_ = labels_[sample_span.video_idx_];
}
if (has_frame_no_) {
sample.first_frame_ = sample_span.start_;
}
}

Index VideoLoaderDecoderCpu::SizeImpl() {
Expand Down
23 changes: 20 additions & 3 deletions dali/operators/reader/video_reader_decoder_cpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

VideoReaderDecoderCpu::VideoReaderDecoderCpu(const OpSpec &spec)
: DataReader<CPUBackend, VideoSampleCpu, VideoSampleCpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_no_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderCpu>(spec);
this->SetInitialSnapshot();
}
Expand All @@ -32,16 +33,28 @@ void VideoReaderDecoderCpu::RunImpl(SampleWorkspace &ws) {
video_output.Copy(sample.data_);
video_output.SetSourceInfo(sample.data_.GetSourceInfo());

int out_index = 1;
if (has_labels_) {
auto &label_output = ws.Output<CPUBackend>(1);
auto &label_output = ws.Output<CPUBackend>(out_index);
label_output.Resize({}, DALIDataType::DALI_INT32);
label_output.mutable_data<int>()[0] = sample.label_;
out_index++;
}
if (has_frame_no_) {
auto &frame_no_output = ws.Output<CPUBackend>(out_index);
frame_no_output.Resize({}, DALIDataType::DALI_INT32);
frame_no_output.mutable_data<int>()[0] = sample.first_frame_;
out_index++;
}
}

namespace detail {
inline int VideoReaderDecoderOutputFn(const OpSpec &spec) {
return spec.HasArgument("labels") ? 2 : 1;
int num_outputs = 1;
if (spec.HasArgument("labels")) num_outputs++;
bool enable_frame_num = spec.GetArgument<bool>("enable_frame_num");
if (enable_frame_num) num_outputs++;
return num_outputs;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps clearer (?) Your call.

Suggested change
int num_outputs = 1;
if (spec.HasArgument("labels")) num_outputs++;
bool enable_frame_num = spec.GetArgument<bool>("enable_frame_num");
if (enable_frame_num) num_outputs++;
return num_outputs;
bool has_label_output = spec.HasArgument("labels");
bool has_frame_num_output = spec.GetArgument<bool>("enable_frame_num");
return 1 + has_label_output + has_frame_num_output;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}
} // namespace detail

Expand All @@ -68,6 +81,10 @@ even in the variable frame rate scenario.)code")
.AddArg("sequence_length",
R"code(Frames to load per sequence.)code",
DALI_INT32)
.AddOptionalArg("enable_frame_num",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.AddOptionalArg("enable_frame_num",
.AddOptionalArg("enable_frame_idx",

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to keep it consistent with the video reader .

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand and agree - nonetheless, it's unfortunate.

R"code(If set, returns the first frame number in the decoded sequence
as a separate output.)code",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
R"code(If set, returns the first frame number in the decoded sequence
as a separate output.)code",
R"code(If set, returns the index of the first frame in the decoded sequence
as an additional output.)code",

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

false)
.AddOptionalArg("step",
R"code(Frame interval between each sequence.

Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_cpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class VideoReaderDecoderCpu

private:
bool has_labels_ = false;
bool has_frame_no_ = false;
};

} // namespace dali
Expand Down
66 changes: 45 additions & 21 deletions dali/operators/reader/video_reader_decoder_gpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

VideoReaderDecoderGpu::VideoReaderDecoderGpu(const OpSpec &spec)
: DataReader<GPUBackend, VideoSampleGpu, VideoSampleGpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_no_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderGpu>(spec);
this->SetInitialSnapshot();
}
Expand Down Expand Up @@ -50,14 +51,21 @@ bool VideoReaderDecoderGpu::SetupImpl(

output_desc[0] = { video_shape, DALI_UINT8 };

if (!has_labels_) {
return true;
int out_index = 1;
if (has_labels_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}
if (has_frame_no_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}

output_desc[1] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};

return true;
}
Expand All @@ -80,23 +88,39 @@ void VideoReaderDecoderGpu::RunImpl(Workspace &ws) {
video_output.SetSourceInfo(sample_id, sample.data_.GetSourceInfo());
}

if (!has_labels_) {
return;
}
int out_index = 1;
if (has_labels_) {
auto &labels_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> labels_cpu;

auto &labels_output = ws.Output<GPUBackend>(1);
SmallVector<int, 32> labels_cpu;
for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
}

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
if (has_frame_no_) {
auto &frame_no_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> frame_no_output_cpu;

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
frame_no_output_cpu[sample_id] = sample.span_ ? sample.span_->start_ : -1;
}

MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
MemCopy(
frame_no_output.AsTensor().raw_mutable_data(),
frame_no_output_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
}

DALI_REGISTER_OPERATOR(experimental__readers__Video, VideoReaderDecoderGpu, GPU);
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_gpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class VideoReaderDecoderGpu : public DataReader<GPUBackend, VideoSampleGpu, Vide

private:
bool has_labels_ = false;
bool has_frame_no_ = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool has_frame_no_ = false;
bool has_frame_idx_ = false;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

};

} // namespace dali
Expand Down
34 changes: 28 additions & 6 deletions dali/operators/reader/video_reader_decoder_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
virtual void AssertFrame(
int frame_id, const uint8_t *frame, TestVideo &ground_truth) = 0;

template<typename Backend>
int GetFrameNo(dali::TensorList<Backend> &device_frame_no);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
int GetFrameNo(dali::TensorList<Backend> &device_frame_no);
int GetFrameIdx(dali::TensorList<Backend> &device_frame_idx);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


private:
template<typename Backend>
void RunTestImpl(
Expand Down Expand Up @@ -129,15 +132,15 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
.AddArg("device", backend)
.AddArg("sequence_length", sequence_length)
.AddArg("random_shuffle", true)
.AddArg("enable_frame_num", true)
.AddArg("initial_fill", cfr_videos_[0].NumFrames())
.AddArg(
"filenames",
std::vector<std::string>{cfr_videos_paths_[0]})
.AddOutput("frames", backend));

pipe.Build({{"frames", backend}});
.AddOutput("frames", backend)
.AddOutput("frame_no", backend));

std::vector<int> expected_order = {29, 46, 33, 6, 37};
pipe.Build({{"frames", backend}, {"frame_no", backend}});

int num_sequences = 5;

Expand All @@ -148,9 +151,10 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {

auto &frame_video_output = ws.Output<Backend>(0);
const auto sample = frame_video_output.template tensor<uint8_t>(0);
int frame_no = GetFrameNo(ws.Output<Backend>(1));

// We want to access correct order, so we comapre only the first frame of the sequence
AssertFrame(expected_order[sequence_id], sample, ground_truth_video);
// We want to access correct order, so we compare only the first frame of the sequence
AssertFrame(frame_no, sample, ground_truth_video);
}
}
};
Expand All @@ -168,6 +172,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::CPUBackend>() {
RunShuffleTestImpl<dali::CPUBackend>("cpu", dali::CPU_ONLY_DEVICE_ID);
}

template<>
int VideoReaderDecoderBaseTest::GetFrameNo(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
int VideoReaderDecoderBaseTest::GetFrameNo(
int VideoReaderDecoderBaseTest::GetFrameIdx(

...if going with the idx suggestion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

dali::TensorList<dali::CPUBackend> &device_frame_no) {
const auto frame_no = device_frame_no.template tensor<int>(0);
int frame_no_buffer = -1;
std::copy_n(frame_no, 1, &frame_no_buffer);
return frame_no_buffer;
}

template<>
void VideoReaderDecoderBaseTest::RunTest<dali::GPUBackend>(
std::vector<std::string> &videos_paths,
Expand All @@ -181,6 +194,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::GPUBackend>() {
RunShuffleTestImpl<dali::GPUBackend>("gpu", 0);
}

template<>
int VideoReaderDecoderBaseTest::GetFrameNo(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
int VideoReaderDecoderBaseTest::GetFrameNo(
int VideoReaderDecoderBaseTest::GetFrameIdx(

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

dali::TensorList<dali::GPUBackend> &device_frame_no) {
const auto frame_no = device_frame_no.template tensor<int>(0);
int frame_no_buffer = -1;
MemCopy(&frame_no_buffer, frame_no, sizeof(int));
return frame_no_buffer;
}

class VideoReaderDecoderCpuTest : public VideoReaderDecoderBaseTest {
public:
void AssertLabel(const int *label, int ground_truth_label) override {
Expand Down
Loading