Skip to content

Commit

Permalink
Add a new file ingestion option link_files (#12980)
Browse files Browse the repository at this point in the history
Summary:
Add option `IngestExternalFileOptions::link_files` that hard links input files and preserves original file links after ingestion, unlike `move_files` which will unlink input files after ingestion. This can be useful when being used together with `allow_db_generated_files` to ingest files from another DB. Also reverted the change to `move_files` in #12959 to simplify the contract so that it will always unlink input files without exception.

Pull Request resolved: #12980

Test Plan: updated unit test `ExternSSTFileLinkFailFallbackTest.LinkFailFallBackExternalSst` to test that input files will not be unlinked.

Reviewed By: pdillinger

Differential Revision: D61925111

Pulled By: cbi42

fbshipit-source-id: eadaca72e1ae5288bdd195d57158466e5656fa62
  • Loading branch information
cbi42 authored and facebook-github-bot committed Sep 3, 2024
1 parent 92ad4a8 commit cd6f802
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 21 deletions.
4 changes: 4 additions & 0 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5837,6 +5837,10 @@ Status DBImpl::IngestExternalFiles(
"allow_db_generated_files.");
}
}
if (ingest_opts.move_files && ingest_opts.link_files) {
return Status::InvalidArgument(
"`move_files` and `link_files` can not both be true.");
}
}

// TODO (yanqin) maybe handle the case in which column_families have
Expand Down
5 changes: 2 additions & 3 deletions db/external_sst_file_ingestion_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ Status ExternalSstFileIngestionJob::Prepare(
const std::string path_outside_db = f.external_file_path;
const std::string path_inside_db = TableFileName(
cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId());
if (ingestion_options_.move_files) {
if (ingestion_options_.move_files || ingestion_options_.link_files) {
status =
fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr);
if (status.ok()) {
Expand Down Expand Up @@ -626,8 +626,7 @@ void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
DeleteInternalFiles();
consumed_seqno_count_ = 0;
files_overlap_ = false;
} else if (status.ok() && ingestion_options_.move_files &&
!ingestion_options_.allow_db_generated_files) {
} else if (status.ok() && ingestion_options_.move_files) {
// The files were moved and added successfully, remove original file links
for (IngestedFileInfo& f : files_to_ingest_) {
Status s = fs_->DeleteFile(f.external_file_path, io_opts, nullptr);
Expand Down
31 changes: 19 additions & 12 deletions db/external_sst_file_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class ExternalSSTFileTestBase : public DBTestBase {

class ExternSSTFileLinkFailFallbackTest
: public ExternalSSTFileTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public ::testing::WithParamInterface<std::tuple<bool, bool, bool>> {
public:
ExternSSTFileLinkFailFallbackTest() {
fs_ = std::make_shared<ExternalSSTTestFS>(env_->GetFileSystem(), true);
Expand Down Expand Up @@ -2210,7 +2210,8 @@ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
DestroyAndReopen(options_);
const int kNumKeys = 10000;
IngestExternalFileOptions ifo;
ifo.move_files = true;
ifo.move_files = std::get<2>(GetParam());
ifo.link_files = !ifo.move_files;
ifo.failed_move_fall_back_to_copy = failed_move_fall_back_to_copy;

std::string file_path = sst_files_dir_ + "file1.sst";
Expand Down Expand Up @@ -2251,6 +2252,13 @@ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
ASSERT_EQ(0, bytes_copied);
ASSERT_EQ(file_size, bytes_moved);
ASSERT_FALSE(copyfile);

Status es = env_->FileExists(file_path);
if (ifo.move_files) {
ASSERT_TRUE(es.IsNotFound());
} else {
ASSERT_OK(es);
}
} else {
// Link operation fails.
ASSERT_EQ(0, bytes_moved);
Expand All @@ -2269,6 +2277,11 @@ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest,
ExternSSTFileLinkFailFallbackTest,
testing::Combine(testing::Bool(), testing::Bool(),
testing::Bool()));

class TestIngestExternalFileListener : public EventListener {
public:
void OnExternalFileIngested(DB* /*db*/,
Expand Down Expand Up @@ -3719,19 +3732,13 @@ TEST_F(ExternalSSTFileWithTimestampTest, TimestampsNotPersistedBasic) {
INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest,
testing::Combine(testing::Bool(), testing::Bool()));

INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest,
ExternSSTFileLinkFailFallbackTest,
testing::Values(std::make_tuple(true, false),
std::make_tuple(true, true),
std::make_tuple(false, false)));

class IngestDBGeneratedFileTest
: public ExternalSSTFileTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public:
IngestDBGeneratedFileTest() {
ingest_opts.allow_db_generated_files = true;
ingest_opts.move_files = std::get<0>(GetParam());
ingest_opts.link_files = std::get<0>(GetParam());
ingest_opts.verify_checksums_before_ingest = std::get<1>(GetParam());
ingest_opts.snapshot_consistency = false;
}
Expand All @@ -3744,10 +3751,10 @@ INSTANTIATE_TEST_CASE_P(BasicMultiConfig, IngestDBGeneratedFileTest,
testing::Combine(testing::Bool(), testing::Bool()));

TEST_P(IngestDBGeneratedFileTest, FailureCase) {
if (encrypted_env_ && ingest_opts.move_files) {
if (encrypted_env_ && ingest_opts.link_files) {
// FIXME: should fail ingestion or support this combination.
ROCKSDB_GTEST_SKIP(
"Encrypted env and move_files do not work together, as we reopen the "
"Encrypted env and link_files do not work together, as we reopen the "
"file after linking it which appends an extra encryption prefix.");
return;
}
Expand Down Expand Up @@ -3943,7 +3950,7 @@ TEST_P(IngestDBGeneratedFileTest2, NotOverlapWithDB) {
ingest_opts.allow_global_seqno = std::get<1>(GetParam());
ingest_opts.allow_blocking_flush = std::get<2>(GetParam());
ingest_opts.fail_if_not_bottommost_level = std::get<3>(GetParam());
ingest_opts.move_files = std::get<4>(GetParam());
ingest_opts.link_files = std::get<4>(GetParam());

do {
SCOPED_TRACE("option_config_ = " + std::to_string(option_config_));
Expand Down
14 changes: 9 additions & 5 deletions include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -2131,10 +2131,16 @@ struct CompactRangeOptions {
// IngestExternalFileOptions is used by IngestExternalFile()
struct IngestExternalFileOptions {
// Can be set to true to move the files instead of copying them.
// Note that original file links will be removed after successful ingestion,
// unless `allow_db_generated_files` is true.
// The input files will be unlinked after successful ingestion.
// The implementation depends on hard links (LinkFile) instead of traditional
// move (RenameFile) to maximize the chances to restore to the original
// state upon failure.
bool move_files = false;
// If set to true, ingestion falls back to copy when move fails.
// Same as move_files except that input files will NOT be unlinked.
// Only one of `move_files` and `link_files` can be set at the same time.
bool link_files = false;
// If set to true, ingestion falls back to copy when hard linking fails.
// This applies to both `move_files` and `link_files`.
bool failed_move_fall_back_to_copy = true;
// If set to false, an ingested file keys could appear in existing snapshots
// that where created before the file was ingested.
Expand Down Expand Up @@ -2209,8 +2215,6 @@ struct IngestExternalFileOptions {
// Enables ingestion of files not generated by SstFileWriter. When true:
// - Allows files to be ingested when their cf_id doesn't match the CF they
// are being ingested into.
// - Preserves original file links after successful ingestion when
// `move_files = true`.
// REQUIREMENTS:
// - Ingested files must not overlap with existing keys.
// - `write_global_seqno` must be false.
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
* Support ingesting db generated files using hard link, i.e. IngestExternalFileOptions::move_files and IngestExternalFileOptions::allow_db_generated_files._
* Support ingesting db generated files using hard link, i.e. IngestExternalFileOptions::move_files/link_files and IngestExternalFileOptions::allow_db_generated_files.
1 change: 1 addition & 0 deletions unreleased_history/behavior_changes/link-file-ingest.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* Add a new file ingestion option `IngestExternalFileOptions::link_files` to hard link input files and preserve original files links after ingestion.

0 comments on commit cd6f802

Please sign in to comment.