Skip to content

Commit

Permalink
[fix](merge-on-write) Remove pending delete bitmap check when `commit…
Browse files Browse the repository at this point in the history
…_txn()` (apache#47136)

### What problem does this PR solve?

Related PR: apache#46039

Problem Summary:

apache#46039 introduce an defensive check
when `commit_txn()`, but this may influence the commit process. This PR
remove this check totally to eliminate this overhead.
  • Loading branch information
bobhan1 authored Jan 21, 2025
1 parent d0f9daf commit a15a901
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 182 deletions.
1 change: 0 additions & 1 deletion cloud/src/meta-service/meta_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1813,7 +1813,6 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont

// 3. store all pending delete bitmap for this txn
PendingDeleteBitmapPB delete_bitmap_keys;
delete_bitmap_keys.set_lock_id(request->lock_id());
for (size_t i = 0; i < request->rowset_ids_size(); ++i) {
MetaDeleteBitmapInfo key_info {instance_id, tablet_id, request->rowset_ids(i),
request->versions(i), request->segment_ids(i)};
Expand Down
31 changes: 0 additions & 31 deletions cloud/src/meta-service/meta_service_txn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -972,39 +972,8 @@ void process_mow_when_commit_txn(
LOG(INFO) << "xxx remove delete bitmap lock, lock_key=" << hex(lock_keys[i])
<< " txn_id=" << txn_id;

int64_t lock_id = lock_info.lock_id();
for (auto tablet_id : table_id_tablet_ids[table_id]) {
std::string pending_key = meta_pending_delete_bitmap_key({instance_id, tablet_id});

// check that if the pending info's lock_id is correct
std::string pending_val;
err = txn->get(pending_key, &pending_val);
if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
ss << "failed to get delete bitmap pending info, instance_id=" << instance_id
<< " tablet_id=" << tablet_id << " key=" << hex(pending_key) << " err=" << err;
msg = ss.str();
code = cast_as<ErrCategory::READ>(err);
return;
}

if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) continue;

PendingDeleteBitmapPB pending_info;
if (!pending_info.ParseFromString(pending_val)) [[unlikely]] {
code = MetaServiceCode::PROTOBUF_PARSE_ERR;
msg = "failed to parse PendingDeleteBitmapPB";
return;
}

if (pending_info.has_lock_id() && pending_info.lock_id() != lock_id) {
TEST_SYNC_POINT_CALLBACK("commit_txn:check_pending_delete_bitmap_lock_id",
&tablet_id);
LOG_WARNING(
"wrong lock_id in pending delete bitmap infos, expect lock_id={}, but "
"found {} tablet_id={} instance_id={}",
lock_id, pending_info.lock_id(), tablet_id, instance_id);
}

txn->remove(pending_key);
LOG(INFO) << "xxx remove delete bitmap pending key, pending_key=" << hex(pending_key)
<< " txn_id=" << txn_id;
Expand Down
148 changes: 0 additions & 148 deletions cloud/test/meta_service_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5494,7 +5494,6 @@ TEST(MetaServiceTest, DeleteBimapCommitTxnTest) {
ASSERT_EQ(ret, TxnErrorCode::TXN_OK);
PendingDeleteBitmapPB pending_info;
ASSERT_TRUE(pending_info.ParseFromString(pending_val));
ASSERT_EQ(pending_info.lock_id(), lock_info.lock_id());
}

// commit txn
Expand Down Expand Up @@ -5528,153 +5527,6 @@ TEST(MetaServiceTest, DeleteBimapCommitTxnTest) {
}
}

TEST(MetaServiceTest, WrongPendingBitmapTest) {
auto meta_service = get_meta_service();
extern std::string get_instance_id(const std::shared_ptr<ResourceManager>& rc_mgr,
const std::string& cloud_unique_id);
auto instance_id = get_instance_id(meta_service->resource_mgr(), "test_cloud_unique_id");

std::set<int64_t> real_wrong_pending_delete_bitmap_tablet_ids;
std::set<int64_t> expected_wrong_pending_delete_bitmap_tablet_ids;
auto sp = SyncPoint::get_instance();
sp->set_call_back("commit_txn:check_pending_delete_bitmap_lock_id", [&](auto&& args) {
auto* tablet_id = try_any_cast<int64_t*>(args[0]);
real_wrong_pending_delete_bitmap_tablet_ids.insert(*tablet_id);
});
sp->enable_processing();

// case: first version of rowset
{
int64_t txn_id = 56789;
int64_t table_id = 123456; // same as table_id of tmp rowset
int64_t db_id = 222;
int64_t tablet_id_base = 8113;
int64_t partition_id = 1234;
// begin txn
{
brpc::Controller cntl;
BeginTxnRequest req;
req.set_cloud_unique_id("test_cloud_unique_id");
TxnInfoPB txn_info_pb;
txn_info_pb.set_db_id(db_id);
txn_info_pb.set_label("test_label");
txn_info_pb.add_table_ids(table_id);
txn_info_pb.set_timeout_ms(36000);
req.mutable_txn_info()->CopyFrom(txn_info_pb);
BeginTxnResponse res;
meta_service->begin_txn(reinterpret_cast<::google::protobuf::RpcController*>(&cntl),
&req, &res, nullptr);
ASSERT_EQ(res.status().code(), MetaServiceCode::OK);
txn_id = res.txn_id();
}

// mock rowset and tablet
for (int i = 0; i < 5; ++i) {
create_tablet(meta_service.get(), table_id, 1235, partition_id, tablet_id_base + i);
auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i);
tmp_rowset.set_partition_id(partition_id);
CreateRowsetResponse res;
commit_rowset(meta_service.get(), tmp_rowset, res);
ASSERT_EQ(res.status().code(), MetaServiceCode::OK);
}

// update delete bitmap
{
// get delete bitmap update lock
brpc::Controller cntl;
GetDeleteBitmapUpdateLockRequest get_lock_req;
GetDeleteBitmapUpdateLockResponse get_lock_res;
get_lock_req.set_cloud_unique_id("test_cloud_unique_id");
get_lock_req.set_table_id(table_id);
get_lock_req.add_partition_ids(partition_id);
get_lock_req.set_expiration(5);
get_lock_req.set_lock_id(txn_id);
get_lock_req.set_initiator(-1);
meta_service->get_delete_bitmap_update_lock(
reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req,
&get_lock_res, nullptr);
ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK);

// first update delete bitmap
UpdateDeleteBitmapRequest update_delete_bitmap_req;
UpdateDeleteBitmapResponse update_delete_bitmap_res;
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
update_delete_bitmap_req.set_table_id(table_id);
update_delete_bitmap_req.set_partition_id(partition_id);
update_delete_bitmap_req.set_lock_id(txn_id);
update_delete_bitmap_req.set_initiator(-1);
update_delete_bitmap_req.set_tablet_id(tablet_id_base);

update_delete_bitmap_req.add_rowset_ids("123");
update_delete_bitmap_req.add_segment_ids(1);
update_delete_bitmap_req.add_versions(2);
update_delete_bitmap_req.add_segment_delete_bitmaps("abc0");

meta_service->update_delete_bitmap(
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
}

// check delete bitmap update lock and pending delete bitmap
{
std::unique_ptr<Transaction> txn;
ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK);
std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1});
std::string lock_val;
auto ret = txn->get(lock_key, &lock_val);
ASSERT_EQ(ret, TxnErrorCode::TXN_OK);
DeleteBitmapUpdateLockPB lock_info;
ASSERT_TRUE(lock_info.ParseFromString(lock_val));

std::string pending_key = meta_pending_delete_bitmap_key({instance_id, tablet_id_base});
std::string pending_val;
ret = txn->get(pending_key, &pending_val);
ASSERT_EQ(ret, TxnErrorCode::TXN_OK);
PendingDeleteBitmapPB pending_info;
ASSERT_TRUE(pending_info.ParseFromString(pending_val));
ASSERT_EQ(pending_info.lock_id(), lock_info.lock_id());
}

{
std::unique_ptr<Transaction> txn;
ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK);
// pending bitmap have been modified by other txn
std::string pending_key = meta_pending_delete_bitmap_key({instance_id, tablet_id_base});
std::string pending_val;
auto ret = txn->get(pending_key, &pending_val);
ASSERT_EQ(ret, TxnErrorCode::TXN_OK);
PendingDeleteBitmapPB pending_info;
ASSERT_TRUE(pending_info.ParseFromString(pending_val));
// change pending bitmap's lock_id
pending_info.set_lock_id(pending_info.lock_id() + 1);
ASSERT_TRUE(pending_info.SerializeToString(&pending_val));
txn->put(pending_key, pending_val);
ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK);

expected_wrong_pending_delete_bitmap_tablet_ids.insert(tablet_id_base);
}

// commit txn
{
brpc::Controller cntl;
CommitTxnRequest req;
req.set_cloud_unique_id("test_cloud_unique_id");
req.set_db_id(db_id);
req.set_txn_id(txn_id);
req.add_mow_table_ids(table_id);
CommitTxnResponse res;
meta_service->commit_txn(reinterpret_cast<::google::protobuf::RpcController*>(&cntl),
&req, &res, nullptr);
ASSERT_EQ(expected_wrong_pending_delete_bitmap_tablet_ids,
real_wrong_pending_delete_bitmap_tablet_ids);
}
}

SyncPoint::get_instance()->disable_processing();
SyncPoint::get_instance()->clear_all_call_backs();
}

TEST(MetaServiceTest, GetDeleteBitmapWithRetryTest1) {
auto meta_service = get_meta_service();
SyncPoint::get_instance()->enable_processing();
Expand Down
2 changes: 0 additions & 2 deletions gensrc/proto/cloud.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1383,7 +1383,6 @@ enum MetaServiceCode {
LOCK_EXPIRED = 8001;
LOCK_CONFLICT = 8002;
ROWSETS_EXPIRED = 8003;
PENDING_DELETE_BITMAP_WRONG = 8004;

// partial update
ROWSET_META_NOT_FOUND = 9001;
Expand Down Expand Up @@ -1449,7 +1448,6 @@ message RemoveDeleteBitmapResponse {

message PendingDeleteBitmapPB {
repeated bytes delete_bitmap_keys = 1;
optional int64 lock_id = 2;
}

message DeleteBitmapUpdateLockPB {
Expand Down

0 comments on commit a15a901

Please sign in to comment.