Skip to content

Commit

Permalink
[Enhancement] support force delete shard meta (#49224)
Browse files Browse the repository at this point in the history
Signed-off-by: starrocks-xupeng <[email protected]>
(cherry picked from commit 9971882)
  • Loading branch information
starrocks-xupeng authored and mergify[bot] committed Aug 6, 2024
1 parent 07d31d8 commit 7e3e6ad
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 2 deletions.
9 changes: 9 additions & 0 deletions fe/fe-core/src/main/java/com/starrocks/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -2251,6 +2251,15 @@ public class Config extends ConfigBase {
// NOTE(review): presumably the period, in seconds, between StarMgr meta sync runs — confirm at the usage site.
@ConfField
public static long star_mgr_meta_sync_interval_sec = 600L;

/**
 * Whether to allow deleting shard meta even if deleting the actual data fails.
 * In extreme cases, actual data deletion might fail or time out,
 * and if the shard meta is not deleted, FE memory usage will keep growing,
 * eventually causing the FE to run Full GC frequently.
 */
@ConfField(mutable = true)
public static boolean meta_sync_force_delete_shard_meta = false;

// ***********************************************************
// * BEGIN: Cloud native meta server related configurations
// ***********************************************************
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ public static void dropTabletAndDeleteShard(List<Long> shardIds, StarOSAgent sta
DeleteTabletRequest request = new DeleteTabletRequest();
request.tabletIds = Lists.newArrayList(shards);

boolean forceDelete = Config.meta_sync_force_delete_shard_meta;
try {
LakeService lakeService = BrpcProxy.getLakeService(node.getHost(), node.getBrpcPort());
DeleteTabletResponse response = lakeService.deleteTablet(request).get();
Expand All @@ -119,7 +120,7 @@ public static void dropTabletAndDeleteShard(List<Long> shardIds, StarOSAgent sta
LOG.info("Fail to delete tablet. StatusCode: {}, failedTablets: {}", stCode, response.failedTablets);

// ignore INVALID_ARGUMENT error, treat it as success
if (stCode != TStatusCode.INVALID_ARGUMENT) {
if (stCode != TStatusCode.INVALID_ARGUMENT && !forceDelete) {
response.failedTablets.forEach(shards::remove);
}
}
Expand All @@ -128,7 +129,9 @@ public static void dropTabletAndDeleteShard(List<Long> shardIds, StarOSAgent sta
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
continue;
if (!forceDelete) {
continue;
}
}

// 2. delete shard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -470,4 +470,76 @@ Future<DeleteTabletResponse> deleteTablet(DeleteTabletRequest request) {
// can delete the shards, because the error is INVALID_ARGUMENT
Assert.assertEquals(0, allShardIds.size());
}

/**
 * Checks that {@code Config.meta_sync_force_delete_shard_meta} decides whether shards are
 * still deleted when the tablet-deletion RPC fails.
 *
 * <p>The mocked {@code deleteTablet} always throws. With the flag off, the shard list must
 * stay intact after a sync pass; with the flag on, every shard must be removed.
 */
@Test
public void testForceDelete() {
    // Saved up front so the global flag is restored even when an assertion fails;
    // otherwise a failure here would leak the modified value into later tests.
    final boolean savedForceDelete = Config.meta_sync_force_delete_shard_meta;
    Config.shard_group_clean_threshold_sec = 0;
    long groupIdToClear = shardGroupId + 1;
    List<Long> allShardGroupId = Lists.newArrayList(groupIdToClear);
    // build shardGroupInfos
    List<Long> allShardIds = Stream.of(1000L, 1001L, 1002L, 1003L).collect(Collectors.toList());
    int numOfShards = allShardIds.size();
    List<ShardGroupInfo> shardGroupInfos = new ArrayList<>();
    for (long groupId : allShardGroupId) {
        ShardGroupInfo info = ShardGroupInfo.newBuilder()
                .setGroupId(groupId)
                // createTime is placed 86400 * 1000 ms before "now"
                .putProperties("createTime", String.valueOf(System.currentTimeMillis() - 86400 * 1000))
                .addAllShardIds(allShardIds)
                .build();
        shardGroupInfos.add(info);
    }

    // In-memory stand-in for StarOSAgent backed by the local lists above.
    new MockUp<StarOSAgent>() {
        @Mock
        public void deleteShardGroup(List<Long> groupIds) throws
                StarClientException {
            allShardGroupId.removeAll(groupIds);
            for (long groupId : groupIds) {
                shardGroupInfos.removeIf(item -> item.getGroupId() == groupId);
            }
        }

        @Mock
        public List<ShardGroupInfo> listShardGroup() {
            return shardGroupInfos;
        }

        @Mock
        public List<Long> listShard(long groupId) throws DdlException {
            if (groupId == groupIdToClear) {
                return allShardIds;
            } else {
                return Lists.newArrayList();
            }
        }

        @Mock
        public void deleteShards(Set<Long> shardIds) throws DdlException {
            allShardIds.removeAll(shardIds);
        }
    };

    new MockUp<BrpcProxy>() {
        @Mock
        public LakeService getLakeService(String host, int port) throws RpcException {
            return new PseudoBackend.PseudoLakeService();
        }
    };

    // Every tablet deletion attempt fails with an exception.
    new MockUp<PseudoBackend.PseudoLakeService>() {
        @Mock
        Future<DeleteTabletResponse> deleteTablet(DeleteTabletRequest request) throws Exception {
            throw new Exception("testForceDelete");
        }
    };

    try {
        // Flag off: the failed tablet deletion must leave all shards in place.
        Config.meta_sync_force_delete_shard_meta = false;
        Deencapsulation.invoke(starMgrMetaSyncer, "deleteUnusedShardAndShardGroup");
        Assert.assertEquals(numOfShards, allShardIds.size());

        // Flag on: shards are deleted despite the failed tablet deletion.
        Config.meta_sync_force_delete_shard_meta = true;
        Deencapsulation.invoke(starMgrMetaSyncer, "deleteUnusedShardAndShardGroup");
        Assert.assertEquals(0, allShardIds.size());
    } finally {
        Config.meta_sync_force_delete_shard_meta = savedForceDelete;
    }
}
}

0 comments on commit 7e3e6ad

Please sign in to comment.