Skip to content

Commit

Permalink
[region migration] Log optimization (apache#14536) (apache#14545)
Browse files Browse the repository at this point in the history
* save log

* done?

(cherry picked from commit 3ae2355)
  • Loading branch information
liyuheng55555 authored Dec 26, 2024
1 parent e579738 commit 0b74512
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ public static String getIdWithRpcEndpoint(TDataNodeLocation location) {
location.getDataNodeId(), location.getClientRpcEndPoint());
}

/**
 * Renders a DataNode location in a short form intended for log messages.
 *
 * <p>The result is the DataNode id, an {@code @} sign, and the IP of the node's internal
 * endpoint, e.g. {@code <dataNodeId>@<ip>}.
 *
 * @param dataNodeLocation the location to render
 * @return the short textual form of {@code dataNodeLocation}
 */
public String simplifiedLocation(TDataNodeLocation dataNodeLocation) {
  StringBuilder shortForm = new StringBuilder();
  shortForm.append(dataNodeLocation.getDataNodeId());
  shortForm.append("@");
  shortForm.append(dataNodeLocation.getInternalEndPoint().getIp());
  return shortForm.toString();
}

/**
* Find dest data node.
*
Expand Down Expand Up @@ -397,17 +401,32 @@ public void removeRegionLocation(
* @return DataNode locations
*/
public List<TDataNodeLocation> findRegionLocations(TConsensusGroupId regionId) {
  // Delegate the lookup to getRegionReplicaSet so the replica-set search lives in one place
  // and the local Optional is declared exactly once.
  Optional<TRegionReplicaSet> regionReplicaSet = getRegionReplicaSet(regionId);

  // No matching replica set yields an empty list rather than null. Optional.map also
  // collapses a null location list to "absent", so that case yields an empty list as well.
  return regionReplicaSet
      .map(TRegionReplicaSet::getDataNodeLocations)
      .orElseGet(Collections::emptyList);
}

/**
 * Looks up the replica set of a region among all replica sets known to the partition manager.
 *
 * @param regionId the consensus group id to search for
 * @return a replica set whose {@code regionId} equals the given id, or an empty {@link Optional}
 *     when none matches
 */
public Optional<TRegionReplicaSet> getRegionReplicaSet(TConsensusGroupId regionId) {
  for (TRegionReplicaSet candidate : configManager.getPartitionManager().getAllReplicaSets()) {
    if (candidate.regionId.equals(regionId)) {
      return Optional.of(candidate);
    }
  }
  return Optional.empty();
}

/**
 * Builds a compact, log-friendly description of a region's replica set.
 *
 * <p>The result has the form {@code <regionId>: {<loc1>, <loc2>}}, where each location is
 * rendered by {@link #simplifiedLocation(TDataNodeLocation)} and locations are separated by
 * {@code ", "} with no trailing separator.
 *
 * @param regionId the consensus group whose replica set should be described
 * @return the description, or {@code "UNKNOWN!"} when no replica set matches {@code regionId}
 */
public String getRegionReplicaSetString(TConsensusGroupId regionId) {
  Optional<TRegionReplicaSet> regionReplicaSet = getRegionReplicaSet(regionId);
  if (!regionReplicaSet.isPresent()) {
    return "UNKNOWN!";
  }
  // Unwrap once instead of calling Optional.get() for every access.
  TRegionReplicaSet replicaSet = regionReplicaSet.get();
  StringBuilder result = new StringBuilder(replicaSet.getRegionId() + ": {");
  // The separator is empty before the first element, so the list never ends with ", ".
  String separator = "";
  for (TDataNodeLocation dataNodeLocation : replicaSet.getDataNodeLocations()) {
    result.append(separator).append(simplifiedLocation(dataNodeLocation));
    separator = ", ";
  }
  result.append("}");
  return result.toString();
}

private Optional<TDataNodeLocation> pickNewReplicaNodeForRegion(
List<TDataNodeLocation> regionReplicaNodes) {
List<TDataNodeConfiguration> dataNodeConfigurations =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, AddRegionPeerState s
switch (state) {
case CREATE_NEW_REGION_PEER:
LOGGER.info(
"[pid{}][AddRegion] started, region {} will be added to DataNode {}.",
"[pid{}][AddRegion] started, {} will be added to DataNode {}.",
getProcId(),
consensusGroupId.getId(),
destDataNode.getDataNodeId());
consensusGroupId,
handler.simplifiedLocation(destDataNode));
handler.addRegionLocation(consensusGroupId, destDataNode);
handler.forceUpdateRegionCache(consensusGroupId, destDataNode, RegionStatus.Adding);
TSStatus status = handler.createNewRegionPeer(consensusGroupId, destDataNode);
Expand Down Expand Up @@ -138,10 +138,10 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, AddRegionPeerState s
setKillPoint(state);
LOGGER.info("[pid{}][AddRegion] state {} complete", getProcId(), state);
LOGGER.info(
"[pid{}][AddRegion] success, region {} has been added to DataNode {}. Procedure took {} (start at {}).",
"[pid{}][AddRegion] success, {} has been added to DataNode {}. Procedure took {} (start at {}).",
getProcId(),
consensusGroupId.getId(),
destDataNode.getDataNodeId(),
consensusGroupId,
handler.simplifiedLocation(destDataNode),
CommonDateTimeUtils.convertMillisecondToDurationStr(
System.currentTimeMillis() - getSubmittedTime()),
DateTimeUtils.convertLongToDate(getSubmittedTime(), "ms"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, RegionTransitionStat
switch (state) {
case REGION_MIGRATE_PREPARE:
LOGGER.info(
"[pid{}][MigrateRegion] started, region {} will be migrated from DataNode {} to {}.",
"[pid{}][MigrateRegion] started, {} will be migrated from DataNode {} to {}.",
getProcId(),
consensusGroupId.getId(),
originalDataNode.getDataNodeId(),
destDataNode.getDataNodeId());
consensusGroupId,
handler.simplifiedLocation(originalDataNode),
handler.simplifiedLocation(destDataNode));
setNextState(RegionTransitionState.ADD_REGION_PEER);
break;
case ADD_REGION_PEER:
Expand All @@ -113,23 +113,23 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, RegionTransitionStat
setNextState(RegionTransitionState.CHECK_REMOVE_REGION_PEER);
break;
case CHECK_REMOVE_REGION_PEER:
String cleanHint = "";
if (env.getConfigManager()
.getPartitionManager()
.isDataNodeContainsRegion(originalDataNode.getDataNodeId(), consensusGroupId)) {
LOGGER.warn(
"[pid{}][MigrateRegion] success, but you may need to manually clean the old region to make everything works fine",
getProcId());
} else {
LOGGER.info(
"[pid{}][MigrateRegion] success, region {} has been migrated from DataNode {} to {}. Procedure took {} (started at {})",
getProcId(),
consensusGroupId.getId(),
originalDataNode.getDataNodeId(),
destDataNode.getDataNodeId(),
CommonDateTimeUtils.convertMillisecondToDurationStr(
System.currentTimeMillis() - getSubmittedTime()),
DateTimeUtils.convertLongToDate(getSubmittedTime(), "ms"));
cleanHint =
"but you may need to restart the related DataNode to make sure everything is cleaned up. ";
}
LOGGER.info(
"[pid{}][MigrateRegion] success,{} {} has been migrated from DataNode {} to {}. Procedure took {} (started at {}).",
getProcId(),
cleanHint,
consensusGroupId,
handler.simplifiedLocation(originalDataNode),
handler.simplifiedLocation(destDataNode),
CommonDateTimeUtils.convertMillisecondToDurationStr(
System.currentTimeMillis() - getSubmittedTime()),
DateTimeUtils.convertLongToDate(getSubmittedTime(), "ms"));
return Flow.NO_MORE_STATE;
default:
throw new ProcedureException("Unsupported state: " + state.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,23 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, RemoveRegionPeerStat
setKillPoint(state);
if (tsStatus.getCode() != SUCCESS_STATUS.getStatusCode()) {
LOGGER.warn(
"[pid{}][RemoveRegion] {} task submitted failed, procedure will continue. You should manually clear peer list.",
"[pid{}][RemoveRegion] {} task submitted failed, ConfigNode believe current peer list of {} is {}. Procedure will continue. You should manually clear peer list.",
getProcId(),
state);
state,
consensusGroupId,
handler.getRegionReplicaSetString(consensusGroupId));
setNextState(DELETE_OLD_REGION_PEER);
return Flow.HAS_MORE_STATE;
}
TRegionMigrateResult removeRegionPeerResult =
handler.waitTaskFinish(this.getProcId(), coordinator);
if (removeRegionPeerResult.getTaskStatus() != TRegionMaintainTaskStatus.SUCCESS) {
LOGGER.warn(
"[pid{}][RemoveRegion] {} executed failed, procedure will continue. You should manually clear peer list.",
"[pid{}][RemoveRegion] {} executed failed, ConfigNode believe current peer list of {} is {}. Procedure will continue. You should manually clear peer list.",
getProcId(),
state);
state,
consensusGroupId,
handler.getRegionReplicaSetString(consensusGroupId));
setNextState(DELETE_OLD_REGION_PEER);
return Flow.HAS_MORE_STATE;
}
Expand All @@ -132,17 +136,19 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, RemoveRegionPeerStat
setKillPoint(state);
if (tsStatus.getCode() != SUCCESS_STATUS.getStatusCode()) {
LOGGER.warn(
"[pid{}][RemoveRegion] DELETE_OLD_REGION_PEER task submitted failed, procedure will continue. You should manually delete region file.",
getProcId());
"[pid{}][RemoveRegion] DELETE_OLD_REGION_PEER task submitted failed, procedure will continue. You should manually delete region file. {}",
getProcId(),
consensusGroupId);
setNextState(REMOVE_REGION_LOCATION_CACHE);
return Flow.HAS_MORE_STATE;
}
TRegionMigrateResult deleteOldRegionPeerResult =
handler.waitTaskFinish(this.getProcId(), targetDataNode);
if (deleteOldRegionPeerResult.getTaskStatus() != TRegionMaintainTaskStatus.SUCCESS) {
LOGGER.warn(
"[pid{}][RemoveRegion] DELETE_OLD_REGION_PEER executed failed, procedure will continue. You should manually delete region file.",
getProcId());
"[pid{}][RemoveRegion] DELETE_OLD_REGION_PEER executed failed, procedure will continue. You should manually delete region file. {}",
getProcId(),
consensusGroupId);
setNextState(REMOVE_REGION_LOCATION_CACHE);
return Flow.HAS_MORE_STATE;
}
Expand Down

0 comments on commit 0b74512

Please sign in to comment.