Skip to content

Commit

Permalink
move time difference issue
Browse files Browse the repository at this point in the history
  • Loading branch information
StekPerepolnen committed Jun 23, 2024
1 parent c1ccdda commit 4568f24
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
22 changes: 15 additions & 7 deletions ydb/core/health_check/health_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2579,15 +2579,17 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
databaseStatus.set_name(path);
FillCompute(state, *databaseStatus.mutable_compute(), {&dbContext, "COMPUTE"});
FillStorage(state, *databaseStatus.mutable_storage(), {&dbContext, "STORAGE"});
FillTimeDifference(state, {&dbContext, "NODES_TIME_DIFFERENCE"});
FillTimeDifference(state, *databaseStatus.mutable_time_difference(), {&dbContext, "NODES_TIME_DIFFERENCE"});
if (databaseStatus.compute().overall() != Ydb::Monitoring::StatusFlag::GREEN
&& databaseStatus.storage().overall() != Ydb::Monitoring::StatusFlag::GREEN) {
dbContext.ReportStatus(MaxStatus(databaseStatus.compute().overall(), databaseStatus.storage().overall()),
"Database has multiple issues", ETags::DBState, { ETags::ComputeState, ETags::StorageState});
"Database has multiple issues", ETags::DBState, { ETags::ComputeState, ETags::StorageState, ETags::SyncState });
} else if (databaseStatus.compute().overall() != Ydb::Monitoring::StatusFlag::GREEN) {
dbContext.ReportStatus(databaseStatus.compute().overall(), "Database has compute issues", ETags::DBState, {ETags::ComputeState});
dbContext.ReportStatus(databaseStatus.compute().overall(), "Database has compute issues", ETags::DBState, {ETags::ComputeState, ETags::SyncState});
} else if (databaseStatus.storage().overall() != Ydb::Monitoring::StatusFlag::GREEN) {
dbContext.ReportStatus(databaseStatus.storage().overall(), "Database has storage issues", ETags::DBState, {ETags::StorageState});
dbContext.ReportStatus(databaseStatus.storage().overall(), "Database has storage issues", ETags::DBState, {ETags::StorageState, ETags::SyncState});
} else if (databaseStatus.time_difference().overall() != Ydb::Monitoring::StatusFlag::GREEN) {
dbContext.ReportStatus(databaseStatus.time_difference().overall(), "Database has time difference issues", ETags::DBState, {ETags::SyncState});
}
databaseStatus.set_overall(dbContext.GetOverallStatus());
context.UpdateMaxStatus(dbContext.GetOverallStatus());
Expand All @@ -2597,10 +2599,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
}
}

const TDuration MAX_CLOCKSKEW_ORANGE_ISSUE_TIME = TDuration::MicroSeconds(25000);
const TDuration MAX_CLOCKSKEW_YELLOW_ISSUE_TIME = TDuration::MicroSeconds(5000);
const TDuration MAX_CLOCKSKEW_ORANGE_ISSUE_TIME = TDuration::MicroSeconds(25);
const TDuration MAX_CLOCKSKEW_YELLOW_ISSUE_TIME = TDuration::MicroSeconds(5);

void FillTimeDifference(TDatabaseState& databaseState, TSelfCheckContext context) {
void FillTimeDifference(TDatabaseState& databaseState, Ydb::Monitoring::TimeDifferenceStatus& timeDifferenceStatus, TSelfCheckContext context) {
long maxClockSkewUs = 0;
TNodeId maxClockSkewPeerId = 0;
TNodeId maxClockSkewNodeId = 0;
Expand Down Expand Up @@ -2630,6 +2632,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
}

if (!maxClockSkewNodeId) {
timeDifferenceStatus.set_overall(Ydb::Monitoring::StatusFlag::GREEN);
return;
}

Expand All @@ -2644,6 +2647,11 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
} else {
context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN);
}

timeDifferenceStatus.set_node(ToString(maxClockSkewNodeId));
timeDifferenceStatus.set_peer(ToString(maxClockSkewPeerId));
timeDifferenceStatus.set_max_difference_ms(maxClockSkewTime.MilliSeconds());
timeDifferenceStatus.set_overall(context.GetOverallStatus());
}

void FillResult(TOverallStateContext context) {
Expand Down
8 changes: 8 additions & 0 deletions ydb/public/api/protos/ydb_monitoring.proto
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ message ComputeStatus {
float shards_quota_usage = 5;
}

// Result of the node time-difference (clock skew) health check for a database.
// Filled by FillTimeDifference in ydb/core/health_check/health_check.cpp and
// exposed through DatabaseStatus.time_difference.
message TimeDifferenceStatus {
// Overall status of the check. Set to GREEN, with the remaining fields left
// unset, when no node with clock skew was recorded; otherwise taken from the
// check context's overall status.
StatusFlag.Status overall = 1;
// Maximum clock skew, in milliseconds (filled from a TDuration via MilliSeconds()).
int64 max_difference_ms = 2;
// String form of the node id recorded for the maximum clock skew.
string node = 3;
// String form of the peer node id recorded alongside `node`
// (presumably the peer the skew was measured against; confirm in FillTimeDifference).
string peer = 4;
}

message LocationNode {
uint32 id = 1;
string host = 2;
Expand Down Expand Up @@ -198,6 +205,7 @@ message DatabaseStatus {
StatusFlag.Status overall = 2;
StorageStatus storage = 3;
ComputeStatus compute = 4;
TimeDifferenceStatus time_difference = 5;
}

message SelfCheckResult {
Expand Down

0 comments on commit 4568f24

Please sign in to comment.