Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DiskTimeAvailable scaling option to BlobStorage configuration, improve burst threshold configuration #2530

Merged
merged 1 commit into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,19 @@ namespace NKikimr::NStorage {
vdiskConfig->EnableVPatch = EnableVPatch;
vdiskConfig->FeatureFlags = Cfg->FeatureFlags;

if (Cfg->BlobStorageConfig.HasCostMetricsSettings()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StorageConfig.GetBlobStorageConfig()

for (auto type : Cfg->BlobStorageConfig.GetCostMetricsSettings().GetVDiskTypes()) {
if (type.HasPDiskType() && deviceType == PDiskTypeToPDiskType(type.GetPDiskType())) {
if (type.HasBurstThresholdNs()) {
vdiskConfig->BurstThresholdNs = type.GetBurstThresholdNs();
}
if (type.HasDiskTimeAvailableScale()) {
vdiskConfig->DiskTimeAvailableScale = type.GetDiskTimeAvailableScale();
}
}
}
}

// issue initial report to whiteboard before creating actor to avoid races
Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(vdiskId, groupInfo->GetStoragePoolName(),
vslotId.PDiskId, vslotId.VDiskSlotId, pdiskGuid, kind, donorMode, whiteboardInstanceGuid, std::move(donors)));
Expand Down
28 changes: 10 additions & 18 deletions ydb/core/blobstorage/ut_blobstorage/lib/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct TEnvironmentSetup {
const TFeatureFlags FeatureFlags;
const NPDisk::EDeviceType DiskType = NPDisk::EDeviceType::DEVICE_TYPE_NVME;
const ui32 BurstThresholdNs = 0;
const TString VDiskKind = "";
const float DiskTimeAvailableScale = 1;
};

const TSettings Settings;
Expand Down Expand Up @@ -325,16 +325,16 @@ struct TEnvironmentSetup {
config->CacheAccessor = std::make_unique<TAccessor>(Cache[nodeId]);
}
config->FeatureFlags = Settings.FeatureFlags;
if (Settings.VDiskKind) {
NKikimrBlobStorage::TAllVDiskKinds vdiskConfig;
auto* kind = vdiskConfig.AddVDiskKinds();
kind->SetKind(NKikimrBlobStorage::TVDiskKind::Test1);

{
auto* type = config->BlobStorageConfig.MutableCostMetricsSettings()->AddVDiskTypes();
type->SetPDiskType(NKikimrBlobStorage::EPDiskType::ROT);
if (Settings.BurstThresholdNs) {
kind->MutableConfig()->SetBurstThresholdNs(Settings.BurstThresholdNs);
type->SetBurstThresholdNs(Settings.BurstThresholdNs);
}

config->AllVDiskKinds = MakeIntrusive<TAllVDiskKinds>(vdiskConfig);
type->SetDiskTimeAvailableScale(Settings.DiskTimeAvailableScale);
}

warden.reset(CreateBSNodeWarden(config));
}

Expand Down Expand Up @@ -419,11 +419,7 @@ struct TEnvironmentSetup {
cmd2->SetName(StoragePoolName);
cmd2->SetKind(StoragePoolName);
cmd2->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure()));
if (Settings.VDiskKind) {
cmd2->SetVDiskKind(Settings.VDiskKind);
} else {
cmd2->SetVDiskKind("Default");
}
cmd2->SetVDiskKind("Default");
cmd2->SetNumGroups(numGroups ? numGroups : NumGroups);
cmd2->AddPDiskFilter()->AddProperty()->SetType(pdiskType);
if (Settings.Encryption) {
Expand All @@ -443,11 +439,7 @@ struct TEnvironmentSetup {
cmd->SetName(poolName);
cmd->SetKind(poolName);
cmd->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure()));
if (Settings.VDiskKind) {
cmd->SetVDiskKind(Settings.VDiskKind);
} else {
cmd->SetVDiskKind("Default");
}
cmd->SetVDiskKind("Default");
cmd->SetNumGroups(1);
cmd->AddPDiskFilter()->AddProperty()->SetType(NKikimrBlobStorage::EPDiskType::ROT);
if (Settings.Encryption) {
Expand Down
38 changes: 34 additions & 4 deletions ydb/core/blobstorage/ut_blobstorage/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ ui64 AggregateVDiskCounters(std::unique_ptr<TEnvironmentSetup>& env, TString sto

void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr<TEnvironmentSetup>& env,
ui32& groupSize, TBlobStorageGroupType& groupType, ui32& groupId, std::vector<ui32>& pdiskLayout,
ui32 burstThresholdNs = 0, TString vdiskKind = "") {
ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) {
groupSize = topology.TotalVDisks;
groupType = topology.GType;
env.reset(new TEnvironmentSetup({
.NodeCount = groupSize,
.Erasure = groupType,
.DiskType = NPDisk::EDeviceType::DEVICE_TYPE_ROT,
.BurstThresholdNs = burstThresholdNs,
.VDiskKind = vdiskKind,
.DiskTimeAvailableScale = diskTimeAvailableScale,
}));

env->CreateBoxAndPool(1, 1);
Expand Down Expand Up @@ -248,15 +248,16 @@ enum class ELoadDistribution : ui8 {

template <typename TInflightActor>
void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution loadDistribution,
ui32 burstThresholdNs = 0) {
ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) {
TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true);
auto* actor = new TInflightActor({requests, inflight, delay}, 8_MB);
std::unique_ptr<TEnvironmentSetup> env;
ui32 groupSize;
TBlobStorageGroupType groupType;
ui32 groupId;
std::vector<ui32> pdiskLayout;
SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs, "Test1");
SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs,
diskTimeAvailableScale);

actor->SetGroupId(groupId);
env->Runtime->Register(actor, 1);
Expand Down Expand Up @@ -286,4 +287,33 @@ Y_UNIT_TEST_SUITE(BurstDetection) {
}
}

void TestDiskTimeAvailableScaling() {
auto measure = [](float scale) {
TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true);
std::unique_ptr<TEnvironmentSetup> env;
ui32 groupSize;
TBlobStorageGroupType groupType;
ui32 groupId;
std::vector<ui32> pdiskLayout;
SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, 0, scale);

return AggregateVDiskCounters(env, env->StoragePoolName, groupSize, groupId, pdiskLayout,
"advancedCost", "DiskTimeAvailable");
};

i64 test1 = measure(1);
i64 test2 = measure(2);

i64 delta = test1 * 2 - test2;

UNIT_ASSERT_LE_C(std::abs(delta), 10, "Total time available: with scale=1 time=" << test1 <<
", with scale=2 time=" << test2);
}

// Test suite for the DiskTimeAvailable cost-metrics counter.
// Its single case, Scaling, delegates to TestDiskTimeAvailableScaling().
Y_UNIT_TEST_SUITE(DiskTimeAvailable) {
Y_UNIT_TEST(Scaling) {
TestDiskTimeAvailableScaling();
}
}

#undef MAKE_BURST_TEST
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,19 @@ class TBsCostModelMirror3of4 : public TBsCostModelBase {
};

TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType,
const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs)
const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs,
float diskTimeAvailableScale)
: GroupType(groupType)
, CostCounters(counters->GetSubgroup("subsystem", "advancedCost"))
, UserDiskCost(CostCounters->GetCounter("UserDiskCost", true))
, CompactionDiskCost(CostCounters->GetCounter("CompactionDiskCost", true))
, ScrubDiskCost(CostCounters->GetCounter("ScrubDiskCost", true))
, DefragDiskCost(CostCounters->GetCounter("DefragDiskCost", true))
, InternalDiskCost(CostCounters->GetCounter("InternalDiskCost", true))
, BucketCapacity(burstThresholdNs / GroupType.BlobSubgroupSize())
, Bucket(&DiskTimeAvailableNs, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true)
, DiskTimeAvailableCtr(CostCounters->GetCounter("DiskTimeAvailable", false))
, BucketCapacity(burstThresholdNs * diskTimeAvailableScale / GroupType.BlobSubgroupSize())
, Bucket(&DiskTimeAvailable, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true)
, DiskTimeAvailableScale(diskTimeAvailableScale)
{
BurstDetector.Initialize(CostCounters, "BurstDetector");
switch (GroupType.GetErasure()) {
Expand Down
11 changes: 8 additions & 3 deletions ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,17 +317,20 @@ class TBsCostTracker {
::NMonitoring::TDynamicCounters::TCounterPtr ScrubDiskCost;
::NMonitoring::TDynamicCounters::TCounterPtr DefragDiskCost;
::NMonitoring::TDynamicCounters::TCounterPtr InternalDiskCost;
::NMonitoring::TDynamicCounters::TCounterPtr DiskTimeAvailableCtr;

TAtomic BucketCapacity; // 10^9 nsec
TAtomic DiskTimeAvailableNs = 1'000'000'000;
TAtomic DiskTimeAvailable = 1'000'000'000;
TBucketQuoter<i64, TSpinLock, TAppDataTimerMs<TInstantTimerMs>> Bucket;
TLight BurstDetector;
std::atomic<ui64> SeqnoBurstDetector = 0;
static constexpr ui32 ConcurrentHugeRequestsAllowed = 3;
float DiskTimeAvailableScale = 1;

public:
TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType,
const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs);
const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs,
float diskTimeAvailableScale);

template<class TEv>
ui64 GetCost(const TEv& ev) const {
Expand All @@ -353,7 +356,9 @@ class TBsCostTracker {
}

// Updates the disk time value tracked by this cost tracker.
// The raw value diskTimeAvailableNSec is multiplied by DiskTimeAvailableScale; the product is
// stored atomically in DiskTimeAvailable and also written to the DiskTimeAvailableCtr counter.
void SetTimeAvailable(ui32 diskTimeAvailableNSec) {
// NOTE(review): this store writes the unscaled value to DiskTimeAvailableNs, while the statements
// below use DiskTimeAvailable — it looks like the pre-change line of the diff shown next to its
// replacement; confirm which one the merged file actually contains.
AtomicSet(DiskTimeAvailableNs, diskTimeAvailableNSec);
// NOTE(review): DiskTimeAvailableScale is a float, so the ui32 operand is converted to float and
// the product is computed in float before being converted to ui64. Large inputs may be rounded,
// and a negative scale would not convert to an unsigned value cleanly — TODO confirm the scale
// is validated where the config is read.
ui64 diskTimeAvailable = diskTimeAvailableNSec * DiskTimeAvailableScale;
AtomicSet(DiskTimeAvailable, diskTimeAvailable);
// Mirror the scaled value into the "DiskTimeAvailable" counter.
*DiskTimeAvailableCtr = diskTimeAvailable;
}

public:
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/blobstorage/vdisk/common/vdisk_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ namespace NKikimr {
#endif

BurstThresholdNs = NPDisk::DevicePerformance.at(baseInfo.DeviceType).BurstThresholdNs;
DiskTimeAvailableScale = 1;
}

void TVDiskConfig::SetupHugeBytes() {
Expand Down Expand Up @@ -163,7 +164,6 @@ namespace NKikimr {

UPDATE_MACRO(BarrierValidation);

UPDATE_MACRO(BurstThresholdNs);
#undef UPDATE_MACRO
}

Expand Down
3 changes: 3 additions & 0 deletions ydb/core/blobstorage/vdisk/common/vdisk_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,10 @@ namespace NKikimr {
TDuration WhiteboardUpdateInterval;
bool EnableVDiskCooldownTimeout;
TControlWrapper EnableVPatch = true;

///////////// COST METRICS SETTINGS ////////////////
ui64 BurstThresholdNs = 1'000'000'000;
float DiskTimeAvailableScale = 1;

///////////// FEATURE FLAGS ////////////////////////
NKikimrConfig::TFeatureFlags FeatureFlags;
Expand Down
6 changes: 4 additions & 2 deletions ydb/core/blobstorage/vdisk/common/vdisk_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ namespace NKikimr {
TReplQuoter::TPtr replPDiskWriteQuoter,
TReplQuoter::TPtr replNodeRequestQuoter,
TReplQuoter::TPtr replNodeResponseQuoter,
ui64 burstThresholdNs)
ui64 burstThresholdNs,
float diskTimeAvailableScale)
: TBSProxyContext(vdiskCounters->GetSubgroup("subsystem", "memhull"))
, VDiskActorId(vdiskActorId)
, Top(std::move(top))
Expand All @@ -58,7 +59,8 @@ namespace NKikimr {
, ReplPDiskWriteQuoter(std::move(replPDiskWriteQuoter))
, ReplNodeRequestQuoter(std::move(replNodeRequestQuoter))
, ReplNodeResponseQuoter(std::move(replNodeResponseQuoter))
, CostTracker(std::make_shared<TBsCostTracker>(Top->GType, type, vdiskCounters, burstThresholdNs))
, CostTracker(std::make_shared<TBsCostTracker>(Top->GType, type, vdiskCounters, burstThresholdNs,
diskTimeAvailableScale))
, OutOfSpaceState(Top->GetTotalVDisksNum(), Top->GetOrderNumber(ShortSelfVDisk))
, CostMonGroup(vdiskCounters, "subsystem", "cost")
, Logger(as ? ActorSystemLogger(as) : DevNullLogger())
Expand Down
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/vdisk/common/vdisk_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ namespace NKikimr {
TReplQuoter::TPtr replPDiskWriteQuoter = nullptr,
TReplQuoter::TPtr replNodeRequestQuoter = nullptr,
TReplQuoter::TPtr replNodeResponseQuoter = nullptr,
ui64 burstThresholdNs = 1'000'000'000);
ui64 burstThresholdNs = 1'000'000'000,
float diskTimeAvailableScale = 1);

// The function checks response from PDisk. Normally, it's OK.
// Other alternatives are: 1) shutdown; 2) FAIL
Expand Down
4 changes: 4 additions & 0 deletions ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2157,6 +2157,10 @@ namespace NKikimr {
TABLED() {str << "BurstThresholdNs";}
TABLED() {str << Config->BurstThresholdNs;}
}
TABLER() {
TABLED() {str << "DiskTimeAvailableScale";}
TABLED() {str << Config->DiskTimeAvailableScale;}
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ namespace NKikimr {
VCtx = MakeIntrusive<TVDiskContext>(ctx.SelfID, GInfo->PickTopology(), VDiskCounters, SelfVDiskId,
ctx.ExecutorThread.ActorSystem, baseInfo.DeviceType, baseInfo.DonorMode,
baseInfo.ReplPDiskReadQuoter, baseInfo.ReplPDiskWriteQuoter, baseInfo.ReplNodeRequestQuoter,
baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs);
baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs, Config->DiskTimeAvailableScale);

// create IntQueues
IntQueueAsyncGets = std::make_unique<TIntQueueClass>(
Expand Down
2 changes: 0 additions & 2 deletions ydb/core/protos/blobstorage_vdisk_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ message TVDiskConfig {

optional bool BarrierValidation = 60;
optional bool EnableOverseerLsnReporting = 61; // deprecated

optional uint64 BurstThresholdNs = 62;
};

// organizes hierarchy of VDisk configs: VDisk config may have a base config,
Expand Down
13 changes: 13 additions & 0 deletions ydb/core/protos/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import "ydb/core/fq/libs/config/protos/fq_config.proto";
import "ydb/core/protos/alloc.proto";
import "ydb/core/protos/auth.proto";
import "ydb/core/protos/blobstorage.proto";
import "ydb/core/protos/blobstorage_base3.proto";
import "ydb/core/protos/blobstorage_config.proto";
import "ydb/core/protos/blobstorage_pdisk_config.proto";
import "ydb/core/protos/blobstorage_vdisk_config.proto";
Expand Down Expand Up @@ -280,6 +281,18 @@ message TBlobStorageConfig {
}

optional TAutoconfigSettings AutoconfigSettings = 6;

// Cost metrics overrides for VDisks that run on one PDisk type.
// The node warden applies an entry only when PDiskType is set and matches the device type
// of the VDisk's PDisk; each of the remaining fields overrides the VDisk config only when set.
message TCostMetricsConfig {
// PDisk type this entry applies to; entries without it are ignored by the node warden.
optional NKikimrBlobStorage.EPDiskType PDiskType = 1;
// Overrides the VDisk config's BurstThresholdNs (nanoseconds, per the field name).
optional uint64 BurstThresholdNs = 2;
// Multiplier the cost tracker applies to the disk time available value (and to the bucket capacity).
optional float DiskTimeAvailableScale = 3;
}

// Container for per-PDisk-type cost metrics overrides.
// The node warden iterates over all entries and applies every one whose PDiskType matches,
// so for duplicate types a later entry overwrites the fields set by an earlier one.
message TCostMetricsSettings {
repeated TCostMetricsConfig VDiskTypes = 1;
};

optional TCostMetricsSettings CostMetricsSettings = 7;
}

message TBlobStorageFormatConfig {
Expand Down
Loading