From 64276245c8686253a736e2b5bae43f88b47b5670 Mon Sep 17 00:00:00 2001 From: Sergey Belyakov Date: Wed, 6 Mar 2024 21:00:24 +0000 Subject: [PATCH] Add DiskTimeAvailable scaling option to BlobStorage configuration, improve burst threshold configuration --- .../nodewarden/node_warden_vdisk.cpp | 13 +++++++ ydb/core/blobstorage/ut_blobstorage/lib/env.h | 28 +++++--------- .../blobstorage/ut_blobstorage/monitoring.cpp | 38 +++++++++++++++++-- .../vdisk/common/blobstorage_cost_tracker.cpp | 9 +++-- .../vdisk/common/blobstorage_cost_tracker.h | 11 ++++-- .../blobstorage/vdisk/common/vdisk_config.cpp | 2 +- .../blobstorage/vdisk/common/vdisk_config.h | 3 ++ .../vdisk/common/vdisk_context.cpp | 6 ++- .../blobstorage/vdisk/common/vdisk_context.h | 3 +- .../vdisk/skeleton/blobstorage_skeleton.cpp | 4 ++ .../skeleton/blobstorage_skeletonfront.cpp | 2 +- .../protos/blobstorage_vdisk_config.proto | 2 - ydb/core/protos/config.proto | 13 +++++++ 13 files changed, 99 insertions(+), 35 deletions(-) diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp index 781dd6070fba..eeea89001a2f 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp @@ -176,6 +176,19 @@ namespace NKikimr::NStorage { vdiskConfig->EnableVPatch = EnableVPatch; vdiskConfig->FeatureFlags = Cfg->FeatureFlags; + if (Cfg->BlobStorageConfig.HasCostMetricsSettings()) { + for (auto type : Cfg->BlobStorageConfig.GetCostMetricsSettings().GetVDiskTypes()) { + if (type.HasPDiskType() && deviceType == PDiskTypeToPDiskType(type.GetPDiskType())) { + if (type.HasBurstThresholdNs()) { + vdiskConfig->BurstThresholdNs = type.GetBurstThresholdNs(); + } + if (type.HasDiskTimeAvailableScale()) { + vdiskConfig->DiskTimeAvailableScale = type.GetDiskTimeAvailableScale(); + } + } + } + } + // issue initial report to whiteboard before creating actor to avoid races Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(vdiskId, groupInfo->GetStoragePoolName(), vslotId.PDiskId, vslotId.VDiskSlotId, pdiskGuid, kind, donorMode, whiteboardInstanceGuid, std::move(donors))); diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h index 8be7af96c4d9..c774c2350353 100644 --- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h +++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h @@ -40,7 +40,7 @@ struct TEnvironmentSetup { const TFeatureFlags FeatureFlags; const NPDisk::EDeviceType DiskType = NPDisk::EDeviceType::DEVICE_TYPE_NVME; const ui32 BurstThresholdNs = 0; - const TString VDiskKind = ""; + const float DiskTimeAvailableScale = 1; }; const TSettings Settings; @@ -325,16 +325,16 @@ struct TEnvironmentSetup { config->CacheAccessor = std::make_unique(Cache[nodeId]); } config->FeatureFlags = Settings.FeatureFlags; - if (Settings.VDiskKind) { - NKikimrBlobStorage::TAllVDiskKinds vdiskConfig; - auto* kind = vdiskConfig.AddVDiskKinds(); - kind->SetKind(NKikimrBlobStorage::TVDiskKind::Test1); + + { + auto* type = config->BlobStorageConfig.MutableCostMetricsSettings()->AddVDiskTypes(); + type->SetPDiskType(NKikimrBlobStorage::EPDiskType::ROT); if (Settings.BurstThresholdNs) { - kind->MutableConfig()->SetBurstThresholdNs(Settings.BurstThresholdNs); + type->SetBurstThresholdNs(Settings.BurstThresholdNs); } - - config->AllVDiskKinds = MakeIntrusive(vdiskConfig); + type->SetDiskTimeAvailableScale(Settings.DiskTimeAvailableScale); } + warden.reset(CreateBSNodeWarden(config)); } @@ -419,11 +419,7 @@ struct TEnvironmentSetup { cmd2->SetName(StoragePoolName); cmd2->SetKind(StoragePoolName); cmd2->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure())); - if (Settings.VDiskKind) { - cmd2->SetVDiskKind(Settings.VDiskKind); - } else { - cmd2->SetVDiskKind("Default"); - } + cmd2->SetVDiskKind("Default"); cmd2->SetNumGroups(numGroups ? numGroups : NumGroups); cmd2->AddPDiskFilter()->AddProperty()->SetType(pdiskType); if (Settings.Encryption) { @@ -443,11 +439,7 @@ struct TEnvironmentSetup { cmd->SetName(poolName); cmd->SetKind(poolName); cmd->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure())); - if (Settings.VDiskKind) { - cmd->SetVDiskKind(Settings.VDiskKind); - } else { - cmd->SetVDiskKind("Default"); - } + cmd->SetVDiskKind("Default"); cmd->SetNumGroups(1); cmd->AddPDiskFilter()->AddProperty()->SetType(NKikimrBlobStorage::EPDiskType::ROT); if (Settings.Encryption) { diff --git a/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp b/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp index c4e3d42f7701..db60b5eef96b 100644 --- a/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp +++ b/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp @@ -34,7 +34,7 @@ ui64 AggregateVDiskCounters(std::unique_ptr& env, TString sto void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr& env, ui32& groupSize, TBlobStorageGroupType& groupType, ui32& groupId, std::vector& pdiskLayout, - ui32 burstThresholdNs = 0, TString vdiskKind = "") { + ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) { groupSize = topology.TotalVDisks; groupType = topology.GType; env.reset(new TEnvironmentSetup({ @@ -42,7 +42,7 @@ void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr< .Erasure = groupType, .DiskType = NPDisk::EDeviceType::DEVICE_TYPE_ROT, .BurstThresholdNs = burstThresholdNs, - .VDiskKind = vdiskKind, + .DiskTimeAvailableScale = diskTimeAvailableScale, })); env->CreateBoxAndPool(1, 1); @@ -248,7 +248,7 @@ enum class ELoadDistribution : ui8 { template void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution loadDistribution, - ui32 burstThresholdNs = 0) { + ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) { TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true); auto* actor = new TInflightActor({requests, inflight, delay}, 8_MB); std::unique_ptr env; @@ -256,7 +256,8 @@ void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution TBlobStorageGroupType groupType; ui32 groupId; std::vector pdiskLayout; - SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs, "Test1"); + SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs, + diskTimeAvailableScale); actor->SetGroupId(groupId); env->Runtime->Register(actor, 1); @@ -286,4 +287,33 @@ Y_UNIT_TEST_SUITE(BurstDetection) { } } +void TestDiskTimeAvailableScaling() { + auto measure = [](float scale) { + TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true); + std::unique_ptr env; + ui32 groupSize; + TBlobStorageGroupType groupType; + ui32 groupId; + std::vector pdiskLayout; + SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, 0, scale); + + return AggregateVDiskCounters(env, env->StoragePoolName, groupSize, groupId, pdiskLayout, + "advancedCost", "DiskTimeAvailable"); + }; + + i64 test1 = measure(1); + i64 test2 = measure(2); + + i64 delta = test1 * 2 - test2; + + UNIT_ASSERT_LE_C(std::abs(delta), 10, "Total time available: with scale=1 time=" << test1 << + ", with scale=2 time=" << test2); +} + +Y_UNIT_TEST_SUITE(DiskTimeAvailable) { + Y_UNIT_TEST(Scaling) { + TestDiskTimeAvailableScaling(); + } +} + #undef MAKE_BURST_TEST diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp index 188fd70c0521..f4a0ec15f4db 100644 --- a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp +++ b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp @@ -42,7 +42,8 @@ class TBsCostModelMirror3of4 : public TBsCostModelBase { }; TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType, - const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs) + const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs, + float diskTimeAvailableScale) : GroupType(groupType) , CostCounters(counters->GetSubgroup("subsystem", "advancedCost")) , UserDiskCost(CostCounters->GetCounter("UserDiskCost", true)) @@ -50,8 +51,10 @@ TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::E , ScrubDiskCost(CostCounters->GetCounter("ScrubDiskCost", true)) , DefragDiskCost(CostCounters->GetCounter("DefragDiskCost", true)) , InternalDiskCost(CostCounters->GetCounter("InternalDiskCost", true)) - , BucketCapacity(burstThresholdNs / GroupType.BlobSubgroupSize()) - , Bucket(&DiskTimeAvailableNs, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true) + , DiskTimeAvailableCtr(CostCounters->GetCounter("DiskTimeAvailable", false)) + , BucketCapacity(burstThresholdNs * diskTimeAvailableScale / GroupType.BlobSubgroupSize()) + , Bucket(&DiskTimeAvailable, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true) + , DiskTimeAvailableScale(diskTimeAvailableScale) { BurstDetector.Initialize(CostCounters, "BurstDetector"); switch (GroupType.GetErasure()) { diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h index c8ead3dc52bf..115392cc9ab5 100644 --- a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h +++ b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h @@ -317,17 +317,20 @@ class TBsCostTracker { ::NMonitoring::TDynamicCounters::TCounterPtr ScrubDiskCost; ::NMonitoring::TDynamicCounters::TCounterPtr DefragDiskCost; ::NMonitoring::TDynamicCounters::TCounterPtr InternalDiskCost; + ::NMonitoring::TDynamicCounters::TCounterPtr DiskTimeAvailableCtr; TAtomic BucketCapacity; // 10^9 nsec - TAtomic DiskTimeAvailableNs = 1'000'000'000; + TAtomic DiskTimeAvailable = 1'000'000'000; TBucketQuoter> Bucket; TLight BurstDetector; std::atomic SeqnoBurstDetector = 0; static constexpr ui32 ConcurrentHugeRequestsAllowed = 3; + float DiskTimeAvailableScale = 1; public: TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType, - const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs); + const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs, + float diskTimeAvailableScale); template ui64 GetCost(const TEv& ev) const { @@ -353,7 +356,9 @@ class TBsCostTracker { } void SetTimeAvailable(ui32 diskTimeAvailableNSec) { - AtomicSet(DiskTimeAvailableNs, diskTimeAvailableNSec); + ui64 diskTimeAvailable = diskTimeAvailableNSec * DiskTimeAvailableScale; + AtomicSet(DiskTimeAvailable, diskTimeAvailable); + *DiskTimeAvailableCtr = diskTimeAvailable; } public: diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp index 32c3647fc4b0..18f5e5735aef 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp @@ -121,6 +121,7 @@ namespace NKikimr { #endif BurstThresholdNs = NPDisk::DevicePerformance.at(baseInfo.DeviceType).BurstThresholdNs; + DiskTimeAvailableScale = 1; } void TVDiskConfig::SetupHugeBytes() { @@ -163,7 +164,6 @@ namespace NKikimr { UPDATE_MACRO(BarrierValidation); - UPDATE_MACRO(BurstThresholdNs); #undef UPDATE_MACRO } diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.h b/ydb/core/blobstorage/vdisk/common/vdisk_config.h index f613e48360de..a2d5bd28b191 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.h @@ -209,7 +209,10 @@ namespace NKikimr { TDuration WhiteboardUpdateInterval; bool EnableVDiskCooldownTimeout; TControlWrapper EnableVPatch = true; + + ///////////// COST METRICS SETTINGS //////////////// ui64 BurstThresholdNs = 1'000'000'000; + float DiskTimeAvailableScale = 1; ///////////// FEATURE FLAGS //////////////////////// NKikimrConfig::TFeatureFlags FeatureFlags; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp index 7068c670d489..7afc10652dfb 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp @@ -31,7 +31,8 @@ namespace NKikimr { TReplQuoter::TPtr replPDiskWriteQuoter, TReplQuoter::TPtr replNodeRequestQuoter, TReplQuoter::TPtr replNodeResponseQuoter, - ui64 burstThresholdNs) + ui64 burstThresholdNs, + float diskTimeAvailableScale) : TBSProxyContext(vdiskCounters->GetSubgroup("subsystem", "memhull")) , VDiskActorId(vdiskActorId) , Top(std::move(top)) @@ -58,7 +59,8 @@ namespace NKikimr { , ReplPDiskWriteQuoter(std::move(replPDiskWriteQuoter)) , ReplNodeRequestQuoter(std::move(replNodeRequestQuoter)) , ReplNodeResponseQuoter(std::move(replNodeResponseQuoter)) - , CostTracker(std::make_shared(Top->GType, type, vdiskCounters, burstThresholdNs)) + , CostTracker(std::make_shared(Top->GType, type, vdiskCounters, burstThresholdNs, + diskTimeAvailableScale)) , OutOfSpaceState(Top->GetTotalVDisksNum(), Top->GetOrderNumber(ShortSelfVDisk)) , CostMonGroup(vdiskCounters, "subsystem", "cost") , Logger(as ? ActorSystemLogger(as) : DevNullLogger()) diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.h b/ydb/core/blobstorage/vdisk/common/vdisk_context.h index 4982b1cac202..cdf099729f32 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_context.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.h @@ -101,7 +101,8 @@ namespace NKikimr { TReplQuoter::TPtr replPDiskWriteQuoter = nullptr, TReplQuoter::TPtr replNodeRequestQuoter = nullptr, TReplQuoter::TPtr replNodeResponseQuoter = nullptr, - ui64 burstThresholdNs = 1'000'000'000); + ui64 burstThresholdNs = 1'000'000'000, + float diskTimeAvailableScale = 1); // The function checks response from PDisk. Normally, it's OK. // Other alternatives are: 1) shutdown; 2) FAIL diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp index 03bed8d88fb9..47b6b14f35d3 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp @@ -2157,6 +2157,10 @@ namespace NKikimr { TABLED() {str << "BurstThresholdNs";} TABLED() {str << Config->BurstThresholdNs;} } + TABLER() { + TABLED() {str << "DiskTimeAvailableScale";} + TABLED() {str << Config->DiskTimeAvailableScale;} + } } } diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp index b27176250edf..d2101a87a83d 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp @@ -716,7 +716,7 @@ namespace NKikimr { VCtx = MakeIntrusive(ctx.SelfID, GInfo->PickTopology(), VDiskCounters, SelfVDiskId, ctx.ExecutorThread.ActorSystem, baseInfo.DeviceType, baseInfo.DonorMode, baseInfo.ReplPDiskReadQuoter, baseInfo.ReplPDiskWriteQuoter, baseInfo.ReplNodeRequestQuoter, - baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs); + baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs, Config->DiskTimeAvailableScale); // create IntQueues IntQueueAsyncGets = std::make_unique( diff --git a/ydb/core/protos/blobstorage_vdisk_config.proto b/ydb/core/protos/blobstorage_vdisk_config.proto index 1467405be4cf..5b800c1cf658 100644 --- a/ydb/core/protos/blobstorage_vdisk_config.proto +++ b/ydb/core/protos/blobstorage_vdisk_config.proto @@ -22,8 +22,6 @@ message TVDiskConfig { optional bool BarrierValidation = 60; optional bool EnableOverseerLsnReporting = 61; // deprecated - - optional uint64 BurstThresholdNs = 62; }; // organizes hierarchy of VDisk configs: VDisk config may have a base config, diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index a2d2b5386ec2..ce0d18baaccf 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -4,6 +4,7 @@ import "ydb/core/fq/libs/config/protos/fq_config.proto"; import "ydb/core/protos/alloc.proto"; import "ydb/core/protos/auth.proto"; import "ydb/core/protos/blobstorage.proto"; +import "ydb/core/protos/blobstorage_base3.proto"; import "ydb/core/protos/blobstorage_config.proto"; import "ydb/core/protos/blobstorage_pdisk_config.proto"; import "ydb/core/protos/blobstorage_vdisk_config.proto"; @@ -280,6 +281,18 @@ message TBlobStorageConfig { } optional TAutoconfigSettings AutoconfigSettings = 6; + + message TCostMetricsConfig { + optional NKikimrBlobStorage.EPDiskType PDiskType = 1; + optional uint64 BurstThresholdNs = 2; + optional float DiskTimeAvailableScale = 3; + } + + message TCostMetricsSettings { + repeated TCostMetricsConfig VDiskTypes = 1; + }; + + optional TCostMetricsSettings CostMetricsSettings = 7; } message TBlobStorageFormatConfig {