From 6b43ceb191ed9b2dd712aadfdfb6b0e81efae808 Mon Sep 17 00:00:00 2001 From: kungasc Date: Tue, 30 Jan 2024 17:49:51 +0000 Subject: [PATCH 01/11] wip bytes limit --- .../flat_part_charge_btree_index.h | 30 ++++++++--- .../ut/ut_btree_index_iter_charge.cpp | 53 ++++++++++++++++++- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index b404c7450e20..26dc368ecbcb 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -82,6 +82,7 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); + ui64 prevKey1Items = 0, prevKey1Bytes = 0, key1Items = 0, key1Bytes = 0; const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity @@ -119,6 +120,14 @@ class TChargeBTreeIndex : public ICharge { return; } } + if (bytesLimit) { + ui64 bytes = child->DataSize - firstChild->DataSize; + Cerr << child->RowCount << " " << bytes << Endl; + if (LimitExceeded(bytes, bytesLimit)) { + overshot = false; + return; + } + } } } } @@ -128,8 +137,7 @@ class TChargeBTreeIndex : public ICharge { const auto skipUnloadedRows = [&](const TChildState& child) { if (child.PageId == key1PageId) { if (chargeGroups && chargeGroupsItemsLimit) { - // TODO: use erased count - ui64 unloadedItems = child.EndRowId - child.BeginRowId; + ui64 unloadedItems = key1Items - prevKey1Items; if (unloadedItems < chargeGroupsItemsLimit) { chargeGroupsItemsLimit -= unloadedItems; } else { @@ -149,14 +157,20 @@ class TChargeBTreeIndex : public ICharge { const auto& node = nextLevel.back(); if (child.PageId == key1PageId) { TRecIdx pos = node.Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); - key1PageId = node.GetShortChild(pos).PageId; + auto& key1Child = node.GetChild(pos); + key1PageId = key1Child.PageId; + key1Items = key1Child.GetNonErasedRowCount(); + key1Bytes = key1Child.DataSize; if (pos) { - beginRowId = Max(beginRowId, node.GetShortChild(pos - 1).RowCount); // move beginRowId to the first key >= key1 + auto& prevKey1Child = node.GetChild(pos - 1); + prevKey1Items = prevKey1Child.GetNonErasedRowCount(); + prevKey1Bytes = prevKey1Child.DataSize; + beginRowId = Max(beginRowId, prevKey1Child.RowCount); // move beginRowId to the first key >= key1 } } if (child.PageId == key2PageId) { TRecIdx pos = node.Seek(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); - auto& key2Child = node.GetShortChild(pos); + auto& key2Child = node.GetChild(pos); key2PageId = key2Child.PageId; endRowId = Min(endRowId, key2Child.RowCount + 1); // move endRowId - 1 to the first key > key2 if (key2Child.RowCount <= beginRowId) { @@ -319,15 +333,15 @@ class TChargeBTreeIndex : public ICharge { const auto& node = nextLevel.back(); if (child.PageId == key1PageId) { TRecIdx pos = node.SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); - auto& key1Child = node.GetShortChild(pos); + auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; endRowId = Min(endRowId, key1Child.RowCount); // move endRowId - 1 to the last key <= key1 } if (child.PageId == key2PageId) { TRecIdx pos = node.Seek(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); - key2PageId = node.GetShortChild(pos).PageId; + key2PageId = node.GetChild(pos).PageId; if (pos) { - auto& prevKey2Child = node.GetShortChild(pos - 1); + auto& prevKey2Child = node.GetChild(pos - 1); beginRowId = Max(beginRowId, prevKey2Child.RowCount - 1); // move beginRowId to the last key < key2 if (prevKey2Child.RowCount >= endRowId) { chargeGroups = false; // key2 is after current slice diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index feb6d5606444..f762a26cc9d4 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -426,7 +426,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { void CheckChargeRowId(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { - for (ui32 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { + for (ui64 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { for (TRowId rowId1 : xrange(0, part.Stat.Rows - 1)) { for (TRowId rowId2 : xrange(rowId1, part.Stat.Rows - 1)) { TTouchEnv bTreeEnv, flatEnv; @@ -446,7 +446,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { void CheckChargeKeys(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { - for (ui32 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { + for (ui64 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { for (ui32 secondCellKey1 : xrange(0, 14)) { for (ui32 firstCellKey2 : xrange(0, part.Stat.Rows / 7 + 1)) { @@ -487,6 +487,55 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } + void CheckChargeBytesLimit(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + for (bool reverse : {false, true}) { + for (ui64 bytesLimit : xrange(1, part.Stat.Bytes + 1, part.Stat.Bytes / 100)) { + for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { + for (ui32 secondCellKey1 : xrange(0, 14)) { + TVector key1 = MakeKey(firstCellKey1, secondCellKey1); + + TTouchEnv limitedEnv, unlimitedEnv; + TChargeBTreeIndex limitedCharge(&limitedEnv, part, tags, true); + TChargeBTreeIndex unlimitedCharge(&unlimitedEnv, part, tags, true); + + TStringBuilder message = TStringBuilder() << (reverse ? "ChargeBytesLimitReverse " : "ChargeBytesLimit ") << "("; + for (auto c : key1) { + message << c.AsValue() << " "; + } + message << ") bytes " << bytesLimit; + + Cerr << message << Endl; + DoChargeKeys(part, limitedCharge, limitedEnv, key1, { }, 0, bytesLimit, reverse, *keyDefaults, message); + DoChargeKeys(part, unlimitedCharge, unlimitedEnv, key1, { }, 0, 0, reverse, *keyDefaults, message); + + for (const auto& [groupId, unlimitedLoaded] : unlimitedEnv.Loaded) { + ui64 size = 0; + TSet limitedExpected, limitedLoaded; + for (auto pageId : unlimitedLoaded) { + if (part.GetPageType(pageId, groupId) == EPage::DataPage) { + size += part.GetPageSize(pageId, groupId); + limitedExpected.insert(pageId); + if (size > bytesLimit) { + break; + } + } + } + for (auto pageId : limitedEnv.Loaded[groupId]) { + if (part.GetPageType(pageId, groupId) == EPage::DataPage) { + limitedLoaded.insert(pageId); + } + } + + UNIT_ASSERT_VALUES_EQUAL_C(limitedExpected, limitedLoaded, + TStringBuilder() << message << " Group {" << groupId.Index << "," << groupId.IsHistoric() << "}"); + } + } + } + } + } + } + } + void CheckPart(TMakePartParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); From bf1024b8c8d80f81773fe9a3bbdf3f9ea539e7ab Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 31 Jan 2024 15:36:28 +0000 Subject: [PATCH 02/11] fix assert --- .../tablet_flat/ut/ut_btree_index_iter_charge.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index f762a26cc9d4..609e99d6a9f6 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -510,11 +510,14 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { for (const auto& [groupId, unlimitedLoaded] : unlimitedEnv.Loaded) { ui64 size = 0; - TSet limitedExpected, limitedLoaded; + TVector expected, loaded; for (auto pageId : unlimitedLoaded) { if (part.GetPageType(pageId, groupId) == EPage::DataPage) { - size += part.GetPageSize(pageId, groupId); - limitedExpected.insert(pageId); + if (expected) { + // do not count first page + size += part.GetPageSize(pageId, groupId); + } + expected.push_back(pageId); if (size > bytesLimit) { break; } @@ -522,13 +525,12 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } for (auto pageId : limitedEnv.Loaded[groupId]) { if (part.GetPageType(pageId, groupId) == EPage::DataPage) { - limitedLoaded.insert(pageId); + loaded.push_back(pageId); } } - UNIT_ASSERT_VALUES_EQUAL_C(limitedExpected, limitedLoaded, + UNIT_ASSERT_VALUES_EQUAL_C(expected, loaded, TStringBuilder() << message << " Group {" << groupId.Index << "," << groupId.IsHistoric() << "}"); - } } } } @@ -547,6 +549,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get()); CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get()); + CheckChargeBytesLimit(part, tags, eggs.Scheme->Keys.Get()); } Y_UNIT_TEST(NoNodes) { From d6c1b42dbd00850825aa83abce4b526fd19b5da1 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 31 Jan 2024 16:12:04 +0000 Subject: [PATCH 03/11] better style --- ydb/core/tablet_flat/flat_part_charge_btree_index.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 26dc368ecbcb..cc4ac71a1fd8 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -82,7 +82,7 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); - ui64 prevKey1Items = 0, prevKey1Bytes = 0, key1Items = 0, key1Bytes = 0; + ui64 key1Items = 0, key1Bytes = 0, prevKey1Items = 0, prevKey1Bytes = 0; const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity @@ -122,7 +122,6 @@ class TChargeBTreeIndex : public ICharge { } if (bytesLimit) { ui64 bytes = child->DataSize - firstChild->DataSize; - Cerr << child->RowCount << " " << bytes << Endl; if (LimitExceeded(bytes, bytesLimit)) { overshot = false; return; From 42789be2d2f3bb8bf516930000ed154ce0649012 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 31 Jan 2024 16:16:58 +0000 Subject: [PATCH 04/11] limit reverse --- .../tablet_flat/flat_part_charge_btree_index.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index cc4ac71a1fd8..0cf0697b1656 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -261,6 +261,7 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); + ui64 prevKey1Items = 0, prevKey1Bytes = 0, key1Items = 0, key1Bytes = 0; const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity @@ -301,6 +302,13 @@ class TChargeBTreeIndex : public ICharge { return; } } + if (bytesLimit) { + ui64 bytes = prevLastChild->DataSize - child->DataSize; + if (LimitExceeded(bytes, bytesLimit)) { + overshot = false; + return; + } + } } } ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); @@ -311,8 +319,7 @@ class TChargeBTreeIndex : public ICharge { const auto skipUnloadedRows = [&](const TChildState& child) { if (child.PageId == key1PageId) { if (chargeGroups && chargeGroupsItemsLimit) { - // TODO: use erased count - ui64 unloadedItems = child.EndRowId - child.BeginRowId; + ui64 unloadedItems = key1Items - prevKey1Items; if (unloadedItems < chargeGroupsItemsLimit) { chargeGroupsItemsLimit -= unloadedItems; } else { @@ -334,6 +341,13 @@ class TChargeBTreeIndex : public ICharge { TRecIdx pos = node.SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; + key1Items = key1Child.GetNonErasedRowCount(); + key1Bytes = key1Child.DataSize; + if (pos) { + auto& prevKey1Child = node.GetChild(pos - 1); + prevKey1Items = prevKey1Child.GetNonErasedRowCount(); + prevKey1Bytes = prevKey1Child.DataSize; + } endRowId = Min(endRowId, key1Child.RowCount); // move endRowId - 1 to the last key <= key1 } if (child.PageId == key2PageId) { From ab5c14f7910ec7087fb21deb5e2010279bf447a7 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 31 Jan 2024 19:10:39 +0000 Subject: [PATCH 05/11] fix reverse check --- .../ut/ut_btree_index_iter_charge.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 609e99d6a9f6..0cfa91e767d4 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -504,20 +504,23 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } message << ") bytes " << bytesLimit; - Cerr << message << Endl; DoChargeKeys(part, limitedCharge, limitedEnv, key1, { }, 0, bytesLimit, reverse, *keyDefaults, message); DoChargeKeys(part, unlimitedCharge, unlimitedEnv, key1, { }, 0, 0, reverse, *keyDefaults, message); for (const auto& [groupId, unlimitedLoaded] : unlimitedEnv.Loaded) { ui64 size = 0; - TVector expected, loaded; - for (auto pageId : unlimitedLoaded) { + TSet expected, loaded; + TVector unlimitedLoadedList(unlimitedLoaded.begin(), unlimitedLoaded.end()); + if (reverse) { + std::reverse(unlimitedLoadedList.begin(), unlimitedLoadedList.end()); + } + for (auto pageId : unlimitedLoadedList) { if (part.GetPageType(pageId, groupId) == EPage::DataPage) { - if (expected) { - // do not count first page + if (expected || !groupId.IsMain()) { + // do not count first main page size += part.GetPageSize(pageId, groupId); } - expected.push_back(pageId); + expected.insert(pageId); if (size > bytesLimit) { break; } @@ -525,7 +528,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } for (auto pageId : limitedEnv.Loaded[groupId]) { if (part.GetPageType(pageId, groupId) == EPage::DataPage) { - loaded.push_back(pageId); + loaded.insert(pageId); } } From a567b0b9f15ed9878f0f98fdfc2751e32d1440a9 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 31 Jan 2024 20:00:51 +0000 Subject: [PATCH 06/11] bytes limit groups --- ydb/core/tablet_flat/flat_part_charge_btree_index.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 0cf0697b1656..b83ecb8306d3 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -471,17 +471,19 @@ class TChargeBTreeIndex : public ICharge { bool DoPrechargeGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept { bool ready = true; - Y_UNUSED(bytesLimit); - const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; TVector level, nextLevel(::Reserve(3)); + ui64 prevFirstChildDataSize = 0; const auto iterateLevel = [&](const auto& tryHandleChild) { for (const auto &node : level) { TRecIdx from = 0, to = node.GetChildrenCount(); if (node.BeginRowId < beginRowId) { from = node.Seek(beginRowId); + if (from) { + prevFirstChildDataSize = node.GetShortChild(from - 1).DataSize; + } } if (node.EndRowId > endRowId) { to = node.Seek(endRowId - 1) + 1; @@ -492,6 +494,12 @@ class TChargeBTreeIndex : public ICharge { TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; TRowId endRowId = child.RowCount; ready &= tryHandleChild(TChildState(child.PageId, beginRowId, endRowId)); + if (bytesLimit) { + ui64 bytes = child.DataSize - prevFirstChildDataSize; + if (LimitExceeded(bytes, bytesLimit)) { + return; + } + } } } }; From f037f63237ac13ebe3d27720198bbcd8b6be8c7f Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 1 Feb 2024 10:03:26 +0000 Subject: [PATCH 07/11] + DoGroupReverse --- .../flat_part_charge_btree_index.h | 88 +++++++++++++++++-- 1 file changed, 79 insertions(+), 9 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index b83ecb8306d3..b6c49542e995 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -12,6 +12,7 @@ class TChargeBTreeIndex : public ICharge { using TRecIdx = NPage::TRecIdx; using TGroupId = NPage::TGroupId; using TChild = TBtreeIndexNode::TChild; + using TShortChild = TBtreeIndexNode::TShortChild; struct TChildState { TPageId PageId; @@ -220,7 +221,7 @@ class TChargeBTreeIndex : public ICharge { } if (!ready) { // some index pages are missing, do not continue - ready &= DoPrechargeGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, false}; } @@ -234,7 +235,7 @@ class TChargeBTreeIndex : public ICharge { iterateLevel(tryHandleDataPage); } - ready &= DoPrechargeGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } @@ -415,7 +416,7 @@ class TChargeBTreeIndex : public ICharge { } if (!ready) { // some index pages are missing, do not continue - ready &= DoPrechargeGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, false}; } @@ -429,13 +430,13 @@ class TChargeBTreeIndex : public ICharge { iterateLevel(tryHandleDataPage); } - ready &= DoPrechargeGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } private: - bool DoPrechargeGroups(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { + bool DoGroups(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { bool ready = true; if (chargeGroups && beginRowId < endRowId) { @@ -444,14 +445,14 @@ class TChargeBTreeIndex : public ICharge { } for (auto groupId : Groups) { - ready &= DoPrechargeGroup(groupId, beginRowId, endRowId, bytesLimit); + ready &= DoGroup(groupId, beginRowId, endRowId, bytesLimit); } } return ready; } - bool DoPrechargeGroupsReverse(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { + bool DoGroupsReverse(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { bool ready = true; if (chargeGroups && beginRowId < endRowId) { @@ -460,7 +461,7 @@ class TChargeBTreeIndex : public ICharge { } for (auto groupId : Groups) { - ready &= DoPrechargeGroup(groupId, beginRowId, endRowId, bytesLimit); + ready &= DoGroupReverse(groupId, beginRowId, endRowId, bytesLimit); } } @@ -468,7 +469,7 @@ class TChargeBTreeIndex : public ICharge { } private: - bool DoPrechargeGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept { + bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept { bool ready = true; const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; @@ -535,6 +536,75 @@ class TChargeBTreeIndex : public ICharge { return ready; } + bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept { + bool ready = true; + + const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; + + // level's nodes is in reverse order + TVector level, nextLevel(::Reserve(3)); + + const auto iterateLevel = [&](const auto& tryHandleChild) { + const TShortChild* lastChild = nullptr; + for (const auto &node : level) { + TRecIdx from = 0, to = node.GetChildrenCount(); + if (node.BeginRowId < beginRowId) { + from = node.Seek(beginRowId); + } + if (node.EndRowId > endRowId) { + to = node.Seek(endRowId - 1) + 1; + } + for (TRecIdx posExt = to; posExt > from; posExt--) { + auto child = node.GetShortChildRef(posExt - 1); + auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr; + TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; + TRowId endRowId = child->RowCount; + if (bytesLimit) { + if (!lastChild) { + lastChild = child; + } else { + ui64 bytes = lastChild->DataSize - child->DataSize; + if (LimitExceeded(bytes, bytesLimit)) { + return; + } + } + } + ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); + } + } + }; + + const auto tryHandleNode = [&](TChildState child) -> bool { + return TryLoadNode(child, nextLevel); + }; + + const auto tryHandleDataPage = [&](TChildState child) -> bool { + return HasDataPage(child.PageId, groupId); + }; + + for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + if (height == 0) { + ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + } else { + iterateLevel(tryHandleNode); + } + level.swap(nextLevel); + nextLevel.clear(); + } + + if (!ready) { // some index pages are missing, do not continue + return ready; + } + + if (meta.LevelCount == 0) { + ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + } else { + iterateLevel(tryHandleDataPage); + } + + return ready; + } + private: const TSharedData* TryGetDataPage(TPageId pageId, TGroupId groupId) const noexcept { return Env->TryGetPage(Part, pageId, groupId); From df835770f2555970e931fb407bf593cf59bd373b Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 1 Feb 2024 10:19:39 +0000 Subject: [PATCH 08/11] check all groups --- .../tablet_flat/ut/ut_btree_index_iter_charge.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 0cfa91e767d4..66b4c0598d42 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -507,14 +507,20 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { DoChargeKeys(part, limitedCharge, limitedEnv, key1, { }, 0, bytesLimit, reverse, *keyDefaults, message); DoChargeKeys(part, unlimitedCharge, unlimitedEnv, key1, { }, 0, 0, reverse, *keyDefaults, message); - for (const auto& [groupId, unlimitedLoaded] : unlimitedEnv.Loaded) { + TSet groupIds; + for (const auto &c : {limitedEnv.Loaded, unlimitedEnv.Loaded}) { + for (const auto &g : c) { + groupIds.insert(g.first); + } + } + for (auto groupId : groupIds) { ui64 size = 0; TSet expected, loaded; - TVector unlimitedLoadedList(unlimitedLoaded.begin(), unlimitedLoaded.end()); + TVector unlimitedLoaded(unlimitedEnv.Loaded[groupId].begin(), unlimitedEnv.Loaded[groupId].end()); if (reverse) { - std::reverse(unlimitedLoadedList.begin(), unlimitedLoadedList.end()); + std::reverse(unlimitedLoaded.begin(), unlimitedLoaded.end()); } - for (auto pageId : unlimitedLoadedList) { + for (auto pageId : unlimitedLoaded) { if (part.GetPageType(pageId, groupId) == EPage::DataPage) { if (expected || !groupId.IsMain()) { // do not count first main page From bc037e26549347e3b9ff788a4bb4ad45f1b341c6 Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 1 Feb 2024 18:20:44 +0000 Subject: [PATCH 09/11] fix skipping unloaded groups data --- .../flat_part_charge_btree_index.h | 102 +++++++++++++----- 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index b6c49542e995..e94f82cf9dbf 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -70,6 +70,7 @@ class TChargeBTreeIndex : public ICharge { bool ready = true, overshot = true; bool chargeGroups = bool(Groups); // false value means that beginRowId, endRowId are invalid and shouldn't be used ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups + TRowId beginBytesLimitRowId = Max(); const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(endRowId <= meta.RowCount); @@ -144,6 +145,9 @@ class TChargeBTreeIndex : public ICharge { chargeGroups = false; } } + if (chargeGroups && bytesLimit) { + beginBytesLimitRowId = Max(beginRowId, child.BeginRowId); + } beginRowId = Max(beginRowId, child.EndRowId); } if (child.PageId == key2PageId) { @@ -221,7 +225,7 @@ class TChargeBTreeIndex : public ICharge { } if (!ready) { // some index pages are missing, do not continue - ready &= DoGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroups(chargeGroups, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, false}; } @@ -235,7 +239,7 @@ class TChargeBTreeIndex : public ICharge { iterateLevel(tryHandleDataPage); } - ready &= DoGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroups(chargeGroups, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } @@ -248,6 +252,7 @@ class TChargeBTreeIndex : public ICharge { bool ready = true, overshot = true; bool chargeGroups = bool(Groups); // false value means that beginRowId, endRowId are invalid and shouldn't be used ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups + TRowId endBytesLimitRowId = Max(); const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(endRowId <= meta.RowCount); @@ -327,6 +332,9 @@ class TChargeBTreeIndex : public ICharge { chargeGroups = false; } } + if (chargeGroups && bytesLimit) { + endBytesLimitRowId = Min(endRowId, child.EndRowId); + } endRowId = Min(endRowId, child.BeginRowId); } if (child.PageId == key2PageId) { @@ -416,7 +424,7 @@ class TChargeBTreeIndex : public ICharge { } if (!ready) { // some index pages are missing, do not continue - ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, false}; } @@ -430,38 +438,44 @@ class TChargeBTreeIndex : public ICharge { iterateLevel(tryHandleDataPage); } - ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } private: - bool DoGroups(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { + bool DoGroups(bool chargeGroups, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { bool ready = true; if (chargeGroups && beginRowId < endRowId) { if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) { endRowId = beginRowId + itemsLimit + 1; } + if (beginBytesLimitRowId == Max()) { + beginBytesLimitRowId = beginRowId; + } for (auto groupId : Groups) { - ready &= DoGroup(groupId, beginRowId, endRowId, bytesLimit); + ready &= DoGroup(groupId, beginRowId, endRowId, beginBytesLimitRowId, bytesLimit); } } return ready; } - bool DoGroupsReverse(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { + bool DoGroupsReverse(bool chargeGroups, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { bool ready = true; if (chargeGroups && beginRowId < endRowId) { if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) { beginRowId = endRowId - itemsLimit - 1; } + if (endBytesLimitRowId == Max()) { + endBytesLimitRowId = endRowId; + } for (auto groupId : Groups) { - ready &= DoGroupReverse(groupId, beginRowId, endRowId, bytesLimit); + ready &= DoGroupReverse(groupId, beginRowId, endRowId, endBytesLimitRowId, bytesLimit); } } @@ -469,38 +483,40 @@ class TChargeBTreeIndex : public ICharge { } private: - bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept { + bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 bytesLimit) const noexcept { bool ready = true; const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; TVector level, nextLevel(::Reserve(3)); - ui64 prevFirstChildDataSize = 0; + ui64 prevBeginDataSize = 0; + ui64 prevBeginBytesLimitDataSize = bytesLimit ? GetPrevDataSize(meta, beginBytesLimitRowId) : 0; const auto iterateLevel = [&](const auto& tryHandleChild) { + ui64 prevChildDataSize = prevBeginDataSize; for (const auto &node : level) { TRecIdx from = 0, to = node.GetChildrenCount(); if (node.BeginRowId < beginRowId) { from = node.Seek(beginRowId); if (from) { - prevFirstChildDataSize = node.GetShortChild(from - 1).DataSize; + prevChildDataSize = prevBeginDataSize = node.GetShortChild(from - 1).DataSize; } } if (node.EndRowId > endRowId) { to = node.Seek(endRowId - 1) + 1; } for (TRecIdx pos : xrange(from, to)) { - auto child = node.GetShortChild(pos); + auto child = node.GetShortChildRef(pos); auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId endRowId = child.RowCount; - ready &= tryHandleChild(TChildState(child.PageId, beginRowId, endRowId)); + TRowId endRowId = child->RowCount; if (bytesLimit) { - ui64 bytes = child.DataSize - prevFirstChildDataSize; - if (LimitExceeded(bytes, bytesLimit)) { + if (prevChildDataSize > prevBeginBytesLimitDataSize && LimitExceeded(prevChildDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { return; } } + ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); + prevChildDataSize = child->DataSize; } } }; @@ -536,16 +552,16 @@ class TChargeBTreeIndex : public ICharge { return ready; } - bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept { + bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 bytesLimit) const noexcept { bool ready = true; const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; // level's nodes is in reverse order TVector level, nextLevel(::Reserve(3)); + ui64 endBytesLimitDataSize = bytesLimit ? GetDataSize(meta, endBytesLimitRowId - 1) : 0; const auto iterateLevel = [&](const auto& tryHandleChild) { - const TShortChild* lastChild = nullptr; for (const auto &node : level) { TRecIdx from = 0, to = node.GetChildrenCount(); if (node.BeginRowId < beginRowId) { @@ -560,13 +576,8 @@ class TChargeBTreeIndex : public ICharge { TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; TRowId endRowId = child->RowCount; if (bytesLimit) { - if (!lastChild) { - lastChild = child; - } else { - ui64 bytes = lastChild->DataSize - child->DataSize; - if (LimitExceeded(bytes, bytesLimit)) { - return; - } + if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { + return; } } ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); @@ -605,6 +616,45 @@ class TChargeBTreeIndex : public ICharge { return ready; } +private: + ui64 GetPrevDataSize(const TBtreeIndexMeta& meta, TRowId rowId) const { + TPageId pageId = meta.PageId; + ui64 result = 0; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = Env->TryGetPage(Part, pageId); + if (!page) { + return result; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(rowId); + pageId = node.GetShortChild(pos).PageId; + if (pos) { + result = node.GetShortChild(pos - 1).DataSize; + } + } + + return result; + } + + ui64 GetDataSize(TBtreeIndexMeta meta, TRowId rowId) const { + TPageId pageId = meta.PageId; + ui64 result = meta.DataSize; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = Env->TryGetPage(Part, pageId); + if (!page) { + return result; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(rowId); + pageId = node.GetShortChild(pos).PageId; + result = node.GetShortChild(pos).DataSize; + } + + return result; + } + private: const TSharedData* TryGetDataPage(TPageId pageId, TGroupId groupId) const noexcept { return Env->TryGetPage(Part, pageId, groupId); @@ -614,7 +664,7 @@ class TChargeBTreeIndex : public ICharge { return bool(Env->TryGetPage(Part, pageId, groupId)); } - bool TryLoadNode(TChildState& child, TVector& level) const noexcept { + bool TryLoadNode(const TChildState& child, TVector& level) const noexcept { auto page = Env->TryGetPage(Part, child.PageId); if (!page) { return false; From 02a2c61907a902b80429f926c2e9438e8d7cba34 Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 1 Feb 2024 18:49:19 +0000 Subject: [PATCH 10/11] better groups configuration --- .../ut/ut_btree_index_iter_charge.cpp | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 66b4c0598d42..07c5c5cff587 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -91,30 +91,29 @@ namespace { NPage::TConf conf; switch (params.Levels) { case 0: - if (params.Groups) { - conf.Group(3).PageRows = 1; - } + conf.Group(0).PageRows = 999; break; case 1: + conf.Group(0).PageRows = 2; + break; case 3: conf.Group(0).PageRows = 2; - if (params.Groups) { - for (auto i : xrange(1, 4)) { - conf.Group(i).PageRows = 1; - } - } - if (params.Levels == 3) { - conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; - if (params.Groups) { - conf.Group(1).BTreeIndexNodeKeysMin = conf.Group(1).BTreeIndexNodeKeysMax = 2; - conf.Group(2).BTreeIndexNodeKeysMin = conf.Group(2).BTreeIndexNodeKeysMax = 2; - } - } + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; break; default: Y_Fail("Unknown levels"); } + if (params.Groups) { + conf.Group(1).PageRows = params.Levels ? 1 : 999; + conf.Group(2).PageRows = 3; + conf.Group(3).PageRows = 1; + + conf.Group(1).BTreeIndexNodeKeysMin = conf.Group(1).BTreeIndexNodeKeysMax = conf.Group(0).BTreeIndexNodeKeysMax; + conf.Group(2).BTreeIndexNodeKeysMin = conf.Group(2).BTreeIndexNodeKeysMax = 2; + conf.Group(3).BTreeIndexNodeKeysMin = conf.Group(3).BTreeIndexNodeKeysMax = 999; + } + TLayoutCook lay; lay @@ -189,9 +188,8 @@ namespace { UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[0].LevelCount, params.Levels); if (params.Groups) { - UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[0].LevelCount, params.Levels); UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[1].LevelCount, params.Levels); - UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[2].LevelCount, params.Levels); + UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[2].LevelCount, 2); UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[3].LevelCount, 1); } @@ -489,7 +487,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { void CheckChargeBytesLimit(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { - for (ui64 bytesLimit : xrange(1, part.Stat.Bytes + 1, part.Stat.Bytes / 100)) { + for (ui64 bytesLimit : xrange(1, part.Stat.Bytes + 100, part.Stat.Bytes / 100)) { for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { for (ui32 secondCellKey1 : xrange(0, 14)) { TVector key1 = MakeKey(firstCellKey1, secondCellKey1); From 549aa289fd8c9035e43625ce85b4a871038cc7a2 Mon Sep 17 00:00:00 2001 From: kungasc Date: Fri, 2 Feb 2024 09:35:25 +0000 Subject: [PATCH 11/11] clean up --- .../flat_part_charge_btree_index.h | 71 +++++++++---------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index e94f82cf9dbf..baac5f3fafea 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -70,12 +70,12 @@ class TChargeBTreeIndex : public ICharge { bool ready = true, overshot = true; bool chargeGroups = bool(Groups); // false value means that beginRowId, endRowId are invalid and shouldn't be used ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups - TRowId beginBytesLimitRowId = Max(); + TRowId beginBytesLimitRowId = Max(); // first unloaded probably needed row const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(endRowId <= meta.RowCount); - TRowId sliceEndRowId = endRowId; + const TRowId sliceEndRowId = endRowId; if (Y_UNLIKELY(key1 && key2 && Compare(key1, key2, keyDefaults) > 0)) { key2 = key1; // will not go further than key1 chargeGroups = false; @@ -84,14 +84,14 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); - ui64 key1Items = 0, key1Bytes = 0, prevKey1Items = 0, prevKey1Bytes = 0; + ui64 key1Items = 0, prevKey1Items = 0; const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity // always load beginRowId regardless of keys const TRowId levelBeginRowId = beginRowId, levelEndRowId = Max(endRowId, beginRowId + 1); + const TChild* levelFirstChild = nullptr; - const TChild* firstChild = nullptr; for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { continue; @@ -107,23 +107,23 @@ class TChargeBTreeIndex : public ICharge { for (TRecIdx pos : xrange(from, to)) { auto child = node.GetChildRef(pos); auto prevChild = pos ? node.GetChildRef(pos - 1) : nullptr; - TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId endRowId = child->RowCount; - ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); + TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; + TRowId childEndRowId = child->RowCount; + ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); if (itemsLimit || bytesLimit) { - if (!firstChild) { - // do not apply limit on the first child because beginRowId/key1 position is uncertain - firstChild = child; + if (!levelFirstChild) { + // do not apply limits on the first child because beginRowId/key1 position is uncertain + levelFirstChild = child; } else { if (itemsLimit) { - ui64 items = child->GetNonErasedRowCount() - firstChild->GetNonErasedRowCount(); + ui64 items = child->GetNonErasedRowCount() - levelFirstChild->GetNonErasedRowCount(); if (LimitExceeded(items, itemsLimit)) { overshot = false; return; } } if (bytesLimit) { - ui64 bytes = child->DataSize - firstChild->DataSize; + ui64 bytes = child->DataSize - levelFirstChild->DataSize; if (LimitExceeded(bytes, bytesLimit)) { overshot = false; return; @@ -164,11 +164,9 @@ class TChargeBTreeIndex : public ICharge { auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; key1Items = key1Child.GetNonErasedRowCount(); - key1Bytes = key1Child.DataSize; if (pos) { auto& prevKey1Child = node.GetChild(pos - 1); prevKey1Items = prevKey1Child.GetNonErasedRowCount(); - prevKey1Bytes = prevKey1Child.DataSize; beginRowId = Max(beginRowId, prevKey1Child.RowCount); // move beginRowId to the first key >= key1 } } @@ -252,12 +250,12 @@ class TChargeBTreeIndex : public ICharge { bool ready = true, overshot = true; bool chargeGroups = bool(Groups); // false value means that beginRowId, endRowId are invalid and shouldn't be used ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups - TRowId endBytesLimitRowId = Max(); + TRowId endBytesLimitRowId = Max(); // last unloaded probably needed row const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(endRowId <= meta.RowCount); - TRowId sliceBeginRowId = beginRowId; + const TRowId sliceBeginRowId = beginRowId; if (Y_UNLIKELY(key1 && key2 && Compare(key2, key1, keyDefaults) > 0)) { key2 = key1; // will not go further than key1 chargeGroups = false; @@ -267,15 +265,14 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); - ui64 prevKey1Items = 0, prevKey1Bytes = 0, key1Items = 0, key1Bytes = 0; + ui64 prevKey1Items = 0, key1Items = 0; const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity // always load endRowId - 1 regardless of keys const TRowId levelBeginRowId = Min(beginRowId, endRowId - 1), levelEndRowId = endRowId; - - const TChild* lastChild = nullptr; - const TChild* prevLastChild = nullptr; + const TChild *levelLastChild = nullptr, *levelPrevLastChild = nullptr; + for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { continue; @@ -291,25 +288,25 @@ class TChargeBTreeIndex : public ICharge { for (TRecIdx posExt = to; posExt > from; posExt--) { auto child = node.GetChildRef(posExt - 1); auto prevChild = posExt - 1 ? node.GetChildRef(posExt - 2) : nullptr; - TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId endRowId = child->RowCount; + TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; + TRowId childEndRowId = child->RowCount; if (itemsLimit || bytesLimit) { - if (!lastChild) { - // do not apply limit on the last child because endRowId/key1 position is uncertain - lastChild = child; + if (!levelLastChild) { + // do not apply limits on the last child because endRowId/key1 position is uncertain + levelLastChild = child; } else { - if (!prevLastChild) { - prevLastChild = child; + if (!levelPrevLastChild) { + levelPrevLastChild = child; } if (itemsLimit) { - ui64 items = prevLastChild->GetNonErasedRowCount() - child->GetNonErasedRowCount(); + ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child->GetNonErasedRowCount(); if (LimitExceeded(items, itemsLimit)) { overshot = false; return; } } if (bytesLimit) { - ui64 bytes = prevLastChild->DataSize - child->DataSize; + ui64 bytes = levelPrevLastChild->DataSize - child->DataSize; if (LimitExceeded(bytes, bytesLimit)) { overshot = false; return; @@ -317,7 +314,7 @@ class TChargeBTreeIndex : public ICharge { } } } - ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); + ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); } } }; @@ -351,11 +348,9 @@ class TChargeBTreeIndex : public ICharge { auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; key1Items = key1Child.GetNonErasedRowCount(); - key1Bytes = key1Child.DataSize; if (pos) { auto& prevKey1Child = node.GetChild(pos - 1); prevKey1Items = prevKey1Child.GetNonErasedRowCount(); - prevKey1Bytes = prevKey1Child.DataSize; } endRowId = Min(endRowId, key1Child.RowCount); // move endRowId - 1 to the last key <= key1 } @@ -508,14 +503,14 @@ class TChargeBTreeIndex : public ICharge { for (TRecIdx pos : xrange(from, to)) { auto child = node.GetShortChildRef(pos); auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; - TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId endRowId = child->RowCount; + TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; + TRowId childEndRowId = child->RowCount; if (bytesLimit) { if (prevChildDataSize > prevBeginBytesLimitDataSize && LimitExceeded(prevChildDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { return; } } - ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); + ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); prevChildDataSize = child->DataSize; } } @@ -573,14 +568,14 @@ class TChargeBTreeIndex : public ICharge { for (TRecIdx posExt = to; posExt > from; posExt--) { auto child = node.GetShortChildRef(posExt - 1); auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr; - TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId endRowId = child->RowCount; + TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; + TRowId childEndRowId = child->RowCount; if (bytesLimit) { if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { return; } } - ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId)); + ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); } } };