From 1ba9a2d107419445251bb6992df2fc64ae24def0 Mon Sep 17 00:00:00 2001 From: qkrorlqr Date: Sat, 21 Sep 2024 11:41:31 +0000 Subject: [PATCH 1/3] issue-2033: we should ignore DupCache entries with invalid types and proceed with request processing as if the entry didn't exist (for the case of request id collisions) --- .../libs/diagnostics/critical_events.h | 1 + .../tablet/tablet_actor_createhandle.cpp | 5 +-- .../tablet/tablet_actor_createnode.cpp | 23 +++++++------ .../tablet/tablet_actor_renamenode.cpp | 8 +++-- .../tablet/tablet_actor_unlinknode.cpp | 10 ++++-- .../libs/storage/tablet/tablet_state.h | 2 +- .../storage/tablet/tablet_state_sessions.cpp | 12 ++++--- .../libs/storage/tablet/tablet_ut_nodes.cpp | 32 +++++++++++++++++-- 8 files changed, 68 insertions(+), 25 deletions(-) diff --git a/cloud/filestore/libs/diagnostics/critical_events.h b/cloud/filestore/libs/diagnostics/critical_events.h index 32042b326b..cbf4980204 100644 --- a/cloud/filestore/libs/diagnostics/critical_events.h +++ b/cloud/filestore/libs/diagnostics/critical_events.h @@ -21,6 +21,7 @@ namespace NCloud::NFileStore{ xxx(NotEnoughResultsInGetNodeAttrBatchResponses) \ xxx(AsyncDestroyHandleFailed) \ xxx(DuplicateRequestId) \ + xxx(InvalidDupCacheEntry) \ // FILESTORE_CRITICAL_EVENTS #define FILESTORE_IMPOSSIBLE_EVENTS(xxx) \ diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp index 395890d4e4..1265202206 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp @@ -61,10 +61,11 @@ void TIndexTabletActor::HandleCreateHandle( const auto requestId = GetRequestId(msg->Record); if (const auto* e = session->LookupDupEntry(requestId)) { auto response = std::make_unique(); - GetDupCacheEntry(e, response->Record); // sometimes bugged clients send us duplicate request ids // see #2033 - if (msg->Record.GetName().Empty() && msg->Record.GetNodeId() + if (!GetDupCacheEntry(e, response->Record)) { + session->DropDupEntry(requestId); + } else if (msg->Record.GetName().Empty() && msg->Record.GetNodeId() != response->Record.GetNodeAttr().GetId()) { ReportDuplicateRequestId(TStringBuilder() << "CreateHandle response" diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp index ab5a258652..6963ced8fe 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp @@ -290,17 +290,22 @@ void TIndexTabletActor::HandleCreateNode( } auto* msg = ev->Get(); - if (const auto* e = session->LookupDupEntry(GetRequestId(msg->Record))) { + const auto requestId = GetRequestId(msg->Record); + if (const auto* e = session->LookupDupEntry(requestId)) { auto response = std::make_unique(); - GetDupCacheEntry(e, response->Record); - if (response->Record.GetNode().GetId() == 0) { - // it's an external node which is not yet created in follower - // this check is needed for the case of leader reboot - *response->Record.MutableError() = MakeError( - E_REJECTED, - "node not yet created in follower"); + if (GetDupCacheEntry(e, response->Record)) { + if (response->Record.GetNode().GetId() == 0) { + // it's an external node which is not yet created in follower + // this check is needed for the case of leader reboot + *response->Record.MutableError() = MakeError( + E_REJECTED, + "node not yet created in follower"); + } + + return NCloud::Reply(ctx, *ev, std::move(response)); } - return NCloud::Reply(ctx, *ev, std::move(response)); + + session->DropDupEntry(requestId); } ui64 parentNodeId = msg->Record.GetNodeId(); diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp index 4cd39db0a1..3e1a0bf184 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp @@ -54,10 +54,14 @@ void TIndexTabletActor::HandleRenameNode( } auto* msg = ev->Get(); + const auto requestId = GetRequestId(msg->Record); if (const auto* e = session->LookupDupEntry(GetRequestId(msg->Record))) { auto response = std::make_unique(); - GetDupCacheEntry(e, response->Record); - return NCloud::Reply(ctx, *ev, std::move(response)); + if (GetDupCacheEntry(e, response->Record)) { + return NCloud::Reply(ctx, *ev, std::move(response)); + } + + session->DropDupEntry(requestId); } auto requestInfo = CreateRequestInfo( diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp index 551d218d2c..a03677707a 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp @@ -245,10 +245,14 @@ void TIndexTabletActor::HandleUnlinkNode( } auto* msg = ev->Get(); - if (const auto* e = session->LookupDupEntry(GetRequestId(msg->Record))) { + const auto requestId = GetRequestId(msg->Record); + if (const auto* e = session->LookupDupEntry(requestId)) { auto response = std::make_unique(); - GetDupCacheEntry(e, response->Record); - return NCloud::Reply(ctx, *ev, std::move(response)); + if (GetDupCacheEntry(e, response->Record)) { + return NCloud::Reply(ctx, *ev, std::move(response)); + } + + session->DropDupEntry(requestId); } auto requestInfo = CreateRequestInfo( diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.h b/cloud/filestore/libs/storage/tablet/tablet_state.h index c8b14491cd..3ec6e55617 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.h +++ b/cloud/filestore/libs/storage/tablet/tablet_state.h @@ -678,7 +678,7 @@ public: const NProto::T##name##Response& response, \ ui32 maxEntries); \ \ - void GetDupCacheEntry( \ + bool GetDupCacheEntry( \ const TDupCacheEntry* entry, \ NProto::T##name##Response& response); \ // FILESTORE_DECLARE_DUPCACHE diff --git a/cloud/filestore/libs/storage/tablet/tablet_state_sessions.cpp b/cloud/filestore/libs/storage/tablet/tablet_state_sessions.cpp index a624049697..83fa14ad4f 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state_sessions.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state_sessions.cpp @@ -840,7 +840,7 @@ void TIndexTabletState::AddDupCacheEntry( \ } \ } \ \ -void TIndexTabletState::GetDupCacheEntry( \ +bool TIndexTabletState::GetDupCacheEntry( \ const TDupCacheEntry* entry, \ NProto::T##name##Response& response) \ { \ @@ -849,11 +849,13 @@ void TIndexTabletState::GetDupCacheEntry( \ } else if (!entry->Committed) { \ *response.MutableError() = ErrorDuplicate(); \ } else if (!entry->Has##name()) { \ - *response.MutableError() = MakeError( \ - E_ARGUMENT, TStringBuilder() << "invalid request dup cache type: " \ - << entry->ShortUtf8DebugString().Quote()); \ - ReportDuplicateRequestId(response.GetError().GetMessage()); \ + ReportInvalidDupCacheEntry(TStringBuilder() \ + << "invalid request dup cache type: " \ + << entry->ShortUtf8DebugString().Quote()); \ + return false; \ } \ + \ + return true; \ } \ // FILESTORE_IMPLEMENT_DUPCACHE diff --git a/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp b/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp index c1b09dec13..b1e1a86774 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp @@ -1616,7 +1616,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes) TIndexTabletClient tablet(env.GetRuntime(), nodeIdx, tabletId); tablet.InitSession("client", "session"); - auto createRequest = [&] (ui64 reqId, ui64 nodeId) { + auto createCreateHandleRequest = [&] (ui64 reqId, ui64 nodeId) { auto request = tablet.CreateCreateHandleRequest( nodeId, TCreateHandleArgs::RDWR); request->Record.MutableHeaders()->SetRequestId(reqId); @@ -1624,8 +1624,17 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes) return request; }; + auto createCreateNodeRequest = [&] (ui64 reqId, TString name) { + auto request = tablet.CreateCreateNodeRequest( + TCreateNodeArgs::File(RootNodeId, std::move(name))); + request->Record.MutableHeaders()->SetRequestId(reqId); + + return request; + }; + const TString name1 = "file1"; const TString name2 = "file2"; + const TString name3 = "file3"; const auto nodeId1 = tablet.CreateNode(TCreateNodeArgs::File( RootNodeId, @@ -1640,7 +1649,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes) const ui64 requestId = 100500; - tablet.SendRequest(createRequest(requestId, nodeId1)); + tablet.SendRequest(createCreateHandleRequest(requestId, nodeId1)); { auto response = tablet.RecvCreateHandleResponse(); UNIT_ASSERT_VALUES_EQUAL_C( @@ -1653,7 +1662,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes) response->Record.GetNodeAttr().GetId()); } - tablet.SendRequest(createRequest(requestId, nodeId2)); + tablet.SendRequest(createCreateHandleRequest(requestId, nodeId2)); { auto response = tablet.RecvCreateHandleResponse(); UNIT_ASSERT_VALUES_EQUAL_C( @@ -1665,6 +1674,23 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes) nodeId2, response->Record.GetNodeAttr().GetId()); } + + tablet.SendRequest(createCreateNodeRequest(requestId, name3)); + { + auto response = tablet.RecvCreateNodeResponse(); + UNIT_ASSERT_VALUES_EQUAL_C( + S_OK, + response->Record.GetError().GetCode(), + response->Record.GetError().GetMessage()); + + UNIT_ASSERT_VALUES_UNEQUAL( + nodeId1, + response->Record.GetNode().GetId()); + UNIT_ASSERT_VALUES_UNEQUAL( + nodeId2, + response->Record.GetNode().GetId()); + UNIT_ASSERT_VALUES_UNEQUAL(0, response->Record.GetNode().GetId()); + } } } From 7eab7ecb9bb25bb18fd750ab60db7f4b5bdf9c34 Mon Sep 17 00:00:00 2001 From: qkrorlqr Date: Sat, 21 Sep 2024 11:44:29 +0000 Subject: [PATCH 2/3] issue-2033: we should ignore DupCache entries with invalid types and proceed with request processing as if the entry didn't exist (for the case of request id collisions) - cleanup --- cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp index 3e1a0bf184..b4a16465a7 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp @@ -55,7 +55,7 @@ void TIndexTabletActor::HandleRenameNode( auto* msg = ev->Get(); const auto requestId = GetRequestId(msg->Record); - if (const auto* e = session->LookupDupEntry(GetRequestId(msg->Record))) { + if (const auto* e = session->LookupDupEntry(requestId)) { auto response = std::make_unique(); if (GetDupCacheEntry(e, response->Record)) { return NCloud::Reply(ctx, *ev, std::move(response)); From 950dee8c57f1fd8b3e7a747433efc2a1ece3689f Mon Sep 17 00:00:00 2001 From: qkrorlqr Date: Sat, 21 Sep 2024 13:06:26 +0000 Subject: [PATCH 3/3] issue-2033: updated ShouldValidateDupRequests ut --- .../libs/storage/tablet/tablet_ut_nodes.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp b/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp index b1e1a86774..6db94005a3 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_ut_nodes.cpp @@ -1098,17 +1098,30 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Nodes) UNIT_ASSERT(!HasError(response->Record.GetError())); nodeId = response->Record.GetNode().GetId(); - UNIT_ASSERT(nodeId != InvalidNodeId); + UNIT_ASSERT_VALUES_UNEQUAL(InvalidNodeId, nodeId); + } + + { + auto response = tablet.ListNodes(RootNodeId); + const auto& names = response->Record.GetNames(); + UNIT_ASSERT_VALUES_EQUAL(1, names.size()); + UNIT_ASSERT_VALUES_EQUAL("xxx", names[0]); } tablet.SendRequest(createOther(100500)); { auto response = tablet.RecvUnlinkNodeResponse(); UNIT_ASSERT_VALUES_EQUAL_C( - E_ARGUMENT, + S_OK, response->Record.GetError().GetCode(), response->Record.GetError().GetMessage()); } + + { + auto response = tablet.ListNodes(RootNodeId); + const auto& names = response->Record.GetNames(); + UNIT_ASSERT_VALUES_EQUAL(0, names.size()); + } } Y_UNIT_TEST(ShouldWaitForDupRequestToBeCommitted)