From 1eec0f103685d2a54f9ee28d59de66a7ec826f11 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Thu, 5 Oct 2023 19:33:04 +0530 Subject: [PATCH 01/20] fix stale remote cluster uuid state not purged from remote (#10016) * fix stale remote cluster uuid state not purged from remote Signed-off-by: bansvaru * fix tests Signed-off-by: bansvaru * use new limit parameter Signed-off-by: bansvaru * minor refactoring Signed-off-by: bansvaru * delete index metadata files before manifest file Signed-off-by: bansvaru * add basic UT Signed-off-by: bansvaru * delete all data related to a cluster uuid in a single call Signed-off-by: bansvaru * fix git diff Signed-off-by: bansvaru * remove unreferenced code Signed-off-by: bansvaru * fix spa Signed-off-by: bansvaru --------- Signed-off-by: bansvaru --- .../remote/RemoteClusterStateService.java | 76 ++++++++++--- .../RemoteClusterStateServiceTests.java | 106 +++++++++++++----- 2 files changed, 137 insertions(+), 45 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index dddc5376803a5..164611816e1dc 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -103,11 +103,11 @@ public class RemoteClusterStateService implements Closeable { Property.Final ); - private static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; - private static final String INDEX_PATH_TOKEN = "index"; - private static final String MANIFEST_PATH_TOKEN = "manifest"; - private static final String MANIFEST_FILE_PREFIX = "manifest"; - private static final String INDEX_METADATA_FILE_PREFIX = "metadata"; + public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; + public static final String INDEX_PATH_TOKEN = "index"; + public static final String MANIFEST_PATH_TOKEN = "manifest"; + public static final String MANIFEST_FILE_PREFIX = "manifest"; + public static final String INDEX_METADATA_FILE_PREFIX = "metadata"; private final String nodeId; private final Supplier repositoriesService; @@ -385,13 +385,20 @@ private void writeIndexMetadataAsync( @Nullable public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) throws IOException { + assert clusterState != null : "Last accepted cluster state is not set"; if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. 
Exiting"); return null; } - assert clusterState != null : "Last accepted cluster state is not set"; assert previousManifest != null : "Last cluster metadata manifest is not set"; - return uploadManifest(clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), true); + ClusterMetadataManifest committedManifest = uploadManifest( + clusterState, + previousManifest.getIndices(), + previousManifest.getPreviousClusterUUID(), + true + ); + deleteStaleClusterUUIDs(clusterState, committedManifest); + return committedManifest; } @Override @@ -719,30 +726,42 @@ private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) { } /** - * Fetch latest ClusterMetadataManifest file from remote state store + * Fetch ClusterMetadataManifest files from remote state store in order * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster - * @return latest ClusterMetadataManifest filename + * @param limit max no of files to fetch + * @return all manifest file names */ - private Optional getLatestManifestFileName(String clusterName, String clusterUUID) throws IllegalStateException { + private List getManifestFileNames(String clusterName, String clusterUUID, int limit) throws IllegalStateException { try { /** - * {@link BlobContainer#listBlobsByPrefixInSortedOrder} will get the latest manifest file + * {@link BlobContainer#listBlobsByPrefixInSortedOrder} will list the latest manifest file first * as the manifest file name generated via {@link RemoteClusterStateService#getManifestFileName} ensures * when sorted in LEXICOGRAPHIC order the latest uploaded manifest file comes on top. */ - List manifestFilesMetadata = manifestContainer(clusterName, clusterUUID).listBlobsByPrefixInSortedOrder( + return manifestContainer(clusterName, clusterUUID).listBlobsByPrefixInSortedOrder( MANIFEST_FILE_PREFIX + DELIMITER, - 1, + limit, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC ); - if (manifestFilesMetadata != null && !manifestFilesMetadata.isEmpty()) { - return Optional.of(manifestFilesMetadata.get(0).name()); - } } catch (IOException e) { throw new IllegalStateException("Error while fetching latest manifest file for remote cluster state", e); } + } + + /** + * Fetch latest ClusterMetadataManifest file from remote state store + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return latest ClusterMetadataManifest filename + */ + private Optional getLatestManifestFileName(String clusterName, String clusterUUID) throws IllegalStateException { + List manifestFilesMetadata = getManifestFileNames(clusterName, clusterUUID, 1); + if (manifestFilesMetadata != null && !manifestFilesMetadata.isEmpty()) { + return Optional.of(manifestFilesMetadata.get(0).name()); + } logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}", clusterName, clusterUUID); return Optional.empty(); } @@ -791,7 +810,7 @@ public IndexMetadataTransferException(String errorDesc, Throwable cause) { * @param clusterName name of the cluster * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged */ - public void deleteStaleClusterMetadata(String clusterName, List clusterUUIDs) { + private void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { clusterUUIDs.forEach(clusterUUID -> { getBlobStoreTransferService().deleteAsync( ThreadPool.Names.REMOTE_PURGE, @@ -923,4 +942,27 @@ private void deleteStalePaths(String clusterName, String 
clusterUUID, List { + String clusterName = clusterState.getClusterName().value(); + logger.info("Deleting stale cluster UUIDs data from remote [{}]", clusterName); + Set allClustersUUIDsInRemote; + try { + allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); + } catch (IOException e) { + logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); + return; + } + // Retain last 2 cluster uuids data + allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); + allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); + deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); + }); + } } diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 65166386733c6..6ecbc23f75bee 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -64,6 +64,8 @@ import org.mockito.ArgumentCaptor; import org.mockito.ArgumentMatchers; +import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -76,6 +78,8 @@ import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; public class RemoteClusterStateServiceTests extends OpenSearchTestCase { @@ -334,13 +338,8 @@ public void testReadLatestMetadataManifestFailedIOException() throws IOException final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); BlobContainer blobContainer = mockBlobStoreObjects(); - when( - blobContainer.listBlobsByPrefixInSortedOrder( - "manifest" + RemoteClusterStateService.DELIMITER, - 1, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenThrow(IOException.class); + when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)) + .thenThrow(IOException.class); remoteClusterStateService.start(); Exception e = assertThrows( @@ -357,13 +356,8 @@ public void testReadLatestMetadataManifestFailedNoManifestFileInRemote() throws final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); BlobContainer blobContainer = mockBlobStoreObjects(); - when( - blobContainer.listBlobsByPrefixInSortedOrder( - "manifest" + RemoteClusterStateService.DELIMITER, - 1, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(List.of()); + when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)) + .thenReturn(List.of()); remoteClusterStateService.start(); Optional manifest = 
remoteClusterStateService.getLatestClusterMetadataManifest( @@ -378,13 +372,8 @@ public void testReadLatestMetadataManifestFailedManifestFileRemoveAfterFetchInRe BlobContainer blobContainer = mockBlobStoreObjects(); BlobMetadata blobMetadata = new PlainBlobMetadata("manifestFileName", 1); - when( - blobContainer.listBlobsByPrefixInSortedOrder( - "manifest" + RemoteClusterStateService.DELIMITER, - 1, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(Arrays.asList(blobMetadata)); + when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)) + .thenReturn(Arrays.asList(blobMetadata)); when(blobContainer.readBlob("manifestFileName")).thenThrow(FileNotFoundException.class); remoteClusterStateService.start(); @@ -618,6 +607,72 @@ public void testGetValidPreviousClusterUUIDWithInvalidMultipleChains() throws IO assertThrows(IllegalStateException.class, () -> remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster")); } + public void testDeleteStaleClusterUUIDs() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID(randomAlphaOfLength(10)) + .clusterUUID("cluster-uuid1") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID(ClusterState.UNKNOWN_UUID) + .committed(true) + .build(); + + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + BlobContainer uuidContainerContainer = mock(BlobContainer.class); + BlobContainer manifest2Container = mock(BlobContainer.class); + BlobContainer manifest3Container = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).then(invocation -> { + BlobPath blobPath1 = invocation.getArgument(0); + if (blobPath1.buildAsString().endsWith("cluster-state/")) { + return uuidContainerContainer; + } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) { + return manifest2Container; + } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) { + return manifest3Container; + } else { + throw new IllegalArgumentException("Unexpected blob path " + blobPath1); + } + }); + Map blobMetadataMap = Map.of( + "cluster-uuid1", + mock(BlobContainer.class), + "cluster-uuid2", + mock(BlobContainer.class), + "cluster-uuid3", + mock(BlobContainer.class) + ); + when(uuidContainerContainer.children()).thenReturn(blobMetadataMap); + when( + manifest2Container.listBlobsByPrefixInSortedOrder( + MANIFEST_FILE_PREFIX + DELIMITER, + Integer.MAX_VALUE, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(List.of(new PlainBlobMetadata("mainfest2", 1L))); + when( + manifest3Container.listBlobsByPrefixInSortedOrder( + MANIFEST_FILE_PREFIX + DELIMITER, + Integer.MAX_VALUE, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(List.of(new PlainBlobMetadata("mainfest3", 1L))); + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest); + try { + assertBusy(() -> { + verify(manifest2Container, times(1)).delete(); + verify(manifest3Container, times(1)).delete(); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private void mockObjectsForGettingPreviousClusterUUID(Map 
clusterUUIDsPointers) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); @@ -760,13 +815,8 @@ private void mockBlobContainer( Map indexMetadataMap ) throws IOException { BlobMetadata blobMetadata = new PlainBlobMetadata("manifestFileName", 1); - when( - blobContainer.listBlobsByPrefixInSortedOrder( - "manifest" + RemoteClusterStateService.DELIMITER, - 1, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(Arrays.asList(blobMetadata)); + when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)) + .thenReturn(Arrays.asList(blobMetadata)); BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( clusterMetadataManifest, From c6c07f3afd5658cb797ccbbf1a4f5dceb4f67015 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:52:42 -0400 Subject: [PATCH 02/20] Bump org.bouncycastle:bc-fips from 1.0.2.3 to 1.0.2.4 in /distribution/tools/plugin-cli (#10297) * Bump org.bouncycastle:bc-fips in /distribution/tools/plugin-cli Bumps org.bouncycastle:bc-fips from 1.0.2.3 to 1.0.2.4. --- updated-dependencies: - dependency-name: org.bouncycastle:bc-fips dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + distribution/tools/plugin-cli/build.gradle | 2 +- distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.3.jar.sha1 | 1 - distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.3.jar.sha1 create mode 100644 distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 153cc12c5f9bc..1d82f15198878 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -117,6 +117,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump netty from 4.1.97.Final to 4.1.99.Final ([#10303](https://github.com/opensearch-project/OpenSearch/pull/10303)) - Bump `peter-evans/create-pull-request` from 3 to 5 ([#10301](https://github.com/opensearch-project/OpenSearch/pull/10301)) - Bump `org.apache.avro:avro` from 1.11.2 to 1.11.3 ([#10210](https://github.com/opensearch-project/OpenSearch/pull/10210)) +- Bump `org.bouncycastle:bc-fips` from 1.0.2.3 to 1.0.2.4 ([#10297](https://github.com/opensearch-project/OpenSearch/pull/10297)) ### Changed - Add instrumentation in rest and network layer. 
([#9415](https://github.com/opensearch-project/OpenSearch/pull/9415)) diff --git a/distribution/tools/plugin-cli/build.gradle b/distribution/tools/plugin-cli/build.gradle index 2db3fef55d02e..b61a00aba04bc 100644 --- a/distribution/tools/plugin-cli/build.gradle +++ b/distribution/tools/plugin-cli/build.gradle @@ -38,7 +38,7 @@ dependencies { compileOnly project(":server") compileOnly project(":libs:opensearch-cli") api "org.bouncycastle:bcpg-fips:1.0.7.1" - api "org.bouncycastle:bc-fips:1.0.2.3" + api "org.bouncycastle:bc-fips:1.0.2.4" testImplementation project(":test:framework") testImplementation 'com.google.jimfs:jimfs:1.3.0' testRuntimeOnly("com.google.guava:guava:${versions.guava}") { diff --git a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.3.jar.sha1 b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.3.jar.sha1 deleted file mode 100644 index c71320050b7de..0000000000000 --- a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -da62b32cb72591f5b4d322e6ab0ce7de3247b534 \ No newline at end of file diff --git a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 new file mode 100644 index 0000000000000..da37449f80d7e --- /dev/null +++ b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 @@ -0,0 +1 @@ +9008d04fc13da6455e6a792935b93b629757335d \ No newline at end of file From 12ccc33e192aab203d17c68e979c54b90b818da1 Mon Sep 17 00:00:00 2001 From: Bhumika Saini Date: Thu, 5 Oct 2023 23:38:12 +0530 Subject: [PATCH 03/20] Update TODO for updating public documentation (#10408) Signed-off-by: Bhumika Saini --- .github/pull_request_template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index cd7c1bb980eec..c47b9e0b69256 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -19,7 +19,7 @@ Resolves #[Issue number to be closed when this PR is merged] - [ ] New functionality has javadoc added - [ ] Commits are signed per the DCO using --signoff - [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) -- [ ] GitHub issue/PR created in [OpenSearch documentation repo](https://github.com/opensearch-project/documentation-website) for the required public documentation changes (#[Issue/PR number]) +- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose) By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
From 0facd50bc72a7ca05f44a167a9794c270cd0ad83 Mon Sep 17 00:00:00 2001 From: Prabhat <20185657+CaptainDredge@users.noreply.github.com> Date: Thu, 5 Oct 2023 12:56:07 -0700 Subject: [PATCH 04/20] Race condition fix for datetime optimization (#10385) * Race condition fix for datetime optimization Signed-off-by: Prabhat Sharma * Changed JavaDateTimeFormatter caching of parser from MRU(most recently used) to a simple last used formatter Signed-off-by: Prabhat Sharma --------- Signed-off-by: Prabhat Sharma Co-authored-by: Prabhat Sharma --- .../common/time/JavaDateFormatter.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/time/JavaDateFormatter.java b/server/src/main/java/org/opensearch/common/time/JavaDateFormatter.java index 594ebc3b26a73..14191357fb8aa 100644 --- a/server/src/main/java/org/opensearch/common/time/JavaDateFormatter.java +++ b/server/src/main/java/org/opensearch/common/time/JavaDateFormatter.java @@ -74,6 +74,7 @@ class JavaDateFormatter implements DateFormatter { private final List parsers; private final JavaDateFormatter roundupParser; private final Boolean canCacheLastParsedFormatter; + private volatile DateTimeFormatter lastParsedformatter = null; /** * A round up formatter @@ -150,7 +151,7 @@ JavaDateFormatter getRoundupParser() { if (parsers.length == 0) { this.parsers = Collections.singletonList(printer); } else { - this.parsers = new CopyOnWriteArrayList<>(parsers); + this.parsers = Arrays.asList(parsers); } List roundUp = createRoundUpParser(format, roundupParserConsumer); this.roundupParser = new RoundUpFormatter(format, roundUp); @@ -235,7 +236,7 @@ private JavaDateFormatter( this.printFormat = printFormat; this.printer = printer; this.roundupParser = roundUpParsers != null ? 
new RoundUpFormatter(format, roundUpParsers) : null; - this.parsers = new CopyOnWriteArrayList<>(parsers); + this.parsers = parsers; this.canCacheLastParsedFormatter = canCacheLastParsedFormatter; } @@ -286,24 +287,22 @@ public TemporalAccessor parse(String input) { private TemporalAccessor doParse(String input) { if (parsers.size() > 1) { Object object = null; - DateTimeFormatter lastParsedformatter = null; + if (canCacheLastParsedFormatter && lastParsedformatter != null) { + ParsePosition pos = new ParsePosition(0); + object = lastParsedformatter.toFormat().parseObject(input, pos); + if (parsingSucceeded(object, input, pos)) { + return (TemporalAccessor) object; + } + } for (DateTimeFormatter formatter : parsers) { ParsePosition pos = new ParsePosition(0); object = formatter.toFormat().parseObject(input, pos); if (parsingSucceeded(object, input, pos)) { lastParsedformatter = formatter; - break; + return (TemporalAccessor) object; } } - if (lastParsedformatter != null) { - if (canCacheLastParsedFormatter && lastParsedformatter != parsers.get(0)) { - synchronized (parsers) { - parsers.remove(lastParsedformatter); - parsers.add(0, lastParsedformatter); - } - } - return (TemporalAccessor) object; - } + throw new DateTimeParseException("Failed to parse with all enclosed parsers", input, 0); } return this.parsers.get(0).parse(input); From fdaa438ee03db285417bc0bd4204a0fbd4e699c9 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Fri, 6 Oct 2023 04:22:42 +0800 Subject: [PATCH 05/20] Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API (#10101) * Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API Signed-off-by: Gao Binlong * modify change log Signed-off-by: Gao Binlong * Add more tests Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong Signed-off-by: Daniel (dB.) Doubrovkine Co-authored-by: Daniel (dB.) 
Doubrovkine --- CHANGELOG.md | 1 + .../rest-api-spec/test/ingest/90_simulate.yml | 137 ++++++++++++++++++ .../ingest/SimulatePipelineRequest.java | 28 +++- .../SimulatePipelineRequestParsingTests.java | 60 +++++++- 4 files changed, 215 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d82f15198878..f9d2123458335 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -143,6 +143,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix broken backward compatibility from 2.7 for IndexSorted field indices ([#10045](https://github.com/opensearch-project/OpenSearch/pull/10045)) - Fix concurrent search NPE when track_total_hits, terminate_after and size=0 are used ([#10082](https://github.com/opensearch-project/OpenSearch/pull/10082)) - Fix remove ingest processor handing ignore_missing parameter not correctly ([10089](https://github.com/opensearch-project/OpenSearch/pull/10089)) +- Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) - Fix circular dependency in Settings initialization ([10194](https://github.com/opensearch-project/OpenSearch/pull/10194)) - Fix registration and initialization of multiple extensions ([10256](https://github.com/opensearch-project/OpenSearch/pull/10256)) diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/90_simulate.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/90_simulate.yml index e012a82b15927..7c073739f6a1f 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/90_simulate.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/90_simulate.yml @@ -976,3 +976,140 @@ teardown: } - match: { error.root_cause.0.type: "illegal_argument_exception" } - match: { error.root_cause.0.reason: "Pipeline processor configured for non-existent pipeline [____pipeline_doesnot_exist___]" } + +--- +"Test simulate with docs containing metadata fields": + - do: + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "set" : { + "field": "field2", + "value": "foo" + } + } + ] + }, + "docs": [ + { + "_index": "index", + "_id": "id", + "_routing": "foo", + "_version": 100, + "_if_seq_no": 12333333333333333, + "_if_primary_term": 1, + "_source": { + "foo": "bar" + } + } + ] + } + + - length: { docs: 1 } + - match: { docs.0.doc._index: "index" } + - match: { docs.0.doc._id: "id" } + - match: { docs.0.doc._routing: "foo" } + - match: { docs.0.doc._version: "100" } + - match: { docs.0.doc._if_seq_no: "12333333333333333" } + - match: { docs.0.doc._if_primary_term: "1" } + - match: { docs.0.doc._source.foo: "bar" } + + - do: + catch: bad_request + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "set" : { + "field" : "field2", + "value": "foo" + } + } + ] + }, + "docs": [ + { + "_index": "index", + "_id": "id", + "_routing": "foo", + "_version": "bar", + "_source": { + "foo": "bar" + } + } + ] + } + - match: { status: 400 } + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "Failed to parse parameter [_version], only int or long is accepted" } + + - do: + catch: bad_request + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "set" : { + "field" : "field2", + "value": "foo" 
+ } + } + ] + }, + "docs": [ + { + "_index": "index", + "_id": "id", + "_routing": "foo", + "_if_seq_no": "123", + "_source": { + "foo": "bar" + } + } + ] + } + - match: { status: 400 } + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "Failed to parse parameter [_if_seq_no], only int or long is accepted" } + + - do: + catch: bad_request + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "set" : { + "field" : "field2", + "value": "foo" + } + } + ] + }, + "docs": [ + { + "_index": "index", + "_id": "id", + "_routing": "foo", + "_if_primary_term": "1", + "_source": { + "foo": "bar" + } + } + ] + } + - match: { status: 400 } + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "Failed to parse parameter [_if_primary_term], only int or long is accepted" } diff --git a/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineRequest.java b/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineRequest.java index 2234934499609..ec3ee981b646f 100644 --- a/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineRequest.java +++ b/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineRequest.java @@ -218,7 +218,12 @@ private static List parseDocs(Map config) { String routing = ConfigurationUtils.readOptionalStringOrIntProperty(null, null, dataMap, Metadata.ROUTING.getFieldName()); Long version = null; if (dataMap.containsKey(Metadata.VERSION.getFieldName())) { - version = (Long) ConfigurationUtils.readObject(null, null, dataMap, Metadata.VERSION.getFieldName()); + Object versionFieldValue = ConfigurationUtils.readObject(null, null, dataMap, Metadata.VERSION.getFieldName()); + if (versionFieldValue instanceof Integer || versionFieldValue instanceof Long) { + version = ((Number) versionFieldValue).longValue(); + } else { + throw new IllegalArgumentException("Failed to parse parameter [_version], only int or long is accepted"); + } } VersionType versionType = null; if (dataMap.containsKey(Metadata.VERSION_TYPE.getFieldName())) { @@ -228,12 +233,25 @@ private static List parseDocs(Map config) { } IngestDocument ingestDocument = new IngestDocument(index, id, routing, version, versionType, document); if (dataMap.containsKey(Metadata.IF_SEQ_NO.getFieldName())) { - Long ifSeqNo = (Long) ConfigurationUtils.readObject(null, null, dataMap, Metadata.IF_SEQ_NO.getFieldName()); - ingestDocument.setFieldValue(Metadata.IF_SEQ_NO.getFieldName(), ifSeqNo); + Object ifSeqNoFieldValue = ConfigurationUtils.readObject(null, null, dataMap, Metadata.IF_SEQ_NO.getFieldName()); + if (ifSeqNoFieldValue instanceof Integer || ifSeqNoFieldValue instanceof Long) { + ingestDocument.setFieldValue(Metadata.IF_SEQ_NO.getFieldName(), ((Number) ifSeqNoFieldValue).longValue()); + } else { + throw new IllegalArgumentException("Failed to parse parameter [_if_seq_no], only int or long is accepted"); + } } if (dataMap.containsKey(Metadata.IF_PRIMARY_TERM.getFieldName())) { - Long ifPrimaryTerm = (Long) ConfigurationUtils.readObject(null, null, dataMap, Metadata.IF_PRIMARY_TERM.getFieldName()); - ingestDocument.setFieldValue(Metadata.IF_PRIMARY_TERM.getFieldName(), ifPrimaryTerm); + Object ifPrimaryTermFieldValue = ConfigurationUtils.readObject( + null, + null, + dataMap, + Metadata.IF_PRIMARY_TERM.getFieldName() + ); + if (ifPrimaryTermFieldValue instanceof Integer || ifPrimaryTermFieldValue instanceof Long) { + 
ingestDocument.setFieldValue(Metadata.IF_PRIMARY_TERM.getFieldName(), ((Number) ifPrimaryTermFieldValue).longValue()); + } else { + throw new IllegalArgumentException("Failed to parse parameter [_if_primary_term], only int or long is accepted"); + } } ingestDocumentList.add(ingestDocument); } diff --git a/server/src/test/java/org/opensearch/action/ingest/SimulatePipelineRequestParsingTests.java b/server/src/test/java/org/opensearch/action/ingest/SimulatePipelineRequestParsingTests.java index 705fb546a2fed..04a9d08bb22bc 100644 --- a/server/src/test/java/org/opensearch/action/ingest/SimulatePipelineRequestParsingTests.java +++ b/server/src/test/java/org/opensearch/action/ingest/SimulatePipelineRequestParsingTests.java @@ -144,17 +144,29 @@ public void innerTestParseWithProvidedPipeline() throws Exception { List fields = Arrays.asList(INDEX, ID, ROUTING, VERSION, VERSION_TYPE, IF_SEQ_NO, IF_PRIMARY_TERM); for (IngestDocument.Metadata field : fields) { if (field == VERSION) { - Long value = randomLong(); - doc.put(field.getFieldName(), value); - expectedDoc.put(field.getFieldName(), value); + if (randomBoolean()) { + Long value = randomLong(); + doc.put(field.getFieldName(), value); + expectedDoc.put(field.getFieldName(), value); + } else { + Integer value = randomIntBetween(1, 1000000); + doc.put(field.getFieldName(), value); + expectedDoc.put(field.getFieldName(), value); + } } else if (field == VERSION_TYPE) { String value = VersionType.toString(randomFrom(VersionType.INTERNAL, VersionType.EXTERNAL, VersionType.EXTERNAL_GTE)); doc.put(field.getFieldName(), value); expectedDoc.put(field.getFieldName(), value); } else if (field == IF_SEQ_NO || field == IF_PRIMARY_TERM) { - Long value = randomNonNegativeLong(); - doc.put(field.getFieldName(), value); - expectedDoc.put(field.getFieldName(), value); + if (randomBoolean()) { + Long value = randomNonNegativeLong(); + doc.put(field.getFieldName(), value); + expectedDoc.put(field.getFieldName(), value); + } else { + Integer value = randomIntBetween(1, 1000000); + doc.put(field.getFieldName(), value); + expectedDoc.put(field.getFieldName(), value); + } } else { if (randomBoolean()) { String value = randomAlphaOfLengthBetween(1, 10); @@ -282,4 +294,40 @@ public void testNotValidDocs() { ); assertThat(e3.getMessage(), containsString("required property is missing")); } + + public void testNotValidMetadataFields() { + List fields = Arrays.asList(VERSION, IF_SEQ_NO, IF_PRIMARY_TERM); + for (IngestDocument.Metadata field : fields) { + String metadataFieldName = field.getFieldName(); + Map requestContent = new HashMap<>(); + List> docs = new ArrayList<>(); + requestContent.put(Fields.DOCS, docs); + Map doc = new HashMap<>(); + doc.put(metadataFieldName, randomAlphaOfLengthBetween(1, 10)); + doc.put(Fields.SOURCE, Collections.singletonMap(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10))); + docs.add(doc); + + Map pipelineConfig = new HashMap<>(); + List> processors = new ArrayList<>(); + Map processorConfig = new HashMap<>(); + List> onFailureProcessors = new ArrayList<>(); + int numOnFailureProcessors = randomIntBetween(0, 1); + for (int j = 0; j < numOnFailureProcessors; j++) { + onFailureProcessors.add(Collections.singletonMap("mock_processor", Collections.emptyMap())); + } + if (numOnFailureProcessors > 0) { + processorConfig.put("on_failure", onFailureProcessors); + } + processors.add(Collections.singletonMap("mock_processor", processorConfig)); + pipelineConfig.put("processors", processors); + + requestContent.put(Fields.PIPELINE, 
pipelineConfig); + + assertThrows( + "Failed to parse parameter [" + metadataFieldName + "], only int or long is accepted", + IllegalArgumentException.class, + () -> SimulatePipelineRequest.parse(requestContent, false, ingestService) + ); + } + } } From 651a9aa2ca05244e74bcaabcc89d878977efe259 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Thu, 5 Oct 2023 13:40:14 -0700 Subject: [PATCH 06/20] Segment Replication - Fix ShardLockObtained error during corruption cases (#10370) * Segment Replication - Fix ShardLockObtained error during corruption cases This change fixes a bug where shards could not be recreated locally after corruption. This occured because the store was not decref'd to 0 if the commit on close would fail with a corruption exception. Signed-off-by: Marc Handalian * Remove exra logs Signed-off-by: Marc Handalian * Remove flaky assertion on store refcount Signed-off-by: Marc Handalian * Remove flaky test. Signed-off-by: Marc Handalian * PR Feedback. Remove hacky handling of corruption when fetching metadata. This will now check for store corruption when replication has failed and fail the shard accordingly. This commit also fixes logging in NRTReplicationEngine. Signed-off-by: Marc Handalian * Fix unit test. Signed-off-by: Marc Handalian * Fix test failure testSegRepSucceedsOnPreviousCopiedFiles. This test broke because we invoked target.indexShard on a closed replicationTarget. In these cases we can assume the store is not corrupt. Signed-off-by: Marc Handalian * spotless Signed-off-by: Marc Handalian * Revert flaky IT Signed-off-by: Marc Handalian * Fix flakiness failure by expecting RTE when check index fails. Signed-off-by: Marc Handalian * reintroduce ITs and use recoveries API instead of waiting on shard state. Signed-off-by: Marc Handalian * Fix edge case where flush failures would not get reported as corruption. 
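For context before the diff: the heart of this fix is NRTReplicationEngine#closeNoLock, where the final commit can throw on a corrupt segment. Previously that exception skipped the store decref, so the shard lock was never released and recreating the shard locally failed with ShardLockObtainFailedException. A condensed sketch of the new close path, simplified from the patch below (error logging and the inner try/catch around markStoreCorrupted omitted):

    try {
        try {
            commitSegmentInfos(latestSegmentInfos);
        } catch (IOException e) {
            // Mark the store corrupted unless Engine#failShard already owns the
            // failure; the corruption marker lets the replica be rebuilt cleanly.
            if (failEngineLock.isHeldByCurrentThread() == false && store.isMarkedCorrupted() == false) {
                store.markStoreCorrupted(e);
            }
        }
        IOUtils.close(readerManager, translogManager);
    } finally {
        store.decRef();          // refcount can now reach zero, releasing the shard lock
        closedLatch.countDown();
    }

SegmentReplicationTargetService then checks this corruption marker when a replication event fails and fails the shard outright instead of retrying against a corrupt store.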
Signed-off-by: Marc Handalian --------- Signed-off-by: Marc Handalian --- CHANGELOG.md | 1 + .../replication/SegmentReplicationBaseIT.java | 5 +- .../replication/SegmentReplicationIT.java | 135 ++++++++++++++++++ .../index/engine/NRTReplicationEngine.java | 27 +++- .../org/opensearch/index/store/Store.java | 8 +- .../replication/SegmentReplicationTarget.java | 11 +- .../SegmentReplicationTargetService.java | 26 +++- .../engine/NRTReplicationEngineTests.java | 44 ++++++ .../SegmentReplicationTargetServiceTests.java | 3 +- .../SegmentReplicationTargetTests.java | 3 +- 10 files changed, 245 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9d2123458335..5b14f123b41cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -146,6 +146,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) - Fix circular dependency in Settings initialization ([10194](https://github.com/opensearch-project/OpenSearch/pull/10194)) - Fix registration and initialization of multiple extensions ([10256](https://github.com/opensearch-project/OpenSearch/pull/10256)) +- Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([10370](https://github.com/opensearch-project/OpenSearch/pull/10370)) ### Security diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java index 8e68a8bde39d5..1d93eecd6b245 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java @@ -197,9 +197,10 @@ protected IndexShard getIndexShard(String node, ShardId shardId, String indexNam protected IndexShard getIndexShard(String node, String indexName) { final Index index = resolveIndex(indexName); IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); - IndexService indexService = indicesService.indexServiceSafe(index); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); final Optional shardId = indexService.shardIds().stream().findFirst(); - return indexService.getShard(shardId.get()); + return shardId.map(indexService::getShard).orElse(null); } protected boolean segmentReplicationWithRemoteEnabled() { diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java index 33bc5a8f3afe6..81556cc270151 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java @@ -24,6 +24,7 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.action.admin.indices.alias.Alias; import org.opensearch.action.admin.indices.flush.FlushRequest; +import org.opensearch.action.admin.indices.recovery.RecoveryResponse; import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.get.GetResponse; @@ -58,6 +59,7 @@ import 
org.opensearch.common.lucene.index.OpenSearchDirectoryReader; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.XContentBuilder; @@ -71,6 +73,7 @@ import org.opensearch.index.engine.NRTReplicationReaderManager; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.recovery.FileChunkRequest; +import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.node.NodeClosedException; @@ -82,6 +85,7 @@ import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportService; import org.junit.Before; @@ -94,6 +98,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import static java.util.Arrays.asList; @@ -1777,4 +1782,134 @@ public void testRealtimeTermVectorRequestsUnSuccessful() throws IOException { } + public void testSendCorruptBytesToReplica() throws Exception { + // this test stubs transport calls specific to node-node replication. + assumeFalse( + "Skipping the test as its not compatible with segment replication with remote store.", + segmentReplicationWithRemoteEnabled() + ); + final String primaryNode = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.refresh_interval", -1) + .build() + ); + ensureYellow(INDEX_NAME); + final String replicaNode = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + MockTransportService primaryTransportService = ((MockTransportService) internalCluster().getInstance( + TransportService.class, + primaryNode + )); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean failed = new AtomicBoolean(false); + primaryTransportService.addSendBehavior( + internalCluster().getInstance(TransportService.class, replicaNode), + (connection, requestId, action, request, options) -> { + if (action.equals(SegmentReplicationTargetService.Actions.FILE_CHUNK) && failed.getAndSet(true) == false) { + FileChunkRequest req = (FileChunkRequest) request; + logger.info("SENDING CORRUPT file chunk [{}] lastChunk: {}", req, req.lastChunk()); + TransportRequest corrupt = new FileChunkRequest( + req.recoveryId(), + ((FileChunkRequest) request).requestSeqNo(), + ((FileChunkRequest) request).shardId(), + ((FileChunkRequest) request).metadata(), + ((FileChunkRequest) request).position(), + new BytesArray("test"), + false, + 0, + 0L + ); + connection.sendRequest(requestId, action, corrupt, options); + latch.countDown(); + } else { + connection.sendRequest(requestId, action, request, options); + } + } + ); + for (int i = 0; i < 100; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + final long originalRecoveryTime = 
getRecoveryStopTime(replicaNode); + assertNotEquals(originalRecoveryTime, 0); + refresh(INDEX_NAME); + latch.await(); + assertTrue(failed.get()); + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); + // reset checkIndex to ensure our original shard doesn't throw + resetCheckIndexStatus(); + waitForSearchableDocs(100, primaryNode, replicaNode); + } + + public void testWipeSegmentBetweenSyncs() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primaryNode = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.refresh_interval", -1) + .build() + ); + ensureYellow(INDEX_NAME); + final String replicaNode = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + for (int i = 0; i < 10; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + refresh(INDEX_NAME); + ensureGreen(INDEX_NAME); + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); + + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + waitForSearchableDocs(INDEX_NAME, 10, List.of(replicaNode)); + indexShard.store().directory().deleteFile("_0.si"); + + for (int i = 11; i < 21; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + refresh(INDEX_NAME); + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); + resetCheckIndexStatus(); + waitForSearchableDocs(20, primaryNode, replicaNode); + } + + private void waitForNewPeerRecovery(String replicaNode, long originalRecoveryTime) throws Exception { + assertBusy(() -> { + // assert we have a peer recovery after the original + final long time = getRecoveryStopTime(replicaNode); + assertNotEquals(time, 0); + assertNotEquals(originalRecoveryTime, time); + + }, 1, TimeUnit.MINUTES); + } + + private long getRecoveryStopTime(String nodeName) { + final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).get(); + final List recoveryStates = recoveryResponse.shardRecoveryStates().get(INDEX_NAME); + logger.info("Recovery states {}", recoveryResponse); + for (RecoveryState recoveryState : recoveryStates) { + if (recoveryState.getTargetNode().getName().equals(nodeName)) { + return recoveryState.getTimer().stopTime(); + } + } + return 0L; + } } diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 570a2b186841a..020e92aba4ce5 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -379,6 +379,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { try { commitSegmentInfos(); } catch (IOException e) { + maybeFailEngine("flush", e); throw new FlushFailedEngineException(shardId, e); } finally { flushLock.unlock(); @@ -437,13 +438,29 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { latestSegmentInfos.counter = latestSegmentInfos.counter + SI_COUNTER_INCREMENT; latestSegmentInfos.changed(); } - commitSegmentInfos(latestSegmentInfos); - IOUtils.close(readerManager, translogManager, store::decRef); + try { + 
commitSegmentInfos(latestSegmentInfos); + } catch (IOException e) { + // mark the store corrupted unless we are closing as result of engine failure. + // in this case Engine#failShard will handle store corruption. + if (failEngineLock.isHeldByCurrentThread() == false && store.isMarkedCorrupted() == false) { + try { + store.markStoreCorrupted(e); + } catch (IOException ex) { + logger.warn("Unable to mark store corrupted", ex); + } + } + } + IOUtils.close(readerManager, translogManager); } catch (Exception e) { - logger.warn("failed to close engine", e); + logger.error("failed to close engine", e); } finally { - logger.debug("engine closed [{}]", reason); - closedLatch.countDown(); + try { + store.decRef(); + logger.debug("engine closed [{}]", reason); + } finally { + closedLatch.countDown(); + } } } } diff --git a/server/src/main/java/org/opensearch/index/store/Store.java b/server/src/main/java/org/opensearch/index/store/Store.java index 285241ba89996..9f505121d8dfa 100644 --- a/server/src/main/java/org/opensearch/index/store/Store.java +++ b/server/src/main/java/org/opensearch/index/store/Store.java @@ -385,7 +385,13 @@ public MetadataSnapshot getMetadata(SegmentInfos segmentInfos) throws IOExceptio */ public Map getSegmentMetadataMap(SegmentInfos segmentInfos) throws IOException { assert indexSettings.isSegRepEnabled(); - return loadMetadata(segmentInfos, directory, logger, true).fileMetadata; + failIfCorrupted(); + try { + return loadMetadata(segmentInfos, directory, logger, true).fileMetadata; + } catch (NoSuchFileException | CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) { + markStoreCorrupted(ex); + throw ex; + } } /** diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index 5ae480b7d63a4..0eb6ce36fa63d 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -18,7 +18,6 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.opensearch.OpenSearchCorruptionException; -import org.opensearch.OpenSearchException; import org.opensearch.action.StepListener; import org.opensearch.common.UUIDs; import org.opensearch.common.lucene.Lucene; @@ -261,9 +260,7 @@ private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse) } catch (CorruptIndexException | IndexFormatTooNewException | IndexFormatTooOldException ex) { // this is a fatal exception at this stage. // this means we transferred files from the remote that have not be checksummed and they are - // broken. We have to clean up this shard entirely, remove all files and bubble it up to the - // source shard since this index might be broken there as well? The Source can handle this and checks - // its content on disk if possible. + // broken. We have to clean up this shard entirely, remove all files and bubble it up. try { try { store.removeCorruptionMarker(); @@ -279,14 +276,14 @@ private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse) // In this case the shard is closed at some point while updating the reader. // This can happen when the engine is closed in a separate thread. 
logger.warn("Shard is already closed, closing replication"); - } catch (OpenSearchException ex) { + } catch (CancellableThreads.ExecutionCancelledException ex) { /* Ignore closed replication target as it can happen due to index shard closed event in a separate thread. In such scenario, ignore the exception */ - assert cancellableThreads.isCancelled() : "Replication target closed but segment replication not cancelled"; + assert cancellableThreads.isCancelled() : "Replication target cancelled but cancellable threads not cancelled"; } catch (Exception ex) { - throw new OpenSearchCorruptionException(ex); + throw new ReplicationFailedException(ex); } finally { if (store != null) { store.decRef(); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java index ffc4ab86661db..46095adfe96b4 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.index.CorruptIndexException; import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchCorruptionException; import org.opensearch.action.support.ChannelActionListener; @@ -28,6 +29,7 @@ import org.opensearch.index.shard.IndexEventListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardState; +import org.opensearch.index.store.Store; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.FileChunkRequest; import org.opensearch.indices.recovery.ForceSyncRequest; @@ -46,6 +48,7 @@ import org.opensearch.transport.TransportRequestOptions; import org.opensearch.transport.TransportService; +import java.io.IOException; import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; @@ -522,7 +525,7 @@ public void onResponse(Void o) { @Override public void onFailure(Exception e) { logger.debug("Replication failed {}", target.description()); - if (e instanceof OpenSearchCorruptionException) { + if (isStoreCorrupt(target) || e instanceof CorruptIndexException || e instanceof OpenSearchCorruptionException) { onGoingReplications.fail(replicationId, new ReplicationFailedException("Store corruption during replication", e), true); return; } @@ -531,6 +534,27 @@ public void onFailure(Exception e) { }); } + private boolean isStoreCorrupt(SegmentReplicationTarget target) { + // ensure target is not already closed. In that case + // we can assume the store is not corrupt and that the replication + // event completed successfully. + if (target.refCount() > 0) { + final Store store = target.store(); + if (store.tryIncRef()) { + try { + return store.isMarkedCorrupted(); + } catch (IOException ex) { + logger.warn("Unable to determine if store is corrupt", ex); + return false; + } finally { + store.decRef(); + } + } + } + // store already closed. 
+ return false; + } + private class FileChunkTransportRequestHandler implements TransportRequestHandler { // How many bytes we've copied since we last called RateLimiter.pause diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index 7b587e9a83d2d..ee25d3789fb13 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -577,6 +577,50 @@ public void testDecrefToZeroRemovesFile() throws IOException { } } + public void testCommitOnCloseThrowsException_decRefStore() throws Exception { + final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + + final Store nrtEngineStore = createStore(INDEX_SETTINGS, newDirectory()); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore, INDEX_SETTINGS); + List operations = generateHistoryOnReplica( + randomIntBetween(1, 10), + randomBoolean(), + randomBoolean(), + randomBoolean() + ); + indexOperations(nrtEngine, operations); + // wipe the nrt directory initially so we can sync with primary. + cleanAndCopySegmentsFromPrimary(nrtEngine); + nrtEngineStore.directory().deleteFile("_0.si"); + assertEquals(2, nrtEngineStore.refCount()); + nrtEngine.close(); + assertEquals(1, nrtEngineStore.refCount()); + assertTrue(nrtEngineStore.isMarkedCorrupted()); + // store will throw when eventually closed, not handled here. + assertThrows(RuntimeException.class, nrtEngineStore::close); + } + + public void testFlushThrowsFlushFailedExceptionOnCorruption() throws Exception { + final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + + final Store nrtEngineStore = createStore(INDEX_SETTINGS, newDirectory()); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore, INDEX_SETTINGS); + List operations = generateHistoryOnReplica( + randomIntBetween(1, 10), + randomBoolean(), + randomBoolean(), + randomBoolean() + ); + indexOperations(nrtEngine, operations); + // wipe the nrt directory initially so we can sync with primary. + cleanAndCopySegmentsFromPrimary(nrtEngine); + nrtEngineStore.directory().deleteFile("_0.si"); + assertThrows(FlushFailedEngineException.class, nrtEngine::flush); + assertTrue(nrtEngineStore.isMarkedCorrupted()); + // store will throw when eventually closed, not handled here. 
+ assertThrows(RuntimeException.class, nrtEngineStore::close); + } + private void copySegments(Collection latestPrimaryFiles, Engine nrtEngine) throws IOException { final Store store = nrtEngine.store; final List replicaFiles = List.of(store.directory().listAll()); diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java index c1f88a6938d33..c108de5ee5ea6 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java @@ -9,6 +9,7 @@ package org.opensearch.indices.replication; import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.cluster.ClusterState; @@ -553,7 +554,7 @@ public void testForceSegmentSyncHandlerWithFailure() throws Exception { ).txGet(); }); Throwable nestedException = finalizeException.getCause().getCause(); - assertTrue(nestedException instanceof IOException); + assertNotNull(ExceptionsHelper.unwrap(finalizeException, IOException.class)); assertTrue(nestedException.getMessage().contains("dummy failure")); } diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java index 2596dd6e62026..a9d7d3cdd32fc 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java @@ -249,7 +249,7 @@ public void onFailure(Exception e) { }); } - public void testFailure_finalizeReplication_IOException() throws IOException { + public void testFailure_finalizeReplication_NonCorruptionException() throws IOException { IOException exception = new IOException("dummy failure"); SegmentReplicationSource segrepSource = new TestReplicationSource() { @@ -288,6 +288,7 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals(ReplicationFailedException.class, e.getClass()); assertEquals(exception, e.getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } From 10bae207cc6d8fa6bbb42021ef967c0bb57787a5 Mon Sep 17 00:00:00 2001 From: Kunal Kotwani Date: Thu, 5 Oct 2023 13:41:15 -0700 Subject: [PATCH 07/20] Fix error handling for future completion (#10406) Signed-off-by: Kunal Kotwani --- .../repositories/s3/S3BlobContainer.java | 34 +++++++------- .../s3/S3BlobStoreContainerTests.java | 45 +++++++++++++++++++ 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index fcfccf50ad326..9ffdba5eaae3a 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -241,23 +241,27 @@ public void readBlobAsync(String blobName, ActionListener listener) return; } - final List blobPartInputStreamFutures = new ArrayList<>(); - final long blobSize = blobMetadata.objectSize(); - final Integer numberOfParts = 
blobMetadata.objectParts() == null ? null : blobMetadata.objectParts().totalPartsCount(); - final String blobChecksum = blobMetadata.checksum().checksumCRC32(); - - if (numberOfParts == null) { - blobPartInputStreamFutures.add(() -> getBlobPartInputStreamContainer(s3AsyncClient, bucketName, blobKey, null)); - } else { - // S3 multipart files use 1 to n indexing - for (int partNumber = 1; partNumber <= numberOfParts; partNumber++) { - final int innerPartNumber = partNumber; - blobPartInputStreamFutures.add( - () -> getBlobPartInputStreamContainer(s3AsyncClient, bucketName, blobKey, innerPartNumber) - ); + try { + final List blobPartInputStreamFutures = new ArrayList<>(); + final long blobSize = blobMetadata.objectSize(); + final Integer numberOfParts = blobMetadata.objectParts() == null ? null : blobMetadata.objectParts().totalPartsCount(); + final String blobChecksum = blobMetadata.checksum() == null ? null : blobMetadata.checksum().checksumCRC32(); + + if (numberOfParts == null) { + blobPartInputStreamFutures.add(() -> getBlobPartInputStreamContainer(s3AsyncClient, bucketName, blobKey, null)); + } else { + // S3 multipart files use 1 to n indexing + for (int partNumber = 1; partNumber <= numberOfParts; partNumber++) { + final int innerPartNumber = partNumber; + blobPartInputStreamFutures.add( + () -> getBlobPartInputStreamContainer(s3AsyncClient, bucketName, blobKey, innerPartNumber) + ); + } } + listener.onResponse(new ReadContext(blobSize, blobPartInputStreamFutures, blobChecksum)); + } catch (Exception ex) { + listener.onFailure(ex); } - listener.onResponse(new ReadContext(blobSize, blobPartInputStreamFutures, blobChecksum)); }); } catch (Exception ex) { listener.onFailure(SdkException.create("Error occurred while fetching blob parts from the repository", ex)); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java index 2e54705e9cd78..e266bba372d80 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java @@ -1074,6 +1074,51 @@ public void testReadBlobAsyncFailure() throws Exception { assertEquals(1, readContextActionListener.getFailureCount()); } + public void testReadBlobAsyncOnCompleteFailureMissingData() throws Exception { + final String bucketName = randomAlphaOfLengthBetween(1, 10); + final String blobName = randomAlphaOfLengthBetween(1, 10); + final String checksum = randomAlphaOfLength(10); + + final long objectSize = 100L; + final int objectPartCount = 10; + + final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); + final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + ); + + final S3BlobStore blobStore = mock(S3BlobStore.class); + final BlobPath blobPath = new BlobPath(); + + when(blobStore.bucket()).thenReturn(bucketName); + when(blobStore.getStatsMetricPublisher()).thenReturn(new StatsMetricPublisher()); + when(blobStore.serverSideEncryption()).thenReturn(false); + when(blobStore.asyncClientReference()).thenReturn(amazonAsyncS3Reference); + + CompletableFuture getObjectAttributesResponseCompletableFuture = new CompletableFuture<>(); + getObjectAttributesResponseCompletableFuture.complete( + GetObjectAttributesResponse.builder() + 
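The essence of the fix above, before the new test resumes below: an exception thrown inside a CompletableFuture completion callback is captured by the future machinery rather than propagated, so a null objectSize or checksum would previously fail silently and leave the listener un-completed. A minimal sketch of the before/after shape, with a plain Consumer standing in for OpenSearch's ActionListener (all names here are illustrative):

    import java.util.concurrent.CompletableFuture;
    import java.util.function.Consumer;

    public class CallbackErrorSketch {
        public static void main(String[] args) {
            CompletableFuture<String> attributes = CompletableFuture.completedFuture(null);
            Consumer<Exception> onFailure = e -> System.out.println("listener failed: " + e);

            // Before: the NPE below is swallowed by whenComplete; the caller's
            // listener is never completed and the read hangs.
            attributes.whenComplete((value, throwable) -> {
                // int parts = value.length(); // would throw and disappear
            });

            // After: any exception raised while assembling the response is
            // routed to the listener, so the caller always hears back.
            attributes.whenComplete((value, throwable) -> {
                try {
                    int parts = value.length(); // NPE: value is null
                    System.out.println("listener succeeded: " + parts);
                } catch (Exception e) {
                    onFailure.accept(e);
                }
            });
        }
    }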
.checksum(Checksum.builder().build()) + .objectSize(null) + .objectParts(GetObjectAttributesParts.builder().totalPartsCount(objectPartCount).build()) + .build() + ); + when(s3AsyncClient.getObjectAttributes(any(GetObjectAttributesRequest.class))).thenReturn( + getObjectAttributesResponseCompletableFuture + ); + + CountDownLatch countDownLatch = new CountDownLatch(1); + CountingCompletionListener readContextActionListener = new CountingCompletionListener<>(); + LatchedActionListener listener = new LatchedActionListener<>(readContextActionListener, countDownLatch); + + final S3BlobContainer blobContainer = new S3BlobContainer(blobPath, blobStore); + blobContainer.readBlobAsync(blobName, listener); + countDownLatch.await(); + + assertEquals(0, readContextActionListener.getResponseCount()); + assertEquals(1, readContextActionListener.getFailureCount()); + } + public void testGetBlobMetadata() throws Exception { final String checksum = randomAlphaOfLengthBetween(1, 10); final long objectSize = 100L; From a3f8432df30c69be3950ca55984032a1b2323f35 Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:56:09 -0700 Subject: [PATCH 08/20] [Remote Store] Fix stats reporting for multistream downloads. (#10402) * Fix stats reporting for multistream downloads. Signed-off-by: Rishikesh1159 * rename tracker to fileTransferTracker. Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 --- .../opensearch/index/shard/IndexShard.java | 4 ++- .../store/RemoteSegmentStoreDirectory.java | 35 +++++++++++++++---- .../RemoteStoreReplicationSource.java | 7 ++-- .../RemoteSegmentStoreDirectoryTests.java | 13 ++++--- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 4f08411c19b55..833c91c1766c8 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -160,6 +160,7 @@ import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.PrimaryReplicaSyncer.ResyncTask; import org.opensearch.index.similarity.SimilarityService; +import org.opensearch.index.store.DirectoryFileTransferTracker; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.Store; @@ -4929,9 +4930,10 @@ private void downloadSegments( final Runnable onFileSync ) throws IOException { final Path indexPath = store.shardPath() == null ? 
null : store.shardPath().resolveIndex(); + final DirectoryFileTransferTracker fileTransferTracker = store.getDirectoryFileTransferTracker(); for (String segment : toDownloadSegments) { final PlainActionFuture segmentListener = PlainActionFuture.newFuture(); - sourceRemoteDirectory.copyTo(segment, storeDirectory, indexPath, segmentListener); + sourceRemoteDirectory.copyTo(segment, storeDirectory, indexPath, fileTransferTracker, segmentListener); segmentListener.actionGet(); onFileSync.run(); if (targetRemoteDirectory != null) { diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index a97b22360716c..a067cb9c5ae61 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -487,17 +487,40 @@ public void copyFrom(Directory from, String src, IOContext context, ActionListen * @param source The source file name * @param destinationDirectory The destination directory (if multipart is not supported) * @param destinationPath The destination path (if multipart is supported) + * @param fileTransferTracker Tracker used for file transfer stats * @param fileCompletionListener The listener to notify of completion */ - public void copyTo(String source, Directory destinationDirectory, Path destinationPath, ActionListener fileCompletionListener) { + public void copyTo( + String source, + Directory destinationDirectory, + Path destinationPath, + DirectoryFileTransferTracker fileTransferTracker, + ActionListener fileCompletionListener + ) { final String blobName = getExistingRemoteFilename(source); if (destinationPath != null && remoteDataDirectory.getBlobContainer() instanceof AsyncMultiStreamBlobContainer) { + long length = 0L; + try { + length = fileLength(source); + } catch (IOException ex) { + logger.error("Unable to fetch segment length for stats tracking", ex); + } + final long fileLength = length; + final long startTime = System.currentTimeMillis(); + fileTransferTracker.addTransferredBytesStarted(fileLength); final AsyncMultiStreamBlobContainer blobContainer = (AsyncMultiStreamBlobContainer) remoteDataDirectory.getBlobContainer(); final Path destinationFilePath = destinationPath.resolve(source); + final ActionListener completionListener = ActionListener.wrap(response -> { + fileTransferTracker.addTransferredBytesSucceeded(fileLength, startTime); + fileCompletionListener.onResponse(response); + }, e -> { + fileTransferTracker.addTransferredBytesFailed(fileLength, startTime); + fileCompletionListener.onFailure(e); + }); final ReadContextListener readContextListener = new ReadContextListener( blobName, destinationFilePath, - fileCompletionListener, + completionListener, threadPool, remoteDataDirectory.getDownloadRateLimiter(), recoverySettings.getMaxConcurrentRemoteStoreStreams() @@ -505,12 +528,10 @@ public void copyTo(String source, Directory destinationDirectory, Path destinati blobContainer.readBlobAsync(blobName, readContextListener); } else { // Fallback to older mechanism of downloading the file - try { + ActionListener.completeWith(fileCompletionListener, () -> { destinationDirectory.copyFrom(this, source, source, IOContext.DEFAULT); - fileCompletionListener.onResponse(source); - } catch (IOException e) { - fileCompletionListener.onFailure(e); - } + return source; + }); } } diff --git 
a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index e17c5293c38ac..ddbcb86269aa9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -20,6 +20,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardState; import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.DirectoryFileTransferTracker; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; @@ -121,7 +122,8 @@ public void getSegmentFiles( assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; toDownloadSegments.add(fileMetadata); } - downloadSegments(storeDirectory, remoteDirectory, toDownloadSegments, shardPath, listener); + final DirectoryFileTransferTracker fileTransferTracker = indexShard.store().getDirectoryFileTransferTracker(); + downloadSegments(storeDirectory, remoteDirectory, toDownloadSegments, shardPath, fileTransferTracker, listener); logger.debug("Downloaded segment files from remote store {}", toDownloadSegments); } finally { indexShard.store().decRef(); @@ -138,12 +140,13 @@ private void downloadSegments( RemoteSegmentStoreDirectory remoteStoreDirectory, List toDownloadSegments, ShardPath shardPath, + DirectoryFileTransferTracker fileTransferTracker, ActionListener completionListener ) { final Path indexPath = shardPath == null ? null : shardPath.resolveIndex(); for (StoreFileMetadata storeFileMetadata : toDownloadSegments) { final PlainActionFuture segmentListener = PlainActionFuture.newFuture(); - remoteStoreDirectory.copyTo(storeFileMetadata.name(), storeDirectory, indexPath, segmentListener); + remoteStoreDirectory.copyTo(storeFileMetadata.name(), storeDirectory, indexPath, fileTransferTracker, segmentListener); segmentListener.actionGet(); } completionListener.onResponse(new GetSegmentFilesResponse(toDownloadSegments)); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index b574ccaac55e1..4a89b3c718f0b 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -596,10 +596,15 @@ public void onResponse(String unused) { public void onFailure(Exception e) {} }; Path path = createTempDir(); - remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, path, completionListener); + DirectoryFileTransferTracker directoryFileTransferTracker = new DirectoryFileTransferTracker(); + long sourceFileLengthInBytes = remoteSegmentStoreDirectory.fileLength(filename); + remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, path, directoryFileTransferTracker, completionListener); assertTrue(downloadLatch.await(5000, TimeUnit.SECONDS)); verify(blobContainer, times(1)).readBlobAsync(contains(filename), any()); verify(storeDirectory, times(0)).copyFrom(any(), any(), any(), any()); + + // Verify stats are updated to DirectoryFileTransferTracker + assertEquals(sourceFileLengthInBytes, directoryFileTransferTracker.getTransferredBytesSucceeded()); } public void testCopyFilesTo() 
throws Exception { @@ -619,7 +624,7 @@ public void onResponse(String unused) { public void onFailure(Exception e) {} }; Path path = createTempDir(); - remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, path, completionListener); + remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, path, new DirectoryFileTransferTracker(), completionListener); assertTrue(downloadLatch.await(5000, TimeUnit.MILLISECONDS)); verify(storeDirectory, times(1)).copyFrom(any(), eq(filename), eq(filename), eq(IOContext.DEFAULT)); } @@ -643,7 +648,7 @@ public void onResponse(String unused) { @Override public void onFailure(Exception e) {} }; - remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, null, completionListener); + remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, null, new DirectoryFileTransferTracker(), completionListener); assertTrue(downloadLatch.await(5000, TimeUnit.MILLISECONDS)); verify(storeDirectory, times(1)).copyFrom(any(), eq(filename), eq(filename), eq(IOContext.DEFAULT)); } @@ -670,7 +675,7 @@ public void onFailure(Exception e) { } }; Path path = createTempDir(); - remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, path, completionListener); + remoteSegmentStoreDirectory.copyTo(filename, storeDirectory, path, new DirectoryFileTransferTracker(), completionListener); assertTrue(downloadLatch.await(5000, TimeUnit.MILLISECONDS)); verify(storeDirectory, times(1)).copyFrom(any(), eq(filename), eq(filename), eq(IOContext.DEFAULT)); } From 66aef13cf8bdd0f4d058bc0cd9205415349d4b81 Mon Sep 17 00:00:00 2001 From: Harish Bhakuni Date: Thu, 5 Oct 2023 14:08:54 -0700 Subject: [PATCH 09/20] Updating the separator for RemoteStoreLockManager since underscore is allowed in base64UUID url charset (#10379) * Refactor Remote Store Metadata Lock Manager Utils Signed-off-by: Harish Bhakuni * Address PR Comments Signed-off-by: Harish Bhakuni * Address PR Comments Signed-off-by: Harish Bhakuni * Update Changelog entry Signed-off-by: Harish Bhakuni * Update Changelog entry Signed-off-by: Harish Bhakuni * Unmute testDeleteShallowCopySnapshot test Signed-off-by: Harish Bhakuni --------- Signed-off-by: Harish Bhakuni Co-authored-by: Harish Bhakuni --- CHANGELOG.md | 1 + .../snapshots/DeleteSnapshotIT.java | 1 - .../index/store/lockmanager/FileLockInfo.java | 32 ++++++++++---- .../RemoteStoreLockManagerUtils.java | 7 +++- .../store/lockmanager/FileLockInfoTests.java | 38 ++++++++++++++++- .../BlobStoreRepositoryHelperTests.java | 4 +- .../BlobStoreRepositoryRemoteIndexTests.java | 42 ++++++++++--------- 7 files changed, 91 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b14f123b41cc..4ea8d5ae4cfe4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Enable remote segment upload backpressure by default ([#10356](https://github.com/opensearch-project/OpenSearch/pull/10356)) - [Remote Store] Add support to reload repository metadata inplace ([#9569](https://github.com/opensearch-project/OpenSearch/pull/9569)) - [Metrics Framework] Add Metrics framework. 
([#10241](https://github.com/opensearch-project/OpenSearch/pull/10241)) +- Updating the separator for RemoteStoreLockManager since underscore is allowed in base64UUID url charset ([#10379](https://github.com/opensearch-project/OpenSearch/pull/10379)) ### Deprecated diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java index 31abc16bba50e..e79bf1c16b586 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java @@ -64,7 +64,6 @@ public void testDeleteSnapshot() throws Exception { assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == 0); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9115") public void testDeleteShallowCopySnapshot() throws Exception { disableRepoConsistencyCheck("Remote store repository is being used in the test"); final Path remoteStoreRepoPath = randomRepoPath(); diff --git a/server/src/main/java/org/opensearch/index/store/lockmanager/FileLockInfo.java b/server/src/main/java/org/opensearch/index/store/lockmanager/FileLockInfo.java index 24f42743e1a04..b6be60c489a6c 100644 --- a/server/src/main/java/org/opensearch/index/store/lockmanager/FileLockInfo.java +++ b/server/src/main/java/org/opensearch/index/store/lockmanager/FileLockInfo.java @@ -21,6 +21,7 @@ public class FileLockInfo implements LockInfo { private String fileToLock; private String acquirerId; + private static final int INVALID_INDEX = -1; public String getAcquirerId() { return acquirerId; @@ -88,21 +89,34 @@ static String generateLockName(String fileToLock, String acquirerId) { } public static String getFileToLockNameFromLock(String lockName) { - String[] lockNameTokens = lockName.split(RemoteStoreLockManagerUtils.SEPARATOR); - - if (lockNameTokens.length != 2) { - throw new IllegalArgumentException("Provided Lock Name " + lockName + " is not Valid."); + // use proper separator for the lock file depending on the version it is created + String lockSeparator = lockName.endsWith(RemoteStoreLockManagerUtils.PRE_OS210_LOCK_FILE_EXTENSION) + ? 
RemoteStoreLockManagerUtils.PRE_OS210_LOCK_SEPARATOR + : RemoteStoreLockManagerUtils.SEPARATOR; + final int indexOfSeparator = lockName.lastIndexOf(lockSeparator); + if (indexOfSeparator == INVALID_INDEX) { + throw new IllegalArgumentException("Provided lock name: " + lockName + " is invalid with separator: " + lockSeparator); } - return lockNameTokens[0]; + return lockName.substring(0, indexOfSeparator); } public static String getAcquirerIdFromLock(String lockName) { - String[] lockNameTokens = lockName.split(RemoteStoreLockManagerUtils.SEPARATOR); + String lockExtension = RemoteStoreLockManagerUtils.LOCK_FILE_EXTENSION; + String lockSeparator = RemoteStoreLockManagerUtils.SEPARATOR; - if (lockNameTokens.length != 2) { - throw new IllegalArgumentException("Provided Lock Name " + lockName + " is not Valid."); + // check if lock file is created on version <=2.10 + if (lockName.endsWith(RemoteStoreLockManagerUtils.PRE_OS210_LOCK_FILE_EXTENSION)) { + lockSeparator = RemoteStoreLockManagerUtils.PRE_OS210_LOCK_SEPARATOR; + lockExtension = RemoteStoreLockManagerUtils.PRE_OS210_LOCK_FILE_EXTENSION; + } + final int indexOfSeparator = lockName.lastIndexOf(lockSeparator); + final int indexOfExt = lockName.lastIndexOf(lockExtension); + if (indexOfSeparator == INVALID_INDEX || indexOfExt == INVALID_INDEX) { + throw new IllegalArgumentException( + "Provided lock name: " + lockName + " is invalid with separator: " + lockSeparator + " and extension: " + lockExtension + ); } - return lockNameTokens[1].replace(RemoteStoreLockManagerUtils.LOCK_FILE_EXTENSION, ""); + return lockName.substring(indexOfSeparator + lockSeparator.length(), indexOfExt); } } diff --git a/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerUtils.java b/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerUtils.java index 452dfc329d88b..d5fb2722a64dc 100644 --- a/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerUtils.java +++ b/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerUtils.java @@ -15,8 +15,11 @@ */ public class RemoteStoreLockManagerUtils { static final String FILE_TO_LOCK_NAME = "file_to_lock"; - static final String SEPARATOR = "___"; - static final String LOCK_FILE_EXTENSION = ".lock"; + static final String PRE_OS210_LOCK_SEPARATOR = "___"; + static final String SEPARATOR = "..."; + // for versions <= 2.10, we have lock files with this extension. 
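Why "___" had to go is easiest to see with a concrete value. URL-safe base64 draws from [A-Za-z0-9_-], so an acquirer id may legally contain underscores, and metadata filenames already contain runs of them (the "___Hf3Dbw..." fixture in the tests further below is exactly such a case). A split on "___" is therefore ambiguous, while '.' can never appear in that alphabet. A rough stand-alone illustration, not the production parser:

    public class SeparatorSketch {
        public static void main(String[] args) {
            // Metadata names can themselves contain "___" (a "__" delimiter
            // followed by a base64 value that starts with '_').
            String metadata = "metadata__9223372036854775800___Hf3Dbw2QQagfGLlVBOUrg__1";
            String acquirerId = "ZxZ4Wh89SXyEPmSYAHrIrQ";

            String oldName = metadata + "___" + acquirerId + ".lock";
            // The old split-based parsing expected exactly two tokens and
            // would reject this perfectly valid lock name:
            System.out.println(oldName.split("___").length); // 3, not 2

            String newName = metadata + "..." + acquirerId + ".v2_lock";
            // "..." cannot occur inside either component, so the last
            // occurrence is unambiguous:
            int sep = newName.lastIndexOf("...");
            System.out.println(newName.substring(0, sep)); // the metadata file
        }
    }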
+ static final String PRE_OS210_LOCK_FILE_EXTENSION = ".lock"; + static final String LOCK_FILE_EXTENSION = ".v2_lock"; static final String ACQUIRER_ID = "acquirer_id"; public static final String NO_TTL = "-1"; static final String LOCK_EXPIRY_TIME = "lock_expiry_time"; diff --git a/server/src/test/java/org/opensearch/index/store/lockmanager/FileLockInfoTests.java b/server/src/test/java/org/opensearch/index/store/lockmanager/FileLockInfoTests.java index f3a2f1859923e..80413d4cb6612 100644 --- a/server/src/test/java/org/opensearch/index/store/lockmanager/FileLockInfoTests.java +++ b/server/src/test/java/org/opensearch/index/store/lockmanager/FileLockInfoTests.java @@ -15,10 +15,24 @@ public class FileLockInfoTests extends OpenSearchTestCase { String testMetadata = "testMetadata"; String testAcquirerId = "testAcquirerId"; + String testAcquirerId2 = "ZxZ4Wh89SXyEPmSYAHrIrQ"; + String testAcquirerId3 = "ZxZ4Wh89SXyEPmSYAHrItS"; + String testMetadata1 = "metadata__9223372036854775806__9223372036854775803__9223372036854775790" + + "__9223372036854775800___Hf3Dbw2QQagfGLlVBOUrg__9223370340398865071__1"; + + String oldLock = testMetadata1 + RemoteStoreLockManagerUtils.PRE_OS210_LOCK_SEPARATOR + testAcquirerId2 + + RemoteStoreLockManagerUtils.PRE_OS210_LOCK_FILE_EXTENSION; + String newLock = testMetadata1 + RemoteStoreLockManagerUtils.SEPARATOR + testAcquirerId3 + + RemoteStoreLockManagerUtils.LOCK_FILE_EXTENSION; public void testGenerateLockName() { FileLockInfo fileLockInfo = FileLockInfo.getLockInfoBuilder().withFileToLock(testMetadata).withAcquirerId(testAcquirerId).build(); assertEquals(fileLockInfo.generateLockName(), FileLockInfo.LockFileUtils.generateLockName(testMetadata, testAcquirerId)); + + // validate that lock generated will be the new version lock + fileLockInfo = FileLockInfo.getLockInfoBuilder().withFileToLock(testMetadata1).withAcquirerId(testAcquirerId3).build(); + assertEquals(fileLockInfo.generateLockName(), newLock); + } public void testGenerateLockNameFailureCase1() { @@ -41,13 +55,33 @@ public void testGetLockPrefixFailureCase() { assertThrows(IllegalArgumentException.class, fileLockInfo::getLockPrefix); } + public void testGetFileToLockNameFromLock() { + assertEquals(testMetadata1, FileLockInfo.LockFileUtils.getFileToLockNameFromLock(oldLock)); + assertEquals(testMetadata1, FileLockInfo.LockFileUtils.getFileToLockNameFromLock(newLock)); + } + + public void testGetAcquirerIdFromLock() { + assertEquals(testAcquirerId2, FileLockInfo.LockFileUtils.getAcquirerIdFromLock(oldLock)); + assertEquals(testAcquirerId3, FileLockInfo.LockFileUtils.getAcquirerIdFromLock(newLock)); + } + public void testGetLocksForAcquirer() throws NoSuchFileException { + String[] locks = new String[] { FileLockInfo.LockFileUtils.generateLockName(testMetadata, testAcquirerId), - FileLockInfo.LockFileUtils.generateLockName(testMetadata, "acquirerId2") }; + FileLockInfo.LockFileUtils.generateLockName(testMetadata, "acquirerId2"), + oldLock, + newLock }; FileLockInfo fileLockInfo = FileLockInfo.getLockInfoBuilder().withAcquirerId(testAcquirerId).build(); - assertEquals(fileLockInfo.getLockForAcquirer(locks), FileLockInfo.LockFileUtils.generateLockName(testMetadata, testAcquirerId)); + + // validate old lock + fileLockInfo = FileLockInfo.getLockInfoBuilder().withAcquirerId(testAcquirerId2).build(); + assertEquals(fileLockInfo.getLockForAcquirer(locks), oldLock); + + // validate new lock + fileLockInfo = FileLockInfo.getLockInfoBuilder().withAcquirerId(testAcquirerId3).build(); + 
assertEquals(fileLockInfo.getLockForAcquirer(locks), newLock); } } diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java index 4e60cddd14d73..57c126b85ff70 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java @@ -66,7 +66,9 @@ protected String[] getLockFilesInRemoteStore(String remoteStoreIndex, String rem BlobPath shardLevelBlobPath = remoteStorerepository.basePath().add(indexUUID).add("0").add("segments").add("lock_files"); BlobContainer blobContainer = remoteStorerepository.blobStore().blobContainer(shardLevelBlobPath); try (RemoteBufferedOutputDirectory lockDirectory = new RemoteBufferedOutputDirectory(blobContainer)) { - return Arrays.stream(lockDirectory.listAll()).filter(lock -> lock.endsWith(".lock")).toArray(String[]::new); + return Arrays.stream(lockDirectory.listAll()) + .filter(lock -> lock.endsWith(".lock") || lock.endsWith(".v2_lock")) + .toArray(String[]::new); } } diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java index e3e1bf31e82dc..9cca495cced72 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java @@ -145,7 +145,7 @@ public void testRetrieveShallowCopySnapshotCase1() throws IOException { final SnapshotId snapshotId1 = snapshotInfo.snapshotId(); String[] lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 0) : "there should be no lock files present in directory, but found " + Arrays.toString(lockFiles); + assertEquals("there should be no lock files present in directory, but found " + Arrays.toString(lockFiles), 0, lockFiles.length); logger.info("--> create remote index shallow snapshot"); Settings snapshotRepoSettingsForShallowCopy = Settings.builder() .put(snapshotRepoSettings) @@ -161,8 +161,8 @@ public void testRetrieveShallowCopySnapshotCase1() throws IOException { final SnapshotId snapshotId2 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 1) : "there should be only one lock file, but found " + Arrays.toString(lockFiles); - assert lockFiles[0].endsWith(snapshotId2.getUUID() + ".lock"); + assertEquals("there should be only one lock file, but found " + Arrays.toString(lockFiles), 1, lockFiles.length); + assertTrue(lockFiles[0].endsWith(snapshotId2.getUUID() + ".v2_lock")); logger.info("--> create another normal snapshot"); updateRepository(client, snapshotRepositoryName, snapshotRepoSettings); @@ -174,8 +174,8 @@ public void testRetrieveShallowCopySnapshotCase1() throws IOException { final SnapshotId snapshotId3 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 1) : "there should be only one lock file, but found " + Arrays.toString(lockFiles); - assert lockFiles[0].endsWith(snapshotId2.getUUID() + ".lock"); + assertEquals("there should be only one lock file, but found " + 
Arrays.toString(lockFiles), 1, lockFiles.length); + assertTrue(lockFiles[0].endsWith(snapshotId2.getUUID() + ".v2_lock")); logger.info("--> make sure the node's repository can resolve the snapshots"); final List originalSnapshots = Arrays.asList(snapshotId1, snapshotId2, snapshotId3); @@ -230,8 +230,8 @@ public void testGetRemoteStoreShallowCopyShardMetadata() throws IOException { final SnapshotId snapshotId = snapshotInfo.snapshotId(); String[] lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 1) : "there should be only one lock file, but found " + Arrays.toString(lockFiles); - assert lockFiles[0].endsWith(snapshotId.getUUID() + ".lock"); + assertEquals("there should be only one lock file, but found " + Arrays.toString(lockFiles), 1, lockFiles.length); + assertTrue(lockFiles[0].endsWith(snapshotId.getUUID() + ".v2_lock")); final RepositoriesService repositoriesService = getInstanceFromNode(RepositoriesService.class); final BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(snapshotRepositoryName); @@ -305,8 +305,8 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { final SnapshotId snapshotId1 = snapshotInfo.snapshotId(); String[] lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 1) : "lock files are " + Arrays.toString(lockFiles); - assert lockFiles[0].endsWith(snapshotId1.getUUID() + ".lock"); + assertEquals("lock files are " + Arrays.toString(lockFiles), 1, lockFiles.length); + assertTrue(lockFiles[0].endsWith(snapshotId1.getUUID() + ".v2_lock")); logger.info("--> create second remote index shallow snapshot"); snapshotInfo = createSnapshot( @@ -317,10 +317,10 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { final SnapshotId snapshotId2 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 2) : "lock files are " + Arrays.toString(lockFiles); + assertEquals("lock files are " + Arrays.toString(lockFiles), 2, lockFiles.length); List shallowCopySnapshotIDs = Arrays.asList(snapshotId1, snapshotId2); for (SnapshotId snapshotId : shallowCopySnapshotIDs) { - assert lockFiles[0].contains(snapshotId.getUUID()) || lockFiles[1].contains(snapshotId.getUUID()); + assertTrue(lockFiles[0].contains(snapshotId.getUUID()) || lockFiles[1].contains(snapshotId.getUUID())); } logger.info("--> create third remote index shallow snapshot"); snapshotInfo = createSnapshot( @@ -331,12 +331,14 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { final SnapshotId snapshotId3 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 3); + assertEquals(3, lockFiles.length); shallowCopySnapshotIDs = Arrays.asList(snapshotId1, snapshotId2, snapshotId3); for (SnapshotId snapshotId : shallowCopySnapshotIDs) { - assert lockFiles[0].contains(snapshotId.getUUID()) - || lockFiles[1].contains(snapshotId.getUUID()) - || lockFiles[2].contains(snapshotId.getUUID()); + assertTrue( + lockFiles[0].contains(snapshotId.getUUID()) + || lockFiles[1].contains(snapshotId.getUUID()) + || lockFiles[2].contains(snapshotId.getUUID()) + ); } logger.info("--> create normal snapshot"); createRepository(client, snapshotRepositoryName, snapshotRepoSettings); @@ -348,12 +350,14 @@ public void testRetrieveShallowCopySnapshotCase2() 
throws IOException { final SnapshotId snapshotId4 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); - assert (lockFiles.length == 3) : "lock files are " + Arrays.toString(lockFiles); + assertEquals("lock files are " + Arrays.toString(lockFiles), 3, lockFiles.length); shallowCopySnapshotIDs = Arrays.asList(snapshotId1, snapshotId2, snapshotId3); for (SnapshotId snapshotId : shallowCopySnapshotIDs) { - assert lockFiles[0].contains(snapshotId.getUUID()) - || lockFiles[1].contains(snapshotId.getUUID()) - || lockFiles[2].contains(snapshotId.getUUID()); + assertTrue( + lockFiles[0].contains(snapshotId.getUUID()) + || lockFiles[1].contains(snapshotId.getUUID()) + || lockFiles[2].contains(snapshotId.getUUID()) + ); } logger.info("--> make sure the node's repository can resolve the snapshots"); From dad525aefaab01b8452f9db8d7fba70a4d3b5cc8 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Thu, 5 Oct 2023 19:58:58 -0400 Subject: [PATCH 10/20] Allow customization of netty channel handles before and during decompression (#10261) --- CHANGELOG.md | 1 + .../http/netty4/Netty4HeaderVerifierIT.java | 75 +++++++++++ .../transport/Netty4BlockingPlugin.java | 127 ++++++++++++++++++ .../netty4/Netty4HttpServerTransport.java | 30 ++++- .../transport/Netty4ModulePlugin.java | 2 +- .../java/org/opensearch/rest/RestHandler.java | 2 +- 6 files changed, 229 insertions(+), 8 deletions(-) create mode 100644 modules/transport-netty4/src/internalClusterTest/java/org/opensearch/http/netty4/Netty4HeaderVerifierIT.java create mode 100644 modules/transport-netty4/src/internalClusterTest/java/org/opensearch/transport/Netty4BlockingPlugin.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ea8d5ae4cfe4..e408df2307587 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add Doc Status Counter for Indexing Engine ([#4562](https://github.com/opensearch-project/OpenSearch/issues/4562)) - Add unreferenced file cleanup count to merge stats ([#10204](https://github.com/opensearch-project/OpenSearch/pull/10204)) - [Remote Store] Add support to restrict creation & deletion if system repository and mutation of immutable settings of system repository ([#9839](https://github.com/opensearch-project/OpenSearch/pull/9839)) +- Improve compressed request handling ([#10261](https://github.com/opensearch-project/OpenSearch/pull/10261)) ### Dependencies - Bump `peter-evans/create-or-update-comment` from 2 to 3 ([#9575](https://github.com/opensearch-project/OpenSearch/pull/9575)) diff --git a/modules/transport-netty4/src/internalClusterTest/java/org/opensearch/http/netty4/Netty4HeaderVerifierIT.java b/modules/transport-netty4/src/internalClusterTest/java/org/opensearch/http/netty4/Netty4HeaderVerifierIT.java new file mode 100644 index 0000000000000..c39567a005fd1 --- /dev/null +++ b/modules/transport-netty4/src/internalClusterTest/java/org/opensearch/http/netty4/Netty4HeaderVerifierIT.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.http.netty4; + +import org.opensearch.OpenSearchNetty4IntegTestCase; +import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.http.HttpServerTransport; +import org.opensearch.plugins.Plugin; +import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; +import org.opensearch.test.OpenSearchIntegTestCase.Scope; +import org.opensearch.transport.Netty4BlockingPlugin; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import io.netty.buffer.ByteBufUtil; +import io.netty.handler.codec.http.DefaultFullHttpRequest; +import io.netty.handler.codec.http.FullHttpRequest; +import io.netty.handler.codec.http.FullHttpResponse; +import io.netty.handler.codec.http.HttpMethod; +import io.netty.handler.codec.http.HttpVersion; +import io.netty.handler.codec.http2.HttpConversionUtil; +import io.netty.util.ReferenceCounted; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.equalTo; +import static io.netty.handler.codec.http.HttpHeaderNames.HOST; + +@ClusterScope(scope = Scope.TEST, supportsDedicatedMasters = false, numDataNodes = 1) +public class Netty4HeaderVerifierIT extends OpenSearchNetty4IntegTestCase { + + @Override + protected boolean addMockHttpTransport() { + return false; // enable http + } + + @Override + protected Collection<Class<? extends Plugin>> nodePlugins() { + return Collections.singletonList(Netty4BlockingPlugin.class); + } + + public void testThatNettyHttpServerRequestBlockedWithHeaderVerifier() throws Exception { + HttpServerTransport httpServerTransport = internalCluster().getInstance(HttpServerTransport.class); + TransportAddress[] boundAddresses = httpServerTransport.boundAddress().boundAddresses(); + TransportAddress transportAddress = randomFrom(boundAddresses); + + final FullHttpRequest blockedRequest = new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, "/"); + blockedRequest.headers().add("blockme", "Not Allowed"); + blockedRequest.headers().add(HOST, "localhost"); + blockedRequest.headers().add(HttpConversionUtil.ExtensionHeaderNames.SCHEME.text(), "http"); + + final List<FullHttpResponse> responses = new ArrayList<>(); + try (Netty4HttpClient nettyHttpClient = Netty4HttpClient.http2()) { + try { + FullHttpResponse blockedResponse = nettyHttpClient.send(transportAddress.address(), blockedRequest); + responses.add(blockedResponse); + String blockedResponseContent = new String(ByteBufUtil.getBytes(blockedResponse.content()), StandardCharsets.UTF_8); + assertThat(blockedResponseContent, containsString("Hit header_verifier")); + assertThat(blockedResponse.status().code(), equalTo(401)); + } finally { + responses.forEach(ReferenceCounted::release); + } + } + } + +} diff --git a/modules/transport-netty4/src/internalClusterTest/java/org/opensearch/transport/Netty4BlockingPlugin.java b/modules/transport-netty4/src/internalClusterTest/java/org/opensearch/transport/Netty4BlockingPlugin.java new file mode 100644 index 0000000000000..d5fe49952add3 --- /dev/null +++ b/modules/transport-netty4/src/internalClusterTest/java/org/opensearch/transport/Netty4BlockingPlugin.java @@ -0,0 +1,127 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.transport; + +import org.opensearch.common.network.NetworkService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.BigArrays; +import org.opensearch.common.util.PageCacheRecycler; +import org.opensearch.core.indices.breaker.CircuitBreakerService; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.http.HttpServerTransport; +import org.opensearch.http.netty4.Netty4HttpServerTransport; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.threadpool.ThreadPool; + +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.Map; +import java.util.function.Supplier; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.channel.SimpleChannelInboundHandler; +import io.netty.handler.codec.http.DefaultFullHttpResponse; +import io.netty.handler.codec.http.DefaultHttpRequest; +import io.netty.handler.codec.http.FullHttpResponse; +import io.netty.handler.codec.http.HttpRequest; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.netty.util.ReferenceCountUtil; + +public class Netty4BlockingPlugin extends Netty4ModulePlugin { + + public class Netty4BlockingHttpServerTransport extends Netty4HttpServerTransport { + + public Netty4BlockingHttpServerTransport( + Settings settings, + NetworkService networkService, + BigArrays bigArrays, + ThreadPool threadPool, + NamedXContentRegistry xContentRegistry, + Dispatcher dispatcher, + ClusterSettings clusterSettings, + SharedGroupFactory sharedGroupFactory, + Tracer tracer + ) { + super( + settings, + networkService, + bigArrays, + threadPool, + xContentRegistry, + dispatcher, + clusterSettings, + sharedGroupFactory, + tracer + ); + } + + @Override + protected ChannelInboundHandlerAdapter createHeaderVerifier() { + return new ExampleBlockingNetty4HeaderVerifier(); + } + } + + @Override + public Map> getHttpTransports( + Settings settings, + ThreadPool threadPool, + BigArrays bigArrays, + PageCacheRecycler pageCacheRecycler, + CircuitBreakerService circuitBreakerService, + NamedXContentRegistry xContentRegistry, + NetworkService networkService, + HttpServerTransport.Dispatcher dispatcher, + ClusterSettings clusterSettings, + Tracer tracer + ) { + return Collections.singletonMap( + NETTY_HTTP_TRANSPORT_NAME, + () -> new Netty4BlockingHttpServerTransport( + settings, + networkService, + bigArrays, + threadPool, + xContentRegistry, + dispatcher, + clusterSettings, + getSharedGroupFactory(settings), + tracer + ) + ); + } + + /** POC for how an external header verifier would be implemented */ + public class ExampleBlockingNetty4HeaderVerifier extends SimpleChannelInboundHandler { + + @Override + public void channelRead0(ChannelHandlerContext ctx, DefaultHttpRequest msg) throws Exception { + ReferenceCountUtil.retain(msg); + if (isBlocked(msg)) { + ByteBuf buf = Unpooled.copiedBuffer("Hit header_verifier".getBytes(StandardCharsets.UTF_8)); + final FullHttpResponse response = new DefaultFullHttpResponse(msg.protocolVersion(), HttpResponseStatus.UNAUTHORIZED, buf); + ctx.writeAndFlush(response).addListener(ChannelFutureListener.CLOSE); + ReferenceCountUtil.release(msg); + } else { + // Lets the request pass to the next channel handler + ctx.fireChannelRead(msg); + } + } + + 
private boolean isBlocked(HttpRequest request) { + final boolean shouldBlock = request.headers().contains("blockme"); + + return shouldBlock; + } + } +} diff --git a/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpServerTransport.java b/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpServerTransport.java index 0271472125814..1677f333a4b1c 100644 --- a/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpServerTransport.java +++ b/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpServerTransport.java @@ -334,7 +334,7 @@ public ChannelHandler configureServerChannelHandler() { return new HttpChannelHandler(this, handlingSettings); } - protected static final AttributeKey HTTP_CHANNEL_KEY = AttributeKey.newInstance("opensearch-http-channel"); + public static final AttributeKey HTTP_CHANNEL_KEY = AttributeKey.newInstance("opensearch-http-channel"); protected static final AttributeKey HTTP_SERVER_CHANNEL_KEY = AttributeKey.newInstance( "opensearch-http-server-channel" ); @@ -419,8 +419,8 @@ protected void channelRead0(ChannelHandlerContext ctx, HttpMessage msg) throws E // If this handler is hit then no upgrade has been attempted and the client is just talking HTTP final ChannelPipeline pipeline = ctx.pipeline(); pipeline.addAfter(ctx.name(), "handler", getRequestHandler()); - pipeline.replace(this, "decoder_compress", new HttpContentDecompressor()); - + pipeline.replace(this, "header_verifier", transport.createHeaderVerifier()); + pipeline.addAfter("header_verifier", "decoder_compress", transport.createDecompressor()); pipeline.addAfter("decoder_compress", "aggregator", aggregator); if (handlingSettings.isCompression()) { pipeline.addAfter( @@ -446,7 +446,8 @@ protected void configureDefaultHttpPipeline(ChannelPipeline pipeline) { ); decoder.setCumulator(ByteToMessageDecoder.COMPOSITE_CUMULATOR); pipeline.addLast("decoder", decoder); - pipeline.addLast("decoder_compress", new HttpContentDecompressor()); + pipeline.addLast("header_verifier", transport.createHeaderVerifier()); + pipeline.addLast("decoder_compress", transport.createDecompressor()); pipeline.addLast("encoder", new HttpResponseEncoder()); final HttpObjectAggregator aggregator = new HttpObjectAggregator(handlingSettings.getMaxContentLength()); aggregator.setMaxCumulationBufferComponents(transport.maxCompositeBufferComponents); @@ -487,13 +488,13 @@ protected void initChannel(Channel childChannel) throws Exception { final HttpObjectAggregator aggregator = new HttpObjectAggregator(handlingSettings.getMaxContentLength()); aggregator.setMaxCumulationBufferComponents(transport.maxCompositeBufferComponents); - childChannel.pipeline() .addLast(new LoggingHandler(LogLevel.DEBUG)) .addLast(new Http2StreamFrameToHttpObjectCodec(true)) .addLast("byte_buf_sizer", byteBufSizer) .addLast("read_timeout", new ReadTimeoutHandler(transport.readTimeoutMillis, TimeUnit.MILLISECONDS)) - .addLast("decoder_decompress", new HttpContentDecompressor()); + .addLast("header_verifier", transport.createHeaderVerifier()) + .addLast("decoder_decompress", transport.createDecompressor()); if (handlingSettings.isCompression()) { childChannel.pipeline() @@ -531,4 +532,21 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { } } } + + /** + * Extension point that allows a NetworkPlugin to extend the netty pipeline and inspect headers after request decoding + */ + protected ChannelInboundHandlerAdapter createHeaderVerifier() { + // pass-through + 
return new ChannelInboundHandlerAdapter(); + } + + /** + * Extension point that allows a NetworkPlugin to override the default netty HttpContentDecompressor and supply a custom decompressor. + * + * Used in instances to conditionally decompress depending on the outcome from header verification + */ + protected ChannelInboundHandlerAdapter createDecompressor() { + return new HttpContentDecompressor(); + } } diff --git a/modules/transport-netty4/src/main/java/org/opensearch/transport/Netty4ModulePlugin.java b/modules/transport-netty4/src/main/java/org/opensearch/transport/Netty4ModulePlugin.java index 1a34dfd2c9ee4..2bc795d11ed5d 100644 --- a/modules/transport-netty4/src/main/java/org/opensearch/transport/Netty4ModulePlugin.java +++ b/modules/transport-netty4/src/main/java/org/opensearch/transport/Netty4ModulePlugin.java @@ -144,7 +144,7 @@ public Map> getHttpTransports( ); } - private SharedGroupFactory getSharedGroupFactory(Settings settings) { + SharedGroupFactory getSharedGroupFactory(Settings settings) { SharedGroupFactory groupFactory = this.groupFactory.get(); if (groupFactory != null) { assert groupFactory.getSettings().equals(settings) : "Different settings than originally provided"; diff --git a/server/src/main/java/org/opensearch/rest/RestHandler.java b/server/src/main/java/org/opensearch/rest/RestHandler.java index 7832649e8ad32..294dc3ffbe329 100644 --- a/server/src/main/java/org/opensearch/rest/RestHandler.java +++ b/server/src/main/java/org/opensearch/rest/RestHandler.java @@ -108,7 +108,7 @@ default List replacedRoutes() { } /** - * Controls whether requests handled by this class are allowed to to access system indices by default. + * Controls whether requests handled by this class are allowed to access system indices by default. * @return {@code true} if requests handled by this class should be allowed to access system indices. 
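Because createHeaderVerifier() is wired into the pipeline ahead of createDecompressor(), a verifier can record a per-channel decision that a custom decompressor later honors, which is the "conditionally decompress depending on the outcome from header verification" case the javadoc above describes. A rough sketch of such a pairing; the handler names and attribute key are hypothetical, not part of the OpenSearch API:

    import io.netty.channel.ChannelHandlerContext;
    import io.netty.channel.ChannelInboundHandlerAdapter;
    import io.netty.handler.codec.http.HttpContentDecompressor;
    import io.netty.util.AttributeKey;

    final class ConditionalDecompressionSketch {
        // Hypothetical marker the verifier leaves on the channel.
        static final AttributeKey<Boolean> SKIP_DECOMPRESS = AttributeKey.valueOf("sketch-skip-decompress");

        // Installed as "header_verifier": runs before any decompression.
        static class MarkingHeaderVerifier extends ChannelInboundHandlerAdapter {
            @Override
            public void channelRead(ChannelHandlerContext ctx, Object msg) {
                // e.g. a security plugin deciding the body must stay opaque
                ctx.channel().attr(SKIP_DECOMPRESS).set(Boolean.TRUE);
                ctx.fireChannelRead(msg);
            }
        }

        // Installed as "decoder_compress": honors the verifier's decision.
        static class ConditionalDecompressor extends HttpContentDecompressor {
            @Override
            public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
                if (Boolean.TRUE.equals(ctx.channel().attr(SKIP_DECOMPRESS).get())) {
                    ctx.fireChannelRead(msg); // bypass decompression for this channel
                } else {
                    super.channelRead(ctx, msg); // normal HttpContentDecompressor path
                }
            }
        }
    }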
*/ default boolean allowSystemIndexAccessByDefault() { From e4c477a68b0fa9272169325787088a8633d338fd Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 5 Oct 2023 21:52:24 -0400 Subject: [PATCH 11/20] Disable build cache since Github Action runners fail with 'no space left on device' (#10409) Signed-off-by: Andriy Redko --- .github/workflows/assemble.yml | 26 ++++++++++++++++++++++++++ .github/workflows/precommit.yml | 11 +---------- settings.gradle | 4 +++- 3 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/assemble.yml diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml new file mode 100644 index 0000000000000..6a66ac5fb5609 --- /dev/null +++ b/.github/workflows/assemble.yml @@ -0,0 +1,26 @@ +name: Gradle Assemble +on: [pull_request] + +jobs: + assemble: + if: github.repository == 'opensearch-project/OpenSearch' + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + java-version: 11 + distribution: temurin + - name: Setup docker (missing on MacOS) + if: runner.os == 'macos' + run: | + brew install docker + colima start + sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock + - name: Run Gradle (assemble) + run: | + ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index f4622859916c7..b04f404b11c55 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -1,4 +1,4 @@ -name: Gradle Precommit and Assemble +name: Gradle Precommit on: [pull_request] jobs: @@ -19,12 +19,3 @@ jobs: - name: Run Gradle (precommit) run: | ./gradlew javadoc precommit --parallel - - name: Setup docker (missing on MacOS) - if: runner.os == 'macos' - run: | - brew install docker - colima start - sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock - - name: Run Gradle (assemble) - run: | - ./gradlew assemble --parallel diff --git a/settings.gradle b/settings.gradle index c04b5997d49b1..13cc6669e3d33 100644 --- a/settings.gradle +++ b/settings.gradle @@ -13,9 +13,11 @@ plugins { id "com.gradle.enterprise" version "3.14.1" } +ext.disableBuildCache = hasProperty('DISABLE_BUILD_CACHE') || System.getenv().containsKey('DISABLE_BUILD_CACHE') + buildCache { local { - enabled = true + enabled = !disableBuildCache removeUnusedEntriesAfterDays = 14 } } From 4fee40d847199495f19bf1209b12f67c11e84f54 Mon Sep 17 00:00:00 2001 From: Vikas Bansal <43470111+vikasvb90@users.noreply.github.com> Date: Fri, 6 Oct 2023 12:14:05 +0000 Subject: [PATCH 12/20] Fix to include static settings in addition to dynamic repo metadata settings during s3 plugin reload (#10452) Signed-off-by: vikasvb90 --- .../java/org/opensearch/repositories/s3/S3Repository.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java index a69309d9e7a6f..bbc3451be1535 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java @@ -374,9 +374,8 @@ public void reload(RepositoryMetadata newRepositoryMetadata) { readRepositoryMetadata(); // Reload configs for S3RepositoryPlugin - final Map<String, S3ClientSettings>
clientsSettings = S3ClientSettings.load(metadata.settings(), pluginConfigPath); - service.refreshAndClearCache(clientsSettings); - s3AsyncService.refreshAndClearCache(clientsSettings); + service.settings(metadata); + s3AsyncService.settings(metadata); // Reload configs for S3BlobStore BlobStore blobStore = getBlobStore(); From f4c7ac9fd87e1592a7cf4adf89594c176795409a Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 6 Oct 2023 10:52:36 -0400 Subject: [PATCH 13/20] Update Gradle to 8.4 (#10138) Signed-off-by: Andriy Redko --- .../gradle/internal/InternalBwcGitPlugin.java | 4 +--- gradle/wrapper/gradle-wrapper.properties | 4 ++-- gradlew | 14 +++++++------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/buildSrc/src/main/java/org/opensearch/gradle/internal/InternalBwcGitPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/internal/InternalBwcGitPlugin.java index 159270d28e3d6..c6e49dc44d6bd 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/internal/InternalBwcGitPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/internal/InternalBwcGitPlugin.java @@ -76,7 +76,7 @@ public InternalBwcGitPlugin(ProviderFactory providerFactory, ExecOperations exec public void apply(Project project) { this.project = project; this.gitExtension = project.getExtensions().create("bwcGitConfig", BwcGitExtension.class); - Provider remote = providerFactory.systemProperty("bwc.remote").forUseAtConfigurationTime().orElse("opensearch-project"); + Provider remote = providerFactory.systemProperty("bwc.remote").orElse("opensearch-project"); TaskContainer tasks = project.getTasks(); TaskProvider createCloneTaskProvider = tasks.register("createClone", LoggedExec.class, createClone -> { @@ -105,7 +105,6 @@ public void apply(Project project) { String remoteRepo = remote.get(); // for testing only we can override the base remote url String remoteRepoUrl = providerFactory.systemProperty("testRemoteRepo") - .forUseAtConfigurationTime() .getOrElse("https://github.com/" + remoteRepo + "/OpenSearch.git"); addRemote.setCommandLine(asList("git", "remote", "add", remoteRepo, remoteRepoUrl)); }); @@ -113,7 +112,6 @@ public void apply(Project project) { TaskProvider fetchLatestTaskProvider = tasks.register("fetchLatest", LoggedExec.class, fetchLatest -> { Provider gitFetchLatest = project.getProviders() .systemProperty("tests.bwc.git_fetch_latest") - .forUseAtConfigurationTime() .orElse("true") .map(fetchProp -> { if ("true".equals(fetchProp)) { diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index f01f0a84a786a..adfb521550eb9 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -11,7 +11,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.3-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionSha256Sum=bb09982fdf52718e4c7b25023d10df6d35a5fff969860bdf5a5bd27a3ab27a9e +distributionSha256Sum=f2b9ed0faf8472cbe469255ae6c86eddb77076c75191741b4a462f33128dd419 diff --git a/gradlew b/gradlew index 0adc8e1a53214..1aa94a4269074 100755 --- a/gradlew +++ b/gradlew @@ -145,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 
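Alongside the wrapper bump, the InternalBwcGitPlugin hunks above track a Gradle API change: Provider.forUseAtConfigurationTime() had become a deprecated no-op from Gradle 7.4 onward, because providers are configuration-cache aware by default, so the calls are simply dropped as part of moving to 8.4. A small sketch of the resulting pattern in a Java Gradle plugin (an illustrative plugin, not code from this build):

    import org.gradle.api.Plugin;
    import org.gradle.api.Project;
    import org.gradle.api.provider.Provider;

    public class RemoteDefaultingPlugin implements Plugin<Project> {
        @Override
        public void apply(Project project) {
            // Gradle 8.x: no forUseAtConfigurationTime() in the chain; reading
            // the system property is tracked for the configuration cache automatically.
            Provider<String> remote = project.getProviders()
                .systemProperty("bwc.remote")
                .orElse("opensearch-project");

            project.getTasks().register("printRemote", task -> task.doLast(t -> {
                System.out.println("bwc remote = " + remote.get());
            }));
        }
    }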
- # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac @@ -153,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then '' | soft) :;; #( *) # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -202,11 +202,11 @@ fi # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' -# Collect all arguments for the java command; -# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of -# shell script including quotes and variable substitutions, so put them in -# double quotes to make sure that they get re-expanded; and -# * put everything else in single quotes, so that it's not re-expanded. +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ From cd34fc72d71cb0ff21475e1fe3ee36f66ed2e9a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 6 Oct 2023 11:27:26 -0400 Subject: [PATCH 14/20] Bump com.google.api.grpc:proto-google-common-protos from 2.25.0 to 2.25.1 in /plugins/repository-gcs (#10298) * Bump com.google.api.grpc:proto-google-common-protos Bumps [com.google.api.grpc:proto-google-common-protos](https://github.com/googleapis/sdk-platform-java) from 2.25.0 to 2.25.1. - [Release notes](https://github.com/googleapis/sdk-platform-java/releases) - [Changelog](https://github.com/googleapis/sdk-platform-java/blob/main/CHANGELOG.md) - [Commits](https://github.com/googleapis/sdk-platform-java/commits) --- updated-dependencies: - dependency-name: com.google.api.grpc:proto-google-common-protos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 2 +- plugins/repository-gcs/build.gradle | 2 +- .../licenses/proto-google-common-protos-2.25.0.jar.sha1 | 1 - .../licenses/proto-google-common-protos-2.25.1.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 plugins/repository-gcs/licenses/proto-google-common-protos-2.25.0.jar.sha1 create mode 100644 plugins/repository-gcs/licenses/proto-google-common-protos-2.25.1.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index e408df2307587..4a039526d3664 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -107,7 +107,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.wiremock:wiremock-standalone` from 2.35.0 to 3.1.0 ([#9752](https://github.com/opensearch-project/OpenSearch/pull/9752)) - Bump `com.google.http-client:google-http-client-jackson2` from 1.43.2 to 1.43.3 ([#10126](https://github.com/opensearch-project/OpenSearch/pull/10126)) - Bump `org.xerial.snappy:snappy-java` from 1.1.10.3 to 1.1.10.5 ([#10206](https://github.com/opensearch-project/OpenSearch/pull/10206), [#10299](https://github.com/opensearch-project/OpenSearch/pull/10299)) -- Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.0 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208)) +- Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) - Bump `codecov/codecov-action` from 2 to 3 ([#10209](https://github.com/opensearch-project/OpenSearch/pull/10209)) - Bump `org.bouncycastle:bcpkix-jdk15to18` from 1.75 to 1.76 ([10219](https://github.com/opensearch-project/OpenSearch/pull/10219))` - Bump `org.bouncycastle:bcprov-jdk15to18` from 1.75 to 1.76 ([10219](https://github.com/opensearch-project/OpenSearch/pull/10219))` diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle index ec040a180876e..52e5e71618d69 100644 --- a/plugins/repository-gcs/build.gradle +++ b/plugins/repository-gcs/build.gradle @@ -60,7 +60,7 @@ dependencies { api 'com.google.api-client:google-api-client:2.2.0' - api 'com.google.api.grpc:proto-google-common-protos:2.25.0' + api 'com.google.api.grpc:proto-google-common-protos:2.25.1' api 'com.google.api.grpc:proto-google-iam-v1:0.12.0' api "com.google.auth:google-auth-library-credentials:${versions.google_auth}" diff --git a/plugins/repository-gcs/licenses/proto-google-common-protos-2.25.0.jar.sha1 b/plugins/repository-gcs/licenses/proto-google-common-protos-2.25.0.jar.sha1 deleted file mode 100644 index b5ef7ee78e794..0000000000000 --- a/plugins/repository-gcs/licenses/proto-google-common-protos-2.25.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -adfa7c3d9b806969db75cf35fe4b286b3b8b1ce0 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/proto-google-common-protos-2.25.1.jar.sha1 b/plugins/repository-gcs/licenses/proto-google-common-protos-2.25.1.jar.sha1 new file mode 100644 index 0000000000000..cd065dabb8e8a --- /dev/null +++ b/plugins/repository-gcs/licenses/proto-google-common-protos-2.25.1.jar.sha1 @@ -0,0 +1 @@ +cb90049537b621e39610a110c58ce0b914ee3cc5 \ No newline at end of file From 
9cb4f9dbd44e42269fe573f9592d58d4115cc522 Mon Sep 17 00:00:00 2001 From: Jay Deng Date: Fri, 6 Oct 2023 14:31:14 -0700 Subject: [PATCH 15/20] Mute flaky concurrent segment search tests pending fixes (#10437) * DiversifiedSamplerIT.testNestedSamples * QueryProfilePhaseTests.testMaxScore * QueryProfilePhaseTests.testCollapseQuerySearchResults * HighlighterSearchIT.testHighlightQueryRewriteDatesWithNow * FieldCapabilitiesIT.testWithIndexFilter * QueryProfilePhaseTests.testDisableTopScoreCollection Signed-off-by: Jay Deng Signed-off-by: Jay Deng --- .../aggregations/bucket/DiversifiedSamplerIT.java | 4 ++++ .../subphase/highlight/HighlighterSearchIT.java | 4 ++++ .../search/fieldcaps/FieldCapabilitiesIT.java | 4 ++++ .../search/query/QueryProfilePhaseTests.java | 12 ++++++++++++ 4 files changed, 24 insertions(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/DiversifiedSamplerIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/DiversifiedSamplerIT.java index 5e95073209c71..865dd670fbf68 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/DiversifiedSamplerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/DiversifiedSamplerIT.java @@ -221,6 +221,10 @@ public void testNestedDiversity() throws Exception { } public void testNestedSamples() throws Exception { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10046", + internalCluster().clusterService().getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) + ); // Test samples nested under samples int MAX_DOCS_PER_AUTHOR = 1; int MAX_DOCS_PER_GENRE = 2; diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 4cdf5ae8e674f..42d91ac945662 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -3343,6 +3343,10 @@ public void testFiltersFunctionScoreQueryHighlight() throws Exception { } public void testHighlightQueryRewriteDatesWithNow() throws Exception { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10434", + internalCluster().clusterService().getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) + ); assertAcked( client().admin() .indices() diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fieldcaps/FieldCapabilitiesIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fieldcaps/FieldCapabilitiesIT.java index f5d1b8234558e..6b95405b3ebd4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fieldcaps/FieldCapabilitiesIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fieldcaps/FieldCapabilitiesIT.java @@ -244,6 +244,10 @@ public void testWithIndexAlias() { } public void testWithIndexFilter() throws InterruptedException { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10433", + internalCluster().clusterService().getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) + ); 
assertAcked(prepareCreate("index-1").setMapping("timestamp", "type=date", "field1", "type=keyword")); assertAcked(prepareCreate("index-2").setMapping("timestamp", "type=date", "field1", "type=long")); diff --git a/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java b/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java index fc77c1b356124..92d27032b62e3 100644 --- a/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java +++ b/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java @@ -1066,6 +1066,10 @@ public void testIndexSortScrollOptimization() throws Exception { } public void testDisableTopScoreCollection() throws Exception { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10469", + executor != null + ); Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new StandardAnalyzer()); RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); @@ -1237,6 +1241,10 @@ public void testMinScore() throws Exception { } public void testMaxScore() throws Exception { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/9932", + executor != null + ); Directory dir = newDirectory(); final Sort sort = new Sort(new SortField("filter", SortField.Type.STRING)); IndexWriterConfig iwc = newIndexWriterConfig().setIndexSort(sort); @@ -1356,6 +1364,10 @@ public void testMaxScore() throws Exception { } public void testCollapseQuerySearchResults() throws Exception { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10139", + executor != null + ); Directory dir = newDirectory(); final Sort sort = new Sort(new SortField("user", SortField.Type.INT)); IndexWriterConfig iwc = newIndexWriterConfig().setIndexSort(sort); From 2bf63c931356d6a2e31ea00507032c2cd115af45 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 6 Oct 2023 23:14:25 -0400 Subject: [PATCH 16/20] Bump com.google.api.grpc:proto-google-common-protos from 2.25.0 to 2.25.1 in /plugins/repository-gcs (#10298) (#10461) * Bump com.google.api.grpc:proto-google-common-protos Bumps [com.google.api.grpc:proto-google-common-protos](https://github.com/googleapis/sdk-platform-java) from 2.25.0 to 2.25.1. - [Release notes](https://github.com/googleapis/sdk-platform-java/releases) - [Changelog](https://github.com/googleapis/sdk-platform-java/blob/main/CHANGELOG.md) - [Commits](https://github.com/googleapis/sdk-platform-java/commits) --- updated-dependencies: - dependency-name: com.google.api.grpc:proto-google-common-protos dependency-type: direct:production update-type: version-update:semver-patch ... 
* Updating SHAs * Update changelog --------- Signed-off-by: dependabot[bot] Signed-off-by: Andriy Redko Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- .ci/bwcVersions | 2 ++ libs/core/src/main/java/org/opensearch/Version.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.ci/bwcVersions b/.ci/bwcVersions index 6b86da2c91261..cfaadc5ed1e5e 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -23,4 +23,6 @@ BWC_VERSION: - "2.9.0" - "2.9.1" - "2.10.0" + - "2.10.1" - "2.11.0" + - "2.12.0" diff --git a/libs/core/src/main/java/org/opensearch/Version.java b/libs/core/src/main/java/org/opensearch/Version.java index 32f4ca0317907..3c210e5c7cf96 100644 --- a/libs/core/src/main/java/org/opensearch/Version.java +++ b/libs/core/src/main/java/org/opensearch/Version.java @@ -94,7 +94,9 @@ public class Version implements Comparable, ToXContentFragment { public static final Version V_2_9_0 = new Version(2090099, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_9_1 = new Version(2090199, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_10_0 = new Version(2100099, org.apache.lucene.util.Version.LUCENE_9_7_0); + public static final Version V_2_10_1 = new Version(2100199, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_11_0 = new Version(2110099, org.apache.lucene.util.Version.LUCENE_9_7_0); + public static final Version V_2_12_0 = new Version(2120099, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_3_0_0 = new Version(3000099, org.apache.lucene.util.Version.LUCENE_9_8_0); public static final Version CURRENT = V_3_0_0; From 98defa595cc83369fa863d50057d518133b81540 Mon Sep 17 00:00:00 2001 From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com> Date: Sat, 7 Oct 2023 13:40:38 +0530 Subject: [PATCH 17/20] [Remote Store] Using hash of node id in metadata file names (#10480) Signed-off-by: Gaurav Bafna --- .../org/opensearch/index/remote/RemoteStoreUtils.java | 7 +++---- .../index/store/RemoteSegmentStoreDirectory.java | 3 ++- .../translog/transfer/TranslogTransferMetadata.java | 2 +- .../opensearch/index/remote/RemoteStoreUtilsTests.java | 6 +++--- .../index/store/RemoteSegmentStoreDirectoryTests.java | 9 ++++++--- .../translog/transfer/TranslogTransferManagerTests.java | 8 ++++++-- 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index 0ca9e0209c5ec..b4c33d781af86 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -82,7 +82,7 @@ public static String getSegmentName(String filename) { * @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name . 
* fn returns null if node id is not part of the file name */ - static public void verifyNoMultipleWriters(List mdFiles, Function> fn) { + public static void verifyNoMultipleWriters(List mdFiles, Function> fn) { Map nodesByPrimaryTermAndGen = new HashMap<>(); mdFiles.forEach(mdFile -> { Tuple nodeIdByPrimaryTermAndGen = fn.apply(mdFile); @@ -91,10 +91,9 @@ static public void verifyNoMultipleWriters(List mdFiles, Function bmList = new LinkedList<>(); bmList.add(new PlainBlobMetadata(mdFilename, 1)); @@ -167,7 +167,7 @@ public void testVerifyMultipleWriters_Translog() throws InterruptedException { bmList = new LinkedList<>(); bmList.add(new PlainBlobMetadata(mdFilename, 1)); - TranslogTransferMetadata tm3 = new TranslogTransferMetadata(1, 1, 1, 2, "node-2"); + TranslogTransferMetadata tm3 = new TranslogTransferMetadata(1, 1, 1, 2, "node--2"); bmList.add(new PlainBlobMetadata(tm3.getFileName(), 1)); List finalBmList = bmList; assertThrows( diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index 4a89b3c718f0b..0f44d5c3b2f53 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -67,6 +67,7 @@ import org.mockito.Mockito; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.METADATA_FILES_TO_FETCH; +import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.test.RemoteStoreTestUtils.createMetadataFileBytes; import static org.opensearch.test.RemoteStoreTestUtils.getDummyMetadata; import static org.hamcrest.CoreMatchers.is; @@ -213,9 +214,7 @@ public void testUploadedSegmentMetadataFromStringException() { } public void testGetPrimaryTermGenerationUuid() { - String[] filenameTokens = "abc__9223372036854775795__9223372036854775784__uuid_xyz".split( - RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR - ); + String[] filenameTokens = "abc__9223372036854775795__9223372036854775784__uuid_xyz".split(SEPARATOR); assertEquals(12, RemoteSegmentStoreDirectory.MetadataFilenameUtils.getPrimaryTerm(filenameTokens)); assertEquals(23, RemoteSegmentStoreDirectory.MetadataFilenameUtils.getGeneration(filenameTokens)); } @@ -1178,6 +1177,10 @@ public void testMetadataFileNameOrder() { actualList.sort(String::compareTo); assertEquals(List.of(file3, file2, file4, file6, file5, file1), actualList); + + long count = file1.chars().filter(ch -> ch == SEPARATOR.charAt(0)).count(); + // There should not be any `_` in the md file name other than the separators, as `_` is used as the separator.
+ assertEquals(14, count); } private static class WrapperIndexOutput extends IndexOutput { diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index a48dbdcdacb71..5deb4c83e52fc 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -44,6 +44,7 @@ import org.mockito.Mockito; +import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anySet; @@ -515,10 +516,13 @@ public void testMetadataConflict() throws InterruptedException { null, remoteTranslogTransferTracker ); - TranslogTransferMetadata tm = new TranslogTransferMetadata(1, 1, 1, 2, "node-1"); + TranslogTransferMetadata tm = new TranslogTransferMetadata(1, 1, 1, 2, "node--1"); String mdFilename = tm.getFileName(); + long count = mdFilename.chars().filter(ch -> ch == METADATA_SEPARATOR.charAt(0)).count(); + // There should not be any `_` in the md file name other than the separators, as `_` is used as the separator. + assertEquals(10, count); Thread.sleep(1); - TranslogTransferMetadata tm2 = new TranslogTransferMetadata(1, 1, 1, 2, "node-2"); + TranslogTransferMetadata tm2 = new TranslogTransferMetadata(1, 1, 1, 2, "node--2"); String mdFilename2 = tm2.getFileName(); doAnswer(invocation -> { From 732ce21a3e0ab5492d5cc327d287525b0170234e Mon Sep 17 00:00:00 2001 From: Ashish Date: Sat, 7 Oct 2023 14:12:36 +0530 Subject: [PATCH 18/20] Fix flaky testRemoteRefreshRetryOnFailure in RemoteStoreRefreshListenerIT (#10464) Signed-off-by: Ashish Singh --- .../remotestore/RemoteStoreRefreshListenerIT.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java index 88760b7bbfad2..acdb21d072320 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java @@ -28,9 +28,13 @@ public class RemoteStoreRefreshListenerIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { public void testRemoteRefreshRetryOnFailure() throws Exception { - Path location = randomRepoPath().toAbsolutePath(); setup(location, randomDoubleBetween(0.1, 0.15, true), "metadata", 10L); + client().admin() .cluster() .prepareUpdateSettings() .setPersistentSettings(Settings.builder().put(REMOTE_REFRESH_SEGMENT_PRESSURE_ENABLED.getKey(), false)) .get(); // Here we are having flush/refresh after each iteration of indexing. However, the refresh will not always succeed // due to IOExceptions that are thrown while doing uploadBlobs.
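Editor's note: the separator-count assertions above, and the testGetPrimaryTermAndGeneration fix in the next patch, all pin down the same invariant from PATCH 17: because `_` is the field separator in remote-store metadata file names, the raw node id (which may itself contain `-` or `_`) is replaced by String.valueOf(Objects.hash(nodeId)), a purely numeric token. The following is a minimal, self-contained sketch of that invariant; buildMetadataFileName is a hypothetical stand-in for the real TranslogTransferMetadata.getFileName() / MetadataFilenameUtils logic, not the actual implementation.

import java.util.Objects;

public class NodeIdHashSketch {
    static final String SEPARATOR = "__";

    // Hypothetical helper: every field is joined with `__`, and the node id is
    // hashed so that characters like `_` in the raw id can never add extra
    // separators to the file name.
    static String buildMetadataFileName(long primaryTerm, long generation, String nodeId) {
        // Objects.hash(nodeId) is an int, so its string form is digits with at
        // most a leading '-'; it is always `_`-free.
        return String.join(
            SEPARATOR,
            String.valueOf(primaryTerm),
            String.valueOf(generation),
            String.valueOf(Objects.hash(nodeId))
        );
    }

    public static void main(String[] args) {
        // Even a pathological node id cannot change the number of fields.
        String fileName = buildMetadataFileName(1, 2, "node__with_underscores");
        if (fileName.split(SEPARATOR).length != 3) {
            throw new AssertionError("unexpected separator count in " + fileName);
        }
        System.out.println(fileName); // e.g. 1__2__<hash of node id>
    }
}

Counting separator characters, as the assertEquals(10, count) and assertEquals(14, count) assertions above do, is the flattened form of the same check: a fixed separator count means no field leaked an extra `_` into the name, so getNodeIdByPrimaryTermAndGeneration can split the name unambiguously.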
From 22ddcf19c7a0ceb8998cdc6344ba132b547cef1c Mon Sep 17 00:00:00 2001 From: Ashish Date: Sat, 7 Oct 2023 15:48:09 +0530 Subject: [PATCH 19/20] Fix failing testGetPrimaryTermAndGeneration in TranslogTransferManagerTests (#10490) Signed-off-by: Ashish Singh --- .../transfer/TranslogTransferManagerTests.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index 5deb4c83e52fc..af596e7df02c2 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -39,7 +39,9 @@ import java.util.Collections; import java.util.LinkedList; import java.util.List; +import java.util.Objects; import java.util.Set; +import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import org.mockito.Mockito; @@ -504,8 +506,12 @@ private void assertTlogCkpDownloadStats() { } public void testGetPrimaryTermAndGeneration() { - String tm = new TranslogTransferMetadata(1, 2, 1, 2, "node-1").getFileName(); - assertEquals(new Tuple<>(new Tuple<>(1L, 2L), "node-1"), TranslogTransferMetadata.getNodeIdByPrimaryTermAndGeneration(tm)); + String nodeId = UUID.randomUUID().toString(); + String tm = new TranslogTransferMetadata(1, 2, 1, 2, nodeId).getFileName(); + Tuple, String> actualOutput = TranslogTransferMetadata.getNodeIdByPrimaryTermAndGeneration(tm); + assertEquals(1L, (long) (actualOutput.v1().v1())); + assertEquals(2L, (long) (actualOutput.v1().v2())); + assertEquals(String.valueOf(Objects.hash(nodeId)), actualOutput.v2()); } public void testMetadataConflict() throws InterruptedException { From 8bb11a68ec5c4fe784b2a1efdd459d2f6476cecc Mon Sep 17 00:00:00 2001 From: Ashish Date: Sat, 7 Oct 2023 22:58:13 +0530 Subject: [PATCH 20/20] Fix bugs causing red indexes with remote indexes during translog upload & store recovery (#10449) --------- Signed-off-by: Ashish Singh --- ...emoteStoreMockRepositoryIntegTestCase.java | 2 +- ...moteStoreBackpressureAndResiliencyIT.java} | 74 ++++++++++++++++++- .../org/opensearch/index/IndexService.java | 2 +- .../opensearch/index/shard/IndexShard.java | 5 +- 4 files changed, 79 insertions(+), 4 deletions(-) rename server/src/internalClusterTest/java/org/opensearch/remotestore/{RemoteStoreBackpressureIT.java => RemoteStoreBackpressureAndResiliencyIT.java} (67%) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java index 9d4d8aa24bd51..2053800504c89 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java @@ -160,7 +160,7 @@ private String getLocalSegmentFilename(String remoteFilename) { return remoteFilename.split(RemoteSegmentStoreDirectory.SEGMENT_NAME_UUID_SEPARATOR)[0]; } - private IndexResponse indexSingleDoc() { + protected IndexResponse indexSingleDoc() { return client().prepareIndex(INDEX_NAME) .setId(UUIDs.randomBase64UUID()) .setSource(randomAlphaOfLength(5), randomAlphaOfLength(5)) diff --git 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java similarity index 67% rename from server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureIT.java rename to server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java index 3462054c23630..2c6db6ae19a9a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java @@ -12,12 +12,18 @@ import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.AbstractAsyncTask; +import org.opensearch.common.util.concurrent.UncategorizedExecutionException; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.index.IndexService; import org.opensearch.index.remote.RemoteSegmentTransferTracker; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; import org.opensearch.repositories.RepositoriesService; import org.opensearch.snapshots.mockstore.MockRepository; import org.opensearch.test.OpenSearchIntegTestCase; @@ -33,7 +39,7 @@ import static org.opensearch.index.remote.RemoteStorePressureSettings.REMOTE_REFRESH_SEGMENT_PRESSURE_ENABLED; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteStoreBackpressureIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { +public class RemoteStoreBackpressureAndResiliencyIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { public void testWritesRejectedDueToConsecutiveFailureBreach() throws Exception { // Here the doc size of the request remains same throughout the test. After initial indexing, all remote store interactions // fail leading to consecutive failure limit getting exceeded and leading to rejections. 
@@ -156,4 +162,70 @@ private String generateString(int sizeInBytes) { sb.append("}"); return sb.toString(); } + + /** + * Fixes GitHub#10398 + */ + public void testAsyncTrimTaskSucceeds() { + Path location = randomRepoPath().toAbsolutePath(); + String dataNodeName = setup(location, 0d, "metadata", Long.MAX_VALUE); + + logger.info("Increasing the frequency of async trim task to ensure it runs in background while indexing"); + IndexService indexService = internalCluster().getInstance(IndicesService.class, dataNodeName).iterator().next(); + ((AbstractAsyncTask) indexService.getTrimTranslogTask()).setInterval(TimeValue.timeValueMillis(100)); + + logger.info("--> Indexing data"); + indexData(randomIntBetween(2, 5), true); + logger.info("--> Indexing succeeded"); + + MockRepository translogRepo = (MockRepository) internalCluster().getInstance(RepositoriesService.class, dataNodeName) .repository(TRANSLOG_REPOSITORY_NAME); + logger.info("--> Failing all remote store interaction"); + translogRepo.setRandomControlIOExceptionRate(1d); + + for (int i = 0; i < randomIntBetween(5, 10); i++) { + UncategorizedExecutionException exception = assertThrows(UncategorizedExecutionException.class, this::indexSingleDoc); + assertEquals("Failed execution", exception.getMessage()); + } + + translogRepo.setRandomControlIOExceptionRate(0d); + indexSingleDoc(); + logger.info("Indexed single doc successfully"); + } + + /** + * Fixes GitHub#10400 + */ + public void testSkipLoadGlobalCheckpointToReplicationTracker() { + Path location = randomRepoPath().toAbsolutePath(); + String dataNodeName = setup(location, 0d, "metadata", Long.MAX_VALUE); + + logger.info("--> Indexing data"); + indexData(randomIntBetween(1, 2), true); + logger.info("--> Indexing succeeded"); + + IndexService indexService = internalCluster().getInstance(IndicesService.class, dataNodeName).iterator().next(); + IndexShard indexShard = indexService.getShard(0); + indexShard.failShard("failing shard", null); + + ensureRed(INDEX_NAME); + + MockRepository translogRepo = (MockRepository) internalCluster().getInstance(RepositoriesService.class, dataNodeName) .repository(TRANSLOG_REPOSITORY_NAME); + logger.info("--> Failing all remote store interaction"); + translogRepo.setRandomControlIOExceptionRate(1d); + client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + // Cluster still stays red as the remote interactions are still failing + ensureRed(INDEX_NAME); + + logger.info("Retrying to allocate failed shards"); + client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + // Cluster still stays red as the remote interactions are still failing + ensureRed(INDEX_NAME); + + logger.info("Stop failing all remote store interactions"); + translogRepo.setRandomControlIOExceptionRate(0d); + client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + ensureGreen(INDEX_NAME); + } } diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index df8e8070b8e03..af23145be9f89 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -1286,7 +1286,7 @@ AsyncTranslogFSync getFsyncTask() { // for tests return fsyncTask; } - AsyncTrimTranslogTask getTrimTranslogTask() { // for tests + public AsyncTrimTranslogTask getTrimTranslogTask() { // for tests return trimTranslogTask; } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java
b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 833c91c1766c8..251f9a5ae01c0 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -1465,6 +1465,9 @@ public void flush(FlushRequest request) { * {@link org.opensearch.index.translog.TranslogDeletionPolicy} for details */ public void trimTranslog() { + if (isRemoteTranslogEnabled()) { + return; + } verifyNotClosed(); final Engine engine = getEngine(); engine.translogManager().trimUnreferencedTranslogFiles(); @@ -2314,7 +2317,7 @@ public void openEngineAndRecoverFromTranslog() throws IOException { }; // Do not load the global checkpoint if this is a remote snapshot index - if (indexSettings.isRemoteSnapshot() == false) { + if (indexSettings.isRemoteSnapshot() == false && indexSettings.isRemoteTranslogStoreEnabled() == false) { loadGlobalCheckpointToReplicationTracker(); }
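Editor's note: the two IndexShard changes in PATCH 20 apply one guard pattern: operations that assume a local translog (trimming unreferenced generations, and seeding the replication tracker from the locally persisted global checkpoint) are skipped when the translog lives in the remote store, since the remote store is the source of truth during recovery. Below is a condensed sketch of that pattern under simplified, stand-in names; it is not the literal OpenSearch implementation.

// Condensed sketch of the guard pattern; field and method names are
// hypothetical stand-ins for the real IndexShard members.
public class RemoteTranslogGuardSketch {
    private final boolean remoteTranslogEnabled;
    private final boolean remoteSnapshot;

    public RemoteTranslogGuardSketch(boolean remoteTranslogEnabled, boolean remoteSnapshot) {
        this.remoteTranslogEnabled = remoteTranslogEnabled;
        this.remoteSnapshot = remoteSnapshot;
    }

    public void trimTranslog() {
        // Remote translogs are trimmed by the remote store's own deletion
        // policy; a local trim could race with the upload path and delete
        // generations it still references, so the call becomes a no-op.
        if (remoteTranslogEnabled) {
            return;
        }
        System.out.println("trimming unreferenced local translog generations");
    }

    public void openEngineAndRecoverFromTranslog() {
        // The locally persisted global checkpoint can be stale for remote
        // snapshot and remote translog indexes, so only plain local indexes
        // seed the replication tracker from it.
        if (remoteSnapshot == false && remoteTranslogEnabled == false) {
            loadGlobalCheckpointToReplicationTracker();
        }
        System.out.println("opening engine and replaying translog");
    }

    private void loadGlobalCheckpointToReplicationTracker() {
        System.out.println("seeding replication tracker from local global checkpoint");
    }

    public static void main(String[] args) {
        new RemoteTranslogGuardSketch(true, false).trimTranslog();                      // skipped: remote translog
        new RemoteTranslogGuardSketch(false, false).openEngineAndRecoverFromTranslog(); // seeds tracker: local index
    }
}

Guarding at the entry point, rather than at each caller, leaves callers such as the async trim task unchanged, which is presumably why testAsyncTrimTaskSucceeds above passes without any change to the task scheduling itself.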