From 53ef5f3c1b038d14dbb1473cafa18223b33af2ce Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 6 Sep 2023 17:44:32 -0700 Subject: [PATCH 01/31] Code changes to add retry logic for GW returned 503.9002. --- .../src/ClientRetryPolicy.cs | 873 ++++++----- .../src/CosmosClientOptions.cs | 2 +- .../src/Routing/LocationCache.cs | 1336 ++++++++--------- 3 files changed, 1103 insertions(+), 1108 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2933baa1a9..2f007c6fbf 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -1,440 +1,435 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos -{ - using System; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Net; - using System.Net.Http; - using System.Threading; - using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Cosmos.Routing; - using Microsoft.Azure.Documents; - - /// - /// Client policy is combination of endpoint change retry + throttling retry. - /// - internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy - { - private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. 
- private const int MaxRetryCount = 120; - private const int MaxServiceUnavailableRetryCount = 1; - - private readonly IDocumentClientRetryPolicy throttlingRetry; - private readonly GlobalEndpointManager globalEndpointManager; - private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; - private readonly bool enableEndpointDiscovery; - private int failoverRetryCount; - - private int sessionTokenRetryCount; - private int serviceUnavailableRetryCount; - private bool isReadRequest; - private bool canUseMultipleWriteLocations; - private Uri locationEndpoint; - private RetryContext retryContext; - private DocumentServiceRequest documentServiceRequest; - - public ClientRetryPolicy( - GlobalEndpointManager globalEndpointManager, - GlobalPartitionEndpointManager partitionKeyRangeLocationCache, - bool enableEndpointDiscovery, - RetryOptions retryOptions) - { - this.throttlingRetry = new ResourceThrottleRetryPolicy( - retryOptions.MaxRetryAttemptsOnThrottledRequests, - retryOptions.MaxRetryWaitTimeInSeconds); - - this.globalEndpointManager = globalEndpointManager; - this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; - this.failoverRetryCount = 0; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.sessionTokenRetryCount = 0; - this.serviceUnavailableRetryCount = 0; - this.canUseMultipleWriteLocations = false; - } - - /// - /// Should the caller retry the operation. - /// - /// Exception that occurred when the operation was tried - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - Exception exception, - CancellationToken cancellationToken) - { - this.retryContext = null; - // Received Connection error (HttpRequestException), initiate the endpoint rediscovery - if (exception is HttpRequestException _) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. 
Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark both read and write requests because it gateway exception. - // This means all requests going to the region will fail. - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: true, - forceRefresh: false, - retryOnPreferredLocations: true); - } - - if (exception is DocumentClientException clientException) - { - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - clientException?.StatusCode, - clientException?.GetSubStatus()); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - } - - return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); - } - - /// - /// Should the caller retry the operation. - /// - /// in return of the request - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - ResponseMessage cosmosResponseMessage, - CancellationToken cancellationToken) - { - this.retryContext = null; - - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - cosmosResponseMessage?.StatusCode, - cosmosResponseMessage?.Headers.SubStatusCode); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - - return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); - } - - /// - /// Method that is called before a request is sent to allow the retry policy implementation - /// to modify the state of the request. - /// - /// The request being sent to the service. 
- public void OnBeforeSendRequest(DocumentServiceRequest request) - { - this.isReadRequest = request.IsReadOnlyRequest; - this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); - this.documentServiceRequest = request; - - // clear previous location-based routing directive - request.RequestContext.ClearRouteToLocation(); - - if (this.retryContext != null) - { - if (this.retryContext.RouteToHub) - { - request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); - } - else - { - // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); - } - } - - // Resolve the endpoint for the request and pin the resolution to the resolved endpoint - // This enables marking the endpoint unavailability on endpoint failover/unreachability - this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); - request.RequestContext.RouteToLocation(this.locationEndpoint); - } - - private async Task ShouldRetryInternalAsync( - HttpStatusCode? statusCode, - SubStatusCodes? subStatusCode) - { - if (!statusCode.HasValue - && (!subStatusCode.HasValue - || subStatusCode.Value == SubStatusCodes.Unknown)) - { - return null; - } - - // Received request timeout - if (statusCode == HttpStatusCode.RequestTimeout) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition key range as unavailable to retry future request on a new region. 
- this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - } - - // Received 403.3 on write region, initiate the endpoint rediscovery - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.WriteForbidden) - { - // It's a write forbidden so it safe to retry - if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest)) - { - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) - { - bool forceRefresh = false; - - if (this.retryContext != null && this.retryContext.RouteToHub) - { - forceRefresh = true; - - } - - ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: forceRefresh, - retryOnPreferredLocations: false, - overwriteEndpointDiscovery: true); - - if (retryResult.ShouldRetry) - { - this.retryContext.RouteToHub = true; - } - - return retryResult; - } - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: true, - retryOnPreferredLocations: false); - } - - // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.DatabaseAccountNotFound - && (this.isReadRequest || this.canUseMultipleWriteLocations)) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: false, - retryOnPreferredLocations: false); - } - - if (statusCode == HttpStatusCode.NotFound - && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { - return this.ShouldRetryOnSessionNotAvailable(); - } - - // Received 503 due to client connect timeout or Gateway - if (statusCode == HttpStatusCode.ServiceUnavailable - && ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode)) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition as unavailable. - // Let the ClientRetry logic decide if the request should be retried - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - - return this.ShouldRetryOnServiceUnavailable(); - } - - return null; - } - - private static bool IsRetriableServiceUnavailable(SubStatusCodes? 
subStatusCode) - { - return subStatusCode == SubStatusCodes.Unknown || - (subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus()); - } - - private async Task ShouldRetryOnEndpointFailureAsync( - bool isReadRequest, - bool markBothReadAndWriteAsUnavailable, - bool forceRefresh, - bool retryOnPreferredLocations, - bool overwriteEndpointDiscovery = false) - { - if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", - this.failoverRetryCount, - this.locationEndpoint?.ToString() ?? string.Empty); - return ShouldRetryResult.NoRetry(); - } - - this.failoverRetryCount++; - - if (this.locationEndpoint != null && !overwriteEndpointDiscovery) - { - if (isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); - } - - if (!isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); - } - } - - TimeSpan retryDelay = TimeSpan.Zero; - if (!isReadRequest) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); - - if (this.failoverRetryCount > 1) - { - //if retried both endpoints, follow regular retry interval. 
- retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - } - else - { - retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - - await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); - - int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect - if (retryOnPreferredLocations) - { - retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list - } - - this.retryContext = new RetryContext - { - RetryLocationIndex = retryLocationIndex, - RetryRequestOnPreferredLocations = retryOnPreferredLocations, - }; - - return ShouldRetryResult.RetryAfter(retryDelay); - } - - private ShouldRetryResult ShouldRetryOnSessionNotAvailable() - { - this.sessionTokenRetryCount++; - - if (!this.enableEndpointDiscovery) - { - // if endpoint discovery is disabled, the request cannot be retried anywhere else - return ShouldRetryResult.NoRetry(); - } - else - { - if (this.canUseMultipleWriteLocations) - { - ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; - - if (this.sessionTokenRetryCount > endpoints.Count) - { - // When use multiple write locations is true and the request has been tried - // on all locations, then don't retry the request - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.sessionTokenRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - else - { - if (this.sessionTokenRetryCount > 1) - { - // When cannot use multiple write locations, then don't retry the request if - // we have already tried this request on the write location - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext - { - RetryLocationIndex = 0, - RetryRequestOnPreferredLocations = false - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - } - } - - /// - /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. - /// We try and retry the request only if there are other regions available. - /// - private ShouldRetryResult ShouldRetryOnServiceUnavailable() - { - if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) - { - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); - return ShouldRetryResult.NoRetry(); - } - - if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available - return ShouldRetryResult.NoRetry(); - } - - int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; - - if (availablePreferredLocations <= 1) - { - // No other regions to retry on - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); - return ShouldRetryResult.NoRetry(); - } - - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); - - // Retrying on second PreferredLocations - // RetryCount is used as zero-based index - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.serviceUnavailableRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - private sealed class RetryContext - { - public int RetryLocationIndex { get; set; } - public bool RetryRequestOnPreferredLocations { get; set; } - - public bool RouteToHub { get; set; } - } - } +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Net; + using System.Net.Http; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Documents; + + /// + /// Client policy is combination of endpoint change retry + throttling retry. + /// + internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy + { + private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. + private const int MaxRetryCount = 120; + private const int MaxServiceUnavailableRetryCount = 1; + + private readonly IDocumentClientRetryPolicy throttlingRetry; + private readonly GlobalEndpointManager globalEndpointManager; + private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; + private readonly bool enableEndpointDiscovery; + private int failoverRetryCount; + + private int sessionTokenRetryCount; + private int serviceUnavailableRetryCount; + private bool isReadRequest; + private bool canUseMultipleWriteLocations; + private Uri locationEndpoint; + private RetryContext retryContext; + private DocumentServiceRequest documentServiceRequest; + + public ClientRetryPolicy( + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManager partitionKeyRangeLocationCache, + bool enableEndpointDiscovery, + RetryOptions retryOptions) + { + this.throttlingRetry = new ResourceThrottleRetryPolicy( + retryOptions.MaxRetryAttemptsOnThrottledRequests, + retryOptions.MaxRetryWaitTimeInSeconds); + + this.globalEndpointManager = globalEndpointManager; + this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; + this.failoverRetryCount = 0; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.sessionTokenRetryCount = 0; + 
this.serviceUnavailableRetryCount = 0; + this.canUseMultipleWriteLocations = false; + } + + /// + /// Should the caller retry the operation. + /// + /// Exception that occurred when the operation was tried + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + Exception exception, + CancellationToken cancellationToken) + { + this.retryContext = null; + // Received Connection error (HttpRequestException), initiate the endpoint rediscovery + if (exception is HttpRequestException _) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark both read and write requests because it is a gateway exception. + // This means all requests going to the region will fail. + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: true, + forceRefresh: false, + retryOnPreferredLocations: true); + } + + if (exception is DocumentClientException clientException) + { + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + clientException?.StatusCode, + clientException?.GetSubStatus()); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + } + + return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); + } + + /// + /// Should the caller retry the operation. 
+ /// + /// in return of the request + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + ResponseMessage cosmosResponseMessage, + CancellationToken cancellationToken) + { + this.retryContext = null; + + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + cosmosResponseMessage?.StatusCode, + cosmosResponseMessage?.Headers.SubStatusCode); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + + return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); + } + + /// + /// Method that is called before a request is sent to allow the retry policy implementation + /// to modify the state of the request. + /// + /// The request being sent to the service. + public void OnBeforeSendRequest(DocumentServiceRequest request) + { + this.isReadRequest = request.IsReadOnlyRequest; + this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); + this.documentServiceRequest = request; + + // clear previous location-based routing directive + request.RequestContext.ClearRouteToLocation(); + + if (this.retryContext != null) + { + if (this.retryContext.RouteToHub) + { + request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); + } + else + { + // set location-based routing directive based on request retry context + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); + } + } + + // Resolve the endpoint for the request and pin the resolution to the resolved endpoint + // This enables marking the endpoint unavailability on endpoint failover/unreachability + this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); + request.RequestContext.RouteToLocation(this.locationEndpoint); + } + + private async Task ShouldRetryInternalAsync( + HttpStatusCode? statusCode, + SubStatusCodes? 
subStatusCode) + { + if (!statusCode.HasValue + && (!subStatusCode.HasValue + || subStatusCode.Value == SubStatusCodes.Unknown)) + { + return null; + } + + // Console.WriteLine("Status Code: " + statusCode.Value + "Sub Status Code: " + subStatusCode.Value + "IsRead Request: " + this.isReadRequest); + + // Received request timeout + if (statusCode == HttpStatusCode.RequestTimeout) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition key range as unavailable to retry future request on a new region. + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + } + + // Received 403.3 on write region, initiate the endpoint rediscovery + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.WriteForbidden) + { + // It's a write forbidden so it is safe to retry + if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest)) + { + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); + + if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) + { + bool forceRefresh = false; + + if (this.retryContext != null && this.retryContext.RouteToHub) + { + forceRefresh = true; + + } + + ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: forceRefresh, + retryOnPreferredLocations: false, + overwriteEndpointDiscovery: true); + + if (retryResult.ShouldRetry) + { + this.retryContext.RouteToHub = true; + } + + return retryResult; + } + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: true, + retryOnPreferredLocations: false); + } + + // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.DatabaseAccountNotFound + && (this.isReadRequest || this.canUseMultipleWriteLocations)) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: false, + retryOnPreferredLocations: false); + } + + if (statusCode == HttpStatusCode.NotFound + && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) + { + return this.ShouldRetryOnSessionNotAvailable(); + } + + // Received 503 due to client connect timeout or Gateway + if (statusCode == HttpStatusCode.ServiceUnavailable) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition as unavailable. + // Let the ClientRetry logic decide if the request should be retried + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + + return this.ShouldRetryOnServiceUnavailable(); + } + + return null; + } + + private async Task ShouldRetryOnEndpointFailureAsync( + bool isReadRequest, + bool markBothReadAndWriteAsUnavailable, + bool forceRefresh, + bool retryOnPreferredLocations, + bool overwriteEndpointDiscovery = false) + { + if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", + this.failoverRetryCount, + this.locationEndpoint?.ToString() ?? string.Empty); + return ShouldRetryResult.NoRetry(); + } + + this.failoverRetryCount++; + + if (this.locationEndpoint != null && !overwriteEndpointDiscovery) + { + if (isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); + } + + if (!isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); + } + } + + TimeSpan retryDelay = TimeSpan.Zero; + if (!isReadRequest) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); + + if (this.failoverRetryCount > 1) + { + //if retried both endpoints, follow regular retry interval. 
+ retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + } + else + { + retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + + await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); + + int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect + if (retryOnPreferredLocations) + { + retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preference list + } + + this.retryContext = new RetryContext + { + RetryLocationIndex = retryLocationIndex, + RetryRequestOnPreferredLocations = retryOnPreferredLocations, + }; + + return ShouldRetryResult.RetryAfter(retryDelay); + } + + private ShouldRetryResult ShouldRetryOnSessionNotAvailable() + { + this.sessionTokenRetryCount++; + + if (!this.enableEndpointDiscovery) + { + // if endpoint discovery is disabled, the request cannot be retried anywhere else + return ShouldRetryResult.NoRetry(); + } + else + { + if (this.canUseMultipleWriteLocations) + { + ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; + + if (this.sessionTokenRetryCount > endpoints.Count) + { + // When use multiple write locations is true and the request has been tried + // on all locations, then don't retry the request + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.sessionTokenRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + else + { + if (this.sessionTokenRetryCount > 1) + { + // When cannot use multiple write locations, then don't retry the request if + // we have already tried this request on the write location + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext + { + RetryLocationIndex = 0, + RetryRequestOnPreferredLocations = false + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + } + } + + /// + /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. + /// We try and retry the request only if there are other regions available. + /// + private ShouldRetryResult ShouldRetryOnServiceUnavailable() + { + if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) + { + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); + return ShouldRetryResult.NoRetry(); + } + + /*if (!this.canUseMultipleWriteLocations + && !this.isReadRequest) + { + // Write requests on single master cannot be retried, no other regions available + return ShouldRetryResult.NoRetry(); + }*/ + + int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; + + if (availablePreferredLocations <= 1) + { + // No other regions to retry on + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); + return ShouldRetryResult.NoRetry(); + } + + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); + + // Retrying on second PreferredLocations + // RetryCount is used as zero-based index + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.serviceUnavailableRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + private sealed class RetryContext + { + public int RetryLocationIndex { get; set; } + public bool RetryRequestOnPreferredLocations { get; set; } + + public bool RouteToHub { get; set; } + } + } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 2c07f060f8..ee67522d6b 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - internal bool EnablePartitionLevelFailover { get; set; } = false; + public bool EnablePartitionLevelFailover { get; set; } = false; /// /// Quorum Read allowed with eventual consistency account or 
consistent prefix account. diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 9c6308d8b6..6f07b7a52a 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -1,668 +1,668 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos.Routing -{ - using System; - using System.Collections.Concurrent; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Globalization; - using System.Linq; - using System.Net; - using global::Azure.Core; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Documents; - - /// - /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount - /// with multiple writable and readable locations. 
- /// - internal sealed class LocationCache - { - private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; - private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - - private readonly bool enableEndpointDiscovery; - private readonly Uri defaultEndpoint; - private readonly bool useMultipleWriteLocations; - private readonly object lockObject; - private readonly TimeSpan unavailableLocationsExpirationTime; - private readonly int connectionLimit; - private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; - - private DatabaseAccountLocationsInfo locationInfo; - private DateTime lastCacheUpdateTimestamp; - private bool enableMultipleWriteLocations; - - public LocationCache( - ReadOnlyCollection preferredLocations, - Uri defaultEndpoint, - bool enableEndpointDiscovery, - int connectionLimit, - bool useMultipleWriteLocations) - { - this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); - this.defaultEndpoint = defaultEndpoint; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.useMultipleWriteLocations = useMultipleWriteLocations; - this.connectionLimit = connectionLimit; - - this.lockObject = new object(); - this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); - this.lastCacheUpdateTimestamp = DateTime.MinValue; - this.enableMultipleWriteLocations = false; - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - -#if !(NETSTANDARD15 || NETSTANDARD16) -#if NETSTANDARD20 - // GetEntryAssembly returns null when loaded from native netstandard2.0 - if (System.Reflection.Assembly.GetEntryAssembly() != null) - { -#endif - string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; - if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) - { - int unavailableLocationsExpirationTimeinSecondsConfigValue; - - if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - } - else - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); - } - } -#if NETSTANDARD20 - } -#endif -#endif - } - - /// - /// Gets list of read endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection ReadEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.ReadEndpoints; - } - } - - /// - /// Gets list of write endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection WriteEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.WriteEndpoints; - } - } - - /// - /// Returns the location corresponding to the endpoint if location specific endpoint is provided. - /// For the defaultEndPoint, we will return the first available write location. - /// Returns null, in other cases. - /// - /// - /// Today we return null for defaultEndPoint if multiple write locations can be used. - /// This needs to be modifed to figure out proper location in such case. 
- /// - public string GetLocation(Uri endpoint) - { - string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; - - if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) - { - if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) - { - return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; - } - } - - return location; - } - - /// - /// Set region name for a location if present in the locationcache otherwise set region name as null. - /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. - /// - /// - /// - /// true if region found else false - public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) - { - if (Uri.Compare( - endpoint, - this.defaultEndpoint, - UriComponents.Host, - UriFormat.SafeUnescaped, - StringComparison.OrdinalIgnoreCase) == 0) - { - regionName = null; - return false; - } - - regionName = this.GetLocation(endpoint); - return true; - } - - /// - /// Marks the current location unavailable for read - /// - public void MarkEndpointUnavailableForRead(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Read); - } - - /// - /// Marks the current location unavailable for write - /// - public void MarkEndpointUnavailableForWrite(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Write); - } - - /// - /// Invoked when is read - /// - /// Read DatabaseAccoaunt - public void OnDatabaseAccountRead(AccountProperties databaseAccount) - { - this.UpdateLocationCache( - databaseAccount.WritableRegions, - databaseAccount.ReadableRegions, - preferenceList: null, - enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); - } - - /// - /// Invoked when changes - /// - /// - public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) - { - this.UpdateLocationCache( - preferenceList: preferredLocations); - } - - public bool IsMetaData(DocumentServiceRequest request) - { - return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || - request.ResourceType != ResourceType.Document; - - } - public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) - { - return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 - && this.IsMetaData(request) - && this.CanUseMultipleWriteLocations(); - - } - - public Uri GetHubUri() - { - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; - Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - return locationEndpointToRoute; - } - - /// - /// Resolves request to service endpoint. - /// 1. If this is a write request - /// (a) If UseMultipleWriteLocations = true - /// (i) For document writes, resolve to most preferred and available write endpoint. - /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will - /// be retried on next preferred available write endpoint. - /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) - /// write endpoint in . - /// Endpoint of first write location in is the only endpoint that supports - /// write operation on all resource types (except during that region's failover). - /// Only during manual failover, client would retry write on second write location in . - /// (b) Else resolve the request to first write endpoint in OR - /// second write endpoint in in case of manual failover of that location. - /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) - /// - /// Request for which endpoint is to be resolved - /// Resolved endpoint - public Uri ResolveServiceEndpoint(DocumentServiceRequest request) - { - if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) - { - return request.RequestContext.LocationEndpointToRoute; - } - - int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); - - Uri locationEndpointToRoute = this.defaultEndpoint; - - if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? - || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) - { - // For non-document resource types in case of client can use multiple write locations - // or when client cannot use multiple write locations, flip-flop between the - // first and the second writable region in DatabaseAccount (for manual failover) - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) - { - locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); - string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; - locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - } - } - else - { - ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? 
this.WriteEndpoints : this.ReadEndpoints; - locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; - } - - request.RequestContext.RouteToLocation(locationEndpointToRoute); - return locationEndpointToRoute; - } - - public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) - { - canRefreshInBackground = true; - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - string mostPreferredLocation = currentLocationInfo.PreferredLocations.FirstOrDefault(); - - // we should schedule refresh in background if we are unable to target the user's most preferredLocation. - if (this.enableEndpointDiscovery) - { - // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled - bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; - - ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; - - if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) - { - canRefreshInBackground = readLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. 
canRefreshInBackground = {1}", - readLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - - if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - Uri mostPreferredReadEndpoint; - - if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) - { - if (mostPreferredReadEndpoint != readLocationEndpoints[0]) - { - // For reads, we can always refresh in background as we can alternate to - // other available read endpoints - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); - return true; - } - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); - return true; - } - } - - Uri mostPreferredWriteEndpoint; - ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; - - if (!this.CanUseMultipleWriteLocations()) - { - if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) - { - // Since most preferred write endpoint is unavailable, we can only refresh in background if - // we have an alternate write endpoint - canRefreshInBackground = writeLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. 
canRefreshInBackground = {2}", - mostPreferredLocation, - writeLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - else - { - return shouldRefresh; - } - } - else if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) - { - shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); - return shouldRefresh; - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); - return true; - } - } - else - { - return shouldRefresh; - } - } - else - { - return false; - } - } - - public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) - { - return this.CanUseMultipleWriteLocations() && - (request.ResourceType == ResourceType.Document || - (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); - } - - private void ClearStaleEndpointUnavailabilityInfo() - { - if (this.locationUnavailablityInfoByEndpoint.Any()) - { - List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); - - foreach (Uri unavailableEndpoint in unavailableEndpoints) - { - LocationUnavailabilityInfo unavailabilityInfo; - LocationUnavailabilityInfo removed; - - if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) - && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) - { - DefaultTrace.TraceInformation( - "Removed endpoint {0} unavailable for operations {1} from 
unavailableEndpoints", - unavailableEndpoint, - unavailabilityInfo.UnavailableOperations); - } - } - } - } - - private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) - { - LocationUnavailabilityInfo unavailabilityInfo; - - if (expectedAvailableOperations == OperationType.None - || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) - || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) - { - return false; - } - else - { - if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) - { - return false; - } - else - { - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", - endpoint, - unavailabilityInfo.UnavailableOperations); - // Unexpired entry present. Endpoint is unavailable - return true; - } - } - } - - private void MarkEndpointUnavailable( - Uri unavailableEndpoint, - OperationType unavailableOperationType) - { - DateTime currentTime = DateTime.UtcNow; - LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( - unavailableEndpoint, - (Uri endpoint) => - { - return new LocationUnavailabilityInfo() - { - LastUnavailabilityCheckTimeStamp = currentTime, - UnavailableOperations = unavailableOperationType, - }; - }, - (Uri endpoint, LocationUnavailabilityInfo info) => - { - info.LastUnavailabilityCheckTimeStamp = currentTime; - info.UnavailableOperations |= unavailableOperationType; - return info; - }); - - this.UpdateLocationCache(); - - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", - unavailableEndpoint, - unavailableOperationType, - updatedInfo.LastUnavailabilityCheckTimeStamp); - } - - private void UpdateLocationCache( - IEnumerable writeLocations = null, - IEnumerable readLocations = null, - ReadOnlyCollection preferenceList = 
null, - bool? enableMultipleWriteLocations = null) - { - lock (this.lockObject) - { - DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); - - if (preferenceList != null) - { - nextLocationInfo.PreferredLocations = preferenceList; - } - - if (enableMultipleWriteLocations.HasValue) - { - this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; - } - - this.ClearStaleEndpointUnavailabilityInfo(); - - if (readLocations != null) - { - ReadOnlyCollection availableReadLocations; - nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); - nextLocationInfo.AvailableReadLocations = availableReadLocations; - } - - if (writeLocations != null) - { - ReadOnlyCollection availableWriteLocations; - nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); - nextLocationInfo.AvailableWriteLocations = availableWriteLocations; - } - - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); - nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); - this.lastCacheUpdateTimestamp = DateTime.UtcNow; - - DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", - string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), - string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); - - this.locationInfo = nextLocationInfo; - } - } - - private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType 
expectedAvailableOperation, Uri fallbackEndpoint) - { - List endpoints = new List(); - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init - if (this.enableEndpointDiscovery) - { - if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) - { - List unavailableEndpoints = new List(); - - // When client can not use multiple write locations, preferred locations list should only be used - // determining read endpoints order. - // If client can use multiple write locations, preferred locations list should be used for determining - // both read and write endpoints order. - - foreach (string location in currentLocationInfo.PreferredLocations) - { - Uri endpoint; - if (endpointsByLocation.TryGetValue(location, out endpoint)) - { - if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) - { - unavailableEndpoints.Add(endpoint); - } - else - { - endpoints.Add(endpoint); - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - unavailableEndpoints.Remove(fallbackEndpoint); - } - - endpoints.AddRange(unavailableEndpoints); - } - else - { - foreach (string location in orderedLocations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location) && // location is empty during manual failover - endpointsByLocation.TryGetValue(location, out endpoint)) - { - endpoints.Add(endpoint); - } - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - } - - return endpoints.AsReadOnly(); - } - - private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) - { - Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); - List parsedLocations = new List(); - - foreach (AccountRegion location in locations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location.Name) - && 
Uri.TryCreate(location.Endpoint, UriKind.Absolute, out endpoint)) - { - endpointsByLocation[location.Name] = endpoint; - parsedLocations.Add(location.Name); - this.SetServicePointConnectionLimit(endpoint); - } - else - { - DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", - location.Name, - location.Endpoint); - } - } - - orderedLocations = parsedLocations.AsReadOnly(); - return new ReadOnlyDictionary(endpointsByLocation); - } - - private bool CanUseMultipleWriteLocations() - { - return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; - } - - private void SetServicePointConnectionLimit(Uri endpoint) - { -#if !NETSTANDARD16 - ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); - servicePoint.ConnectionLimit = this.connectionLimit; -#endif - } - - private sealed class LocationUnavailabilityInfo - { - public DateTime LastUnavailabilityCheckTimeStamp { get; set; } - public OperationType UnavailableOperations { get; set; } - } - - private sealed class DatabaseAccountLocationsInfo - { - public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) - { - this.PreferredLocations = preferredLocations; - this.AvailableWriteLocations = new List().AsReadOnly(); - this.AvailableReadLocations = new List().AsReadOnly(); - this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - } - - public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) - { - this.PreferredLocations = other.PreferredLocations; - this.AvailableWriteLocations = 
other.AvailableWriteLocations; - this.AvailableReadLocations = other.AvailableReadLocations; - this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; - this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; - this.WriteEndpoints = other.WriteEndpoints; - this.ReadEndpoints = other.ReadEndpoints; - } - - public ReadOnlyCollection PreferredLocations { get; set; } - public ReadOnlyCollection AvailableWriteLocations { get; set; } - public ReadOnlyCollection AvailableReadLocations { get; set; } - public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } - public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } - public ReadOnlyCollection WriteEndpoints { get; set; } - public ReadOnlyCollection ReadEndpoints { get; set; } - } - - [Flags] - private enum OperationType - { - None = 0x0, - Read = 0x1, - Write = 0x2 - } - } -} +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Routing +{ + using System; + using System.Collections.Concurrent; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Globalization; + using System.Linq; + using System.Net; + using global::Azure.Core; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Documents; + + /// + /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount + /// with multiple writable and readable locations. 
+ /// + internal sealed class LocationCache + { + private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; + private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; + + private readonly bool enableEndpointDiscovery; + private readonly Uri defaultEndpoint; + private readonly bool useMultipleWriteLocations; + private readonly object lockObject; + private readonly TimeSpan unavailableLocationsExpirationTime; + private readonly int connectionLimit; + private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; + + private DatabaseAccountLocationsInfo locationInfo; + private DateTime lastCacheUpdateTimestamp; + private bool enableMultipleWriteLocations; + + public LocationCache( + ReadOnlyCollection preferredLocations, + Uri defaultEndpoint, + bool enableEndpointDiscovery, + int connectionLimit, + bool useMultipleWriteLocations) + { + this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); + this.defaultEndpoint = defaultEndpoint; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.useMultipleWriteLocations = useMultipleWriteLocations; + this.connectionLimit = connectionLimit; + + this.lockObject = new object(); + this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); + this.lastCacheUpdateTimestamp = DateTime.MinValue; + this.enableMultipleWriteLocations = false; + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + +#if !(NETSTANDARD15 || NETSTANDARD16) +#if NETSTANDARD20 + // GetEntryAssembly returns null when loaded from native netstandard2.0 + if (System.Reflection.Assembly.GetEntryAssembly() != null) + { +#endif + string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; + if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) + { + int unavailableLocationsExpirationTimeinSecondsConfigValue; + + if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + } + else + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); + } + } +#if NETSTANDARD20 + } +#endif +#endif + } + + /// + /// Gets list of read endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection ReadEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.ReadEndpoints; + } + } + + /// + /// Gets list of write endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection WriteEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.WriteEndpoints; + } + } + + /// + /// Returns the location corresponding to the endpoint if location specific endpoint is provided. + /// For the defaultEndPoint, we will return the first available write location. + /// Returns null, in other cases. + /// + /// + /// Today we return null for defaultEndPoint if multiple write locations can be used. + /// This needs to be modifed to figure out proper location in such case. 
+ /// + public string GetLocation(Uri endpoint) + { + string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; + + if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) + { + if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) + { + return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; + } + } + + return location; + } + + /// + /// Set region name for a location if present in the locationcache otherwise set region name as null. + /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. + /// + /// + /// + /// true if region found else false + public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) + { + if (Uri.Compare( + endpoint, + this.defaultEndpoint, + UriComponents.Host, + UriFormat.SafeUnescaped, + StringComparison.OrdinalIgnoreCase) == 0) + { + regionName = null; + return false; + } + + regionName = this.GetLocation(endpoint); + return true; + } + + /// + /// Marks the current location unavailable for read + /// + public void MarkEndpointUnavailableForRead(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Read); + } + + /// + /// Marks the current location unavailable for write + /// + public void MarkEndpointUnavailableForWrite(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Write); + } + + /// + /// Invoked when is read + /// + /// Read DatabaseAccoaunt + public void OnDatabaseAccountRead(AccountProperties databaseAccount) + { + this.UpdateLocationCache( + databaseAccount.WritableRegions, + databaseAccount.ReadableRegions, + preferenceList: null, + enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); + } + + /// + /// Invoked when changes + /// + /// + public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) + { + this.UpdateLocationCache( + preferenceList: preferredLocations); + } + + public bool IsMetaData(DocumentServiceRequest request) + { + return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || + request.ResourceType != ResourceType.Document; + + } + public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) + { + return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 + && this.IsMetaData(request) + && this.CanUseMultipleWriteLocations(); + + } + + public Uri GetHubUri() + { + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; + Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + return locationEndpointToRoute; + } + + /// + /// Resolves request to service endpoint. + /// 1. If this is a write request + /// (a) If UseMultipleWriteLocations = true + /// (i) For document writes, resolve to most preferred and available write endpoint. + /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will + /// be retried on next preferred available write endpoint. + /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) + /// write endpoint in . + /// Endpoint of first write location in is the only endpoint that supports + /// write operation on all resource types (except during that region's failover). + /// Only during manual failover, client would retry write on second write location in . + /// (b) Else resolve the request to first write endpoint in OR + /// second write endpoint in in case of manual failover of that location. + /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) + /// + /// Request for which endpoint is to be resolved + /// Resolved endpoint + public Uri ResolveServiceEndpoint(DocumentServiceRequest request) + { + if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) + { + return request.RequestContext.LocationEndpointToRoute; + } + + int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); + + Uri locationEndpointToRoute = this.defaultEndpoint; + + if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? + || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) + { + // For non-document resource types in case of client can use multiple write locations + // or when client cannot use multiple write locations, flip-flop between the + // first and the second writable region in DatabaseAccount (for manual failover) + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) + { + locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); + string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; + locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + } + } + else + { + ReadOnlyCollection endpoints = this.ReadEndpoints; + locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; + } + + request.RequestContext.RouteToLocation(locationEndpointToRoute); + return locationEndpointToRoute; + } + + public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) + { + canRefreshInBackground = true; + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + string mostPreferredLocation = 
currentLocationInfo.PreferredLocations.FirstOrDefault(); + + // we should schedule refresh in background if we are unable to target the user's most preferredLocation. + if (this.enableEndpointDiscovery) + { + // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled + bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; + + ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; + + if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) + { + canRefreshInBackground = readLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. canRefreshInBackground = {1}", + readLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + + if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + Uri mostPreferredReadEndpoint; + + if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) + { + if (mostPreferredReadEndpoint != readLocationEndpoints[0]) + { + // For reads, we can always refresh in background as we can alternate to + // other available read endpoints + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); + return true; + } + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); + return true; + } + } + + Uri mostPreferredWriteEndpoint; + ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; + + if (!this.CanUseMultipleWriteLocations()) + { + if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) + { + // Since most preferred write endpoint is unavailable, we can only refresh in background if + // we have an alternate write 
endpoint + canRefreshInBackground = writeLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. canRefreshInBackground = {2}", + mostPreferredLocation, + writeLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + else + { + return shouldRefresh; + } + } + else if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) + { + shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); + return shouldRefresh; + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); + return true; + } + } + else + { + return shouldRefresh; + } + } + else + { + return false; + } + } + + public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) + { + return this.CanUseMultipleWriteLocations() && + (request.ResourceType == ResourceType.Document || + (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); + } + + private void ClearStaleEndpointUnavailabilityInfo() + { + if (this.locationUnavailablityInfoByEndpoint.Any()) + { + List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); + + foreach (Uri unavailableEndpoint in unavailableEndpoints) + { + LocationUnavailabilityInfo unavailabilityInfo; + LocationUnavailabilityInfo removed; + + if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) + && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > 
this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) + { + DefaultTrace.TraceInformation( + "Removed endpoint {0} unavailable for operations {1} from unavailableEndpoints", + unavailableEndpoint, + unavailabilityInfo.UnavailableOperations); + } + } + } + } + + private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) + { + LocationUnavailabilityInfo unavailabilityInfo; + + if (expectedAvailableOperations == OperationType.None + || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) + || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) + { + return false; + } + else + { + if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) + { + return false; + } + else + { + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", + endpoint, + unavailabilityInfo.UnavailableOperations); + // Unexpired entry present. 
Endpoint is unavailable + return true; + } + } + } + + private void MarkEndpointUnavailable( + Uri unavailableEndpoint, + OperationType unavailableOperationType) + { + DateTime currentTime = DateTime.UtcNow; + LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( + unavailableEndpoint, + (Uri endpoint) => + { + return new LocationUnavailabilityInfo() + { + LastUnavailabilityCheckTimeStamp = currentTime, + UnavailableOperations = unavailableOperationType, + }; + }, + (Uri endpoint, LocationUnavailabilityInfo info) => + { + info.LastUnavailabilityCheckTimeStamp = currentTime; + info.UnavailableOperations |= unavailableOperationType; + return info; + }); + + this.UpdateLocationCache(); + + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", + unavailableEndpoint, + unavailableOperationType, + updatedInfo.LastUnavailabilityCheckTimeStamp); + } + + private void UpdateLocationCache( + IEnumerable writeLocations = null, + IEnumerable readLocations = null, + ReadOnlyCollection preferenceList = null, + bool? 
enableMultipleWriteLocations = null) + { + lock (this.lockObject) + { + DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); + + if (preferenceList != null) + { + nextLocationInfo.PreferredLocations = preferenceList; + } + + if (enableMultipleWriteLocations.HasValue) + { + this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; + } + + this.ClearStaleEndpointUnavailabilityInfo(); + + if (readLocations != null) + { + ReadOnlyCollection availableReadLocations; + nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); + nextLocationInfo.AvailableReadLocations = availableReadLocations; + } + + if (writeLocations != null) + { + ReadOnlyCollection availableWriteLocations; + nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); + nextLocationInfo.AvailableWriteLocations = availableWriteLocations; + } + + nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); + nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); + this.lastCacheUpdateTimestamp = DateTime.UtcNow; + + DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", + string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), + string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); + + this.locationInfo = nextLocationInfo; + } + } + + private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType expectedAvailableOperation, Uri 
fallbackEndpoint) + { + List endpoints = new List(); + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init + if (this.enableEndpointDiscovery) + { + if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) + { + List unavailableEndpoints = new List(); + + // When client can not use multiple write locations, preferred locations list should only be used + // determining read endpoints order. + // If client can use multiple write locations, preferred locations list should be used for determining + // both read and write endpoints order. + + foreach (string location in currentLocationInfo.PreferredLocations) + { + Uri endpoint; + if (endpointsByLocation.TryGetValue(location, out endpoint)) + { + if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) + { + unavailableEndpoints.Add(endpoint); + } + else + { + endpoints.Add(endpoint); + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + unavailableEndpoints.Remove(fallbackEndpoint); + } + + endpoints.AddRange(unavailableEndpoints); + } + else + { + foreach (string location in orderedLocations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location) && // location is empty during manual failover + endpointsByLocation.TryGetValue(location, out endpoint)) + { + endpoints.Add(endpoint); + } + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + } + + return endpoints.AsReadOnly(); + } + + private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) + { + Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); + List parsedLocations = new List(); + + foreach (AccountRegion location in locations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location.Name) + && Uri.TryCreate(location.Endpoint, 
UriKind.Absolute, out endpoint)) + { + endpointsByLocation[location.Name] = endpoint; + parsedLocations.Add(location.Name); + this.SetServicePointConnectionLimit(endpoint); + } + else + { + DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", + location.Name, + location.Endpoint); + } + } + + orderedLocations = parsedLocations.AsReadOnly(); + return new ReadOnlyDictionary(endpointsByLocation); + } + + private bool CanUseMultipleWriteLocations() + { + return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; + } + + private void SetServicePointConnectionLimit(Uri endpoint) + { +#if !NETSTANDARD16 + ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); + servicePoint.ConnectionLimit = this.connectionLimit; +#endif + } + + private sealed class LocationUnavailabilityInfo + { + public DateTime LastUnavailabilityCheckTimeStamp { get; set; } + public OperationType UnavailableOperations { get; set; } + } + + private sealed class DatabaseAccountLocationsInfo + { + public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) + { + this.PreferredLocations = preferredLocations; + this.AvailableWriteLocations = new List().AsReadOnly(); + this.AvailableReadLocations = new List().AsReadOnly(); + this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + } + + public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) + { + this.PreferredLocations = other.PreferredLocations; + this.AvailableWriteLocations = other.AvailableWriteLocations; + this.AvailableReadLocations = 
other.AvailableReadLocations; + this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; + this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; + this.WriteEndpoints = other.WriteEndpoints; + this.ReadEndpoints = other.ReadEndpoints; + } + + public ReadOnlyCollection PreferredLocations { get; set; } + public ReadOnlyCollection AvailableWriteLocations { get; set; } + public ReadOnlyCollection AvailableReadLocations { get; set; } + public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } + public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } + public ReadOnlyCollection WriteEndpoints { get; set; } + public ReadOnlyCollection ReadEndpoints { get; set; } + } + + [Flags] + private enum OperationType + { + None = 0x0, + Read = 0x1, + Write = 0x2 + } + } +} From ef78559caccec2db64ab2cb01a666f137019288c Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Thu, 7 Sep 2023 11:55:58 -0700 Subject: [PATCH 02/31] Revert "Code changes to add retry logic for GW returned 503.9002." This reverts commit 53ef5f3c1b038d14dbb1473cafa18223b33af2ce. --- .../src/ClientRetryPolicy.cs | 873 +++++------ .../src/CosmosClientOptions.cs | 2 +- .../src/Routing/LocationCache.cs | 1336 ++++++++--------- 3 files changed, 1108 insertions(+), 1103 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2f007c6fbf..2933baa1a9 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -1,435 +1,440 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. 
-//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos -{ - using System; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Net; - using System.Net.Http; - using System.Threading; - using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Cosmos.Routing; - using Microsoft.Azure.Documents; - - /// - /// Client policy is combination of endpoint change retry + throttling retry. - /// - internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy - { - private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. - private const int MaxRetryCount = 120; - private const int MaxServiceUnavailableRetryCount = 1; - - private readonly IDocumentClientRetryPolicy throttlingRetry; - private readonly GlobalEndpointManager globalEndpointManager; - private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; - private readonly bool enableEndpointDiscovery; - private int failoverRetryCount; - - private int sessionTokenRetryCount; - private int serviceUnavailableRetryCount; - private bool isReadRequest; - private bool canUseMultipleWriteLocations; - private Uri locationEndpoint; - private RetryContext retryContext; - private DocumentServiceRequest documentServiceRequest; - - public ClientRetryPolicy( - GlobalEndpointManager globalEndpointManager, - GlobalPartitionEndpointManager partitionKeyRangeLocationCache, - bool enableEndpointDiscovery, - RetryOptions retryOptions) - { - this.throttlingRetry = new ResourceThrottleRetryPolicy( - retryOptions.MaxRetryAttemptsOnThrottledRequests, - retryOptions.MaxRetryWaitTimeInSeconds); - - this.globalEndpointManager = globalEndpointManager; - this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; - this.failoverRetryCount = 0; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.sessionTokenRetryCount = 0; - 
this.serviceUnavailableRetryCount = 0; - this.canUseMultipleWriteLocations = false; - } - - /// - /// Should the caller retry the operation. - /// - /// Exception that occurred when the operation was tried - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - Exception exception, - CancellationToken cancellationToken) - { - this.retryContext = null; - // Received Connection error (HttpRequestException), initiate the endpoint rediscovery - if (exception is HttpRequestException _) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark both read and write requests because it gateway exception. - // This means all requests going to the region will fail. - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: true, - forceRefresh: false, - retryOnPreferredLocations: true); - } - - if (exception is DocumentClientException clientException) - { - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - clientException?.StatusCode, - clientException?.GetSubStatus()); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - } - - return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); - } - - /// - /// Should the caller retry the operation. 
- /// - /// in return of the request - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - ResponseMessage cosmosResponseMessage, - CancellationToken cancellationToken) - { - this.retryContext = null; - - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - cosmosResponseMessage?.StatusCode, - cosmosResponseMessage?.Headers.SubStatusCode); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - - return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); - } - - /// - /// Method that is called before a request is sent to allow the retry policy implementation - /// to modify the state of the request. - /// - /// The request being sent to the service. - public void OnBeforeSendRequest(DocumentServiceRequest request) - { - this.isReadRequest = request.IsReadOnlyRequest; - this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); - this.documentServiceRequest = request; - - // clear previous location-based routing directive - request.RequestContext.ClearRouteToLocation(); - - if (this.retryContext != null) - { - if (this.retryContext.RouteToHub) - { - request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); - } - else - { - // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); - } - } - - // Resolve the endpoint for the request and pin the resolution to the resolved endpoint - // This enables marking the endpoint unavailability on endpoint failover/unreachability - this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); - request.RequestContext.RouteToLocation(this.locationEndpoint); - } - - private async Task ShouldRetryInternalAsync( - HttpStatusCode? statusCode, - SubStatusCodes? 
subStatusCode) - { - if (!statusCode.HasValue - && (!subStatusCode.HasValue - || subStatusCode.Value == SubStatusCodes.Unknown)) - { - return null; - } - - // Console.WriteLine("Status Code: " + statusCode.Value + "Sub Status Code: " + subStatusCode.Value + "IsRead Request: " + this.isReadRequest); - - // Received request timeout - if (statusCode == HttpStatusCode.RequestTimeout) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition key range as unavailable to retry future request on a new region. - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - } - - // Received 403.3 on write region, initiate the endpoint rediscovery - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.WriteForbidden) - { - // It's a write forbidden so it safe to retry - if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest)) - { - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); - - if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) - { - bool forceRefresh = false; - - if (this.retryContext != null && this.retryContext.RouteToHub) - { - forceRefresh = true; - - } - - ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: forceRefresh, - retryOnPreferredLocations: false, - overwriteEndpointDiscovery: true); - - if (retryResult.ShouldRetry) - { - this.retryContext.RouteToHub = true; - } - - return retryResult; - } - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: true, - retryOnPreferredLocations: false); - } - - // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.DatabaseAccountNotFound - && (this.isReadRequest || this.canUseMultipleWriteLocations)) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: false, - retryOnPreferredLocations: false); - } - - if (statusCode == HttpStatusCode.NotFound - && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { - return this.ShouldRetryOnSessionNotAvailable(); - } - - // Received 503 due to client connect timeout or Gateway - if (statusCode == HttpStatusCode.ServiceUnavailable) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition as unavailable. - // Let the ClientRetry logic decide if the request should be retried - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - - return this.ShouldRetryOnServiceUnavailable(); - } - - return null; - } - - private async Task ShouldRetryOnEndpointFailureAsync( - bool isReadRequest, - bool markBothReadAndWriteAsUnavailable, - bool forceRefresh, - bool retryOnPreferredLocations, - bool overwriteEndpointDiscovery = false) - { - if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", - this.failoverRetryCount, - this.locationEndpoint?.ToString() ?? string.Empty); - return ShouldRetryResult.NoRetry(); - } - - this.failoverRetryCount++; - - if (this.locationEndpoint != null && !overwriteEndpointDiscovery) - { - if (isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); - } - - if (!isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); - } - } - - TimeSpan retryDelay = TimeSpan.Zero; - if (!isReadRequest) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); - - if (this.failoverRetryCount > 1) - { - //if retried both endpoints, follow regular retry interval. 
- retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - } - else - { - retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - - await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); - - int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect - if (retryOnPreferredLocations) - { - retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list - } - - this.retryContext = new RetryContext - { - RetryLocationIndex = retryLocationIndex, - RetryRequestOnPreferredLocations = retryOnPreferredLocations, - }; - - return ShouldRetryResult.RetryAfter(retryDelay); - } - - private ShouldRetryResult ShouldRetryOnSessionNotAvailable() - { - this.sessionTokenRetryCount++; - - if (!this.enableEndpointDiscovery) - { - // if endpoint discovery is disabled, the request cannot be retried anywhere else - return ShouldRetryResult.NoRetry(); - } - else - { - if (this.canUseMultipleWriteLocations) - { - ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; - - if (this.sessionTokenRetryCount > endpoints.Count) - { - // When use multiple write locations is true and the request has been tried - // on all locations, then don't retry the request - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.sessionTokenRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - else - { - if (this.sessionTokenRetryCount > 1) - { - // When cannot use multiple write locations, then don't retry the request if - // we have already tried this request on the write location - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext - { - RetryLocationIndex = 0, - RetryRequestOnPreferredLocations = false - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - } - } - - /// - /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. - /// We try and retry the request only if there are other regions available. - /// - private ShouldRetryResult ShouldRetryOnServiceUnavailable() - { - if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) - { - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); - return ShouldRetryResult.NoRetry(); - } - - /*if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available - return ShouldRetryResult.NoRetry(); - }*/ - - int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; - - if (availablePreferredLocations <= 1) - { - // No other regions to retry on - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); - return ShouldRetryResult.NoRetry(); - } - - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); - - // Retrying on second PreferredLocations - // RetryCount is used as zero-based index - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.serviceUnavailableRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - private sealed class RetryContext - { - public int RetryLocationIndex { get; set; } - public bool RetryRequestOnPreferredLocations { get; set; } - - public bool RouteToHub { get; set; } - } - } +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Net; + using System.Net.Http; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Documents; + + /// + /// Client policy is combination of endpoint change retry + throttling retry. + /// + internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy + { + private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. + private const int MaxRetryCount = 120; + private const int MaxServiceUnavailableRetryCount = 1; + + private readonly IDocumentClientRetryPolicy throttlingRetry; + private readonly GlobalEndpointManager globalEndpointManager; + private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; + private readonly bool enableEndpointDiscovery; + private int failoverRetryCount; + + private int sessionTokenRetryCount; + private int serviceUnavailableRetryCount; + private bool isReadRequest; + private bool canUseMultipleWriteLocations; + private Uri locationEndpoint; + private RetryContext retryContext; + private DocumentServiceRequest documentServiceRequest; + + public ClientRetryPolicy( + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManager partitionKeyRangeLocationCache, + bool enableEndpointDiscovery, + RetryOptions retryOptions) + { + this.throttlingRetry = new ResourceThrottleRetryPolicy( + retryOptions.MaxRetryAttemptsOnThrottledRequests, + retryOptions.MaxRetryWaitTimeInSeconds); + + this.globalEndpointManager = globalEndpointManager; + this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; + this.failoverRetryCount = 0; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.sessionTokenRetryCount = 0; + 
this.serviceUnavailableRetryCount = 0; + this.canUseMultipleWriteLocations = false; + } + + /// + /// Should the caller retry the operation. + /// + /// Exception that occurred when the operation was tried + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + Exception exception, + CancellationToken cancellationToken) + { + this.retryContext = null; + // Received Connection error (HttpRequestException), initiate the endpoint rediscovery + if (exception is HttpRequestException _) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark both read and write requests because it gateway exception. + // This means all requests going to the region will fail. + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: true, + forceRefresh: false, + retryOnPreferredLocations: true); + } + + if (exception is DocumentClientException clientException) + { + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + clientException?.StatusCode, + clientException?.GetSubStatus()); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + } + + return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); + } + + /// + /// Should the caller retry the operation. 
+ /// + /// in return of the request + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + ResponseMessage cosmosResponseMessage, + CancellationToken cancellationToken) + { + this.retryContext = null; + + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + cosmosResponseMessage?.StatusCode, + cosmosResponseMessage?.Headers.SubStatusCode); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + + return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); + } + + /// + /// Method that is called before a request is sent to allow the retry policy implementation + /// to modify the state of the request. + /// + /// The request being sent to the service. + public void OnBeforeSendRequest(DocumentServiceRequest request) + { + this.isReadRequest = request.IsReadOnlyRequest; + this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); + this.documentServiceRequest = request; + + // clear previous location-based routing directive + request.RequestContext.ClearRouteToLocation(); + + if (this.retryContext != null) + { + if (this.retryContext.RouteToHub) + { + request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); + } + else + { + // set location-based routing directive based on request retry context + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); + } + } + + // Resolve the endpoint for the request and pin the resolution to the resolved endpoint + // This enables marking the endpoint unavailability on endpoint failover/unreachability + this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); + request.RequestContext.RouteToLocation(this.locationEndpoint); + } + + private async Task ShouldRetryInternalAsync( + HttpStatusCode? statusCode, + SubStatusCodes? 
subStatusCode) + { + if (!statusCode.HasValue + && (!subStatusCode.HasValue + || subStatusCode.Value == SubStatusCodes.Unknown)) + { + return null; + } + + // Received request timeout + if (statusCode == HttpStatusCode.RequestTimeout) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition key range as unavailable to retry future request on a new region. + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + } + + // Received 403.3 on write region, initiate the endpoint rediscovery + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.WriteForbidden) + { + // It's a write forbidden so it safe to retry + if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest)) + { + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); + + if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) + { + bool forceRefresh = false; + + if (this.retryContext != null && this.retryContext.RouteToHub) + { + forceRefresh = true; + + } + + ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: forceRefresh, + retryOnPreferredLocations: false, + overwriteEndpointDiscovery: true); + + if (retryResult.ShouldRetry) + { + this.retryContext.RouteToHub = true; + } + + return retryResult; + } + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: true, + retryOnPreferredLocations: false); + } + + // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.DatabaseAccountNotFound + && (this.isReadRequest || this.canUseMultipleWriteLocations)) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: false, + retryOnPreferredLocations: false); + } + + if (statusCode == HttpStatusCode.NotFound + && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) + { + return this.ShouldRetryOnSessionNotAvailable(); + } + + // Received 503 due to client connect timeout or Gateway + if (statusCode == HttpStatusCode.ServiceUnavailable + && ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode)) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition as unavailable. + // Let the ClientRetry logic decide if the request should be retried + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + + return this.ShouldRetryOnServiceUnavailable(); + } + + return null; + } + + private static bool IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode) + { + return subStatusCode == SubStatusCodes.Unknown || + (subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus()); + } + + private async Task ShouldRetryOnEndpointFailureAsync( + bool isReadRequest, + bool markBothReadAndWriteAsUnavailable, + bool forceRefresh, + bool retryOnPreferredLocations, + bool overwriteEndpointDiscovery = false) + { + if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", + this.failoverRetryCount, + this.locationEndpoint?.ToString() ?? 
string.Empty); + return ShouldRetryResult.NoRetry(); + } + + this.failoverRetryCount++; + + if (this.locationEndpoint != null && !overwriteEndpointDiscovery) + { + if (isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); + } + + if (!isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); + } + } + + TimeSpan retryDelay = TimeSpan.Zero; + if (!isReadRequest) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); + + if (this.failoverRetryCount > 1) + { + //if retried both endpoints, follow regular retry interval. + retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + } + else + { + retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + + await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); + + int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect + if (retryOnPreferredLocations) + { + retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list + } + + this.retryContext = new RetryContext + { + RetryLocationIndex = retryLocationIndex, + RetryRequestOnPreferredLocations = retryOnPreferredLocations, + }; + + return ShouldRetryResult.RetryAfter(retryDelay); + } + + private ShouldRetryResult ShouldRetryOnSessionNotAvailable() + { + this.sessionTokenRetryCount++; + + if (!this.enableEndpointDiscovery) + { + // if endpoint discovery is disabled, the request cannot be retried anywhere else + return ShouldRetryResult.NoRetry(); + } + else + { + if (this.canUseMultipleWriteLocations) + { + ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; + + if (this.sessionTokenRetryCount > endpoints.Count) + { + // When use multiple write locations is true and the request has been tried + // on all locations, then don't retry the request + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.sessionTokenRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + else + { + if (this.sessionTokenRetryCount > 1) + { + // When cannot use multiple write locations, then don't retry the request if + // we have already tried this request on the write location + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext + { + RetryLocationIndex = 0, + RetryRequestOnPreferredLocations = false + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + } + } + + /// + /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. + /// We try and retry the request only if there are other regions available. + /// + private ShouldRetryResult ShouldRetryOnServiceUnavailable() + { + if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) + { + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); + return ShouldRetryResult.NoRetry(); + } + + if (!this.canUseMultipleWriteLocations + && !this.isReadRequest) + { + // Write requests on single master cannot be retried, no other regions available + return ShouldRetryResult.NoRetry(); + } + + int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; + + if (availablePreferredLocations <= 1) + { + // No other regions to retry on + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); + return ShouldRetryResult.NoRetry(); + } + + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); + + // Retrying on second PreferredLocations + // RetryCount is used as zero-based index + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.serviceUnavailableRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + private sealed class RetryContext + { + public int RetryLocationIndex { get; set; } + public bool RetryRequestOnPreferredLocations { get; set; } + + public bool RouteToHub { get; set; } + } + } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index ee67522d6b..2c07f060f8 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - public bool EnablePartitionLevelFailover { get; set; } = false; + internal bool EnablePartitionLevelFailover { get; set; } = false; /// /// Quorum Read allowed with eventual consistency account or consistent 
prefix account. diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 6f07b7a52a..9c6308d8b6 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -1,668 +1,668 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos.Routing -{ - using System; - using System.Collections.Concurrent; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Globalization; - using System.Linq; - using System.Net; - using global::Azure.Core; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Documents; - - /// - /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount - /// with multiple writable and readable locations. 
- /// - internal sealed class LocationCache - { - private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; - private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - - private readonly bool enableEndpointDiscovery; - private readonly Uri defaultEndpoint; - private readonly bool useMultipleWriteLocations; - private readonly object lockObject; - private readonly TimeSpan unavailableLocationsExpirationTime; - private readonly int connectionLimit; - private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; - - private DatabaseAccountLocationsInfo locationInfo; - private DateTime lastCacheUpdateTimestamp; - private bool enableMultipleWriteLocations; - - public LocationCache( - ReadOnlyCollection preferredLocations, - Uri defaultEndpoint, - bool enableEndpointDiscovery, - int connectionLimit, - bool useMultipleWriteLocations) - { - this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); - this.defaultEndpoint = defaultEndpoint; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.useMultipleWriteLocations = useMultipleWriteLocations; - this.connectionLimit = connectionLimit; - - this.lockObject = new object(); - this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); - this.lastCacheUpdateTimestamp = DateTime.MinValue; - this.enableMultipleWriteLocations = false; - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - -#if !(NETSTANDARD15 || NETSTANDARD16) -#if NETSTANDARD20 - // GetEntryAssembly returns null when loaded from native netstandard2.0 - if (System.Reflection.Assembly.GetEntryAssembly() != null) - { -#endif - string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; - if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) - { - int unavailableLocationsExpirationTimeinSecondsConfigValue; - - if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - } - else - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); - } - } -#if NETSTANDARD20 - } -#endif -#endif - } - - /// - /// Gets list of read endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection ReadEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.ReadEndpoints; - } - } - - /// - /// Gets list of write endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection WriteEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.WriteEndpoints; - } - } - - /// - /// Returns the location corresponding to the endpoint if location specific endpoint is provided. - /// For the defaultEndPoint, we will return the first available write location. - /// Returns null, in other cases. - /// - /// - /// Today we return null for defaultEndPoint if multiple write locations can be used. - /// This needs to be modifed to figure out proper location in such case. 
- /// - public string GetLocation(Uri endpoint) - { - string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; - - if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) - { - if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) - { - return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; - } - } - - return location; - } - - /// - /// Set region name for a location if present in the locationcache otherwise set region name as null. - /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. - /// - /// - /// - /// true if region found else false - public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) - { - if (Uri.Compare( - endpoint, - this.defaultEndpoint, - UriComponents.Host, - UriFormat.SafeUnescaped, - StringComparison.OrdinalIgnoreCase) == 0) - { - regionName = null; - return false; - } - - regionName = this.GetLocation(endpoint); - return true; - } - - /// - /// Marks the current location unavailable for read - /// - public void MarkEndpointUnavailableForRead(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Read); - } - - /// - /// Marks the current location unavailable for write - /// - public void MarkEndpointUnavailableForWrite(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Write); - } - - /// - /// Invoked when is read - /// - /// Read DatabaseAccoaunt - public void OnDatabaseAccountRead(AccountProperties databaseAccount) - { - this.UpdateLocationCache( - databaseAccount.WritableRegions, - databaseAccount.ReadableRegions, - preferenceList: null, - enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); - } - - /// - /// Invoked when changes - /// - /// - public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) - { - this.UpdateLocationCache( - preferenceList: preferredLocations); - } - - public bool IsMetaData(DocumentServiceRequest request) - { - return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || - request.ResourceType != ResourceType.Document; - - } - public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) - { - return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 - && this.IsMetaData(request) - && this.CanUseMultipleWriteLocations(); - - } - - public Uri GetHubUri() - { - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; - Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - return locationEndpointToRoute; - } - - /// - /// Resolves request to service endpoint. - /// 1. If this is a write request - /// (a) If UseMultipleWriteLocations = true - /// (i) For document writes, resolve to most preferred and available write endpoint. - /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will - /// be retried on next preferred available write endpoint. - /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) - /// write endpoint in . - /// Endpoint of first write location in is the only endpoint that supports - /// write operation on all resource types (except during that region's failover). - /// Only during manual failover, client would retry write on second write location in . - /// (b) Else resolve the request to first write endpoint in OR - /// second write endpoint in in case of manual failover of that location. - /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) - /// - /// Request for which endpoint is to be resolved - /// Resolved endpoint - public Uri ResolveServiceEndpoint(DocumentServiceRequest request) - { - if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) - { - return request.RequestContext.LocationEndpointToRoute; - } - - int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); - - Uri locationEndpointToRoute = this.defaultEndpoint; - - if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? - || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) - { - // For non-document resource types in case of client can use multiple write locations - // or when client cannot use multiple write locations, flip-flop between the - // first and the second writable region in DatabaseAccount (for manual failover) - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) - { - locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); - string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; - locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - } - } - else - { - ReadOnlyCollection endpoints = this.ReadEndpoints; - locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; - } - - request.RequestContext.RouteToLocation(locationEndpointToRoute); - return locationEndpointToRoute; - } - - public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) - { - canRefreshInBackground = true; - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - string mostPreferredLocation = 
currentLocationInfo.PreferredLocations.FirstOrDefault(); - - // we should schedule refresh in background if we are unable to target the user's most preferredLocation. - if (this.enableEndpointDiscovery) - { - // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled - bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; - - ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; - - if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) - { - canRefreshInBackground = readLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. canRefreshInBackground = {1}", - readLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - - if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - Uri mostPreferredReadEndpoint; - - if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) - { - if (mostPreferredReadEndpoint != readLocationEndpoints[0]) - { - // For reads, we can always refresh in background as we can alternate to - // other available read endpoints - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); - return true; - } - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); - return true; - } - } - - Uri mostPreferredWriteEndpoint; - ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; - - if (!this.CanUseMultipleWriteLocations()) - { - if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) - { - // Since most preferred write endpoint is unavailable, we can only refresh in background if - // we have an alternate write 
endpoint - canRefreshInBackground = writeLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. canRefreshInBackground = {2}", - mostPreferredLocation, - writeLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - else - { - return shouldRefresh; - } - } - else if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) - { - shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); - return shouldRefresh; - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); - return true; - } - } - else - { - return shouldRefresh; - } - } - else - { - return false; - } - } - - public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) - { - return this.CanUseMultipleWriteLocations() && - (request.ResourceType == ResourceType.Document || - (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); - } - - private void ClearStaleEndpointUnavailabilityInfo() - { - if (this.locationUnavailablityInfoByEndpoint.Any()) - { - List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); - - foreach (Uri unavailableEndpoint in unavailableEndpoints) - { - LocationUnavailabilityInfo unavailabilityInfo; - LocationUnavailabilityInfo removed; - - if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) - && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > 
this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) - { - DefaultTrace.TraceInformation( - "Removed endpoint {0} unavailable for operations {1} from unavailableEndpoints", - unavailableEndpoint, - unavailabilityInfo.UnavailableOperations); - } - } - } - } - - private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) - { - LocationUnavailabilityInfo unavailabilityInfo; - - if (expectedAvailableOperations == OperationType.None - || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) - || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) - { - return false; - } - else - { - if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) - { - return false; - } - else - { - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", - endpoint, - unavailabilityInfo.UnavailableOperations); - // Unexpired entry present. 
Endpoint is unavailable - return true; - } - } - } - - private void MarkEndpointUnavailable( - Uri unavailableEndpoint, - OperationType unavailableOperationType) - { - DateTime currentTime = DateTime.UtcNow; - LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( - unavailableEndpoint, - (Uri endpoint) => - { - return new LocationUnavailabilityInfo() - { - LastUnavailabilityCheckTimeStamp = currentTime, - UnavailableOperations = unavailableOperationType, - }; - }, - (Uri endpoint, LocationUnavailabilityInfo info) => - { - info.LastUnavailabilityCheckTimeStamp = currentTime; - info.UnavailableOperations |= unavailableOperationType; - return info; - }); - - this.UpdateLocationCache(); - - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", - unavailableEndpoint, - unavailableOperationType, - updatedInfo.LastUnavailabilityCheckTimeStamp); - } - - private void UpdateLocationCache( - IEnumerable writeLocations = null, - IEnumerable readLocations = null, - ReadOnlyCollection preferenceList = null, - bool? 
enableMultipleWriteLocations = null) - { - lock (this.lockObject) - { - DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); - - if (preferenceList != null) - { - nextLocationInfo.PreferredLocations = preferenceList; - } - - if (enableMultipleWriteLocations.HasValue) - { - this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; - } - - this.ClearStaleEndpointUnavailabilityInfo(); - - if (readLocations != null) - { - ReadOnlyCollection availableReadLocations; - nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); - nextLocationInfo.AvailableReadLocations = availableReadLocations; - } - - if (writeLocations != null) - { - ReadOnlyCollection availableWriteLocations; - nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); - nextLocationInfo.AvailableWriteLocations = availableWriteLocations; - } - - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); - nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); - this.lastCacheUpdateTimestamp = DateTime.UtcNow; - - DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", - string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), - string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); - - this.locationInfo = nextLocationInfo; - } - } - - private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType expectedAvailableOperation, Uri 
fallbackEndpoint) - { - List endpoints = new List(); - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init - if (this.enableEndpointDiscovery) - { - if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) - { - List unavailableEndpoints = new List(); - - // When client can not use multiple write locations, preferred locations list should only be used - // determining read endpoints order. - // If client can use multiple write locations, preferred locations list should be used for determining - // both read and write endpoints order. - - foreach (string location in currentLocationInfo.PreferredLocations) - { - Uri endpoint; - if (endpointsByLocation.TryGetValue(location, out endpoint)) - { - if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) - { - unavailableEndpoints.Add(endpoint); - } - else - { - endpoints.Add(endpoint); - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - unavailableEndpoints.Remove(fallbackEndpoint); - } - - endpoints.AddRange(unavailableEndpoints); - } - else - { - foreach (string location in orderedLocations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location) && // location is empty during manual failover - endpointsByLocation.TryGetValue(location, out endpoint)) - { - endpoints.Add(endpoint); - } - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - } - - return endpoints.AsReadOnly(); - } - - private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) - { - Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); - List parsedLocations = new List(); - - foreach (AccountRegion location in locations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location.Name) - && Uri.TryCreate(location.Endpoint, 
UriKind.Absolute, out endpoint)) - { - endpointsByLocation[location.Name] = endpoint; - parsedLocations.Add(location.Name); - this.SetServicePointConnectionLimit(endpoint); - } - else - { - DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", - location.Name, - location.Endpoint); - } - } - - orderedLocations = parsedLocations.AsReadOnly(); - return new ReadOnlyDictionary(endpointsByLocation); - } - - private bool CanUseMultipleWriteLocations() - { - return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; - } - - private void SetServicePointConnectionLimit(Uri endpoint) - { -#if !NETSTANDARD16 - ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); - servicePoint.ConnectionLimit = this.connectionLimit; -#endif - } - - private sealed class LocationUnavailabilityInfo - { - public DateTime LastUnavailabilityCheckTimeStamp { get; set; } - public OperationType UnavailableOperations { get; set; } - } - - private sealed class DatabaseAccountLocationsInfo - { - public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) - { - this.PreferredLocations = preferredLocations; - this.AvailableWriteLocations = new List().AsReadOnly(); - this.AvailableReadLocations = new List().AsReadOnly(); - this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - } - - public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) - { - this.PreferredLocations = other.PreferredLocations; - this.AvailableWriteLocations = other.AvailableWriteLocations; - this.AvailableReadLocations = 
other.AvailableReadLocations; - this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; - this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; - this.WriteEndpoints = other.WriteEndpoints; - this.ReadEndpoints = other.ReadEndpoints; - } - - public ReadOnlyCollection PreferredLocations { get; set; } - public ReadOnlyCollection AvailableWriteLocations { get; set; } - public ReadOnlyCollection AvailableReadLocations { get; set; } - public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } - public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } - public ReadOnlyCollection WriteEndpoints { get; set; } - public ReadOnlyCollection ReadEndpoints { get; set; } - } - - [Flags] - private enum OperationType - { - None = 0x0, - Read = 0x1, - Write = 0x2 - } - } -} +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Routing +{ + using System; + using System.Collections.Concurrent; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Globalization; + using System.Linq; + using System.Net; + using global::Azure.Core; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Documents; + + /// + /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount + /// with multiple writable and readable locations. 
+ /// + internal sealed class LocationCache + { + private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; + private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; + + private readonly bool enableEndpointDiscovery; + private readonly Uri defaultEndpoint; + private readonly bool useMultipleWriteLocations; + private readonly object lockObject; + private readonly TimeSpan unavailableLocationsExpirationTime; + private readonly int connectionLimit; + private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; + + private DatabaseAccountLocationsInfo locationInfo; + private DateTime lastCacheUpdateTimestamp; + private bool enableMultipleWriteLocations; + + public LocationCache( + ReadOnlyCollection preferredLocations, + Uri defaultEndpoint, + bool enableEndpointDiscovery, + int connectionLimit, + bool useMultipleWriteLocations) + { + this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); + this.defaultEndpoint = defaultEndpoint; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.useMultipleWriteLocations = useMultipleWriteLocations; + this.connectionLimit = connectionLimit; + + this.lockObject = new object(); + this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); + this.lastCacheUpdateTimestamp = DateTime.MinValue; + this.enableMultipleWriteLocations = false; + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + +#if !(NETSTANDARD15 || NETSTANDARD16) +#if NETSTANDARD20 + // GetEntryAssembly returns null when loaded from native netstandard2.0 + if (System.Reflection.Assembly.GetEntryAssembly() != null) + { +#endif + string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; + if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) + { + int unavailableLocationsExpirationTimeinSecondsConfigValue; + + if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + } + else + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); + } + } +#if NETSTANDARD20 + } +#endif +#endif + } + + /// + /// Gets list of read endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection ReadEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.ReadEndpoints; + } + } + + /// + /// Gets list of write endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection WriteEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.WriteEndpoints; + } + } + + /// + /// Returns the location corresponding to the endpoint if location specific endpoint is provided. + /// For the defaultEndPoint, we will return the first available write location. + /// Returns null, in other cases. + /// + /// + /// Today we return null for defaultEndPoint if multiple write locations can be used. + /// This needs to be modifed to figure out proper location in such case. 
+ /// + public string GetLocation(Uri endpoint) + { + string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; + + if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) + { + if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) + { + return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; + } + } + + return location; + } + + /// + /// Set region name for a location if present in the locationcache otherwise set region name as null. + /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. + /// + /// + /// + /// true if region found else false + public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) + { + if (Uri.Compare( + endpoint, + this.defaultEndpoint, + UriComponents.Host, + UriFormat.SafeUnescaped, + StringComparison.OrdinalIgnoreCase) == 0) + { + regionName = null; + return false; + } + + regionName = this.GetLocation(endpoint); + return true; + } + + /// + /// Marks the current location unavailable for read + /// + public void MarkEndpointUnavailableForRead(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Read); + } + + /// + /// Marks the current location unavailable for write + /// + public void MarkEndpointUnavailableForWrite(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Write); + } + + /// + /// Invoked when is read + /// + /// Read DatabaseAccoaunt + public void OnDatabaseAccountRead(AccountProperties databaseAccount) + { + this.UpdateLocationCache( + databaseAccount.WritableRegions, + databaseAccount.ReadableRegions, + preferenceList: null, + enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); + } + + /// + /// Invoked when changes + /// + /// + public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) + { + this.UpdateLocationCache( + preferenceList: preferredLocations); + } + + public bool IsMetaData(DocumentServiceRequest request) + { + return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || + request.ResourceType != ResourceType.Document; + + } + public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) + { + return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 + && this.IsMetaData(request) + && this.CanUseMultipleWriteLocations(); + + } + + public Uri GetHubUri() + { + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; + Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + return locationEndpointToRoute; + } + + /// + /// Resolves request to service endpoint. + /// 1. If this is a write request + /// (a) If UseMultipleWriteLocations = true + /// (i) For document writes, resolve to most preferred and available write endpoint. + /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will + /// be retried on next preferred available write endpoint. + /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) + /// write endpoint in . + /// Endpoint of first write location in is the only endpoint that supports + /// write operation on all resource types (except during that region's failover). + /// Only during manual failover, client would retry write on second write location in . + /// (b) Else resolve the request to first write endpoint in OR + /// second write endpoint in in case of manual failover of that location. + /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) + /// + /// Request for which endpoint is to be resolved + /// Resolved endpoint + public Uri ResolveServiceEndpoint(DocumentServiceRequest request) + { + if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) + { + return request.RequestContext.LocationEndpointToRoute; + } + + int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); + + Uri locationEndpointToRoute = this.defaultEndpoint; + + if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? + || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) + { + // For non-document resource types in case of client can use multiple write locations + // or when client cannot use multiple write locations, flip-flop between the + // first and the second writable region in DatabaseAccount (for manual failover) + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) + { + locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); + string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; + locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + } + } + else + { + ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? 
this.WriteEndpoints : this.ReadEndpoints; + locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; + } + + request.RequestContext.RouteToLocation(locationEndpointToRoute); + return locationEndpointToRoute; + } + + public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) + { + canRefreshInBackground = true; + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + string mostPreferredLocation = currentLocationInfo.PreferredLocations.FirstOrDefault(); + + // we should schedule refresh in background if we are unable to target the user's most preferredLocation. + if (this.enableEndpointDiscovery) + { + // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled + bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; + + ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; + + if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) + { + canRefreshInBackground = readLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. 
canRefreshInBackground = {1}", + readLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + + if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + Uri mostPreferredReadEndpoint; + + if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) + { + if (mostPreferredReadEndpoint != readLocationEndpoints[0]) + { + // For reads, we can always refresh in background as we can alternate to + // other available read endpoints + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); + return true; + } + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); + return true; + } + } + + Uri mostPreferredWriteEndpoint; + ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; + + if (!this.CanUseMultipleWriteLocations()) + { + if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) + { + // Since most preferred write endpoint is unavailable, we can only refresh in background if + // we have an alternate write endpoint + canRefreshInBackground = writeLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. 
canRefreshInBackground = {2}", + mostPreferredLocation, + writeLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + else + { + return shouldRefresh; + } + } + else if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) + { + shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); + return shouldRefresh; + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); + return true; + } + } + else + { + return shouldRefresh; + } + } + else + { + return false; + } + } + + public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) + { + return this.CanUseMultipleWriteLocations() && + (request.ResourceType == ResourceType.Document || + (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); + } + + private void ClearStaleEndpointUnavailabilityInfo() + { + if (this.locationUnavailablityInfoByEndpoint.Any()) + { + List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); + + foreach (Uri unavailableEndpoint in unavailableEndpoints) + { + LocationUnavailabilityInfo unavailabilityInfo; + LocationUnavailabilityInfo removed; + + if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) + && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) + { + DefaultTrace.TraceInformation( + "Removed endpoint {0} unavailable for operations {1} from 
unavailableEndpoints", + unavailableEndpoint, + unavailabilityInfo.UnavailableOperations); + } + } + } + } + + private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) + { + LocationUnavailabilityInfo unavailabilityInfo; + + if (expectedAvailableOperations == OperationType.None + || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) + || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) + { + return false; + } + else + { + if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) + { + return false; + } + else + { + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", + endpoint, + unavailabilityInfo.UnavailableOperations); + // Unexpired entry present. Endpoint is unavailable + return true; + } + } + } + + private void MarkEndpointUnavailable( + Uri unavailableEndpoint, + OperationType unavailableOperationType) + { + DateTime currentTime = DateTime.UtcNow; + LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( + unavailableEndpoint, + (Uri endpoint) => + { + return new LocationUnavailabilityInfo() + { + LastUnavailabilityCheckTimeStamp = currentTime, + UnavailableOperations = unavailableOperationType, + }; + }, + (Uri endpoint, LocationUnavailabilityInfo info) => + { + info.LastUnavailabilityCheckTimeStamp = currentTime; + info.UnavailableOperations |= unavailableOperationType; + return info; + }); + + this.UpdateLocationCache(); + + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", + unavailableEndpoint, + unavailableOperationType, + updatedInfo.LastUnavailabilityCheckTimeStamp); + } + + private void UpdateLocationCache( + IEnumerable writeLocations = null, + IEnumerable readLocations = null, + ReadOnlyCollection preferenceList = 
null, + bool? enableMultipleWriteLocations = null) + { + lock (this.lockObject) + { + DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); + + if (preferenceList != null) + { + nextLocationInfo.PreferredLocations = preferenceList; + } + + if (enableMultipleWriteLocations.HasValue) + { + this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; + } + + this.ClearStaleEndpointUnavailabilityInfo(); + + if (readLocations != null) + { + ReadOnlyCollection availableReadLocations; + nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); + nextLocationInfo.AvailableReadLocations = availableReadLocations; + } + + if (writeLocations != null) + { + ReadOnlyCollection availableWriteLocations; + nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); + nextLocationInfo.AvailableWriteLocations = availableWriteLocations; + } + + nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); + nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); + this.lastCacheUpdateTimestamp = DateTime.UtcNow; + + DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", + string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), + string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); + + this.locationInfo = nextLocationInfo; + } + } + + private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType 
expectedAvailableOperation, Uri fallbackEndpoint) + { + List endpoints = new List(); + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init + if (this.enableEndpointDiscovery) + { + if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) + { + List unavailableEndpoints = new List(); + + // When client can not use multiple write locations, preferred locations list should only be used + // determining read endpoints order. + // If client can use multiple write locations, preferred locations list should be used for determining + // both read and write endpoints order. + + foreach (string location in currentLocationInfo.PreferredLocations) + { + Uri endpoint; + if (endpointsByLocation.TryGetValue(location, out endpoint)) + { + if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) + { + unavailableEndpoints.Add(endpoint); + } + else + { + endpoints.Add(endpoint); + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + unavailableEndpoints.Remove(fallbackEndpoint); + } + + endpoints.AddRange(unavailableEndpoints); + } + else + { + foreach (string location in orderedLocations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location) && // location is empty during manual failover + endpointsByLocation.TryGetValue(location, out endpoint)) + { + endpoints.Add(endpoint); + } + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + } + + return endpoints.AsReadOnly(); + } + + private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) + { + Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); + List parsedLocations = new List(); + + foreach (AccountRegion location in locations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location.Name) + && 
Uri.TryCreate(location.Endpoint, UriKind.Absolute, out endpoint)) + { + endpointsByLocation[location.Name] = endpoint; + parsedLocations.Add(location.Name); + this.SetServicePointConnectionLimit(endpoint); + } + else + { + DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", + location.Name, + location.Endpoint); + } + } + + orderedLocations = parsedLocations.AsReadOnly(); + return new ReadOnlyDictionary(endpointsByLocation); + } + + private bool CanUseMultipleWriteLocations() + { + return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; + } + + private void SetServicePointConnectionLimit(Uri endpoint) + { +#if !NETSTANDARD16 + ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); + servicePoint.ConnectionLimit = this.connectionLimit; +#endif + } + + private sealed class LocationUnavailabilityInfo + { + public DateTime LastUnavailabilityCheckTimeStamp { get; set; } + public OperationType UnavailableOperations { get; set; } + } + + private sealed class DatabaseAccountLocationsInfo + { + public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) + { + this.PreferredLocations = preferredLocations; + this.AvailableWriteLocations = new List().AsReadOnly(); + this.AvailableReadLocations = new List().AsReadOnly(); + this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + } + + public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) + { + this.PreferredLocations = other.PreferredLocations; + this.AvailableWriteLocations = 
other.AvailableWriteLocations; + this.AvailableReadLocations = other.AvailableReadLocations; + this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; + this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; + this.WriteEndpoints = other.WriteEndpoints; + this.ReadEndpoints = other.ReadEndpoints; + } + + public ReadOnlyCollection PreferredLocations { get; set; } + public ReadOnlyCollection AvailableWriteLocations { get; set; } + public ReadOnlyCollection AvailableReadLocations { get; set; } + public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } + public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } + public ReadOnlyCollection WriteEndpoints { get; set; } + public ReadOnlyCollection ReadEndpoints { get; set; } + } + + [Flags] + private enum OperationType + { + None = 0x0, + Read = 0x1, + Write = 0x2 + } + } +} From fdffdd80ce2af9588272cd0e26c2cb91d16b9c34 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Thu, 7 Sep 2023 12:04:14 -0700 Subject: [PATCH 03/31] Code changes to clean up the PPAF retry logic fix. --- .../src/ClientRetryPolicy.cs | 18 ++---------------- .../src/Routing/LocationCache.cs | 2 +- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2933baa1a9..5bc91aebd8 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -247,8 +247,7 @@ private async Task ShouldRetryInternalAsync( } // Received 503 due to client connect timeout or Gateway - if (statusCode == HttpStatusCode.ServiceUnavailable - && ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode)) + if (statusCode == HttpStatusCode.ServiceUnavailable) { DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, @@ -265,12 +264,6 @@ private async Task ShouldRetryInternalAsync( return null; } - private static bool IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode) - { - return subStatusCode == SubStatusCodes.Unknown || - (subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus()); - } - private async Task ShouldRetryOnEndpointFailureAsync( bool isReadRequest, bool markBothReadAndWriteAsUnavailable, @@ -390,7 +383,7 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable() /// /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. - /// We try and retry the request only if there are other regions available. + /// We try and retry the request only if there are other regions available. The retry logic is applicable for single master write accounts as well. 
/// private ShouldRetryResult ShouldRetryOnServiceUnavailable() { @@ -400,13 +393,6 @@ private ShouldRetryResult ShouldRetryOnServiceUnavailable() return ShouldRetryResult.NoRetry(); } - if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available - return ShouldRetryResult.NoRetry(); - } - int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; if (availablePreferredLocations <= 1) diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 9c6308d8b6..69fc57ecc4 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -276,7 +276,7 @@ public Uri ResolveServiceEndpoint(DocumentServiceRequest request) } else { - ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? this.WriteEndpoints : this.ReadEndpoints; + ReadOnlyCollection endpoints = this.ReadEndpoints; locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; } From 9cd3d97240bb1d926a04eb857bb5efe4d62e89e4 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 6 Sep 2023 17:44:32 -0700 Subject: [PATCH 04/31] Code changes to add retry logic for GW returned 503.9002. --- .../src/ClientRetryPolicy.cs | 873 ++++++----- .../src/CosmosClientOptions.cs | 2 +- .../src/Routing/LocationCache.cs | 1336 ++++++++--------- 3 files changed, 1103 insertions(+), 1108 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2933baa1a9..2f007c6fbf 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -1,440 +1,435 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. 
-//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos -{ - using System; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Net; - using System.Net.Http; - using System.Threading; - using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Cosmos.Routing; - using Microsoft.Azure.Documents; - - /// - /// Client policy is combination of endpoint change retry + throttling retry. - /// - internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy - { - private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. - private const int MaxRetryCount = 120; - private const int MaxServiceUnavailableRetryCount = 1; - - private readonly IDocumentClientRetryPolicy throttlingRetry; - private readonly GlobalEndpointManager globalEndpointManager; - private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; - private readonly bool enableEndpointDiscovery; - private int failoverRetryCount; - - private int sessionTokenRetryCount; - private int serviceUnavailableRetryCount; - private bool isReadRequest; - private bool canUseMultipleWriteLocations; - private Uri locationEndpoint; - private RetryContext retryContext; - private DocumentServiceRequest documentServiceRequest; - - public ClientRetryPolicy( - GlobalEndpointManager globalEndpointManager, - GlobalPartitionEndpointManager partitionKeyRangeLocationCache, - bool enableEndpointDiscovery, - RetryOptions retryOptions) - { - this.throttlingRetry = new ResourceThrottleRetryPolicy( - retryOptions.MaxRetryAttemptsOnThrottledRequests, - retryOptions.MaxRetryWaitTimeInSeconds); - - this.globalEndpointManager = globalEndpointManager; - this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; - this.failoverRetryCount = 0; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.sessionTokenRetryCount = 0; - 
this.serviceUnavailableRetryCount = 0; - this.canUseMultipleWriteLocations = false; - } - - /// - /// Should the caller retry the operation. - /// - /// Exception that occurred when the operation was tried - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - Exception exception, - CancellationToken cancellationToken) - { - this.retryContext = null; - // Received Connection error (HttpRequestException), initiate the endpoint rediscovery - if (exception is HttpRequestException _) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark both read and write requests because it gateway exception. - // This means all requests going to the region will fail. - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: true, - forceRefresh: false, - retryOnPreferredLocations: true); - } - - if (exception is DocumentClientException clientException) - { - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - clientException?.StatusCode, - clientException?.GetSubStatus()); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - } - - return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); - } - - /// - /// Should the caller retry the operation. 
- /// - /// in return of the request - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - ResponseMessage cosmosResponseMessage, - CancellationToken cancellationToken) - { - this.retryContext = null; - - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - cosmosResponseMessage?.StatusCode, - cosmosResponseMessage?.Headers.SubStatusCode); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - - return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); - } - - /// - /// Method that is called before a request is sent to allow the retry policy implementation - /// to modify the state of the request. - /// - /// The request being sent to the service. - public void OnBeforeSendRequest(DocumentServiceRequest request) - { - this.isReadRequest = request.IsReadOnlyRequest; - this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); - this.documentServiceRequest = request; - - // clear previous location-based routing directive - request.RequestContext.ClearRouteToLocation(); - - if (this.retryContext != null) - { - if (this.retryContext.RouteToHub) - { - request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); - } - else - { - // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); - } - } - - // Resolve the endpoint for the request and pin the resolution to the resolved endpoint - // This enables marking the endpoint unavailability on endpoint failover/unreachability - this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); - request.RequestContext.RouteToLocation(this.locationEndpoint); - } - - private async Task ShouldRetryInternalAsync( - HttpStatusCode? statusCode, - SubStatusCodes? 
subStatusCode) - { - if (!statusCode.HasValue - && (!subStatusCode.HasValue - || subStatusCode.Value == SubStatusCodes.Unknown)) - { - return null; - } - - // Received request timeout - if (statusCode == HttpStatusCode.RequestTimeout) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition key range as unavailable to retry future request on a new region. - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - } - - // Received 403.3 on write region, initiate the endpoint rediscovery - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.WriteForbidden) - { - // It's a write forbidden so it safe to retry - if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest)) - { - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); - - if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) - { - bool forceRefresh = false; - - if (this.retryContext != null && this.retryContext.RouteToHub) - { - forceRefresh = true; - - } - - ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: forceRefresh, - retryOnPreferredLocations: false, - overwriteEndpointDiscovery: true); - - if (retryResult.ShouldRetry) - { - this.retryContext.RouteToHub = true; - } - - return retryResult; - } - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: true, - retryOnPreferredLocations: false); - } - - // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.DatabaseAccountNotFound - && (this.isReadRequest || this.canUseMultipleWriteLocations)) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: false, - retryOnPreferredLocations: false); - } - - if (statusCode == HttpStatusCode.NotFound - && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { - return this.ShouldRetryOnSessionNotAvailable(); - } - - // Received 503 due to client connect timeout or Gateway - if (statusCode == HttpStatusCode.ServiceUnavailable - && ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode)) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition as unavailable. - // Let the ClientRetry logic decide if the request should be retried - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - - return this.ShouldRetryOnServiceUnavailable(); - } - - return null; - } - - private static bool IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode) - { - return subStatusCode == SubStatusCodes.Unknown || - (subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus()); - } - - private async Task ShouldRetryOnEndpointFailureAsync( - bool isReadRequest, - bool markBothReadAndWriteAsUnavailable, - bool forceRefresh, - bool retryOnPreferredLocations, - bool overwriteEndpointDiscovery = false) - { - if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", - this.failoverRetryCount, - this.locationEndpoint?.ToString() ?? 
string.Empty); - return ShouldRetryResult.NoRetry(); - } - - this.failoverRetryCount++; - - if (this.locationEndpoint != null && !overwriteEndpointDiscovery) - { - if (isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); - } - - if (!isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); - } - } - - TimeSpan retryDelay = TimeSpan.Zero; - if (!isReadRequest) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); - - if (this.failoverRetryCount > 1) - { - //if retried both endpoints, follow regular retry interval. - retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - } - else - { - retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - - await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); - - int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect - if (retryOnPreferredLocations) - { - retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list - } - - this.retryContext = new RetryContext - { - RetryLocationIndex = retryLocationIndex, - RetryRequestOnPreferredLocations = retryOnPreferredLocations, - }; - - return ShouldRetryResult.RetryAfter(retryDelay); - } - - private ShouldRetryResult ShouldRetryOnSessionNotAvailable() - { - this.sessionTokenRetryCount++; - - if (!this.enableEndpointDiscovery) - { - // if endpoint discovery is disabled, the request cannot be retried anywhere else - return ShouldRetryResult.NoRetry(); - } - else - { - if (this.canUseMultipleWriteLocations) - { - ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; - - if (this.sessionTokenRetryCount > endpoints.Count) - { - // When use multiple write locations is true and the request has been tried - // on all locations, then don't retry the request - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.sessionTokenRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - else - { - if (this.sessionTokenRetryCount > 1) - { - // When cannot use multiple write locations, then don't retry the request if - // we have already tried this request on the write location - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext - { - RetryLocationIndex = 0, - RetryRequestOnPreferredLocations = false - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - } - } - - /// - /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. - /// We try and retry the request only if there are other regions available. - /// - private ShouldRetryResult ShouldRetryOnServiceUnavailable() - { - if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) - { - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); - return ShouldRetryResult.NoRetry(); - } - - if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available - return ShouldRetryResult.NoRetry(); - } - - int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; - - if (availablePreferredLocations <= 1) - { - // No other regions to retry on - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); - return ShouldRetryResult.NoRetry(); - } - - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); - - // Retrying on second PreferredLocations - // RetryCount is used as zero-based index - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.serviceUnavailableRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - private sealed class RetryContext - { - public int RetryLocationIndex { get; set; } - public bool RetryRequestOnPreferredLocations { get; set; } - - public bool RouteToHub { get; set; } - } - } +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Net; + using System.Net.Http; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Documents; + + /// + /// Client policy is combination of endpoint change retry + throttling retry. + /// + internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy + { + private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. + private const int MaxRetryCount = 120; + private const int MaxServiceUnavailableRetryCount = 1; + + private readonly IDocumentClientRetryPolicy throttlingRetry; + private readonly GlobalEndpointManager globalEndpointManager; + private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; + private readonly bool enableEndpointDiscovery; + private int failoverRetryCount; + + private int sessionTokenRetryCount; + private int serviceUnavailableRetryCount; + private bool isReadRequest; + private bool canUseMultipleWriteLocations; + private Uri locationEndpoint; + private RetryContext retryContext; + private DocumentServiceRequest documentServiceRequest; + + public ClientRetryPolicy( + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManager partitionKeyRangeLocationCache, + bool enableEndpointDiscovery, + RetryOptions retryOptions) + { + this.throttlingRetry = new ResourceThrottleRetryPolicy( + retryOptions.MaxRetryAttemptsOnThrottledRequests, + retryOptions.MaxRetryWaitTimeInSeconds); + + this.globalEndpointManager = globalEndpointManager; + this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; + this.failoverRetryCount = 0; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.sessionTokenRetryCount = 0; + 
this.serviceUnavailableRetryCount = 0; + this.canUseMultipleWriteLocations = false; + } + + /// + /// Should the caller retry the operation. + /// + /// Exception that occurred when the operation was tried + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + Exception exception, + CancellationToken cancellationToken) + { + this.retryContext = null; + // Received Connection error (HttpRequestException), initiate the endpoint rediscovery + if (exception is HttpRequestException _) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark both read and write requests because it gateway exception. + // This means all requests going to the region will fail. + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: true, + forceRefresh: false, + retryOnPreferredLocations: true); + } + + if (exception is DocumentClientException clientException) + { + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + clientException?.StatusCode, + clientException?.GetSubStatus()); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + } + + return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); + } + + /// + /// Should the caller retry the operation. 
+ /// + /// in return of the request + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + ResponseMessage cosmosResponseMessage, + CancellationToken cancellationToken) + { + this.retryContext = null; + + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + cosmosResponseMessage?.StatusCode, + cosmosResponseMessage?.Headers.SubStatusCode); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + + return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); + } + + /// + /// Method that is called before a request is sent to allow the retry policy implementation + /// to modify the state of the request. + /// + /// The request being sent to the service. + public void OnBeforeSendRequest(DocumentServiceRequest request) + { + this.isReadRequest = request.IsReadOnlyRequest; + this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); + this.documentServiceRequest = request; + + // clear previous location-based routing directive + request.RequestContext.ClearRouteToLocation(); + + if (this.retryContext != null) + { + if (this.retryContext.RouteToHub) + { + request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); + } + else + { + // set location-based routing directive based on request retry context + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); + } + } + + // Resolve the endpoint for the request and pin the resolution to the resolved endpoint + // This enables marking the endpoint unavailability on endpoint failover/unreachability + this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); + request.RequestContext.RouteToLocation(this.locationEndpoint); + } + + private async Task ShouldRetryInternalAsync( + HttpStatusCode? statusCode, + SubStatusCodes? 
subStatusCode) + { + if (!statusCode.HasValue + && (!subStatusCode.HasValue + || subStatusCode.Value == SubStatusCodes.Unknown)) + { + return null; + } + + // Console.WriteLine("Status Code: " + statusCode.Value + "Sub Status Code: " + subStatusCode.Value + "IsRead Request: " + this.isReadRequest); + + // Received request timeout + if (statusCode == HttpStatusCode.RequestTimeout) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition key range as unavailable to retry future request on a new region. + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + } + + // Received 403.3 on write region, initiate the endpoint rediscovery + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.WriteForbidden) + { + // It's a write forbidden so it safe to retry + if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest)) + { + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); + + if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) + { + bool forceRefresh = false; + + if (this.retryContext != null && this.retryContext.RouteToHub) + { + forceRefresh = true; + + } + + ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: forceRefresh, + retryOnPreferredLocations: false, + overwriteEndpointDiscovery: true); + + if (retryResult.ShouldRetry) + { + this.retryContext.RouteToHub = true; + } + + return retryResult; + } + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: true, + retryOnPreferredLocations: false); + } + + // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.DatabaseAccountNotFound + && (this.isReadRequest || this.canUseMultipleWriteLocations)) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: false, + retryOnPreferredLocations: false); + } + + if (statusCode == HttpStatusCode.NotFound + && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) + { + return this.ShouldRetryOnSessionNotAvailable(); + } + + // Received 503 due to client connect timeout or Gateway + if (statusCode == HttpStatusCode.ServiceUnavailable) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition as unavailable. + // Let the ClientRetry logic decide if the request should be retried + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + + return this.ShouldRetryOnServiceUnavailable(); + } + + return null; + } + + private async Task ShouldRetryOnEndpointFailureAsync( + bool isReadRequest, + bool markBothReadAndWriteAsUnavailable, + bool forceRefresh, + bool retryOnPreferredLocations, + bool overwriteEndpointDiscovery = false) + { + if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", + this.failoverRetryCount, + this.locationEndpoint?.ToString() ?? string.Empty); + return ShouldRetryResult.NoRetry(); + } + + this.failoverRetryCount++; + + if (this.locationEndpoint != null && !overwriteEndpointDiscovery) + { + if (isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); + } + + if (!isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); + } + } + + TimeSpan retryDelay = TimeSpan.Zero; + if (!isReadRequest) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); + + if (this.failoverRetryCount > 1) + { + //if retried both endpoints, follow regular retry interval. 
+ retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + } + else + { + retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + + await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); + + int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect + if (retryOnPreferredLocations) + { + retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list + } + + this.retryContext = new RetryContext + { + RetryLocationIndex = retryLocationIndex, + RetryRequestOnPreferredLocations = retryOnPreferredLocations, + }; + + return ShouldRetryResult.RetryAfter(retryDelay); + } + + private ShouldRetryResult ShouldRetryOnSessionNotAvailable() + { + this.sessionTokenRetryCount++; + + if (!this.enableEndpointDiscovery) + { + // if endpoint discovery is disabled, the request cannot be retried anywhere else + return ShouldRetryResult.NoRetry(); + } + else + { + if (this.canUseMultipleWriteLocations) + { + ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; + + if (this.sessionTokenRetryCount > endpoints.Count) + { + // When use multiple write locations is true and the request has been tried + // on all locations, then don't retry the request + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.sessionTokenRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + else + { + if (this.sessionTokenRetryCount > 1) + { + // When cannot use multiple write locations, then don't retry the request if + // we have already tried this request on the write location + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext + { + RetryLocationIndex = 0, + RetryRequestOnPreferredLocations = false + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + } + } + + /// + /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. + /// We try and retry the request only if there are other regions available. + /// + private ShouldRetryResult ShouldRetryOnServiceUnavailable() + { + if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) + { + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); + return ShouldRetryResult.NoRetry(); + } + + /*if (!this.canUseMultipleWriteLocations + && !this.isReadRequest) + { + // Write requests on single master cannot be retried, no other regions available + return ShouldRetryResult.NoRetry(); + }*/ + + int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; + + if (availablePreferredLocations <= 1) + { + // No other regions to retry on + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); + return ShouldRetryResult.NoRetry(); + } + + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); + + // Retrying on second PreferredLocations + // RetryCount is used as zero-based index + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.serviceUnavailableRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + private sealed class RetryContext + { + public int RetryLocationIndex { get; set; } + public bool RetryRequestOnPreferredLocations { get; set; } + + public bool RouteToHub { get; set; } + } + } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 2c07f060f8..ee67522d6b 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - internal bool EnablePartitionLevelFailover { get; set; } = false; + public bool EnablePartitionLevelFailover { get; set; } = false; /// /// Quorum Read allowed with eventual consistency account or 
consistent prefix account. diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 9c6308d8b6..6f07b7a52a 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -1,668 +1,668 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos.Routing -{ - using System; - using System.Collections.Concurrent; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Globalization; - using System.Linq; - using System.Net; - using global::Azure.Core; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Documents; - - /// - /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount - /// with multiple writable and readable locations. 
- /// - internal sealed class LocationCache - { - private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; - private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - - private readonly bool enableEndpointDiscovery; - private readonly Uri defaultEndpoint; - private readonly bool useMultipleWriteLocations; - private readonly object lockObject; - private readonly TimeSpan unavailableLocationsExpirationTime; - private readonly int connectionLimit; - private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; - - private DatabaseAccountLocationsInfo locationInfo; - private DateTime lastCacheUpdateTimestamp; - private bool enableMultipleWriteLocations; - - public LocationCache( - ReadOnlyCollection preferredLocations, - Uri defaultEndpoint, - bool enableEndpointDiscovery, - int connectionLimit, - bool useMultipleWriteLocations) - { - this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); - this.defaultEndpoint = defaultEndpoint; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.useMultipleWriteLocations = useMultipleWriteLocations; - this.connectionLimit = connectionLimit; - - this.lockObject = new object(); - this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); - this.lastCacheUpdateTimestamp = DateTime.MinValue; - this.enableMultipleWriteLocations = false; - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - -#if !(NETSTANDARD15 || NETSTANDARD16) -#if NETSTANDARD20 - // GetEntryAssembly returns null when loaded from native netstandard2.0 - if (System.Reflection.Assembly.GetEntryAssembly() != null) - { -#endif - string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; - if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) - { - int unavailableLocationsExpirationTimeinSecondsConfigValue; - - if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - } - else - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); - } - } -#if NETSTANDARD20 - } -#endif -#endif - } - - /// - /// Gets list of read endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection ReadEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.ReadEndpoints; - } - } - - /// - /// Gets list of write endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection WriteEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.WriteEndpoints; - } - } - - /// - /// Returns the location corresponding to the endpoint if location specific endpoint is provided. - /// For the defaultEndPoint, we will return the first available write location. - /// Returns null, in other cases. - /// - /// - /// Today we return null for defaultEndPoint if multiple write locations can be used. - /// This needs to be modifed to figure out proper location in such case. 
- /// - public string GetLocation(Uri endpoint) - { - string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; - - if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) - { - if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) - { - return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; - } - } - - return location; - } - - /// - /// Set region name for a location if present in the locationcache otherwise set region name as null. - /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. - /// - /// - /// - /// true if region found else false - public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) - { - if (Uri.Compare( - endpoint, - this.defaultEndpoint, - UriComponents.Host, - UriFormat.SafeUnescaped, - StringComparison.OrdinalIgnoreCase) == 0) - { - regionName = null; - return false; - } - - regionName = this.GetLocation(endpoint); - return true; - } - - /// - /// Marks the current location unavailable for read - /// - public void MarkEndpointUnavailableForRead(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Read); - } - - /// - /// Marks the current location unavailable for write - /// - public void MarkEndpointUnavailableForWrite(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Write); - } - - /// - /// Invoked when is read - /// - /// Read DatabaseAccoaunt - public void OnDatabaseAccountRead(AccountProperties databaseAccount) - { - this.UpdateLocationCache( - databaseAccount.WritableRegions, - databaseAccount.ReadableRegions, - preferenceList: null, - enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); - } - - /// - /// Invoked when changes - /// - /// - public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) - { - this.UpdateLocationCache( - preferenceList: preferredLocations); - } - - public bool IsMetaData(DocumentServiceRequest request) - { - return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || - request.ResourceType != ResourceType.Document; - - } - public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) - { - return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 - && this.IsMetaData(request) - && this.CanUseMultipleWriteLocations(); - - } - - public Uri GetHubUri() - { - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; - Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - return locationEndpointToRoute; - } - - /// - /// Resolves request to service endpoint. - /// 1. If this is a write request - /// (a) If UseMultipleWriteLocations = true - /// (i) For document writes, resolve to most preferred and available write endpoint. - /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will - /// be retried on next preferred available write endpoint. - /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) - /// write endpoint in . - /// Endpoint of first write location in is the only endpoint that supports - /// write operation on all resource types (except during that region's failover). - /// Only during manual failover, client would retry write on second write location in . - /// (b) Else resolve the request to first write endpoint in OR - /// second write endpoint in in case of manual failover of that location. - /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) - /// - /// Request for which endpoint is to be resolved - /// Resolved endpoint - public Uri ResolveServiceEndpoint(DocumentServiceRequest request) - { - if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) - { - return request.RequestContext.LocationEndpointToRoute; - } - - int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); - - Uri locationEndpointToRoute = this.defaultEndpoint; - - if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? - || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) - { - // For non-document resource types in case of client can use multiple write locations - // or when client cannot use multiple write locations, flip-flop between the - // first and the second writable region in DatabaseAccount (for manual failover) - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) - { - locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); - string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; - locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - } - } - else - { - ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? 
this.WriteEndpoints : this.ReadEndpoints; - locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; - } - - request.RequestContext.RouteToLocation(locationEndpointToRoute); - return locationEndpointToRoute; - } - - public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) - { - canRefreshInBackground = true; - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - string mostPreferredLocation = currentLocationInfo.PreferredLocations.FirstOrDefault(); - - // we should schedule refresh in background if we are unable to target the user's most preferredLocation. - if (this.enableEndpointDiscovery) - { - // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled - bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; - - ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; - - if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) - { - canRefreshInBackground = readLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. 
canRefreshInBackground = {1}", - readLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - - if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - Uri mostPreferredReadEndpoint; - - if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) - { - if (mostPreferredReadEndpoint != readLocationEndpoints[0]) - { - // For reads, we can always refresh in background as we can alternate to - // other available read endpoints - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); - return true; - } - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); - return true; - } - } - - Uri mostPreferredWriteEndpoint; - ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; - - if (!this.CanUseMultipleWriteLocations()) - { - if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) - { - // Since most preferred write endpoint is unavailable, we can only refresh in background if - // we have an alternate write endpoint - canRefreshInBackground = writeLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. 
canRefreshInBackground = {2}", - mostPreferredLocation, - writeLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - else - { - return shouldRefresh; - } - } - else if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) - { - shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); - return shouldRefresh; - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); - return true; - } - } - else - { - return shouldRefresh; - } - } - else - { - return false; - } - } - - public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) - { - return this.CanUseMultipleWriteLocations() && - (request.ResourceType == ResourceType.Document || - (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); - } - - private void ClearStaleEndpointUnavailabilityInfo() - { - if (this.locationUnavailablityInfoByEndpoint.Any()) - { - List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); - - foreach (Uri unavailableEndpoint in unavailableEndpoints) - { - LocationUnavailabilityInfo unavailabilityInfo; - LocationUnavailabilityInfo removed; - - if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) - && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) - { - DefaultTrace.TraceInformation( - "Removed endpoint {0} unavailable for operations {1} from 
unavailableEndpoints", - unavailableEndpoint, - unavailabilityInfo.UnavailableOperations); - } - } - } - } - - private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) - { - LocationUnavailabilityInfo unavailabilityInfo; - - if (expectedAvailableOperations == OperationType.None - || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) - || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) - { - return false; - } - else - { - if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) - { - return false; - } - else - { - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", - endpoint, - unavailabilityInfo.UnavailableOperations); - // Unexpired entry present. Endpoint is unavailable - return true; - } - } - } - - private void MarkEndpointUnavailable( - Uri unavailableEndpoint, - OperationType unavailableOperationType) - { - DateTime currentTime = DateTime.UtcNow; - LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( - unavailableEndpoint, - (Uri endpoint) => - { - return new LocationUnavailabilityInfo() - { - LastUnavailabilityCheckTimeStamp = currentTime, - UnavailableOperations = unavailableOperationType, - }; - }, - (Uri endpoint, LocationUnavailabilityInfo info) => - { - info.LastUnavailabilityCheckTimeStamp = currentTime; - info.UnavailableOperations |= unavailableOperationType; - return info; - }); - - this.UpdateLocationCache(); - - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", - unavailableEndpoint, - unavailableOperationType, - updatedInfo.LastUnavailabilityCheckTimeStamp); - } - - private void UpdateLocationCache( - IEnumerable writeLocations = null, - IEnumerable readLocations = null, - ReadOnlyCollection preferenceList = 
null, - bool? enableMultipleWriteLocations = null) - { - lock (this.lockObject) - { - DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); - - if (preferenceList != null) - { - nextLocationInfo.PreferredLocations = preferenceList; - } - - if (enableMultipleWriteLocations.HasValue) - { - this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; - } - - this.ClearStaleEndpointUnavailabilityInfo(); - - if (readLocations != null) - { - ReadOnlyCollection availableReadLocations; - nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); - nextLocationInfo.AvailableReadLocations = availableReadLocations; - } - - if (writeLocations != null) - { - ReadOnlyCollection availableWriteLocations; - nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); - nextLocationInfo.AvailableWriteLocations = availableWriteLocations; - } - - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); - nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); - this.lastCacheUpdateTimestamp = DateTime.UtcNow; - - DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", - string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), - string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); - - this.locationInfo = nextLocationInfo; - } - } - - private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType 
expectedAvailableOperation, Uri fallbackEndpoint) - { - List endpoints = new List(); - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init - if (this.enableEndpointDiscovery) - { - if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) - { - List unavailableEndpoints = new List(); - - // When client can not use multiple write locations, preferred locations list should only be used - // determining read endpoints order. - // If client can use multiple write locations, preferred locations list should be used for determining - // both read and write endpoints order. - - foreach (string location in currentLocationInfo.PreferredLocations) - { - Uri endpoint; - if (endpointsByLocation.TryGetValue(location, out endpoint)) - { - if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) - { - unavailableEndpoints.Add(endpoint); - } - else - { - endpoints.Add(endpoint); - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - unavailableEndpoints.Remove(fallbackEndpoint); - } - - endpoints.AddRange(unavailableEndpoints); - } - else - { - foreach (string location in orderedLocations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location) && // location is empty during manual failover - endpointsByLocation.TryGetValue(location, out endpoint)) - { - endpoints.Add(endpoint); - } - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - } - - return endpoints.AsReadOnly(); - } - - private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) - { - Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); - List parsedLocations = new List(); - - foreach (AccountRegion location in locations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location.Name) - && 
Uri.TryCreate(location.Endpoint, UriKind.Absolute, out endpoint)) - { - endpointsByLocation[location.Name] = endpoint; - parsedLocations.Add(location.Name); - this.SetServicePointConnectionLimit(endpoint); - } - else - { - DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", - location.Name, - location.Endpoint); - } - } - - orderedLocations = parsedLocations.AsReadOnly(); - return new ReadOnlyDictionary(endpointsByLocation); - } - - private bool CanUseMultipleWriteLocations() - { - return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; - } - - private void SetServicePointConnectionLimit(Uri endpoint) - { -#if !NETSTANDARD16 - ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); - servicePoint.ConnectionLimit = this.connectionLimit; -#endif - } - - private sealed class LocationUnavailabilityInfo - { - public DateTime LastUnavailabilityCheckTimeStamp { get; set; } - public OperationType UnavailableOperations { get; set; } - } - - private sealed class DatabaseAccountLocationsInfo - { - public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) - { - this.PreferredLocations = preferredLocations; - this.AvailableWriteLocations = new List().AsReadOnly(); - this.AvailableReadLocations = new List().AsReadOnly(); - this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - } - - public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) - { - this.PreferredLocations = other.PreferredLocations; - this.AvailableWriteLocations = 
other.AvailableWriteLocations; - this.AvailableReadLocations = other.AvailableReadLocations; - this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; - this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; - this.WriteEndpoints = other.WriteEndpoints; - this.ReadEndpoints = other.ReadEndpoints; - } - - public ReadOnlyCollection PreferredLocations { get; set; } - public ReadOnlyCollection AvailableWriteLocations { get; set; } - public ReadOnlyCollection AvailableReadLocations { get; set; } - public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } - public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } - public ReadOnlyCollection WriteEndpoints { get; set; } - public ReadOnlyCollection ReadEndpoints { get; set; } - } - - [Flags] - private enum OperationType - { - None = 0x0, - Read = 0x1, - Write = 0x2 - } - } -} +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Routing +{ + using System; + using System.Collections.Concurrent; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Globalization; + using System.Linq; + using System.Net; + using global::Azure.Core; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Documents; + + /// + /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount + /// with multiple writable and readable locations. 
+ /// + internal sealed class LocationCache + { + private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; + private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; + + private readonly bool enableEndpointDiscovery; + private readonly Uri defaultEndpoint; + private readonly bool useMultipleWriteLocations; + private readonly object lockObject; + private readonly TimeSpan unavailableLocationsExpirationTime; + private readonly int connectionLimit; + private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; + + private DatabaseAccountLocationsInfo locationInfo; + private DateTime lastCacheUpdateTimestamp; + private bool enableMultipleWriteLocations; + + public LocationCache( + ReadOnlyCollection preferredLocations, + Uri defaultEndpoint, + bool enableEndpointDiscovery, + int connectionLimit, + bool useMultipleWriteLocations) + { + this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); + this.defaultEndpoint = defaultEndpoint; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.useMultipleWriteLocations = useMultipleWriteLocations; + this.connectionLimit = connectionLimit; + + this.lockObject = new object(); + this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); + this.lastCacheUpdateTimestamp = DateTime.MinValue; + this.enableMultipleWriteLocations = false; + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + +#if !(NETSTANDARD15 || NETSTANDARD16) +#if NETSTANDARD20 + // GetEntryAssembly returns null when loaded from native netstandard2.0 + if (System.Reflection.Assembly.GetEntryAssembly() != null) + { +#endif + string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; + if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) + { + int unavailableLocationsExpirationTimeinSecondsConfigValue; + + if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + } + else + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); + } + } +#if NETSTANDARD20 + } +#endif +#endif + } + + /// + /// Gets list of read endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection ReadEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.ReadEndpoints; + } + } + + /// + /// Gets list of write endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection WriteEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.WriteEndpoints; + } + } + + /// + /// Returns the location corresponding to the endpoint if location specific endpoint is provided. + /// For the defaultEndPoint, we will return the first available write location. + /// Returns null, in other cases. + /// + /// + /// Today we return null for defaultEndPoint if multiple write locations can be used. + /// This needs to be modifed to figure out proper location in such case. 
+ /// + public string GetLocation(Uri endpoint) + { + string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; + + if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) + { + if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) + { + return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; + } + } + + return location; + } + + /// + /// Set region name for a location if present in the locationcache otherwise set region name as null. + /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. + /// + /// + /// + /// true if region found else false + public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) + { + if (Uri.Compare( + endpoint, + this.defaultEndpoint, + UriComponents.Host, + UriFormat.SafeUnescaped, + StringComparison.OrdinalIgnoreCase) == 0) + { + regionName = null; + return false; + } + + regionName = this.GetLocation(endpoint); + return true; + } + + /// + /// Marks the current location unavailable for read + /// + public void MarkEndpointUnavailableForRead(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Read); + } + + /// + /// Marks the current location unavailable for write + /// + public void MarkEndpointUnavailableForWrite(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Write); + } + + /// + /// Invoked when is read + /// + /// Read DatabaseAccoaunt + public void OnDatabaseAccountRead(AccountProperties databaseAccount) + { + this.UpdateLocationCache( + databaseAccount.WritableRegions, + databaseAccount.ReadableRegions, + preferenceList: null, + enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); + } + + /// + /// Invoked when changes + /// + /// + public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) + { + this.UpdateLocationCache( + preferenceList: preferredLocations); + } + + public bool IsMetaData(DocumentServiceRequest request) + { + return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || + request.ResourceType != ResourceType.Document; + + } + public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) + { + return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 + && this.IsMetaData(request) + && this.CanUseMultipleWriteLocations(); + + } + + public Uri GetHubUri() + { + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; + Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + return locationEndpointToRoute; + } + + /// + /// Resolves request to service endpoint. + /// 1. If this is a write request + /// (a) If UseMultipleWriteLocations = true + /// (i) For document writes, resolve to most preferred and available write endpoint. + /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will + /// be retried on next preferred available write endpoint. + /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) + /// write endpoint in . + /// Endpoint of first write location in is the only endpoint that supports + /// write operation on all resource types (except during that region's failover). + /// Only during manual failover, client would retry write on second write location in . + /// (b) Else resolve the request to first write endpoint in OR + /// second write endpoint in in case of manual failover of that location. + /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) + /// + /// Request for which endpoint is to be resolved + /// Resolved endpoint + public Uri ResolveServiceEndpoint(DocumentServiceRequest request) + { + if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) + { + return request.RequestContext.LocationEndpointToRoute; + } + + int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); + + Uri locationEndpointToRoute = this.defaultEndpoint; + + if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? + || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) + { + // For non-document resource types in case of client can use multiple write locations + // or when client cannot use multiple write locations, flip-flop between the + // first and the second writable region in DatabaseAccount (for manual failover) + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) + { + locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); + string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; + locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + } + } + else + { + ReadOnlyCollection endpoints = this.ReadEndpoints; + locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; + } + + request.RequestContext.RouteToLocation(locationEndpointToRoute); + return locationEndpointToRoute; + } + + public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) + { + canRefreshInBackground = true; + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + string mostPreferredLocation = 
currentLocationInfo.PreferredLocations.FirstOrDefault(); + + // we should schedule refresh in background if we are unable to target the user's most preferredLocation. + if (this.enableEndpointDiscovery) + { + // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled + bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; + + ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; + + if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) + { + canRefreshInBackground = readLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. canRefreshInBackground = {1}", + readLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + + if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + Uri mostPreferredReadEndpoint; + + if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) + { + if (mostPreferredReadEndpoint != readLocationEndpoints[0]) + { + // For reads, we can always refresh in background as we can alternate to + // other available read endpoints + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); + return true; + } + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); + return true; + } + } + + Uri mostPreferredWriteEndpoint; + ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; + + if (!this.CanUseMultipleWriteLocations()) + { + if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) + { + // Since most preferred write endpoint is unavailable, we can only refresh in background if + // we have an alternate write 
endpoint + canRefreshInBackground = writeLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. canRefreshInBackground = {2}", + mostPreferredLocation, + writeLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + else + { + return shouldRefresh; + } + } + else if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) + { + shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); + return shouldRefresh; + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); + return true; + } + } + else + { + return shouldRefresh; + } + } + else + { + return false; + } + } + + public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) + { + return this.CanUseMultipleWriteLocations() && + (request.ResourceType == ResourceType.Document || + (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); + } + + private void ClearStaleEndpointUnavailabilityInfo() + { + if (this.locationUnavailablityInfoByEndpoint.Any()) + { + List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); + + foreach (Uri unavailableEndpoint in unavailableEndpoints) + { + LocationUnavailabilityInfo unavailabilityInfo; + LocationUnavailabilityInfo removed; + + if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) + && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > 
this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) + { + DefaultTrace.TraceInformation( + "Removed endpoint {0} unavailable for operations {1} from unavailableEndpoints", + unavailableEndpoint, + unavailabilityInfo.UnavailableOperations); + } + } + } + } + + private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) + { + LocationUnavailabilityInfo unavailabilityInfo; + + if (expectedAvailableOperations == OperationType.None + || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) + || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) + { + return false; + } + else + { + if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) + { + return false; + } + else + { + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", + endpoint, + unavailabilityInfo.UnavailableOperations); + // Unexpired entry present. 
Endpoint is unavailable + return true; + } + } + } + + private void MarkEndpointUnavailable( + Uri unavailableEndpoint, + OperationType unavailableOperationType) + { + DateTime currentTime = DateTime.UtcNow; + LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( + unavailableEndpoint, + (Uri endpoint) => + { + return new LocationUnavailabilityInfo() + { + LastUnavailabilityCheckTimeStamp = currentTime, + UnavailableOperations = unavailableOperationType, + }; + }, + (Uri endpoint, LocationUnavailabilityInfo info) => + { + info.LastUnavailabilityCheckTimeStamp = currentTime; + info.UnavailableOperations |= unavailableOperationType; + return info; + }); + + this.UpdateLocationCache(); + + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", + unavailableEndpoint, + unavailableOperationType, + updatedInfo.LastUnavailabilityCheckTimeStamp); + } + + private void UpdateLocationCache( + IEnumerable writeLocations = null, + IEnumerable readLocations = null, + ReadOnlyCollection preferenceList = null, + bool? 
enableMultipleWriteLocations = null) + { + lock (this.lockObject) + { + DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); + + if (preferenceList != null) + { + nextLocationInfo.PreferredLocations = preferenceList; + } + + if (enableMultipleWriteLocations.HasValue) + { + this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; + } + + this.ClearStaleEndpointUnavailabilityInfo(); + + if (readLocations != null) + { + ReadOnlyCollection availableReadLocations; + nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); + nextLocationInfo.AvailableReadLocations = availableReadLocations; + } + + if (writeLocations != null) + { + ReadOnlyCollection availableWriteLocations; + nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); + nextLocationInfo.AvailableWriteLocations = availableWriteLocations; + } + + nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); + nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); + this.lastCacheUpdateTimestamp = DateTime.UtcNow; + + DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", + string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), + string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); + + this.locationInfo = nextLocationInfo; + } + } + + private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType expectedAvailableOperation, Uri 
fallbackEndpoint) + { + List endpoints = new List(); + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init + if (this.enableEndpointDiscovery) + { + if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) + { + List unavailableEndpoints = new List(); + + // When client can not use multiple write locations, preferred locations list should only be used + // determining read endpoints order. + // If client can use multiple write locations, preferred locations list should be used for determining + // both read and write endpoints order. + + foreach (string location in currentLocationInfo.PreferredLocations) + { + Uri endpoint; + if (endpointsByLocation.TryGetValue(location, out endpoint)) + { + if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) + { + unavailableEndpoints.Add(endpoint); + } + else + { + endpoints.Add(endpoint); + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + unavailableEndpoints.Remove(fallbackEndpoint); + } + + endpoints.AddRange(unavailableEndpoints); + } + else + { + foreach (string location in orderedLocations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location) && // location is empty during manual failover + endpointsByLocation.TryGetValue(location, out endpoint)) + { + endpoints.Add(endpoint); + } + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + } + + return endpoints.AsReadOnly(); + } + + private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) + { + Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); + List parsedLocations = new List(); + + foreach (AccountRegion location in locations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location.Name) + && Uri.TryCreate(location.Endpoint, 
UriKind.Absolute, out endpoint)) + { + endpointsByLocation[location.Name] = endpoint; + parsedLocations.Add(location.Name); + this.SetServicePointConnectionLimit(endpoint); + } + else + { + DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", + location.Name, + location.Endpoint); + } + } + + orderedLocations = parsedLocations.AsReadOnly(); + return new ReadOnlyDictionary(endpointsByLocation); + } + + private bool CanUseMultipleWriteLocations() + { + return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; + } + + private void SetServicePointConnectionLimit(Uri endpoint) + { +#if !NETSTANDARD16 + ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); + servicePoint.ConnectionLimit = this.connectionLimit; +#endif + } + + private sealed class LocationUnavailabilityInfo + { + public DateTime LastUnavailabilityCheckTimeStamp { get; set; } + public OperationType UnavailableOperations { get; set; } + } + + private sealed class DatabaseAccountLocationsInfo + { + public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) + { + this.PreferredLocations = preferredLocations; + this.AvailableWriteLocations = new List().AsReadOnly(); + this.AvailableReadLocations = new List().AsReadOnly(); + this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + } + + public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) + { + this.PreferredLocations = other.PreferredLocations; + this.AvailableWriteLocations = other.AvailableWriteLocations; + this.AvailableReadLocations = 
other.AvailableReadLocations; + this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; + this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; + this.WriteEndpoints = other.WriteEndpoints; + this.ReadEndpoints = other.ReadEndpoints; + } + + public ReadOnlyCollection PreferredLocations { get; set; } + public ReadOnlyCollection AvailableWriteLocations { get; set; } + public ReadOnlyCollection AvailableReadLocations { get; set; } + public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } + public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } + public ReadOnlyCollection WriteEndpoints { get; set; } + public ReadOnlyCollection ReadEndpoints { get; set; } + } + + [Flags] + private enum OperationType + { + None = 0x0, + Read = 0x1, + Write = 0x2 + } + } +} From 9165885a331504ea46ce5c09412f709e1aa20df6 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Thu, 7 Sep 2023 11:55:58 -0700 Subject: [PATCH 05/31] Revert "Code changes to add retry logic for GW returned 503.9002." This reverts commit 53ef5f3c1b038d14dbb1473cafa18223b33af2ce. --- .../src/ClientRetryPolicy.cs | 873 +++++------ .../src/CosmosClientOptions.cs | 2 +- .../src/Routing/LocationCache.cs | 1336 ++++++++--------- 3 files changed, 1108 insertions(+), 1103 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2f007c6fbf..2933baa1a9 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -1,435 +1,440 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. 
-//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos -{ - using System; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Net; - using System.Net.Http; - using System.Threading; - using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Cosmos.Routing; - using Microsoft.Azure.Documents; - - /// - /// Client policy is combination of endpoint change retry + throttling retry. - /// - internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy - { - private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. - private const int MaxRetryCount = 120; - private const int MaxServiceUnavailableRetryCount = 1; - - private readonly IDocumentClientRetryPolicy throttlingRetry; - private readonly GlobalEndpointManager globalEndpointManager; - private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; - private readonly bool enableEndpointDiscovery; - private int failoverRetryCount; - - private int sessionTokenRetryCount; - private int serviceUnavailableRetryCount; - private bool isReadRequest; - private bool canUseMultipleWriteLocations; - private Uri locationEndpoint; - private RetryContext retryContext; - private DocumentServiceRequest documentServiceRequest; - - public ClientRetryPolicy( - GlobalEndpointManager globalEndpointManager, - GlobalPartitionEndpointManager partitionKeyRangeLocationCache, - bool enableEndpointDiscovery, - RetryOptions retryOptions) - { - this.throttlingRetry = new ResourceThrottleRetryPolicy( - retryOptions.MaxRetryAttemptsOnThrottledRequests, - retryOptions.MaxRetryWaitTimeInSeconds); - - this.globalEndpointManager = globalEndpointManager; - this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; - this.failoverRetryCount = 0; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.sessionTokenRetryCount = 0; - 
this.serviceUnavailableRetryCount = 0; - this.canUseMultipleWriteLocations = false; - } - - /// - /// Should the caller retry the operation. - /// - /// Exception that occurred when the operation was tried - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - Exception exception, - CancellationToken cancellationToken) - { - this.retryContext = null; - // Received Connection error (HttpRequestException), initiate the endpoint rediscovery - if (exception is HttpRequestException _) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark both read and write requests because it gateway exception. - // This means all requests going to the region will fail. - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: true, - forceRefresh: false, - retryOnPreferredLocations: true); - } - - if (exception is DocumentClientException clientException) - { - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - clientException?.StatusCode, - clientException?.GetSubStatus()); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - } - - return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); - } - - /// - /// Should the caller retry the operation. 
- /// - /// in return of the request - /// - /// True indicates caller should retry, False otherwise - public async Task ShouldRetryAsync( - ResponseMessage cosmosResponseMessage, - CancellationToken cancellationToken) - { - this.retryContext = null; - - ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( - cosmosResponseMessage?.StatusCode, - cosmosResponseMessage?.Headers.SubStatusCode); - if (shouldRetryResult != null) - { - return shouldRetryResult; - } - - return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); - } - - /// - /// Method that is called before a request is sent to allow the retry policy implementation - /// to modify the state of the request. - /// - /// The request being sent to the service. - public void OnBeforeSendRequest(DocumentServiceRequest request) - { - this.isReadRequest = request.IsReadOnlyRequest; - this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); - this.documentServiceRequest = request; - - // clear previous location-based routing directive - request.RequestContext.ClearRouteToLocation(); - - if (this.retryContext != null) - { - if (this.retryContext.RouteToHub) - { - request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); - } - else - { - // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); - } - } - - // Resolve the endpoint for the request and pin the resolution to the resolved endpoint - // This enables marking the endpoint unavailability on endpoint failover/unreachability - this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); - request.RequestContext.RouteToLocation(this.locationEndpoint); - } - - private async Task ShouldRetryInternalAsync( - HttpStatusCode? statusCode, - SubStatusCodes? 
subStatusCode) - { - if (!statusCode.HasValue - && (!subStatusCode.HasValue - || subStatusCode.Value == SubStatusCodes.Unknown)) - { - return null; - } - - // Console.WriteLine("Status Code: " + statusCode.Value + "Sub Status Code: " + subStatusCode.Value + "IsRead Request: " + this.isReadRequest); - - // Received request timeout - if (statusCode == HttpStatusCode.RequestTimeout) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition key range as unavailable to retry future request on a new region. - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - } - - // Received 403.3 on write region, initiate the endpoint rediscovery - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.WriteForbidden) - { - // It's a write forbidden so it safe to retry - if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest)) - { - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); - - if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) - { - bool forceRefresh = false; - - if (this.retryContext != null && this.retryContext.RouteToHub) - { - forceRefresh = true; - - } - - ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: forceRefresh, - retryOnPreferredLocations: false, - overwriteEndpointDiscovery: true); - - if (retryResult.ShouldRetry) - { - this.retryContext.RouteToHub = true; - } - - return retryResult; - } - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: false, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: true, - retryOnPreferredLocations: false); - } - - // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) - if (statusCode == HttpStatusCode.Forbidden - && subStatusCode == SubStatusCodes.DatabaseAccountNotFound - && (this.isReadRequest || this.canUseMultipleWriteLocations)) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - return await this.ShouldRetryOnEndpointFailureAsync( - isReadRequest: this.isReadRequest, - markBothReadAndWriteAsUnavailable: false, - forceRefresh: false, - retryOnPreferredLocations: false); - } - - if (statusCode == HttpStatusCode.NotFound - && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { - return this.ShouldRetryOnSessionNotAvailable(); - } - - // Received 503 due to client connect timeout or Gateway - if (statusCode == HttpStatusCode.ServiceUnavailable) - { - DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", - this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, - this.documentServiceRequest?.ResourceAddress ?? string.Empty); - - // Mark the partition as unavailable. - // Let the ClientRetry logic decide if the request should be retried - this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( - this.documentServiceRequest); - - return this.ShouldRetryOnServiceUnavailable(); - } - - return null; - } - - private async Task ShouldRetryOnEndpointFailureAsync( - bool isReadRequest, - bool markBothReadAndWriteAsUnavailable, - bool forceRefresh, - bool retryOnPreferredLocations, - bool overwriteEndpointDiscovery = false) - { - if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", - this.failoverRetryCount, - this.locationEndpoint?.ToString() ?? string.Empty); - return ShouldRetryResult.NoRetry(); - } - - this.failoverRetryCount++; - - if (this.locationEndpoint != null && !overwriteEndpointDiscovery) - { - if (isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); - } - - if (!isReadRequest || markBothReadAndWriteAsUnavailable) - { - this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); - } - } - - TimeSpan retryDelay = TimeSpan.Zero; - if (!isReadRequest) - { - DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); - - if (this.failoverRetryCount > 1) - { - //if retried both endpoints, follow regular retry interval. 
- retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - } - else - { - retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); - } - - await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); - - int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect - if (retryOnPreferredLocations) - { - retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list - } - - this.retryContext = new RetryContext - { - RetryLocationIndex = retryLocationIndex, - RetryRequestOnPreferredLocations = retryOnPreferredLocations, - }; - - return ShouldRetryResult.RetryAfter(retryDelay); - } - - private ShouldRetryResult ShouldRetryOnSessionNotAvailable() - { - this.sessionTokenRetryCount++; - - if (!this.enableEndpointDiscovery) - { - // if endpoint discovery is disabled, the request cannot be retried anywhere else - return ShouldRetryResult.NoRetry(); - } - else - { - if (this.canUseMultipleWriteLocations) - { - ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; - - if (this.sessionTokenRetryCount > endpoints.Count) - { - // When use multiple write locations is true and the request has been tried - // on all locations, then don't retry the request - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.sessionTokenRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - else - { - if (this.sessionTokenRetryCount > 1) - { - // When cannot use multiple write locations, then don't retry the request if - // we have already tried this request on the write location - return ShouldRetryResult.NoRetry(); - } - else - { - this.retryContext = new RetryContext - { - RetryLocationIndex = 0, - RetryRequestOnPreferredLocations = false - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - } - } - } - - /// - /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. - /// We try and retry the request only if there are other regions available. - /// - private ShouldRetryResult ShouldRetryOnServiceUnavailable() - { - if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) - { - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); - return ShouldRetryResult.NoRetry(); - } - - /*if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available - return ShouldRetryResult.NoRetry(); - }*/ - - int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; - - if (availablePreferredLocations <= 1) - { - // No other regions to retry on - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); - return ShouldRetryResult.NoRetry(); - } - - DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); - - // Retrying on second PreferredLocations - // RetryCount is used as zero-based index - this.retryContext = new RetryContext() - { - RetryLocationIndex = this.serviceUnavailableRetryCount, - RetryRequestOnPreferredLocations = true - }; - - return ShouldRetryResult.RetryAfter(TimeSpan.Zero); - } - - private sealed class RetryContext - { - public int RetryLocationIndex { get; set; } - public bool RetryRequestOnPreferredLocations { get; set; } - - public bool RouteToHub { get; set; } - } - } +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Net; + using System.Net.Http; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Documents; + + /// + /// Client policy is combination of endpoint change retry + throttling retry. + /// + internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy + { + private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. + private const int MaxRetryCount = 120; + private const int MaxServiceUnavailableRetryCount = 1; + + private readonly IDocumentClientRetryPolicy throttlingRetry; + private readonly GlobalEndpointManager globalEndpointManager; + private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; + private readonly bool enableEndpointDiscovery; + private int failoverRetryCount; + + private int sessionTokenRetryCount; + private int serviceUnavailableRetryCount; + private bool isReadRequest; + private bool canUseMultipleWriteLocations; + private Uri locationEndpoint; + private RetryContext retryContext; + private DocumentServiceRequest documentServiceRequest; + + public ClientRetryPolicy( + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManager partitionKeyRangeLocationCache, + bool enableEndpointDiscovery, + RetryOptions retryOptions) + { + this.throttlingRetry = new ResourceThrottleRetryPolicy( + retryOptions.MaxRetryAttemptsOnThrottledRequests, + retryOptions.MaxRetryWaitTimeInSeconds); + + this.globalEndpointManager = globalEndpointManager; + this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; + this.failoverRetryCount = 0; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.sessionTokenRetryCount = 0; + 
this.serviceUnavailableRetryCount = 0; + this.canUseMultipleWriteLocations = false; + } + + /// + /// Should the caller retry the operation. + /// + /// Exception that occurred when the operation was tried + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + Exception exception, + CancellationToken cancellationToken) + { + this.retryContext = null; + // Received Connection error (HttpRequestException), initiate the endpoint rediscovery + if (exception is HttpRequestException _) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Gateway HttpRequestException Endpoint not reachable. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark both read and write requests because it gateway exception. + // This means all requests going to the region will fail. + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: true, + forceRefresh: false, + retryOnPreferredLocations: true); + } + + if (exception is DocumentClientException clientException) + { + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + clientException?.StatusCode, + clientException?.GetSubStatus()); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + } + + return await this.throttlingRetry.ShouldRetryAsync(exception, cancellationToken); + } + + /// + /// Should the caller retry the operation. 
+ /// + /// in return of the request + /// + /// True indicates caller should retry, False otherwise + public async Task ShouldRetryAsync( + ResponseMessage cosmosResponseMessage, + CancellationToken cancellationToken) + { + this.retryContext = null; + + ShouldRetryResult shouldRetryResult = await this.ShouldRetryInternalAsync( + cosmosResponseMessage?.StatusCode, + cosmosResponseMessage?.Headers.SubStatusCode); + if (shouldRetryResult != null) + { + return shouldRetryResult; + } + + return await this.throttlingRetry.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); + } + + /// + /// Method that is called before a request is sent to allow the retry policy implementation + /// to modify the state of the request. + /// + /// The request being sent to the service. + public void OnBeforeSendRequest(DocumentServiceRequest request) + { + this.isReadRequest = request.IsReadOnlyRequest; + this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); + this.documentServiceRequest = request; + + // clear previous location-based routing directive + request.RequestContext.ClearRouteToLocation(); + + if (this.retryContext != null) + { + if (this.retryContext.RouteToHub) + { + request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); + } + else + { + // set location-based routing directive based on request retry context + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); + } + } + + // Resolve the endpoint for the request and pin the resolution to the resolved endpoint + // This enables marking the endpoint unavailability on endpoint failover/unreachability + this.locationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); + request.RequestContext.RouteToLocation(this.locationEndpoint); + } + + private async Task ShouldRetryInternalAsync( + HttpStatusCode? statusCode, + SubStatusCodes? 
subStatusCode) + { + if (!statusCode.HasValue + && (!subStatusCode.HasValue + || subStatusCode.Value == SubStatusCodes.Unknown)) + { + return null; + } + + // Received request timeout + if (statusCode == HttpStatusCode.RequestTimeout) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: RequestTimeout. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition key range as unavailable to retry future request on a new region. + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + } + + // Received 403.3 on write region, initiate the endpoint rediscovery + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.WriteForbidden) + { + // It's a write forbidden so it safe to retry + if (this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest)) + { + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not writable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); + + if (this.globalEndpointManager.IsMultimasterMetadataWriteRequest(this.documentServiceRequest)) + { + bool forceRefresh = false; + + if (this.retryContext != null && this.retryContext.RouteToHub) + { + forceRefresh = true; + + } + + ShouldRetryResult retryResult = await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: forceRefresh, + retryOnPreferredLocations: false, + overwriteEndpointDiscovery: true); + + if (retryResult.ShouldRetry) + { + this.retryContext.RouteToHub = true; + } + + return retryResult; + } + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: true, + retryOnPreferredLocations: false); + } + + // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) + if (statusCode == HttpStatusCode.Forbidden + && subStatusCode == SubStatusCodes.DatabaseAccountNotFound + && (this.isReadRequest || this.canUseMultipleWriteLocations)) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: Endpoint not available for reads. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? 
string.Empty); + + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + markBothReadAndWriteAsUnavailable: false, + forceRefresh: false, + retryOnPreferredLocations: false); + } + + if (statusCode == HttpStatusCode.NotFound + && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) + { + return this.ShouldRetryOnSessionNotAvailable(); + } + + // Received 503 due to client connect timeout or Gateway + if (statusCode == HttpStatusCode.ServiceUnavailable + && ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode)) + { + DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}", + this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, + this.documentServiceRequest?.ResourceAddress ?? string.Empty); + + // Mark the partition as unavailable. + // Let the ClientRetry logic decide if the request should be retried + this.partitionKeyRangeLocationCache.TryMarkEndpointUnavailableForPartitionKeyRange( + this.documentServiceRequest); + + return this.ShouldRetryOnServiceUnavailable(); + } + + return null; + } + + private static bool IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode) + { + return subStatusCode == SubStatusCodes.Unknown || + (subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus()); + } + + private async Task ShouldRetryOnEndpointFailureAsync( + bool isReadRequest, + bool markBothReadAndWriteAsUnavailable, + bool forceRefresh, + bool retryOnPreferredLocations, + bool overwriteEndpointDiscovery = false) + { + if (this.failoverRetryCount > MaxRetryCount || (!this.enableEndpointDiscovery && !overwriteEndpointDiscovery)) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {0}, Endpoint = {1}", + this.failoverRetryCount, + this.locationEndpoint?.ToString() ?? 
string.Empty); + return ShouldRetryResult.NoRetry(); + } + + this.failoverRetryCount++; + + if (this.locationEndpoint != null && !overwriteEndpointDiscovery) + { + if (isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForRead(this.locationEndpoint); + } + + if (!isReadRequest || markBothReadAndWriteAsUnavailable) + { + this.globalEndpointManager.MarkEndpointUnavailableForWrite(this.locationEndpoint); + } + } + + TimeSpan retryDelay = TimeSpan.Zero; + if (!isReadRequest) + { + DefaultTrace.TraceInformation("ClientRetryPolicy: Failover happening. retryCount {0}", this.failoverRetryCount); + + if (this.failoverRetryCount > 1) + { + //if retried both endpoints, follow regular retry interval. + retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + } + else + { + retryDelay = TimeSpan.FromMilliseconds(ClientRetryPolicy.RetryIntervalInMS); + } + + await this.globalEndpointManager.RefreshLocationAsync(forceRefresh); + + int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect + if (retryOnPreferredLocations) + { + retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list + } + + this.retryContext = new RetryContext + { + RetryLocationIndex = retryLocationIndex, + RetryRequestOnPreferredLocations = retryOnPreferredLocations, + }; + + return ShouldRetryResult.RetryAfter(retryDelay); + } + + private ShouldRetryResult ShouldRetryOnSessionNotAvailable() + { + this.sessionTokenRetryCount++; + + if (!this.enableEndpointDiscovery) + { + // if endpoint discovery is disabled, the request cannot be retried anywhere else + return ShouldRetryResult.NoRetry(); + } + else + { + if (this.canUseMultipleWriteLocations) + { + ReadOnlyCollection endpoints = this.isReadRequest ? 
this.globalEndpointManager.ReadEndpoints : this.globalEndpointManager.WriteEndpoints; + + if (this.sessionTokenRetryCount > endpoints.Count) + { + // When use multiple write locations is true and the request has been tried + // on all locations, then don't retry the request + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.sessionTokenRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + else + { + if (this.sessionTokenRetryCount > 1) + { + // When cannot use multiple write locations, then don't retry the request if + // we have already tried this request on the write location + return ShouldRetryResult.NoRetry(); + } + else + { + this.retryContext = new RetryContext + { + RetryLocationIndex = 0, + RetryRequestOnPreferredLocations = false + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + } + } + } + + /// + /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. + /// We try and retry the request only if there are other regions available. + /// + private ShouldRetryResult ShouldRetryOnServiceUnavailable() + { + if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) + { + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. 
Retry count = {this.serviceUnavailableRetryCount}."); + return ShouldRetryResult.NoRetry(); + } + + if (!this.canUseMultipleWriteLocations + && !this.isReadRequest) + { + // Write requests on single master cannot be retried, no other regions available + return ShouldRetryResult.NoRetry(); + } + + int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; + + if (availablePreferredLocations <= 1) + { + // No other regions to retry on + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); + return ShouldRetryResult.NoRetry(); + } + + DefaultTrace.TraceInformation($"ClientRetryPolicy: ShouldRetryOnServiceUnavailable() Retrying. Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); + + // Retrying on second PreferredLocations + // RetryCount is used as zero-based index + this.retryContext = new RetryContext() + { + RetryLocationIndex = this.serviceUnavailableRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + + private sealed class RetryContext + { + public int RetryLocationIndex { get; set; } + public bool RetryRequestOnPreferredLocations { get; set; } + + public bool RouteToHub { get; set; } + } + } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index ee67522d6b..2c07f060f8 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - public bool EnablePartitionLevelFailover { get; set; } = false; + internal bool EnablePartitionLevelFailover { get; set; } = false; /// /// Quorum Read allowed with eventual consistency account or consistent 
prefix account. diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 6f07b7a52a..9c6308d8b6 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -1,668 +1,668 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos.Routing -{ - using System; - using System.Collections.Concurrent; - using System.Collections.Generic; - using System.Collections.ObjectModel; - using System.Globalization; - using System.Linq; - using System.Net; - using global::Azure.Core; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Documents; - - /// - /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount - /// with multiple writable and readable locations. 
- /// - internal sealed class LocationCache - { - private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; - private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - - private readonly bool enableEndpointDiscovery; - private readonly Uri defaultEndpoint; - private readonly bool useMultipleWriteLocations; - private readonly object lockObject; - private readonly TimeSpan unavailableLocationsExpirationTime; - private readonly int connectionLimit; - private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; - - private DatabaseAccountLocationsInfo locationInfo; - private DateTime lastCacheUpdateTimestamp; - private bool enableMultipleWriteLocations; - - public LocationCache( - ReadOnlyCollection preferredLocations, - Uri defaultEndpoint, - bool enableEndpointDiscovery, - int connectionLimit, - bool useMultipleWriteLocations) - { - this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); - this.defaultEndpoint = defaultEndpoint; - this.enableEndpointDiscovery = enableEndpointDiscovery; - this.useMultipleWriteLocations = useMultipleWriteLocations; - this.connectionLimit = connectionLimit; - - this.lockObject = new object(); - this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); - this.lastCacheUpdateTimestamp = DateTime.MinValue; - this.enableMultipleWriteLocations = false; - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - -#if !(NETSTANDARD15 || NETSTANDARD16) -#if NETSTANDARD20 - // GetEntryAssembly returns null when loaded from native netstandard2.0 - if (System.Reflection.Assembly.GetEntryAssembly() != null) - { -#endif - string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; - if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) - { - int unavailableLocationsExpirationTimeinSecondsConfigValue; - - if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); - } - else - { - this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); - } - } -#if NETSTANDARD20 - } -#endif -#endif - } - - /// - /// Gets list of read endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection ReadEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.ReadEndpoints; - } - } - - /// - /// Gets list of write endpoints ordered by - /// 1. Preferred location - /// 2. Endpoint availablity - /// - public ReadOnlyCollection WriteEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.WriteEndpoints; - } - } - - /// - /// Returns the location corresponding to the endpoint if location specific endpoint is provided. - /// For the defaultEndPoint, we will return the first available write location. - /// Returns null, in other cases. - /// - /// - /// Today we return null for defaultEndPoint if multiple write locations can be used. - /// This needs to be modifed to figure out proper location in such case. 
- /// - public string GetLocation(Uri endpoint) - { - string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; - - if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) - { - if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) - { - return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; - } - } - - return location; - } - - /// - /// Set region name for a location if present in the locationcache otherwise set region name as null. - /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. - /// - /// - /// - /// true if region found else false - public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) - { - if (Uri.Compare( - endpoint, - this.defaultEndpoint, - UriComponents.Host, - UriFormat.SafeUnescaped, - StringComparison.OrdinalIgnoreCase) == 0) - { - regionName = null; - return false; - } - - regionName = this.GetLocation(endpoint); - return true; - } - - /// - /// Marks the current location unavailable for read - /// - public void MarkEndpointUnavailableForRead(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Read); - } - - /// - /// Marks the current location unavailable for write - /// - public void MarkEndpointUnavailableForWrite(Uri endpoint) - { - this.MarkEndpointUnavailable(endpoint, OperationType.Write); - } - - /// - /// Invoked when is read - /// - /// Read DatabaseAccoaunt - public void OnDatabaseAccountRead(AccountProperties databaseAccount) - { - this.UpdateLocationCache( - databaseAccount.WritableRegions, - databaseAccount.ReadableRegions, - preferenceList: null, - enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); - } - - /// - /// Invoked when changes - /// - /// - public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) - { - this.UpdateLocationCache( - preferenceList: preferredLocations); - } - - public bool IsMetaData(DocumentServiceRequest request) - { - return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || - request.ResourceType != ResourceType.Document; - - } - public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) - { - return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 - && this.IsMetaData(request) - && this.CanUseMultipleWriteLocations(); - - } - - public Uri GetHubUri() - { - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; - Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - return locationEndpointToRoute; - } - - /// - /// Resolves request to service endpoint. - /// 1. If this is a write request - /// (a) If UseMultipleWriteLocations = true - /// (i) For document writes, resolve to most preferred and available write endpoint. - /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will - /// be retried on next preferred available write endpoint. - /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) - /// write endpoint in . - /// Endpoint of first write location in is the only endpoint that supports - /// write operation on all resource types (except during that region's failover). - /// Only during manual failover, client would retry write on second write location in . - /// (b) Else resolve the request to first write endpoint in OR - /// second write endpoint in in case of manual failover of that location. - /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) - /// - /// Request for which endpoint is to be resolved - /// Resolved endpoint - public Uri ResolveServiceEndpoint(DocumentServiceRequest request) - { - if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) - { - return request.RequestContext.LocationEndpointToRoute; - } - - int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); - - Uri locationEndpointToRoute = this.defaultEndpoint; - - if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? - || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) - { - // For non-document resource types in case of client can use multiple write locations - // or when client cannot use multiple write locations, flip-flop between the - // first and the second writable region in DatabaseAccount (for manual failover) - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) - { - locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); - string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; - locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; - } - } - else - { - ReadOnlyCollection endpoints = this.ReadEndpoints; - locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; - } - - request.RequestContext.RouteToLocation(locationEndpointToRoute); - return locationEndpointToRoute; - } - - public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) - { - canRefreshInBackground = true; - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - string mostPreferredLocation = 
currentLocationInfo.PreferredLocations.FirstOrDefault(); - - // we should schedule refresh in background if we are unable to target the user's most preferredLocation. - if (this.enableEndpointDiscovery) - { - // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled - bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; - - ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; - - if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) - { - canRefreshInBackground = readLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. canRefreshInBackground = {1}", - readLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - - if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - Uri mostPreferredReadEndpoint; - - if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) - { - if (mostPreferredReadEndpoint != readLocationEndpoints[0]) - { - // For reads, we can always refresh in background as we can alternate to - // other available read endpoints - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); - return true; - } - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); - return true; - } - } - - Uri mostPreferredWriteEndpoint; - ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; - - if (!this.CanUseMultipleWriteLocations()) - { - if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) - { - // Since most preferred write endpoint is unavailable, we can only refresh in background if - // we have an alternate write 
endpoint - canRefreshInBackground = writeLocationEndpoints.Count > 1; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. canRefreshInBackground = {2}", - mostPreferredLocation, - writeLocationEndpoints[0], - canRefreshInBackground); - - return true; - } - else - { - return shouldRefresh; - } - } - else if (!string.IsNullOrEmpty(mostPreferredLocation)) - { - if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) - { - shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); - return shouldRefresh; - } - else - { - DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); - return true; - } - } - else - { - return shouldRefresh; - } - } - else - { - return false; - } - } - - public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) - { - return this.CanUseMultipleWriteLocations() && - (request.ResourceType == ResourceType.Document || - (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); - } - - private void ClearStaleEndpointUnavailabilityInfo() - { - if (this.locationUnavailablityInfoByEndpoint.Any()) - { - List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); - - foreach (Uri unavailableEndpoint in unavailableEndpoints) - { - LocationUnavailabilityInfo unavailabilityInfo; - LocationUnavailabilityInfo removed; - - if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) - && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > 
this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) - { - DefaultTrace.TraceInformation( - "Removed endpoint {0} unavailable for operations {1} from unavailableEndpoints", - unavailableEndpoint, - unavailabilityInfo.UnavailableOperations); - } - } - } - } - - private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) - { - LocationUnavailabilityInfo unavailabilityInfo; - - if (expectedAvailableOperations == OperationType.None - || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) - || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) - { - return false; - } - else - { - if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) - { - return false; - } - else - { - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", - endpoint, - unavailabilityInfo.UnavailableOperations); - // Unexpired entry present. 
Endpoint is unavailable - return true; - } - } - } - - private void MarkEndpointUnavailable( - Uri unavailableEndpoint, - OperationType unavailableOperationType) - { - DateTime currentTime = DateTime.UtcNow; - LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( - unavailableEndpoint, - (Uri endpoint) => - { - return new LocationUnavailabilityInfo() - { - LastUnavailabilityCheckTimeStamp = currentTime, - UnavailableOperations = unavailableOperationType, - }; - }, - (Uri endpoint, LocationUnavailabilityInfo info) => - { - info.LastUnavailabilityCheckTimeStamp = currentTime; - info.UnavailableOperations |= unavailableOperationType; - return info; - }); - - this.UpdateLocationCache(); - - DefaultTrace.TraceInformation( - "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", - unavailableEndpoint, - unavailableOperationType, - updatedInfo.LastUnavailabilityCheckTimeStamp); - } - - private void UpdateLocationCache( - IEnumerable writeLocations = null, - IEnumerable readLocations = null, - ReadOnlyCollection preferenceList = null, - bool? 
enableMultipleWriteLocations = null) - { - lock (this.lockObject) - { - DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); - - if (preferenceList != null) - { - nextLocationInfo.PreferredLocations = preferenceList; - } - - if (enableMultipleWriteLocations.HasValue) - { - this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; - } - - this.ClearStaleEndpointUnavailabilityInfo(); - - if (readLocations != null) - { - ReadOnlyCollection availableReadLocations; - nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); - nextLocationInfo.AvailableReadLocations = availableReadLocations; - } - - if (writeLocations != null) - { - ReadOnlyCollection availableWriteLocations; - nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); - nextLocationInfo.AvailableWriteLocations = availableWriteLocations; - } - - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); - nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); - this.lastCacheUpdateTimestamp = DateTime.UtcNow; - - DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", - string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), - string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); - - this.locationInfo = nextLocationInfo; - } - } - - private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType expectedAvailableOperation, Uri 
fallbackEndpoint) - { - List endpoints = new List(); - DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; - - // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init - if (this.enableEndpointDiscovery) - { - if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) - { - List unavailableEndpoints = new List(); - - // When client can not use multiple write locations, preferred locations list should only be used - // determining read endpoints order. - // If client can use multiple write locations, preferred locations list should be used for determining - // both read and write endpoints order. - - foreach (string location in currentLocationInfo.PreferredLocations) - { - Uri endpoint; - if (endpointsByLocation.TryGetValue(location, out endpoint)) - { - if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) - { - unavailableEndpoints.Add(endpoint); - } - else - { - endpoints.Add(endpoint); - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - unavailableEndpoints.Remove(fallbackEndpoint); - } - - endpoints.AddRange(unavailableEndpoints); - } - else - { - foreach (string location in orderedLocations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location) && // location is empty during manual failover - endpointsByLocation.TryGetValue(location, out endpoint)) - { - endpoints.Add(endpoint); - } - } - } - } - - if (endpoints.Count == 0) - { - endpoints.Add(fallbackEndpoint); - } - - return endpoints.AsReadOnly(); - } - - private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) - { - Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); - List parsedLocations = new List(); - - foreach (AccountRegion location in locations) - { - Uri endpoint; - if (!string.IsNullOrEmpty(location.Name) - && Uri.TryCreate(location.Endpoint, 
UriKind.Absolute, out endpoint)) - { - endpointsByLocation[location.Name] = endpoint; - parsedLocations.Add(location.Name); - this.SetServicePointConnectionLimit(endpoint); - } - else - { - DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", - location.Name, - location.Endpoint); - } - } - - orderedLocations = parsedLocations.AsReadOnly(); - return new ReadOnlyDictionary(endpointsByLocation); - } - - private bool CanUseMultipleWriteLocations() - { - return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; - } - - private void SetServicePointConnectionLimit(Uri endpoint) - { -#if !NETSTANDARD16 - ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); - servicePoint.ConnectionLimit = this.connectionLimit; -#endif - } - - private sealed class LocationUnavailabilityInfo - { - public DateTime LastUnavailabilityCheckTimeStamp { get; set; } - public OperationType UnavailableOperations { get; set; } - } - - private sealed class DatabaseAccountLocationsInfo - { - public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) - { - this.PreferredLocations = preferredLocations; - this.AvailableWriteLocations = new List().AsReadOnly(); - this.AvailableReadLocations = new List().AsReadOnly(); - this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); - this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - } - - public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) - { - this.PreferredLocations = other.PreferredLocations; - this.AvailableWriteLocations = other.AvailableWriteLocations; - this.AvailableReadLocations = 
other.AvailableReadLocations; - this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; - this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; - this.WriteEndpoints = other.WriteEndpoints; - this.ReadEndpoints = other.ReadEndpoints; - } - - public ReadOnlyCollection PreferredLocations { get; set; } - public ReadOnlyCollection AvailableWriteLocations { get; set; } - public ReadOnlyCollection AvailableReadLocations { get; set; } - public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } - public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } - public ReadOnlyCollection WriteEndpoints { get; set; } - public ReadOnlyCollection ReadEndpoints { get; set; } - } - - [Flags] - private enum OperationType - { - None = 0x0, - Read = 0x1, - Write = 0x2 - } - } -} +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Routing +{ + using System; + using System.Collections.Concurrent; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using System.Globalization; + using System.Linq; + using System.Net; + using global::Azure.Core; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Documents; + + /// + /// Implements the abstraction to resolve target location for geo-replicated DatabaseAccount + /// with multiple writable and readable locations. 
+ /// + internal sealed class LocationCache + { + private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; + private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; + + private readonly bool enableEndpointDiscovery; + private readonly Uri defaultEndpoint; + private readonly bool useMultipleWriteLocations; + private readonly object lockObject; + private readonly TimeSpan unavailableLocationsExpirationTime; + private readonly int connectionLimit; + private readonly ConcurrentDictionary locationUnavailablityInfoByEndpoint; + + private DatabaseAccountLocationsInfo locationInfo; + private DateTime lastCacheUpdateTimestamp; + private bool enableMultipleWriteLocations; + + public LocationCache( + ReadOnlyCollection preferredLocations, + Uri defaultEndpoint, + bool enableEndpointDiscovery, + int connectionLimit, + bool useMultipleWriteLocations) + { + this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); + this.defaultEndpoint = defaultEndpoint; + this.enableEndpointDiscovery = enableEndpointDiscovery; + this.useMultipleWriteLocations = useMultipleWriteLocations; + this.connectionLimit = connectionLimit; + + this.lockObject = new object(); + this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); + this.lastCacheUpdateTimestamp = DateTime.MinValue; + this.enableMultipleWriteLocations = false; + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + +#if !(NETSTANDARD15 || NETSTANDARD16) +#if NETSTANDARD20 + // GetEntryAssembly returns null when loaded from native netstandard2.0 + if (System.Reflection.Assembly.GetEntryAssembly() != null) + { +#endif + string unavailableLocationsExpirationTimeInSecondsConfig = System.Configuration.ConfigurationManager.AppSettings[LocationCache.UnavailableLocationsExpirationTimeInSeconds]; + if 
(!string.IsNullOrEmpty(unavailableLocationsExpirationTimeInSecondsConfig)) + { + int unavailableLocationsExpirationTimeinSecondsConfigValue; + + if (!int.TryParse(unavailableLocationsExpirationTimeInSecondsConfig, out unavailableLocationsExpirationTimeinSecondsConfigValue)) + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); + } + else + { + this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(unavailableLocationsExpirationTimeinSecondsConfigValue); + } + } +#if NETSTANDARD20 + } +#endif +#endif + } + + /// + /// Gets list of read endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection ReadEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.ReadEndpoints; + } + } + + /// + /// Gets list of write endpoints ordered by + /// 1. Preferred location + /// 2. Endpoint availablity + /// + public ReadOnlyCollection WriteEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.WriteEndpoints; + } + } + + /// + /// Returns the location corresponding to the endpoint if location specific endpoint is provided. + /// For the defaultEndPoint, we will return the first available write location. + /// Returns null, in other cases. + /// + /// + /// Today we return null for defaultEndPoint if multiple write locations can be used. + /// This needs to be modifed to figure out proper location in such case. 
+ /// + public string GetLocation(Uri endpoint) + { + string location = this.locationInfo.AvailableWriteEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key ?? this.locationInfo.AvailableReadEndpointByLocation.FirstOrDefault(uri => uri.Value == endpoint).Key; + + if (location == null && endpoint == this.defaultEndpoint && !this.CanUseMultipleWriteLocations()) + { + if (this.locationInfo.AvailableWriteEndpointByLocation.Any()) + { + return this.locationInfo.AvailableWriteEndpointByLocation.First().Key; + } + } + + return location; + } + + /// + /// Set region name for a location if present in the locationcache otherwise set region name as null. + /// If endpoint's hostname is same as default endpoint hostname, set regionName as null. + /// + /// + /// + /// true if region found else false + public bool TryGetLocationForGatewayDiagnostics(Uri endpoint, out string regionName) + { + if (Uri.Compare( + endpoint, + this.defaultEndpoint, + UriComponents.Host, + UriFormat.SafeUnescaped, + StringComparison.OrdinalIgnoreCase) == 0) + { + regionName = null; + return false; + } + + regionName = this.GetLocation(endpoint); + return true; + } + + /// + /// Marks the current location unavailable for read + /// + public void MarkEndpointUnavailableForRead(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Read); + } + + /// + /// Marks the current location unavailable for write + /// + public void MarkEndpointUnavailableForWrite(Uri endpoint) + { + this.MarkEndpointUnavailable(endpoint, OperationType.Write); + } + + /// + /// Invoked when is read + /// + /// Read DatabaseAccoaunt + public void OnDatabaseAccountRead(AccountProperties databaseAccount) + { + this.UpdateLocationCache( + databaseAccount.WritableRegions, + databaseAccount.ReadableRegions, + preferenceList: null, + enableMultipleWriteLocations: databaseAccount.EnableMultipleWriteLocations); + } + + /// + /// Invoked when changes + /// + /// + public void 
OnLocationPreferenceChanged(ReadOnlyCollection preferredLocations) + { + this.UpdateLocationCache( + preferenceList: preferredLocations); + } + + public bool IsMetaData(DocumentServiceRequest request) + { + return (request.OperationType != Documents.OperationType.ExecuteJavaScript && request.ResourceType == ResourceType.StoredProcedure) || + request.ResourceType != ResourceType.Document; + + } + public bool IsMultimasterMetadataWriteRequest(DocumentServiceRequest request) + { + return !request.IsReadOnlyRequest && this.locationInfo.AvailableWriteLocations.Count > 1 + && this.IsMetaData(request) + && this.CanUseMultipleWriteLocations(); + + } + + public Uri GetHubUri() + { + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + string writeLocation = currentLocationInfo.AvailableWriteLocations[0]; + Uri locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + return locationEndpointToRoute; + } + + /// + /// Resolves request to service endpoint. + /// 1. If this is a write request + /// (a) If UseMultipleWriteLocations = true + /// (i) For document writes, resolve to most preferred and available write endpoint. + /// Once the endpoint is marked unavailable, it is moved to the end of available write endpoint. Current request will + /// be retried on next preferred available write endpoint. + /// (ii) For all other resources, always resolve to first/second (regardless of preferred locations) + /// write endpoint in . + /// Endpoint of first write location in is the only endpoint that supports + /// write operation on all resource types (except during that region's failover). + /// Only during manual failover, client would retry write on second write location in . + /// (b) Else resolve the request to first write endpoint in OR + /// second write endpoint in in case of manual failover of that location. + /// 2. 
Else resolve the request to most preferred available read endpoint (automatic failover for read requests) + /// + /// Request for which endpoint is to be resolved + /// Resolved endpoint + public Uri ResolveServiceEndpoint(DocumentServiceRequest request) + { + if (request.RequestContext != null && request.RequestContext.LocationEndpointToRoute != null) + { + return request.RequestContext.LocationEndpointToRoute; + } + + int locationIndex = request.RequestContext.LocationIndexToRoute.GetValueOrDefault(0); + + Uri locationEndpointToRoute = this.defaultEndpoint; + + if (!request.RequestContext.UsePreferredLocations.GetValueOrDefault(true) // Should not use preferred location ? + || (request.OperationType.IsWriteOperation() && !this.CanUseMultipleWriteLocations(request))) + { + // For non-document resource types in case of client can use multiple write locations + // or when client cannot use multiple write locations, flip-flop between the + // first and the second writable region in DatabaseAccount (for manual failover) + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + if (this.enableEndpointDiscovery && currentLocationInfo.AvailableWriteLocations.Count > 0) + { + locationIndex = Math.Min(locationIndex % 2, currentLocationInfo.AvailableWriteLocations.Count - 1); + string writeLocation = currentLocationInfo.AvailableWriteLocations[locationIndex]; + locationEndpointToRoute = currentLocationInfo.AvailableWriteEndpointByLocation[writeLocation]; + } + } + else + { + ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? 
this.WriteEndpoints : this.ReadEndpoints; + locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; + } + + request.RequestContext.RouteToLocation(locationEndpointToRoute); + return locationEndpointToRoute; + } + + public bool ShouldRefreshEndpoints(out bool canRefreshInBackground) + { + canRefreshInBackground = true; + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + string mostPreferredLocation = currentLocationInfo.PreferredLocations.FirstOrDefault(); + + // we should schedule refresh in background if we are unable to target the user's most preferredLocation. + if (this.enableEndpointDiscovery) + { + // Refresh if client opts-in to useMultipleWriteLocations but server-side setting is disabled + bool shouldRefresh = this.useMultipleWriteLocations && !this.enableMultipleWriteLocations; + + ReadOnlyCollection readLocationEndpoints = currentLocationInfo.ReadEndpoints; + + if (this.IsEndpointUnavailable(readLocationEndpoints[0], OperationType.Read)) + { + canRefreshInBackground = readLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since the first read endpoint {0} is not available for read. 
canRefreshInBackground = {1}", + readLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + + if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + Uri mostPreferredReadEndpoint; + + if (currentLocationInfo.AvailableReadEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredReadEndpoint)) + { + if (mostPreferredReadEndpoint != readLocationEndpoints[0]) + { + // For reads, we can always refresh in background as we can alternate to + // other available read endpoints + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not available for read.", mostPreferredLocation); + return true; + } + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available read locations.", mostPreferredLocation); + return true; + } + } + + Uri mostPreferredWriteEndpoint; + ReadOnlyCollection writeLocationEndpoints = currentLocationInfo.WriteEndpoints; + + if (!this.CanUseMultipleWriteLocations()) + { + if (this.IsEndpointUnavailable(writeLocationEndpoints[0], OperationType.Write)) + { + // Since most preferred write endpoint is unavailable, we can only refresh in background if + // we have an alternate write endpoint + canRefreshInBackground = writeLocationEndpoints.Count > 1; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} endpoint {1} is not available for write. 
canRefreshInBackground = {2}", + mostPreferredLocation, + writeLocationEndpoints[0], + canRefreshInBackground); + + return true; + } + else + { + return shouldRefresh; + } + } + else if (!string.IsNullOrEmpty(mostPreferredLocation)) + { + if (currentLocationInfo.AvailableWriteEndpointByLocation.TryGetValue(mostPreferredLocation, out mostPreferredWriteEndpoint)) + { + shouldRefresh |= mostPreferredWriteEndpoint != writeLocationEndpoints[0]; + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = {0} since most preferred location {1} is not available for write.", shouldRefresh, mostPreferredLocation); + return shouldRefresh; + } + else + { + DefaultTrace.TraceInformation("ShouldRefreshEndpoints = true since most preferred location {0} is not in available write locations", mostPreferredLocation); + return true; + } + } + else + { + return shouldRefresh; + } + } + else + { + return false; + } + } + + public bool CanUseMultipleWriteLocations(DocumentServiceRequest request) + { + return this.CanUseMultipleWriteLocations() && + (request.ResourceType == ResourceType.Document || + (request.ResourceType == ResourceType.StoredProcedure && request.OperationType == Documents.OperationType.ExecuteJavaScript)); + } + + private void ClearStaleEndpointUnavailabilityInfo() + { + if (this.locationUnavailablityInfoByEndpoint.Any()) + { + List unavailableEndpoints = this.locationUnavailablityInfoByEndpoint.Keys.ToList(); + + foreach (Uri unavailableEndpoint in unavailableEndpoints) + { + LocationUnavailabilityInfo unavailabilityInfo; + LocationUnavailabilityInfo removed; + + if (this.locationUnavailablityInfoByEndpoint.TryGetValue(unavailableEndpoint, out unavailabilityInfo) + && DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.TryRemove(unavailableEndpoint, out removed)) + { + DefaultTrace.TraceInformation( + "Removed endpoint {0} unavailable for operations {1} from 
unavailableEndpoints", + unavailableEndpoint, + unavailabilityInfo.UnavailableOperations); + } + } + } + } + + private bool IsEndpointUnavailable(Uri endpoint, OperationType expectedAvailableOperations) + { + LocationUnavailabilityInfo unavailabilityInfo; + + if (expectedAvailableOperations == OperationType.None + || !this.locationUnavailablityInfoByEndpoint.TryGetValue(endpoint, out unavailabilityInfo) + || !unavailabilityInfo.UnavailableOperations.HasFlag(expectedAvailableOperations)) + { + return false; + } + else + { + if (DateTime.UtcNow - unavailabilityInfo.LastUnavailabilityCheckTimeStamp > this.unavailableLocationsExpirationTime) + { + return false; + } + else + { + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for operations {1} present in unavailableEndpoints", + endpoint, + unavailabilityInfo.UnavailableOperations); + // Unexpired entry present. Endpoint is unavailable + return true; + } + } + } + + private void MarkEndpointUnavailable( + Uri unavailableEndpoint, + OperationType unavailableOperationType) + { + DateTime currentTime = DateTime.UtcNow; + LocationUnavailabilityInfo updatedInfo = this.locationUnavailablityInfoByEndpoint.AddOrUpdate( + unavailableEndpoint, + (Uri endpoint) => + { + return new LocationUnavailabilityInfo() + { + LastUnavailabilityCheckTimeStamp = currentTime, + UnavailableOperations = unavailableOperationType, + }; + }, + (Uri endpoint, LocationUnavailabilityInfo info) => + { + info.LastUnavailabilityCheckTimeStamp = currentTime; + info.UnavailableOperations |= unavailableOperationType; + return info; + }); + + this.UpdateLocationCache(); + + DefaultTrace.TraceInformation( + "Endpoint {0} unavailable for {1} added/updated to unavailableEndpoints with timestamp {2}", + unavailableEndpoint, + unavailableOperationType, + updatedInfo.LastUnavailabilityCheckTimeStamp); + } + + private void UpdateLocationCache( + IEnumerable writeLocations = null, + IEnumerable readLocations = null, + ReadOnlyCollection preferenceList = 
null, + bool? enableMultipleWriteLocations = null) + { + lock (this.lockObject) + { + DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); + + if (preferenceList != null) + { + nextLocationInfo.PreferredLocations = preferenceList; + } + + if (enableMultipleWriteLocations.HasValue) + { + this.enableMultipleWriteLocations = enableMultipleWriteLocations.Value; + } + + this.ClearStaleEndpointUnavailabilityInfo(); + + if (readLocations != null) + { + ReadOnlyCollection availableReadLocations; + nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); + nextLocationInfo.AvailableReadLocations = availableReadLocations; + } + + if (writeLocations != null) + { + ReadOnlyCollection availableWriteLocations; + nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); + nextLocationInfo.AvailableWriteLocations = availableWriteLocations; + } + + nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); + nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); + this.lastCacheUpdateTimestamp = DateTime.UtcNow; + + DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", + string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), + string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); + + this.locationInfo = nextLocationInfo; + } + } + + private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionary endpointsByLocation, ReadOnlyCollection orderedLocations, OperationType 
expectedAvailableOperation, Uri fallbackEndpoint) + { + List endpoints = new List(); + DatabaseAccountLocationsInfo currentLocationInfo = this.locationInfo; + + // if enableEndpointDiscovery is false, we always use the defaultEndpoint that user passed in during documentClient init + if (this.enableEndpointDiscovery) + { + if (this.CanUseMultipleWriteLocations() || expectedAvailableOperation.HasFlag(OperationType.Read)) + { + List unavailableEndpoints = new List(); + + // When client can not use multiple write locations, preferred locations list should only be used + // determining read endpoints order. + // If client can use multiple write locations, preferred locations list should be used for determining + // both read and write endpoints order. + + foreach (string location in currentLocationInfo.PreferredLocations) + { + Uri endpoint; + if (endpointsByLocation.TryGetValue(location, out endpoint)) + { + if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) + { + unavailableEndpoints.Add(endpoint); + } + else + { + endpoints.Add(endpoint); + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + unavailableEndpoints.Remove(fallbackEndpoint); + } + + endpoints.AddRange(unavailableEndpoints); + } + else + { + foreach (string location in orderedLocations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location) && // location is empty during manual failover + endpointsByLocation.TryGetValue(location, out endpoint)) + { + endpoints.Add(endpoint); + } + } + } + } + + if (endpoints.Count == 0) + { + endpoints.Add(fallbackEndpoint); + } + + return endpoints.AsReadOnly(); + } + + private ReadOnlyDictionary GetEndpointByLocation(IEnumerable locations, out ReadOnlyCollection orderedLocations) + { + Dictionary endpointsByLocation = new Dictionary(StringComparer.OrdinalIgnoreCase); + List parsedLocations = new List(); + + foreach (AccountRegion location in locations) + { + Uri endpoint; + if (!string.IsNullOrEmpty(location.Name) + && 
Uri.TryCreate(location.Endpoint, UriKind.Absolute, out endpoint)) + { + endpointsByLocation[location.Name] = endpoint; + parsedLocations.Add(location.Name); + this.SetServicePointConnectionLimit(endpoint); + } + else + { + DefaultTrace.TraceInformation("GetAvailableEndpointsByLocation() - skipping add for location = {0} as it is location name is either empty or endpoint is malformed {1}", + location.Name, + location.Endpoint); + } + } + + orderedLocations = parsedLocations.AsReadOnly(); + return new ReadOnlyDictionary(endpointsByLocation); + } + + private bool CanUseMultipleWriteLocations() + { + return this.useMultipleWriteLocations && this.enableMultipleWriteLocations; + } + + private void SetServicePointConnectionLimit(Uri endpoint) + { +#if !NETSTANDARD16 + ServicePointAccessor servicePoint = ServicePointAccessor.FindServicePoint(endpoint); + servicePoint.ConnectionLimit = this.connectionLimit; +#endif + } + + private sealed class LocationUnavailabilityInfo + { + public DateTime LastUnavailabilityCheckTimeStamp { get; set; } + public OperationType UnavailableOperations { get; set; } + } + + private sealed class DatabaseAccountLocationsInfo + { + public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocations, Uri defaultEndpoint) + { + this.PreferredLocations = preferredLocations; + this.AvailableWriteLocations = new List().AsReadOnly(); + this.AvailableReadLocations = new List().AsReadOnly(); + this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); + this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + } + + public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) + { + this.PreferredLocations = other.PreferredLocations; + this.AvailableWriteLocations = 
other.AvailableWriteLocations; + this.AvailableReadLocations = other.AvailableReadLocations; + this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; + this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; + this.WriteEndpoints = other.WriteEndpoints; + this.ReadEndpoints = other.ReadEndpoints; + } + + public ReadOnlyCollection PreferredLocations { get; set; } + public ReadOnlyCollection AvailableWriteLocations { get; set; } + public ReadOnlyCollection AvailableReadLocations { get; set; } + public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } + public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } + public ReadOnlyCollection WriteEndpoints { get; set; } + public ReadOnlyCollection ReadEndpoints { get; set; } + } + + [Flags] + private enum OperationType + { + None = 0x0, + Read = 0x1, + Write = 0x2 + } + } +} From 9d61eae0859e264d99e317389d1e129033135679 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Thu, 7 Sep 2023 12:04:14 -0700 Subject: [PATCH 06/31] Code changes to clean up the PPAF retry logic fix. --- .../src/ClientRetryPolicy.cs | 18 ++---------------- .../src/Routing/LocationCache.cs | 2 +- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2933baa1a9..5bc91aebd8 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -247,8 +247,7 @@ private async Task ShouldRetryInternalAsync( } // Received 503 due to client connect timeout or Gateway - if (statusCode == HttpStatusCode.ServiceUnavailable - && ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode)) + if (statusCode == HttpStatusCode.ServiceUnavailable) { DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. 
Failed Location: {0}; ResourceAddress: {1}", this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty, @@ -265,12 +264,6 @@ private async Task ShouldRetryInternalAsync( return null; } - private static bool IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode) - { - return subStatusCode == SubStatusCodes.Unknown || - (subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus()); - } - private async Task ShouldRetryOnEndpointFailureAsync( bool isReadRequest, bool markBothReadAndWriteAsUnavailable, @@ -390,7 +383,7 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable() /// /// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available. - /// We try and retry the request only if there are other regions available. + /// We try and retry the request only if there are other regions available. The retry logic is applicable for single master write accounts as well. 
/// private ShouldRetryResult ShouldRetryOnServiceUnavailable() { @@ -400,13 +393,6 @@ private ShouldRetryResult ShouldRetryOnServiceUnavailable() return ShouldRetryResult.NoRetry(); } - if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available - return ShouldRetryResult.NoRetry(); - } - int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; if (availablePreferredLocations <= 1) diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 9c6308d8b6..69fc57ecc4 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -276,7 +276,7 @@ public Uri ResolveServiceEndpoint(DocumentServiceRequest request) } else { - ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? this.WriteEndpoints : this.ReadEndpoints; + ReadOnlyCollection endpoints = this.ReadEndpoints; locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; } From d05bc19f86fe601e409fef62880cbe4225cce58f Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Fri, 8 Sep 2023 12:06:55 -0700 Subject: [PATCH 07/31] Code changes to revert location cache changes. 
--- Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs | 2 +- Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 2c07f060f8..ee67522d6b 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - internal bool EnablePartitionLevelFailover { get; set; } = false; + public bool EnablePartitionLevelFailover { get; set; } = false; /// /// Quorum Read allowed with eventual consistency account or consistent prefix account. diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 69fc57ecc4..9c6308d8b6 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -276,7 +276,7 @@ public Uri ResolveServiceEndpoint(DocumentServiceRequest request) } else { - ReadOnlyCollection endpoints = this.ReadEndpoints; + ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? this.WriteEndpoints : this.ReadEndpoints; locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; } From cea2b1715849d2e4213411c9dd1b0f23696d3ba9 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Fri, 8 Sep 2023 12:10:05 -0700 Subject: [PATCH 08/31] Code changes ro revert location cache changes. 
--- Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 69fc57ecc4..9c6308d8b6 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -276,7 +276,7 @@ public Uri ResolveServiceEndpoint(DocumentServiceRequest request) } else { - ReadOnlyCollection endpoints = this.ReadEndpoints; + ReadOnlyCollection endpoints = request.OperationType.IsWriteOperation() ? this.WriteEndpoints : this.ReadEndpoints; locationEndpointToRoute = endpoints[locationIndex % endpoints.Count]; } From e23c50cdd31219c22a36cdbd3049be97d5219edf Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Fri, 15 Sep 2023 15:57:26 -0700 Subject: [PATCH 09/31] Code changes to fix some of the failing tests. --- .../src/CosmosClientOptions.cs | 2 +- Microsoft.Azure.Cosmos/src/DocumentClient.cs | 2 +- .../src/Util/ConfigurationManager.cs | 27 +++++++++++++++- .../LocationCacheTests.cs | 32 +++++++++---------- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index ee67522d6b..2c07f060f8 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - public bool EnablePartitionLevelFailover { get; set; } = false; + internal bool EnablePartitionLevelFailover { get; set; } = false; /// /// Quorum Read allowed with eventual consistency account or consistent prefix account. 
diff --git a/Microsoft.Azure.Cosmos/src/DocumentClient.cs b/Microsoft.Azure.Cosmos/src/DocumentClient.cs index 9dee309028..ba86ad8b26 100644 --- a/Microsoft.Azure.Cosmos/src/DocumentClient.cs +++ b/Microsoft.Azure.Cosmos/src/DocumentClient.cs @@ -925,7 +925,7 @@ internal virtual void Initialize(Uri serviceEndpoint, #endif this.GlobalEndpointManager = new GlobalEndpointManager(this, this.ConnectionPolicy); - this.PartitionKeyRangeLocation = this.ConnectionPolicy.EnablePartitionLevelFailover + this.PartitionKeyRangeLocation = ConfigurationManager.IsPartitionLevelFailoverEnabled(this.ConnectionPolicy) ? new GlobalPartitionEndpointManagerCore(this.GlobalEndpointManager) : GlobalPartitionEndpointManagerNoOp.Instance; diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index 748f81833a..f82d03d532 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -10,11 +10,18 @@ internal static class ConfigurationManager { /// /// A read-only string containing the environment variablename for enabling replica validation. - /// This will eventually be removed oncereplica valdiatin is enabled by default for both preview + /// This will eventually be removed once replica valdiatin is enabled by default for both preview /// and GA. /// internal static readonly string ReplicaConnectivityValidationEnabled = "AZURE_COSMOS_REPLICA_VALIDATION_ENABLED"; + /// + /// A read-only string containing the environment variablename for enabling per partition automatic failover. + /// This will eventually be removed once per partition automatic failover is enabled by default for both preview + /// and GA. 
+ /// + internal static readonly string PartitionLevelFailoverEnabled = "AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED"; + public static T GetEnvironmentVariable(string variable, T defaultValue) { string value = Environment.GetEnvironmentVariable(variable); @@ -50,5 +57,23 @@ public static bool IsReplicaAddressValidationEnabled( variable: ConfigurationManager.ReplicaConnectivityValidationEnabled, defaultValue: replicaValidationDefaultValue); } + + /// + /// Gets the boolean value of the partition level failover environment variable. Note that, partition level failover + /// is disabled by default for both preview and GA releases. The user can set the respective environment variable + /// 'AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED' to override the value for both preview and GA. The method will + /// eventually be removed, once partition level failover is enabled by default for both preview and GA. + /// + /// An instance of containing the client options. + /// A boolean flag indicating if partition level failover is enabled. 
+ public static bool IsPartitionLevelFailoverEnabled( + ConnectionPolicy connectionPolicy) + { + return connectionPolicy != null + && ConfigurationManager + .GetEnvironmentVariable( + variable: ConfigurationManager.PartitionLevelFailoverEnabled, + defaultValue: connectionPolicy.EnablePartitionLevelFailover); + } } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index 8273897374..efcd174d6d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -676,22 +676,22 @@ await BackoffRetryUtility.ExecuteAsync( } [DataTestMethod] - [DataRow(true, false, false, false, false, DisplayName = "Read request - Single master - no preferred locations - should NOT retry")] - [DataRow(false, false, false, false, false, DisplayName = "Write request - Single master - no preferred locations - should NOT retry")] - [DataRow(true, true, false, false, false, DisplayName = "Read request - Multi master - no preferred locations - should NOT retry")] - [DataRow(false, true, false, false, false, DisplayName = "Write request - Multi master - no preferred locations - should NOT retry")] - [DataRow(true, false, true, true, false, DisplayName = "Read request - Single master - with preferred locations - should retry")] - [DataRow(false, false, true, false, false, DisplayName = "Write request - Single master - with preferred locations - should NOT retry")] - [DataRow(true, true, true, true, false, DisplayName = "Read request - Multi master - with preferred locations - should retry")] - [DataRow(false, true, true, true, false, DisplayName = "Write request - Multi master - with preferred locations - should retry")] - [DataRow(true, false, false, false, true, DisplayName = "Read request - Single master - no preferred locations - should NOT retry")] - 
[DataRow(false, false, false, false, true, DisplayName = "Write request - Single master - no preferred locations - should NOT retry")] - [DataRow(true, true, false, false, true, DisplayName = "Read request - Multi master - no preferred locations - should NOT retry")] - [DataRow(false, true, false, false, true, DisplayName = "Write request - Multi master - no preferred locations - should NOT retry")] - [DataRow(true, false, true, true, true, DisplayName = "Read request - Single master - with preferred locations - should retry")] - [DataRow(false, false, true, false, true, DisplayName = "Write request - Single master - with preferred locations - should NOT retry")] - [DataRow(true, true, true, true, true, DisplayName = "Read request - Multi master - with preferred locations - should retry")] - [DataRow(false, true, true, true, true, DisplayName = "Write request - Multi master - with preferred locations - should retry")] + [DataRow(true, false, false, false, false, DisplayName = "Read request - Single master - no preferred locations - without partition level failover - should NOT retry")] + [DataRow(false, false, false, false, false, DisplayName = "Write request - Single master - no preferred locations - without partition level failover - should NOT retry")] + [DataRow(true, true, false, false, false, DisplayName = "Read request - Multi master - no preferred locations - without partition level failover - should NOT retry")] + [DataRow(false, true, false, false, false, DisplayName = "Write request - Multi master - no preferred locations - without partition level failover - should NOT retry")] + [DataRow(true, false, true, true, false, DisplayName = "Read request - Single master - with preferred locations - without partition level failover - should retry")] + [DataRow(false, false, true, true, false, DisplayName = "Write request - Single master - with preferred locations - without partition level failover - should retry")] + [DataRow(true, true, true, true, false, 
DisplayName = "Read request - Multi master - with preferred locations - without partition level failover - should retry")] + [DataRow(false, true, true, true, false, DisplayName = "Write request - Multi master - with preferred locations - without partition level failover - should retry")] + [DataRow(true, false, false, false, true, DisplayName = "Read request - Single master - no preferred locations - with partition level failover - should NOT retry")] + [DataRow(false, false, false, false, true, DisplayName = "Write request - Single master - no preferred locations - with partition level failover - should NOT retry")] + [DataRow(true, true, false, false, true, DisplayName = "Read request - Multi master - no preferred locations - with partition level failover - should NOT retry")] + [DataRow(false, true, false, false, true, DisplayName = "Write request - Multi master - no preferred locations - with partition level failover - should NOT retry")] + [DataRow(true, false, true, true, true, DisplayName = "Read request - Single master - with preferred locations - with partition level failover - should retry")] + [DataRow(false, false, true, true, true, DisplayName = "Write request - Single master - with preferred locations - with partition level failover - should retry")] + [DataRow(true, true, true, true, true, DisplayName = "Read request - Multi master - with preferred locations - with partition level failover - should retry")] + [DataRow(false, true, true, true, true, DisplayName = "Write request - Multi master - with preferred locations - with partition level failover - should retry")] public async Task ClientRetryPolicy_ValidateRetryOnServiceUnavailable( bool isReadRequest, bool useMultipleWriteLocations, From 12d1193963cea3590c53b4dacd4b18baa9bbaf99 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 18 Sep 2023 16:40:39 -0700 Subject: [PATCH 10/31] Code changes to fix unit tests. 
--- .../GlobalPartitionEndpointManagerTests.cs | 85 ++++++++++++++++--- 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs index a075d439f6..bac068518c 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs @@ -108,7 +108,7 @@ public async Task TestWriteForbiddenScenarioAsync() [TestMethod] [Timeout(10000)] - public async Task TestServiceUnavailableExceptionScenarioAsync() + public async Task CreateItemAsync_WithPreferredRegionsAndServiceUnavailable_ShouldRetryAndSucceed() { GlobalPartitionEndpointManagerTests.SetupAccountAndCacheOperations( out string secondaryRegionNameForUri, @@ -168,16 +168,7 @@ public async Task TestServiceUnavailableExceptionScenarioAsync() Pk = "TestPk" }; - // First create will fail because it is not certain if the payload was sent or not. 
- try - { - await container.CreateItemAsync(toDoActivity, new Cosmos.PartitionKey(toDoActivity.Pk)); - Assert.Fail("Should throw an exception"); - } - catch (CosmosException ce) when (ce.StatusCode == HttpStatusCode.ServiceUnavailable) - { - Assert.IsNotNull(ce); - } + await container.CreateItemAsync(toDoActivity, new Cosmos.PartitionKey(toDoActivity.Pk)); ItemResponse response = await container.CreateItemAsync(toDoActivity, new Cosmos.PartitionKey(toDoActivity.Pk)); Assert.AreEqual(HttpStatusCode.Created, response.StatusCode); @@ -203,6 +194,78 @@ public async Task TestServiceUnavailableExceptionScenarioAsync() Assert.AreEqual(HttpStatusCode.Created, response.StatusCode); } + + [TestMethod] + [Timeout(10000)] + public async Task CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_ShouldThrowServiceUnavailableException() + { + GlobalPartitionEndpointManagerTests.SetupAccountAndCacheOperations( + out string secondaryRegionNameForUri, + out string globalEndpoint, + out string secondaryRegionEndpiont, + out string databaseName, + out string containerName, + out ResourceId containerResourceId, + out Mock mockHttpHandler, + out IReadOnlyList primaryRegionPartitionKeyRangeIds, + out TransportAddressUri primaryRegionprimaryReplicaUri); + + Mock mockTransport = new Mock(MockBehavior.Strict); + + MockSetupsHelper.SetupServiceUnavailableException( + mockTransport, + primaryRegionprimaryReplicaUri); + + mockTransport.Setup(x => x.Dispose()); + + // Partition key ranges are the same in both regions so the SDK + // does not need to go the secondary to get the partition key ranges. 
+ // Only the addresses need to be mocked on the secondary + MockSetupsHelper.SetupAddresses( + mockHttpHandler: mockHttpHandler, + partitionKeyRangeId: primaryRegionPartitionKeyRangeIds.First(), + regionEndpoint: secondaryRegionEndpiont, + regionName: secondaryRegionNameForUri, + containerResourceId: containerResourceId, + primaryReplicaUri: out TransportAddressUri secondaryRegionPrimaryReplicaUri); + + MockSetupsHelper.SetupCreateItemResponse( + mockTransport, + secondaryRegionPrimaryReplicaUri); + + CosmosClientOptions cosmosClientOptions = new CosmosClientOptions() + { + EnablePartitionLevelFailover = true, + ConsistencyLevel = Cosmos.ConsistencyLevel.Strong, + HttpClientFactory = () => new HttpClient(new HttpHandlerHelper(mockHttpHandler.Object)), + TransportClientHandlerFactory = (original) => mockTransport.Object, + }; + + using CosmosClient customClient = new CosmosClient( + globalEndpoint, + Convert.ToBase64String(Encoding.UTF8.GetBytes(Guid.NewGuid().ToString())), + cosmosClientOptions); + + Container container = customClient.GetContainer(databaseName, containerName); + + ToDoActivity toDoActivity = new ToDoActivity() + { + Id = "TestItem", + Pk = "TestPk" + }; + + // First create will fail because it is not certain if the payload was sent or not. + try + { + await container.CreateItemAsync(toDoActivity, new Cosmos.PartitionKey(toDoActivity.Pk)); + Assert.Fail("Should throw an exception"); + } + catch (CosmosException ce) when (ce.StatusCode == HttpStatusCode.ServiceUnavailable) + { + Assert.IsNotNull(ce); + } + } + [TestMethod] [Timeout(10000)] public async Task TestRequestTimeoutExceptionScenarioAsync() From 757ca0146aa5c6884612b668c5c99275087a37d3 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Tue, 19 Sep 2023 14:54:17 -0700 Subject: [PATCH 11/31] Code changes to add unit tests for client options. 
--- .../src/CosmosClientOptions.cs | 12 +- Microsoft.Azure.Cosmos/src/DocumentClient.cs | 2 +- .../src/Util/ConfigurationManager.cs | 9 +- .../CosmosClientOptionsUnitTests.cs | 141 +++++++++++++++++- 4 files changed, 152 insertions(+), 12 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 2c07f060f8..f8a48e70f3 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -608,7 +608,7 @@ public Func HttpClientFactory /// /// Enable partition key level failover /// - internal bool EnablePartitionLevelFailover { get; set; } = false; + internal bool EnablePartitionLevelFailover { get; set; } = ConfigurationManager.IsPartitionLevelFailoverEnabled(defaultValue: false); /// /// Quorum Read allowed with eventual consistency account or consistent prefix account. @@ -752,6 +752,7 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId) { this.ValidateDirectTCPSettings(); this.ValidateLimitToEndpointSettings(); + this.ValidatePartitionLevelFailoverSettings(); ConnectionPolicy connectionPolicy = new ConnectionPolicy() { @@ -884,6 +885,15 @@ private void ValidateLimitToEndpointSettings() } } + private void ValidatePartitionLevelFailoverSettings() + { + if (this.EnablePartitionLevelFailover + && (this.ApplicationPreferredRegions == null || this.ApplicationPreferredRegions.Count == 0)) + { + throw new ArgumentException($"{nameof(this.ApplicationPreferredRegions)} is required when {nameof(this.EnablePartitionLevelFailover)} is enabled."); + } + } + private void ValidateDirectTCPSettings() { string settingName = string.Empty; diff --git a/Microsoft.Azure.Cosmos/src/DocumentClient.cs b/Microsoft.Azure.Cosmos/src/DocumentClient.cs index ba86ad8b26..9dee309028 100644 --- a/Microsoft.Azure.Cosmos/src/DocumentClient.cs +++ b/Microsoft.Azure.Cosmos/src/DocumentClient.cs @@ -925,7 +925,7 @@ internal virtual void Initialize(Uri 
serviceEndpoint, #endif this.GlobalEndpointManager = new GlobalEndpointManager(this, this.ConnectionPolicy); - this.PartitionKeyRangeLocation = ConfigurationManager.IsPartitionLevelFailoverEnabled(this.ConnectionPolicy) + this.PartitionKeyRangeLocation = this.ConnectionPolicy.EnablePartitionLevelFailover ? new GlobalPartitionEndpointManagerCore(this.GlobalEndpointManager) : GlobalPartitionEndpointManagerNoOp.Instance; diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index f82d03d532..71943c7731 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -64,16 +64,15 @@ public static bool IsReplicaAddressValidationEnabled( /// 'AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED' to override the value for both preview and GA. The method will /// eventually be removed, once partition level failover is enabled by default for both preview and GA. /// - /// An instance of containing the client options. + /// A boolean field containing the default value for partition level failover. /// A boolean flag indicating if partition level failover is enabled. 
public static bool IsPartitionLevelFailoverEnabled( - ConnectionPolicy connectionPolicy) + bool defaultValue) { - return connectionPolicy != null - && ConfigurationManager + return ConfigurationManager .GetEnvironmentVariable( variable: ConfigurationManager.PartitionLevelFailoverEnabled, - defaultValue: connectionPolicy.EnablePartitionLevelFailover); + defaultValue: defaultValue); } } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index 37d89c389a..98866edc1e 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -10,7 +10,6 @@ namespace Microsoft.Azure.Cosmos.Tests using System.Linq; using System.Net; using System.Net.Http; - using Cosmos.Telemetry; using global::Azure.Core; using Microsoft.Azure.Cosmos.Fluent; using Microsoft.Azure.Documents; @@ -109,8 +108,7 @@ public void VerifyCosmosConfigurationPropertiesGetUpdated() .WithThrottlingRetryOptions(maxRetryWaitTime, maxRetryAttemptsOnThrottledRequests) .WithBulkExecution(true) .WithSerializerOptions(cosmosSerializerOptions) - .WithConsistencyLevel(consistencyLevel) - .WithPartitionLevelFailoverEnabled(); + .WithConsistencyLevel(consistencyLevel); cosmosClient = cosmosClientBuilder.Build(new MockDocumentClient()); clientOptions = cosmosClient.ClientOptions; @@ -133,7 +131,7 @@ public void VerifyCosmosConfigurationPropertiesGetUpdated() Assert.IsTrue(object.ReferenceEquals(webProxy, clientOptions.WebProxy)); Assert.IsTrue(clientOptions.AllowBulkExecution); Assert.AreEqual(consistencyLevel, clientOptions.ConsistencyLevel); - Assert.IsTrue(clientOptions.EnablePartitionLevelFailover); + Assert.IsFalse(clientOptions.EnablePartitionLevelFailover); Assert.IsTrue(clientOptions.EnableAdvancedReplicaSelectionForTcp.HasValue && 
clientOptions.EnableAdvancedReplicaSelectionForTcp.Value); //Verify GetConnectionPolicy returns the correct values @@ -148,7 +146,7 @@ public void VerifyCosmosConfigurationPropertiesGetUpdated() Assert.AreEqual(maxRetryAttemptsOnThrottledRequests, policy.RetryOptions.MaxRetryAttemptsOnThrottledRequests); Assert.AreEqual((int)maxRetryWaitTime.TotalSeconds, policy.RetryOptions.MaxRetryWaitTimeInSeconds); Assert.AreEqual((Documents.ConsistencyLevel)consistencyLevel, clientOptions.GetDocumentsConsistencyLevel()); - Assert.IsTrue(policy.EnablePartitionLevelFailover); + Assert.IsFalse(policy.EnablePartitionLevelFailover); Assert.IsTrue(clientOptions.EnableAdvancedReplicaSelectionForTcp.Value); IReadOnlyList preferredLocations = new List() { Regions.AustraliaCentral, Regions.AustraliaCentral2 }; @@ -193,6 +191,139 @@ public void VerifyCosmosConfigurationPropertiesGetUpdated() CollectionAssert.AreEqual(preferredLocations.ToArray(), policy.PreferredLocations.ToArray()); } + [TestMethod] + [DataRow(true, DisplayName = "Validate that when enevironment variable is used to enable PPAF, the outcome of the test should be same.")] + [DataRow(false, DisplayName = "Validate that when CosmosClientOptions is used to enable PPAF, the outcome of the test should be same.")] + [Owner("dkunda")] + public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredRegionsNotSet_ShouldThrowArgumentException(bool useEnvironmentVariable) + { + try + { + if (useEnvironmentVariable) + { + Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelFailoverEnabled, "True"); + } + + string endpoint = AccountEndpoint; + string key = MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey; + TimeSpan requestTimeout = TimeSpan.FromDays(1); + string userAgentSuffix = "testSuffix"; + RequestHandler preProcessHandler = new TestHandler(); + ApiType apiType = ApiType.Sql; + int maxRetryAttemptsOnThrottledRequests = 9999; + TimeSpan maxRetryWaitTime = TimeSpan.FromHours(6); + 
CosmosSerializationOptions cosmosSerializerOptions = new CosmosSerializationOptions() + { + IgnoreNullValues = true, + PropertyNamingPolicy = CosmosPropertyNamingPolicy.CamelCase, + }; + + Cosmos.ConsistencyLevel consistencyLevel = Cosmos.ConsistencyLevel.ConsistentPrefix; + CosmosClientBuilder cosmosClientBuilder = new( + accountEndpoint: endpoint, + authKeyOrResourceToken: key); + + cosmosClientBuilder + .WithConnectionModeDirect() + .WithRequestTimeout(requestTimeout) + .WithApplicationName(userAgentSuffix) + .AddCustomHandlers(preProcessHandler) + .WithApiType(apiType) + .WithThrottlingRetryOptions(maxRetryWaitTime, maxRetryAttemptsOnThrottledRequests) + .WithSerializerOptions(cosmosSerializerOptions) + .WithConsistencyLevel(consistencyLevel); + + if (!useEnvironmentVariable) + { + cosmosClientBuilder + .WithPartitionLevelFailoverEnabled(); + } + + ArgumentException exception = Assert.ThrowsException(() => cosmosClientBuilder.Build()); + + Assert.AreEqual( + expected: "ApplicationPreferredRegions is required when EnablePartitionLevelFailover is enabled.", + actual: exception.Message); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelFailoverEnabled, null); + } + } + + [TestMethod] + [DataRow(true, DisplayName = "Validate that when enevironment variable is used to enable PPAF, the outcome of the test should be same.")] + [DataRow(false, DisplayName = "Validate that when CosmosClientOptions is used to enable PPAF, the outcome of the test should be same.")] + [Owner("dkunda")] + public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredRegionsSet_ShouldInitializeSuccessfully(bool useEnvironmentVariable) + { + try + { + if (useEnvironmentVariable) + { + Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelFailoverEnabled, "True"); + } + + string endpoint = AccountEndpoint; + string key = MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey; + TimeSpan requestTimeout = TimeSpan.FromDays(1); 
+ string userAgentSuffix = "testSuffix"; + RequestHandler preProcessHandler = new TestHandler(); + ApiType apiType = ApiType.Sql; + int maxRetryAttemptsOnThrottledRequests = 9999; + TimeSpan maxRetryWaitTime = TimeSpan.FromHours(6); + CosmosSerializationOptions cosmosSerializerOptions = new CosmosSerializationOptions() + { + IgnoreNullValues = true, + PropertyNamingPolicy = CosmosPropertyNamingPolicy.CamelCase, + }; + + Cosmos.ConsistencyLevel consistencyLevel = Cosmos.ConsistencyLevel.ConsistentPrefix; + CosmosClientBuilder cosmosClientBuilder = new( + accountEndpoint: endpoint, + authKeyOrResourceToken: key); + + cosmosClientBuilder + .WithConnectionModeDirect() + .WithRequestTimeout(requestTimeout) + .WithApplicationName(userAgentSuffix) + .AddCustomHandlers(preProcessHandler) + .WithApiType(apiType) + .WithThrottlingRetryOptions(maxRetryWaitTime, maxRetryAttemptsOnThrottledRequests) + .WithSerializerOptions(cosmosSerializerOptions) + .WithConsistencyLevel(consistencyLevel) + .WithPartitionLevelFailoverEnabled() + .WithApplicationPreferredRegions( + new List() + { + Regions.NorthCentralUS, + Regions.WestUS, + Regions.EastAsia, + }); + + CosmosClientOptions clientOptions = cosmosClientBuilder.Build().ClientOptions; + + Assert.AreEqual(ConnectionMode.Direct, clientOptions.ConnectionMode); + Assert.AreEqual(requestTimeout, clientOptions.RequestTimeout); + Assert.AreEqual(userAgentSuffix, clientOptions.ApplicationName); + Assert.AreEqual(preProcessHandler, clientOptions.CustomHandlers[0]); + Assert.AreEqual(apiType, clientOptions.ApiType); + Assert.AreEqual(maxRetryAttemptsOnThrottledRequests, clientOptions.MaxRetryAttemptsOnRateLimitedRequests); + Assert.AreEqual(maxRetryWaitTime, clientOptions.MaxRetryWaitTimeOnRateLimitedRequests); + Assert.AreEqual(cosmosSerializerOptions.IgnoreNullValues, clientOptions.SerializerOptions.IgnoreNullValues); + Assert.AreEqual(cosmosSerializerOptions.PropertyNamingPolicy, clientOptions.SerializerOptions.PropertyNamingPolicy); + 
Assert.AreEqual(cosmosSerializerOptions.Indented, clientOptions.SerializerOptions.Indented); + Assert.IsFalse(clientOptions.AllowBulkExecution); + Assert.AreEqual(consistencyLevel, clientOptions.ConsistencyLevel); + Assert.IsTrue(clientOptions.EnablePartitionLevelFailover); + Assert.IsNotNull(clientOptions.ApplicationPreferredRegions); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelFailoverEnabled, null); + } + } + [TestMethod] public void VerifyConsisentencyLevels() { From 20b3d21cf971fcc165927f94030c6108e2fe8078 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Tue, 19 Sep 2023 17:06:38 -0700 Subject: [PATCH 12/31] Code changes to draft docs for PPAF design approach. --- docs/PerPartitionAutomaticFailoverDesign.md | 48 +++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/PerPartitionAutomaticFailoverDesign.md diff --git a/docs/PerPartitionAutomaticFailoverDesign.md b/docs/PerPartitionAutomaticFailoverDesign.md new file mode 100644 index 0000000000..65d1cb0ffa --- /dev/null +++ b/docs/PerPartitionAutomaticFailoverDesign.md @@ -0,0 +1,48 @@ +# Working Principles for Per Partition Automatic Failover + +## Table of Contents + +* [Scope.](#scope) +* [Backgraound.](#backgraound) +* [Proposed Solution.](#proposed-solution) +* [Design Approach.](#design-approach) + * [Outline.](#outline) + * [Updated Sequence Diagram for `CosmosClient` initialization.](#updated-sequence-diagram-for-cosmosclient-initialization) + * [Sequence Diagram when `StoreReader` invokes the `GatewayAddressCache` to resolve addresses and leverages `AddressEnumerator` to enumerate the transport addresses.](#sequence-diagram-when-storereader-invokes-the-gatewayaddresscache-to-resolve-addresses-and-leverages-addressenumerator-to-enumerate-the-transport-addresses) + * [State Diagram to Understand the `TransportAddressUri` Health State Transformations.](#state-diagram-to-understand-the-transportaddressuri-health-state-transformations) 
+ * [`Microsoft.Azure.Cosmos.Direct` package class diagrams.](#azurecosmosdirect-package-class-diagrams) + * [`Microsoft.Azure.Cosmos` package class diagrams.](#microsoftazurecosmos-package-class-diagrams) +* [Pull Request with Sample Code Changes.](#pull-request-with-sample-code-changes) +* [References.](#references) + +## Scope + +The scope of the per partition automatic failover is applicable for the `CosmosClient` configured for both `Gateway` and `Direct` mode. + +## Background + +During an upgrade scenario in the backend replica nodes, there has been an observation of increased request latency. One of the primary reasons for the latency is that, during an upgrade, a replica which is still undergoing upgrade may still be returned to the SDK when an address refresh occurs. As of today, an incoming request has a `25%` chance of hitting a replica that is not ready yet, therefore causing the `ConnectionTimeoutException`, which contributes to the increased latency. + +To understand the problem statement better, please take a look at the below sequence diagram which reflects the connection timeouts caused by the replica upgrade. + +## Design Approach + +Today, the partition level failover is applicable for multi-master write accounts. In order to enable the partition level failover for single master write accounts, the following changes are required: + +- In the `ClientRetryPolicy.ShouldRetryOnServiceUnavailable()`, enable the retry for Single Master write accounts. This is done by removing the below piece of code: + + ``` + if (!this.canUseMultipleWriteLocations + && !this.isReadRequest) + { + // Write requests on single master cannot be retried, no other regions available. + return ShouldRetryResult.NoRetry(); + } + ``` + + +- Today, when a call to get the collection for a specific region fails in the Gateway, the Gateway returns a `Service Unavailable - 503` Status, with a Sub Status code `9002`.
Per the current behavior, our .NET SDK doesn't retry for `503.9002`, and it only does so for `503.Unknown` code. Therefore the SDK was not retrying initially. In order to resolve this, delete the `ClientRetryPolicy.IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode)` method completely and with this in place, the SDK should retry on any service unavailable by default and it will not depend upon the sub-status codes to make the retry decision. + +## How Does the SDK knows Which Region to Fail Over + +Right now, the .NET SDK depends upon the `GlobalPartitionEndpointManagerCore` to resolve the endpoints. There is a method `TryMarkEndpointUnavailableForPartitionKeyRange()` within the class, that is responsible to add the override by iterating over the next read regions. This is how the .NET SDK knows which region to fail over. \ No newline at end of file From 5f862601ccd842e4e1da8d722ff2b1f356365389 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Tue, 26 Sep 2023 16:03:06 -0700 Subject: [PATCH 13/31] Code changes to add SDK side design docs for PPAF. 
--- docs/PerPartitionAutomaticFailoverDesign.md | 31 +++++++++++---------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/docs/PerPartitionAutomaticFailoverDesign.md b/docs/PerPartitionAutomaticFailoverDesign.md index 65d1cb0ffa..1979241e6d 100644 --- a/docs/PerPartitionAutomaticFailoverDesign.md +++ b/docs/PerPartitionAutomaticFailoverDesign.md @@ -4,30 +4,23 @@ * [Scope.](#scope) * [Backgraound.](#backgraound) -* [Proposed Solution.](#proposed-solution) * [Design Approach.](#design-approach) - * [Outline.](#outline) - * [Updated Sequence Diagram for `CosmosClient` initialization.](#updated-sequence-diagram-for-cosmosclient-initialization) - * [Sequence Diagram when `StoreReader` invokes the `GatewayAddressCache` to resolve addresses and leverages `AddressEnumerator` to enumerate the transport addresses.](#sequence-diagram-when-storereader-invokes-the-gatewayaddresscache-to-resolve-addresses-and-leverages-addressenumerator-to-enumerate-the-transport-addresses) - * [State Diagram to Understand the `TransportAddressUri` Health State Transformations.](#state-diagram-to-understand-the-transportaddressuri-health-state-transformations) - * [`Microsoft.Azure.Cosmos.Direct` package class diagrams.](#azurecosmosdirect-package-class-diagrams) - * [`Microsoft.Azure.Cosmos` package class diagrams.](#microsoftazurecosmos-package-class-diagrams) -* [Pull Request with Sample Code Changes.](#pull-request-with-sample-code-changes) +* [How Does the SDK know Which Region to Fail Over.](#how-does-the-sdk-know-which-region-to-fail-over) * [References.](#references) ## Scope -The scope of the per partition automatic failover is applicable for the `CosmosClient` configured for both `Gateway` and `Direct` mode. +The scope of the per partition automatic failover design document is applicable for the Cosmos .NET SDK configured for both `Gateway` and `Direct` mode. 
## Backgraund -During an upgrade scenario in the backend replica nodes, there has been an observation of increased request latency. One of the primary reason for the latency is that, during an upgrade, a replica which is still undergoing upgrade may still be returned back to SDK, when an address refresh occurres. As of today, the incoming request will have `25%` chance to hit the replica that not ready yet, therefore causing the `ConnectionTimeoutException`, which contributes to the increased latency. +Today, the partition level failovers are applicable for multi master write accounts, for a simple reason. If one of the write regions fails with a write forbidden 403 exception, then the SDK has the knowledge (by looking up the `ApplicationPreferredRegions`) of the other regions to failover. With the per partition automatic failover, if a partition is in quorum loss, then the backend automatically marks another region as the write region, based on the account configuration. Therefore, any retry for the write requests, to the next preferred region should be successful. -To understand the problem statement better, please take a look at the below sequence diagram which reflects the connection timeouts caused by the replica upgrade. +This idea extends the SDK's retry logic to retry the write requests for single master write accounts, for any service unavailable (status code 503) errors. ## Design Approach -Today, the partition level failover is applicable for multi-master write accounts. In order to enable the partition level failover for single master write accounts, below changes are required to be made: +Today, the partition level failover is applicable only for the multi-master write accounts. In order to enable the partition level failover for single master write accounts, below changes are required to be made: - In the `ClientRetryPolicy.ShouldRetryOnServiceUnavailable()`, enable the retry for Single Master write accounts. 
This is done by removing the below piece of code: @@ -38,11 +31,19 @@ Today, the partition level failover is applicable for multi-master write account // Write requests on single master cannot be retried, no other regions available. return ShouldRetryResult.NoRetry(); } - ``` + ``` - Today, when a call to get the collection for a specific region fails in the Gateway, the Gateway returns a `Service Unavailable - 503` Status, with a Sub Status code `9002`. Per the current behavior, our .NET SDK doesn't retry for `503.9002`, and it only does so for `503.Unknown` code. Therefore the SDK was not retrying initially. In order to resolve this, delete the `ClientRetryPolicy.IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode)` method completely and with this in place, the SDK should retry on any service unavailable by default and it will not depend upon the sub-status codes to make the retry decision. -## How Does the SDK knows Which Region to Fail Over +- For the customers to enable the partition level failover, we have agreed to make the `ApplicationPreferredRegions` as a mandatory parameter the `CosmosClientOptions`. Therefore, if the partition level failover is enabled, and the `ApplicationPreferredRegions` list is not provided, an `ArgumentException` will be thrown. This will be a change in the behavior. + +## How Does the SDK know Which Region to Fail Over + +Right now, the .NET SDK depends upon the `GlobalPartitionEndpointManagerCore` to resolve the endpoints. There is a method `TryMarkEndpointUnavailableForPartitionKeyRange()` within the class, that is responsible to add the override by iterating over the next read regions. This is how the .NET SDK knows which region to fail over. + +## References -Right now, the .NET SDK depends upon the `GlobalPartitionEndpointManagerCore` to resolve the endpoints. 
There is a method `TryMarkEndpointUnavailableForPartitionKeyRange()` within the class, that is responsible to add the override by iterating over the next read regions. This is how the .NET SDK knows which region to fail over. \ No newline at end of file +- [SDK not retrying with next region in case address resolution call to Gateway call fails with 503.](https://msdata.visualstudio.com/CosmosDB/_workitems/edit/2475521/) +- [First client write request after failover is failing with 503(21005)](https://msdata.visualstudio.com/CosmosDB/_workitems/edit/2492475/) +- [PPAF Testing in Test, Staging and Prod.](https://microsoft.sharepoint.com/:w:/r/teams/DocumentDB/_layouts/15/doc2.aspx?sourcedoc=%7B7587D267-212F-47BE-AAD6-18FC53482B68%7D&file=PPAF%20Testing%20in%20Test%2C%20Staging%20and%20Prod.docx&action=default&mobileredirect=true) \ No newline at end of file From f740c9d089ffc3c81f3313e978d305b20eeef75e Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Tue, 26 Sep 2023 16:13:06 -0700 Subject: [PATCH 14/31] Code changes to modify the PPAF design. --- docs/PerPartitionAutomaticFailoverDesign.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/PerPartitionAutomaticFailoverDesign.md b/docs/PerPartitionAutomaticFailoverDesign.md index 1979241e6d..a216f9ab82 100644 --- a/docs/PerPartitionAutomaticFailoverDesign.md +++ b/docs/PerPartitionAutomaticFailoverDesign.md @@ -36,6 +36,8 @@ Today, the partition level failover is applicable only for the multi-master writ - Today, when a call to get the collection for a specific region fails in the Gateway, the Gateway returns a `Service Unavailable - 503` Status, with a Sub Status code `9002`. Per the current behavior, our .NET SDK doesn't retry for `503.9002`, and it only does so for `503.Unknown` code. Therefore the SDK was not retrying initially. In order to resolve this, delete the `ClientRetryPolicy.IsRetriableServiceUnavailable(SubStatusCodes? 
subStatusCode)` method completely and with this in place, the SDK should retry on any service unavailable by default and it will not depend upon the sub-status codes to make the retry decision. +- Currently, there is an option `EnablePartitionLevelFailover` in the `CosmosClientOptions` to enable or disable the per partition automatic failover. However, this option is not `public` yet. The approach we would like to take is to develop this feature behind a feature flag called `AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED`. By setting this feature flag, the external customers can enable or disable the partition level failover. + - For the customers to enable the partition level failover, we have agreed to make the `ApplicationPreferredRegions` as a mandatory parameter the `CosmosClientOptions`. Therefore, if the partition level failover is enabled, and the `ApplicationPreferredRegions` list is not provided, an `ArgumentException` will be thrown. This will be a change in the behavior. ## How Does the SDK know Which Region to Fail Over From 237acc02ea6b5a0d652f4cb4038a4f7b2dc45b54 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 27 Sep 2023 12:33:47 -0700 Subject: [PATCH 15/31] Code changes to fix unit test. 
--- .../GlobalPartitionEndpointManagerTests.cs | 28 ++++--------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs index bac068518c..d8e5fd2d53 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs @@ -196,8 +196,7 @@ public async Task CreateItemAsync_WithPreferredRegionsAndServiceUnavailable_Shou [TestMethod] - [Timeout(10000)] - public async Task CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_ShouldThrowServiceUnavailableException() + public void CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_ShouldThrowArgumentException() { GlobalPartitionEndpointManagerTests.SetupAccountAndCacheOperations( out string secondaryRegionNameForUri, @@ -241,29 +240,14 @@ public async Task CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_Sh TransportClientHandlerFactory = (original) => mockTransport.Object, }; - using CosmosClient customClient = new CosmosClient( + ArgumentException exception = Assert.ThrowsException(() => new CosmosClient( globalEndpoint, Convert.ToBase64String(Encoding.UTF8.GetBytes(Guid.NewGuid().ToString())), - cosmosClientOptions); - - Container container = customClient.GetContainer(databaseName, containerName); - - ToDoActivity toDoActivity = new ToDoActivity() - { - Id = "TestItem", - Pk = "TestPk" - }; + cosmosClientOptions)); - // First create will fail because it is not certain if the payload was sent or not. 
- try - { - await container.CreateItemAsync(toDoActivity, new Cosmos.PartitionKey(toDoActivity.Pk)); - Assert.Fail("Should throw an exception"); - } - catch (CosmosException ce) when (ce.StatusCode == HttpStatusCode.ServiceUnavailable) - { - Assert.IsNotNull(ce); - } + Assert.AreEqual( + expected: "ApplicationPreferredRegions is required when EnablePartitionLevelFailover is enabled.", + actual: exception.Message); } [TestMethod] From 3dd56789431f0bdd197f1d47e658dbd9927322b4 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 27 Sep 2023 12:40:25 -0700 Subject: [PATCH 16/31] Code changes to rename test name. --- .../GlobalPartitionEndpointManagerTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs index d8e5fd2d53..bfe550a46b 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs @@ -108,7 +108,7 @@ public async Task TestWriteForbiddenScenarioAsync() [TestMethod] [Timeout(10000)] - public async Task CreateItemAsync_WithPreferredRegionsAndServiceUnavailable_ShouldRetryAndSucceed() + public async Task CreateItemAsync_WithPreferredRegionsAndServiceUnavailableForFirstPreferredRegion_ShouldRetryAndSucceedToTheNextPreferredRegion() { GlobalPartitionEndpointManagerTests.SetupAccountAndCacheOperations( out string secondaryRegionNameForUri, From 0d59eb823c3d4dc0ba1701c7d1b6a88256d0786f Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 27 Sep 2023 12:55:34 -0700 Subject: [PATCH 17/31] Code changes to add some cosmetic changes. 
--- .../src/Util/ConfigurationManager.cs | 4 ++-- .../CosmosClientOptionsUnitTests.cs | 12 ++++++++++-- .../GlobalPartitionEndpointManagerTests.cs | 9 ++++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index 71943c7731..0b7786d8c3 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -9,14 +9,14 @@ namespace Microsoft.Azure.Cosmos internal static class ConfigurationManager { /// - /// A read-only string containing the environment variablename for enabling replica validation. + /// A read-only string containing the environment variable name for enabling replica validation. /// This will eventually be removed once replica valdiatin is enabled by default for both preview /// and GA. /// internal static readonly string ReplicaConnectivityValidationEnabled = "AZURE_COSMOS_REPLICA_VALIDATION_ENABLED"; /// - /// A read-only string containing the environment variablename for enabling per partition automatic failover. + /// A read-only string containing the environment variable name for enabling per partition automatic failover. /// This will eventually be removed once per partition automatic failover is enabled by default for both preview /// and GA. 
/// diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index 98866edc1e..8b6c0a152e 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -191,10 +191,14 @@ public void VerifyCosmosConfigurationPropertiesGetUpdated() CollectionAssert.AreEqual(preferredLocations.ToArray(), policy.PreferredLocations.ToArray()); } + /// + /// Test to validate that when the partition level failover is enabled with the preferred regions list is missing, then the client + /// initialization should throw an argument exception and fail. This should hold true for both environment variable and CosmosClientOptions. + /// [TestMethod] + [Owner("dkunda")] [DataRow(true, DisplayName = "Validate that when enevironment variable is used to enable PPAF, the outcome of the test should be same.")] [DataRow(false, DisplayName = "Validate that when CosmosClientOptions is used to enable PPAF, the outcome of the test should be same.")] - [Owner("dkunda")] public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredRegionsNotSet_ShouldThrowArgumentException(bool useEnvironmentVariable) { try @@ -251,10 +255,14 @@ public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredReg } } + /// + /// Test to validate that when the partition level failover is enabled with the preferred regions list is provided, then the client + /// initialization should be successful. This holds true for both environment variable and CosmosClientOptions. 
+ /// [TestMethod] + [Owner("dkunda")] [DataRow(true, DisplayName = "Validate that when enevironment variable is used to enable PPAF, the outcome of the test should be same.")] [DataRow(false, DisplayName = "Validate that when CosmosClientOptions is used to enable PPAF, the outcome of the test should be same.")] - [Owner("dkunda")] public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredRegionsSet_ShouldInitializeSuccessfully(bool useEnvironmentVariable) { try diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs index bfe550a46b..05cbfc8384 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs @@ -106,6 +106,10 @@ public async Task TestWriteForbiddenScenarioAsync() } } + /// + /// Test to validate that when the partition level failover is enabled with the preferred regions list provided, if the first + /// region is unavailable for write, then the write should eventually get retried to the next preferred region. + /// [TestMethod] [Timeout(10000)] public async Task CreateItemAsync_WithPreferredRegionsAndServiceUnavailableForFirstPreferredRegion_ShouldRetryAndSucceedToTheNextPreferredRegion() @@ -194,7 +198,10 @@ public async Task CreateItemAsync_WithPreferredRegionsAndServiceUnavailableForFi Assert.AreEqual(HttpStatusCode.Created, response.StatusCode); } - + /// + /// Test to validate that when the partition level failover is enabled with the preferred regions list is missing, then the client + /// initialization should throw an argument exception and fail. 
+ /// [TestMethod] public void CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_ShouldThrowArgumentException() { From 07c644aa475000fb8b4e0d939cad58055468df39 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 9 Oct 2023 11:43:05 -0700 Subject: [PATCH 18/31] Code changes to enable retry on write for all regions in single master accounts. --- .../GlobalPartitionEndpointManagerCore.cs | 11 ++++- .../src/Routing/LocationCache.cs | 40 +++++++++++++------ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs index a4ab1221e6..d0c362701b 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs @@ -134,11 +134,18 @@ public override bool TryMarkEndpointUnavailableForPartitionKeyRange( PartitionKeyRangeFailoverInfo partionFailover = this.PartitionKeyRangeToLocation.Value.GetOrAdd( partitionKeyRange, - (_) => new PartitionKeyRangeFailoverInfo(failedLocation)); + (_) => new PartitionKeyRangeFailoverInfo(failedLocation)); + + ReadOnlyCollection nextLocations = this.globalEndpointManager.ReadEndpoints; + + if (!this.globalEndpointManager.CanUseMultipleWriteLocations(request)) + { + nextLocations = this.globalEndpointManager.WriteEndpoints; + } // Will return true if it was able to update to a new region if (partionFailover.TryMoveNextLocation( - locations: this.globalEndpointManager.ReadEndpoints, + locations: nextLocations, failedLocation: failedLocation)) { DefaultTrace.TraceInformation("Partition level override added to new location. 
PartitionKeyRange: {0}, failedLocation: {1}, new location: {2}", diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 9c6308d8b6..79a1e84bf3 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -491,26 +491,42 @@ private void UpdateLocationCache( if (readLocations != null) { - ReadOnlyCollection availableReadLocations; - nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations); + nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation( + readLocations, + out ReadOnlyCollection availableReadLocations); + nextLocationInfo.AvailableReadLocations = availableReadLocations; } if (writeLocations != null) { - ReadOnlyCollection availableWriteLocations; - nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations); + nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation( + writeLocations, + out ReadOnlyCollection availableWriteLocations); + nextLocationInfo.AvailableWriteLocations = availableWriteLocations; } - - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint); - nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]); + + bool isMultiMasterAccount = this.CanUseMultipleWriteLocations(); + + nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( + endpointsByLocation: isMultiMasterAccount ? nextLocationInfo.AvailableWriteEndpointByLocation : nextLocationInfo.AvailableReadEndpointByLocation, + orderedLocations: isMultiMasterAccount ? 
nextLocationInfo.AvailableWriteLocations : nextLocationInfo.AvailableReadLocations, + expectedAvailableOperation: OperationType.Write, + fallbackEndpoint: this.defaultEndpoint); + + nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints( + endpointsByLocation: nextLocationInfo.AvailableReadEndpointByLocation, + orderedLocations: nextLocationInfo.AvailableReadLocations, + expectedAvailableOperation: OperationType.Read, + fallbackEndpoint: nextLocationInfo.WriteEndpoints[0]); + this.lastCacheUpdateTimestamp = DateTime.UtcNow; DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); - + this.locationInfo = nextLocationInfo; } } @@ -534,8 +550,7 @@ private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionar foreach (string location in currentLocationInfo.PreferredLocations) { - Uri endpoint; - if (endpointsByLocation.TryGetValue(location, out endpoint)) + if (endpointsByLocation.TryGetValue(location, out Uri endpoint)) { if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation)) { @@ -560,9 +575,8 @@ private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionar { foreach (string location in orderedLocations) { - Uri endpoint; if (!string.IsNullOrEmpty(location) && // location is empty during manual failover - endpointsByLocation.TryGetValue(location, out endpoint)) + endpointsByLocation.TryGetValue(location, out Uri endpoint)) { endpoints.Add(endpoint); } @@ -573,7 +587,7 @@ private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionar if (endpoints.Count == 0) { endpoints.Add(fallbackEndpoint); - } + } return endpoints.AsReadOnly(); } From 48a134877af0bb2c89504a37a57db4268757ba7b Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 11 Oct 2023 14:00:45 -0700 Subject: [PATCH 19/31] Code 
changes to add code comments. --- .../src/Routing/GlobalPartitionEndpointManagerCore.cs | 1 + Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs | 1 + 2 files changed, 2 insertions(+) diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs index d0c362701b..488c916177 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs @@ -138,6 +138,7 @@ public override bool TryMarkEndpointUnavailableForPartitionKeyRange( ReadOnlyCollection nextLocations = this.globalEndpointManager.ReadEndpoints; + // Add documentation. if (!this.globalEndpointManager.CanUseMultipleWriteLocations(request)) { nextLocations = this.globalEndpointManager.WriteEndpoints; diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 79a1e84bf3..cb1c50d826 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -507,6 +507,7 @@ private void UpdateLocationCache( nextLocationInfo.AvailableWriteLocations = availableWriteLocations; } + // Add Documentation. bool isMultiMasterAccount = this.CanUseMultipleWriteLocations(); nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( From 232a950b8682b87bffc8e862d8fb3a130c6b3395 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Wed, 11 Oct 2023 17:20:30 -0700 Subject: [PATCH 20/31] Code changes to clean up and handle endpoints in location cache. 
--- .../src/Routing/GlobalEndpointManager.cs | 3 ++- .../GlobalPartitionEndpointManagerCore.cs | 13 +++++----- .../src/Routing/LocationCache.cs | 25 +++++++++++-------- .../ClientRetryPolicyTests.cs | 3 ++- .../LocationCacheTests.cs | 3 ++- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs index 956212d5a2..c4f6237c9e 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs @@ -49,7 +49,8 @@ public GlobalEndpointManager(IDocumentClientInternal owner, ConnectionPolicy con owner.ServiceEndpoint, connectionPolicy.EnableEndpointDiscovery, connectionPolicy.MaxConnectionLimit, - connectionPolicy.UseMultipleWriteLocations); + connectionPolicy.UseMultipleWriteLocations, + connectionPolicy.EnablePartitionLevelFailover); this.owner = owner; this.defaultEndpoint = owner.ServiceEndpoint; diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs index 488c916177..ef1e292794 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs @@ -136,14 +136,13 @@ public override bool TryMarkEndpointUnavailableForPartitionKeyRange( partitionKeyRange, (_) => new PartitionKeyRangeFailoverInfo(failedLocation)); - ReadOnlyCollection nextLocations = this.globalEndpointManager.ReadEndpoints; + // For any single master write accounts, the next locations to fail over to are the write regions configured + // at the account level. For multi master write accounts, since all the regions are treated as write regions, + // the next locations to fail over would be the read regions. 
+ ReadOnlyCollection nextLocations = !this.globalEndpointManager.CanUseMultipleWriteLocations(request) + ? this.globalEndpointManager.WriteEndpoints + : this.globalEndpointManager.ReadEndpoints; - // Add documentation. - if (!this.globalEndpointManager.CanUseMultipleWriteLocations(request)) - { - nextLocations = this.globalEndpointManager.WriteEndpoints; - } - // Will return true if it was able to update to a new region if (partionFailover.TryMoveNextLocation( locations: nextLocations, diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index cb1c50d826..f1395e0659 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -22,8 +22,9 @@ namespace Microsoft.Azure.Cosmos.Routing internal sealed class LocationCache { private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; - private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - + private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; + + private readonly bool partitionLevelFailoverEnabled; private readonly bool enableEndpointDiscovery; private readonly Uri defaultEndpoint; private readonly bool useMultipleWriteLocations; @@ -34,14 +35,15 @@ internal sealed class LocationCache private DatabaseAccountLocationsInfo locationInfo; private DateTime lastCacheUpdateTimestamp; - private bool enableMultipleWriteLocations; + private bool enableMultipleWriteLocations; public LocationCache( ReadOnlyCollection preferredLocations, Uri defaultEndpoint, bool enableEndpointDiscovery, int connectionLimit, - bool useMultipleWriteLocations) + bool useMultipleWriteLocations, + bool partitionLevelFailoverEnabled) { this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); this.defaultEndpoint = defaultEndpoint; @@ -52,7 +54,8 @@ public LocationCache( this.lockObject = 
new object(); this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); this.lastCacheUpdateTimestamp = DateTime.MinValue; - this.enableMultipleWriteLocations = false; + this.enableMultipleWriteLocations = false; + this.partitionLevelFailoverEnabled = partitionLevelFailoverEnabled; this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); #if !(NETSTANDARD15 || NETSTANDARD16) @@ -505,14 +508,16 @@ private void UpdateLocationCache( out ReadOnlyCollection availableWriteLocations); nextLocationInfo.AvailableWriteLocations = availableWriteLocations; - } + } - // Add Documentation. - bool isMultiMasterAccount = this.CanUseMultipleWriteLocations(); + // For any multi master write accounts, the write endpoints would be the available write regions + // configured at the account level. For single master write accounts, the write endpoints would be + // the available read regions configured at the account level. + bool canUsePartitionLevelFailover = !this.CanUseMultipleWriteLocations() && this.partitionLevelFailoverEnabled; nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( - endpointsByLocation: isMultiMasterAccount ? nextLocationInfo.AvailableWriteEndpointByLocation : nextLocationInfo.AvailableReadEndpointByLocation, - orderedLocations: isMultiMasterAccount ? nextLocationInfo.AvailableWriteLocations : nextLocationInfo.AvailableReadLocations, + endpointsByLocation: canUsePartitionLevelFailover ? nextLocationInfo.AvailableReadEndpointByLocation : nextLocationInfo.AvailableWriteEndpointByLocation, + orderedLocations: canUsePartitionLevelFailover ? 
nextLocationInfo.AvailableReadLocations : nextLocationInfo.AvailableWriteLocations, expectedAvailableOperation: OperationType.Write, fallbackEndpoint: this.defaultEndpoint); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 0851952f40..1db7767e14 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -433,7 +433,8 @@ private MockDocumentClientContext InitializeMockedDocumentClient( new Uri("https://default.documents.azure.com"), true, 10, - useMultipleWriteLocations); + useMultipleWriteLocations, + false); mockDocumentClientContext.LocationCache.OnDatabaseAccountRead(mockDocumentClientContext.DatabaseAccount); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index efcd174d6d..eab0d786fb 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -836,7 +836,8 @@ private GlobalEndpointManager Initialize( LocationCacheTests.DefaultEndpoint, enableEndpointDiscovery, 10, - useMultipleWriteLocations); + useMultipleWriteLocations, + false); this.cache.OnDatabaseAccountRead(this.databaseAccount); From c32809bb06b50a1d542c781a34675c84e8215acb Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Thu, 12 Oct 2023 13:32:03 -0700 Subject: [PATCH 21/31] Code changes to fix unit tests. Added detailed code comments. 
--- .../Routing/GlobalPartitionEndpointManagerCore.cs | 7 ++++--- Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs | 12 ++++++------ .../GlobalPartitionEndpointManagerUnitTests.cs | 3 ++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs index ef1e292794..c162f148a9 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs @@ -136,9 +136,10 @@ public override bool TryMarkEndpointUnavailableForPartitionKeyRange( partitionKeyRange, (_) => new PartitionKeyRangeFailoverInfo(failedLocation)); - // For any single master write accounts, the next locations to fail over to are the write regions configured - // at the account level. For multi master write accounts, since all the regions are treated as write regions, - // the next locations to fail over would be the read regions. + // For any single master write accounts, the next locations to fail over will be the read regions configured at the account level. Here + // the globalEndpointManager.WriteEndpoints are basically initialized from the Location Cache, with the read regions, configured at the + // account level. For multi master write accounts, since all the regions are treated as write regions, the next locations to fail over + // will be the preferred read regions that are configured in the application preferred regions in the CosmosClientOptions. ReadOnlyCollection nextLocations = !this.globalEndpointManager.CanUseMultipleWriteLocations(request) ? 
this.globalEndpointManager.WriteEndpoints : this.globalEndpointManager.ReadEndpoints; diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index f1395e0659..54fb853d44 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -510,14 +510,14 @@ private void UpdateLocationCache( nextLocationInfo.AvailableWriteLocations = availableWriteLocations; } - // For any multi master write accounts, the write endpoints would be the available write regions - // configured at the account level. For single master write accounts, the write endpoints would be - // the available read regions configured at the account level. - bool canUsePartitionLevelFailover = !this.CanUseMultipleWriteLocations() && this.partitionLevelFailoverEnabled; + // For any single master write accounts, the write endpoints would be the read regions configured at the account level. + // For multi master write accounts, since all the regions are treated as write regions, the write endpoints would be + // the preferred write regions that are configured in the application preferred regions in the CosmosClientOptions. + bool isSingleMasterAndPartitionLevelFailoverEnabled = !this.CanUseMultipleWriteLocations() && this.partitionLevelFailoverEnabled; nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( - endpointsByLocation: canUsePartitionLevelFailover ? nextLocationInfo.AvailableReadEndpointByLocation : nextLocationInfo.AvailableWriteEndpointByLocation, - orderedLocations: canUsePartitionLevelFailover ? nextLocationInfo.AvailableReadLocations : nextLocationInfo.AvailableWriteLocations, + endpointsByLocation: isSingleMasterAndPartitionLevelFailoverEnabled ? nextLocationInfo.AvailableReadEndpointByLocation : nextLocationInfo.AvailableWriteEndpointByLocation, + orderedLocations: isSingleMasterAndPartitionLevelFailoverEnabled ? 
nextLocationInfo.AvailableReadLocations : nextLocationInfo.AvailableWriteLocations, expectedAvailableOperation: OperationType.Write, fallbackEndpoint: this.defaultEndpoint); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs index 8f1da91c53..8ff76e8162 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs @@ -73,13 +73,14 @@ public void VerifyAllReadRegionsAreVisited(int numOfReadRegions) Mock mockEndpointManager = new Mock(MockBehavior.Strict); GlobalPartitionEndpointManagerCore failoverManager = new GlobalPartitionEndpointManagerCore(mockEndpointManager.Object); - List readRegions = new List(); + List readRegions = new (), writeRegions = new(); for(int i = 0; i < numOfReadRegions; i++) { readRegions.Add(new Uri($"https://localhost:{i}/")); } mockEndpointManager.Setup(x => x.ReadEndpoints).Returns(() => new ReadOnlyCollection(readRegions)); + mockEndpointManager.Setup(x => x.WriteEndpoints).Returns(() => new ReadOnlyCollection(readRegions)); // Create a random pk range PartitionKeyRange partitionKeyRange = new PartitionKeyRange() From 3e2aba253e7f7d8d51dc3cbd62dd67afc843c717 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Fri, 13 Oct 2023 15:40:35 -0700 Subject: [PATCH 22/31] Code changes to clean up the account read endpoints generation logic. 
--- .../src/Routing/GlobalEndpointManager.cs | 9 ++-- .../GlobalPartitionEndpointManagerCore.cs | 11 +++-- .../src/Routing/IGlobalEndpointManager.cs | 2 + .../src/Routing/LocationCache.cs | 46 ++++++++++++------- .../ClientRetryPolicyTests.cs | 3 +- .../LocationCacheTests.cs | 3 +- 6 files changed, 45 insertions(+), 29 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs index c4f6237c9e..bcaae809f4 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs @@ -49,8 +49,7 @@ public GlobalEndpointManager(IDocumentClientInternal owner, ConnectionPolicy con owner.ServiceEndpoint, connectionPolicy.EnableEndpointDiscovery, connectionPolicy.MaxConnectionLimit, - connectionPolicy.UseMultipleWriteLocations, - connectionPolicy.EnablePartitionLevelFailover); + connectionPolicy.UseMultipleWriteLocations); this.owner = owner; this.defaultEndpoint = owner.ServiceEndpoint; @@ -90,9 +89,11 @@ public GlobalEndpointManager(IDocumentClientInternal owner, ConnectionPolicy con } } - public ReadOnlyCollection ReadEndpoints => this.locationCache.ReadEndpoints; + public ReadOnlyCollection ReadEndpoints => this.locationCache.ReadEndpoints; + + public ReadOnlyCollection AccountReadEndpoints => this.locationCache.AccountReadEndpoints; - public ReadOnlyCollection WriteEndpoints => this.locationCache.WriteEndpoints; + public ReadOnlyCollection WriteEndpoints => this.locationCache.WriteEndpoints; public int PreferredLocationCount => this.connectionPolicy.PreferredLocations != null ? 
this.connectionPolicy.PreferredLocations.Count : 0; diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs index c162f148a9..fe2e256601 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalPartitionEndpointManagerCore.cs @@ -136,12 +136,13 @@ public override bool TryMarkEndpointUnavailableForPartitionKeyRange( partitionKeyRange, (_) => new PartitionKeyRangeFailoverInfo(failedLocation)); - // For any single master write accounts, the next locations to fail over will be the read regions configured at the account level. Here - // the globalEndpointManager.WriteEndpoints are basically initialized from the Location Cache, with the read regions, configured at the - // account level. For multi master write accounts, since all the regions are treated as write regions, the next locations to fail over + // For any single master write accounts, the next locations to fail over will be the read regions configured at the account level. + // For multi master write accounts, since all the regions are treated as write regions, the next locations to fail over // will be the preferred read regions that are configured in the application preferred regions in the CosmosClientOptions. - ReadOnlyCollection nextLocations = !this.globalEndpointManager.CanUseMultipleWriteLocations(request) - ? this.globalEndpointManager.WriteEndpoints + bool isSingleMasterWriteAccount = !this.globalEndpointManager.CanUseMultipleWriteLocations(request); + + ReadOnlyCollection nextLocations = isSingleMasterWriteAccount + ? 
this.globalEndpointManager.AccountReadEndpoints : this.globalEndpointManager.ReadEndpoints; // Will return true if it was able to update to a new region diff --git a/Microsoft.Azure.Cosmos/src/Routing/IGlobalEndpointManager.cs b/Microsoft.Azure.Cosmos/src/Routing/IGlobalEndpointManager.cs index dd3d8a2611..83f49cf0de 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/IGlobalEndpointManager.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/IGlobalEndpointManager.cs @@ -13,6 +13,8 @@ internal interface IGlobalEndpointManager : IDisposable { ReadOnlyCollection ReadEndpoints { get; } + ReadOnlyCollection AccountReadEndpoints { get; } + ReadOnlyCollection WriteEndpoints { get; } int PreferredLocationCount { get; } diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 54fb853d44..9a92d0a382 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -24,7 +24,6 @@ internal sealed class LocationCache private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - private readonly bool partitionLevelFailoverEnabled; private readonly bool enableEndpointDiscovery; private readonly Uri defaultEndpoint; private readonly bool useMultipleWriteLocations; @@ -42,8 +41,7 @@ public LocationCache( Uri defaultEndpoint, bool enableEndpointDiscovery, int connectionLimit, - bool useMultipleWriteLocations, - bool partitionLevelFailoverEnabled) + bool useMultipleWriteLocations) { this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint); this.defaultEndpoint = defaultEndpoint; @@ -55,7 +53,6 @@ public LocationCache( this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); this.lastCacheUpdateTimestamp = DateTime.MinValue; this.enableMultipleWriteLocations = false; - 
this.partitionLevelFailoverEnabled = partitionLevelFailoverEnabled; this.unavailableLocationsExpirationTime = TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); #if !(NETSTANDARD15 || NETSTANDARD16) @@ -103,6 +100,24 @@ public ReadOnlyCollection ReadEndpoints return this.locationInfo.ReadEndpoints; } } + + /// + /// Gets list of account level read endpoints. + /// + public ReadOnlyCollection AccountReadEndpoints + { + get + { + // Hot-path: avoid ConcurrentDictionary methods which acquire locks + if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime + && this.locationUnavailablityInfoByEndpoint.Any()) + { + this.UpdateLocationCache(); + } + + return this.locationInfo.AccountReadEndpoints; + } + } /// /// Gets list of write endpoints ordered by @@ -122,8 +137,8 @@ public ReadOnlyCollection WriteEndpoints return this.locationInfo.WriteEndpoints; } - } - + } + /// /// Returns the location corresponding to the endpoint if location specific endpoint is provided. /// For the defaultEndPoint, we will return the first available write location. @@ -498,7 +513,8 @@ private void UpdateLocationCache( readLocations, out ReadOnlyCollection availableReadLocations); - nextLocationInfo.AvailableReadLocations = availableReadLocations; + nextLocationInfo.AvailableReadLocations = availableReadLocations; + nextLocationInfo.AccountReadEndpoints = nextLocationInfo.AvailableReadEndpointByLocation.Select(x => x.Value).ToList().AsReadOnly(); } if (writeLocations != null) @@ -510,14 +526,9 @@ private void UpdateLocationCache( nextLocationInfo.AvailableWriteLocations = availableWriteLocations; } - // For any single master write accounts, the write endpoints would be the read regions configured at the account level. 
- // For multi master write accounts, since all the regions are treated as write regions, the write endpoints would be - // the preferred write regions that are configured in the application preferred regions in the CosmosClientOptions. - bool isSingleMasterAndPartitionLevelFailoverEnabled = !this.CanUseMultipleWriteLocations() && this.partitionLevelFailoverEnabled; - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( - endpointsByLocation: isSingleMasterAndPartitionLevelFailoverEnabled ? nextLocationInfo.AvailableReadEndpointByLocation : nextLocationInfo.AvailableWriteEndpointByLocation, - orderedLocations: isSingleMasterAndPartitionLevelFailoverEnabled ? nextLocationInfo.AvailableReadLocations : nextLocationInfo.AvailableWriteLocations, + endpointsByLocation: nextLocationInfo.AvailableWriteEndpointByLocation, + orderedLocations: nextLocationInfo.AvailableWriteLocations, expectedAvailableOperation: OperationType.Write, fallbackEndpoint: this.defaultEndpoint); @@ -654,6 +665,7 @@ public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocation this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + this.AccountReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); } @@ -664,7 +676,8 @@ public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) this.AvailableReadLocations = other.AvailableReadLocations; this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; - this.WriteEndpoints = other.WriteEndpoints; + this.WriteEndpoints = other.WriteEndpoints; + this.AccountReadEndpoints = other.AccountReadEndpoints; 
this.ReadEndpoints = other.ReadEndpoints; } @@ -674,7 +687,8 @@ public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } public ReadOnlyCollection WriteEndpoints { get; set; } - public ReadOnlyCollection ReadEndpoints { get; set; } + public ReadOnlyCollection ReadEndpoints { get; set; } + public ReadOnlyCollection AccountReadEndpoints { get; set; } } [Flags] diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 1db7767e14..0851952f40 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -433,8 +433,7 @@ private MockDocumentClientContext InitializeMockedDocumentClient( new Uri("https://default.documents.azure.com"), true, 10, - useMultipleWriteLocations, - false); + useMultipleWriteLocations); mockDocumentClientContext.LocationCache.OnDatabaseAccountRead(mockDocumentClientContext.DatabaseAccount); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index eab0d786fb..efcd174d6d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -836,8 +836,7 @@ private GlobalEndpointManager Initialize( LocationCacheTests.DefaultEndpoint, enableEndpointDiscovery, 10, - useMultipleWriteLocations, - false); + useMultipleWriteLocations); this.cache.OnDatabaseAccountRead(this.databaseAccount); From 466545c3b59cb82a7aab9c88ebe27519d0bdda5a Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 16 Oct 2023 15:49:06 
-0700 Subject: [PATCH 23/31] Code changes to fix unit tests. --- .../GlobalPartitionEndpointManagerUnitTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs index 8ff76e8162..06ca0a4c2c 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerUnitTests.cs @@ -80,6 +80,7 @@ public void VerifyAllReadRegionsAreVisited(int numOfReadRegions) } mockEndpointManager.Setup(x => x.ReadEndpoints).Returns(() => new ReadOnlyCollection(readRegions)); + mockEndpointManager.Setup(x => x.AccountReadEndpoints).Returns(() => new ReadOnlyCollection(readRegions)); mockEndpointManager.Setup(x => x.WriteEndpoints).Returns(() => new ReadOnlyCollection(readRegions)); // Create a random pk range From aabbe5e763650600824fa39026b99302c1b9d3e8 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Fri, 20 Oct 2023 14:42:15 -0700 Subject: [PATCH 24/31] Code changes to disable retry when ppaf is not enabled. Also validated application preferred region. 
--- Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs | 14 +++++++++++++- Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs | 5 +++-- Microsoft.Azure.Cosmos/src/RetryPolicy.cs | 9 ++++++--- .../ClientRetryPolicyTests.cs | 10 +++++----- .../LocationCacheTests.cs | 5 +++-- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 5bc91aebd8..7cef90d541 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -28,6 +28,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private readonly GlobalEndpointManager globalEndpointManager; private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; private readonly bool enableEndpointDiscovery; + private readonly bool isPertitionLevelFailoverEnabled; private int failoverRetryCount; private int sessionTokenRetryCount; @@ -41,8 +42,9 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy public ClientRetryPolicy( GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManager partitionKeyRangeLocationCache, + RetryOptions retryOptions, bool enableEndpointDiscovery, - RetryOptions retryOptions) + bool isPertitionLevelFailoverEnabled) { this.throttlingRetry = new ResourceThrottleRetryPolicy( retryOptions.MaxRetryAttemptsOnThrottledRequests, @@ -55,6 +57,7 @@ public ClientRetryPolicy( this.sessionTokenRetryCount = 0; this.serviceUnavailableRetryCount = 0; this.canUseMultipleWriteLocations = false; + this.isPertitionLevelFailoverEnabled = isPertitionLevelFailoverEnabled; } /// @@ -393,6 +396,15 @@ private ShouldRetryResult ShouldRetryOnServiceUnavailable() return ShouldRetryResult.NoRetry(); } + if (!this.canUseMultipleWriteLocations + && !this.isReadRequest + && !this.isPertitionLevelFailoverEnabled) + { + // Write requests on single master cannot be retried if partition level failover is 
disabled. + // This means there are no other regions available to serve the writes. + return ShouldRetryResult.NoRetry(); + } + int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; if (availablePreferredLocations <= 1) diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index a90a2e04a6..4e08885007 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -892,9 +892,10 @@ private void ValidateLimitToEndpointSettings() private void ValidatePartitionLevelFailoverSettings() { if (this.EnablePartitionLevelFailover - && (this.ApplicationPreferredRegions == null || this.ApplicationPreferredRegions.Count == 0)) + && (this.ApplicationPreferredRegions == null || this.ApplicationPreferredRegions.Count == 0) + && string.IsNullOrEmpty(this.ApplicationRegion)) { - throw new ArgumentException($"{nameof(this.ApplicationPreferredRegions)} is required when {nameof(this.EnablePartitionLevelFailover)} is enabled."); + throw new ArgumentException($"Either {nameof(this.ApplicationPreferredRegions)} or {nameof(this.ApplicationRegion)} is required when {nameof(this.EnablePartitionLevelFailover)} is enabled."); } } diff --git a/Microsoft.Azure.Cosmos/src/RetryPolicy.cs b/Microsoft.Azure.Cosmos/src/RetryPolicy.cs index 4ec05a3b65..47e64f0cbc 100644 --- a/Microsoft.Azure.Cosmos/src/RetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/RetryPolicy.cs @@ -13,17 +13,19 @@ internal sealed class RetryPolicy : IRetryPolicyFactory private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache; private readonly GlobalEndpointManager globalEndpointManager; private readonly bool enableEndpointDiscovery; + private readonly bool isPertitionLevelFailoverEnabled; private readonly RetryOptions retryOptions; /// /// Initialize the instance of the RetryPolicy class /// public RetryPolicy( - GlobalEndpointManager globalEndpointManager, + 
GlobalEndpointManager globalEndpointManager, ConnectionPolicy connectionPolicy, GlobalPartitionEndpointManager partitionKeyRangeLocationCache) { this.enableEndpointDiscovery = connectionPolicy.EnableEndpointDiscovery; + this.isPertitionLevelFailoverEnabled = connectionPolicy.EnablePartitionLevelFailover; this.globalEndpointManager = globalEndpointManager; this.retryOptions = connectionPolicy.RetryOptions; this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache; @@ -37,10 +39,11 @@ public IDocumentClientRetryPolicy GetRequestPolicy() ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy( this.globalEndpointManager, this.partitionKeyRangeLocationCache, + this.retryOptions, this.enableEndpointDiscovery, - this.retryOptions); + this.isPertitionLevelFailoverEnabled); return clientRetryPolicy; } } -} +} \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 0851952f40..cf1c739388 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -47,7 +47,7 @@ public void MultimasterMetadataWriteRetryTest() multimasterMetadataWriteRetryTest: true); - ClientRetryPolicy retryPolicy = new ClientRetryPolicy(endpointManager, this.partitionKeyRangeLocationCache, enableEndpointDiscovery, new RetryOptions()); + ClientRetryPolicy retryPolicy = new ClientRetryPolicy(endpointManager, this.partitionKeyRangeLocationCache, new RetryOptions(), enableEndpointDiscovery, false); //Creates a metadata write request DocumentServiceRequest request = this.CreateRequest(false, true); @@ -102,8 +102,8 @@ public void Http503SubStatusHandelingTests(int testCode) isPreferredLocationsListEmpty: true); //Create Retry Policy - ClientRetryPolicy retryPolicy = new ClientRetryPolicy(endpointManager, 
this.partitionKeyRangeLocationCache, enableEndpointDiscovery, new RetryOptions()); - + ClientRetryPolicy retryPolicy = new ClientRetryPolicy(endpointManager, this.partitionKeyRangeLocationCache, new RetryOptions(), enableEndpointDiscovery, false); + CancellationToken cancellationToken = new CancellationToken(); Exception serviceUnavailableException = new Exception(); Mock nameValueCollection = new Mock(); @@ -216,8 +216,8 @@ private async Task ValidateConnectTimeoutTriggersClientRetryPolicy( replicatedResourceClient.GoneAndRetryWithRetryTimeoutInSecondsOverride = 1; this.partitionKeyRangeLocationCache = GlobalPartitionEndpointManagerNoOp.Instance; - - ClientRetryPolicy retryPolicy = new ClientRetryPolicy(mockDocumentClientContext.GlobalEndpointManager, this.partitionKeyRangeLocationCache, enableEndpointDiscovery: true, new RetryOptions()); + + ClientRetryPolicy retryPolicy = new ClientRetryPolicy(mockDocumentClientContext.GlobalEndpointManager, this.partitionKeyRangeLocationCache, new RetryOptions(), enableEndpointDiscovery: true, isPertitionLevelFailoverEnabled: false); INameValueCollection headers = new DictionaryNameValueCollection(); headers.Set(HttpConstants.HttpHeaders.ConsistencyLevel, ConsistencyLevel.BoundedStaleness.ToString()); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index efcd174d6d..6d3a9a7f4d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -180,8 +180,9 @@ private ClientRetryPolicy CreateClientRetryPolicy( return new ClientRetryPolicy( endpointManager, this.partitionKeyRangeLocationCache, - enableEndpointDiscovery, - new RetryOptions()); + new RetryOptions(), + enableEndpointDiscovery, + isPertitionLevelFailoverEnabled: false); } [TestMethod] From a7c17fe918d78bcce2041ce8e6bc7c1be3e09d6d 
Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 23 Oct 2023 15:00:37 -0700 Subject: [PATCH 25/31] Code changes to fix unit tests. --- .../LocationCacheTests.cs | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index 6d3a9a7f4d..b93107b302 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -131,7 +131,7 @@ private async Task ValidateRetryOnSessionNotAvailabeWithEndpointDiscoveryDisable useMultipleWriteLocations: useMultipleWriteLocations, enableEndpointDiscovery: enableEndpointDiscovery, isPreferredLocationsListEmpty: isPreferredLocationsListEmpty); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, endpointManager); + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, partitionLevelFailoverEnabled: false, endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false)) { @@ -174,7 +174,8 @@ await BackoffRetryUtility.ExecuteAsync( } private ClientRetryPolicy CreateClientRetryPolicy( - bool enableEndpointDiscovery, + bool enableEndpointDiscovery, + bool partitionLevelFailoverEnabled, GlobalEndpointManager endpointManager) { return new ClientRetryPolicy( @@ -182,7 +183,7 @@ private ClientRetryPolicy CreateClientRetryPolicy( this.partitionKeyRangeLocationCache, new RetryOptions(), enableEndpointDiscovery, - isPertitionLevelFailoverEnabled: false); + isPertitionLevelFailoverEnabled: partitionLevelFailoverEnabled); } [TestMethod] @@ -204,7 +205,7 @@ private async Task ValidateRetryOnSessionNotAvailabeWithDisableMultipleWriteLoca isPreferredLocationsListEmpty: isPreferredLocationsListEmpty); 
endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, endpointManager); + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, partitionLevelFailoverEnabled: false, endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: true, isMasterResourceType: false)) { @@ -282,7 +283,7 @@ private async Task ValidateRetryOnReadSessionNotAvailabeWithEnableMultipleWriteL preferedRegionListOverride: preferredList); endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, endpointManager); + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, partitionLevelFailoverEnabled: false, endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: true, isMasterResourceType: false)) { @@ -358,7 +359,7 @@ private async Task ValidateRetryOnWriteSessionNotAvailabeWithEnableMultipleWrite preferedRegionListOverride: preferredList); endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, endpointManager); + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, partitionLevelFailoverEnabled: false, endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: false, isMasterResourceType: false)) { @@ -430,7 +431,7 @@ public async Task ValidateRetryOnWriteForbiddenExceptionAsync() isPreferredLocationsListEmpty: false); endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery: true, endpointManager: endpointManager); + 
ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery: true, partitionLevelFailoverEnabled: false, endpointManager: endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: false, isMasterResourceType: false)) { @@ -502,7 +503,7 @@ private async Task ValidateRetryOnDatabaseAccountNotFoundAsync(bool enableMultip isPreferredLocationsListEmpty: false); endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery: true, endpointManager: endpointManager); + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery: true, partitionLevelFailoverEnabled: false, endpointManager: endpointManager); int expectedRetryCount = isReadRequest || enableMultipleWriteLocations ? 2 : 1; @@ -607,7 +608,7 @@ private async Task ValidateRetryOnHttpExceptionAsync(bool enableMultipleWriteLoc enforceSingleMasterSingleWriteLocation: true); endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery: true, endpointManager: endpointManager); + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery: true, partitionLevelFailoverEnabled: false, endpointManager: endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false)) { @@ -682,14 +683,14 @@ await BackoffRetryUtility.ExecuteAsync( [DataRow(true, true, false, false, false, DisplayName = "Read request - Multi master - no preferred locations - without partition level failover - should NOT retry")] [DataRow(false, true, false, false, false, DisplayName = "Write request - Multi master - no preferred locations - without partition level failover - should NOT retry")] [DataRow(true, false, true, true, false, DisplayName = 
"Read request - Single master - with preferred locations - without partition level failover - should retry")] - [DataRow(false, false, true, true, false, DisplayName = "Write request - Single master - with preferred locations - without partition level failover - should retry")] + [DataRow(false, false, true, false, false, DisplayName = "Write request - Single master - with preferred locations - without partition level failover - should NOT retry")] [DataRow(true, true, true, true, false, DisplayName = "Read request - Multi master - with preferred locations - without partition level failover - should retry")] [DataRow(false, true, true, true, false, DisplayName = "Write request - Multi master - with preferred locations - without partition level failover - should retry")] [DataRow(true, false, false, false, true, DisplayName = "Read request - Single master - no preferred locations - with partition level failover - should NOT retry")] [DataRow(false, false, false, false, true, DisplayName = "Write request - Single master - no preferred locations - with partition level failover - should NOT retry")] [DataRow(true, true, false, false, true, DisplayName = "Read request - Multi master - no preferred locations - with partition level failover - should NOT retry")] [DataRow(false, true, false, false, true, DisplayName = "Write request - Multi master - no preferred locations - with partition level failover - should NOT retry")] - [DataRow(true, false, true, true, true, DisplayName = "Read request - Single master - with preferred locations - with partition level failover - should retry")] + [DataRow(true, false, true, true, true, DisplayName = "Read request - Single master - with preferred locations - with partition level failover - should NOT retry")] [DataRow(false, false, true, true, true, DisplayName = "Write request - Single master - with preferred locations - with partition level failover - should retry")] [DataRow(true, true, true, true, true, DisplayName = "Read 
request - Multi master - with preferred locations - with partition level failover - should retry")] [DataRow(false, true, true, true, true, DisplayName = "Write request - Multi master - with preferred locations - with partition level failover - should retry")] @@ -715,8 +716,9 @@ public async Task ClientRetryPolicy_ValidateRetryOnServiceUnavailable( preferedRegionListOverride: preferredList, enforceSingleMasterSingleWriteLocation: true); - endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); - ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, endpointManager); + endpointManager.InitializeAccountPropertiesAndStartBackgroundRefresh(this.databaseAccount); + + ClientRetryPolicy retryPolicy = this.CreateClientRetryPolicy(enableEndpointDiscovery, partitionLevelFailoverEnabled: enablePartitionLevelFailover, endpointManager); using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false)) { From 9215f0c1e6128f4ba7d408f150479cef5a67d3ea Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 23 Oct 2023 15:22:26 -0700 Subject: [PATCH 26/31] Code changes to update md file. 
--- docs/PerPartitionAutomaticFailoverDesign.md | 27 ++------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/docs/PerPartitionAutomaticFailoverDesign.md b/docs/PerPartitionAutomaticFailoverDesign.md index a216f9ab82..dc334e4400 100644 --- a/docs/PerPartitionAutomaticFailoverDesign.md +++ b/docs/PerPartitionAutomaticFailoverDesign.md @@ -3,8 +3,7 @@ ## Table of Contents * [Scope.](#scope) -* [Backgraound.](#backgraound) -* [Design Approach.](#design-approach) +* [Background.](#background) * [How Does the SDK know Which Region to Fail Over.](#how-does-the-sdk-know-which-region-to-fail-over) * [References.](#references) @@ -12,34 +11,12 @@ The scope of the per partition automatic failover design document is applicable for the Cosmos .NET SDK configured for both `Gateway` and `Direct` mode. -## Backgraund +## Background Today, the partition level failovers are applicable for multi master write acounts, for a simple reason. If one of the write region fails with a write forbidden 403 exception, then the SDK has the knowledge (by looking up the `ApplicationPreferredRegions`) of the other regions to failover. With the per partition automatic failover, if a partition is in quorum loss, then the backend automatically marks another region as the write region, based on the account configuration. Therefore, any retry for the write requests, to the next preferred region should be successful. This idea extends the SDK's retry logic to retry the write requests for single master write accounts, for any service unavailable (status codes 503) errors. -## Design Approach - -Today, the partition level failover is applicable only for the multi-master write accounts. In order to enable the partition level failover for single master write accounts, below changes are required to be made: - -- In the `ClientRetryPolicy.ShouldRetryOnServiceUnavailable()`, enable the retry for Single Master write accounts. 
This is done by removing the below piece of code: - - ``` - if (!this.canUseMultipleWriteLocations - && !this.isReadRequest) - { - // Write requests on single master cannot be retried, no other regions available. - return ShouldRetryResult.NoRetry(); - } - ``` - - -- Today, when a call to get the collection for a specific region fails in the Gateway, the Gateway returns a `Service Unavailable - 503` Status, with a Sub Status code `9002`. Per the current behavior, our .NET SDK doesn't retry for `503.9002`, and it only does so for `503.Unknown` code. Therefore the SDK was not retrying initially. In order to resolve this, delete the `ClientRetryPolicy.IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode)` method completely and with this in place, the SDK should retry on any service unavailable by default and it will not depend upon the sub-status codes to make the retry decision. - -- Currently, there is an option `EnablePartitionLevelFailover` in the `CosmosClientOptions` to enable or disable the per partition automatic failover. However this option is not `public` yet. The approach we would like to take is to develop this feature behind a feature flag called `AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED`. By setting this feature flag, the external customers can enable of disable the partition level failover. - -- For the customers to enable the partition level failover, we have agreed to make the `ApplicationPreferredRegions` as a mandatory parameter the `CosmosClientOptions`. Therefore, if the partition level failover is enabled, and the `ApplicationPreferredRegions` list is not provided, an `ArgumentException` will be thrown. This will be a change in the behavior. - ## How Does the SDK know Which Region to Fail Over Right now, the .NET SDK depends upon the `GlobalPartitionEndpointManagerCore` to resolve the endpoints. 
There is a method `TryMarkEndpointUnavailableForPartitionKeyRange()` within the class, that is responsible to add the override by iterating over the next read regions. This is how the .NET SDK knows which region to fail over. From 813ea2de560725088de54d07bc59297844fb0748 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 23 Oct 2023 15:53:42 -0700 Subject: [PATCH 27/31] Code changes to remove cache expiry check for account read endpoints. --- .../src/Routing/LocationCache.cs | 85 ++++++++----------- 1 file changed, 36 insertions(+), 49 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs index 9a92d0a382..4480df1826 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs @@ -22,8 +22,8 @@ namespace Microsoft.Azure.Cosmos.Routing internal sealed class LocationCache { private const string UnavailableLocationsExpirationTimeInSeconds = "UnavailableLocationsExpirationTimeInSeconds"; - private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; - + private static int DefaultUnavailableLocationsExpirationTimeInSeconds = 5 * 60; + private readonly bool enableEndpointDiscovery; private readonly Uri defaultEndpoint; private readonly bool useMultipleWriteLocations; @@ -34,7 +34,7 @@ internal sealed class LocationCache private DatabaseAccountLocationsInfo locationInfo; private DateTime lastCacheUpdateTimestamp; - private bool enableMultipleWriteLocations; + private bool enableMultipleWriteLocations; public LocationCache( ReadOnlyCollection preferredLocations, @@ -52,7 +52,7 @@ public LocationCache( this.lockObject = new object(); this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary(); this.lastCacheUpdateTimestamp = DateTime.MinValue; - this.enableMultipleWriteLocations = false; + this.enableMultipleWriteLocations = false; this.unavailableLocationsExpirationTime =
TimeSpan.FromSeconds(LocationCache.DefaultUnavailableLocationsExpirationTimeInSeconds); #if !(NETSTANDARD15 || NETSTANDARD16) @@ -100,24 +100,11 @@ public ReadOnlyCollection ReadEndpoints return this.locationInfo.ReadEndpoints; } } - + /// /// Gets list of account level read endpoints. - /// - public ReadOnlyCollection AccountReadEndpoints - { - get - { - // Hot-path: avoid ConcurrentDictionary methods which acquire locks - if (DateTime.UtcNow - this.lastCacheUpdateTimestamp > this.unavailableLocationsExpirationTime - && this.locationUnavailablityInfoByEndpoint.Any()) - { - this.UpdateLocationCache(); - } - - return this.locationInfo.AccountReadEndpoints; - } - } + /// + public ReadOnlyCollection AccountReadEndpoints => this.locationInfo.AccountReadEndpoints; /// /// Gets list of write endpoints ordered by @@ -137,8 +124,8 @@ public ReadOnlyCollection WriteEndpoints return this.locationInfo.WriteEndpoints; } - } - + } + /// /// Returns the location corresponding to the endpoint if location specific endpoint is provided. /// For the defaultEndPoint, we will return the first available write location. 
@@ -509,41 +496,41 @@ private void UpdateLocationCache( if (readLocations != null) { - nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation( - readLocations, - out ReadOnlyCollection availableReadLocations); + nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation( + readLocations, + out ReadOnlyCollection availableReadLocations); - nextLocationInfo.AvailableReadLocations = availableReadLocations; - nextLocationInfo.AccountReadEndpoints = nextLocationInfo.AvailableReadEndpointByLocation.Select(x => x.Value).ToList().AsReadOnly(); + nextLocationInfo.AvailableReadLocations = availableReadLocations; + nextLocationInfo.AccountReadEndpoints = nextLocationInfo.AvailableReadEndpointByLocation.Select(x => x.Value).ToList().AsReadOnly(); } if (writeLocations != null) { - nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation( - writeLocations, - out ReadOnlyCollection availableWriteLocations); + nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation( + writeLocations, + out ReadOnlyCollection availableWriteLocations); nextLocationInfo.AvailableWriteLocations = availableWriteLocations; - } - - nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( - endpointsByLocation: nextLocationInfo.AvailableWriteEndpointByLocation, - orderedLocations: nextLocationInfo.AvailableWriteLocations, - expectedAvailableOperation: OperationType.Write, - fallbackEndpoint: this.defaultEndpoint); - - nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints( - endpointsByLocation: nextLocationInfo.AvailableReadEndpointByLocation, - orderedLocations: nextLocationInfo.AvailableReadLocations, - expectedAvailableOperation: OperationType.Read, + } + + nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints( + endpointsByLocation: nextLocationInfo.AvailableWriteEndpointByLocation, + orderedLocations: nextLocationInfo.AvailableWriteLocations, + 
expectedAvailableOperation: OperationType.Write, + fallbackEndpoint: this.defaultEndpoint); + + nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints( + endpointsByLocation: nextLocationInfo.AvailableReadEndpointByLocation, + orderedLocations: nextLocationInfo.AvailableReadLocations, + expectedAvailableOperation: OperationType.Read, fallbackEndpoint: nextLocationInfo.WriteEndpoints[0]); - + this.lastCacheUpdateTimestamp = DateTime.UtcNow; DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})", string.Join(", ", nextLocationInfo.WriteEndpoints.Select(endpoint => endpoint.ToString())), string.Join(", ", nextLocationInfo.ReadEndpoints.Select(endpoint => endpoint.ToString()))); - + this.locationInfo = nextLocationInfo; } } @@ -604,7 +591,7 @@ private ReadOnlyCollection GetPreferredAvailableEndpoints(ReadOnlyDictionar if (endpoints.Count == 0) { endpoints.Add(fallbackEndpoint); - } + } return endpoints.AsReadOnly(); } @@ -665,7 +652,7 @@ public DatabaseAccountLocationsInfo(ReadOnlyCollection preferredLocation this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); this.AvailableReadEndpointByLocation = new ReadOnlyDictionary(new Dictionary(StringComparer.OrdinalIgnoreCase)); this.WriteEndpoints = new List() { defaultEndpoint }.AsReadOnly(); - this.AccountReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); + this.AccountReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); this.ReadEndpoints = new List() { defaultEndpoint }.AsReadOnly(); } @@ -676,8 +663,8 @@ public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) this.AvailableReadLocations = other.AvailableReadLocations; this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation; this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation; - this.WriteEndpoints = other.WriteEndpoints; - this.AccountReadEndpoints = other.AccountReadEndpoints; + 
this.WriteEndpoints = other.WriteEndpoints; + this.AccountReadEndpoints = other.AccountReadEndpoints; this.ReadEndpoints = other.ReadEndpoints; } @@ -687,7 +674,7 @@ public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other) public ReadOnlyDictionary AvailableWriteEndpointByLocation { get; set; } public ReadOnlyDictionary AvailableReadEndpointByLocation { get; set; } public ReadOnlyCollection WriteEndpoints { get; set; } - public ReadOnlyCollection ReadEndpoints { get; set; } + public ReadOnlyCollection ReadEndpoints { get; set; } public ReadOnlyCollection AccountReadEndpoints { get; set; } } From 3cb1053f7d6c2e2d67591ee446710aa16dc0d9a0 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 23 Oct 2023 17:11:55 -0700 Subject: [PATCH 28/31] Code changes to fix unit test. --- .../CosmosClientOptionsUnitTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index b625858c09..d1ce323b7f 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -259,7 +259,7 @@ public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredReg ArgumentException exception = Assert.ThrowsException(() => cosmosClientBuilder.Build()); Assert.AreEqual( - expected: "ApplicationPreferredRegions is required when EnablePartitionLevelFailover is enabled.", + expected: "Either ApplicationPreferredRegions or ApplicationRegion is required when EnablePartitionLevelFailover is enabled.", actual: exception.Message); } finally From 8f782fcb528c669f72aaf5d1fafeb83e8e6c85e6 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Mon, 23 Oct 2023 18:14:49 -0700 Subject: [PATCH 29/31] Code changes to fix more tests. 
--- .../GlobalPartitionEndpointManagerTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs index 05cbfc8384..9788f7c128 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs @@ -253,7 +253,7 @@ public void CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_ShouldTh cosmosClientOptions)); Assert.AreEqual( - expected: "ApplicationPreferredRegions is required when EnablePartitionLevelFailover is enabled.", + expected: "Either ApplicationPreferredRegions or ApplicationRegion is required when EnablePartitionLevelFailover is enabled.", actual: exception.Message); } From 1e94fc148a47567312a1af2b569d27c0d099e60e Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Tue, 24 Oct 2023 14:49:09 -0700 Subject: [PATCH 30/31] Code changes to address review comments. 
--- Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs | 5 ++--- .../CosmosClientOptionsUnitTests.cs | 2 +- .../GlobalPartitionEndpointManagerTests.cs | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 4e08885007..a90a2e04a6 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -892,10 +892,9 @@ private void ValidateLimitToEndpointSettings() private void ValidatePartitionLevelFailoverSettings() { if (this.EnablePartitionLevelFailover - && (this.ApplicationPreferredRegions == null || this.ApplicationPreferredRegions.Count == 0) - && string.IsNullOrEmpty(this.ApplicationRegion)) + && (this.ApplicationPreferredRegions == null || this.ApplicationPreferredRegions.Count == 0)) { - throw new ArgumentException($"Either {nameof(this.ApplicationPreferredRegions)} or {nameof(this.ApplicationRegion)} is required when {nameof(this.EnablePartitionLevelFailover)} is enabled."); + throw new ArgumentException($"{nameof(this.ApplicationPreferredRegions)} is required when {nameof(this.EnablePartitionLevelFailover)} is enabled."); } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index d1ce323b7f..b625858c09 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -259,7 +259,7 @@ public void CosmosClientOptions_WhenPartitionLevelFailoverEnabledAndPreferredReg ArgumentException exception = Assert.ThrowsException(() => cosmosClientBuilder.Build()); Assert.AreEqual( - expected: "Either ApplicationPreferredRegions or ApplicationRegion is required when EnablePartitionLevelFailover is enabled.", + expected: 
"ApplicationPreferredRegions is required when EnablePartitionLevelFailover is enabled.", actual: exception.Message); } finally diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs index 9788f7c128..05cbfc8384 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/PartitionKeyRangeFailoverTests/GlobalPartitionEndpointManagerTests.cs @@ -253,7 +253,7 @@ public void CreateItemAsync_WithNoPreferredRegionsAndServiceUnavailable_ShouldTh cosmosClientOptions)); Assert.AreEqual( - expected: "Either ApplicationPreferredRegions or ApplicationRegion is required when EnablePartitionLevelFailover is enabled.", + expected: "ApplicationPreferredRegions is required when EnablePartitionLevelFailover is enabled.", actual: exception.Message); } From 4180263a02e8177bd47178d7f40d44986e6ab271 Mon Sep 17 00:00:00 2001 From: Debdatta Kunda Date: Thu, 26 Oct 2023 12:37:53 -0700 Subject: [PATCH 31/31] Code changes to fix verbiage in design document. --- docs/PerPartitionAutomaticFailoverDesign.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/PerPartitionAutomaticFailoverDesign.md b/docs/PerPartitionAutomaticFailoverDesign.md index dc334e4400..fa1425c5d3 100644 --- a/docs/PerPartitionAutomaticFailoverDesign.md +++ b/docs/PerPartitionAutomaticFailoverDesign.md @@ -9,7 +9,7 @@ ## Scope -The scope of the per partition automatic failover design document is applicable for the Cosmos .NET SDK configured for both `Gateway` and `Direct` mode. +The scope of the per partition automatic failover design document is applicable for the Cosmos .NET SDK configured only for `Direct` mode at the moment. ## Background