Skip to content

Commit

Permalink
Per Partition Automatic Failover: Fixes Gateway 503 Cold Start Issue (#…
Browse files Browse the repository at this point in the history
…4073)

* Code changes to add retry logic for GW returned 503.9002.

* Revert "Code changes to add retry logic for GW returned 503.9002."

This reverts commit 53ef5f3.

* Code changes to clean up the PPAF retry logic fix.

* Code changes to add retry logic for GW returned 503.9002.

* Revert "Code changes to add retry logic for GW returned 503.9002."

This reverts commit 53ef5f3.

* Code changes to clean up the PPAF retry logic fix.

* Code changes to revert location cache changes.

* Code changes to revert location cache changes.

* Code changes to fix some of the failing tests.

* Code changes to fix unit tests.

* Code changes to add unit tests for client options.

* Code changes to draft docs for PPAF design approach.

* Code changes to add SDK side design docs for PPAF.

* Code changes to modify the PPAF design.

* Code changes to fix unit test.

* Code changes to rename test name.

* Code changes to add some cosmetic changes.

* Code changes to enable retry on write for all regions in single master accounts.

* Code changes to add code comments.

* Code changes to clean up and handle endpoints in location cache.

* Code changes to fix unit tests. Added detailed code comments.

* Code changes to clean up the account read endpoints generation logic.

* Code changes to fix unit tests.

* Code changes to disable retry when ppaf is not enabled. Also validated application preferred region.

* Code changes to fix unit tests.

* Code changes to update md file.

* Code changes to remove cache expiry check for account read endpoints.

* Code changes to fix unit test.

* Code changes to fix more tests.

* Code changes to address review comments.

* Code changes to fix verbiage in design document.
  • Loading branch information
kundadebdatta authored Oct 27, 2023
1 parent 083df87 commit a4836ff
Show file tree
Hide file tree
Showing 14 changed files with 378 additions and 83 deletions.
22 changes: 10 additions & 12 deletions Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy
private readonly GlobalEndpointManager globalEndpointManager;
private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache;
private readonly bool enableEndpointDiscovery;
private readonly bool isPertitionLevelFailoverEnabled;
private int failoverRetryCount;

private int sessionTokenRetryCount;
Expand All @@ -41,8 +42,9 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy
public ClientRetryPolicy(
GlobalEndpointManager globalEndpointManager,
GlobalPartitionEndpointManager partitionKeyRangeLocationCache,
RetryOptions retryOptions,
bool enableEndpointDiscovery,
RetryOptions retryOptions)
bool isPertitionLevelFailoverEnabled)
{
this.throttlingRetry = new ResourceThrottleRetryPolicy(
retryOptions.MaxRetryAttemptsOnThrottledRequests,
Expand All @@ -55,6 +57,7 @@ public ClientRetryPolicy(
this.sessionTokenRetryCount = 0;
this.serviceUnavailableRetryCount = 0;
this.canUseMultipleWriteLocations = false;
this.isPertitionLevelFailoverEnabled = isPertitionLevelFailoverEnabled;
}

/// <summary>
Expand Down Expand Up @@ -247,8 +250,7 @@ private async Task<ShouldRetryResult> ShouldRetryInternalAsync(
}

// Received 503 due to client connect timeout or Gateway
if (statusCode == HttpStatusCode.ServiceUnavailable
&& ClientRetryPolicy.IsRetriableServiceUnavailable(subStatusCode))
if (statusCode == HttpStatusCode.ServiceUnavailable)
{
DefaultTrace.TraceWarning("ClientRetryPolicy: ServiceUnavailable. Refresh cache and retry. Failed Location: {0}; ResourceAddress: {1}",
this.documentServiceRequest?.RequestContext?.LocationEndpointToRoute?.ToString() ?? string.Empty,
Expand All @@ -265,12 +267,6 @@ private async Task<ShouldRetryResult> ShouldRetryInternalAsync(
return null;
}

/// <summary>
/// Decides, from the sub-status code alone, whether a ServiceUnavailable (503) response is eligible for retry.
/// </summary>
/// <param name="subStatusCode">The sub-status code of the failed response; may be null.</param>
/// <returns>
/// True when the sub-status is <see cref="SubStatusCodes.Unknown"/>, or when a value is present and
/// IsSDKGeneratedSubStatus() reports it as SDK generated; false otherwise, including when no sub-status is present.
/// </returns>
private static bool IsRetriableServiceUnavailable(SubStatusCodes? subStatusCode)
{
return subStatusCode == SubStatusCodes.Unknown ||
(subStatusCode.HasValue && subStatusCode.Value.IsSDKGeneratedSubStatus());
}

private async Task<ShouldRetryResult> ShouldRetryOnEndpointFailureAsync(
bool isReadRequest,
bool markBothReadAndWriteAsUnavailable,
Expand Down Expand Up @@ -390,7 +386,7 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable()

/// <summary>
/// For a ServiceUnavailable (503.0) we could be having a timeout from Direct/TCP locally or a request to Gateway request with a similar response due to an endpoint not yet available.
/// We try and retry the request only if there are other regions available.
/// We try and retry the request only if there are other regions available. The retry logic is applicable for single master write accounts as well.
/// </summary>
private ShouldRetryResult ShouldRetryOnServiceUnavailable()
{
Expand All @@ -401,9 +397,11 @@ private ShouldRetryResult ShouldRetryOnServiceUnavailable()
}

if (!this.canUseMultipleWriteLocations
&& !this.isReadRequest)
&& !this.isReadRequest
&& !this.isPertitionLevelFailoverEnabled)
{
// Write requests on single master cannot be retried, no other regions available
// Write requests on single master cannot be retried if partition level failover is disabled.
// This means there are no other regions available to serve the writes.
return ShouldRetryResult.NoRetry();
}

Expand Down
12 changes: 11 additions & 1 deletion Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ public Func<HttpClient> HttpClientFactory
/// <summary>
/// Enable partition key level failover
/// </summary>
internal bool EnablePartitionLevelFailover { get; set; } = false;
internal bool EnablePartitionLevelFailover { get; set; } = ConfigurationManager.IsPartitionLevelFailoverEnabled(defaultValue: false);

/// <summary>
/// Quorum Read allowed with eventual consistency account or consistent prefix account.
Expand Down Expand Up @@ -752,6 +752,7 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId)
{
this.ValidateDirectTCPSettings();
this.ValidateLimitToEndpointSettings();
this.ValidatePartitionLevelFailoverSettings();

ConnectionPolicy connectionPolicy = new ConnectionPolicy()
{
Expand Down Expand Up @@ -888,6 +889,15 @@ private void ValidateLimitToEndpointSettings()
}
}

/// <summary>
/// Validates the partition level failover configuration: when <see cref="EnablePartitionLevelFailover"/> is set,
/// <see cref="ApplicationPreferredRegions"/> must be non-null and contain at least one entry.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when partition level failover is enabled and <see cref="ApplicationPreferredRegions"/> is null or empty.
/// </exception>
private void ValidatePartitionLevelFailoverSettings()
{
// The check only applies when the feature is switched on; otherwise the preferred regions are not inspected.
if (this.EnablePartitionLevelFailover
&& (this.ApplicationPreferredRegions == null || this.ApplicationPreferredRegions.Count == 0))
{
throw new ArgumentException($"{nameof(this.ApplicationPreferredRegions)} is required when {nameof(this.EnablePartitionLevelFailover)} is enabled.");
}
}

private void ValidateDirectTCPSettings()
{
string settingName = string.Empty;
Expand Down
9 changes: 6 additions & 3 deletions Microsoft.Azure.Cosmos/src/RetryPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@ internal sealed class RetryPolicy : IRetryPolicyFactory
private readonly GlobalPartitionEndpointManager partitionKeyRangeLocationCache;
private readonly GlobalEndpointManager globalEndpointManager;
private readonly bool enableEndpointDiscovery;
private readonly bool isPertitionLevelFailoverEnabled;
private readonly RetryOptions retryOptions;

/// <summary>
/// Initialize the instance of the RetryPolicy class
/// </summary>
public RetryPolicy(
GlobalEndpointManager globalEndpointManager,
GlobalEndpointManager globalEndpointManager,
ConnectionPolicy connectionPolicy,
GlobalPartitionEndpointManager partitionKeyRangeLocationCache)
{
this.enableEndpointDiscovery = connectionPolicy.EnableEndpointDiscovery;
this.isPertitionLevelFailoverEnabled = connectionPolicy.EnablePartitionLevelFailover;
this.globalEndpointManager = globalEndpointManager;
this.retryOptions = connectionPolicy.RetryOptions;
this.partitionKeyRangeLocationCache = partitionKeyRangeLocationCache;
Expand All @@ -37,10 +39,11 @@ public IDocumentClientRetryPolicy GetRequestPolicy()
ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(
this.globalEndpointManager,
this.partitionKeyRangeLocationCache,
this.retryOptions,
this.enableEndpointDiscovery,
this.retryOptions);
this.isPertitionLevelFailoverEnabled);

return clientRetryPolicy;
}
}
}
}
6 changes: 4 additions & 2 deletions Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,11 @@ public GlobalEndpointManager(IDocumentClientInternal owner, ConnectionPolicy con
}
}

public ReadOnlyCollection<Uri> ReadEndpoints => this.locationCache.ReadEndpoints;
public ReadOnlyCollection<Uri> ReadEndpoints => this.locationCache.ReadEndpoints;

public ReadOnlyCollection<Uri> AccountReadEndpoints => this.locationCache.AccountReadEndpoints;

public ReadOnlyCollection<Uri> WriteEndpoints => this.locationCache.WriteEndpoints;
public ReadOnlyCollection<Uri> WriteEndpoints => this.locationCache.WriteEndpoints;

public int PreferredLocationCount => this.connectionPolicy.PreferredLocations != null ? this.connectionPolicy.PreferredLocations.Count : 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,20 @@ public override bool TryMarkEndpointUnavailableForPartitionKeyRange(

PartitionKeyRangeFailoverInfo partionFailover = this.PartitionKeyRangeToLocation.Value.GetOrAdd(
partitionKeyRange,
(_) => new PartitionKeyRangeFailoverInfo(failedLocation));

(_) => new PartitionKeyRangeFailoverInfo(failedLocation));

// For any single master write accounts, the next locations to fail over will be the read regions configured at the account level.
// For multi master write accounts, since all the regions are treated as write regions, the next locations to fail over
// will be the preferred read regions that are configured in the application preferred regions in the CosmosClientOptions.
bool isSingleMasterWriteAccount = !this.globalEndpointManager.CanUseMultipleWriteLocations(request);

ReadOnlyCollection<Uri> nextLocations = isSingleMasterWriteAccount
? this.globalEndpointManager.AccountReadEndpoints
: this.globalEndpointManager.ReadEndpoints;

// Will return true if it was able to update to a new region
if (partionFailover.TryMoveNextLocation(
locations: this.globalEndpointManager.ReadEndpoints,
locations: nextLocations,
failedLocation: failedLocation))
{
DefaultTrace.TraceInformation("Partition level override added to new location. PartitionKeyRange: {0}, failedLocation: {1}, new location: {2}",
Expand Down
2 changes: 2 additions & 0 deletions Microsoft.Azure.Cosmos/src/Routing/IGlobalEndpointManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ internal interface IGlobalEndpointManager : IDisposable
{
ReadOnlyCollection<Uri> ReadEndpoints { get; }

ReadOnlyCollection<Uri> AccountReadEndpoints { get; }

ReadOnlyCollection<Uri> WriteEndpoints { get; }

int PreferredLocationCount { get; }
Expand Down
41 changes: 31 additions & 10 deletions Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ public ReadOnlyCollection<Uri> ReadEndpoints
}
}

/// <summary>
/// Gets list of account level read endpoints.
/// </summary>
public ReadOnlyCollection<Uri> AccountReadEndpoints => this.locationInfo.AccountReadEndpoints;

/// <summary>
/// Gets list of write endpoints ordered by
/// 1. Preferred location
Expand Down Expand Up @@ -491,20 +496,35 @@ private void UpdateLocationCache(

if (readLocations != null)
{
ReadOnlyCollection<string> availableReadLocations;
nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(readLocations, out availableReadLocations);
nextLocationInfo.AvailableReadEndpointByLocation = this.GetEndpointByLocation(
readLocations,
out ReadOnlyCollection<string> availableReadLocations);

nextLocationInfo.AvailableReadLocations = availableReadLocations;
nextLocationInfo.AccountReadEndpoints = nextLocationInfo.AvailableReadEndpointByLocation.Select(x => x.Value).ToList().AsReadOnly();
}

if (writeLocations != null)
{
ReadOnlyCollection<string> availableWriteLocations;
nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(writeLocations, out availableWriteLocations);
nextLocationInfo.AvailableWriteEndpointByLocation = this.GetEndpointByLocation(
writeLocations,
out ReadOnlyCollection<string> availableWriteLocations);

nextLocationInfo.AvailableWriteLocations = availableWriteLocations;
}

nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableWriteEndpointByLocation, nextLocationInfo.AvailableWriteLocations, OperationType.Write, this.defaultEndpoint);
nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(nextLocationInfo.AvailableReadEndpointByLocation, nextLocationInfo.AvailableReadLocations, OperationType.Read, nextLocationInfo.WriteEndpoints[0]);
nextLocationInfo.WriteEndpoints = this.GetPreferredAvailableEndpoints(
endpointsByLocation: nextLocationInfo.AvailableWriteEndpointByLocation,
orderedLocations: nextLocationInfo.AvailableWriteLocations,
expectedAvailableOperation: OperationType.Write,
fallbackEndpoint: this.defaultEndpoint);

nextLocationInfo.ReadEndpoints = this.GetPreferredAvailableEndpoints(
endpointsByLocation: nextLocationInfo.AvailableReadEndpointByLocation,
orderedLocations: nextLocationInfo.AvailableReadLocations,
expectedAvailableOperation: OperationType.Read,
fallbackEndpoint: nextLocationInfo.WriteEndpoints[0]);

this.lastCacheUpdateTimestamp = DateTime.UtcNow;

DefaultTrace.TraceInformation("Current WriteEndpoints = ({0}) ReadEndpoints = ({1})",
Expand Down Expand Up @@ -534,8 +554,7 @@ private ReadOnlyCollection<Uri> GetPreferredAvailableEndpoints(ReadOnlyDictionar

foreach (string location in currentLocationInfo.PreferredLocations)
{
Uri endpoint;
if (endpointsByLocation.TryGetValue(location, out endpoint))
if (endpointsByLocation.TryGetValue(location, out Uri endpoint))
{
if (this.IsEndpointUnavailable(endpoint, expectedAvailableOperation))
{
Expand All @@ -560,9 +579,8 @@ private ReadOnlyCollection<Uri> GetPreferredAvailableEndpoints(ReadOnlyDictionar
{
foreach (string location in orderedLocations)
{
Uri endpoint;
if (!string.IsNullOrEmpty(location) && // location is empty during manual failover
endpointsByLocation.TryGetValue(location, out endpoint))
endpointsByLocation.TryGetValue(location, out Uri endpoint))
{
endpoints.Add(endpoint);
}
Expand Down Expand Up @@ -634,6 +652,7 @@ public DatabaseAccountLocationsInfo(ReadOnlyCollection<string> preferredLocation
this.AvailableWriteEndpointByLocation = new ReadOnlyDictionary<string, Uri>(new Dictionary<string, Uri>(StringComparer.OrdinalIgnoreCase));
this.AvailableReadEndpointByLocation = new ReadOnlyDictionary<string, Uri>(new Dictionary<string, Uri>(StringComparer.OrdinalIgnoreCase));
this.WriteEndpoints = new List<Uri>() { defaultEndpoint }.AsReadOnly();
this.AccountReadEndpoints = new List<Uri>() { defaultEndpoint }.AsReadOnly();
this.ReadEndpoints = new List<Uri>() { defaultEndpoint }.AsReadOnly();
}

Expand All @@ -645,6 +664,7 @@ public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other)
this.AvailableWriteEndpointByLocation = other.AvailableWriteEndpointByLocation;
this.AvailableReadEndpointByLocation = other.AvailableReadEndpointByLocation;
this.WriteEndpoints = other.WriteEndpoints;
this.AccountReadEndpoints = other.AccountReadEndpoints;
this.ReadEndpoints = other.ReadEndpoints;
}

Expand All @@ -655,6 +675,7 @@ public DatabaseAccountLocationsInfo(DatabaseAccountLocationsInfo other)
public ReadOnlyDictionary<string, Uri> AvailableReadEndpointByLocation { get; set; }
public ReadOnlyCollection<Uri> WriteEndpoints { get; set; }
public ReadOnlyCollection<Uri> ReadEndpoints { get; set; }
public ReadOnlyCollection<Uri> AccountReadEndpoints { get; set; }
}

[Flags]
Expand Down
28 changes: 26 additions & 2 deletions Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@ namespace Microsoft.Azure.Cosmos
internal static class ConfigurationManager
{
/// <summary>
/// A read-only string containing the environment variablename for enabling replica validation.
/// This will eventually be removed oncereplica valdiatin is enabled by default for both preview
/// A read-only string containing the environment variable name for enabling replica validation.
/// This will eventually be removed once replica validation is enabled by default for both preview
/// and GA.
/// </summary>
internal static readonly string ReplicaConnectivityValidationEnabled = "AZURE_COSMOS_REPLICA_VALIDATION_ENABLED";

/// <summary>
/// A read-only string containing the environment variable name for enabling per partition automatic failover.
/// This will eventually be removed once per partition automatic failover is enabled by default for both preview
/// and GA.
/// </summary>
internal static readonly string PartitionLevelFailoverEnabled = "AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED";

public static T GetEnvironmentVariable<T>(string variable, T defaultValue)
{
string value = Environment.GetEnvironmentVariable(variable);
Expand Down Expand Up @@ -50,5 +57,22 @@ public static bool IsReplicaAddressValidationEnabled(
variable: ConfigurationManager.ReplicaConnectivityValidationEnabled,
defaultValue: replicaValidationDefaultValue);
}

/// <summary>
/// Gets the boolean value of the partition level failover environment variable
/// 'AZURE_COSMOS_PARTITION_LEVEL_FAILOVER_ENABLED'. Partition level failover is disabled by default for both
/// preview and GA releases; the user can set the environment variable to override that value for both preview
/// and GA. This method will eventually be removed once partition level failover is enabled by default for both
/// preview and GA.
/// </summary>
/// <param name="defaultValue">
/// A boolean field containing the default value for partition level failover. It is forwarded unchanged to
/// GetEnvironmentVariable (presumably used as the fallback when the variable is not set; confirm against that method).
/// </param>
/// <returns>A boolean flag indicating if partition level failover is enabled.</returns>
public static bool IsPartitionLevelFailoverEnabled(
bool defaultValue)
{
return ConfigurationManager
.GetEnvironmentVariable(
variable: ConfigurationManager.PartitionLevelFailoverEnabled,
defaultValue: defaultValue);
}
}
}
Loading

0 comments on commit a4836ff

Please sign in to comment.