Skip to content

Commit

Permalink
Merge branch 'master' into dev/iainx/regionname-mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
iain authored Sep 12, 2023
2 parents 534bf8a + be0c098 commit 70c8c52
Show file tree
Hide file tree
Showing 94 changed files with 2,243 additions and 1,264 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,11 @@
"customScriptUrl": "[concat('https://raw.githubusercontent.com/Azure/azure-cosmos-dotnet-v3/',parameters('benchmarkingToolsBranchName'),'/Microsoft.Azure.Cosmos.Samples/Tools/Benchmark/AzureVmBenchmark/scripts/custom-script.sh')]",
"vmScriptExtensionScriptName": "execute.sh",
"convertedDatetime": "[dateTimeFromEpoch(parameters('startDate'))]",
"metricsPrefix" : "[if(startsWith(parameters('workloadType'), 'Read'), 'Read', if(startsWith(parameters('workloadType'), 'Query'), 'Query', if(startsWith(parameters('workloadType'), 'Insert'), 'Insert', '')))]",
"appInsightsResourceIds": "[concat('/subscriptions/', subscription().subscriptionId, '/resourceGroups/', resourceGroup().name, '/providers/microsoft.insights/components/', parameters('applicationInsightsName'))]",
"chart0Expression": "[concat('customMetrics\n| where name == \"ReadOperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 50),\n percentile(value, 75),\n percentile(value, 90),\n percentile(value, 95)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart1Expression": "[concat('customMetrics\n| where name == \"ReadOperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 99),\n percentile(value, 99.9),\n percentile(value, 99.99)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart2Expression": "[concat('customMetrics\n| where name == \"ReadOperationRps\" and timestamp > ago(1d)\n| summarize\n avg(value)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]"
"chart0Expression": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 50),\n percentile(value, 75),\n percentile(value, 90),\n percentile(value, 95)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart1Expression": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 99),\n percentile(value, 99.9),\n percentile(value, 99.99)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart2Expression": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationRps\" and timestamp > ago(1d)\n| summarize\n avg(value)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]"

},
"resources": [
Expand Down Expand Up @@ -776,7 +777,7 @@
},
{
"name": "Query",
"value": "customMetrics\n| where name == \"ReadOperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n",
"value": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n')]",
"isOptional": true
},
{
Expand Down Expand Up @@ -834,7 +835,7 @@
"type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
"settings": {
"content": {
"Query": "customMetrics\n| where name == \"ReadOperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n",
"Query": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n')]",
"Dimensions": {
"xAxis": {
"name": "ts",
Expand Down Expand Up @@ -903,7 +904,7 @@
},
{
"name": "Query",
"value": "customMetrics\n| where name == \"ReadOperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n",
"value": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n')]",
"isOptional": true
},
{
Expand Down Expand Up @@ -961,7 +962,7 @@
"type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
"settings": {
"content": {
"Query": "customMetrics\n| where name == \"ReadOperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n",
"Query": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n')]",
"Dimensions": {
"xAxis": {
"name": "ts",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ git checkout ${BENCHMARKING_TOOLS_BRANCH_NAME}
cd 'Microsoft.Azure.Cosmos.Samples/Tools/Benchmark'

echo "########## Build benckmark tool ##########"
dotnet build --configuration Release -p:"OSSProjectRef=true;ShouldUnsetParentConfigurationAndPlatform=false"
dotnet build --configuration Release -p:"OSSProjectRef=true"

echo "########## Run benchmark ##########"
nohup dotnet run -c Release -e ${COSMOS_URI} -k ${COSMOS_KEY} -t ${THROUGHPUT} -n ${DOCUMENTS} --pl ${PARALLELISM} \
nohup dotnet run -c Release -p:"OSSProjectRef=true" -e ${COSMOS_URI} -k ${COSMOS_KEY} -t ${THROUGHPUT} -n ${DOCUMENTS} --pl ${PARALLELISM} \
--enablelatencypercentiles true --resultscontainer ${RESULTS_CONTAINER} --resultspartitionkeyvalue "pk" \
--DiagnosticsStorageConnectionString ${DIAGNOSTICS_STORAGE_CONNECTION_STRING} \
--DiagnosticLatencyThresholdInMs ${DIAGNOSTICS_LATENCY_THRESHOLD_IN_MS} \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ packages:
- azure-cli

runcmd:
- wget https://aka.ms/downloadazcopy-v10-linux
- tar -xvf downloadazcopy-v10-linux
- sudo cp ./azcopy_linux_amd64_*/azcopy /usr/bin/
- wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb
- sudo dpkg -i packages-microsoft-prod.deb
- sudo apt update
Expand Down
40 changes: 12 additions & 28 deletions Microsoft.Azure.Cosmos.Samples/Tools/Benchmark/BenchmarkConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace CosmosBenchmark
using System.Linq;
using System.Runtime;
using CommandLine;
using Microsoft.Azure.Cosmos.Telemetry;
using Microsoft.Azure.Documents.Client;
using Newtonsoft.Json;

Expand Down Expand Up @@ -102,18 +103,12 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Disable core SDK logging")]
public bool DisableCoreSdkLogging { get; set; }

[Option(Required = false, HelpText = "Enable Client Telemetry")]
public bool EnableTelemetry { get; set; }

[Option(Required = false, HelpText = "Enable Distributed Tracing")]
public bool EnableDistributedTracing { get; set; }

[Option(Required = false, HelpText = "Client Telemetry Schedule in Seconds")]
public int TelemetryScheduleInSec { get; set; }

[Option(Required = false, HelpText = "Client Telemetry Endpoint")]
public string TelemetryEndpoint { get; set; }

[Option(Required = false, HelpText = "Endpoint to publish results to")]
public string ResultsEndpoint { get; set; }

Expand Down Expand Up @@ -143,6 +138,9 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Application Insights connection string")]
public string AppInsightsConnectionString { get; set; }

[Option(Required = false, HelpText = "Enable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool EnableClientTelemetry { get; set; } = true;

internal int GetTaskCount(int containerThroughput)
{
int taskCount = this.DegreeOfParallelism;
Expand Down Expand Up @@ -210,35 +208,21 @@ private string GetUserAgentPrefix()

internal Microsoft.Azure.Cosmos.CosmosClient CreateCosmosClient(string accountKey)
{
// Overwrite the default timespan if configured
if(this.TelemetryScheduleInSec > 0)
{
ClientTelemetryOptions.DefaultIntervalForTelemetryJob = TimeSpan.FromSeconds(this.TelemetryScheduleInSec);
}

Microsoft.Azure.Cosmos.CosmosClientOptions clientOptions = new Microsoft.Azure.Cosmos.CosmosClientOptions()
{
ApplicationName = this.GetUserAgentPrefix(),
MaxRetryAttemptsOnRateLimitedRequests = 0,
MaxRequestsPerTcpConnection = this.MaxRequestsPerTcpConnection,
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint,
EnableClientTelemetry = this.EnableClientTelemetry
};

if (this.EnableTelemetry)
{
Environment.SetEnvironmentVariable(
Microsoft.Azure.Cosmos.Telemetry.ClientTelemetryOptions.EnvPropsClientTelemetryEnabled,
"true");

if (this.TelemetryScheduleInSec > 0)
{
Environment.SetEnvironmentVariable(
Microsoft.Azure.Cosmos.Telemetry.ClientTelemetryOptions.EnvPropsClientTelemetrySchedulingInSeconds,
Convert.ToString(this.TelemetryScheduleInSec));
}

if (!string.IsNullOrEmpty(this.TelemetryEndpoint))
{
Environment.SetEnvironmentVariable(
Microsoft.Azure.Cosmos.Telemetry.ClientTelemetryOptions.EnvPropsClientTelemetryEndpoint,
this.TelemetryEndpoint);
}
}

if (!string.IsNullOrWhiteSpace(this.ConsistencyLevel))
{
clientOptions.ConsistencyLevel = (Microsoft.Azure.Cosmos.ConsistencyLevel)Enum.Parse(typeof(Microsoft.Azure.Cosmos.ConsistencyLevel), this.ConsistencyLevel, ignoreCase: true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,17 @@ public void LatencyDiagnostics(
this.WriteEvent(1, dbName, containerName, durationInMs, lazyDiagnostics());
}
}

[Event(2, Level = EventLevel.Informational)]
public void OnOperationSuccess(int operationType, double durationInMs)
{
this.WriteEvent(2, operationType, durationInMs);
}

[Event(3, Level = EventLevel.Informational)]
public void OnOperationFailure(int operationType, double durationInMs)
{
this.WriteEvent(3, operationType, durationInMs);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
namespace CosmosBenchmark
{
using System;
using System.Diagnostics.Tracing;
using System.Threading;
using System.Threading.Tasks;
using OpenTelemetry.Metrics;

internal class CosmosBenchmarkEventListener : EventListener
{
static readonly string CosmosBenchmarkEventSourceName = "Azure.Cosmos.Benchmark";

private readonly MeterProvider meterProvider;
private readonly MetricsCollector[] metricsCollectors;
private readonly MetricCollectionWindow metricCollectionWindow;
private const int WindowCheckInterval = 10;

public CosmosBenchmarkEventListener(MeterProvider meterProvider, BenchmarkConfig config)
{
this.meterProvider = meterProvider;
this.metricCollectionWindow ??= new MetricCollectionWindow(config.MetricsReportingIntervalInSec);

this.metricsCollectors = new MetricsCollector[Enum.GetValues<BenchmarkOperationType>().Length];
foreach (BenchmarkOperationType entry in Enum.GetValues<BenchmarkOperationType>())
{
this.metricsCollectors[(int)entry] = new MetricsCollector(entry);
}

/// <summary>
/// Flush metrics every <see cref="AppConfig.MetricsReportingIntervalInSec"/>
/// </summary>
ThreadPool.QueueUserWorkItem(async state =>
{
while (true)
{
// Reset metricCollectionWindow and flush.
if (this.metricCollectionWindow.IsInvalid())
{
this.meterProvider.ForceFlush();
this.metricCollectionWindow.Reset();
}
await Task.Delay(TimeSpan.FromMilliseconds(CosmosBenchmarkEventListener.WindowCheckInterval));
}
});
}



/// <summary>
/// Override this method to get a list of all the eventSources that exist.
/// </summary>
protected override void OnEventSourceCreated(EventSource eventSource)
{
// Because we want to turn on every EventSource, we subscribe to a callback that triggers
// when new EventSources are created. It is also fired when the EventListener is created
// for all pre-existing EventSources. Thus this callback get called once for every
// EventSource regardless of the order of EventSource and EventListener creation.

// For any EventSource we learn about, turn it on.
if (eventSource.Name == CosmosBenchmarkEventSourceName)
{
this.EnableEvents(eventSource, EventLevel.Informational, EventKeywords.All);
}
}

/// <summary>
/// We override this method to get a callback on every event we subscribed to with EnableEvents
/// </summary>
/// <param name="eventData"></param>
protected override void OnEventWritten(EventWrittenEventArgs eventData)
{
if (eventData.EventId == 2 // Successful
|| eventData.EventId == 3) // Failure
{
int operationTypeIndex = (int)eventData.Payload[0];
double durationInMs = (double)eventData.Payload[1];

switch (eventData.EventId)
{
case 2:
this.metricsCollectors[operationTypeIndex].OnOperationSuccess(durationInMs);
break;
case 3:
this.metricsCollectors[operationTypeIndex].OnOperationFailure(durationInMs);
break;
default:
break;
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,16 @@ public DiagnosticDataListener(BenchmarkConfig config)
/// <param name="eventData">An instance of <see cref="EventWrittenEventArgs "/> containing the request latency and diagnostics.</param>
protected override void OnEventWritten(EventWrittenEventArgs eventData)
{
try
if (eventData.EventId == 1)
{
this.Writer.WriteLine($"{eventData.Payload[2]} ; {eventData.Payload[3]}");
}
catch (Exception ex)
{
Utility.TraceError("An exception ocured while writing diagnostic data to the file", ex);
try
{
this.Writer.WriteLine($"{eventData.Payload[2]} ; {eventData.Payload[3]}");
}
catch (Exception ex)
{
Utility.TraceError("An exception ocured while writing diagnostic data to the file", ex);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ public Task<RunSummary> ExecuteAsync(
BenchmarkConfig benchmarkConfig,
int serialExecutorConcurrency,
int serialExecutorIterationCount,
double warmupFraction,
MetricsCollectorProvider metricsCollectorProvider);
double warmupFraction);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ public Task ExecuteAsync(
bool isWarmup,
bool traceFailures,
Action completionCallback,
BenchmarkConfig benchmarkConfig,
MetricsCollectorProvider metricsCollectorProvider);
BenchmarkConfig benchmarkConfig);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,13 @@ namespace CosmosBenchmark
public interface IMetricsCollector
{
/// <summary>
/// Collects the number of successful operations.
/// Successful operation with latency
/// </summary>
void CollectMetricsOnSuccess();
void OnOperationSuccess(double operationLatencyInMs);

/// <summary>
/// Collects the number of failed operations.
/// Failed operation with latency
/// </summary>
void CollectMetricsOnFailure();

/// <summary>
/// Records latency for success operations in milliseconda.
/// </summary>
/// <param name="milliseconds">The number of milliseconds to record.</param>
void RecordSuccessOpLatencyAndRps(TimeSpan timeSpan);

/// <summary>
/// Records latency for failed operations in milliseconda.
/// </summary>
/// <param name="milliseconds">The number of milliseconds to record.</param>
void RecordFailedOpLatencyAndRps(TimeSpan timeSpan);
void OnOperationFailure(double operationLatencyInMs);
}
}
Loading

0 comments on commit 70c8c52

Please sign in to comment.