Skip to content

Commit

Permalink
Merge branch 'master' into users/sourabhjain/nullchecks
Browse files Browse the repository at this point in the history
  • Loading branch information
sourabh1007 authored Aug 31, 2023
2 parents b1c4872 + c72ebc8 commit 11ecfb9
Show file tree
Hide file tree
Showing 24 changed files with 508 additions and 280 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,11 @@
"customScriptUrl": "[concat('https://raw.githubusercontent.com/Azure/azure-cosmos-dotnet-v3/',parameters('benchmarkingToolsBranchName'),'/Microsoft.Azure.Cosmos.Samples/Tools/Benchmark/AzureVmBenchmark/scripts/custom-script.sh')]",
"vmScriptExtensionScriptName": "execute.sh",
"convertedDatetime": "[dateTimeFromEpoch(parameters('startDate'))]",
"metricsPrefix" : "[if(startsWith(parameters('workloadType'), 'Read'), 'Read', if(startsWith(parameters('workloadType'), 'Query'), 'Query', if(startsWith(parameters('workloadType'), 'Insert'), 'Insert', '')))]",
"appInsightsResourceIds": "[concat('/subscriptions/', subscription().subscriptionId, '/resourceGroups/', resourceGroup().name, '/providers/microsoft.insights/components/', parameters('applicationInsightsName'))]",
"chart0Expression": "[concat('customMetrics\n| where name == \"ReadOperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 50),\n percentile(value, 75),\n percentile(value, 90),\n percentile(value, 95)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart1Expression": "[concat('customMetrics\n| where name == \"ReadOperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 99),\n percentile(value, 99.9),\n percentile(value, 99.99)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart2Expression": "[concat('customMetrics\n| where name == \"ReadOperationRps\" and timestamp > ago(1d)\n| summarize\n avg(value)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]"
"chart0Expression": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 50),\n percentile(value, 75),\n percentile(value, 90),\n percentile(value, 95)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart1Expression": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationLatencyInMs\" and timestamp > ago(1d)\n| summarize\n percentile(value, 99),\n percentile(value, 99.9),\n percentile(value, 99.99)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]",
"chart2Expression": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationRps\" and timestamp > ago(1d)\n| summarize\n avg(value)\n by ts = bin(timestamp, ', parameters('metricsReportingIntervalInSec'), 's)\n| render timechart \n\n')]"

},
"resources": [
Expand Down Expand Up @@ -776,7 +777,7 @@
},
{
"name": "Query",
"value": "customMetrics\n| where name == \"ReadOperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n",
"value": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n')]",
"isOptional": true
},
{
Expand Down Expand Up @@ -834,7 +835,7 @@
"type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
"settings": {
"content": {
"Query": "customMetrics\n| where name == \"ReadOperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n",
"Query": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationFailure\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n')]",
"Dimensions": {
"xAxis": {
"name": "ts",
Expand Down Expand Up @@ -903,7 +904,7 @@
},
{
"name": "Query",
"value": "customMetrics\n| where name == \"ReadOperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n",
"value": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n')]",
"isOptional": true
},
{
Expand Down Expand Up @@ -961,7 +962,7 @@
"type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
"settings": {
"content": {
"Query": "customMetrics\n| where name == \"ReadOperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n",
"Query": "[concat('customMetrics\n| where name == \"', variables('metricsPrefix'), 'OperationSuccess\" and timestamp > ago(1d)\n| summarize\n sum(value)\n by ts = bin(timestamp, 1s)\n | render timechart\n\n')]",
"Dimensions": {
"xAxis": {
"name": "ts",
Expand Down
40 changes: 12 additions & 28 deletions Microsoft.Azure.Cosmos.Samples/Tools/Benchmark/BenchmarkConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace CosmosBenchmark
using System.Linq;
using System.Runtime;
using CommandLine;
using Microsoft.Azure.Cosmos.Telemetry;
using Microsoft.Azure.Documents.Client;
using Newtonsoft.Json;

Expand Down Expand Up @@ -102,18 +103,12 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Disable core SDK logging")]
public bool DisableCoreSdkLogging { get; set; }

[Option(Required = false, HelpText = "Enable Client Telemetry")]
public bool EnableTelemetry { get; set; }

[Option(Required = false, HelpText = "Enable Distributed Tracing")]
public bool EnableDistributedTracing { get; set; }

[Option(Required = false, HelpText = "Client Telemetry Schedule in Seconds")]
public int TelemetryScheduleInSec { get; set; }

[Option(Required = false, HelpText = "Client Telemetry Endpoint")]
public string TelemetryEndpoint { get; set; }

[Option(Required = false, HelpText = "Endpoint to publish results to")]
public string ResultsEndpoint { get; set; }

Expand Down Expand Up @@ -143,6 +138,9 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Application Insights connection string")]
public string AppInsightsConnectionString { get; set; }

[Option(Required = false, HelpText = "Enable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool EnableClientTelemetry { get; set; } = true;

internal int GetTaskCount(int containerThroughput)
{
int taskCount = this.DegreeOfParallelism;
Expand Down Expand Up @@ -210,35 +208,21 @@ private string GetUserAgentPrefix()

internal Microsoft.Azure.Cosmos.CosmosClient CreateCosmosClient(string accountKey)
{
// Overwrite the default timespan if configured
if(this.TelemetryScheduleInSec > 0)
{
ClientTelemetryOptions.DefaultIntervalForTelemetryJob = TimeSpan.FromSeconds(this.TelemetryScheduleInSec);
}

Microsoft.Azure.Cosmos.CosmosClientOptions clientOptions = new Microsoft.Azure.Cosmos.CosmosClientOptions()
{
ApplicationName = this.GetUserAgentPrefix(),
MaxRetryAttemptsOnRateLimitedRequests = 0,
MaxRequestsPerTcpConnection = this.MaxRequestsPerTcpConnection,
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint,
EnableClientTelemetry = this.EnableClientTelemetry
};

if (this.EnableTelemetry)
{
Environment.SetEnvironmentVariable(
Microsoft.Azure.Cosmos.Telemetry.ClientTelemetryOptions.EnvPropsClientTelemetryEnabled,
"true");

if (this.TelemetryScheduleInSec > 0)
{
Environment.SetEnvironmentVariable(
Microsoft.Azure.Cosmos.Telemetry.ClientTelemetryOptions.EnvPropsClientTelemetrySchedulingInSeconds,
Convert.ToString(this.TelemetryScheduleInSec));
}

if (!string.IsNullOrEmpty(this.TelemetryEndpoint))
{
Environment.SetEnvironmentVariable(
Microsoft.Azure.Cosmos.Telemetry.ClientTelemetryOptions.EnvPropsClientTelemetryEndpoint,
this.TelemetryEndpoint);
}
}

if (!string.IsNullOrWhiteSpace(this.ConsistencyLevel))
{
clientOptions.ConsistencyLevel = (Microsoft.Azure.Cosmos.ConsistencyLevel)Enum.Parse(typeof(Microsoft.Azure.Cosmos.ConsistencyLevel), this.ConsistencyLevel, ignoreCase: true);
Expand Down
11 changes: 4 additions & 7 deletions Microsoft.Azure.Cosmos.Samples/Tools/CTL/CTLConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,15 @@ public string DiagnosticsThresholdDuration
[Option("ctl_logging_context", Required = false, HelpText = "Defines a custom context to use on metrics")]
public string LogginContext { get; set; } = string.Empty;

[Option("ctl_telemetry_endpoint", Required = false, HelpText = "telemetry juno end point")]
public string TelemetryEndpoint { get; set; }

[Option("ctl_telemetry_schedule_in_sec", Required = false, HelpText = "telemetry task schedule time in sec")]
public string TelemetryScheduleInSeconds { get; set; }

[Option("ctl_reservoir_type", Required = false, HelpText = "Defines the reservoir type. Valid values are: Uniform, SlidingWindow and ExponentialDecay. The default value is SlidingWindow.")]
public ReservoirTypes ReservoirType { get; set; } = ReservoirTypes.SlidingWindow;

[Option("ctl_reservoir_sample_size", Required = false, HelpText = "The reservoir sample size.")]
public int ReservoirSampleSize { get; set; } = 1028;

[Option("ctl_enable_client_telemetry", Required = false, HelpText = "Enable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool EnableClientTelemetry { get; set; } = true;

internal TimeSpan RunningTimeDurationAsTimespan { get; private set; } = TimeSpan.FromHours(10);
internal TimeSpan DiagnosticsThresholdDurationAsTimespan { get; private set; } = TimeSpan.FromSeconds(60);

Expand All @@ -133,7 +130,7 @@ internal CosmosClient CreateCosmosClient()
CosmosClientOptions clientOptions = new CosmosClientOptions()
{
ApplicationName = CTLConfig.UserAgentSuffix,
EnableClientTelemetry = true
EnableClientTelemetry = this.EnableClientTelemetry
};

if (this.UseGatewayMode)
Expand Down
14 changes: 0 additions & 14 deletions Microsoft.Azure.Cosmos.Samples/Tools/CTL/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ public static async Task Main(string[] args)
{
CTLConfig config = CTLConfig.From(args);

SetEnvironmentVariables(config);

if (config.OutputEventTraces)
{
EnableTraceSourcesToConsole();
Expand All @@ -54,12 +52,6 @@ await scenario.InitializeAsync(

logger.LogInformation("Initialization completed.");

if(client.ClientOptions.EnableClientTelemetry.GetValueOrDefault()) {
logger.LogInformation("Telemetry is enabled for CTL.");
} else {
logger.LogInformation("Telemetry is disabled for CTL.");
}

List<Task> tasks = new List<Task>
{
scenario.RunAsync(
Expand Down Expand Up @@ -148,12 +140,6 @@ await scenario.InitializeAsync(
}
}

private static void SetEnvironmentVariables(CTLConfig config)
{
Environment.SetEnvironmentVariable(ClientTelemetryOptions.EnvPropsClientTelemetryEndpoint, config.TelemetryEndpoint);
Environment.SetEnvironmentVariable(ClientTelemetryOptions.EnvPropsClientTelemetrySchedulingInSeconds, config.TelemetryScheduleInSeconds);
}

private static IMetricsRoot ConfigureReporting(
CTLConfig config,
ILogger logger)
Expand Down
5 changes: 4 additions & 1 deletion Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public ConnectionPolicy()
this.MaxConnectionLimit = defaultMaxConcurrentConnectionLimit;
this.RetryOptions = new RetryOptions();
this.EnableReadRequestsFallback = null;
this.EnableClientTelemetry = ClientTelemetryOptions.IsClientTelemetryEnabled();
this.EnableClientTelemetry = false; // by default feature flag is off
this.ServerCertificateCustomValidationCallback = null;
}

Expand Down Expand Up @@ -211,6 +211,9 @@ public bool EnableTcpConnectionEndpointRediscovery
set;
}

/// <summary>
/// Gets or sets the flag to enable client telemetry feature.
/// </summary>
internal bool EnableClientTelemetry
{
get;
Expand Down
4 changes: 3 additions & 1 deletion Microsoft.Azure.Cosmos/src/Handler/DiagnosticsHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Microsoft.Azure.Cosmos.Handlers
{
using System;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Azure.Cosmos.Handler;
Expand All @@ -25,7 +26,8 @@ public override async Task<ResponseMessage> SendAsync(
ResponseMessage responseMessage = await base.SendAsync(request, cancellationToken);

// Record the diagnostics on the response to get the CPU of when the request was executing
SystemUsageHistory systemUsageHistory = DiagnosticsHandlerHelper.Instance.GetDiagnosticsSystemHistory();
SystemUsageHistory systemUsageHistory = DiagnosticsHandlerHelper.GetInstance().GetDiagnosticsSystemHistory();

if (systemUsageHistory != null)
{
request.Trace.AddDatum(
Expand Down
59 changes: 45 additions & 14 deletions Microsoft.Azure.Cosmos/src/Handler/DiagnosticsHandlerHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ namespace Microsoft.Azure.Cosmos.Handler
using System.Collections.Generic;
using Documents.Rntbd;
using Microsoft.Azure.Cosmos.Core.Trace;
using Microsoft.Azure.Cosmos.Telemetry;

/// <summary>
/// This is a helper class that creates a single static instance to avoid each
Expand All @@ -22,34 +21,69 @@ internal class DiagnosticsHandlerHelper
private const string Telemetrykey = "telemetry";

public static readonly TimeSpan DiagnosticsRefreshInterval = TimeSpan.FromSeconds(10);
private readonly SystemUsageRecorder diagnosticSystemUsageRecorder = new SystemUsageRecorder(
private static readonly SystemUsageRecorder DiagnosticSystemUsageRecorder = new SystemUsageRecorder(
identifier: Diagnostickey,
historyLength: 6,
refreshInterval: DiagnosticsHandlerHelper.DiagnosticsRefreshInterval);

private static readonly TimeSpan ClientTelemetryRefreshInterval = TimeSpan.FromSeconds(5);
private readonly SystemUsageRecorder telemetrySystemUsageRecorder = new SystemUsageRecorder(
private static readonly SystemUsageRecorder TelemetrySystemUsageRecorder = new SystemUsageRecorder(
identifier: Telemetrykey,
historyLength: 120,
refreshInterval: DiagnosticsHandlerHelper.ClientTelemetryRefreshInterval);

private static bool isDiagnosticsMonitoringEnabled = false;
private static bool isTelemetryMonitoringEnabled = false;

/// <summary>
/// Singleton to make sure only one instance of DiagnosticHandlerHelper is there.
/// The system usage collection is disabled for internal builds so it is set to null to avoid
/// compute for accidentally creating an instance or trying to use it.
/// </summary>
public static readonly DiagnosticsHandlerHelper Instance =
private static DiagnosticsHandlerHelper Instance =
#if INTERNAL
null;
#else
new DiagnosticsHandlerHelper();
#endif

private static bool isDiagnosticsMonitoringEnabled;
private static bool isTelemetryMonitoringEnabled;

private readonly SystemUsageMonitor systemUsageMonitor = null;

public static DiagnosticsHandlerHelper GetInstance()
{
return DiagnosticsHandlerHelper.Instance;
}

/// <summary>
/// Restart the monitor with client telemetry recorder if telemetry is enabled
/// </summary>
/// <param name="isClientTelemetryEnabled"></param>
public static void Refresh(bool isClientTelemetryEnabled)
{
if (isClientTelemetryEnabled != DiagnosticsHandlerHelper.isTelemetryMonitoringEnabled)
{
DiagnosticsHandlerHelper.Instance.StopSystemMonitor();

// Update telemetry flag
DiagnosticsHandlerHelper.isTelemetryMonitoringEnabled = isClientTelemetryEnabled;

// Create new instance, it will start a new system monitor job
DiagnosticsHandlerHelper.Instance = new DiagnosticsHandlerHelper();
}
}

private void StopSystemMonitor()
{
try
{
this.systemUsageMonitor?.Dispose();
}
catch (ObjectDisposedException ex)
{
DefaultTrace.TraceError($"Error while stopping system usage monitor. {0} ", ex);
}
}

/// <summary>
/// Start System Usage Monitor with Diagnostic and Telemetry Recorder if Telemetry is enabled
/// Otherwise Start System Usage Monitor with only Diagnostic Recorder
Expand All @@ -61,16 +95,14 @@ private DiagnosticsHandlerHelper()
// If the CPU monitor fails for some reason don't block the application
try
{
DiagnosticsHandlerHelper.isTelemetryMonitoringEnabled = ClientTelemetryOptions.IsClientTelemetryEnabled();

List<SystemUsageRecorder> recorders = new List<SystemUsageRecorder>()
{
this.diagnosticSystemUsageRecorder,
DiagnosticsHandlerHelper.DiagnosticSystemUsageRecorder,
};

if (DiagnosticsHandlerHelper.isTelemetryMonitoringEnabled)
{
recorders.Add(this.telemetrySystemUsageRecorder);
recorders.Add(DiagnosticsHandlerHelper.TelemetrySystemUsageRecorder);
}

this.systemUsageMonitor = SystemUsageMonitor.CreateAndStart(recorders);
Expand All @@ -82,7 +114,6 @@ private DiagnosticsHandlerHelper()
DefaultTrace.TraceError(ex.Message);

DiagnosticsHandlerHelper.isDiagnosticsMonitoringEnabled = false;
DiagnosticsHandlerHelper.isTelemetryMonitoringEnabled = false;
}
}

Expand All @@ -99,7 +130,7 @@ public SystemUsageHistory GetDiagnosticsSystemHistory()

try
{
return this.diagnosticSystemUsageRecorder.Data;
return DiagnosticsHandlerHelper.DiagnosticSystemUsageRecorder.Data;
}
catch (Exception ex)
{
Expand All @@ -123,7 +154,7 @@ public SystemUsageHistory GetClientTelemetrySystemHistory()

try
{
return this.telemetrySystemUsageRecorder.Data;
return DiagnosticsHandlerHelper.TelemetrySystemUsageRecorder.Data;
}
catch (Exception ex)
{
Expand Down
Loading

0 comments on commit 11ecfb9

Please sign in to comment.