Skip to content

Commit

Permalink
EES-5147 - implemented proper Health Check function. Added configurat…
Browse files Browse the repository at this point in the history
…ion into Bicep for configuring Health Checks and creating metrics. Reduced retry attempts on deploying Data Processor code and removed manual sleep, as sleeps are included in retry mechanism.
  • Loading branch information
duncan-at-hiveit committed May 30, 2024
1 parent 1e9f1ab commit 86bfc21
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 32 deletions.
71 changes: 70 additions & 1 deletion infrastructure/templates/public-api/components/functionApp.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ param resourcePrefix string
@description('Specifies the location for all resources.')
param location string

@description('Function App name')
@description('Specifies the Function App name suffix')
param functionAppName string

@description('Function App Plan : operating system')
Expand Down Expand Up @@ -71,6 +71,12 @@ param preWarmedInstanceCount int?
@description('Specifies whether or not the Function App will always be on and not idle after periods of no traffic - must be compatible with the chosen hosting plan')
param alwaysOn bool?

@description('Specifies configuration for setting up automatic health checks and metric alerts')
param healthCheck {
path: string
unhealthyMetricName: string
}?

@description('Specifies additional Azure Storage Accounts to make available to this Function App')
param azureFileShares {
storageName: string
Expand Down Expand Up @@ -202,6 +208,7 @@ var commonSiteProperties = {
reserved: reserved
siteConfig: {
alwaysOn: alwaysOn ?? null
healthCheckPath: healthCheck != null ? healthCheck!.path : null
preWarmedInstanceCount: preWarmedInstanceCount ?? null
netFrameworkVersion: '8.0'
linuxFxVersion: appServicePlanOS == 'Linux' ? 'DOTNET-ISOLATED|8.0' : null
Expand Down Expand Up @@ -280,6 +287,68 @@ resource stagingSlotAuthSettings 'Microsoft.Web/sites/slots/config@2022-03-01' =
properties: authSettingsV2Properties
}

// Reference to an Action Group that already exists outside of this template (note the
// 'existing' keyword - nothing is deployed here). Its resource id is used as the
// notification target for the unhealthy metric alert rules declared in this file.
// The name is derived from the 'subscription' value, so one such group is expected per
// subscription/environment.
resource alertsActionGroup 'Microsoft.Insights/actionGroups@2023-01-01' existing = {
name: '${subscription}-ag-ees-alertedusers'
}

// Properties shared by every "unhealthy" metric alert rule in this module. Each alert rule
// resource merges this object (via union()) with its own 'scopes' and a copy of the single
// criterion below extended with the appropriate 'metricNamespace'.
var commonUnhealthyMetricAlertRuleProperties = {
enabled: true
severity: 1
// Evaluate every 5 minutes, looking back over a 5 minute window.
evaluationFrequency: 'PT5M'
windowSize: 'PT5M'
criteria: {
'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria'
allOf: [
// The alert fires when the minimum 'HealthCheckStatus' value observed within the window
// is below 100.
// NOTE(review): 'HealthCheckStatus' is presumably the platform metric emitted for the
// configured 'healthCheckPath' (100 == fully healthy) - confirm against the Azure docs.
// This criterion deliberately carries no 'metricNamespace'; consumers index it as
// 'criteria.allOf[0]' and add the namespace themselves, so it must remain the first
// (and only) element.
{
name: 'Metric1'
criterionType: 'StaticThresholdCriterion'
metricName: 'HealthCheckStatus'
timeAggregation: 'Minimum'
operator: 'LessThan'
threshold: 100
skipMetricValidation: false
}
]
}
// Notify the pre-existing alerts Action Group when the rule fires.
actions: [
{
actionGroupId: alertsActionGroup.id
}
]
}

// Metric alert rule for the Function App's main (production) site. Only deployed when the
// optional 'healthCheck' parameter has been supplied; the rule is named after
// 'healthCheck.unhealthyMetricName'.
resource functionAppUnhealthyMetricAlertRule 'Microsoft.Insights/metricAlerts@2018-03-01' = if (healthCheck != null) {
name: healthCheck!.unhealthyMetricName
location: 'Global'
// Start from the shared alert properties, then scope the rule to this Function App and
// supply a 'criteria.allOf' whose single entry is the shared criterion plus the
// site-level metric namespace.
// NOTE(review): this relies on union() merging nested objects (so 'odata.type' is kept)
// while taking the 'allOf' array from the second argument rather than concatenating it -
// confirm against the Bicep union() documentation.
properties: union(commonUnhealthyMetricAlertRuleProperties, {
scopes: [functionApp.id]
criteria: {
allOf: [union(
commonUnhealthyMetricAlertRuleProperties.criteria.allOf[0],
{
metricNamespace: 'Microsoft.Web/sites'
}
)]
}
})
}

// Metric alert rule for the Function App's staging deployment slot. Only deployed when the
// optional 'healthCheck' parameter has been supplied; the rule name is
// 'healthCheck.unhealthyMetricName' with a 'Staging' suffix so that it does not clash with
// the main site's rule.
resource stagingSlotUnhealthyMetricAlertRule 'Microsoft.Insights/metricAlerts@2018-03-01' = if (healthCheck != null) {
name: '${healthCheck!.unhealthyMetricName}Staging'
location: 'Global'
// Same shape as the main site's rule, but scoped to the staging slot and using the
// slot-level metric namespace.
// NOTE(review): this relies on union() merging nested objects (so 'odata.type' is kept)
// while taking the 'allOf' array from the second argument rather than concatenating it -
// confirm against the Bicep union() documentation.
properties: union(commonUnhealthyMetricAlertRuleProperties, {
scopes: [stagingSlot.id]
criteria: {
allOf: [union(
commonUnhealthyMetricAlertRuleProperties.criteria.allOf[0],
{
metricNamespace: 'Microsoft.Web/sites/slots'
}
)]
}
})
}

// Allow Key Vault references passed as secure appsettings to be resolved by the Function App and its deployment slots.
// Where the staging slot's managed identity differs from the main slot's managed identity, add its id to the list.
var keyVaultPrincipalIds = userAssignedManagedIdentityParams != null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,13 @@ stages:
# definition and using the $(servicePrincipalId) variable.
- task: AzureCLI@2
displayName: 'Deploy Data Processor Function App - deploy to staging slot'
retryCountOnTaskFailure: 20
retryCountOnTaskFailure: 10
inputs:
azureSubscription: ${{parameters.serviceConnection}}
scriptType: bash
scriptLocation: inlineScript
inlineScript: |
set -e
sleep 5
az functionapp deployment source config-zip \
--src '$(Pipeline.Workspace)/EESBuildPipeline/public-api-data-processor-$(upstreamPipelineBuildNumber)/GovUk.Education.ExploreEducationStatistics.Public.Data.Processor.zip' \
--name $(dataProcessorFunctionAppName) \
Expand Down
4 changes: 4 additions & 0 deletions infrastructure/templates/public-api/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,10 @@ module dataProcessorFunctionAppModule 'components/functionApp.bicep' = {
family: 'EP'
}
preWarmedInstanceCount: 1
healthCheck: {
path: '/api/HealthCheck'
unhealthyMetricName: '${subscription}PublicDataProcessorUnhealthy'
}
azureFileShares: [{
storageName: parquetFileShareModule.outputs.fileShareName
storageAccountKey: publicApiStorageAccountAccessKey
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#nullable enable
using System;
using System.Linq;
using System.Net;
using GovUk.Education.ExploreEducationStatistics.Common.Extensions;
using GovUk.Education.ExploreEducationStatistics.Common.ViewModels;
using Microsoft.AspNetCore.Http;
Expand Down Expand Up @@ -29,14 +30,31 @@ public static T AssertOkObjectResult<T>(this IActionResult result, T? expectedVa
public static T AssertOkResult<T>(this ActionResult<T> result, T? expectedValue = null) where T : class
{
Assert.IsAssignableFrom<ActionResult<T>>(result);
Assert.IsAssignableFrom<T>(result.Value);
var value = Assert.IsAssignableFrom<T>(result.Value);

if (expectedValue != null)
{
Assert.Equal(expectedValue, result.Value);
Assert.Equal(expectedValue, value);
}

return value;
}

public static T AssertObjectResult<T>(
this IActionResult result,
HttpStatusCode expectedStatusCode,
T? expectedValue = null) where T : class
{
var objectResult = Assert.IsAssignableFrom<ObjectResult>(result);
Assert.Equal((int) expectedStatusCode, objectResult.StatusCode);
var value = Assert.IsAssignableFrom<T>(objectResult.Value);

if (expectedValue != null)
{
Assert.Equal(expectedValue, value);
}

return result.Value!;
return value;
}

public static void AssertNotFoundResult<T>(this ActionResult<T> result) where T : class
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
using System.Net;
using GovUk.Education.ExploreEducationStatistics.Common.Tests.Extensions;
using GovUk.Education.ExploreEducationStatistics.Public.Data.Processor.Functions;
using GovUk.Education.ExploreEducationStatistics.Public.Data.Services.Interfaces;
using Microsoft.AspNetCore.Http;

namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Processor.Tests.Functions;

/// <summary>
/// Integration tests for <see cref="HealthCheckFunctions"/>. Each nested class groups the
/// tests for one function exposed by that class.
/// </summary>
public abstract class HealthCheckFunctionTests(ProcessorFunctionsIntegrationTestFixture fixture)
    : ProcessorFunctionsIntegrationTest(fixture)
{
    /// <summary>
    /// Tests for <see cref="HealthCheckFunctions.HealthCheck"/>.
    /// </summary>
    public class HealthCheckTests(ProcessorFunctionsIntegrationTestFixture fixture)
        : HealthCheckFunctionTests(fixture)
    {
        /// <summary>
        /// With the simulated File Share Mount folder present, the Health Check reports both
        /// checks as healthy in a 200 OK result.
        /// </summary>
        [Fact]
        public async Task Success()
        {
            // Ensure that the test folder for simulating the File Share Mount is present prior to
            // running the Health Check.
            var fileShareMountPath = GetRequiredService<IDataSetVersionPathResolver>().BasePath();
            Directory.CreateDirectory(fileShareMountPath);

            var function = GetRequiredService<HealthCheckFunctions>();

            var httpContext = new DefaultHttpContext();
            var result = await function.HealthCheck(httpContext.Request);

            var expectedHealthCheckResult = new HealthCheckFunctions.HealthCheckResponse(
                PsqlConnection: new HealthCheckFunctions.HealthCheckSummary(Healthy: true),
                FileShareMount: new HealthCheckFunctions.HealthCheckSummary(Healthy: true));

            // The returned IActionResult is never executed against httpContext in this test and
            // the function does not touch the request, so httpContext.Response.StatusCode would
            // always be its default value. Only the result object itself is meaningful to assert.
            result.AssertOkObjectResult(expectedHealthCheckResult);
        }

        /// <summary>
        /// Without the simulated File Share Mount folder, the Health Check reports the file share
        /// as unhealthy in a 500 Internal Server Error result.
        /// </summary>
        [Fact]
        public async Task Failure_NoFileShareMount()
        {
            // This test relies on the File Share folder NOT having been created. Make that
            // precondition explicit so that leaked state (e.g. from the Success test, should the
            // base path ever be shared between tests) fails here with a clear message rather than
            // as a confusing mismatch on the response body.
            var fileShareMountPath = GetRequiredService<IDataSetVersionPathResolver>().BasePath();
            Assert.False(
                Directory.Exists(fileShareMountPath),
                $"Test precondition failed: File Share Mount folder '{fileShareMountPath}' already exists");

            var function = GetRequiredService<HealthCheckFunctions>();

            // Call the Health Check without firstly adding a test File Share folder.
            var httpContext = new DefaultHttpContext();
            var result = await function.HealthCheck(httpContext.Request);

            var expectedHealthCheckResult = new HealthCheckFunctions.HealthCheckResponse(
                PsqlConnection: new HealthCheckFunctions.HealthCheckSummary(Healthy: true),
                FileShareMount: new HealthCheckFunctions.HealthCheckSummary(
                    Healthy: false,
                    UnhealthyReason: "File Share Mount folder does not exist"));

            result.AssertObjectResult(HttpStatusCode.InternalServerError, expectedHealthCheckResult);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ protected override IEnumerable<Type> GetFunctionTypes()
typeof(CopyCsvFilesFunction),
typeof(CreateInitialDataSetVersionFunction),
typeof(ProcessInitialDataSetVersionFunction),
typeof(HealthCheckFunctions),
];
}
}
Original file line number Diff line number Diff line change
@@ -1,63 +1,82 @@
using GovUk.Education.ExploreEducationStatistics.Public.Data.Model.Database;
using GovUk.Education.ExploreEducationStatistics.Public.Data.Services.Options;
using GovUk.Education.ExploreEducationStatistics.Public.Data.Services.Interfaces;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.Functions.Worker;
using Microsoft.Azure.Functions.Worker.Http;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;

namespace GovUk.Education.ExploreEducationStatistics.Public.Data.Processor.Functions;

// ReSharper disable once ClassNeverInstantiated.Global
public class HealthCheckFunctions(
ILogger<HealthCheckFunctions> logger,
PublicDataDbContext publicDataDbContext,
IOptions<ParquetFilesOptions> parquetFileOptions)
IDataSetVersionPathResolver dataSetVersionPathResolver)
{
[Function(nameof(CountDataSets))]
public async Task<string> CountDataSets(
[Function(nameof(HealthCheck))]
[Produces("application/json")]
public async Task<IActionResult> HealthCheck(
#pragma warning disable IDE0060
[HttpTrigger(AuthorizationLevel.Function, "get")] HttpRequestData request)
[HttpTrigger(AuthorizationLevel.Anonymous, "get")] HttpRequest request)
#pragma warning restore IDE0060
{
var psqlConnectionHealthCheck = await CheckPsqlConnectionHealthy();
var fileShareMountHealthCheck = CheckFileShareMountHealth();
var healthCheckResponse = new HealthCheckResponse(
PsqlConnection: psqlConnectionHealthCheck,
FileShareMount: fileShareMountHealthCheck);

if (healthCheckResponse.Healthy)
{
return new OkObjectResult(healthCheckResponse);
}

return new ObjectResult(healthCheckResponse){
StatusCode = StatusCodes.Status500InternalServerError
};
}

private async Task<HealthCheckSummary> CheckPsqlConnectionHealthy()
{
logger.LogInformation("Attempting to test PSQL health");

try
{
var message = $"Found {await publicDataDbContext.DataSets.CountAsync()} datasets.";
logger.LogInformation(message);
return message;
await publicDataDbContext.DataSets.AnyAsync();
return new HealthCheckSummary(true);
}
catch (Exception e)
{
logger.LogError(e, "Error encountered when querying Data Sets");
throw;
logger.LogError(e, "Error encountered when testing PSQL connection health");
return new HealthCheckSummary(false, e.Message);
}
}

[Function(nameof(CheckForFileShareMount))]
public Task CheckForFileShareMount(
#pragma warning disable IDE0060
[HttpTrigger(AuthorizationLevel.Function, "get")] HttpRequestData request)
#pragma warning restore IDE0060
private HealthCheckSummary CheckFileShareMountHealth()
{
logger.LogInformation("Attempting to read from file share");

try
{
if (Directory.Exists(parquetFileOptions.Value.BasePath))
{
logger.LogInformation("Successfully found the file share mount");
}
else
if (Directory.Exists(dataSetVersionPathResolver.BasePath()))
{
logger.LogError("Unable to find the file share mount");
return new HealthCheckSummary(true);
}

return new HealthCheckSummary(false, "File Share Mount folder does not exist");
}
catch (Exception e)
{
logger.LogError(e, "Error encountered when attempting to find the file share mount");
throw;
return new HealthCheckSummary(false, e.Message);
}

return Task.CompletedTask;
}

/// <summary>
/// Body returned by the Health Check endpoint, carrying the outcome of each individual check.
/// </summary>
/// <param name="PsqlConnection">Outcome of the PSQL connection check.</param>
/// <param name="FileShareMount">Outcome of the File Share Mount check.</param>
public record HealthCheckResponse(HealthCheckSummary PsqlConnection, HealthCheckSummary FileShareMount)
{
    /// <summary>
    /// <c>true</c> only when every individual check is healthy.
    /// </summary>
    public bool Healthy
    {
        get { return FileShareMount.Healthy && PsqlConnection.Healthy; }
    }
}

/// <summary>
/// Outcome of a single health check.
/// </summary>
/// <param name="Healthy">Whether the check passed.</param>
/// <param name="UnhealthyReason">Optional explanation of a failed check; defaults to <c>null</c>.</param>
public record HealthCheckSummary(bool Healthy, string? UnhealthyReason = null);
}

0 comments on commit 86bfc21

Please sign in to comment.