Skip to content

Commit

Permalink
Add alarms
Browse files: browse the repository at this point in the history
Signed-off-by: Sayali Gaikawad <[email protected]>
  • Loading branch information
gaiksaya committed Dec 15, 2023
1 parent 7bc5fe4 commit bdd2be6
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 66 deletions.
5 changes: 5 additions & 0 deletions lib/cloudwatch/metrics-section.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ export interface ProcstatMetricDefinition {
interface EditableCloudwatchMetricsSection {
// eslint-disable-next-line camelcase
namespace?: string;
// eslint-disable-next-line camelcase
append_dimensions?: any;
// eslint-disable-next-line camelcase
aggregation_dimensions?: any;
// eslint-disable-next-line camelcase
metrics_collected: {
procstat?: ProcstatMetricDefinition[],
cpu?: MetricDefinition,
Expand Down
47 changes: 20 additions & 27 deletions lib/infra/infra-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
import {
AutoScalingGroup, BlockDeviceVolume, EbsDeviceVolumeType, Signals,
} from 'aws-cdk-lib/aws-autoscaling';
import { Metric, Unit } from 'aws-cdk-lib/aws-cloudwatch';
import { MathExpression, Metric, Unit } from 'aws-cdk-lib/aws-cloudwatch';
import {
AmazonLinuxCpuType,
AmazonLinuxGeneration,
Expand Down Expand Up @@ -77,11 +77,12 @@ export interface infraProps extends StackProps {

export class InfraStack extends Stack {
private instanceRole: Role;

public readonly alarmMetrics: {
memUsed: Metric,
diskUsed: Metric,
openSearchProcessNotFound: Metric,
openSearchDashboardsProcessNotFound: Metric
memUsed: Metric | MathExpression,
diskUsed: Metric| MathExpression,
openSearchProcessNotFound: Metric | MathExpression,
openSearchDashboardsProcessNotFound?: Metric | MathExpression,
}

constructor(scope: Stack, id: string, props: infraProps) {
Expand All @@ -100,29 +101,16 @@ export class InfraStack extends Stack {
metricName: 'mem_used_percent',
namespace: `${this.stackName}/InfraStack`,
}),
diskUsed: new Metric({
metricName: 'disk_used_percent',
namespace: `${this.stackName}/InfraStack`,
dimensionsMap: {
device: "nvme0n1p1",
fstype: "xfs"
}
diskUsed: new MathExpression({
expression: `SELECT AVG(disk_used_percent) FROM "${this.stackName}/InfraStack" WHERE "fstype" = 'xfs'`,
}),
openSearchProcessNotFound: new Metric({
metricName: 'procstat_lookup_pid_count',
namespace: `${this.stackName}/InfraStack`,
dimensionsMap: {
pattern: '-Dopensearch'
}
openSearchProcessNotFound: new MathExpression({
expression: `SELECT AVG(procstat_lookup_pid_count) FROM "${this.stackName}/InfraStack" WHERE "pattern" = '-Dopensearch'`,
}),
openSearchDashboardsProcessNotFound: new Metric({
metricName: 'procstat_lookup_pid_count',
namespace: `${this.stackName}/InfraStack`,
dimensionsMap: {
pattern: 'opensearch-dashboards'
}
})
}
openSearchDashboardsProcessNotFound: new MathExpression({
expression: `SELECT AVG(procstat_lookup_pid_count) FROM "${this.stackName}/InfraStack" WHERE "pattern" = 'opensearch-dashboards'`,
}),
};

const clusterLogGroup = new LogGroup(this, 'opensearchLogGroup', {
logGroupName: `${id}LogGroup/opensearch.log`,
Expand Down Expand Up @@ -408,7 +396,7 @@ export class InfraStack extends Stack {
value: nlb.loadBalancerDnsName,
});

const monitoring = new Monitoring(this)
const monitoring = new Monitoring(this, props);
}

private static getCfnInitElement(scope: Stack, logGroup: LogGroup, props: infraProps, nodeType?: string): InitElement[] {
Expand Down Expand Up @@ -443,6 +431,11 @@ export class InfraStack extends Stack {
debug: false,
},
metrics: {
append_dimensions: {
// eslint-disable-next-line no-template-curly-in-string
InstanceId: '${aws:InstanceId}',
},
aggregation_dimensions: [[]], // Create rollups without instance id
namespace: `${scope.stackName}/InfraStack`,
metrics_collected: {
procstat: procstatConfig,
Expand Down
86 changes: 47 additions & 39 deletions lib/monitoring/alarms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,50 +6,58 @@
* compatible open source license.
*/

import { Alarm, ComparisonOperator, Dashboard, TreatMissingData } from "aws-cdk-lib/aws-cloudwatch";
import { InfraStack } from "../infra/infra-stack";
import {
Alarm, AlarmWidget, ComparisonOperator, Dashboard, TreatMissingData,
} from 'aws-cdk-lib/aws-cloudwatch';
import { InfraStack, infraProps } from '../infra/infra-stack';

export class Monitoring {
public readonly alarms: Alarm[] = []

constructor(stack: InfraStack) {
const alarmDashboard = new Dashboard(stack, 'AlarmDashboard');
this.alarms.push(new Alarm(stack, 'OpenSearchProcessNotFound', {
alarmDescription: 'OpenSearch Process not found',
metric: stack.alarmMetrics.openSearchProcessNotFound.with({statistic: 'avg'}),
evaluationPeriods: 3,
threshold: 1,
datapointsToAlarm: 3,
comparisonOperator: ComparisonOperator.LESS_THAN_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
constructor(infraStack: InfraStack, infraprops: infraProps) {
const alarmDashboard = new Dashboard(infraStack, 'AlarmDashboard');
this.alarms.push(new Alarm(infraStack, 'OpenSearchProcessNotFound', {
alarmDescription: 'OpenSearch Process not found',
metric: infraStack.alarmMetrics.openSearchProcessNotFound.with({ statistic: 'avg' }),
evaluationPeriods: 3,
threshold: 1,
datapointsToAlarm: 3,
comparisonOperator: ComparisonOperator.LESS_THAN_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}));

if (infraprops.dashboardsUrl !== 'undefined' && infraStack.alarmMetrics.openSearchDashboardsProcessNotFound !== undefined) {
this.alarms.push(new Alarm(infraStack, 'OpenSearchDashboardsProcessNotFound', {
alarmDescription: 'OpenSearch Dashboards Process not found',
metric: infraStack.alarmMetrics.openSearchDashboardsProcessNotFound.with({ statistic: 'avg' }),
evaluationPeriods: 3,
threshold: 1,
datapointsToAlarm: 3,
comparisonOperator: ComparisonOperator.LESS_THAN_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}));
}

this.alarms.push(new Alarm(infraStack, 'HighMemoryUtilization', {
alarmDescription: 'The process is using more memory than expected',
metric: infraStack.alarmMetrics.memUsed.with({ statistic: 'avg' }),
evaluationPeriods: 5,
threshold: 65,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}));

this.alarms.push(new Alarm(stack, 'OpenSearchDashboardsProcessNotFound', {
alarmDescription: 'OpenSearch Dashboards Process not found',
metric: stack.alarmMetrics.openSearchDashboardsProcessNotFound.with({statistic: 'avg'}),
evaluationPeriods: 3,
threshold: 1,
datapointsToAlarm: 3,
comparisonOperator: ComparisonOperator.LESS_THAN_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}))
this.alarms.push(new Alarm(infraStack, 'HighDiskUtilization', {
alarmDescription: 'High disk utilization found',
metric: infraStack.alarmMetrics.diskUsed.with({ statistic: 'avg' }),
evaluationPeriods: 5,
threshold: 70,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}));

this.alarms.push(new Alarm(stack, 'HighMemoryUtilization', {
alarmDescription: 'The process is using more memory than expected',
metric: stack.alarmMetrics.memUsed.with({ statistic: 'avg' }),
evaluationPeriods: 5,
threshold: 65,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}));

this.alarms.push(new Alarm(stack, 'HighDiskUtilization',{
alarmDescription: 'High disk utilization found',
metric: stack.alarmMetrics.diskUsed.with({statistic: 'avg'}),
evaluationPeriods: 5,
threshold: 70,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}))
this.alarms
.map((alarm) => new AlarmWidget({ alarm }))
.forEach((widget) => alarmDashboard.addWidgets(widget));
}
}
}
2 changes: 2 additions & 0 deletions test/os-cluster.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ test('Test Resources with security disabled multi-node default instance types',
infraTemplate.resourceCountIs('AWS::ElasticLoadBalancingV2::Listener', 2);
infraTemplate.resourceCountIs('AWS::ElasticLoadBalancingV2::TargetGroup', 2);
infraTemplate.resourceCountIs('AWS::AutoScaling::LaunchConfiguration', 3);
infraTemplate.resourceCountIs('AWS::CloudWatch::Alarm', 4);
infraTemplate.resourceCountIs('AWS::CloudWatch::Dashboard', 1);
infraTemplate.hasResourceProperties('AWS::ElasticLoadBalancingV2::Listener', {
Port: 80,
Protocol: 'TCP',
Expand Down

0 comments on commit bdd2be6

Please sign in to comment.