Skip to content

Commit

Permalink
imp: add darwin alerts and signers
Browse files Browse the repository at this point in the history
  • Loading branch information
johnalotoski committed Nov 29, 2022
1 parent 12d8791 commit 7b9ed23
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 6 deletions.
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions nix/cloud/alerts.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
inputs,
cell,
}: {
# Alert group for the Darwin machines probed by the "blackbox-ssh-darwin"
# scrape job. That job covers both builders and signers, so the alert text
# deliberately says "machine" rather than "builder".
ci-world-darwin = {
  datasource = "vm";
  rules = [
    {
      alert = "DarwinSshFailure";
      # Fires once a target's probe_success has been 0 for the full `for` window.
      expr = ''probe_success{job="blackbox-ssh-darwin"} == 0'';
      for = "5m";
      labels.severity = "critical";
      annotations = {
        # {{ $labels.alias }} and {{ $labels.instance }} are alert templates
        # filled from the labels of the firing series, not Nix interpolation.
        description = ''
          Cluster ssh connectivity to darwin machine {{ $labels.alias }} at {{ $labels.instance }}
          has been down for more than 5 minutes. Darwin CI capacity is degraded or down.'';
        summary = "Connectivity to Darwin machine {{ $labels.alias }} is down";
      };
    }
  ];
};

ci-world-spongix = {
datasource = "vm";
rules = [
Expand Down
1 change: 1 addition & 0 deletions nix/cloud/hydrationProfile.nix
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ in {
# Cell Block local declared dashboards
inherit
(cell.alerts)
ci-world-darwin
ci-world-spongix
ci-world-nomad-follower
# Upstream alerts which may have downstream deps can be imported here
Expand Down
64 changes: 61 additions & 3 deletions nix/metal/bitteProfile/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,65 @@ in {
subnet = cluster.vpc.subnets.core-1;
volumeSize = 300;

modules = [bitte.profiles.monitoring];
modules = [
  bitte.profiles.monitoring

  # Inline module: check the SSH banner of each Darwin machine through the
  # blackbox exporter and have vmagent scrape the probe results.
  ({lib, ...}: let
    # Machines to probe, in the order they appear in static_configs.
    darwinHosts = [
      {
        ip = "10.10.0.1";
        alias = "mm1-builder";
      }
      {
        ip = "10.10.0.2";
        alias = "mm2-builder";
      }
      {
        ip = "10.10.0.101";
        alias = "mm1-signer";
      }
      {
        ip = "10.10.0.102";
        alias = "mm2-signer";
      }
    ];

    # One static_config entry per machine: probe port 22 and carry the
    # machine name along as the `alias` label.
    toStaticConfig = {
      ip,
      alias,
    }: {
      targets = ["${ip}:22"];
      labels = {inherit alias;};
    };

    # TCP probe that expects an SSH-2.0 banner and answers with its own.
    sshBannerProbe = {
      prober = "tcp";
      timeout = "10s";
      tcp = {
        preferred_ip_protocol = "ip4";
        query_response = [
          {
            expect = "^SSH-2.0-";
            send = "SSH-2.0-blackbox-ssh-check";
          }
        ];
      };
    };

    darwinSshJob = {
      job_name = "blackbox-ssh-darwin";
      scrape_interval = "60s";
      metrics_path = "/probe";
      params = {module = ["ssh_banner"];};
      static_configs = map toStaticConfig darwinHosts;
      # Order matters: move the real target into the `target` query
      # parameter, expose it as `instance`, then point the scrape itself
      # at 127.0.0.1:9115 (presumably the local blackbox exporter).
      relabel_configs = [
        {
          source_labels = ["__address__"];
          target_label = "__param_target";
        }
        {
          source_labels = ["__param_target"];
          target_label = "instance";
        }
        {
          replacement = "127.0.0.1:9115";
          target_label = "__address__";
        }
      ];
    };
  in {
    # mkForce: this definition takes priority over any other definition
    # of the blackbox exporter option.
    services.prometheus.exporters.blackbox = lib.mkForce {
      enable = true;
      configFile = pkgs.toPrettyJSON "blackbox-exporter.yaml" {
        modules = {ssh_banner = sshBannerProbe;};
      };
    };

    services.vmagent.promscrapeConfig = [darwinSshJob];
  })
];

securityGroupRules = {
inherit
Expand Down Expand Up @@ -278,13 +336,13 @@ in {
# mm1
{
publicKey = "nvKCarVUXdO0WtoDsEjTzU+bX0bwWYHJAM2Y3XhO0Ao=";
allowedIPs = ["10.10.0.1/32"];
allowedIPs = ["10.10.0.1/32" "10.10.0.101/32"];
persistentKeepalive = 30;
}
# mm2
{
publicKey = "VcOEVp/0EG4luwL2bMmvGvlDNDbCzk7Vkazd3RRl51w=";
allowedIPs = ["10.10.0.2/32"];
allowedIPs = ["10.10.0.2/32" "10.10.0.102/32"];
persistentKeepalive = 30;
}
];
Expand Down

0 comments on commit 7b9ed23

Please sign in to comment.