From 84248179d75b048f5802e57b41ff7eeb9382b429 Mon Sep 17 00:00:00 2001 From: Amelia Crate Date: Thu, 7 Mar 2024 10:31:53 -0800 Subject: [PATCH] Set up CIT alerting File a bug to the guest OS images component on alert. For cit-periodics: alert when more than 1.2% of CIT tests are failing 3 times in a row. We run 4x a day, so this should be enough to alert if CIT is consistently failing many tests for a whole day. 1.2% is enough to pass if all el7 networking tests fail, plus a small margin for a few flaky individual tests. For oslogin-periodics: alert when 1% of tests are failing. Fewer than 4 failing tests, basically, which is a number that we've been hitting extremely consistently since adding more test users. Alert on 2 failures, and drop the interval to 12 hours so we're running this twice a day. --- .../GoogleCloudPlatform/gcp-guest/gcp-guest-config.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/prow/prowjobs/GoogleCloudPlatform/gcp-guest/gcp-guest-config.yaml b/prow/prowjobs/GoogleCloudPlatform/gcp-guest/gcp-guest-config.yaml index db349cb4c6..f1e7bcb3be 100644 --- a/prow/prowjobs/GoogleCloudPlatform/gcp-guest/gcp-guest-config.yaml +++ b/prow/prowjobs/GoogleCloudPlatform/gcp-guest/gcp-guest-config.yaml @@ -516,6 +516,9 @@ periodics: annotations: testgrid-dashboards: googleoss-gcp-guest testgrid-tab-name: cit-periodics + testgrid-broken-column-threshold: '0.012' # Allow 1.2% of tests to be failing and still mark column as passing + testgrid-bug-component: '117405' + testgrid-num-failures-to-alert: '3' interval: 6h spec: containers: @@ -571,7 +574,10 @@ periodics: annotations: testgrid-dashboards: googleoss-gcp-guest testgrid-tab-name: oslogin-periodics - interval: 24h + testgrid-broken-column-threshold: '0.01' + testgrid-num-failures-to-alert: '2' + testgrid-bug-component: '117405' + interval: 12h spec: containers: - image: gcr.io/compute-image-tools/cloud-image-tests:latest