From 36cfd15fb25a25213199eccfa1289962ee2cd7c7 Mon Sep 17 00:00:00 2001
From: Raymond Kim <109366641+tt-rkim@users.noreply.github.com>
Date: Wed, 28 Aug 2024 17:15:59 -0400
Subject: [PATCH] #10718: Fix issue with negative pipeline queue times (#12010)

* #10718: Filter out jobs with a start time lower than the pipeline start time because that means that it's a re-run and we need to account for already-passing jobs

* #10718: Comment for the thing we just did

* #10718: Add workflow run attempt as an arg so we can test diff attempts
---
 .github/workflows/_produce-data.yaml | 6 +++++-
 infra/data_collection/github/utils.py | 12 +++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/_produce-data.yaml b/.github/workflows/_produce-data.yaml
index 242ed2ca329..8f0edf71f52 100644
--- a/.github/workflows/_produce-data.yaml
+++ b/.github/workflows/_produce-data.yaml
@@ -8,6 +8,10 @@ on:
         description: "Unique GitHub workflow run ID to use for data"
         default: 10066309412
         type: number
+      test_workflow_run_attempt:
+        description: "Run attempt of the workflow run"
+        default: 1
+        type: number
   workflow_run:
     workflows:
       - "All post-commit tests"
@@ -44,7 +48,7 @@ jobs:
           event_name="${{ github.event_name }}"
           if [[ "$event_name" == "workflow_dispatch" ]]; then
             run_id="${{ inputs.test_workflow_run_id }}"
-            attempt_number="1"
+            attempt_number="${{ inputs.test_workflow_run_attempt }}"
           elif [[ "$event_name" == "workflow_run" ]]; then
             run_id="${{ github.event.workflow_run.id }}"
             attempt_number="${{ github.event.workflow_run.run_attempt }}"
diff --git a/infra/data_collection/github/utils.py b/infra/data_collection/github/utils.py
index 54911294573..84ef0d97a70 100644
--- a/infra/data_collection/github/utils.py
+++ b/infra/data_collection/github/utils.py
@@ -59,7 +59,17 @@ def get_pipeline_row_from_github_info(github_runner_environment, github_pipeline
     jobs = github_jobs_json["jobs"]

     jobs_start_times = list(map(lambda job_: get_datetime_from_github_datetime(job_["started_at"]), jobs))
-    sorted_jobs_start_times = sorted(jobs_start_times)
+    # We filter out jobs that started before because that means they're from a previous attempt for that pipeline
+    eligible_jobs_start_times = list(
+        filter(
+            lambda job_start_time_: job_start_time_ >= get_datetime_from_github_datetime(pipeline_submission_ts),
+            jobs_start_times,
+        )
+    )
+    sorted_jobs_start_times = sorted(eligible_jobs_start_times)
+    assert (
+        sorted_jobs_start_times
+    ), f"It seems that this pipeline does not have any jobs that started on or after the pipeline was submitted, which should be impossible. Please directly inspect the JSON objects"
     pipeline_start_ts = get_data_pipeline_datetime_from_datetime(sorted_jobs_start_times[0])
     pipeline_end_ts = github_pipeline_json["updated_at"]