diff --git a/.github/workflows/_produce-data.yaml b/.github/workflows/_produce-data.yaml index 242ed2ca329..8f0edf71f52 100644 --- a/.github/workflows/_produce-data.yaml +++ b/.github/workflows/_produce-data.yaml @@ -8,6 +8,10 @@ on: description: "Unique GitHub workflow run ID to use for data" default: 10066309412 type: number + test_workflow_run_attempt: + description: "Run attempt of the workflow run" + default: 1 + type: number workflow_run: workflows: - "All post-commit tests" @@ -44,7 +48,7 @@ jobs: event_name="${{ github.event_name }}" if [[ "$event_name" == "workflow_dispatch" ]]; then run_id="${{ inputs.test_workflow_run_id }}" - attempt_number="1" + attempt_number="${{ inputs.test_workflow_run_attempt }}" elif [[ "$event_name" == "workflow_run" ]]; then run_id="${{ github.event.workflow_run.id }}" attempt_number="${{ github.event.workflow_run.run_attempt }}" diff --git a/infra/data_collection/github/utils.py b/infra/data_collection/github/utils.py index 54911294573..84ef0d97a70 100644 --- a/infra/data_collection/github/utils.py +++ b/infra/data_collection/github/utils.py @@ -59,7 +59,17 @@ def get_pipeline_row_from_github_info(github_runner_environment, github_pipeline jobs = github_jobs_json["jobs"] jobs_start_times = list(map(lambda job_: get_datetime_from_github_datetime(job_["started_at"]), jobs)) - sorted_jobs_start_times = sorted(jobs_start_times) + # We filter out jobs that started before the pipeline was submitted, because that means they're from a previous attempt for that pipeline + eligible_jobs_start_times = list( + filter( + lambda job_start_time_: job_start_time_ >= get_datetime_from_github_datetime(pipeline_submission_ts), + jobs_start_times, + ) + ) + sorted_jobs_start_times = sorted(eligible_jobs_start_times) + assert ( + sorted_jobs_start_times + ), f"It seems that this pipeline does not have any jobs that started on or after the pipeline was submitted, which should be impossible. 
Please directly inspect the JSON objects" pipeline_start_ts = get_data_pipeline_datetime_from_datetime(sorted_jobs_start_times[0]) pipeline_end_ts = github_pipeline_json["updated_at"]