Skip to content

Commit

Permalink
#10718: Fix issue with negative pipeline queue times (#12010)
Browse files Browse the repository at this point in the history
* #10718: Filter out jobs whose start time is earlier than the pipeline start time, because that means the pipeline is a re-run and we need to account for jobs that already passed in an earlier attempt

* #10718: Add a comment explaining the job start-time filter we just added

* #10718: Add the workflow run attempt as a workflow_dispatch input so we can test different attempts
  • Loading branch information
tt-rkim authored Aug 28, 2024
1 parent 516a891 commit 36cfd15
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/_produce-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ on:
description: "Unique GitHub workflow run ID to use for data"
default: 10066309412
type: number
test_workflow_run_attempt:
description: "Run attempt of the workflow run"
default: 1
type: number
workflow_run:
workflows:
- "All post-commit tests"
Expand Down Expand Up @@ -44,7 +48,7 @@ jobs:
event_name="${{ github.event_name }}"
if [[ "$event_name" == "workflow_dispatch" ]]; then
run_id="${{ inputs.test_workflow_run_id }}"
attempt_number="1"
attempt_number="${{ inputs.test_workflow_run_attempt }}"
elif [[ "$event_name" == "workflow_run" ]]; then
run_id="${{ github.event.workflow_run.id }}"
attempt_number="${{ github.event.workflow_run.run_attempt }}"
Expand Down
12 changes: 11 additions & 1 deletion infra/data_collection/github/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,17 @@ def get_pipeline_row_from_github_info(github_runner_environment, github_pipeline

jobs = github_jobs_json["jobs"]
jobs_start_times = list(map(lambda job_: get_datetime_from_github_datetime(job_["started_at"]), jobs))
sorted_jobs_start_times = sorted(jobs_start_times)
# We filter out jobs that started before the pipeline was submitted, because that means they're from a previous attempt of that pipeline
eligible_jobs_start_times = list(
filter(
lambda job_start_time_: job_start_time_ >= get_datetime_from_github_datetime(pipeline_submission_ts),
jobs_start_times,
)
)
sorted_jobs_start_times = sorted(eligible_jobs_start_times)
assert (
sorted_jobs_start_times
), f"It seems that this pipeline does not have any jobs that started on or after the pipeline was submitted, which should be impossible. Please directly inspect the JSON objects"
pipeline_start_ts = get_data_pipeline_datetime_from_datetime(sorted_jobs_start_times[0])

pipeline_end_ts = github_pipeline_json["updated_at"]
Expand Down

0 comments on commit 36cfd15

Please sign in to comment.