Skip to content

Commit

Permalink
PTFE-780 Fix bug with runner status and API status update (#380)
Browse files Browse the repository at this point in the history
  • Loading branch information
tcarmet authored Aug 25, 2023
1 parent a99de2d commit 72028d7
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 31 deletions.
29 changes: 17 additions & 12 deletions runner_manager/models/runner.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from datetime import datetime, timedelta
from enum import Enum
from typing import List, Literal, Optional
Expand All @@ -10,9 +11,9 @@
from redis_om import Field, NotFoundError

from runner_manager.clients.github import GitHub
from runner_manager.logging import log
from runner_manager.models.base import BaseModel

log = logging.getLogger(__name__)
# Ideally the runner model would have been inherited
# from githubkit.rest.models.Runner, like the following:
# class Runner(BaseModel, githubkit.rest.models.Runner):
Expand All @@ -25,7 +26,6 @@

class RunnerStatus(str, Enum):
online = "online"
idle = "idle"
offline = "offline"


Expand Down Expand Up @@ -89,17 +89,19 @@ def find_from_webhook(cls, webhook: WorkflowJobEvent) -> "Runner":
return runner

@property
def is_online(self) -> bool:
"""Check if the runner is online
def is_active(self) -> bool:
"""Check if the runner is active.
An active runner is a runner that is running a job.
Returns:
bool: True if the runner is online, False otherwise.
bool: True if the runner is active, False otherwise.
"""
return self.status == RunnerStatus.online
return self.status == RunnerStatus.online and self.busy is True

@property
def is_offline(self) -> bool:
"""Check if the runner is offline
"""Check if the runner is offline.
Returns:
bool: True if the runner is offline, False otherwise.
Expand All @@ -108,12 +110,15 @@ def is_offline(self) -> bool:

@property
def is_idle(self) -> bool:
"""Check if the runner is idle
"""Check if the runner is idle.
An idle runner is a runner that is online and
properly attached to GitHub but is not running a job.
Returns:
bool: True if the runner is idle, False otherwise.
"""
return self.status == RunnerStatus.idle
return self.status == RunnerStatus.online and self.busy is False

@property
def time_since_created(self) -> timedelta:
Expand Down Expand Up @@ -144,7 +149,7 @@ def time_to_start_expired(self, timeout: timedelta) -> bool:
return self.is_offline and self.time_since_created > timeout

def time_to_live_expired(self, time_to_live: timedelta) -> bool:
return self.is_online and self.time_since_started > time_to_live
return self.is_active and self.time_since_started > time_to_live

def update_from_github(self, github: GitHub) -> "Runner":
if self.id is not None:
Expand All @@ -153,9 +158,9 @@ def update_from_github(self, github: GitHub) -> "Runner":
org=self.organization, runner_id=self.id
).parsed_data
)
self.status = RunnerStatus(self.status)
self.status = RunnerStatus(github_runner.status)
self.busy = github_runner.busy
log.info(f"Runner {self.name} status updated to {self.status}")
log.info(f"Runner {self.name} status updated to {self.status}")
return self.save()

def generate_jit_config(self, github: GitHub) -> "Runner":
Expand Down
10 changes: 8 additions & 2 deletions runner_manager/models/runner_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,9 @@ def delete_runner(self, runner: Runner) -> int:
@property
def need_new_runner(self) -> bool:
runners = self.get_runners()
idle = len([runner for runner in runners if runner.busy is False])
not_active = len([runner for runner in runners if runner.is_active is False])
count = len(runners)
return idle < self.min and count < self.max
return not_active < self.min and count < self.max

def create_github_group(self, github: GitHub) -> GitHubRunnerGroup:
"""Create a GitHub runner group."""
Expand Down Expand Up @@ -254,6 +254,12 @@ def healthcheck(
runner: Runner = self.create_runner(github)
if runner:
log.info(f"Runner {runner.name} created")
idle_runners = [runner for runner in self.get_runners() if runner.is_idle]
# check if there's more idle runners than the minimum
while len(idle_runners) > self.min:
runner = idle_runners.pop()
self.delete_runner(runner)
log.info(f"Runner {runner.name} deleted")

@classmethod
def find_from_base(cls, basegroup: "BaseRunnerGroup") -> "RunnerGroup":
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ def runner(settings) -> Runner:
runner: Runner = Runner(
id=1,
name="test",
organization="octo-org",
runner_group_id=1,
status="online",
status="offline",
busy=False,
labels=[RunnerLabel(name="label")],
manager=settings.name,
Expand Down
23 changes: 13 additions & 10 deletions tests/unit/jobs/test_healthchecks.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,25 @@ def test_healthchecks_hypothesis(
def test_group_healthcheck(
runner_group: RunnerGroup, settings: Settings, github: GitHub
):
assert settings.timeout_runner
assert settings.time_to_live
assert runner_group.min == 0
runner_group.save(github=github)

runner_tts: Runner = runner_group.create_runner(github)
assert runner_tts is not None
runner_tts.created_at = datetime.now() - (
settings.timeout_runner + timedelta(minutes=1)
)
# Removing id to avoid retrieving info from GitHub mock API
runner_tts.id = None
runner_tts.save()
runner_ttl: Runner = runner_group.create_runner(github)
assert runner_ttl is not None
# Removing id to avoid retrieving info from GitHub mock API
runner_ttl.id = None
runner_ttl.status = RunnerStatus.online
runner_ttl.busy = True
runner_ttl.started_at = datetime.now() - (
settings.time_to_live + timedelta(minutes=1)
)
Expand All @@ -68,9 +77,10 @@ def test_group_healthcheck(
def test_need_new_runner_healthcheck(
runner_group: RunnerGroup, settings: Settings, github: GitHub
):
runner_group.max = 2
runner_group.max = 1
runner_group.min = 1
runner_group.save()
assert len(runner_group.get_runners()) == 0
assert runner_group.need_new_runner is True
runner_group.healthcheck(settings.time_to_live, settings.timeout_runner, github)
assert runner_group.need_new_runner is False
Expand All @@ -91,23 +101,16 @@ def test_time_to_start(runner: Runner, settings: Settings):


def test_time_to_live(runner: Runner, settings: Settings):
assert settings.time_to_live
runner.started_at = datetime.now() - (settings.time_to_live + timedelta(minutes=1))
runner.status = RunnerStatus.online
runner.busy = True
assert runner.time_to_live_expired(settings.time_to_live) is True

runner.started_at = datetime.now() - (settings.time_to_live - timedelta(minutes=1))
assert runner.time_to_live_expired(settings.time_to_live) is False


def test_need_new_runner(runner_group: RunnerGroup, github: GitHub):
runner_group.max = 2
runner_group.min = 1
runner_group.save()
assert runner_group.need_new_runner is True
runner_group.create_runner(github)
assert runner_group.need_new_runner is False


def test_healthcheck_job(
runner_group: RunnerGroup, settings: Settings, queue: Queue, github: GitHub
):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/jobs/test_workflow_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def test_workflow_job_in_progress(
id=webhook.workflow_job.runner_id,
name=webhook.workflow_job.runner_name,
busy=False,
status="idle",
status="online",
manager=settings.name,
runner_group_id=webhook.workflow_job.runner_group_id,
runner_group_name=webhook.workflow_job.runner_group_name,
Expand Down
16 changes: 15 additions & 1 deletion tests/unit/models/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from hypothesis import strategies as st
from redis_om import Migrator, NotFoundError

from runner_manager.clients.github import GitHub
from runner_manager.models.runner import Runner

from ...strategies import WorkflowJobCompletedStrategy
Expand All @@ -12,7 +13,7 @@
@given(st.builds(Runner))
def test_validate_runner(instance: Runner):
assert instance.name is not None
assert instance.status in ["online", "offline", "idle"]
assert instance.status in ["online", "offline"]
assert isinstance(instance.busy, bool)


Expand Down Expand Up @@ -47,3 +48,16 @@ def test_find_from_webhook(runner: Runner, webhook: WorkflowJobCompleted):
assert Runner.find_from_webhook(webhook) == runner
runner.delete(runner.pk)
assert Runner.find_from_webhook(webhook) is None


def test_update_from_github(runner: Runner, github: GitHub):
runner.save()
assert runner.id is not None, "Runner must have an id"
github_runner = github.rest.actions.get_self_hosted_runner_for_org(
org=runner.organization, runner_id=runner.id
).parsed_data
print(github_runner)
runner.update_from_github(github)
assert runner.busy == github_runner.busy
assert runner.status == github_runner.status
assert runner.status == "online"
9 changes: 5 additions & 4 deletions tests/unit/models/test_runner_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,17 @@ def test_runner_group_name():


def test_need_new_runner(runner_group: RunnerGroup, github: GitHub):
runner_group.min = 1
runner_group.max = 2
runner_group.min = 1
runner_group.save()
assert runner_group.need_new_runner is True
runner = runner_group.create_runner(github)
assert runner is not None
# One runner is expected to be created we don't need a new one.
assert runner_group.need_new_runner is False
assert runner is not None
# Pretend the runner is now active.
runner.status = RunnerStatus.online
runner.busy = True
runner.save()
Migrator().run()
assert runner_group.need_new_runner is True
runner_group.create_runner(github)
assert runner_group.need_new_runner is False

0 comments on commit 72028d7

Please sign in to comment.