Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create addon boot failed issue for repair #5397

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion supervisor/addons/addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@
from ..homeassistant.const import WSEvent, WSType
from ..jobs.const import JobExecutionLimit
from ..jobs.decorator import Job
from ..resolution.const import UnhealthyReason
from ..resolution.const import ContextType, IssueType, UnhealthyReason
from ..resolution.data import Issue
from ..store.addon import AddonStore
from ..utils import check_port
from ..utils.apparmor import adjust_profile
Expand Down Expand Up @@ -144,11 +145,19 @@ def __init__(self, coresys: CoreSys, slug: str):
self._listeners: list[EventListener] = []
self._startup_event = asyncio.Event()
self._startup_task: asyncio.Task | None = None
self._boot_failed_issue = Issue(
IssueType.BOOT_FAIL, ContextType.ADDON, reference=self.slug
)

def __repr__(self) -> str:
"""Return internal representation."""
return f"<Addon: {self.slug}>"

@property
def boot_failed_issue(self) -> Issue:
"""Get issue used if start on boot failed."""
return self._boot_failed_issue

@property
def state(self) -> AddonState:
"""Return state of the add-on."""
Expand All @@ -166,6 +175,13 @@ def state(self, new_state: AddonState) -> None:
if new_state == AddonState.STARTED or old_state == AddonState.STARTUP:
self._startup_event.set()

# Dismiss boot failed issue if present and we started
if (
new_state == AddonState.STARTED
and self.boot_failed_issue in self.sys_resolution.issues
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calling boot_failed_issue will lead to a new object created, is there maybe a better way to check for this issue type?

Copy link
Contributor Author

@mdegat01 mdegat01 Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I followed the same pattern we had used in Mounts. But yea I guess I could cache the object for reuse? The only weird part of that would be the UUID would always be identical each time the property was accessed but I guess that's not really a big deal, we don't use that for anything other then referencing it in the API anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it does feel weird to me to create an object just to check if it's in there. But since we use that pattern already, let's go with that for now.

Maybe worth a follow up which does this a bit more efficient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok fixed it in both places. Issue is created on initialization and reused. When actually added to the resolution center we clone it first. I don't think the UUID sharing thing is an issue but just in case. That's the only time we kind of want a new object.

):
self.sys_resolution.dismiss_issue(self.boot_failed_issue)

self.sys_homeassistant.websocket.send_message(
{
ATTR_TYPE: WSType.SUPERVISOR_EVENT,
Expand Down Expand Up @@ -322,6 +338,13 @@ def boot(self, value: AddonBoot) -> None:
"""Store user boot options."""
self.persist[ATTR_BOOT] = value

# Dismiss boot failed issue if present and boot at start disabled
if (
value == AddonBoot.MANUAL
and self._boot_failed_issue in self.sys_resolution.issues
):
self.sys_resolution.dismiss_issue(self._boot_failed_issue)

@property
def auto_update(self) -> bool:
"""Return if auto update is enable."""
Expand Down
34 changes: 22 additions & 12 deletions supervisor/addons/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,22 @@
import tarfile
from typing import Union

from attr import evolve

from ..const import AddonBoot, AddonStartup, AddonState
from ..coresys import CoreSys, CoreSysAttributes
from ..exceptions import (
AddonConfigurationError,
AddonsError,
AddonsJobError,
AddonsNotSupportedError,
CoreDNSError,
DockerAPIError,
DockerError,
DockerNotFound,
HassioError,
HomeAssistantAPIError,
)
from ..jobs.decorator import Job, JobCondition
from ..resolution.const import ContextType, IssueType, SuggestionType
from ..store.addon import AddonStore
from ..utils import check_exception_chain
from ..utils.sentry import capture_exception
from .addon import Addon
from .const import ADDON_UPDATE_CONDITIONS
Expand Down Expand Up @@ -118,15 +116,14 @@ async def boot(self, stage: AddonStartup) -> None:
try:
if start_task := await addon.start():
wait_boot.append(start_task)
except AddonsError as err:
# Check if there is an system/user issue
if check_exception_chain(
err, (DockerAPIError, DockerNotFound, AddonConfigurationError)
):
addon.boot = AddonBoot.MANUAL
addon.save_persist()
except HassioError:
pass # These are already handled
self.sys_resolution.add_issue(
evolve(addon.boot_failed_issue),
suggestions=[
SuggestionType.EXECUTE_START,
SuggestionType.DISABLE_BOOT,
],
)
else:
continue

Expand All @@ -135,6 +132,19 @@ async def boot(self, stage: AddonStartup) -> None:
# Ignore exceptions from waiting for addon startup, addon errors handled elsewhere
await asyncio.gather(*wait_boot, return_exceptions=True)

# After waiting for startup, create an issue for boot addons that are error or unknown state
# Ignore stopped as single shot addons can be run at boot and this is successful exit
# Timeout waiting for startup is not a failure, addon is probably just slow
for addon in tasks:
if addon.state in {AddonState.ERROR, AddonState.UNKNOWN}:
self.sys_resolution.add_issue(
evolve(addon.boot_failed_issue),
suggestions=[
SuggestionType.EXECUTE_START,
SuggestionType.DISABLE_BOOT,
],
)

async def shutdown(self, stage: AddonStartup) -> None:
"""Shutdown addons."""
tasks: list[Addon] = []
Expand Down
4 changes: 3 additions & 1 deletion supervisor/mounts/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import logging
from pathlib import PurePath

from attr import evolve

from ..const import ATTR_NAME
from ..coresys import CoreSys, CoreSysAttributes
from ..dbus.const import UnitActiveState
Expand Down Expand Up @@ -171,7 +173,7 @@ async def _mount_errors_to_issues(
capture_exception(errors[i])

self.sys_resolution.add_issue(
mounts[i].failed_issue,
evolve(mounts[i].failed_issue),
suggestions=[
SuggestionType.EXECUTE_RELOAD,
SuggestionType.EXECUTE_REMOVE,
Expand Down
5 changes: 4 additions & 1 deletion supervisor/mounts/mount.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ def __init__(self, coresys: CoreSys, data: MountData) -> None:
self._data: MountData = data
self._unit: SystemdUnit | None = None
self._state: UnitActiveState | None = None
self._failed_issue = Issue(
IssueType.MOUNT_FAILED, ContextType.MOUNT, reference=self.name
)

@classmethod
def from_dict(cls, coresys: CoreSys, data: MountData) -> "Mount":
Expand Down Expand Up @@ -162,7 +165,7 @@ def local_where(self) -> Path | None:
@property
def failed_issue(self) -> Issue:
"""Get issue used if this mount has failed."""
return Issue(IssueType.MOUNT_FAILED, ContextType.MOUNT, reference=self.name)
return self._failed_issue

async def is_mounted(self) -> bool:
"""Return true if successfully mounted and available."""
Expand Down
3 changes: 3 additions & 0 deletions supervisor/resolution/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class UnhealthyReason(StrEnum):
class IssueType(StrEnum):
"""Issue type."""

BOOT_FAIL = "boot_fail"
CORRUPT_DOCKER = "corrupt_docker"
CORRUPT_REPOSITORY = "corrupt_repository"
CORRUPT_FILESYSTEM = "corrupt_filesystem"
Expand Down Expand Up @@ -103,13 +104,15 @@ class SuggestionType(StrEnum):
ADOPT_DATA_DISK = "adopt_data_disk"
CLEAR_FULL_BACKUP = "clear_full_backup"
CREATE_FULL_BACKUP = "create_full_backup"
DISABLE_BOOT = "disable_boot"
EXECUTE_INTEGRITY = "execute_integrity"
EXECUTE_REBOOT = "execute_reboot"
EXECUTE_REBUILD = "execute_rebuild"
EXECUTE_RELOAD = "execute_reload"
EXECUTE_REMOVE = "execute_remove"
EXECUTE_REPAIR = "execute_repair"
EXECUTE_RESET = "execute_reset"
EXECUTE_START = "execute_start"
EXECUTE_STOP = "execute_stop"
EXECUTE_UPDATE = "execute_update"
REGISTRY_LOGIN = "registry_login"
Expand Down
48 changes: 48 additions & 0 deletions supervisor/resolution/fixups/addon_disable_boot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Helpers to fix addon by disabling boot."""

import logging

from ...const import AddonBoot
from ...coresys import CoreSys
from ..const import ContextType, IssueType, SuggestionType
from .base import FixupBase

_LOGGER: logging.Logger = logging.getLogger(__name__)


def setup(coresys: CoreSys) -> FixupBase:
"""Check setup function."""
return FixupAddonDisableBoot(coresys)


class FixupAddonDisableBoot(FixupBase):
"""Storage class for fixup."""

async def process_fixup(self, reference: str | None = None) -> None:
"""Initialize the fixup class."""
if not (addon := self.sys_addons.get(reference, local_only=True)):
_LOGGER.info("Cannot change addon %s as it does not exist", reference)
return

# Disable boot on addon
addon.boot = AddonBoot.MANUAL

mdegat01 marked this conversation as resolved.
Show resolved Hide resolved
@property
def suggestion(self) -> SuggestionType:
"""Return a SuggestionType enum."""
return SuggestionType.DISABLE_BOOT

@property
def context(self) -> ContextType:
"""Return a ContextType enum."""
return ContextType.ADDON

@property
def issues(self) -> list[IssueType]:
"""Return a IssueType enum list."""
return [IssueType.BOOT_FAIL]

@property
def auto(self) -> bool:
"""Return if a fixup can be apply as auto fix."""
return False
59 changes: 59 additions & 0 deletions supervisor/resolution/fixups/addon_execute_start.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Helpers to fix addon by starting it."""

import logging

from ...const import AddonState
from ...coresys import CoreSys
from ...exceptions import AddonsError, ResolutionFixupError
from ..const import ContextType, IssueType, SuggestionType
from .base import FixupBase

_LOGGER: logging.Logger = logging.getLogger(__name__)


def setup(coresys: CoreSys) -> FixupBase:
"""Check setup function."""
return FixupAddonExecuteStart(coresys)


class FixupAddonExecuteStart(FixupBase):
"""Storage class for fixup."""

async def process_fixup(self, reference: str | None = None) -> None:
"""Initialize the fixup class."""
if not (addon := self.sys_addons.get(reference, local_only=True)):
_LOGGER.info("Cannot start addon %s as it does not exist", reference)
return

mdegat01 marked this conversation as resolved.
Show resolved Hide resolved
# Start addon
try:
start_task = await addon.start()
except AddonsError as err:
_LOGGER.error("Could not start %s due to %s", reference, err)
raise ResolutionFixupError() from None

# Wait for addon start. If it ends up in error or unknown state it's not fixed
await start_task
if addon.state in {AddonState.ERROR, AddonState.UNKNOWN}:
_LOGGER.error("Addon %s could not start successfully", reference)
raise ResolutionFixupError()

mdegat01 marked this conversation as resolved.
Show resolved Hide resolved
@property
def suggestion(self) -> SuggestionType:
"""Return a SuggestionType enum."""
return SuggestionType.EXECUTE_START

@property
def context(self) -> ContextType:
"""Return a ContextType enum."""
return ContextType.ADDON

@property
def issues(self) -> list[IssueType]:
"""Return a IssueType enum list."""
return [IssueType.BOOT_FAIL]

@property
def auto(self) -> bool:
"""Return if a fixup can be apply as auto fix."""
return False
33 changes: 32 additions & 1 deletion tests/addons/test_addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from supervisor.addons.addon import Addon
from supervisor.addons.const import AddonBackupMode
from supervisor.addons.model import AddonModel
from supervisor.const import AddonState, BusEvent
from supervisor.const import AddonBoot, AddonState, BusEvent
from supervisor.coresys import CoreSys
from supervisor.docker.addon import DockerAddon
from supervisor.docker.const import ContainerState
Expand All @@ -24,6 +24,8 @@
from supervisor.store.repository import Repository
from supervisor.utils.dt import utcnow

from .test_manager import BOOT_FAIL_ISSUE, BOOT_FAIL_SUGGESTIONS

from tests.common import get_fixture_path
from tests.const import TEST_ADDON_SLUG

Expand Down Expand Up @@ -895,3 +897,32 @@ async def test_addon_manual_only_boot(coresys: CoreSys, install_addon_example: A
# However boot mode can change on update and user may have set auto before, ensure it is ignored
install_addon_example.boot = "auto"
assert install_addon_example.boot == "manual"


async def test_addon_start_dismisses_boot_fail(
coresys: CoreSys, install_addon_ssh: Addon
):
"""Test a successful start dismisses the boot fail issue."""
install_addon_ssh.state = AddonState.ERROR
coresys.resolution.add_issue(
BOOT_FAIL_ISSUE, [suggestion.type for suggestion in BOOT_FAIL_SUGGESTIONS]
)

install_addon_ssh.state = AddonState.STARTED
assert coresys.resolution.issues == []
assert coresys.resolution.suggestions == []


async def test_addon_disable_boot_dismisses_boot_fail(
coresys: CoreSys, install_addon_ssh: Addon
):
"""Test a disabling boot dismisses the boot fail issue."""
install_addon_ssh.boot = AddonBoot.AUTO
install_addon_ssh.state = AddonState.ERROR
coresys.resolution.add_issue(
BOOT_FAIL_ISSUE, [suggestion.type for suggestion in BOOT_FAIL_SUGGESTIONS]
)

install_addon_ssh.boot = AddonBoot.MANUAL
assert coresys.resolution.issues == []
assert coresys.resolution.suggestions == []
Loading
Loading